{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.010031634190783, "eval_steps": 500, "global_step": 500000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 2.0200632683815657e-05, "grad_norm": 316.38043212890625, "learning_rate": 2e-09, "loss": 26.2272, "step": 10 }, { "epoch": 4.040126536763131e-05, "grad_norm": 576.4369506835938, "learning_rate": 4e-09, "loss": 32.9983, "step": 20 }, { "epoch": 6.060189805144697e-05, "grad_norm": 472.15478515625, "learning_rate": 6e-09, "loss": 23.2158, "step": 30 }, { "epoch": 8.080253073526263e-05, "grad_norm": 20.11992835998535, "learning_rate": 8e-09, "loss": 19.9305, "step": 40 }, { "epoch": 0.00010100316341907829, "grad_norm": 766.17529296875, "learning_rate": 1e-08, "loss": 35.6484, "step": 50 }, { "epoch": 0.00012120379610289395, "grad_norm": 649.330810546875, "learning_rate": 1.2e-08, "loss": 23.5938, "step": 60 }, { "epoch": 0.0001414044287867096, "grad_norm": 638.9619750976562, "learning_rate": 1.4000000000000001e-08, "loss": 20.7426, "step": 70 }, { "epoch": 0.00016160506147052525, "grad_norm": 703.6967163085938, "learning_rate": 1.6e-08, "loss": 33.9524, "step": 80 }, { "epoch": 0.0001818056941543409, "grad_norm": 785.22998046875, "learning_rate": 1.8000000000000002e-08, "loss": 29.1029, "step": 90 }, { "epoch": 0.00020200632683815657, "grad_norm": 676.3937377929688, "learning_rate": 2e-08, "loss": 26.1458, "step": 100 }, { "epoch": 0.00022220695952197223, "grad_norm": 246.762451171875, "learning_rate": 2.2000000000000002e-08, "loss": 25.4453, "step": 110 }, { "epoch": 0.0002424075922057879, "grad_norm": 1033.1109619140625, "learning_rate": 2.4e-08, "loss": 38.0849, "step": 120 }, { "epoch": 0.0002626082248896035, "grad_norm": 724.1307983398438, "learning_rate": 2.6e-08, "loss": 42.023, "step": 130 }, { "epoch": 0.0002828088575734192, "grad_norm": 493.44427490234375, "learning_rate": 2.8000000000000003e-08, "loss": 27.2123, "step": 140 }, { "epoch": 0.00030300949025723485, "grad_norm": 665.4328002929688, "learning_rate": 3.0000000000000004e-08, "loss": 34.0098, "step": 150 }, { "epoch": 0.0003232101229410505, "grad_norm": 844.9791259765625, "learning_rate": 3.2e-08, "loss": 34.3665, "step": 160 }, { "epoch": 0.00034341075562486617, "grad_norm": 32.71566390991211, "learning_rate": 3.4e-08, "loss": 33.4008, "step": 170 }, { "epoch": 0.0003636113883086818, "grad_norm": 1014.241455078125, "learning_rate": 3.6000000000000005e-08, "loss": 37.7545, "step": 180 }, { "epoch": 0.0003838120209924975, "grad_norm": 883.66259765625, "learning_rate": 3.8e-08, "loss": 27.2009, "step": 190 }, { "epoch": 0.00040401265367631315, "grad_norm": 984.9612426757812, "learning_rate": 4e-08, "loss": 16.6661, "step": 200 }, { "epoch": 0.0004242132863601288, "grad_norm": 553.194091796875, "learning_rate": 4.2e-08, "loss": 36.3195, "step": 210 }, { "epoch": 0.00044441391904394446, "grad_norm": 313.7496032714844, "learning_rate": 4.4000000000000004e-08, "loss": 16.8871, "step": 220 }, { "epoch": 0.0004646145517277601, "grad_norm": 525.6804809570312, "learning_rate": 4.6e-08, "loss": 17.3601, "step": 230 }, { "epoch": 0.0004848151844115758, "grad_norm": 206.4374237060547, "learning_rate": 4.8e-08, "loss": 34.4888, "step": 240 }, { "epoch": 0.0005050158170953914, "grad_norm": 665.89453125, "learning_rate": 5.0000000000000004e-08, "loss": 40.5891, "step": 250 }, { "epoch": 0.000525216449779207, "grad_norm": 1490.542724609375, "learning_rate": 5.2e-08, "loss": 26.4835, "step": 260 }, { 
"epoch": 0.0005454170824630227, "grad_norm": 157.42401123046875, "learning_rate": 5.400000000000001e-08, "loss": 10.1466, "step": 270 }, { "epoch": 0.0005656177151468384, "grad_norm": 896.1552124023438, "learning_rate": 5.6000000000000005e-08, "loss": 46.9781, "step": 280 }, { "epoch": 0.000585818347830654, "grad_norm": 1364.9993896484375, "learning_rate": 5.8e-08, "loss": 40.9419, "step": 290 }, { "epoch": 0.0006060189805144697, "grad_norm": 1216.1474609375, "learning_rate": 6.000000000000001e-08, "loss": 30.2287, "step": 300 }, { "epoch": 0.0006262196131982854, "grad_norm": 328.39678955078125, "learning_rate": 6.2e-08, "loss": 38.7195, "step": 310 }, { "epoch": 0.000646420245882101, "grad_norm": 433.4118957519531, "learning_rate": 6.4e-08, "loss": 22.1306, "step": 320 }, { "epoch": 0.0006666208785659167, "grad_norm": 420.328857421875, "learning_rate": 6.600000000000001e-08, "loss": 27.7195, "step": 330 }, { "epoch": 0.0006868215112497323, "grad_norm": 185.5102081298828, "learning_rate": 6.8e-08, "loss": 37.9056, "step": 340 }, { "epoch": 0.000707022143933548, "grad_norm": 337.02044677734375, "learning_rate": 7e-08, "loss": 22.7152, "step": 350 }, { "epoch": 0.0007272227766173637, "grad_norm": 670.1768188476562, "learning_rate": 7.200000000000001e-08, "loss": 28.3785, "step": 360 }, { "epoch": 0.0007474234093011793, "grad_norm": 475.4079895019531, "learning_rate": 7.400000000000001e-08, "loss": 22.1121, "step": 370 }, { "epoch": 0.000767624041984995, "grad_norm": 607.6908569335938, "learning_rate": 7.6e-08, "loss": 31.8589, "step": 380 }, { "epoch": 0.0007878246746688106, "grad_norm": 800.4915161132812, "learning_rate": 7.8e-08, "loss": 21.862, "step": 390 }, { "epoch": 0.0008080253073526263, "grad_norm": 1185.45849609375, "learning_rate": 8e-08, "loss": 35.8806, "step": 400 }, { "epoch": 0.000828225940036442, "grad_norm": 452.5521240234375, "learning_rate": 8.200000000000002e-08, "loss": 17.0217, "step": 410 }, { "epoch": 0.0008484265727202576, "grad_norm": 125.98954010009766, "learning_rate": 8.4e-08, "loss": 29.4705, "step": 420 }, { "epoch": 0.0008686272054040733, "grad_norm": 301.3433532714844, "learning_rate": 8.6e-08, "loss": 22.8971, "step": 430 }, { "epoch": 0.0008888278380878889, "grad_norm": 578.7197875976562, "learning_rate": 8.800000000000001e-08, "loss": 20.5219, "step": 440 }, { "epoch": 0.0009090284707717046, "grad_norm": 1337.2423095703125, "learning_rate": 9e-08, "loss": 38.1059, "step": 450 }, { "epoch": 0.0009292291034555202, "grad_norm": 971.8609619140625, "learning_rate": 9.2e-08, "loss": 28.7212, "step": 460 }, { "epoch": 0.0009494297361393359, "grad_norm": 635.0576171875, "learning_rate": 9.400000000000001e-08, "loss": 28.1718, "step": 470 }, { "epoch": 0.0009696303688231516, "grad_norm": 347.902587890625, "learning_rate": 9.6e-08, "loss": 35.6753, "step": 480 }, { "epoch": 0.0009898310015069671, "grad_norm": 804.568115234375, "learning_rate": 9.8e-08, "loss": 52.5154, "step": 490 }, { "epoch": 0.0010100316341907828, "grad_norm": 357.3641662597656, "learning_rate": 1.0000000000000001e-07, "loss": 23.8221, "step": 500 }, { "epoch": 0.0010302322668745984, "grad_norm": 695.1265869140625, "learning_rate": 1.0200000000000001e-07, "loss": 29.7152, "step": 510 }, { "epoch": 0.001050432899558414, "grad_norm": 466.5853271484375, "learning_rate": 1.04e-07, "loss": 23.3024, "step": 520 }, { "epoch": 0.0010706335322422298, "grad_norm": 725.106201171875, "learning_rate": 1.0600000000000001e-07, "loss": 20.0807, "step": 530 }, { "epoch": 0.0010908341649260454, "grad_norm": 
500.51708984375, "learning_rate": 1.0800000000000001e-07, "loss": 16.9731, "step": 540 }, { "epoch": 0.001111034797609861, "grad_norm": 643.19580078125, "learning_rate": 1.1e-07, "loss": 35.556, "step": 550 }, { "epoch": 0.0011312354302936767, "grad_norm": 606.3302001953125, "learning_rate": 1.1200000000000001e-07, "loss": 27.9096, "step": 560 }, { "epoch": 0.0011514360629774924, "grad_norm": 455.76116943359375, "learning_rate": 1.1400000000000001e-07, "loss": 27.992, "step": 570 }, { "epoch": 0.001171636695661308, "grad_norm": 890.6718139648438, "learning_rate": 1.16e-07, "loss": 20.3737, "step": 580 }, { "epoch": 0.0011918373283451237, "grad_norm": 521.8313598632812, "learning_rate": 1.1800000000000001e-07, "loss": 21.4314, "step": 590 }, { "epoch": 0.0012120379610289394, "grad_norm": 526.7328491210938, "learning_rate": 1.2000000000000002e-07, "loss": 35.9307, "step": 600 }, { "epoch": 0.001232238593712755, "grad_norm": 491.2201843261719, "learning_rate": 1.22e-07, "loss": 15.4924, "step": 610 }, { "epoch": 0.0012524392263965707, "grad_norm": 1669.0023193359375, "learning_rate": 1.24e-07, "loss": 31.0749, "step": 620 }, { "epoch": 0.0012726398590803864, "grad_norm": 747.734619140625, "learning_rate": 1.2600000000000002e-07, "loss": 27.6358, "step": 630 }, { "epoch": 0.001292840491764202, "grad_norm": 415.5186767578125, "learning_rate": 1.28e-07, "loss": 34.9266, "step": 640 }, { "epoch": 0.0013130411244480177, "grad_norm": 812.7051391601562, "learning_rate": 1.3e-07, "loss": 28.9554, "step": 650 }, { "epoch": 0.0013332417571318333, "grad_norm": 391.8579406738281, "learning_rate": 1.3200000000000002e-07, "loss": 16.3211, "step": 660 }, { "epoch": 0.001353442389815649, "grad_norm": 569.1922607421875, "learning_rate": 1.34e-07, "loss": 40.6097, "step": 670 }, { "epoch": 0.0013736430224994647, "grad_norm": 784.5740356445312, "learning_rate": 1.36e-07, "loss": 38.8948, "step": 680 }, { "epoch": 0.0013938436551832803, "grad_norm": 284.9936218261719, "learning_rate": 1.3800000000000002e-07, "loss": 28.3477, "step": 690 }, { "epoch": 0.001414044287867096, "grad_norm": 518.3870239257812, "learning_rate": 1.4e-07, "loss": 25.8091, "step": 700 }, { "epoch": 0.0014342449205509116, "grad_norm": 448.25848388671875, "learning_rate": 1.4200000000000003e-07, "loss": 21.6013, "step": 710 }, { "epoch": 0.0014544455532347273, "grad_norm": 175.99990844726562, "learning_rate": 1.4400000000000002e-07, "loss": 30.1821, "step": 720 }, { "epoch": 0.001474646185918543, "grad_norm": 863.377685546875, "learning_rate": 1.46e-07, "loss": 51.8515, "step": 730 }, { "epoch": 0.0014948468186023586, "grad_norm": 175.85592651367188, "learning_rate": 1.4800000000000003e-07, "loss": 25.4575, "step": 740 }, { "epoch": 0.0015150474512861743, "grad_norm": 640.6859741210938, "learning_rate": 1.5000000000000002e-07, "loss": 26.2666, "step": 750 }, { "epoch": 0.00153524808396999, "grad_norm": 811.0674438476562, "learning_rate": 1.52e-07, "loss": 24.3395, "step": 760 }, { "epoch": 0.0015554487166538056, "grad_norm": 628.3569946289062, "learning_rate": 1.5400000000000003e-07, "loss": 25.5079, "step": 770 }, { "epoch": 0.0015756493493376213, "grad_norm": 796.8837890625, "learning_rate": 1.56e-07, "loss": 22.1897, "step": 780 }, { "epoch": 0.001595849982021437, "grad_norm": 634.3350830078125, "learning_rate": 1.5800000000000004e-07, "loss": 13.3538, "step": 790 }, { "epoch": 0.0016160506147052526, "grad_norm": 0.0, "learning_rate": 1.6e-07, "loss": 40.9726, "step": 800 }, { "epoch": 0.0016362512473890682, "grad_norm": 
891.0134887695312, "learning_rate": 1.62e-07, "loss": 33.7623, "step": 810 }, { "epoch": 0.001656451880072884, "grad_norm": 124.5013198852539, "learning_rate": 1.6400000000000004e-07, "loss": 32.6563, "step": 820 }, { "epoch": 0.0016766525127566996, "grad_norm": 557.0490112304688, "learning_rate": 1.66e-07, "loss": 11.069, "step": 830 }, { "epoch": 0.0016968531454405152, "grad_norm": 370.4107971191406, "learning_rate": 1.68e-07, "loss": 18.7746, "step": 840 }, { "epoch": 0.0017170537781243309, "grad_norm": 470.0067443847656, "learning_rate": 1.7000000000000001e-07, "loss": 53.0731, "step": 850 }, { "epoch": 0.0017372544108081465, "grad_norm": 250.24252319335938, "learning_rate": 1.72e-07, "loss": 19.3808, "step": 860 }, { "epoch": 0.0017574550434919622, "grad_norm": 1099.9942626953125, "learning_rate": 1.74e-07, "loss": 50.3866, "step": 870 }, { "epoch": 0.0017776556761757779, "grad_norm": 599.7510375976562, "learning_rate": 1.7600000000000001e-07, "loss": 23.1743, "step": 880 }, { "epoch": 0.0017978563088595935, "grad_norm": 401.0577087402344, "learning_rate": 1.78e-07, "loss": 18.497, "step": 890 }, { "epoch": 0.0018180569415434092, "grad_norm": 1774.6864013671875, "learning_rate": 1.8e-07, "loss": 39.7275, "step": 900 }, { "epoch": 0.0018382575742272248, "grad_norm": 950.1702880859375, "learning_rate": 1.8200000000000002e-07, "loss": 34.1336, "step": 910 }, { "epoch": 0.0018584582069110405, "grad_norm": 681.1715087890625, "learning_rate": 1.84e-07, "loss": 22.6729, "step": 920 }, { "epoch": 0.0018786588395948562, "grad_norm": 548.5640869140625, "learning_rate": 1.86e-07, "loss": 17.3724, "step": 930 }, { "epoch": 0.0018988594722786718, "grad_norm": 570.0945434570312, "learning_rate": 1.8800000000000002e-07, "loss": 40.4859, "step": 940 }, { "epoch": 0.0019190601049624875, "grad_norm": 411.9521789550781, "learning_rate": 1.9e-07, "loss": 19.4746, "step": 950 }, { "epoch": 0.0019392607376463031, "grad_norm": 781.8963623046875, "learning_rate": 1.92e-07, "loss": 54.7843, "step": 960 }, { "epoch": 0.0019594613703301186, "grad_norm": 296.8689270019531, "learning_rate": 1.9400000000000002e-07, "loss": 29.0034, "step": 970 }, { "epoch": 0.0019796620030139342, "grad_norm": 842.5889282226562, "learning_rate": 1.96e-07, "loss": 37.4423, "step": 980 }, { "epoch": 0.00199986263569775, "grad_norm": 948.009033203125, "learning_rate": 1.9800000000000003e-07, "loss": 30.0773, "step": 990 }, { "epoch": 0.0020200632683815656, "grad_norm": 215.3368377685547, "learning_rate": 2.0000000000000002e-07, "loss": 26.6014, "step": 1000 }, { "epoch": 0.0020402639010653812, "grad_norm": 494.67578125, "learning_rate": 2.02e-07, "loss": 21.6492, "step": 1010 }, { "epoch": 0.002060464533749197, "grad_norm": 234.77099609375, "learning_rate": 2.0400000000000003e-07, "loss": 24.3645, "step": 1020 }, { "epoch": 0.0020806651664330125, "grad_norm": 327.6997985839844, "learning_rate": 2.0600000000000002e-07, "loss": 24.5405, "step": 1030 }, { "epoch": 0.002100865799116828, "grad_norm": 611.9917602539062, "learning_rate": 2.08e-07, "loss": 24.577, "step": 1040 }, { "epoch": 0.002121066431800644, "grad_norm": 133.8150177001953, "learning_rate": 2.1000000000000003e-07, "loss": 33.8902, "step": 1050 }, { "epoch": 0.0021412670644844595, "grad_norm": 163.73504638671875, "learning_rate": 2.1200000000000002e-07, "loss": 17.783, "step": 1060 }, { "epoch": 0.002161467697168275, "grad_norm": 193.5026397705078, "learning_rate": 2.14e-07, "loss": 20.8054, "step": 1070 }, { "epoch": 0.002181668329852091, "grad_norm": 624.8909301757812, 
"learning_rate": 2.1600000000000003e-07, "loss": 10.6967, "step": 1080 }, { "epoch": 0.0022018689625359065, "grad_norm": 430.49560546875, "learning_rate": 2.1800000000000002e-07, "loss": 45.0397, "step": 1090 }, { "epoch": 0.002222069595219722, "grad_norm": 1073.7850341796875, "learning_rate": 2.2e-07, "loss": 42.1204, "step": 1100 }, { "epoch": 0.002242270227903538, "grad_norm": 486.751708984375, "learning_rate": 2.2200000000000003e-07, "loss": 23.1064, "step": 1110 }, { "epoch": 0.0022624708605873535, "grad_norm": 1003.8533935546875, "learning_rate": 2.2400000000000002e-07, "loss": 43.3564, "step": 1120 }, { "epoch": 0.002282671493271169, "grad_norm": 572.0302734375, "learning_rate": 2.26e-07, "loss": 23.8053, "step": 1130 }, { "epoch": 0.002302872125954985, "grad_norm": 447.3976745605469, "learning_rate": 2.2800000000000003e-07, "loss": 18.1645, "step": 1140 }, { "epoch": 0.0023230727586388005, "grad_norm": 931.8524780273438, "learning_rate": 2.3000000000000002e-07, "loss": 30.1086, "step": 1150 }, { "epoch": 0.002343273391322616, "grad_norm": 875.0267333984375, "learning_rate": 2.32e-07, "loss": 44.5649, "step": 1160 }, { "epoch": 0.0023634740240064318, "grad_norm": 683.9933471679688, "learning_rate": 2.3400000000000003e-07, "loss": 28.6991, "step": 1170 }, { "epoch": 0.0023836746566902474, "grad_norm": 1272.7320556640625, "learning_rate": 2.3600000000000002e-07, "loss": 53.9565, "step": 1180 }, { "epoch": 0.002403875289374063, "grad_norm": 540.0521240234375, "learning_rate": 2.3800000000000004e-07, "loss": 23.6139, "step": 1190 }, { "epoch": 0.0024240759220578788, "grad_norm": 578.9132690429688, "learning_rate": 2.4000000000000003e-07, "loss": 22.7089, "step": 1200 }, { "epoch": 0.0024442765547416944, "grad_norm": 382.25244140625, "learning_rate": 2.42e-07, "loss": 33.4145, "step": 1210 }, { "epoch": 0.00246447718742551, "grad_norm": 1396.89697265625, "learning_rate": 2.44e-07, "loss": 35.8553, "step": 1220 }, { "epoch": 0.0024846778201093257, "grad_norm": 507.25848388671875, "learning_rate": 2.46e-07, "loss": 39.8554, "step": 1230 }, { "epoch": 0.0025048784527931414, "grad_norm": 236.29849243164062, "learning_rate": 2.48e-07, "loss": 20.3849, "step": 1240 }, { "epoch": 0.002525079085476957, "grad_norm": 766.2069702148438, "learning_rate": 2.5000000000000004e-07, "loss": 35.435, "step": 1250 }, { "epoch": 0.0025452797181607727, "grad_norm": 92.45375061035156, "learning_rate": 2.5200000000000003e-07, "loss": 33.7958, "step": 1260 }, { "epoch": 0.0025654803508445884, "grad_norm": 206.84521484375, "learning_rate": 2.54e-07, "loss": 19.2633, "step": 1270 }, { "epoch": 0.002585680983528404, "grad_norm": 1618.84521484375, "learning_rate": 2.56e-07, "loss": 36.3744, "step": 1280 }, { "epoch": 0.0026058816162122197, "grad_norm": 322.9664001464844, "learning_rate": 2.58e-07, "loss": 19.4322, "step": 1290 }, { "epoch": 0.0026260822488960354, "grad_norm": 479.2985534667969, "learning_rate": 2.6e-07, "loss": 44.4554, "step": 1300 }, { "epoch": 0.002646282881579851, "grad_norm": 839.0703125, "learning_rate": 2.6200000000000004e-07, "loss": 25.7475, "step": 1310 }, { "epoch": 0.0026664835142636667, "grad_norm": 496.9524841308594, "learning_rate": 2.6400000000000003e-07, "loss": 14.5702, "step": 1320 }, { "epoch": 0.0026866841469474823, "grad_norm": 206.1656951904297, "learning_rate": 2.66e-07, "loss": 30.1201, "step": 1330 }, { "epoch": 0.002706884779631298, "grad_norm": 688.9887084960938, "learning_rate": 2.68e-07, "loss": 20.7835, "step": 1340 }, { "epoch": 0.0027270854123151137, "grad_norm": 
173.92941284179688, "learning_rate": 2.7e-07, "loss": 38.9963, "step": 1350 }, { "epoch": 0.0027472860449989293, "grad_norm": 416.7894287109375, "learning_rate": 2.72e-07, "loss": 40.8383, "step": 1360 }, { "epoch": 0.002767486677682745, "grad_norm": 602.2549438476562, "learning_rate": 2.7400000000000004e-07, "loss": 22.1494, "step": 1370 }, { "epoch": 0.0027876873103665606, "grad_norm": 1489.0169677734375, "learning_rate": 2.7600000000000004e-07, "loss": 30.0528, "step": 1380 }, { "epoch": 0.0028078879430503763, "grad_norm": 410.69769287109375, "learning_rate": 2.7800000000000003e-07, "loss": 36.4594, "step": 1390 }, { "epoch": 0.002828088575734192, "grad_norm": 99.08312225341797, "learning_rate": 2.8e-07, "loss": 19.1275, "step": 1400 }, { "epoch": 0.0028482892084180076, "grad_norm": 579.4361572265625, "learning_rate": 2.82e-07, "loss": 23.6028, "step": 1410 }, { "epoch": 0.0028684898411018233, "grad_norm": 932.9219970703125, "learning_rate": 2.8400000000000005e-07, "loss": 33.0755, "step": 1420 }, { "epoch": 0.002888690473785639, "grad_norm": 736.8807373046875, "learning_rate": 2.8600000000000005e-07, "loss": 33.5045, "step": 1430 }, { "epoch": 0.0029088911064694546, "grad_norm": 1126.778076171875, "learning_rate": 2.8800000000000004e-07, "loss": 33.3801, "step": 1440 }, { "epoch": 0.0029290917391532703, "grad_norm": 1085.3497314453125, "learning_rate": 2.9000000000000003e-07, "loss": 28.1494, "step": 1450 }, { "epoch": 0.002949292371837086, "grad_norm": 374.22149658203125, "learning_rate": 2.92e-07, "loss": 32.5694, "step": 1460 }, { "epoch": 0.0029694930045209016, "grad_norm": 238.65066528320312, "learning_rate": 2.94e-07, "loss": 18.5137, "step": 1470 }, { "epoch": 0.0029896936372047172, "grad_norm": 2642.603759765625, "learning_rate": 2.9600000000000006e-07, "loss": 33.0043, "step": 1480 }, { "epoch": 0.003009894269888533, "grad_norm": 371.8793640136719, "learning_rate": 2.9800000000000005e-07, "loss": 22.4324, "step": 1490 }, { "epoch": 0.0030300949025723486, "grad_norm": 463.6688232421875, "learning_rate": 3.0000000000000004e-07, "loss": 19.1007, "step": 1500 }, { "epoch": 0.0030502955352561642, "grad_norm": 710.45068359375, "learning_rate": 3.0200000000000003e-07, "loss": 17.671, "step": 1510 }, { "epoch": 0.00307049616793998, "grad_norm": 231.896240234375, "learning_rate": 3.04e-07, "loss": 25.546, "step": 1520 }, { "epoch": 0.0030906968006237955, "grad_norm": 263.27044677734375, "learning_rate": 3.06e-07, "loss": 27.5296, "step": 1530 }, { "epoch": 0.003110897433307611, "grad_norm": 447.924072265625, "learning_rate": 3.0800000000000006e-07, "loss": 65.7333, "step": 1540 }, { "epoch": 0.003131098065991427, "grad_norm": 415.6585998535156, "learning_rate": 3.1000000000000005e-07, "loss": 17.1337, "step": 1550 }, { "epoch": 0.0031512986986752425, "grad_norm": 743.8123779296875, "learning_rate": 3.12e-07, "loss": 31.4509, "step": 1560 }, { "epoch": 0.003171499331359058, "grad_norm": 379.4447326660156, "learning_rate": 3.14e-07, "loss": 33.4806, "step": 1570 }, { "epoch": 0.003191699964042874, "grad_norm": 1378.80615234375, "learning_rate": 3.160000000000001e-07, "loss": 41.5661, "step": 1580 }, { "epoch": 0.0032119005967266895, "grad_norm": 721.19384765625, "learning_rate": 3.1800000000000007e-07, "loss": 24.3997, "step": 1590 }, { "epoch": 0.003232101229410505, "grad_norm": 447.39154052734375, "learning_rate": 3.2e-07, "loss": 20.2326, "step": 1600 }, { "epoch": 0.003252301862094321, "grad_norm": 616.3694458007812, "learning_rate": 3.22e-07, "loss": 28.001, "step": 1610 }, { 
"epoch": 0.0032725024947781365, "grad_norm": 942.26904296875, "learning_rate": 3.24e-07, "loss": 34.0719, "step": 1620 }, { "epoch": 0.003292703127461952, "grad_norm": 340.3528137207031, "learning_rate": 3.26e-07, "loss": 23.7475, "step": 1630 }, { "epoch": 0.003312903760145768, "grad_norm": 616.3563842773438, "learning_rate": 3.280000000000001e-07, "loss": 25.5421, "step": 1640 }, { "epoch": 0.0033331043928295835, "grad_norm": 287.14404296875, "learning_rate": 3.3e-07, "loss": 21.6099, "step": 1650 }, { "epoch": 0.003353305025513399, "grad_norm": 325.98907470703125, "learning_rate": 3.32e-07, "loss": 19.8994, "step": 1660 }, { "epoch": 0.0033735056581972148, "grad_norm": 455.73468017578125, "learning_rate": 3.34e-07, "loss": 24.4489, "step": 1670 }, { "epoch": 0.0033937062908810304, "grad_norm": 970.495361328125, "learning_rate": 3.36e-07, "loss": 25.1155, "step": 1680 }, { "epoch": 0.003413906923564846, "grad_norm": 820.18359375, "learning_rate": 3.38e-07, "loss": 35.97, "step": 1690 }, { "epoch": 0.0034341075562486618, "grad_norm": 135.357177734375, "learning_rate": 3.4000000000000003e-07, "loss": 22.5324, "step": 1700 }, { "epoch": 0.0034543081889324774, "grad_norm": 280.0291748046875, "learning_rate": 3.42e-07, "loss": 27.6606, "step": 1710 }, { "epoch": 0.003474508821616293, "grad_norm": 345.8559265136719, "learning_rate": 3.44e-07, "loss": 39.866, "step": 1720 }, { "epoch": 0.0034947094543001087, "grad_norm": 705.6763305664062, "learning_rate": 3.46e-07, "loss": 18.8156, "step": 1730 }, { "epoch": 0.0035149100869839244, "grad_norm": 416.8535461425781, "learning_rate": 3.48e-07, "loss": 33.5592, "step": 1740 }, { "epoch": 0.00353511071966774, "grad_norm": 552.7217407226562, "learning_rate": 3.5000000000000004e-07, "loss": 12.8888, "step": 1750 }, { "epoch": 0.0035553113523515557, "grad_norm": 208.27780151367188, "learning_rate": 3.5200000000000003e-07, "loss": 27.6918, "step": 1760 }, { "epoch": 0.0035755119850353714, "grad_norm": 537.1244506835938, "learning_rate": 3.54e-07, "loss": 44.3155, "step": 1770 }, { "epoch": 0.003595712617719187, "grad_norm": 284.45733642578125, "learning_rate": 3.56e-07, "loss": 28.734, "step": 1780 }, { "epoch": 0.0036159132504030027, "grad_norm": 387.14532470703125, "learning_rate": 3.58e-07, "loss": 18.956, "step": 1790 }, { "epoch": 0.0036361138830868184, "grad_norm": 370.3746337890625, "learning_rate": 3.6e-07, "loss": 27.5186, "step": 1800 }, { "epoch": 0.003656314515770634, "grad_norm": 302.4902038574219, "learning_rate": 3.6200000000000004e-07, "loss": 30.0567, "step": 1810 }, { "epoch": 0.0036765151484544497, "grad_norm": 381.73919677734375, "learning_rate": 3.6400000000000003e-07, "loss": 29.6831, "step": 1820 }, { "epoch": 0.0036967157811382653, "grad_norm": 463.9573974609375, "learning_rate": 3.66e-07, "loss": 51.3698, "step": 1830 }, { "epoch": 0.003716916413822081, "grad_norm": 156.73703002929688, "learning_rate": 3.68e-07, "loss": 27.487, "step": 1840 }, { "epoch": 0.0037371170465058967, "grad_norm": 124.4668197631836, "learning_rate": 3.7e-07, "loss": 14.7367, "step": 1850 }, { "epoch": 0.0037573176791897123, "grad_norm": 733.3515625, "learning_rate": 3.72e-07, "loss": 28.2449, "step": 1860 }, { "epoch": 0.003777518311873528, "grad_norm": 337.3455810546875, "learning_rate": 3.7400000000000004e-07, "loss": 38.7027, "step": 1870 }, { "epoch": 0.0037977189445573436, "grad_norm": 987.9752197265625, "learning_rate": 3.7600000000000003e-07, "loss": 46.9669, "step": 1880 }, { "epoch": 0.0038179195772411593, "grad_norm": 186.5377655029297, 
"learning_rate": 3.78e-07, "loss": 38.3815, "step": 1890 }, { "epoch": 0.003838120209924975, "grad_norm": 49.196285247802734, "learning_rate": 3.8e-07, "loss": 18.3016, "step": 1900 }, { "epoch": 0.0038583208426087906, "grad_norm": 309.545166015625, "learning_rate": 3.82e-07, "loss": 17.9834, "step": 1910 }, { "epoch": 0.0038785214752926063, "grad_norm": 540.118408203125, "learning_rate": 3.84e-07, "loss": 29.434, "step": 1920 }, { "epoch": 0.003898722107976422, "grad_norm": 20.59901237487793, "learning_rate": 3.8600000000000004e-07, "loss": 34.1916, "step": 1930 }, { "epoch": 0.003918922740660237, "grad_norm": 742.4661254882812, "learning_rate": 3.8800000000000003e-07, "loss": 26.6047, "step": 1940 }, { "epoch": 0.003939123373344053, "grad_norm": 275.4309387207031, "learning_rate": 3.9e-07, "loss": 15.0218, "step": 1950 }, { "epoch": 0.0039593240060278685, "grad_norm": 547.0128173828125, "learning_rate": 3.92e-07, "loss": 30.1546, "step": 1960 }, { "epoch": 0.003979524638711684, "grad_norm": 109.45767211914062, "learning_rate": 3.94e-07, "loss": 22.6386, "step": 1970 }, { "epoch": 0.0039997252713955, "grad_norm": 498.2757263183594, "learning_rate": 3.9600000000000005e-07, "loss": 28.6018, "step": 1980 }, { "epoch": 0.0040199259040793155, "grad_norm": 2670.8740234375, "learning_rate": 3.9800000000000004e-07, "loss": 45.6287, "step": 1990 }, { "epoch": 0.004040126536763131, "grad_norm": 733.6322021484375, "learning_rate": 4.0000000000000003e-07, "loss": 22.9162, "step": 2000 }, { "epoch": 0.004060327169446947, "grad_norm": 774.7802734375, "learning_rate": 4.02e-07, "loss": 19.3823, "step": 2010 }, { "epoch": 0.0040805278021307624, "grad_norm": 323.9580078125, "learning_rate": 4.04e-07, "loss": 23.0773, "step": 2020 }, { "epoch": 0.004100728434814578, "grad_norm": 1654.9927978515625, "learning_rate": 4.06e-07, "loss": 37.3542, "step": 2030 }, { "epoch": 0.004120929067498394, "grad_norm": 831.7189331054688, "learning_rate": 4.0800000000000005e-07, "loss": 19.7393, "step": 2040 }, { "epoch": 0.004141129700182209, "grad_norm": 921.7901611328125, "learning_rate": 4.1000000000000004e-07, "loss": 44.1891, "step": 2050 }, { "epoch": 0.004161330332866025, "grad_norm": 432.77813720703125, "learning_rate": 4.1200000000000004e-07, "loss": 17.2742, "step": 2060 }, { "epoch": 0.004181530965549841, "grad_norm": 1103.80029296875, "learning_rate": 4.1400000000000003e-07, "loss": 34.2473, "step": 2070 }, { "epoch": 0.004201731598233656, "grad_norm": 534.106689453125, "learning_rate": 4.16e-07, "loss": 45.6846, "step": 2080 }, { "epoch": 0.004221932230917472, "grad_norm": 867.72412109375, "learning_rate": 4.18e-07, "loss": 47.1584, "step": 2090 }, { "epoch": 0.004242132863601288, "grad_norm": 230.99696350097656, "learning_rate": 4.2000000000000006e-07, "loss": 18.7068, "step": 2100 }, { "epoch": 0.004262333496285103, "grad_norm": 531.1705322265625, "learning_rate": 4.2200000000000005e-07, "loss": 33.7492, "step": 2110 }, { "epoch": 0.004282534128968919, "grad_norm": 897.9804077148438, "learning_rate": 4.2400000000000004e-07, "loss": 29.6693, "step": 2120 }, { "epoch": 0.004302734761652735, "grad_norm": 1721.717529296875, "learning_rate": 4.2600000000000003e-07, "loss": 39.9116, "step": 2130 }, { "epoch": 0.00432293539433655, "grad_norm": 799.24658203125, "learning_rate": 4.28e-07, "loss": 36.7716, "step": 2140 }, { "epoch": 0.004343136027020366, "grad_norm": 243.7528076171875, "learning_rate": 4.3e-07, "loss": 17.6017, "step": 2150 }, { "epoch": 0.004363336659704182, "grad_norm": 611.0807495117188, 
"learning_rate": 4.3200000000000006e-07, "loss": 28.1989, "step": 2160 }, { "epoch": 0.004383537292387997, "grad_norm": 146.26116943359375, "learning_rate": 4.3400000000000005e-07, "loss": 35.3578, "step": 2170 }, { "epoch": 0.004403737925071813, "grad_norm": 1156.3270263671875, "learning_rate": 4.3600000000000004e-07, "loss": 41.4281, "step": 2180 }, { "epoch": 0.004423938557755629, "grad_norm": 309.5587463378906, "learning_rate": 4.3800000000000003e-07, "loss": 18.0717, "step": 2190 }, { "epoch": 0.004444139190439444, "grad_norm": 573.9265747070312, "learning_rate": 4.4e-07, "loss": 21.9999, "step": 2200 }, { "epoch": 0.00446433982312326, "grad_norm": 86.95663452148438, "learning_rate": 4.4200000000000007e-07, "loss": 33.4672, "step": 2210 }, { "epoch": 0.004484540455807076, "grad_norm": 818.9146118164062, "learning_rate": 4.4400000000000006e-07, "loss": 30.7386, "step": 2220 }, { "epoch": 0.004504741088490891, "grad_norm": 523.1422729492188, "learning_rate": 4.4600000000000005e-07, "loss": 29.29, "step": 2230 }, { "epoch": 0.004524941721174707, "grad_norm": 1081.639892578125, "learning_rate": 4.4800000000000004e-07, "loss": 24.7567, "step": 2240 }, { "epoch": 0.004545142353858523, "grad_norm": 438.20458984375, "learning_rate": 4.5000000000000003e-07, "loss": 24.615, "step": 2250 }, { "epoch": 0.004565342986542338, "grad_norm": 248.22909545898438, "learning_rate": 4.52e-07, "loss": 25.8396, "step": 2260 }, { "epoch": 0.004585543619226154, "grad_norm": 861.9166870117188, "learning_rate": 4.5400000000000007e-07, "loss": 29.9438, "step": 2270 }, { "epoch": 0.00460574425190997, "grad_norm": 823.51318359375, "learning_rate": 4.5600000000000006e-07, "loss": 30.2976, "step": 2280 }, { "epoch": 0.004625944884593785, "grad_norm": 1101.48779296875, "learning_rate": 4.5800000000000005e-07, "loss": 32.0947, "step": 2290 }, { "epoch": 0.004646145517277601, "grad_norm": 512.0722045898438, "learning_rate": 4.6000000000000004e-07, "loss": 37.1648, "step": 2300 }, { "epoch": 0.004666346149961417, "grad_norm": 1179.549560546875, "learning_rate": 4.6200000000000003e-07, "loss": 41.9576, "step": 2310 }, { "epoch": 0.004686546782645232, "grad_norm": 472.5940246582031, "learning_rate": 4.64e-07, "loss": 22.1231, "step": 2320 }, { "epoch": 0.004706747415329048, "grad_norm": 0.0, "learning_rate": 4.6600000000000007e-07, "loss": 22.0882, "step": 2330 }, { "epoch": 0.0047269480480128636, "grad_norm": 783.9161376953125, "learning_rate": 4.6800000000000006e-07, "loss": 29.3172, "step": 2340 }, { "epoch": 0.004747148680696679, "grad_norm": 1057.4423828125, "learning_rate": 4.7000000000000005e-07, "loss": 28.8642, "step": 2350 }, { "epoch": 0.004767349313380495, "grad_norm": 563.2919921875, "learning_rate": 4.7200000000000004e-07, "loss": 21.4855, "step": 2360 }, { "epoch": 0.0047875499460643105, "grad_norm": 594.2389526367188, "learning_rate": 4.7400000000000004e-07, "loss": 38.7323, "step": 2370 }, { "epoch": 0.004807750578748126, "grad_norm": 207.3581085205078, "learning_rate": 4.760000000000001e-07, "loss": 28.6296, "step": 2380 }, { "epoch": 0.004827951211431942, "grad_norm": 874.9584350585938, "learning_rate": 4.78e-07, "loss": 22.6688, "step": 2390 }, { "epoch": 0.0048481518441157575, "grad_norm": 76.04383087158203, "learning_rate": 4.800000000000001e-07, "loss": 21.4943, "step": 2400 }, { "epoch": 0.004868352476799573, "grad_norm": 961.9263305664062, "learning_rate": 4.82e-07, "loss": 17.3612, "step": 2410 }, { "epoch": 0.004888553109483389, "grad_norm": 789.2753295898438, "learning_rate": 4.84e-07, "loss": 
17.5381, "step": 2420 }, { "epoch": 0.0049087537421672045, "grad_norm": 526.267578125, "learning_rate": 4.86e-07, "loss": 15.962, "step": 2430 }, { "epoch": 0.00492895437485102, "grad_norm": 909.1647338867188, "learning_rate": 4.88e-07, "loss": 31.8372, "step": 2440 }, { "epoch": 0.004949155007534836, "grad_norm": 392.2228698730469, "learning_rate": 4.900000000000001e-07, "loss": 28.5269, "step": 2450 }, { "epoch": 0.0049693556402186515, "grad_norm": 572.967529296875, "learning_rate": 4.92e-07, "loss": 21.5063, "step": 2460 }, { "epoch": 0.004989556272902467, "grad_norm": 1103.427001953125, "learning_rate": 4.940000000000001e-07, "loss": 32.4583, "step": 2470 }, { "epoch": 0.005009756905586283, "grad_norm": 816.0195922851562, "learning_rate": 4.96e-07, "loss": 27.6086, "step": 2480 }, { "epoch": 0.0050299575382700985, "grad_norm": 440.2334289550781, "learning_rate": 4.98e-07, "loss": 29.0266, "step": 2490 }, { "epoch": 0.005050158170953914, "grad_norm": 528.32421875, "learning_rate": 5.000000000000001e-07, "loss": 47.6612, "step": 2500 }, { "epoch": 0.00507035880363773, "grad_norm": 596.6814575195312, "learning_rate": 5.02e-07, "loss": 26.5699, "step": 2510 }, { "epoch": 0.0050905594363215454, "grad_norm": 503.26715087890625, "learning_rate": 5.040000000000001e-07, "loss": 35.7597, "step": 2520 }, { "epoch": 0.005110760069005361, "grad_norm": 402.7926330566406, "learning_rate": 5.06e-07, "loss": 49.0305, "step": 2530 }, { "epoch": 0.005130960701689177, "grad_norm": 734.1798706054688, "learning_rate": 5.08e-07, "loss": 33.3107, "step": 2540 }, { "epoch": 0.005151161334372992, "grad_norm": 430.47332763671875, "learning_rate": 5.1e-07, "loss": 17.1561, "step": 2550 }, { "epoch": 0.005171361967056808, "grad_norm": 171.5887451171875, "learning_rate": 5.12e-07, "loss": 24.0431, "step": 2560 }, { "epoch": 0.005191562599740624, "grad_norm": 303.52740478515625, "learning_rate": 5.140000000000001e-07, "loss": 25.7299, "step": 2570 }, { "epoch": 0.005211763232424439, "grad_norm": 103.0549087524414, "learning_rate": 5.16e-07, "loss": 50.7616, "step": 2580 }, { "epoch": 0.005231963865108255, "grad_norm": 713.0372314453125, "learning_rate": 5.180000000000001e-07, "loss": 30.2423, "step": 2590 }, { "epoch": 0.005252164497792071, "grad_norm": 860.4334716796875, "learning_rate": 5.2e-07, "loss": 22.214, "step": 2600 }, { "epoch": 0.005272365130475886, "grad_norm": 2196.531494140625, "learning_rate": 5.22e-07, "loss": 48.7065, "step": 2610 }, { "epoch": 0.005292565763159702, "grad_norm": 193.64439392089844, "learning_rate": 5.240000000000001e-07, "loss": 20.2737, "step": 2620 }, { "epoch": 0.005312766395843518, "grad_norm": 609.69140625, "learning_rate": 5.26e-07, "loss": 47.1675, "step": 2630 }, { "epoch": 0.005332967028527333, "grad_norm": 1125.103759765625, "learning_rate": 5.280000000000001e-07, "loss": 44.6597, "step": 2640 }, { "epoch": 0.005353167661211149, "grad_norm": 624.8217163085938, "learning_rate": 5.3e-07, "loss": 29.0659, "step": 2650 }, { "epoch": 0.005373368293894965, "grad_norm": 366.8795471191406, "learning_rate": 5.32e-07, "loss": 28.1106, "step": 2660 }, { "epoch": 0.00539356892657878, "grad_norm": 585.3496704101562, "learning_rate": 5.340000000000001e-07, "loss": 22.2636, "step": 2670 }, { "epoch": 0.005413769559262596, "grad_norm": 785.0697631835938, "learning_rate": 5.36e-07, "loss": 54.8097, "step": 2680 }, { "epoch": 0.005433970191946412, "grad_norm": 433.1402893066406, "learning_rate": 5.380000000000001e-07, "loss": 14.2988, "step": 2690 }, { "epoch": 0.005454170824630227, 
"grad_norm": 614.6635131835938, "learning_rate": 5.4e-07, "loss": 36.9219, "step": 2700 }, { "epoch": 0.005474371457314043, "grad_norm": 1097.97314453125, "learning_rate": 5.420000000000001e-07, "loss": 32.1943, "step": 2710 }, { "epoch": 0.005494572089997859, "grad_norm": 221.27752685546875, "learning_rate": 5.44e-07, "loss": 17.7383, "step": 2720 }, { "epoch": 0.005514772722681674, "grad_norm": 846.3823852539062, "learning_rate": 5.46e-07, "loss": 32.384, "step": 2730 }, { "epoch": 0.00553497335536549, "grad_norm": 1222.7154541015625, "learning_rate": 5.480000000000001e-07, "loss": 40.9341, "step": 2740 }, { "epoch": 0.005555173988049306, "grad_norm": 1121.3111572265625, "learning_rate": 5.5e-07, "loss": 23.7586, "step": 2750 }, { "epoch": 0.005575374620733121, "grad_norm": 1418.0941162109375, "learning_rate": 5.520000000000001e-07, "loss": 44.7959, "step": 2760 }, { "epoch": 0.005595575253416937, "grad_norm": 352.4377746582031, "learning_rate": 5.54e-07, "loss": 18.5532, "step": 2770 }, { "epoch": 0.005615775886100753, "grad_norm": 682.9746704101562, "learning_rate": 5.560000000000001e-07, "loss": 62.9547, "step": 2780 }, { "epoch": 0.005635976518784568, "grad_norm": 69.55480194091797, "learning_rate": 5.580000000000001e-07, "loss": 44.0819, "step": 2790 }, { "epoch": 0.005656177151468384, "grad_norm": 178.6747589111328, "learning_rate": 5.6e-07, "loss": 28.2389, "step": 2800 }, { "epoch": 0.0056763777841522, "grad_norm": 662.1491088867188, "learning_rate": 5.620000000000001e-07, "loss": 19.7388, "step": 2810 }, { "epoch": 0.005696578416836015, "grad_norm": 310.38323974609375, "learning_rate": 5.64e-07, "loss": 39.9212, "step": 2820 }, { "epoch": 0.005716779049519831, "grad_norm": 256.0929870605469, "learning_rate": 5.660000000000001e-07, "loss": 22.1921, "step": 2830 }, { "epoch": 0.0057369796822036466, "grad_norm": 934.5228881835938, "learning_rate": 5.680000000000001e-07, "loss": 32.7585, "step": 2840 }, { "epoch": 0.005757180314887462, "grad_norm": 703.0299072265625, "learning_rate": 5.7e-07, "loss": 37.5443, "step": 2850 }, { "epoch": 0.005777380947571278, "grad_norm": 687.6595458984375, "learning_rate": 5.720000000000001e-07, "loss": 24.1511, "step": 2860 }, { "epoch": 0.0057975815802550935, "grad_norm": 400.9228820800781, "learning_rate": 5.74e-07, "loss": 23.191, "step": 2870 }, { "epoch": 0.005817782212938909, "grad_norm": 810.8984375, "learning_rate": 5.760000000000001e-07, "loss": 28.8146, "step": 2880 }, { "epoch": 0.005837982845622725, "grad_norm": 528.00634765625, "learning_rate": 5.78e-07, "loss": 27.1717, "step": 2890 }, { "epoch": 0.0058581834783065405, "grad_norm": 282.5981140136719, "learning_rate": 5.800000000000001e-07, "loss": 19.146, "step": 2900 }, { "epoch": 0.005878384110990356, "grad_norm": 767.5343627929688, "learning_rate": 5.820000000000001e-07, "loss": 29.7168, "step": 2910 }, { "epoch": 0.005898584743674172, "grad_norm": 1321.3330078125, "learning_rate": 5.84e-07, "loss": 51.2614, "step": 2920 }, { "epoch": 0.0059187853763579875, "grad_norm": 422.82183837890625, "learning_rate": 5.860000000000001e-07, "loss": 25.8771, "step": 2930 }, { "epoch": 0.005938986009041803, "grad_norm": 170.95977783203125, "learning_rate": 5.88e-07, "loss": 16.7884, "step": 2940 }, { "epoch": 0.005959186641725619, "grad_norm": 752.912841796875, "learning_rate": 5.900000000000001e-07, "loss": 24.5547, "step": 2950 }, { "epoch": 0.0059793872744094345, "grad_norm": 47.68494415283203, "learning_rate": 5.920000000000001e-07, "loss": 22.6978, "step": 2960 }, { "epoch": 
0.00599958790709325, "grad_norm": 537.8870849609375, "learning_rate": 5.94e-07, "loss": 49.1119, "step": 2970 }, { "epoch": 0.006019788539777066, "grad_norm": 487.58251953125, "learning_rate": 5.960000000000001e-07, "loss": 33.8852, "step": 2980 }, { "epoch": 0.0060399891724608815, "grad_norm": 578.7280883789062, "learning_rate": 5.98e-07, "loss": 19.5557, "step": 2990 }, { "epoch": 0.006060189805144697, "grad_norm": 357.2806701660156, "learning_rate": 6.000000000000001e-07, "loss": 13.2956, "step": 3000 }, { "epoch": 0.006080390437828513, "grad_norm": 49.976646423339844, "learning_rate": 6.02e-07, "loss": 23.7729, "step": 3010 }, { "epoch": 0.0061005910705123284, "grad_norm": 1392.4190673828125, "learning_rate": 6.040000000000001e-07, "loss": 27.7141, "step": 3020 }, { "epoch": 0.006120791703196144, "grad_norm": 547.042236328125, "learning_rate": 6.060000000000001e-07, "loss": 22.2183, "step": 3030 }, { "epoch": 0.00614099233587996, "grad_norm": 363.829833984375, "learning_rate": 6.08e-07, "loss": 25.6521, "step": 3040 }, { "epoch": 0.006161192968563775, "grad_norm": 1138.9224853515625, "learning_rate": 6.100000000000001e-07, "loss": 28.1272, "step": 3050 }, { "epoch": 0.006181393601247591, "grad_norm": 388.612548828125, "learning_rate": 6.12e-07, "loss": 25.8512, "step": 3060 }, { "epoch": 0.006201594233931407, "grad_norm": 633.0219116210938, "learning_rate": 6.140000000000001e-07, "loss": 19.1016, "step": 3070 }, { "epoch": 0.006221794866615222, "grad_norm": 0.0, "learning_rate": 6.160000000000001e-07, "loss": 38.8512, "step": 3080 }, { "epoch": 0.006241995499299038, "grad_norm": 579.1844482421875, "learning_rate": 6.180000000000001e-07, "loss": 23.9865, "step": 3090 }, { "epoch": 0.006262196131982854, "grad_norm": 860.4633178710938, "learning_rate": 6.200000000000001e-07, "loss": 20.4271, "step": 3100 }, { "epoch": 0.006282396764666669, "grad_norm": 406.67791748046875, "learning_rate": 6.22e-07, "loss": 32.6324, "step": 3110 }, { "epoch": 0.006302597397350485, "grad_norm": 110.9633560180664, "learning_rate": 6.24e-07, "loss": 14.6998, "step": 3120 }, { "epoch": 0.006322798030034301, "grad_norm": 445.50244140625, "learning_rate": 6.260000000000001e-07, "loss": 16.6768, "step": 3130 }, { "epoch": 0.006342998662718116, "grad_norm": 1032.125244140625, "learning_rate": 6.28e-07, "loss": 37.2087, "step": 3140 }, { "epoch": 0.006363199295401932, "grad_norm": 284.1348876953125, "learning_rate": 6.3e-07, "loss": 25.7215, "step": 3150 }, { "epoch": 0.006383399928085748, "grad_norm": 1127.0638427734375, "learning_rate": 6.320000000000002e-07, "loss": 43.177, "step": 3160 }, { "epoch": 0.006403600560769563, "grad_norm": 858.960205078125, "learning_rate": 6.34e-07, "loss": 29.6178, "step": 3170 }, { "epoch": 0.006423801193453379, "grad_norm": 632.6044921875, "learning_rate": 6.360000000000001e-07, "loss": 24.2948, "step": 3180 }, { "epoch": 0.006444001826137195, "grad_norm": 385.60772705078125, "learning_rate": 6.38e-07, "loss": 29.6091, "step": 3190 }, { "epoch": 0.00646420245882101, "grad_norm": 735.461181640625, "learning_rate": 6.4e-07, "loss": 36.3189, "step": 3200 }, { "epoch": 0.006484403091504826, "grad_norm": 806.14208984375, "learning_rate": 6.42e-07, "loss": 40.9133, "step": 3210 }, { "epoch": 0.006504603724188642, "grad_norm": 197.8910675048828, "learning_rate": 6.44e-07, "loss": 22.794, "step": 3220 }, { "epoch": 0.006524804356872457, "grad_norm": 464.4063720703125, "learning_rate": 6.460000000000001e-07, "loss": 25.8888, "step": 3230 }, { "epoch": 0.006545004989556273, "grad_norm": 
897.4555053710938, "learning_rate": 6.48e-07, "loss": 45.3982, "step": 3240 }, { "epoch": 0.006565205622240089, "grad_norm": 1184.5333251953125, "learning_rate": 6.5e-07, "loss": 45.8197, "step": 3250 }, { "epoch": 0.006585406254923904, "grad_norm": 245.31578063964844, "learning_rate": 6.52e-07, "loss": 22.9282, "step": 3260 }, { "epoch": 0.00660560688760772, "grad_norm": 396.6833190917969, "learning_rate": 6.54e-07, "loss": 36.6589, "step": 3270 }, { "epoch": 0.006625807520291536, "grad_norm": 827.7760620117188, "learning_rate": 6.560000000000002e-07, "loss": 27.5275, "step": 3280 }, { "epoch": 0.006646008152975351, "grad_norm": 698.2625122070312, "learning_rate": 6.58e-07, "loss": 43.7663, "step": 3290 }, { "epoch": 0.006666208785659167, "grad_norm": 804.8634033203125, "learning_rate": 6.6e-07, "loss": 42.3979, "step": 3300 }, { "epoch": 0.006686409418342983, "grad_norm": 381.66253662109375, "learning_rate": 6.62e-07, "loss": 28.6512, "step": 3310 }, { "epoch": 0.006706610051026798, "grad_norm": 274.6836242675781, "learning_rate": 6.64e-07, "loss": 13.3558, "step": 3320 }, { "epoch": 0.006726810683710614, "grad_norm": 640.2205200195312, "learning_rate": 6.660000000000002e-07, "loss": 31.3027, "step": 3330 }, { "epoch": 0.0067470113163944296, "grad_norm": 545.2610473632812, "learning_rate": 6.68e-07, "loss": 19.2501, "step": 3340 }, { "epoch": 0.006767211949078245, "grad_norm": 652.1705322265625, "learning_rate": 6.7e-07, "loss": 37.1378, "step": 3350 }, { "epoch": 0.006787412581762061, "grad_norm": 1061.9658203125, "learning_rate": 6.72e-07, "loss": 21.7822, "step": 3360 }, { "epoch": 0.0068076132144458765, "grad_norm": 556.8729248046875, "learning_rate": 6.74e-07, "loss": 24.4347, "step": 3370 }, { "epoch": 0.006827813847129692, "grad_norm": 522.956787109375, "learning_rate": 6.76e-07, "loss": 27.4032, "step": 3380 }, { "epoch": 0.006848014479813508, "grad_norm": 531.7928466796875, "learning_rate": 6.78e-07, "loss": 19.0332, "step": 3390 }, { "epoch": 0.0068682151124973235, "grad_norm": 804.1568603515625, "learning_rate": 6.800000000000001e-07, "loss": 34.7435, "step": 3400 }, { "epoch": 0.006888415745181139, "grad_norm": 1052.230712890625, "learning_rate": 6.82e-07, "loss": 25.6694, "step": 3410 }, { "epoch": 0.006908616377864955, "grad_norm": 526.1654663085938, "learning_rate": 6.84e-07, "loss": 22.8071, "step": 3420 }, { "epoch": 0.0069288170105487705, "grad_norm": 1492.260986328125, "learning_rate": 6.86e-07, "loss": 20.8931, "step": 3430 }, { "epoch": 0.006949017643232586, "grad_norm": 2416.557373046875, "learning_rate": 6.88e-07, "loss": 45.9088, "step": 3440 }, { "epoch": 0.006969218275916402, "grad_norm": 138.57374572753906, "learning_rate": 6.900000000000001e-07, "loss": 20.0898, "step": 3450 }, { "epoch": 0.0069894189086002175, "grad_norm": 517.4945678710938, "learning_rate": 6.92e-07, "loss": 26.9102, "step": 3460 }, { "epoch": 0.007009619541284033, "grad_norm": 426.5979919433594, "learning_rate": 6.94e-07, "loss": 39.5767, "step": 3470 }, { "epoch": 0.007029820173967849, "grad_norm": 208.76681518554688, "learning_rate": 6.96e-07, "loss": 42.304, "step": 3480 }, { "epoch": 0.0070500208066516645, "grad_norm": 478.20050048828125, "learning_rate": 6.98e-07, "loss": 23.9574, "step": 3490 }, { "epoch": 0.00707022143933548, "grad_norm": 1073.7861328125, "learning_rate": 7.000000000000001e-07, "loss": 40.8396, "step": 3500 }, { "epoch": 0.007090422072019296, "grad_norm": 164.99581909179688, "learning_rate": 7.02e-07, "loss": 54.3387, "step": 3510 }, { "epoch": 0.0071106227047031114, 
"grad_norm": 559.994873046875, "learning_rate": 7.040000000000001e-07, "loss": 23.7497, "step": 3520 }, { "epoch": 0.007130823337386927, "grad_norm": 248.2763671875, "learning_rate": 7.06e-07, "loss": 14.4129, "step": 3530 }, { "epoch": 0.007151023970070743, "grad_norm": 115.21528625488281, "learning_rate": 7.08e-07, "loss": 20.9172, "step": 3540 }, { "epoch": 0.007171224602754558, "grad_norm": 529.1926879882812, "learning_rate": 7.1e-07, "loss": 29.7155, "step": 3550 }, { "epoch": 0.007191425235438374, "grad_norm": 931.08056640625, "learning_rate": 7.12e-07, "loss": 48.6689, "step": 3560 }, { "epoch": 0.00721162586812219, "grad_norm": 325.41485595703125, "learning_rate": 7.140000000000001e-07, "loss": 19.5409, "step": 3570 }, { "epoch": 0.007231826500806005, "grad_norm": 255.26974487304688, "learning_rate": 7.16e-07, "loss": 13.3323, "step": 3580 }, { "epoch": 0.007252027133489821, "grad_norm": 174.78330993652344, "learning_rate": 7.18e-07, "loss": 27.6813, "step": 3590 }, { "epoch": 0.007272227766173637, "grad_norm": 259.9703369140625, "learning_rate": 7.2e-07, "loss": 23.6667, "step": 3600 }, { "epoch": 0.007292428398857452, "grad_norm": 510.34515380859375, "learning_rate": 7.22e-07, "loss": 43.1148, "step": 3610 }, { "epoch": 0.007312629031541268, "grad_norm": 369.45806884765625, "learning_rate": 7.240000000000001e-07, "loss": 18.297, "step": 3620 }, { "epoch": 0.007332829664225084, "grad_norm": 492.9736022949219, "learning_rate": 7.26e-07, "loss": 34.2994, "step": 3630 }, { "epoch": 0.007353030296908899, "grad_norm": 611.5191650390625, "learning_rate": 7.280000000000001e-07, "loss": 40.5437, "step": 3640 }, { "epoch": 0.007373230929592715, "grad_norm": 478.23126220703125, "learning_rate": 7.3e-07, "loss": 42.8498, "step": 3650 }, { "epoch": 0.007393431562276531, "grad_norm": 822.2637329101562, "learning_rate": 7.32e-07, "loss": 42.8805, "step": 3660 }, { "epoch": 0.007413632194960346, "grad_norm": 572.3778686523438, "learning_rate": 7.340000000000001e-07, "loss": 21.5523, "step": 3670 }, { "epoch": 0.007433832827644162, "grad_norm": 610.9832153320312, "learning_rate": 7.36e-07, "loss": 24.501, "step": 3680 }, { "epoch": 0.007454033460327978, "grad_norm": 444.3849792480469, "learning_rate": 7.380000000000001e-07, "loss": 29.0815, "step": 3690 }, { "epoch": 0.007474234093011793, "grad_norm": 334.1302490234375, "learning_rate": 7.4e-07, "loss": 29.4803, "step": 3700 }, { "epoch": 0.007494434725695609, "grad_norm": 484.7538757324219, "learning_rate": 7.420000000000001e-07, "loss": 34.7338, "step": 3710 }, { "epoch": 0.007514635358379425, "grad_norm": 557.7028198242188, "learning_rate": 7.44e-07, "loss": 45.3084, "step": 3720 }, { "epoch": 0.00753483599106324, "grad_norm": 419.6079406738281, "learning_rate": 7.46e-07, "loss": 38.1011, "step": 3730 }, { "epoch": 0.007555036623747056, "grad_norm": 0.0, "learning_rate": 7.480000000000001e-07, "loss": 34.1749, "step": 3740 }, { "epoch": 0.007575237256430872, "grad_norm": 608.0559692382812, "learning_rate": 7.5e-07, "loss": 12.6068, "step": 3750 }, { "epoch": 0.007595437889114687, "grad_norm": 1825.6767578125, "learning_rate": 7.520000000000001e-07, "loss": 36.8878, "step": 3760 }, { "epoch": 0.007615638521798503, "grad_norm": 347.3094787597656, "learning_rate": 7.54e-07, "loss": 26.2681, "step": 3770 }, { "epoch": 0.007635839154482319, "grad_norm": 1999.683837890625, "learning_rate": 7.56e-07, "loss": 40.3283, "step": 3780 }, { "epoch": 0.007656039787166134, "grad_norm": 520.2977905273438, "learning_rate": 7.580000000000001e-07, "loss": 
34.4978, "step": 3790 }, { "epoch": 0.00767624041984995, "grad_norm": 457.4718322753906, "learning_rate": 7.6e-07, "loss": 37.1305, "step": 3800 }, { "epoch": 0.007696441052533766, "grad_norm": 241.23533630371094, "learning_rate": 7.620000000000001e-07, "loss": 34.1395, "step": 3810 }, { "epoch": 0.007716641685217581, "grad_norm": 1090.16943359375, "learning_rate": 7.64e-07, "loss": 32.3401, "step": 3820 }, { "epoch": 0.007736842317901397, "grad_norm": 799.810546875, "learning_rate": 7.660000000000001e-07, "loss": 18.844, "step": 3830 }, { "epoch": 0.0077570429505852126, "grad_norm": 513.3098754882812, "learning_rate": 7.68e-07, "loss": 56.5756, "step": 3840 }, { "epoch": 0.007777243583269028, "grad_norm": 412.8589172363281, "learning_rate": 7.7e-07, "loss": 28.4811, "step": 3850 }, { "epoch": 0.007797444215952844, "grad_norm": 818.9476318359375, "learning_rate": 7.720000000000001e-07, "loss": 23.9097, "step": 3860 }, { "epoch": 0.00781764484863666, "grad_norm": 660.37158203125, "learning_rate": 7.74e-07, "loss": 33.1159, "step": 3870 }, { "epoch": 0.007837845481320474, "grad_norm": 554.0394287109375, "learning_rate": 7.760000000000001e-07, "loss": 41.6907, "step": 3880 }, { "epoch": 0.00785804611400429, "grad_norm": 609.9306640625, "learning_rate": 7.78e-07, "loss": 23.2773, "step": 3890 }, { "epoch": 0.007878246746688106, "grad_norm": 509.7363586425781, "learning_rate": 7.8e-07, "loss": 27.8946, "step": 3900 }, { "epoch": 0.007898447379371922, "grad_norm": 617.0547485351562, "learning_rate": 7.820000000000001e-07, "loss": 40.5662, "step": 3910 }, { "epoch": 0.007918648012055737, "grad_norm": 45.31270217895508, "learning_rate": 7.84e-07, "loss": 19.5, "step": 3920 }, { "epoch": 0.007938848644739553, "grad_norm": 936.0264282226562, "learning_rate": 7.860000000000001e-07, "loss": 31.398, "step": 3930 }, { "epoch": 0.007959049277423368, "grad_norm": 4046.48974609375, "learning_rate": 7.88e-07, "loss": 61.3853, "step": 3940 }, { "epoch": 0.007979249910107185, "grad_norm": 678.829345703125, "learning_rate": 7.900000000000001e-07, "loss": 52.0202, "step": 3950 }, { "epoch": 0.007999450542791, "grad_norm": 174.16665649414062, "learning_rate": 7.920000000000001e-07, "loss": 18.2646, "step": 3960 }, { "epoch": 0.008019651175474816, "grad_norm": 631.7503051757812, "learning_rate": 7.94e-07, "loss": 22.1172, "step": 3970 }, { "epoch": 0.008039851808158631, "grad_norm": 863.083984375, "learning_rate": 7.960000000000001e-07, "loss": 31.1579, "step": 3980 }, { "epoch": 0.008060052440842447, "grad_norm": 395.8502197265625, "learning_rate": 7.98e-07, "loss": 36.5626, "step": 3990 }, { "epoch": 0.008080253073526262, "grad_norm": 551.8558349609375, "learning_rate": 8.000000000000001e-07, "loss": 27.3743, "step": 4000 }, { "epoch": 0.008100453706210079, "grad_norm": 350.1429138183594, "learning_rate": 8.02e-07, "loss": 27.0405, "step": 4010 }, { "epoch": 0.008120654338893894, "grad_norm": 1742.940185546875, "learning_rate": 8.04e-07, "loss": 44.1298, "step": 4020 }, { "epoch": 0.00814085497157771, "grad_norm": 773.5975341796875, "learning_rate": 8.060000000000001e-07, "loss": 22.7084, "step": 4030 }, { "epoch": 0.008161055604261525, "grad_norm": 996.5304565429688, "learning_rate": 8.08e-07, "loss": 40.6753, "step": 4040 }, { "epoch": 0.008181256236945341, "grad_norm": 671.2755126953125, "learning_rate": 8.100000000000001e-07, "loss": 39.0983, "step": 4050 }, { "epoch": 0.008201456869629156, "grad_norm": 350.6563415527344, "learning_rate": 8.12e-07, "loss": 30.7569, "step": 4060 }, { "epoch": 
0.008221657502312973, "grad_norm": 1149.395263671875, "learning_rate": 8.140000000000001e-07, "loss": 28.9356, "step": 4070 }, { "epoch": 0.008241858134996788, "grad_norm": 990.6906127929688, "learning_rate": 8.160000000000001e-07, "loss": 39.5627, "step": 4080 }, { "epoch": 0.008262058767680604, "grad_norm": 364.4676208496094, "learning_rate": 8.18e-07, "loss": 14.7625, "step": 4090 }, { "epoch": 0.008282259400364419, "grad_norm": 811.5744018554688, "learning_rate": 8.200000000000001e-07, "loss": 22.5022, "step": 4100 }, { "epoch": 0.008302460033048235, "grad_norm": 582.2328491210938, "learning_rate": 8.22e-07, "loss": 39.7949, "step": 4110 }, { "epoch": 0.00832266066573205, "grad_norm": 1127.8995361328125, "learning_rate": 8.240000000000001e-07, "loss": 37.47, "step": 4120 }, { "epoch": 0.008342861298415867, "grad_norm": 502.9598693847656, "learning_rate": 8.260000000000001e-07, "loss": 50.2378, "step": 4130 }, { "epoch": 0.008363061931099681, "grad_norm": 630.9242553710938, "learning_rate": 8.280000000000001e-07, "loss": 25.1314, "step": 4140 }, { "epoch": 0.008383262563783498, "grad_norm": 632.32958984375, "learning_rate": 8.300000000000001e-07, "loss": 42.2978, "step": 4150 }, { "epoch": 0.008403463196467313, "grad_norm": 513.2865600585938, "learning_rate": 8.32e-07, "loss": 22.9661, "step": 4160 }, { "epoch": 0.00842366382915113, "grad_norm": 437.45941162109375, "learning_rate": 8.340000000000001e-07, "loss": 24.9955, "step": 4170 }, { "epoch": 0.008443864461834944, "grad_norm": 732.6044311523438, "learning_rate": 8.36e-07, "loss": 37.1115, "step": 4180 }, { "epoch": 0.00846406509451876, "grad_norm": 270.413330078125, "learning_rate": 8.380000000000001e-07, "loss": 20.6703, "step": 4190 }, { "epoch": 0.008484265727202575, "grad_norm": 8.429944038391113, "learning_rate": 8.400000000000001e-07, "loss": 25.673, "step": 4200 }, { "epoch": 0.008504466359886392, "grad_norm": 555.270751953125, "learning_rate": 8.42e-07, "loss": 22.8865, "step": 4210 }, { "epoch": 0.008524666992570207, "grad_norm": 638.0531005859375, "learning_rate": 8.440000000000001e-07, "loss": 21.5478, "step": 4220 }, { "epoch": 0.008544867625254023, "grad_norm": 1256.038818359375, "learning_rate": 8.46e-07, "loss": 41.5287, "step": 4230 }, { "epoch": 0.008565068257937838, "grad_norm": 414.123046875, "learning_rate": 8.480000000000001e-07, "loss": 19.9983, "step": 4240 }, { "epoch": 0.008585268890621655, "grad_norm": 594.0344848632812, "learning_rate": 8.500000000000001e-07, "loss": 33.3682, "step": 4250 }, { "epoch": 0.00860546952330547, "grad_norm": 580.2907104492188, "learning_rate": 8.520000000000001e-07, "loss": 26.1064, "step": 4260 }, { "epoch": 0.008625670155989286, "grad_norm": 910.2868041992188, "learning_rate": 8.540000000000001e-07, "loss": 25.9703, "step": 4270 }, { "epoch": 0.0086458707886731, "grad_norm": 401.0901184082031, "learning_rate": 8.56e-07, "loss": 31.2655, "step": 4280 }, { "epoch": 0.008666071421356917, "grad_norm": 586.83447265625, "learning_rate": 8.580000000000001e-07, "loss": 22.6068, "step": 4290 }, { "epoch": 0.008686272054040732, "grad_norm": 384.7112731933594, "learning_rate": 8.6e-07, "loss": 20.9198, "step": 4300 }, { "epoch": 0.008706472686724549, "grad_norm": 861.9733276367188, "learning_rate": 8.620000000000001e-07, "loss": 30.633, "step": 4310 }, { "epoch": 0.008726673319408363, "grad_norm": 508.044189453125, "learning_rate": 8.640000000000001e-07, "loss": 37.4199, "step": 4320 }, { "epoch": 0.00874687395209218, "grad_norm": 757.0943603515625, "learning_rate": 8.66e-07, "loss": 
31.3223, "step": 4330 }, { "epoch": 0.008767074584775995, "grad_norm": 481.5451965332031, "learning_rate": 8.680000000000001e-07, "loss": 30.192, "step": 4340 }, { "epoch": 0.008787275217459811, "grad_norm": 1465.626220703125, "learning_rate": 8.7e-07, "loss": 36.7876, "step": 4350 }, { "epoch": 0.008807475850143626, "grad_norm": 537.8186645507812, "learning_rate": 8.720000000000001e-07, "loss": 18.696, "step": 4360 }, { "epoch": 0.008827676482827443, "grad_norm": 421.6712951660156, "learning_rate": 8.740000000000001e-07, "loss": 16.2779, "step": 4370 }, { "epoch": 0.008847877115511257, "grad_norm": 625.7432861328125, "learning_rate": 8.760000000000001e-07, "loss": 23.2634, "step": 4380 }, { "epoch": 0.008868077748195074, "grad_norm": 630.1290893554688, "learning_rate": 8.780000000000001e-07, "loss": 22.0466, "step": 4390 }, { "epoch": 0.008888278380878889, "grad_norm": 19.57767105102539, "learning_rate": 8.8e-07, "loss": 32.943, "step": 4400 }, { "epoch": 0.008908479013562705, "grad_norm": 563.815185546875, "learning_rate": 8.820000000000001e-07, "loss": 20.2435, "step": 4410 }, { "epoch": 0.00892867964624652, "grad_norm": 565.490234375, "learning_rate": 8.840000000000001e-07, "loss": 30.1036, "step": 4420 }, { "epoch": 0.008948880278930336, "grad_norm": 553.708984375, "learning_rate": 8.860000000000001e-07, "loss": 24.4606, "step": 4430 }, { "epoch": 0.008969080911614151, "grad_norm": 433.3461608886719, "learning_rate": 8.880000000000001e-07, "loss": 24.9225, "step": 4440 }, { "epoch": 0.008989281544297968, "grad_norm": 939.7255859375, "learning_rate": 8.900000000000001e-07, "loss": 32.4033, "step": 4450 }, { "epoch": 0.009009482176981783, "grad_norm": 207.4413604736328, "learning_rate": 8.920000000000001e-07, "loss": 38.8472, "step": 4460 }, { "epoch": 0.0090296828096656, "grad_norm": 1215.0419921875, "learning_rate": 8.94e-07, "loss": 41.1395, "step": 4470 }, { "epoch": 0.009049883442349414, "grad_norm": 761.4100341796875, "learning_rate": 8.960000000000001e-07, "loss": 31.6368, "step": 4480 }, { "epoch": 0.00907008407503323, "grad_norm": 815.1241455078125, "learning_rate": 8.980000000000001e-07, "loss": 25.794, "step": 4490 }, { "epoch": 0.009090284707717045, "grad_norm": 664.5227661132812, "learning_rate": 9.000000000000001e-07, "loss": 26.1395, "step": 4500 }, { "epoch": 0.009110485340400862, "grad_norm": 621.558349609375, "learning_rate": 9.020000000000001e-07, "loss": 20.145, "step": 4510 }, { "epoch": 0.009130685973084677, "grad_norm": 1977.96240234375, "learning_rate": 9.04e-07, "loss": 31.9802, "step": 4520 }, { "epoch": 0.009150886605768493, "grad_norm": 638.37109375, "learning_rate": 9.060000000000001e-07, "loss": 20.8006, "step": 4530 }, { "epoch": 0.009171087238452308, "grad_norm": 1067.8172607421875, "learning_rate": 9.080000000000001e-07, "loss": 55.018, "step": 4540 }, { "epoch": 0.009191287871136124, "grad_norm": 1054.7120361328125, "learning_rate": 9.100000000000001e-07, "loss": 28.5558, "step": 4550 }, { "epoch": 0.00921148850381994, "grad_norm": 297.5315856933594, "learning_rate": 9.120000000000001e-07, "loss": 29.3597, "step": 4560 }, { "epoch": 0.009231689136503756, "grad_norm": 683.9236450195312, "learning_rate": 9.140000000000001e-07, "loss": 20.7864, "step": 4570 }, { "epoch": 0.00925188976918757, "grad_norm": 822.9036865234375, "learning_rate": 9.160000000000001e-07, "loss": 26.9667, "step": 4580 }, { "epoch": 0.009272090401871387, "grad_norm": 630.2400512695312, "learning_rate": 9.180000000000001e-07, "loss": 12.3359, "step": 4590 }, { "epoch": 
0.009292291034555202, "grad_norm": 608.8886108398438, "learning_rate": 9.200000000000001e-07, "loss": 23.5769, "step": 4600 }, { "epoch": 0.009312491667239018, "grad_norm": 827.4570922851562, "learning_rate": 9.220000000000001e-07, "loss": 18.0923, "step": 4610 }, { "epoch": 0.009332692299922833, "grad_norm": 412.2288818359375, "learning_rate": 9.240000000000001e-07, "loss": 38.5852, "step": 4620 }, { "epoch": 0.00935289293260665, "grad_norm": 950.310302734375, "learning_rate": 9.260000000000001e-07, "loss": 28.0527, "step": 4630 }, { "epoch": 0.009373093565290464, "grad_norm": 442.42523193359375, "learning_rate": 9.28e-07, "loss": 20.4062, "step": 4640 }, { "epoch": 0.009393294197974281, "grad_norm": 524.7567749023438, "learning_rate": 9.300000000000001e-07, "loss": 27.0559, "step": 4650 }, { "epoch": 0.009413494830658096, "grad_norm": 602.6281127929688, "learning_rate": 9.320000000000001e-07, "loss": 26.432, "step": 4660 }, { "epoch": 0.009433695463341912, "grad_norm": 1005.23974609375, "learning_rate": 9.340000000000001e-07, "loss": 30.3101, "step": 4670 }, { "epoch": 0.009453896096025727, "grad_norm": 511.9738464355469, "learning_rate": 9.360000000000001e-07, "loss": 20.6791, "step": 4680 }, { "epoch": 0.009474096728709544, "grad_norm": 503.60693359375, "learning_rate": 9.380000000000001e-07, "loss": 26.5743, "step": 4690 }, { "epoch": 0.009494297361393358, "grad_norm": 674.8211059570312, "learning_rate": 9.400000000000001e-07, "loss": 31.4228, "step": 4700 }, { "epoch": 0.009514497994077175, "grad_norm": 473.37396240234375, "learning_rate": 9.420000000000002e-07, "loss": 44.0672, "step": 4710 }, { "epoch": 0.00953469862676099, "grad_norm": 831.3383178710938, "learning_rate": 9.440000000000001e-07, "loss": 47.9142, "step": 4720 }, { "epoch": 0.009554899259444806, "grad_norm": 2031.18798828125, "learning_rate": 9.460000000000001e-07, "loss": 43.8193, "step": 4730 }, { "epoch": 0.009575099892128621, "grad_norm": 1036.27978515625, "learning_rate": 9.480000000000001e-07, "loss": 19.8682, "step": 4740 }, { "epoch": 0.009595300524812438, "grad_norm": 367.123046875, "learning_rate": 9.500000000000001e-07, "loss": 19.759, "step": 4750 }, { "epoch": 0.009615501157496252, "grad_norm": 1182.7408447265625, "learning_rate": 9.520000000000002e-07, "loss": 29.7404, "step": 4760 }, { "epoch": 0.009635701790180069, "grad_norm": 326.5006408691406, "learning_rate": 9.54e-07, "loss": 62.1775, "step": 4770 }, { "epoch": 0.009655902422863884, "grad_norm": 206.4904022216797, "learning_rate": 9.56e-07, "loss": 20.3732, "step": 4780 }, { "epoch": 0.0096761030555477, "grad_norm": 148.8917999267578, "learning_rate": 9.58e-07, "loss": 32.3255, "step": 4790 }, { "epoch": 0.009696303688231515, "grad_norm": 392.4678649902344, "learning_rate": 9.600000000000001e-07, "loss": 36.1733, "step": 4800 }, { "epoch": 0.009716504320915332, "grad_norm": 1750.965087890625, "learning_rate": 9.62e-07, "loss": 28.3648, "step": 4810 }, { "epoch": 0.009736704953599146, "grad_norm": 149.6742706298828, "learning_rate": 9.64e-07, "loss": 16.249, "step": 4820 }, { "epoch": 0.009756905586282963, "grad_norm": 268.1315002441406, "learning_rate": 9.660000000000002e-07, "loss": 24.6534, "step": 4830 }, { "epoch": 0.009777106218966778, "grad_norm": 312.3633728027344, "learning_rate": 9.68e-07, "loss": 19.7529, "step": 4840 }, { "epoch": 0.009797306851650594, "grad_norm": 335.4103698730469, "learning_rate": 9.7e-07, "loss": 25.3802, "step": 4850 }, { "epoch": 0.009817507484334409, "grad_norm": 588.1651000976562, "learning_rate": 9.72e-07, 
"loss": 37.7548, "step": 4860 }, { "epoch": 0.009837708117018226, "grad_norm": 1084.7984619140625, "learning_rate": 9.740000000000001e-07, "loss": 28.1253, "step": 4870 }, { "epoch": 0.00985790874970204, "grad_norm": 841.3825073242188, "learning_rate": 9.76e-07, "loss": 17.4928, "step": 4880 }, { "epoch": 0.009878109382385857, "grad_norm": 258.460205078125, "learning_rate": 9.78e-07, "loss": 26.3879, "step": 4890 }, { "epoch": 0.009898310015069672, "grad_norm": 299.3013916015625, "learning_rate": 9.800000000000001e-07, "loss": 13.678, "step": 4900 }, { "epoch": 0.009918510647753488, "grad_norm": 191.6232452392578, "learning_rate": 9.82e-07, "loss": 21.8842, "step": 4910 }, { "epoch": 0.009938711280437303, "grad_norm": 565.6702270507812, "learning_rate": 9.84e-07, "loss": 11.0409, "step": 4920 }, { "epoch": 0.00995891191312112, "grad_norm": 291.95526123046875, "learning_rate": 9.86e-07, "loss": 36.9195, "step": 4930 }, { "epoch": 0.009979112545804934, "grad_norm": 428.879150390625, "learning_rate": 9.880000000000001e-07, "loss": 26.7375, "step": 4940 }, { "epoch": 0.00999931317848875, "grad_norm": 177.6942138671875, "learning_rate": 9.9e-07, "loss": 26.29, "step": 4950 }, { "epoch": 0.010019513811172566, "grad_norm": 662.8630981445312, "learning_rate": 9.92e-07, "loss": 25.1648, "step": 4960 }, { "epoch": 0.010039714443856382, "grad_norm": 489.55889892578125, "learning_rate": 9.940000000000001e-07, "loss": 41.3737, "step": 4970 }, { "epoch": 0.010059915076540197, "grad_norm": 350.4941101074219, "learning_rate": 9.96e-07, "loss": 42.1294, "step": 4980 }, { "epoch": 0.010080115709224013, "grad_norm": 320.653076171875, "learning_rate": 9.98e-07, "loss": 26.9559, "step": 4990 }, { "epoch": 0.010100316341907828, "grad_norm": 231.248046875, "learning_rate": 1.0000000000000002e-06, "loss": 6.5142, "step": 5000 }, { "epoch": 0.010120516974591645, "grad_norm": 521.7716674804688, "learning_rate": 1.002e-06, "loss": 31.8698, "step": 5010 }, { "epoch": 0.01014071760727546, "grad_norm": 800.10888671875, "learning_rate": 1.004e-06, "loss": 28.1427, "step": 5020 }, { "epoch": 0.010160918239959276, "grad_norm": 420.4501037597656, "learning_rate": 1.006e-06, "loss": 28.2438, "step": 5030 }, { "epoch": 0.010181118872643091, "grad_norm": 480.6717224121094, "learning_rate": 1.0080000000000001e-06, "loss": 20.2761, "step": 5040 }, { "epoch": 0.010201319505326907, "grad_norm": 1004.8648681640625, "learning_rate": 1.01e-06, "loss": 22.479, "step": 5050 }, { "epoch": 0.010221520138010722, "grad_norm": 591.0374755859375, "learning_rate": 1.012e-06, "loss": 12.5654, "step": 5060 }, { "epoch": 0.010241720770694539, "grad_norm": 921.0972290039062, "learning_rate": 1.0140000000000002e-06, "loss": 28.9, "step": 5070 }, { "epoch": 0.010261921403378354, "grad_norm": 537.0875244140625, "learning_rate": 1.016e-06, "loss": 14.163, "step": 5080 }, { "epoch": 0.01028212203606217, "grad_norm": 672.0023193359375, "learning_rate": 1.018e-06, "loss": 40.8729, "step": 5090 }, { "epoch": 0.010302322668745985, "grad_norm": 761.9652709960938, "learning_rate": 1.02e-06, "loss": 29.6747, "step": 5100 }, { "epoch": 0.010322523301429801, "grad_norm": 934.7273559570312, "learning_rate": 1.0220000000000001e-06, "loss": 23.6142, "step": 5110 }, { "epoch": 0.010342723934113616, "grad_norm": 364.7134704589844, "learning_rate": 1.024e-06, "loss": 29.4059, "step": 5120 }, { "epoch": 0.010362924566797433, "grad_norm": 662.9189453125, "learning_rate": 1.026e-06, "loss": 25.676, "step": 5130 }, { "epoch": 0.010383125199481247, "grad_norm": 
1470.47802734375, "learning_rate": 1.0280000000000002e-06, "loss": 58.6466, "step": 5140 }, { "epoch": 0.010403325832165064, "grad_norm": 1297.0006103515625, "learning_rate": 1.03e-06, "loss": 34.2537, "step": 5150 }, { "epoch": 0.010423526464848879, "grad_norm": 546.3383178710938, "learning_rate": 1.032e-06, "loss": 18.1663, "step": 5160 }, { "epoch": 0.010443727097532695, "grad_norm": 1221.919189453125, "learning_rate": 1.0340000000000002e-06, "loss": 27.4059, "step": 5170 }, { "epoch": 0.01046392773021651, "grad_norm": 460.9737243652344, "learning_rate": 1.0360000000000001e-06, "loss": 16.2116, "step": 5180 }, { "epoch": 0.010484128362900327, "grad_norm": 413.9505920410156, "learning_rate": 1.038e-06, "loss": 21.2082, "step": 5190 }, { "epoch": 0.010504328995584141, "grad_norm": 227.77557373046875, "learning_rate": 1.04e-06, "loss": 27.9558, "step": 5200 }, { "epoch": 0.010524529628267958, "grad_norm": 606.6741943359375, "learning_rate": 1.0420000000000001e-06, "loss": 20.9653, "step": 5210 }, { "epoch": 0.010544730260951773, "grad_norm": 541.234619140625, "learning_rate": 1.044e-06, "loss": 36.7112, "step": 5220 }, { "epoch": 0.01056493089363559, "grad_norm": 145.1033477783203, "learning_rate": 1.046e-06, "loss": 30.0167, "step": 5230 }, { "epoch": 0.010585131526319404, "grad_norm": 373.5177917480469, "learning_rate": 1.0480000000000002e-06, "loss": 31.7274, "step": 5240 }, { "epoch": 0.01060533215900322, "grad_norm": 563.7691650390625, "learning_rate": 1.0500000000000001e-06, "loss": 30.9538, "step": 5250 }, { "epoch": 0.010625532791687035, "grad_norm": 170.43959045410156, "learning_rate": 1.052e-06, "loss": 44.6468, "step": 5260 }, { "epoch": 0.010645733424370852, "grad_norm": 695.9778442382812, "learning_rate": 1.054e-06, "loss": 42.9755, "step": 5270 }, { "epoch": 0.010665934057054667, "grad_norm": 390.760009765625, "learning_rate": 1.0560000000000001e-06, "loss": 28.6079, "step": 5280 }, { "epoch": 0.010686134689738483, "grad_norm": 483.7234191894531, "learning_rate": 1.058e-06, "loss": 22.693, "step": 5290 }, { "epoch": 0.010706335322422298, "grad_norm": 694.895263671875, "learning_rate": 1.06e-06, "loss": 31.7093, "step": 5300 }, { "epoch": 0.010726535955106115, "grad_norm": 0.03282935544848442, "learning_rate": 1.0620000000000002e-06, "loss": 29.5697, "step": 5310 }, { "epoch": 0.01074673658778993, "grad_norm": 197.685546875, "learning_rate": 1.064e-06, "loss": 25.7651, "step": 5320 }, { "epoch": 0.010766937220473746, "grad_norm": 692.8652954101562, "learning_rate": 1.066e-06, "loss": 28.2902, "step": 5330 }, { "epoch": 0.01078713785315756, "grad_norm": 295.2193603515625, "learning_rate": 1.0680000000000002e-06, "loss": 35.8585, "step": 5340 }, { "epoch": 0.010807338485841377, "grad_norm": 179.0888214111328, "learning_rate": 1.0700000000000001e-06, "loss": 13.3468, "step": 5350 }, { "epoch": 0.010827539118525192, "grad_norm": 369.4256896972656, "learning_rate": 1.072e-06, "loss": 42.9108, "step": 5360 }, { "epoch": 0.010847739751209009, "grad_norm": 380.3387756347656, "learning_rate": 1.074e-06, "loss": 35.2701, "step": 5370 }, { "epoch": 0.010867940383892823, "grad_norm": 735.1011962890625, "learning_rate": 1.0760000000000002e-06, "loss": 37.2414, "step": 5380 }, { "epoch": 0.01088814101657664, "grad_norm": 459.38720703125, "learning_rate": 1.078e-06, "loss": 36.9988, "step": 5390 }, { "epoch": 0.010908341649260455, "grad_norm": 1027.1058349609375, "learning_rate": 1.08e-06, "loss": 22.0428, "step": 5400 }, { "epoch": 0.010928542281944271, "grad_norm": 2424.2099609375, 
"learning_rate": 1.0820000000000002e-06, "loss": 31.5498, "step": 5410 }, { "epoch": 0.010948742914628086, "grad_norm": 194.22132873535156, "learning_rate": 1.0840000000000001e-06, "loss": 73.3618, "step": 5420 }, { "epoch": 0.010968943547311902, "grad_norm": 756.6248168945312, "learning_rate": 1.086e-06, "loss": 74.6274, "step": 5430 }, { "epoch": 0.010989144179995717, "grad_norm": 472.0841369628906, "learning_rate": 1.088e-06, "loss": 22.9419, "step": 5440 }, { "epoch": 0.011009344812679534, "grad_norm": 600.0186157226562, "learning_rate": 1.0900000000000002e-06, "loss": 25.0131, "step": 5450 }, { "epoch": 0.011029545445363349, "grad_norm": 305.5976257324219, "learning_rate": 1.092e-06, "loss": 31.3711, "step": 5460 }, { "epoch": 0.011049746078047165, "grad_norm": 108.77481842041016, "learning_rate": 1.094e-06, "loss": 17.3308, "step": 5470 }, { "epoch": 0.01106994671073098, "grad_norm": 1116.296142578125, "learning_rate": 1.0960000000000002e-06, "loss": 37.9978, "step": 5480 }, { "epoch": 0.011090147343414796, "grad_norm": 390.4914245605469, "learning_rate": 1.0980000000000001e-06, "loss": 14.3431, "step": 5490 }, { "epoch": 0.011110347976098611, "grad_norm": 598.1354370117188, "learning_rate": 1.1e-06, "loss": 30.984, "step": 5500 }, { "epoch": 0.011130548608782428, "grad_norm": 711.5731201171875, "learning_rate": 1.1020000000000002e-06, "loss": 30.1973, "step": 5510 }, { "epoch": 0.011150749241466243, "grad_norm": 371.409423828125, "learning_rate": 1.1040000000000001e-06, "loss": 24.4026, "step": 5520 }, { "epoch": 0.011170949874150059, "grad_norm": 665.7667846679688, "learning_rate": 1.106e-06, "loss": 19.5368, "step": 5530 }, { "epoch": 0.011191150506833874, "grad_norm": 502.86566162109375, "learning_rate": 1.108e-06, "loss": 18.791, "step": 5540 }, { "epoch": 0.01121135113951769, "grad_norm": 404.4401550292969, "learning_rate": 1.1100000000000002e-06, "loss": 20.3444, "step": 5550 }, { "epoch": 0.011231551772201505, "grad_norm": 323.6523132324219, "learning_rate": 1.1120000000000001e-06, "loss": 36.3596, "step": 5560 }, { "epoch": 0.011251752404885322, "grad_norm": 743.5354614257812, "learning_rate": 1.114e-06, "loss": 29.4033, "step": 5570 }, { "epoch": 0.011271953037569137, "grad_norm": 281.8879699707031, "learning_rate": 1.1160000000000002e-06, "loss": 23.6649, "step": 5580 }, { "epoch": 0.011292153670252953, "grad_norm": 812.1799926757812, "learning_rate": 1.1180000000000001e-06, "loss": 34.1818, "step": 5590 }, { "epoch": 0.011312354302936768, "grad_norm": 148.88833618164062, "learning_rate": 1.12e-06, "loss": 28.1046, "step": 5600 }, { "epoch": 0.011332554935620584, "grad_norm": 471.3416748046875, "learning_rate": 1.122e-06, "loss": 24.278, "step": 5610 }, { "epoch": 0.0113527555683044, "grad_norm": 353.2952575683594, "learning_rate": 1.1240000000000002e-06, "loss": 30.7999, "step": 5620 }, { "epoch": 0.011372956200988216, "grad_norm": 585.4158325195312, "learning_rate": 1.126e-06, "loss": 16.3788, "step": 5630 }, { "epoch": 0.01139315683367203, "grad_norm": 629.95703125, "learning_rate": 1.128e-06, "loss": 22.8701, "step": 5640 }, { "epoch": 0.011413357466355847, "grad_norm": 1085.2030029296875, "learning_rate": 1.1300000000000002e-06, "loss": 31.6779, "step": 5650 }, { "epoch": 0.011433558099039662, "grad_norm": 1200.4349365234375, "learning_rate": 1.1320000000000001e-06, "loss": 26.5144, "step": 5660 }, { "epoch": 0.011453758731723478, "grad_norm": 869.242919921875, "learning_rate": 1.134e-06, "loss": 26.5842, "step": 5670 }, { "epoch": 0.011473959364407293, "grad_norm": 
107.99213409423828, "learning_rate": 1.1360000000000002e-06, "loss": 27.156, "step": 5680 }, { "epoch": 0.01149415999709111, "grad_norm": 908.802734375, "learning_rate": 1.1380000000000002e-06, "loss": 34.6566, "step": 5690 }, { "epoch": 0.011514360629774924, "grad_norm": 497.6568298339844, "learning_rate": 1.14e-06, "loss": 21.8019, "step": 5700 }, { "epoch": 0.011534561262458741, "grad_norm": 293.9489440917969, "learning_rate": 1.142e-06, "loss": 22.4049, "step": 5710 }, { "epoch": 0.011554761895142556, "grad_norm": 295.17230224609375, "learning_rate": 1.1440000000000002e-06, "loss": 37.8035, "step": 5720 }, { "epoch": 0.011574962527826372, "grad_norm": 475.5271301269531, "learning_rate": 1.1460000000000001e-06, "loss": 24.764, "step": 5730 }, { "epoch": 0.011595163160510187, "grad_norm": 1417.2218017578125, "learning_rate": 1.148e-06, "loss": 31.2254, "step": 5740 }, { "epoch": 0.011615363793194004, "grad_norm": 271.8209533691406, "learning_rate": 1.1500000000000002e-06, "loss": 14.2182, "step": 5750 }, { "epoch": 0.011635564425877818, "grad_norm": 612.4166259765625, "learning_rate": 1.1520000000000002e-06, "loss": 18.7975, "step": 5760 }, { "epoch": 0.011655765058561635, "grad_norm": 314.46990966796875, "learning_rate": 1.154e-06, "loss": 19.6362, "step": 5770 }, { "epoch": 0.01167596569124545, "grad_norm": 627.936767578125, "learning_rate": 1.156e-06, "loss": 38.5957, "step": 5780 }, { "epoch": 0.011696166323929266, "grad_norm": 1000.9813842773438, "learning_rate": 1.1580000000000002e-06, "loss": 34.4684, "step": 5790 }, { "epoch": 0.011716366956613081, "grad_norm": 637.470703125, "learning_rate": 1.1600000000000001e-06, "loss": 34.59, "step": 5800 }, { "epoch": 0.011736567589296898, "grad_norm": 750.3966674804688, "learning_rate": 1.162e-06, "loss": 44.1128, "step": 5810 }, { "epoch": 0.011756768221980712, "grad_norm": 1139.5133056640625, "learning_rate": 1.1640000000000002e-06, "loss": 23.7449, "step": 5820 }, { "epoch": 0.011776968854664529, "grad_norm": 370.25567626953125, "learning_rate": 1.1660000000000001e-06, "loss": 17.9951, "step": 5830 }, { "epoch": 0.011797169487348344, "grad_norm": 194.2151336669922, "learning_rate": 1.168e-06, "loss": 23.3352, "step": 5840 }, { "epoch": 0.01181737012003216, "grad_norm": 812.2650756835938, "learning_rate": 1.1700000000000002e-06, "loss": 21.2374, "step": 5850 }, { "epoch": 0.011837570752715975, "grad_norm": 422.30401611328125, "learning_rate": 1.1720000000000002e-06, "loss": 19.3065, "step": 5860 }, { "epoch": 0.011857771385399792, "grad_norm": 1042.8720703125, "learning_rate": 1.1740000000000001e-06, "loss": 38.1709, "step": 5870 }, { "epoch": 0.011877972018083606, "grad_norm": 463.48712158203125, "learning_rate": 1.176e-06, "loss": 20.1112, "step": 5880 }, { "epoch": 0.011898172650767423, "grad_norm": 251.95362854003906, "learning_rate": 1.1780000000000002e-06, "loss": 19.2058, "step": 5890 }, { "epoch": 0.011918373283451238, "grad_norm": 644.2528076171875, "learning_rate": 1.1800000000000001e-06, "loss": 55.8893, "step": 5900 }, { "epoch": 0.011938573916135054, "grad_norm": 546.8013305664062, "learning_rate": 1.182e-06, "loss": 65.9524, "step": 5910 }, { "epoch": 0.011958774548818869, "grad_norm": 649.1343994140625, "learning_rate": 1.1840000000000002e-06, "loss": 27.965, "step": 5920 }, { "epoch": 0.011978975181502685, "grad_norm": 526.4266357421875, "learning_rate": 1.1860000000000002e-06, "loss": 34.2897, "step": 5930 }, { "epoch": 0.0119991758141865, "grad_norm": 1568.1280517578125, "learning_rate": 1.188e-06, "loss": 35.1816, 
"step": 5940 }, { "epoch": 0.012019376446870317, "grad_norm": 318.4168395996094, "learning_rate": 1.19e-06, "loss": 22.1523, "step": 5950 }, { "epoch": 0.012039577079554132, "grad_norm": 402.7890930175781, "learning_rate": 1.1920000000000002e-06, "loss": 33.6881, "step": 5960 }, { "epoch": 0.012059777712237948, "grad_norm": 497.604736328125, "learning_rate": 1.1940000000000001e-06, "loss": 32.794, "step": 5970 }, { "epoch": 0.012079978344921763, "grad_norm": 444.6767883300781, "learning_rate": 1.196e-06, "loss": 37.6437, "step": 5980 }, { "epoch": 0.01210017897760558, "grad_norm": 863.6239624023438, "learning_rate": 1.1980000000000002e-06, "loss": 26.2152, "step": 5990 }, { "epoch": 0.012120379610289394, "grad_norm": 592.9383544921875, "learning_rate": 1.2000000000000002e-06, "loss": 14.3755, "step": 6000 }, { "epoch": 0.01214058024297321, "grad_norm": 1376.8350830078125, "learning_rate": 1.202e-06, "loss": 36.9957, "step": 6010 }, { "epoch": 0.012160780875657026, "grad_norm": 894.4027709960938, "learning_rate": 1.204e-06, "loss": 25.3654, "step": 6020 }, { "epoch": 0.012180981508340842, "grad_norm": 587.0653686523438, "learning_rate": 1.2060000000000002e-06, "loss": 29.8984, "step": 6030 }, { "epoch": 0.012201182141024657, "grad_norm": 121.9262466430664, "learning_rate": 1.2080000000000001e-06, "loss": 20.7306, "step": 6040 }, { "epoch": 0.012221382773708473, "grad_norm": 488.919677734375, "learning_rate": 1.21e-06, "loss": 27.6776, "step": 6050 }, { "epoch": 0.012241583406392288, "grad_norm": 461.9215393066406, "learning_rate": 1.2120000000000002e-06, "loss": 29.0396, "step": 6060 }, { "epoch": 0.012261784039076105, "grad_norm": 399.4093322753906, "learning_rate": 1.214e-06, "loss": 60.5062, "step": 6070 }, { "epoch": 0.01228198467175992, "grad_norm": 167.42552185058594, "learning_rate": 1.216e-06, "loss": 29.8078, "step": 6080 }, { "epoch": 0.012302185304443734, "grad_norm": 412.9476623535156, "learning_rate": 1.2180000000000002e-06, "loss": 25.1903, "step": 6090 }, { "epoch": 0.01232238593712755, "grad_norm": 402.8238220214844, "learning_rate": 1.2200000000000002e-06, "loss": 39.7322, "step": 6100 }, { "epoch": 0.012342586569811366, "grad_norm": 503.3050537109375, "learning_rate": 1.2220000000000001e-06, "loss": 19.9504, "step": 6110 }, { "epoch": 0.012362787202495182, "grad_norm": 251.07025146484375, "learning_rate": 1.224e-06, "loss": 29.8188, "step": 6120 }, { "epoch": 0.012382987835178997, "grad_norm": 1289.874755859375, "learning_rate": 1.2260000000000002e-06, "loss": 26.9208, "step": 6130 }, { "epoch": 0.012403188467862813, "grad_norm": 839.939453125, "learning_rate": 1.2280000000000001e-06, "loss": 29.809, "step": 6140 }, { "epoch": 0.012423389100546628, "grad_norm": 0.0, "learning_rate": 1.23e-06, "loss": 10.9996, "step": 6150 }, { "epoch": 0.012443589733230445, "grad_norm": 687.5811157226562, "learning_rate": 1.2320000000000002e-06, "loss": 30.5085, "step": 6160 }, { "epoch": 0.01246379036591426, "grad_norm": 127.76237487792969, "learning_rate": 1.234e-06, "loss": 29.9668, "step": 6170 }, { "epoch": 0.012483990998598076, "grad_norm": 1359.7757568359375, "learning_rate": 1.2360000000000001e-06, "loss": 30.7715, "step": 6180 }, { "epoch": 0.012504191631281891, "grad_norm": 436.6647644042969, "learning_rate": 1.238e-06, "loss": 33.9171, "step": 6190 }, { "epoch": 0.012524392263965707, "grad_norm": 281.2616882324219, "learning_rate": 1.2400000000000002e-06, "loss": 41.9189, "step": 6200 }, { "epoch": 0.012544592896649522, "grad_norm": 758.2962036132812, "learning_rate": 
1.2420000000000001e-06, "loss": 33.9747, "step": 6210 }, { "epoch": 0.012564793529333339, "grad_norm": 684.6299438476562, "learning_rate": 1.244e-06, "loss": 35.7783, "step": 6220 }, { "epoch": 0.012584994162017154, "grad_norm": 1113.127197265625, "learning_rate": 1.2460000000000002e-06, "loss": 48.0597, "step": 6230 }, { "epoch": 0.01260519479470097, "grad_norm": 840.9987182617188, "learning_rate": 1.248e-06, "loss": 22.5444, "step": 6240 }, { "epoch": 0.012625395427384785, "grad_norm": 520.2071533203125, "learning_rate": 1.25e-06, "loss": 15.9313, "step": 6250 }, { "epoch": 0.012645596060068601, "grad_norm": 556.5119018554688, "learning_rate": 1.2520000000000003e-06, "loss": 32.9128, "step": 6260 }, { "epoch": 0.012665796692752416, "grad_norm": 228.95762634277344, "learning_rate": 1.2540000000000002e-06, "loss": 16.7764, "step": 6270 }, { "epoch": 0.012685997325436233, "grad_norm": 316.0266418457031, "learning_rate": 1.256e-06, "loss": 25.5342, "step": 6280 }, { "epoch": 0.012706197958120048, "grad_norm": 18.748266220092773, "learning_rate": 1.258e-06, "loss": 42.3436, "step": 6290 }, { "epoch": 0.012726398590803864, "grad_norm": 1323.001953125, "learning_rate": 1.26e-06, "loss": 32.5139, "step": 6300 }, { "epoch": 0.012746599223487679, "grad_norm": 942.7252197265625, "learning_rate": 1.2620000000000002e-06, "loss": 29.162, "step": 6310 }, { "epoch": 0.012766799856171495, "grad_norm": 1211.36962890625, "learning_rate": 1.2640000000000003e-06, "loss": 29.0307, "step": 6320 }, { "epoch": 0.01278700048885531, "grad_norm": 802.3575439453125, "learning_rate": 1.266e-06, "loss": 21.0279, "step": 6330 }, { "epoch": 0.012807201121539127, "grad_norm": 740.271728515625, "learning_rate": 1.268e-06, "loss": 20.676, "step": 6340 }, { "epoch": 0.012827401754222941, "grad_norm": 419.27020263671875, "learning_rate": 1.2700000000000001e-06, "loss": 51.1324, "step": 6350 }, { "epoch": 0.012847602386906758, "grad_norm": 692.2057495117188, "learning_rate": 1.2720000000000003e-06, "loss": 36.6126, "step": 6360 }, { "epoch": 0.012867803019590573, "grad_norm": 132.77894592285156, "learning_rate": 1.2740000000000002e-06, "loss": 21.4999, "step": 6370 }, { "epoch": 0.01288800365227439, "grad_norm": 595.9766845703125, "learning_rate": 1.276e-06, "loss": 28.293, "step": 6380 }, { "epoch": 0.012908204284958204, "grad_norm": 1459.1014404296875, "learning_rate": 1.278e-06, "loss": 21.5185, "step": 6390 }, { "epoch": 0.01292840491764202, "grad_norm": 1731.2054443359375, "learning_rate": 1.28e-06, "loss": 47.9539, "step": 6400 }, { "epoch": 0.012948605550325835, "grad_norm": 750.8800048828125, "learning_rate": 1.2820000000000002e-06, "loss": 42.8562, "step": 6410 }, { "epoch": 0.012968806183009652, "grad_norm": 646.1731567382812, "learning_rate": 1.284e-06, "loss": 32.3806, "step": 6420 }, { "epoch": 0.012989006815693467, "grad_norm": 306.4696044921875, "learning_rate": 1.286e-06, "loss": 24.9085, "step": 6430 }, { "epoch": 0.013009207448377283, "grad_norm": 169.34942626953125, "learning_rate": 1.288e-06, "loss": 32.6633, "step": 6440 }, { "epoch": 0.013029408081061098, "grad_norm": 469.367919921875, "learning_rate": 1.2900000000000001e-06, "loss": 27.3972, "step": 6450 }, { "epoch": 0.013049608713744915, "grad_norm": 553.5028686523438, "learning_rate": 1.2920000000000003e-06, "loss": 35.9573, "step": 6460 }, { "epoch": 0.01306980934642873, "grad_norm": 262.7359924316406, "learning_rate": 1.294e-06, "loss": 30.1157, "step": 6470 }, { "epoch": 0.013090009979112546, "grad_norm": 839.3622436523438, "learning_rate": 
1.296e-06, "loss": 38.7697, "step": 6480 }, { "epoch": 0.01311021061179636, "grad_norm": 469.48895263671875, "learning_rate": 1.2980000000000001e-06, "loss": 33.9329, "step": 6490 }, { "epoch": 0.013130411244480177, "grad_norm": 435.9288635253906, "learning_rate": 1.3e-06, "loss": 30.6598, "step": 6500 }, { "epoch": 0.013150611877163992, "grad_norm": 1100.7294921875, "learning_rate": 1.3020000000000002e-06, "loss": 38.6149, "step": 6510 }, { "epoch": 0.013170812509847809, "grad_norm": 978.08349609375, "learning_rate": 1.304e-06, "loss": 29.158, "step": 6520 }, { "epoch": 0.013191013142531623, "grad_norm": 440.118408203125, "learning_rate": 1.306e-06, "loss": 51.669, "step": 6530 }, { "epoch": 0.01321121377521544, "grad_norm": 217.9273223876953, "learning_rate": 1.308e-06, "loss": 24.4326, "step": 6540 }, { "epoch": 0.013231414407899255, "grad_norm": 356.3488464355469, "learning_rate": 1.3100000000000002e-06, "loss": 26.9776, "step": 6550 }, { "epoch": 0.013251615040583071, "grad_norm": 340.18060302734375, "learning_rate": 1.3120000000000003e-06, "loss": 39.9926, "step": 6560 }, { "epoch": 0.013271815673266886, "grad_norm": 984.8867797851562, "learning_rate": 1.314e-06, "loss": 54.8693, "step": 6570 }, { "epoch": 0.013292016305950703, "grad_norm": 1010.78466796875, "learning_rate": 1.316e-06, "loss": 23.7788, "step": 6580 }, { "epoch": 0.013312216938634517, "grad_norm": 733.6953125, "learning_rate": 1.3180000000000001e-06, "loss": 34.9994, "step": 6590 }, { "epoch": 0.013332417571318334, "grad_norm": 373.2271728515625, "learning_rate": 1.32e-06, "loss": 37.3207, "step": 6600 }, { "epoch": 0.013352618204002149, "grad_norm": 171.13934326171875, "learning_rate": 1.3220000000000002e-06, "loss": 12.7663, "step": 6610 }, { "epoch": 0.013372818836685965, "grad_norm": 357.4339599609375, "learning_rate": 1.324e-06, "loss": 35.8358, "step": 6620 }, { "epoch": 0.01339301946936978, "grad_norm": 220.25674438476562, "learning_rate": 1.326e-06, "loss": 15.3749, "step": 6630 }, { "epoch": 0.013413220102053596, "grad_norm": 462.05841064453125, "learning_rate": 1.328e-06, "loss": 38.8295, "step": 6640 }, { "epoch": 0.013433420734737411, "grad_norm": 469.8040466308594, "learning_rate": 1.3300000000000002e-06, "loss": 22.9475, "step": 6650 }, { "epoch": 0.013453621367421228, "grad_norm": 971.557373046875, "learning_rate": 1.3320000000000003e-06, "loss": 44.1314, "step": 6660 }, { "epoch": 0.013473822000105043, "grad_norm": 298.933837890625, "learning_rate": 1.334e-06, "loss": 28.8329, "step": 6670 }, { "epoch": 0.013494022632788859, "grad_norm": 623.2842407226562, "learning_rate": 1.336e-06, "loss": 35.835, "step": 6680 }, { "epoch": 0.013514223265472674, "grad_norm": 441.5866394042969, "learning_rate": 1.3380000000000001e-06, "loss": 19.0923, "step": 6690 }, { "epoch": 0.01353442389815649, "grad_norm": 1859.7860107421875, "learning_rate": 1.34e-06, "loss": 35.8646, "step": 6700 }, { "epoch": 0.013554624530840305, "grad_norm": 64.71363067626953, "learning_rate": 1.3420000000000002e-06, "loss": 15.234, "step": 6710 }, { "epoch": 0.013574825163524122, "grad_norm": 333.45794677734375, "learning_rate": 1.344e-06, "loss": 22.3284, "step": 6720 }, { "epoch": 0.013595025796207937, "grad_norm": 393.3450622558594, "learning_rate": 1.3460000000000001e-06, "loss": 54.5108, "step": 6730 }, { "epoch": 0.013615226428891753, "grad_norm": 587.0012817382812, "learning_rate": 1.348e-06, "loss": 31.177, "step": 6740 }, { "epoch": 0.013635427061575568, "grad_norm": 309.5393371582031, "learning_rate": 1.3500000000000002e-06, 
"loss": 13.8727, "step": 6750 }, { "epoch": 0.013655627694259384, "grad_norm": 468.2892150878906, "learning_rate": 1.352e-06, "loss": 16.0866, "step": 6760 }, { "epoch": 0.0136758283269432, "grad_norm": 878.9414672851562, "learning_rate": 1.354e-06, "loss": 31.6945, "step": 6770 }, { "epoch": 0.013696028959627016, "grad_norm": 963.8592529296875, "learning_rate": 1.356e-06, "loss": 26.1409, "step": 6780 }, { "epoch": 0.01371622959231083, "grad_norm": 742.5006713867188, "learning_rate": 1.3580000000000002e-06, "loss": 20.0253, "step": 6790 }, { "epoch": 0.013736430224994647, "grad_norm": 302.4109802246094, "learning_rate": 1.3600000000000001e-06, "loss": 30.4902, "step": 6800 }, { "epoch": 0.013756630857678462, "grad_norm": 703.2691650390625, "learning_rate": 1.362e-06, "loss": 101.9395, "step": 6810 }, { "epoch": 0.013776831490362278, "grad_norm": 1015.4454956054688, "learning_rate": 1.364e-06, "loss": 22.2598, "step": 6820 }, { "epoch": 0.013797032123046093, "grad_norm": 394.2761535644531, "learning_rate": 1.3660000000000001e-06, "loss": 36.6149, "step": 6830 }, { "epoch": 0.01381723275572991, "grad_norm": 867.6441650390625, "learning_rate": 1.368e-06, "loss": 32.4753, "step": 6840 }, { "epoch": 0.013837433388413724, "grad_norm": 386.0970458984375, "learning_rate": 1.3700000000000002e-06, "loss": 31.9341, "step": 6850 }, { "epoch": 0.013857634021097541, "grad_norm": 650.8311767578125, "learning_rate": 1.372e-06, "loss": 25.4292, "step": 6860 }, { "epoch": 0.013877834653781356, "grad_norm": 281.85302734375, "learning_rate": 1.374e-06, "loss": 15.7737, "step": 6870 }, { "epoch": 0.013898035286465172, "grad_norm": 494.3153991699219, "learning_rate": 1.376e-06, "loss": 12.811, "step": 6880 }, { "epoch": 0.013918235919148987, "grad_norm": 103.57233428955078, "learning_rate": 1.3780000000000002e-06, "loss": 24.2835, "step": 6890 }, { "epoch": 0.013938436551832804, "grad_norm": 313.1036071777344, "learning_rate": 1.3800000000000001e-06, "loss": 26.341, "step": 6900 }, { "epoch": 0.013958637184516618, "grad_norm": 364.7337951660156, "learning_rate": 1.382e-06, "loss": 19.7548, "step": 6910 }, { "epoch": 0.013978837817200435, "grad_norm": 708.6334838867188, "learning_rate": 1.384e-06, "loss": 24.823, "step": 6920 }, { "epoch": 0.01399903844988425, "grad_norm": 370.78387451171875, "learning_rate": 1.3860000000000002e-06, "loss": 22.2033, "step": 6930 }, { "epoch": 0.014019239082568066, "grad_norm": 584.7822875976562, "learning_rate": 1.388e-06, "loss": 21.2259, "step": 6940 }, { "epoch": 0.014039439715251881, "grad_norm": 434.93896484375, "learning_rate": 1.3900000000000002e-06, "loss": 17.3879, "step": 6950 }, { "epoch": 0.014059640347935698, "grad_norm": 1412.916015625, "learning_rate": 1.392e-06, "loss": 43.3578, "step": 6960 }, { "epoch": 0.014079840980619512, "grad_norm": 690.4659423828125, "learning_rate": 1.3940000000000001e-06, "loss": 29.9748, "step": 6970 }, { "epoch": 0.014100041613303329, "grad_norm": 631.6376953125, "learning_rate": 1.396e-06, "loss": 27.4844, "step": 6980 }, { "epoch": 0.014120242245987144, "grad_norm": 136.48374938964844, "learning_rate": 1.3980000000000002e-06, "loss": 14.787, "step": 6990 }, { "epoch": 0.01414044287867096, "grad_norm": 419.7654724121094, "learning_rate": 1.4000000000000001e-06, "loss": 29.0004, "step": 7000 }, { "epoch": 0.014160643511354775, "grad_norm": 468.74468994140625, "learning_rate": 1.402e-06, "loss": 19.4345, "step": 7010 }, { "epoch": 0.014180844144038592, "grad_norm": 468.9650573730469, "learning_rate": 1.404e-06, "loss": 26.9539, 
"step": 7020 }, { "epoch": 0.014201044776722406, "grad_norm": 256.4075622558594, "learning_rate": 1.4060000000000002e-06, "loss": 21.5345, "step": 7030 }, { "epoch": 0.014221245409406223, "grad_norm": 363.6224060058594, "learning_rate": 1.4080000000000001e-06, "loss": 28.2582, "step": 7040 }, { "epoch": 0.014241446042090038, "grad_norm": 2213.649169921875, "learning_rate": 1.41e-06, "loss": 29.7299, "step": 7050 }, { "epoch": 0.014261646674773854, "grad_norm": 525.2031860351562, "learning_rate": 1.412e-06, "loss": 15.0173, "step": 7060 }, { "epoch": 0.014281847307457669, "grad_norm": 571.482666015625, "learning_rate": 1.4140000000000001e-06, "loss": 22.69, "step": 7070 }, { "epoch": 0.014302047940141486, "grad_norm": 432.14385986328125, "learning_rate": 1.416e-06, "loss": 21.501, "step": 7080 }, { "epoch": 0.0143222485728253, "grad_norm": 230.9308319091797, "learning_rate": 1.4180000000000002e-06, "loss": 23.7384, "step": 7090 }, { "epoch": 0.014342449205509117, "grad_norm": 689.4318237304688, "learning_rate": 1.42e-06, "loss": 16.4564, "step": 7100 }, { "epoch": 0.014362649838192932, "grad_norm": 833.6482543945312, "learning_rate": 1.4220000000000001e-06, "loss": 42.0199, "step": 7110 }, { "epoch": 0.014382850470876748, "grad_norm": 263.74810791015625, "learning_rate": 1.424e-06, "loss": 16.752, "step": 7120 }, { "epoch": 0.014403051103560563, "grad_norm": 69.91686248779297, "learning_rate": 1.4260000000000002e-06, "loss": 11.7691, "step": 7130 }, { "epoch": 0.01442325173624438, "grad_norm": 802.9617309570312, "learning_rate": 1.4280000000000001e-06, "loss": 29.6307, "step": 7140 }, { "epoch": 0.014443452368928194, "grad_norm": 1388.6160888671875, "learning_rate": 1.43e-06, "loss": 17.46, "step": 7150 }, { "epoch": 0.01446365300161201, "grad_norm": 202.5712127685547, "learning_rate": 1.432e-06, "loss": 14.3374, "step": 7160 }, { "epoch": 0.014483853634295826, "grad_norm": 547.8307495117188, "learning_rate": 1.4340000000000002e-06, "loss": 30.6452, "step": 7170 }, { "epoch": 0.014504054266979642, "grad_norm": 211.10414123535156, "learning_rate": 1.436e-06, "loss": 13.0712, "step": 7180 }, { "epoch": 0.014524254899663457, "grad_norm": 756.9300537109375, "learning_rate": 1.4380000000000003e-06, "loss": 23.8358, "step": 7190 }, { "epoch": 0.014544455532347273, "grad_norm": 378.86663818359375, "learning_rate": 1.44e-06, "loss": 26.3164, "step": 7200 }, { "epoch": 0.014564656165031088, "grad_norm": 337.78131103515625, "learning_rate": 1.4420000000000001e-06, "loss": 30.7286, "step": 7210 }, { "epoch": 0.014584856797714905, "grad_norm": 1024.835205078125, "learning_rate": 1.444e-06, "loss": 29.8971, "step": 7220 }, { "epoch": 0.01460505743039872, "grad_norm": 938.3273315429688, "learning_rate": 1.4460000000000002e-06, "loss": 27.8823, "step": 7230 }, { "epoch": 0.014625258063082536, "grad_norm": 552.2940063476562, "learning_rate": 1.4480000000000002e-06, "loss": 25.7623, "step": 7240 }, { "epoch": 0.014645458695766351, "grad_norm": 352.24383544921875, "learning_rate": 1.45e-06, "loss": 12.5614, "step": 7250 }, { "epoch": 0.014665659328450167, "grad_norm": 380.1072082519531, "learning_rate": 1.452e-06, "loss": 21.0116, "step": 7260 }, { "epoch": 0.014685859961133982, "grad_norm": 89.25300598144531, "learning_rate": 1.4540000000000002e-06, "loss": 37.3816, "step": 7270 }, { "epoch": 0.014706060593817799, "grad_norm": 742.039306640625, "learning_rate": 1.4560000000000001e-06, "loss": 20.3916, "step": 7280 }, { "epoch": 0.014726261226501614, "grad_norm": 408.6346435546875, "learning_rate": 
1.4580000000000003e-06, "loss": 22.624, "step": 7290 }, { "epoch": 0.01474646185918543, "grad_norm": 489.7460632324219, "learning_rate": 1.46e-06, "loss": 51.9042, "step": 7300 }, { "epoch": 0.014766662491869245, "grad_norm": 378.1903991699219, "learning_rate": 1.4620000000000001e-06, "loss": 22.1225, "step": 7310 }, { "epoch": 0.014786863124553061, "grad_norm": 118.77698516845703, "learning_rate": 1.464e-06, "loss": 27.0962, "step": 7320 }, { "epoch": 0.014807063757236876, "grad_norm": 497.64141845703125, "learning_rate": 1.4660000000000002e-06, "loss": 23.5584, "step": 7330 }, { "epoch": 0.014827264389920693, "grad_norm": 26.968568801879883, "learning_rate": 1.4680000000000002e-06, "loss": 46.2282, "step": 7340 }, { "epoch": 0.014847465022604507, "grad_norm": 621.72314453125, "learning_rate": 1.4700000000000001e-06, "loss": 33.6739, "step": 7350 }, { "epoch": 0.014867665655288324, "grad_norm": 1012.1883544921875, "learning_rate": 1.472e-06, "loss": 30.1342, "step": 7360 }, { "epoch": 0.014887866287972139, "grad_norm": 164.61184692382812, "learning_rate": 1.4740000000000002e-06, "loss": 22.5415, "step": 7370 }, { "epoch": 0.014908066920655955, "grad_norm": 382.0716552734375, "learning_rate": 1.4760000000000001e-06, "loss": 63.1735, "step": 7380 }, { "epoch": 0.01492826755333977, "grad_norm": 807.2562866210938, "learning_rate": 1.478e-06, "loss": 51.1082, "step": 7390 }, { "epoch": 0.014948468186023587, "grad_norm": 600.788330078125, "learning_rate": 1.48e-06, "loss": 41.2442, "step": 7400 }, { "epoch": 0.014968668818707401, "grad_norm": 547.8676147460938, "learning_rate": 1.4820000000000002e-06, "loss": 33.4473, "step": 7410 }, { "epoch": 0.014988869451391218, "grad_norm": 785.7941284179688, "learning_rate": 1.4840000000000001e-06, "loss": 21.1182, "step": 7420 }, { "epoch": 0.015009070084075033, "grad_norm": 373.2097473144531, "learning_rate": 1.4860000000000003e-06, "loss": 19.4519, "step": 7430 }, { "epoch": 0.01502927071675885, "grad_norm": 283.32928466796875, "learning_rate": 1.488e-06, "loss": 33.4474, "step": 7440 }, { "epoch": 0.015049471349442664, "grad_norm": 601.2964477539062, "learning_rate": 1.4900000000000001e-06, "loss": 37.3402, "step": 7450 }, { "epoch": 0.01506967198212648, "grad_norm": 461.42108154296875, "learning_rate": 1.492e-06, "loss": 33.2678, "step": 7460 }, { "epoch": 0.015089872614810295, "grad_norm": 301.6255187988281, "learning_rate": 1.4940000000000002e-06, "loss": 21.1947, "step": 7470 }, { "epoch": 0.015110073247494112, "grad_norm": 528.6672973632812, "learning_rate": 1.4960000000000002e-06, "loss": 30.9436, "step": 7480 }, { "epoch": 0.015130273880177927, "grad_norm": 429.56304931640625, "learning_rate": 1.498e-06, "loss": 43.7286, "step": 7490 }, { "epoch": 0.015150474512861743, "grad_norm": 477.3526306152344, "learning_rate": 1.5e-06, "loss": 44.6997, "step": 7500 }, { "epoch": 0.015170675145545558, "grad_norm": 694.835205078125, "learning_rate": 1.5020000000000002e-06, "loss": 33.5782, "step": 7510 }, { "epoch": 0.015190875778229375, "grad_norm": 437.0287780761719, "learning_rate": 1.5040000000000001e-06, "loss": 14.402, "step": 7520 }, { "epoch": 0.01521107641091319, "grad_norm": 432.5226135253906, "learning_rate": 1.5060000000000003e-06, "loss": 54.8615, "step": 7530 }, { "epoch": 0.015231277043597006, "grad_norm": 739.1048583984375, "learning_rate": 1.508e-06, "loss": 28.0186, "step": 7540 }, { "epoch": 0.01525147767628082, "grad_norm": 751.8126220703125, "learning_rate": 1.5100000000000002e-06, "loss": 14.3841, "step": 7550 }, { "epoch": 
0.015271678308964637, "grad_norm": 699.0371704101562, "learning_rate": 1.512e-06, "loss": 26.7135, "step": 7560 }, { "epoch": 0.015291878941648452, "grad_norm": 580.2321166992188, "learning_rate": 1.5140000000000002e-06, "loss": 43.0058, "step": 7570 }, { "epoch": 0.015312079574332269, "grad_norm": 953.4240112304688, "learning_rate": 1.5160000000000002e-06, "loss": 30.3746, "step": 7580 }, { "epoch": 0.015332280207016083, "grad_norm": 498.05792236328125, "learning_rate": 1.5180000000000001e-06, "loss": 19.2205, "step": 7590 }, { "epoch": 0.0153524808396999, "grad_norm": 510.3348083496094, "learning_rate": 1.52e-06, "loss": 15.5117, "step": 7600 }, { "epoch": 0.015372681472383715, "grad_norm": 1451.5672607421875, "learning_rate": 1.5220000000000002e-06, "loss": 29.8484, "step": 7610 }, { "epoch": 0.015392882105067531, "grad_norm": 402.60919189453125, "learning_rate": 1.5240000000000001e-06, "loss": 28.366, "step": 7620 }, { "epoch": 0.015413082737751346, "grad_norm": 191.9103240966797, "learning_rate": 1.5260000000000003e-06, "loss": 35.3297, "step": 7630 }, { "epoch": 0.015433283370435162, "grad_norm": 371.203125, "learning_rate": 1.528e-06, "loss": 10.1235, "step": 7640 }, { "epoch": 0.015453484003118977, "grad_norm": 465.6565856933594, "learning_rate": 1.5300000000000002e-06, "loss": 21.8699, "step": 7650 }, { "epoch": 0.015473684635802794, "grad_norm": 703.3648681640625, "learning_rate": 1.5320000000000001e-06, "loss": 32.0473, "step": 7660 }, { "epoch": 0.015493885268486609, "grad_norm": 468.8616638183594, "learning_rate": 1.5340000000000003e-06, "loss": 21.0127, "step": 7670 }, { "epoch": 0.015514085901170425, "grad_norm": 1880.29248046875, "learning_rate": 1.536e-06, "loss": 26.7551, "step": 7680 }, { "epoch": 0.01553428653385424, "grad_norm": 274.2958068847656, "learning_rate": 1.5380000000000001e-06, "loss": 19.2041, "step": 7690 }, { "epoch": 0.015554487166538056, "grad_norm": 539.3201904296875, "learning_rate": 1.54e-06, "loss": 32.8616, "step": 7700 }, { "epoch": 0.015574687799221871, "grad_norm": 87.47315216064453, "learning_rate": 1.5420000000000002e-06, "loss": 22.3236, "step": 7710 }, { "epoch": 0.015594888431905688, "grad_norm": 319.38226318359375, "learning_rate": 1.5440000000000002e-06, "loss": 30.9488, "step": 7720 }, { "epoch": 0.015615089064589503, "grad_norm": 1636.626220703125, "learning_rate": 1.546e-06, "loss": 30.0025, "step": 7730 }, { "epoch": 0.01563528969727332, "grad_norm": 128.85711669921875, "learning_rate": 1.548e-06, "loss": 15.6842, "step": 7740 }, { "epoch": 0.015655490329957136, "grad_norm": 682.8628540039062, "learning_rate": 1.5500000000000002e-06, "loss": 14.3482, "step": 7750 }, { "epoch": 0.01567569096264095, "grad_norm": 274.7837829589844, "learning_rate": 1.5520000000000001e-06, "loss": 26.8116, "step": 7760 }, { "epoch": 0.015695891595324765, "grad_norm": 698.118408203125, "learning_rate": 1.5540000000000003e-06, "loss": 40.2702, "step": 7770 }, { "epoch": 0.01571609222800858, "grad_norm": 505.4995422363281, "learning_rate": 1.556e-06, "loss": 19.0552, "step": 7780 }, { "epoch": 0.015736292860692398, "grad_norm": 604.3401489257812, "learning_rate": 1.5580000000000002e-06, "loss": 25.9554, "step": 7790 }, { "epoch": 0.01575649349337621, "grad_norm": 551.531494140625, "learning_rate": 1.56e-06, "loss": 31.4826, "step": 7800 }, { "epoch": 0.015776694126060028, "grad_norm": 597.856689453125, "learning_rate": 1.5620000000000002e-06, "loss": 33.1883, "step": 7810 }, { "epoch": 0.015796894758743844, "grad_norm": 255.5839385986328, "learning_rate": 
1.5640000000000002e-06, "loss": 16.2388, "step": 7820 }, { "epoch": 0.01581709539142766, "grad_norm": 638.1707763671875, "learning_rate": 1.566e-06, "loss": 12.3245, "step": 7830 }, { "epoch": 0.015837296024111474, "grad_norm": 561.0966796875, "learning_rate": 1.568e-06, "loss": 34.3022, "step": 7840 }, { "epoch": 0.01585749665679529, "grad_norm": 181.12254333496094, "learning_rate": 1.5700000000000002e-06, "loss": 14.8665, "step": 7850 }, { "epoch": 0.015877697289479107, "grad_norm": 320.9978332519531, "learning_rate": 1.5720000000000002e-06, "loss": 40.0919, "step": 7860 }, { "epoch": 0.015897897922162924, "grad_norm": 488.3760681152344, "learning_rate": 1.5740000000000003e-06, "loss": 23.0373, "step": 7870 }, { "epoch": 0.015918098554846737, "grad_norm": 725.4952392578125, "learning_rate": 1.576e-06, "loss": 17.3022, "step": 7880 }, { "epoch": 0.015938299187530553, "grad_norm": 807.7362060546875, "learning_rate": 1.5780000000000002e-06, "loss": 29.6214, "step": 7890 }, { "epoch": 0.01595849982021437, "grad_norm": 402.11663818359375, "learning_rate": 1.5800000000000001e-06, "loss": 37.3078, "step": 7900 }, { "epoch": 0.015978700452898186, "grad_norm": 416.4609680175781, "learning_rate": 1.5820000000000003e-06, "loss": 24.7349, "step": 7910 }, { "epoch": 0.015998901085582, "grad_norm": 794.6650390625, "learning_rate": 1.5840000000000002e-06, "loss": 38.0387, "step": 7920 }, { "epoch": 0.016019101718265816, "grad_norm": 593.4451293945312, "learning_rate": 1.586e-06, "loss": 23.7971, "step": 7930 }, { "epoch": 0.016039302350949632, "grad_norm": 258.35321044921875, "learning_rate": 1.588e-06, "loss": 27.5747, "step": 7940 }, { "epoch": 0.01605950298363345, "grad_norm": 773.8060302734375, "learning_rate": 1.5900000000000002e-06, "loss": 38.5026, "step": 7950 }, { "epoch": 0.016079703616317262, "grad_norm": 408.9706115722656, "learning_rate": 1.5920000000000002e-06, "loss": 16.2559, "step": 7960 }, { "epoch": 0.01609990424900108, "grad_norm": 763.1421508789062, "learning_rate": 1.594e-06, "loss": 29.3452, "step": 7970 }, { "epoch": 0.016120104881684895, "grad_norm": 139.5187530517578, "learning_rate": 1.596e-06, "loss": 21.015, "step": 7980 }, { "epoch": 0.01614030551436871, "grad_norm": 920.0736083984375, "learning_rate": 1.5980000000000002e-06, "loss": 27.6155, "step": 7990 }, { "epoch": 0.016160506147052525, "grad_norm": 302.7886657714844, "learning_rate": 1.6000000000000001e-06, "loss": 38.6215, "step": 8000 }, { "epoch": 0.01618070677973634, "grad_norm": 741.8783569335938, "learning_rate": 1.6020000000000003e-06, "loss": 37.2063, "step": 8010 }, { "epoch": 0.016200907412420158, "grad_norm": 433.6229248046875, "learning_rate": 1.604e-06, "loss": 22.6892, "step": 8020 }, { "epoch": 0.016221108045103974, "grad_norm": 453.72900390625, "learning_rate": 1.606e-06, "loss": 22.5018, "step": 8030 }, { "epoch": 0.016241308677787787, "grad_norm": 1120.4896240234375, "learning_rate": 1.608e-06, "loss": 38.778, "step": 8040 }, { "epoch": 0.016261509310471604, "grad_norm": 410.22259521484375, "learning_rate": 1.6100000000000003e-06, "loss": 43.0265, "step": 8050 }, { "epoch": 0.01628170994315542, "grad_norm": 603.0123901367188, "learning_rate": 1.6120000000000002e-06, "loss": 26.0176, "step": 8060 }, { "epoch": 0.016301910575839237, "grad_norm": 504.2850341796875, "learning_rate": 1.614e-06, "loss": 53.1275, "step": 8070 }, { "epoch": 0.01632211120852305, "grad_norm": 294.2729797363281, "learning_rate": 1.616e-06, "loss": 33.1675, "step": 8080 }, { "epoch": 0.016342311841206866, "grad_norm": 
470.6175537109375, "learning_rate": 1.6180000000000002e-06, "loss": 14.7254, "step": 8090 }, { "epoch": 0.016362512473890683, "grad_norm": 949.7745361328125, "learning_rate": 1.6200000000000002e-06, "loss": 35.2812, "step": 8100 }, { "epoch": 0.0163827131065745, "grad_norm": 1021.1493530273438, "learning_rate": 1.6220000000000003e-06, "loss": 60.0575, "step": 8110 }, { "epoch": 0.016402913739258312, "grad_norm": 210.9373779296875, "learning_rate": 1.624e-06, "loss": 29.2961, "step": 8120 }, { "epoch": 0.01642311437194213, "grad_norm": 972.39404296875, "learning_rate": 1.626e-06, "loss": 31.1454, "step": 8130 }, { "epoch": 0.016443315004625945, "grad_norm": 878.7973022460938, "learning_rate": 1.6280000000000001e-06, "loss": 26.4723, "step": 8140 }, { "epoch": 0.016463515637309762, "grad_norm": 205.9188232421875, "learning_rate": 1.6300000000000003e-06, "loss": 27.026, "step": 8150 }, { "epoch": 0.016483716269993575, "grad_norm": 527.09326171875, "learning_rate": 1.6320000000000002e-06, "loss": 18.5285, "step": 8160 }, { "epoch": 0.01650391690267739, "grad_norm": 667.0870361328125, "learning_rate": 1.634e-06, "loss": 45.9654, "step": 8170 }, { "epoch": 0.016524117535361208, "grad_norm": 418.41314697265625, "learning_rate": 1.636e-06, "loss": 36.5291, "step": 8180 }, { "epoch": 0.016544318168045025, "grad_norm": 132.48226928710938, "learning_rate": 1.6380000000000002e-06, "loss": 18.3192, "step": 8190 }, { "epoch": 0.016564518800728838, "grad_norm": 327.0763244628906, "learning_rate": 1.6400000000000002e-06, "loss": 30.456, "step": 8200 }, { "epoch": 0.016584719433412654, "grad_norm": 439.4847106933594, "learning_rate": 1.6420000000000003e-06, "loss": 23.4486, "step": 8210 }, { "epoch": 0.01660492006609647, "grad_norm": 508.11163330078125, "learning_rate": 1.644e-06, "loss": 23.7181, "step": 8220 }, { "epoch": 0.016625120698780287, "grad_norm": 689.62646484375, "learning_rate": 1.646e-06, "loss": 39.5628, "step": 8230 }, { "epoch": 0.0166453213314641, "grad_norm": 126.10978698730469, "learning_rate": 1.6480000000000001e-06, "loss": 32.6063, "step": 8240 }, { "epoch": 0.016665521964147917, "grad_norm": 348.2756042480469, "learning_rate": 1.6500000000000003e-06, "loss": 19.646, "step": 8250 }, { "epoch": 0.016685722596831733, "grad_norm": 159.87612915039062, "learning_rate": 1.6520000000000002e-06, "loss": 29.4104, "step": 8260 }, { "epoch": 0.01670592322951555, "grad_norm": 137.435546875, "learning_rate": 1.654e-06, "loss": 22.5388, "step": 8270 }, { "epoch": 0.016726123862199363, "grad_norm": 630.7896118164062, "learning_rate": 1.6560000000000001e-06, "loss": 22.1155, "step": 8280 }, { "epoch": 0.01674632449488318, "grad_norm": 581.4238891601562, "learning_rate": 1.6580000000000003e-06, "loss": 46.8483, "step": 8290 }, { "epoch": 0.016766525127566996, "grad_norm": 383.9845275878906, "learning_rate": 1.6600000000000002e-06, "loss": 36.6675, "step": 8300 }, { "epoch": 0.016786725760250813, "grad_norm": 303.4525146484375, "learning_rate": 1.662e-06, "loss": 35.5097, "step": 8310 }, { "epoch": 0.016806926392934626, "grad_norm": 214.48594665527344, "learning_rate": 1.664e-06, "loss": 25.3765, "step": 8320 }, { "epoch": 0.016827127025618442, "grad_norm": 675.5895385742188, "learning_rate": 1.666e-06, "loss": 43.5899, "step": 8330 }, { "epoch": 0.01684732765830226, "grad_norm": 477.797119140625, "learning_rate": 1.6680000000000002e-06, "loss": 33.3804, "step": 8340 }, { "epoch": 0.016867528290986075, "grad_norm": 279.4515075683594, "learning_rate": 1.6700000000000003e-06, "loss": 32.5172, "step": 
8350 }, { "epoch": 0.016887728923669888, "grad_norm": 924.8307495117188, "learning_rate": 1.672e-06, "loss": 31.6212, "step": 8360 }, { "epoch": 0.016907929556353705, "grad_norm": 195.5023956298828, "learning_rate": 1.674e-06, "loss": 24.5261, "step": 8370 }, { "epoch": 0.01692813018903752, "grad_norm": 300.647705078125, "learning_rate": 1.6760000000000001e-06, "loss": 18.4933, "step": 8380 }, { "epoch": 0.016948330821721338, "grad_norm": 199.65211486816406, "learning_rate": 1.6780000000000003e-06, "loss": 19.2608, "step": 8390 }, { "epoch": 0.01696853145440515, "grad_norm": 353.1869812011719, "learning_rate": 1.6800000000000002e-06, "loss": 19.9096, "step": 8400 }, { "epoch": 0.016988732087088967, "grad_norm": 342.279296875, "learning_rate": 1.682e-06, "loss": 22.6966, "step": 8410 }, { "epoch": 0.017008932719772784, "grad_norm": 485.9510803222656, "learning_rate": 1.684e-06, "loss": 29.0974, "step": 8420 }, { "epoch": 0.0170291333524566, "grad_norm": 492.27593994140625, "learning_rate": 1.686e-06, "loss": 12.7701, "step": 8430 }, { "epoch": 0.017049333985140414, "grad_norm": 228.1291961669922, "learning_rate": 1.6880000000000002e-06, "loss": 25.0399, "step": 8440 }, { "epoch": 0.01706953461782423, "grad_norm": 641.7828369140625, "learning_rate": 1.6900000000000003e-06, "loss": 41.747, "step": 8450 }, { "epoch": 0.017089735250508047, "grad_norm": 469.2547302246094, "learning_rate": 1.692e-06, "loss": 28.9467, "step": 8460 }, { "epoch": 0.017109935883191863, "grad_norm": 474.65185546875, "learning_rate": 1.694e-06, "loss": 43.8951, "step": 8470 }, { "epoch": 0.017130136515875676, "grad_norm": 92.68016815185547, "learning_rate": 1.6960000000000002e-06, "loss": 21.1349, "step": 8480 }, { "epoch": 0.017150337148559493, "grad_norm": 437.2278747558594, "learning_rate": 1.6980000000000003e-06, "loss": 45.1486, "step": 8490 }, { "epoch": 0.01717053778124331, "grad_norm": 1121.7486572265625, "learning_rate": 1.7000000000000002e-06, "loss": 20.8305, "step": 8500 }, { "epoch": 0.017190738413927126, "grad_norm": 991.1923217773438, "learning_rate": 1.702e-06, "loss": 32.8894, "step": 8510 }, { "epoch": 0.01721093904661094, "grad_norm": 477.5975341796875, "learning_rate": 1.7040000000000001e-06, "loss": 24.5803, "step": 8520 }, { "epoch": 0.017231139679294755, "grad_norm": 63.715213775634766, "learning_rate": 1.706e-06, "loss": 11.6225, "step": 8530 }, { "epoch": 0.017251340311978572, "grad_norm": 919.2877197265625, "learning_rate": 1.7080000000000002e-06, "loss": 42.0231, "step": 8540 }, { "epoch": 0.01727154094466239, "grad_norm": 2045.6112060546875, "learning_rate": 1.7100000000000004e-06, "loss": 51.4979, "step": 8550 }, { "epoch": 0.0172917415773462, "grad_norm": 728.2069091796875, "learning_rate": 1.712e-06, "loss": 31.7765, "step": 8560 }, { "epoch": 0.017311942210030018, "grad_norm": 1136.805419921875, "learning_rate": 1.714e-06, "loss": 15.2412, "step": 8570 }, { "epoch": 0.017332142842713835, "grad_norm": 305.28338623046875, "learning_rate": 1.7160000000000002e-06, "loss": 11.1767, "step": 8580 }, { "epoch": 0.01735234347539765, "grad_norm": 480.2168884277344, "learning_rate": 1.7180000000000003e-06, "loss": 17.7779, "step": 8590 }, { "epoch": 0.017372544108081464, "grad_norm": 594.7160034179688, "learning_rate": 1.72e-06, "loss": 39.2654, "step": 8600 }, { "epoch": 0.01739274474076528, "grad_norm": 713.5752563476562, "learning_rate": 1.722e-06, "loss": 42.5888, "step": 8610 }, { "epoch": 0.017412945373449097, "grad_norm": 665.0403442382812, "learning_rate": 1.7240000000000001e-06, "loss": 
31.9991, "step": 8620 }, { "epoch": 0.017433146006132914, "grad_norm": 502.7206115722656, "learning_rate": 1.726e-06, "loss": 20.1092, "step": 8630 }, { "epoch": 0.017453346638816727, "grad_norm": 610.836181640625, "learning_rate": 1.7280000000000002e-06, "loss": 29.8465, "step": 8640 }, { "epoch": 0.017473547271500543, "grad_norm": 435.49249267578125, "learning_rate": 1.73e-06, "loss": 18.4336, "step": 8650 }, { "epoch": 0.01749374790418436, "grad_norm": 337.9139709472656, "learning_rate": 1.732e-06, "loss": 27.6431, "step": 8660 }, { "epoch": 0.017513948536868176, "grad_norm": 623.5963745117188, "learning_rate": 1.734e-06, "loss": 23.6477, "step": 8670 }, { "epoch": 0.01753414916955199, "grad_norm": 474.98681640625, "learning_rate": 1.7360000000000002e-06, "loss": 39.8223, "step": 8680 }, { "epoch": 0.017554349802235806, "grad_norm": 210.0931854248047, "learning_rate": 1.7380000000000003e-06, "loss": 26.6607, "step": 8690 }, { "epoch": 0.017574550434919622, "grad_norm": 346.7015380859375, "learning_rate": 1.74e-06, "loss": 25.1463, "step": 8700 }, { "epoch": 0.01759475106760344, "grad_norm": 705.1433715820312, "learning_rate": 1.742e-06, "loss": 35.5323, "step": 8710 }, { "epoch": 0.017614951700287252, "grad_norm": 579.8189697265625, "learning_rate": 1.7440000000000002e-06, "loss": 11.3822, "step": 8720 }, { "epoch": 0.01763515233297107, "grad_norm": 245.68829345703125, "learning_rate": 1.746e-06, "loss": 22.4856, "step": 8730 }, { "epoch": 0.017655352965654885, "grad_norm": 226.94000244140625, "learning_rate": 1.7480000000000002e-06, "loss": 33.9477, "step": 8740 }, { "epoch": 0.0176755535983387, "grad_norm": 240.374267578125, "learning_rate": 1.75e-06, "loss": 41.3515, "step": 8750 }, { "epoch": 0.017695754231022515, "grad_norm": 460.3675842285156, "learning_rate": 1.7520000000000001e-06, "loss": 30.2187, "step": 8760 }, { "epoch": 0.01771595486370633, "grad_norm": 442.6200866699219, "learning_rate": 1.754e-06, "loss": 20.0955, "step": 8770 }, { "epoch": 0.017736155496390148, "grad_norm": 534.3021850585938, "learning_rate": 1.7560000000000002e-06, "loss": 15.8904, "step": 8780 }, { "epoch": 0.017756356129073964, "grad_norm": 429.71075439453125, "learning_rate": 1.7580000000000004e-06, "loss": 28.6299, "step": 8790 }, { "epoch": 0.017776556761757777, "grad_norm": 428.8110656738281, "learning_rate": 1.76e-06, "loss": 29.9164, "step": 8800 }, { "epoch": 0.017796757394441594, "grad_norm": 297.4836730957031, "learning_rate": 1.762e-06, "loss": 29.891, "step": 8810 }, { "epoch": 0.01781695802712541, "grad_norm": 514.8496704101562, "learning_rate": 1.7640000000000002e-06, "loss": 30.6904, "step": 8820 }, { "epoch": 0.017837158659809227, "grad_norm": 668.380126953125, "learning_rate": 1.7660000000000001e-06, "loss": 18.5691, "step": 8830 }, { "epoch": 0.01785735929249304, "grad_norm": 667.9898681640625, "learning_rate": 1.7680000000000003e-06, "loss": 21.9439, "step": 8840 }, { "epoch": 0.017877559925176856, "grad_norm": 515.5223999023438, "learning_rate": 1.77e-06, "loss": 35.0883, "step": 8850 }, { "epoch": 0.017897760557860673, "grad_norm": 456.4408874511719, "learning_rate": 1.7720000000000001e-06, "loss": 21.8389, "step": 8860 }, { "epoch": 0.01791796119054449, "grad_norm": 457.3421936035156, "learning_rate": 1.774e-06, "loss": 30.5864, "step": 8870 }, { "epoch": 0.017938161823228303, "grad_norm": 911.0169677734375, "learning_rate": 1.7760000000000002e-06, "loss": 21.5449, "step": 8880 }, { "epoch": 0.01795836245591212, "grad_norm": 1107.2392578125, "learning_rate": 
1.7780000000000004e-06, "loss": 31.0002, "step": 8890 }, { "epoch": 0.017978563088595936, "grad_norm": 309.9970703125, "learning_rate": 1.7800000000000001e-06, "loss": 20.7838, "step": 8900 }, { "epoch": 0.017998763721279752, "grad_norm": 525.5556030273438, "learning_rate": 1.782e-06, "loss": 21.8871, "step": 8910 }, { "epoch": 0.018018964353963565, "grad_norm": 330.1131286621094, "learning_rate": 1.7840000000000002e-06, "loss": 33.9941, "step": 8920 }, { "epoch": 0.018039164986647382, "grad_norm": 1103.0606689453125, "learning_rate": 1.7860000000000001e-06, "loss": 36.5735, "step": 8930 }, { "epoch": 0.0180593656193312, "grad_norm": 505.9988708496094, "learning_rate": 1.788e-06, "loss": 17.7174, "step": 8940 }, { "epoch": 0.018079566252015015, "grad_norm": 143.58360290527344, "learning_rate": 1.79e-06, "loss": 26.4376, "step": 8950 }, { "epoch": 0.018099766884698828, "grad_norm": 1293.5728759765625, "learning_rate": 1.7920000000000002e-06, "loss": 38.0046, "step": 8960 }, { "epoch": 0.018119967517382644, "grad_norm": 1032.229248046875, "learning_rate": 1.794e-06, "loss": 26.7437, "step": 8970 }, { "epoch": 0.01814016815006646, "grad_norm": 393.4200744628906, "learning_rate": 1.7960000000000003e-06, "loss": 28.0399, "step": 8980 }, { "epoch": 0.018160368782750277, "grad_norm": 1051.5867919921875, "learning_rate": 1.798e-06, "loss": 28.9257, "step": 8990 }, { "epoch": 0.01818056941543409, "grad_norm": 266.2267150878906, "learning_rate": 1.8000000000000001e-06, "loss": 26.0235, "step": 9000 }, { "epoch": 0.018200770048117907, "grad_norm": 613.0821533203125, "learning_rate": 1.802e-06, "loss": 31.5658, "step": 9010 }, { "epoch": 0.018220970680801724, "grad_norm": 498.7320251464844, "learning_rate": 1.8040000000000002e-06, "loss": 23.0911, "step": 9020 }, { "epoch": 0.01824117131348554, "grad_norm": 68.88175964355469, "learning_rate": 1.8060000000000002e-06, "loss": 20.3582, "step": 9030 }, { "epoch": 0.018261371946169353, "grad_norm": 316.1387634277344, "learning_rate": 1.808e-06, "loss": 22.0128, "step": 9040 }, { "epoch": 0.01828157257885317, "grad_norm": 616.5128784179688, "learning_rate": 1.81e-06, "loss": 17.5261, "step": 9050 }, { "epoch": 0.018301773211536986, "grad_norm": 600.02197265625, "learning_rate": 1.8120000000000002e-06, "loss": 28.4692, "step": 9060 }, { "epoch": 0.018321973844220803, "grad_norm": 619.6358642578125, "learning_rate": 1.8140000000000001e-06, "loss": 37.1422, "step": 9070 }, { "epoch": 0.018342174476904616, "grad_norm": 122.20323944091797, "learning_rate": 1.8160000000000003e-06, "loss": 12.6013, "step": 9080 }, { "epoch": 0.018362375109588432, "grad_norm": 474.7579650878906, "learning_rate": 1.818e-06, "loss": 38.2704, "step": 9090 }, { "epoch": 0.01838257574227225, "grad_norm": 338.4971618652344, "learning_rate": 1.8200000000000002e-06, "loss": 49.1372, "step": 9100 }, { "epoch": 0.018402776374956065, "grad_norm": 358.2091064453125, "learning_rate": 1.822e-06, "loss": 20.0141, "step": 9110 }, { "epoch": 0.01842297700763988, "grad_norm": 779.68798828125, "learning_rate": 1.8240000000000002e-06, "loss": 45.5779, "step": 9120 }, { "epoch": 0.018443177640323695, "grad_norm": 0.0, "learning_rate": 1.8260000000000002e-06, "loss": 20.6921, "step": 9130 }, { "epoch": 0.01846337827300751, "grad_norm": 762.357177734375, "learning_rate": 1.8280000000000001e-06, "loss": 26.0861, "step": 9140 }, { "epoch": 0.018483578905691325, "grad_norm": 776.2432250976562, "learning_rate": 1.83e-06, "loss": 44.6114, "step": 9150 }, { "epoch": 0.01850377953837514, "grad_norm": 
442.65899658203125, "learning_rate": 1.8320000000000002e-06, "loss": 19.8247, "step": 9160 }, { "epoch": 0.018523980171058958, "grad_norm": 314.3350830078125, "learning_rate": 1.8340000000000001e-06, "loss": 23.1571, "step": 9170 }, { "epoch": 0.018544180803742774, "grad_norm": 207.27491760253906, "learning_rate": 1.8360000000000003e-06, "loss": 26.3099, "step": 9180 }, { "epoch": 0.018564381436426587, "grad_norm": 669.66845703125, "learning_rate": 1.838e-06, "loss": 28.7703, "step": 9190 }, { "epoch": 0.018584582069110404, "grad_norm": 817.0938720703125, "learning_rate": 1.8400000000000002e-06, "loss": 40.8719, "step": 9200 }, { "epoch": 0.01860478270179422, "grad_norm": 783.8439331054688, "learning_rate": 1.8420000000000001e-06, "loss": 30.5884, "step": 9210 }, { "epoch": 0.018624983334478037, "grad_norm": 205.8710174560547, "learning_rate": 1.8440000000000003e-06, "loss": 17.3456, "step": 9220 }, { "epoch": 0.01864518396716185, "grad_norm": 422.68719482421875, "learning_rate": 1.846e-06, "loss": 20.6496, "step": 9230 }, { "epoch": 0.018665384599845666, "grad_norm": 21.8781795501709, "learning_rate": 1.8480000000000001e-06, "loss": 14.5447, "step": 9240 }, { "epoch": 0.018685585232529483, "grad_norm": 801.7711791992188, "learning_rate": 1.85e-06, "loss": 23.7558, "step": 9250 }, { "epoch": 0.0187057858652133, "grad_norm": 279.67120361328125, "learning_rate": 1.8520000000000002e-06, "loss": 15.6061, "step": 9260 }, { "epoch": 0.018725986497897112, "grad_norm": 771.0630493164062, "learning_rate": 1.8540000000000002e-06, "loss": 17.1959, "step": 9270 }, { "epoch": 0.01874618713058093, "grad_norm": 674.9140014648438, "learning_rate": 1.856e-06, "loss": 28.1603, "step": 9280 }, { "epoch": 0.018766387763264746, "grad_norm": 703.7227172851562, "learning_rate": 1.858e-06, "loss": 31.4647, "step": 9290 }, { "epoch": 0.018786588395948562, "grad_norm": 923.1442260742188, "learning_rate": 1.8600000000000002e-06, "loss": 27.7094, "step": 9300 }, { "epoch": 0.018806789028632375, "grad_norm": 874.7780151367188, "learning_rate": 1.8620000000000001e-06, "loss": 40.6183, "step": 9310 }, { "epoch": 0.01882698966131619, "grad_norm": 526.9776611328125, "learning_rate": 1.8640000000000003e-06, "loss": 31.6663, "step": 9320 }, { "epoch": 0.018847190294000008, "grad_norm": 696.9180908203125, "learning_rate": 1.866e-06, "loss": 33.0455, "step": 9330 }, { "epoch": 0.018867390926683825, "grad_norm": 287.4581298828125, "learning_rate": 1.8680000000000002e-06, "loss": 28.8934, "step": 9340 }, { "epoch": 0.018887591559367638, "grad_norm": 158.7447967529297, "learning_rate": 1.87e-06, "loss": 11.3888, "step": 9350 }, { "epoch": 0.018907792192051454, "grad_norm": 465.53643798828125, "learning_rate": 1.8720000000000002e-06, "loss": 20.2694, "step": 9360 }, { "epoch": 0.01892799282473527, "grad_norm": 504.10064697265625, "learning_rate": 1.8740000000000002e-06, "loss": 36.4953, "step": 9370 }, { "epoch": 0.018948193457419087, "grad_norm": 873.6696166992188, "learning_rate": 1.8760000000000001e-06, "loss": 16.1505, "step": 9380 }, { "epoch": 0.0189683940901029, "grad_norm": 356.315673828125, "learning_rate": 1.878e-06, "loss": 15.7461, "step": 9390 }, { "epoch": 0.018988594722786717, "grad_norm": 898.4686279296875, "learning_rate": 1.8800000000000002e-06, "loss": 46.1245, "step": 9400 }, { "epoch": 0.019008795355470533, "grad_norm": 704.2838745117188, "learning_rate": 1.8820000000000001e-06, "loss": 39.3683, "step": 9410 }, { "epoch": 0.01902899598815435, "grad_norm": 163.83099365234375, "learning_rate": 
1.8840000000000003e-06, "loss": 20.4353, "step": 9420 }, { "epoch": 0.019049196620838163, "grad_norm": 624.2174072265625, "learning_rate": 1.886e-06, "loss": 25.4072, "step": 9430 }, { "epoch": 0.01906939725352198, "grad_norm": 2548.589599609375, "learning_rate": 1.8880000000000002e-06, "loss": 27.8204, "step": 9440 }, { "epoch": 0.019089597886205796, "grad_norm": 302.43408203125, "learning_rate": 1.8900000000000001e-06, "loss": 17.2062, "step": 9450 }, { "epoch": 0.019109798518889613, "grad_norm": 615.1204833984375, "learning_rate": 1.8920000000000003e-06, "loss": 28.6013, "step": 9460 }, { "epoch": 0.019129999151573426, "grad_norm": 710.912109375, "learning_rate": 1.8940000000000002e-06, "loss": 29.2889, "step": 9470 }, { "epoch": 0.019150199784257242, "grad_norm": 620.056884765625, "learning_rate": 1.8960000000000001e-06, "loss": 30.7066, "step": 9480 }, { "epoch": 0.01917040041694106, "grad_norm": 858.3339233398438, "learning_rate": 1.898e-06, "loss": 34.2609, "step": 9490 }, { "epoch": 0.019190601049624875, "grad_norm": 787.121826171875, "learning_rate": 1.9000000000000002e-06, "loss": 26.5648, "step": 9500 }, { "epoch": 0.01921080168230869, "grad_norm": 248.5078582763672, "learning_rate": 1.9020000000000002e-06, "loss": 11.6471, "step": 9510 }, { "epoch": 0.019231002314992505, "grad_norm": 395.26434326171875, "learning_rate": 1.9040000000000003e-06, "loss": 37.8632, "step": 9520 }, { "epoch": 0.01925120294767632, "grad_norm": 353.8317565917969, "learning_rate": 1.906e-06, "loss": 36.6945, "step": 9530 }, { "epoch": 0.019271403580360138, "grad_norm": 422.2906188964844, "learning_rate": 1.908e-06, "loss": 26.6615, "step": 9540 }, { "epoch": 0.01929160421304395, "grad_norm": 740.6773681640625, "learning_rate": 1.9100000000000003e-06, "loss": 23.6173, "step": 9550 }, { "epoch": 0.019311804845727767, "grad_norm": 691.2985229492188, "learning_rate": 1.912e-06, "loss": 26.8522, "step": 9560 }, { "epoch": 0.019332005478411584, "grad_norm": 248.71804809570312, "learning_rate": 1.9140000000000002e-06, "loss": 16.9979, "step": 9570 }, { "epoch": 0.0193522061110954, "grad_norm": 195.49794006347656, "learning_rate": 1.916e-06, "loss": 19.1804, "step": 9580 }, { "epoch": 0.019372406743779214, "grad_norm": 0.0, "learning_rate": 1.918e-06, "loss": 26.4794, "step": 9590 }, { "epoch": 0.01939260737646303, "grad_norm": 1275.6842041015625, "learning_rate": 1.9200000000000003e-06, "loss": 16.8437, "step": 9600 }, { "epoch": 0.019412808009146847, "grad_norm": 1192.946533203125, "learning_rate": 1.9220000000000004e-06, "loss": 36.0156, "step": 9610 }, { "epoch": 0.019433008641830663, "grad_norm": 599.123291015625, "learning_rate": 1.924e-06, "loss": 15.0497, "step": 9620 }, { "epoch": 0.019453209274514476, "grad_norm": 386.57440185546875, "learning_rate": 1.9260000000000003e-06, "loss": 45.0766, "step": 9630 }, { "epoch": 0.019473409907198293, "grad_norm": 687.6180419921875, "learning_rate": 1.928e-06, "loss": 17.8253, "step": 9640 }, { "epoch": 0.01949361053988211, "grad_norm": 94.41118621826172, "learning_rate": 1.93e-06, "loss": 31.9916, "step": 9650 }, { "epoch": 0.019513811172565926, "grad_norm": 939.4404296875, "learning_rate": 1.9320000000000003e-06, "loss": 28.7754, "step": 9660 }, { "epoch": 0.01953401180524974, "grad_norm": 535.195556640625, "learning_rate": 1.934e-06, "loss": 18.8377, "step": 9670 }, { "epoch": 0.019554212437933555, "grad_norm": 1218.1092529296875, "learning_rate": 1.936e-06, "loss": 19.8742, "step": 9680 }, { "epoch": 0.019574413070617372, "grad_norm": 822.753173828125, 
"learning_rate": 1.9380000000000003e-06, "loss": 34.7596, "step": 9690 }, { "epoch": 0.01959461370330119, "grad_norm": 553.1561279296875, "learning_rate": 1.94e-06, "loss": 39.8842, "step": 9700 }, { "epoch": 0.019614814335985, "grad_norm": 689.9955444335938, "learning_rate": 1.942e-06, "loss": 16.5021, "step": 9710 }, { "epoch": 0.019635014968668818, "grad_norm": 5.810490608215332, "learning_rate": 1.944e-06, "loss": 11.6419, "step": 9720 }, { "epoch": 0.019655215601352635, "grad_norm": 772.7295532226562, "learning_rate": 1.946e-06, "loss": 28.1842, "step": 9730 }, { "epoch": 0.01967541623403645, "grad_norm": 1233.431884765625, "learning_rate": 1.9480000000000002e-06, "loss": 39.9902, "step": 9740 }, { "epoch": 0.019695616866720264, "grad_norm": 492.22119140625, "learning_rate": 1.9500000000000004e-06, "loss": 17.8986, "step": 9750 }, { "epoch": 0.01971581749940408, "grad_norm": 244.38047790527344, "learning_rate": 1.952e-06, "loss": 25.3728, "step": 9760 }, { "epoch": 0.019736018132087897, "grad_norm": 304.52117919921875, "learning_rate": 1.9540000000000003e-06, "loss": 28.4243, "step": 9770 }, { "epoch": 0.019756218764771714, "grad_norm": 309.2403869628906, "learning_rate": 1.956e-06, "loss": 14.3713, "step": 9780 }, { "epoch": 0.019776419397455527, "grad_norm": 609.8701171875, "learning_rate": 1.958e-06, "loss": 43.4776, "step": 9790 }, { "epoch": 0.019796620030139343, "grad_norm": 296.7516174316406, "learning_rate": 1.9600000000000003e-06, "loss": 35.1687, "step": 9800 }, { "epoch": 0.01981682066282316, "grad_norm": 873.6741943359375, "learning_rate": 1.9620000000000004e-06, "loss": 30.533, "step": 9810 }, { "epoch": 0.019837021295506976, "grad_norm": 610.7356567382812, "learning_rate": 1.964e-06, "loss": 23.3563, "step": 9820 }, { "epoch": 0.01985722192819079, "grad_norm": 465.5621643066406, "learning_rate": 1.9660000000000003e-06, "loss": 11.8833, "step": 9830 }, { "epoch": 0.019877422560874606, "grad_norm": 169.4474334716797, "learning_rate": 1.968e-06, "loss": 19.3861, "step": 9840 }, { "epoch": 0.019897623193558422, "grad_norm": 507.5028381347656, "learning_rate": 1.97e-06, "loss": 22.765, "step": 9850 }, { "epoch": 0.01991782382624224, "grad_norm": 836.77587890625, "learning_rate": 1.972e-06, "loss": 40.2573, "step": 9860 }, { "epoch": 0.019938024458926052, "grad_norm": 826.0131225585938, "learning_rate": 1.974e-06, "loss": 36.6529, "step": 9870 }, { "epoch": 0.01995822509160987, "grad_norm": 577.0784912109375, "learning_rate": 1.9760000000000002e-06, "loss": 35.453, "step": 9880 }, { "epoch": 0.019978425724293685, "grad_norm": 592.3372802734375, "learning_rate": 1.9780000000000004e-06, "loss": 26.5375, "step": 9890 }, { "epoch": 0.0199986263569775, "grad_norm": 272.7134704589844, "learning_rate": 1.98e-06, "loss": 19.0487, "step": 9900 }, { "epoch": 0.020018826989661315, "grad_norm": 732.1380004882812, "learning_rate": 1.982e-06, "loss": 13.1576, "step": 9910 }, { "epoch": 0.02003902762234513, "grad_norm": 972.0679931640625, "learning_rate": 1.984e-06, "loss": 21.3812, "step": 9920 }, { "epoch": 0.020059228255028948, "grad_norm": 374.1379089355469, "learning_rate": 1.986e-06, "loss": 27.0574, "step": 9930 }, { "epoch": 0.020079428887712764, "grad_norm": 441.6177062988281, "learning_rate": 1.9880000000000003e-06, "loss": 43.1995, "step": 9940 }, { "epoch": 0.020099629520396577, "grad_norm": 377.7627868652344, "learning_rate": 1.9900000000000004e-06, "loss": 27.2779, "step": 9950 }, { "epoch": 0.020119830153080394, "grad_norm": 265.7279357910156, "learning_rate": 1.992e-06, 
"loss": 17.3237, "step": 9960 }, { "epoch": 0.02014003078576421, "grad_norm": 997.96923828125, "learning_rate": 1.9940000000000003e-06, "loss": 34.881, "step": 9970 }, { "epoch": 0.020160231418448027, "grad_norm": 457.5382080078125, "learning_rate": 1.996e-06, "loss": 28.9901, "step": 9980 }, { "epoch": 0.02018043205113184, "grad_norm": 1992.1358642578125, "learning_rate": 1.998e-06, "loss": 48.9051, "step": 9990 }, { "epoch": 0.020200632683815656, "grad_norm": 551.3404541015625, "learning_rate": 2.0000000000000003e-06, "loss": 20.4688, "step": 10000 }, { "epoch": 0.020220833316499473, "grad_norm": 725.7215576171875, "learning_rate": 2.002e-06, "loss": 20.0534, "step": 10010 }, { "epoch": 0.02024103394918329, "grad_norm": 341.03387451171875, "learning_rate": 2.004e-06, "loss": 40.7382, "step": 10020 }, { "epoch": 0.020261234581867103, "grad_norm": 1009.0243530273438, "learning_rate": 2.0060000000000004e-06, "loss": 18.6586, "step": 10030 }, { "epoch": 0.02028143521455092, "grad_norm": 393.10284423828125, "learning_rate": 2.008e-06, "loss": 17.7199, "step": 10040 }, { "epoch": 0.020301635847234736, "grad_norm": 488.662353515625, "learning_rate": 2.0100000000000002e-06, "loss": 28.6032, "step": 10050 }, { "epoch": 0.020321836479918552, "grad_norm": 770.6610717773438, "learning_rate": 2.012e-06, "loss": 35.9281, "step": 10060 }, { "epoch": 0.020342037112602365, "grad_norm": 432.2557067871094, "learning_rate": 2.014e-06, "loss": 33.4646, "step": 10070 }, { "epoch": 0.020362237745286182, "grad_norm": 854.4219360351562, "learning_rate": 2.0160000000000003e-06, "loss": 31.1678, "step": 10080 }, { "epoch": 0.02038243837797, "grad_norm": 269.6304626464844, "learning_rate": 2.0180000000000004e-06, "loss": 13.6562, "step": 10090 }, { "epoch": 0.020402639010653815, "grad_norm": 388.16375732421875, "learning_rate": 2.02e-06, "loss": 33.808, "step": 10100 }, { "epoch": 0.020422839643337628, "grad_norm": 1145.10546875, "learning_rate": 2.022e-06, "loss": 39.483, "step": 10110 }, { "epoch": 0.020443040276021444, "grad_norm": 0.0, "learning_rate": 2.024e-06, "loss": 29.5741, "step": 10120 }, { "epoch": 0.02046324090870526, "grad_norm": 239.4766082763672, "learning_rate": 2.026e-06, "loss": 23.1077, "step": 10130 }, { "epoch": 0.020483441541389077, "grad_norm": 693.35888671875, "learning_rate": 2.0280000000000003e-06, "loss": 19.3912, "step": 10140 }, { "epoch": 0.02050364217407289, "grad_norm": 448.5995788574219, "learning_rate": 2.0300000000000005e-06, "loss": 37.3191, "step": 10150 }, { "epoch": 0.020523842806756707, "grad_norm": 717.5645141601562, "learning_rate": 2.032e-06, "loss": 37.5115, "step": 10160 }, { "epoch": 0.020544043439440524, "grad_norm": 232.80697631835938, "learning_rate": 2.0340000000000003e-06, "loss": 33.9487, "step": 10170 }, { "epoch": 0.02056424407212434, "grad_norm": 231.29507446289062, "learning_rate": 2.036e-06, "loss": 31.3771, "step": 10180 }, { "epoch": 0.020584444704808153, "grad_norm": 637.5938720703125, "learning_rate": 2.0380000000000002e-06, "loss": 30.6479, "step": 10190 }, { "epoch": 0.02060464533749197, "grad_norm": 358.1338195800781, "learning_rate": 2.04e-06, "loss": 15.479, "step": 10200 }, { "epoch": 0.020624845970175786, "grad_norm": 267.0400085449219, "learning_rate": 2.042e-06, "loss": 15.4971, "step": 10210 }, { "epoch": 0.020645046602859603, "grad_norm": 526.010498046875, "learning_rate": 2.0440000000000003e-06, "loss": 45.7432, "step": 10220 }, { "epoch": 0.020665247235543416, "grad_norm": 507.1458435058594, "learning_rate": 2.0460000000000004e-06, "loss": 
12.3678, "step": 10230 }, { "epoch": 0.020685447868227232, "grad_norm": 257.6839904785156, "learning_rate": 2.048e-06, "loss": 32.5355, "step": 10240 }, { "epoch": 0.02070564850091105, "grad_norm": 573.19482421875, "learning_rate": 2.05e-06, "loss": 19.0711, "step": 10250 }, { "epoch": 0.020725849133594865, "grad_norm": 686.1099243164062, "learning_rate": 2.052e-06, "loss": 20.8575, "step": 10260 }, { "epoch": 0.02074604976627868, "grad_norm": 415.1260070800781, "learning_rate": 2.054e-06, "loss": 22.5357, "step": 10270 }, { "epoch": 0.020766250398962495, "grad_norm": 484.9674377441406, "learning_rate": 2.0560000000000003e-06, "loss": 19.3595, "step": 10280 }, { "epoch": 0.02078645103164631, "grad_norm": 957.904052734375, "learning_rate": 2.0580000000000005e-06, "loss": 19.152, "step": 10290 }, { "epoch": 0.020806651664330128, "grad_norm": 580.3505859375, "learning_rate": 2.06e-06, "loss": 24.9238, "step": 10300 }, { "epoch": 0.02082685229701394, "grad_norm": 253.50173950195312, "learning_rate": 2.062e-06, "loss": 19.7234, "step": 10310 }, { "epoch": 0.020847052929697758, "grad_norm": 451.6479797363281, "learning_rate": 2.064e-06, "loss": 22.097, "step": 10320 }, { "epoch": 0.020867253562381574, "grad_norm": 219.24774169921875, "learning_rate": 2.066e-06, "loss": 21.591, "step": 10330 }, { "epoch": 0.02088745419506539, "grad_norm": 557.32958984375, "learning_rate": 2.0680000000000004e-06, "loss": 27.9227, "step": 10340 }, { "epoch": 0.020907654827749204, "grad_norm": 778.280517578125, "learning_rate": 2.07e-06, "loss": 32.9145, "step": 10350 }, { "epoch": 0.02092785546043302, "grad_norm": 685.9678344726562, "learning_rate": 2.0720000000000002e-06, "loss": 25.987, "step": 10360 }, { "epoch": 0.020948056093116837, "grad_norm": 627.4102783203125, "learning_rate": 2.0740000000000004e-06, "loss": 37.0447, "step": 10370 }, { "epoch": 0.020968256725800653, "grad_norm": 560.6248779296875, "learning_rate": 2.076e-06, "loss": 23.6747, "step": 10380 }, { "epoch": 0.020988457358484466, "grad_norm": 563.7539672851562, "learning_rate": 2.0780000000000003e-06, "loss": 33.1649, "step": 10390 }, { "epoch": 0.021008657991168283, "grad_norm": 498.6136474609375, "learning_rate": 2.08e-06, "loss": 26.6104, "step": 10400 }, { "epoch": 0.0210288586238521, "grad_norm": 1427.0985107421875, "learning_rate": 2.082e-06, "loss": 21.6541, "step": 10410 }, { "epoch": 0.021049059256535916, "grad_norm": 595.067138671875, "learning_rate": 2.0840000000000003e-06, "loss": 25.8228, "step": 10420 }, { "epoch": 0.02106925988921973, "grad_norm": 0.0, "learning_rate": 2.0860000000000004e-06, "loss": 7.918, "step": 10430 }, { "epoch": 0.021089460521903546, "grad_norm": 484.4460754394531, "learning_rate": 2.088e-06, "loss": 31.523, "step": 10440 }, { "epoch": 0.021109661154587362, "grad_norm": 427.2276611328125, "learning_rate": 2.09e-06, "loss": 17.9014, "step": 10450 }, { "epoch": 0.02112986178727118, "grad_norm": 231.73727416992188, "learning_rate": 2.092e-06, "loss": 37.3303, "step": 10460 }, { "epoch": 0.02115006241995499, "grad_norm": 1066.0413818359375, "learning_rate": 2.094e-06, "loss": 39.7092, "step": 10470 }, { "epoch": 0.021170263052638808, "grad_norm": 564.856689453125, "learning_rate": 2.0960000000000003e-06, "loss": 28.4659, "step": 10480 }, { "epoch": 0.021190463685322625, "grad_norm": 888.2863159179688, "learning_rate": 2.098e-06, "loss": 21.1032, "step": 10490 }, { "epoch": 0.02121066431800644, "grad_norm": 589.4425048828125, "learning_rate": 2.1000000000000002e-06, "loss": 46.9519, "step": 10500 }, { "epoch": 
0.021230864950690254, "grad_norm": 630.2435913085938, "learning_rate": 2.102e-06, "loss": 47.407, "step": 10510 }, { "epoch": 0.02125106558337407, "grad_norm": 972.0075073242188, "learning_rate": 2.104e-06, "loss": 29.9414, "step": 10520 }, { "epoch": 0.021271266216057887, "grad_norm": 317.7508850097656, "learning_rate": 2.1060000000000002e-06, "loss": 13.2156, "step": 10530 }, { "epoch": 0.021291466848741704, "grad_norm": 616.036376953125, "learning_rate": 2.108e-06, "loss": 44.4217, "step": 10540 }, { "epoch": 0.021311667481425517, "grad_norm": 833.261474609375, "learning_rate": 2.11e-06, "loss": 28.1556, "step": 10550 }, { "epoch": 0.021331868114109333, "grad_norm": 382.53973388671875, "learning_rate": 2.1120000000000003e-06, "loss": 33.0207, "step": 10560 }, { "epoch": 0.02135206874679315, "grad_norm": 557.8203125, "learning_rate": 2.1140000000000004e-06, "loss": 22.0744, "step": 10570 }, { "epoch": 0.021372269379476967, "grad_norm": 1072.9012451171875, "learning_rate": 2.116e-06, "loss": 33.3419, "step": 10580 }, { "epoch": 0.02139247001216078, "grad_norm": 883.43603515625, "learning_rate": 2.118e-06, "loss": 21.8559, "step": 10590 }, { "epoch": 0.021412670644844596, "grad_norm": 466.87884521484375, "learning_rate": 2.12e-06, "loss": 32.9837, "step": 10600 }, { "epoch": 0.021432871277528413, "grad_norm": 445.830078125, "learning_rate": 2.122e-06, "loss": 34.1535, "step": 10610 }, { "epoch": 0.02145307191021223, "grad_norm": 915.6752319335938, "learning_rate": 2.1240000000000003e-06, "loss": 25.3785, "step": 10620 }, { "epoch": 0.021473272542896042, "grad_norm": 135.380859375, "learning_rate": 2.1260000000000005e-06, "loss": 20.2444, "step": 10630 }, { "epoch": 0.02149347317557986, "grad_norm": 963.3563842773438, "learning_rate": 2.128e-06, "loss": 24.7575, "step": 10640 }, { "epoch": 0.021513673808263675, "grad_norm": 416.7027893066406, "learning_rate": 2.13e-06, "loss": 25.5095, "step": 10650 }, { "epoch": 0.021533874440947492, "grad_norm": 443.62103271484375, "learning_rate": 2.132e-06, "loss": 36.6364, "step": 10660 }, { "epoch": 0.021554075073631305, "grad_norm": 429.9660339355469, "learning_rate": 2.1340000000000002e-06, "loss": 24.7778, "step": 10670 }, { "epoch": 0.02157427570631512, "grad_norm": 238.11019897460938, "learning_rate": 2.1360000000000004e-06, "loss": 23.2085, "step": 10680 }, { "epoch": 0.021594476338998938, "grad_norm": 742.6220703125, "learning_rate": 2.138e-06, "loss": 29.7714, "step": 10690 }, { "epoch": 0.021614676971682754, "grad_norm": 488.2852478027344, "learning_rate": 2.1400000000000003e-06, "loss": 22.6061, "step": 10700 }, { "epoch": 0.021634877604366567, "grad_norm": 997.8129272460938, "learning_rate": 2.142e-06, "loss": 40.28, "step": 10710 }, { "epoch": 0.021655078237050384, "grad_norm": 178.46737670898438, "learning_rate": 2.144e-06, "loss": 36.7309, "step": 10720 }, { "epoch": 0.0216752788697342, "grad_norm": 1194.1746826171875, "learning_rate": 2.1460000000000003e-06, "loss": 27.8014, "step": 10730 }, { "epoch": 0.021695479502418017, "grad_norm": 482.2351379394531, "learning_rate": 2.148e-06, "loss": 34.1063, "step": 10740 }, { "epoch": 0.02171568013510183, "grad_norm": 520.8737182617188, "learning_rate": 2.15e-06, "loss": 30.059, "step": 10750 }, { "epoch": 0.021735880767785647, "grad_norm": 334.5901794433594, "learning_rate": 2.1520000000000003e-06, "loss": 31.9883, "step": 10760 }, { "epoch": 0.021756081400469463, "grad_norm": 340.0887451171875, "learning_rate": 2.1540000000000005e-06, "loss": 24.3153, "step": 10770 }, { "epoch": 
0.02177628203315328, "grad_norm": 569.4065551757812, "learning_rate": 2.156e-06, "loss": 34.2532, "step": 10780 }, { "epoch": 0.021796482665837093, "grad_norm": 480.6165771484375, "learning_rate": 2.158e-06, "loss": 38.8058, "step": 10790 }, { "epoch": 0.02181668329852091, "grad_norm": 992.14013671875, "learning_rate": 2.16e-06, "loss": 32.2644, "step": 10800 }, { "epoch": 0.021836883931204726, "grad_norm": 267.1018981933594, "learning_rate": 2.1620000000000002e-06, "loss": 21.6186, "step": 10810 }, { "epoch": 0.021857084563888542, "grad_norm": 423.60675048828125, "learning_rate": 2.1640000000000004e-06, "loss": 43.1586, "step": 10820 }, { "epoch": 0.021877285196572355, "grad_norm": 331.4594421386719, "learning_rate": 2.166e-06, "loss": 21.7183, "step": 10830 }, { "epoch": 0.021897485829256172, "grad_norm": 931.9136962890625, "learning_rate": 2.1680000000000002e-06, "loss": 40.9695, "step": 10840 }, { "epoch": 0.02191768646193999, "grad_norm": 667.7154541015625, "learning_rate": 2.17e-06, "loss": 18.6281, "step": 10850 }, { "epoch": 0.021937887094623805, "grad_norm": 1308.722900390625, "learning_rate": 2.172e-06, "loss": 36.8308, "step": 10860 }, { "epoch": 0.021958087727307618, "grad_norm": 1057.197509765625, "learning_rate": 2.1740000000000003e-06, "loss": 27.0626, "step": 10870 }, { "epoch": 0.021978288359991435, "grad_norm": 206.7808074951172, "learning_rate": 2.176e-06, "loss": 35.9422, "step": 10880 }, { "epoch": 0.02199848899267525, "grad_norm": 162.13706970214844, "learning_rate": 2.178e-06, "loss": 40.699, "step": 10890 }, { "epoch": 0.022018689625359068, "grad_norm": 902.00634765625, "learning_rate": 2.1800000000000003e-06, "loss": 57.6747, "step": 10900 }, { "epoch": 0.02203889025804288, "grad_norm": 483.8782653808594, "learning_rate": 2.182e-06, "loss": 38.8847, "step": 10910 }, { "epoch": 0.022059090890726697, "grad_norm": 536.849609375, "learning_rate": 2.184e-06, "loss": 32.1737, "step": 10920 }, { "epoch": 0.022079291523410514, "grad_norm": 821.1912231445312, "learning_rate": 2.186e-06, "loss": 29.3968, "step": 10930 }, { "epoch": 0.02209949215609433, "grad_norm": 963.5684204101562, "learning_rate": 2.188e-06, "loss": 17.3173, "step": 10940 }, { "epoch": 0.022119692788778143, "grad_norm": 978.8640747070312, "learning_rate": 2.19e-06, "loss": 37.9862, "step": 10950 }, { "epoch": 0.02213989342146196, "grad_norm": 797.7247924804688, "learning_rate": 2.1920000000000004e-06, "loss": 23.9345, "step": 10960 }, { "epoch": 0.022160094054145776, "grad_norm": 209.15562438964844, "learning_rate": 2.194e-06, "loss": 31.8476, "step": 10970 }, { "epoch": 0.022180294686829593, "grad_norm": 993.0297241210938, "learning_rate": 2.1960000000000002e-06, "loss": 27.8631, "step": 10980 }, { "epoch": 0.022200495319513406, "grad_norm": 1262.285400390625, "learning_rate": 2.198e-06, "loss": 39.0367, "step": 10990 }, { "epoch": 0.022220695952197222, "grad_norm": 957.9039916992188, "learning_rate": 2.2e-06, "loss": 28.4492, "step": 11000 }, { "epoch": 0.02224089658488104, "grad_norm": 154.41236877441406, "learning_rate": 2.2020000000000003e-06, "loss": 15.0966, "step": 11010 }, { "epoch": 0.022261097217564856, "grad_norm": 237.81320190429688, "learning_rate": 2.2040000000000004e-06, "loss": 28.2412, "step": 11020 }, { "epoch": 0.02228129785024867, "grad_norm": 614.3018798828125, "learning_rate": 2.206e-06, "loss": 30.5096, "step": 11030 }, { "epoch": 0.022301498482932485, "grad_norm": 548.9668579101562, "learning_rate": 2.2080000000000003e-06, "loss": 24.7881, "step": 11040 }, { "epoch": 
0.0223216991156163, "grad_norm": 600.4301147460938, "learning_rate": 2.21e-06, "loss": 19.2475, "step": 11050 }, { "epoch": 0.022341899748300118, "grad_norm": 439.44921875, "learning_rate": 2.212e-06, "loss": 25.5505, "step": 11060 }, { "epoch": 0.02236210038098393, "grad_norm": 764.2405395507812, "learning_rate": 2.2140000000000003e-06, "loss": 30.3772, "step": 11070 }, { "epoch": 0.022382301013667748, "grad_norm": 1142.8809814453125, "learning_rate": 2.216e-06, "loss": 32.3536, "step": 11080 }, { "epoch": 0.022402501646351564, "grad_norm": 686.0266723632812, "learning_rate": 2.218e-06, "loss": 43.1244, "step": 11090 }, { "epoch": 0.02242270227903538, "grad_norm": 450.30145263671875, "learning_rate": 2.2200000000000003e-06, "loss": 25.1819, "step": 11100 }, { "epoch": 0.022442902911719194, "grad_norm": 689.900390625, "learning_rate": 2.222e-06, "loss": 20.4868, "step": 11110 }, { "epoch": 0.02246310354440301, "grad_norm": 380.3027648925781, "learning_rate": 2.2240000000000002e-06, "loss": 35.7335, "step": 11120 }, { "epoch": 0.022483304177086827, "grad_norm": 398.4967956542969, "learning_rate": 2.226e-06, "loss": 14.332, "step": 11130 }, { "epoch": 0.022503504809770643, "grad_norm": 377.4896240234375, "learning_rate": 2.228e-06, "loss": 9.5262, "step": 11140 }, { "epoch": 0.022523705442454457, "grad_norm": 149.10630798339844, "learning_rate": 2.2300000000000002e-06, "loss": 19.6389, "step": 11150 }, { "epoch": 0.022543906075138273, "grad_norm": 207.62815856933594, "learning_rate": 2.2320000000000004e-06, "loss": 38.4066, "step": 11160 }, { "epoch": 0.02256410670782209, "grad_norm": 401.92633056640625, "learning_rate": 2.234e-06, "loss": 33.9303, "step": 11170 }, { "epoch": 0.022584307340505906, "grad_norm": 344.4341125488281, "learning_rate": 2.2360000000000003e-06, "loss": 26.8016, "step": 11180 }, { "epoch": 0.02260450797318972, "grad_norm": 724.0303955078125, "learning_rate": 2.238e-06, "loss": 23.6284, "step": 11190 }, { "epoch": 0.022624708605873536, "grad_norm": 679.6116333007812, "learning_rate": 2.24e-06, "loss": 25.3203, "step": 11200 }, { "epoch": 0.022644909238557352, "grad_norm": 656.9351806640625, "learning_rate": 2.2420000000000003e-06, "loss": 16.0256, "step": 11210 }, { "epoch": 0.02266510987124117, "grad_norm": 1038.72314453125, "learning_rate": 2.244e-06, "loss": 33.1557, "step": 11220 }, { "epoch": 0.022685310503924982, "grad_norm": 396.61016845703125, "learning_rate": 2.246e-06, "loss": 30.6392, "step": 11230 }, { "epoch": 0.0227055111366088, "grad_norm": 1008.8660888671875, "learning_rate": 2.2480000000000003e-06, "loss": 32.2001, "step": 11240 }, { "epoch": 0.022725711769292615, "grad_norm": 527.4194946289062, "learning_rate": 2.25e-06, "loss": 51.1306, "step": 11250 }, { "epoch": 0.02274591240197643, "grad_norm": 780.5740356445312, "learning_rate": 2.252e-06, "loss": 15.5578, "step": 11260 }, { "epoch": 0.022766113034660244, "grad_norm": 178.80038452148438, "learning_rate": 2.254e-06, "loss": 20.6808, "step": 11270 }, { "epoch": 0.02278631366734406, "grad_norm": 151.0431365966797, "learning_rate": 2.256e-06, "loss": 28.8247, "step": 11280 }, { "epoch": 0.022806514300027877, "grad_norm": 640.3284301757812, "learning_rate": 2.2580000000000002e-06, "loss": 30.0148, "step": 11290 }, { "epoch": 0.022826714932711694, "grad_norm": 1466.9593505859375, "learning_rate": 2.2600000000000004e-06, "loss": 32.5548, "step": 11300 }, { "epoch": 0.022846915565395507, "grad_norm": 141.0211639404297, "learning_rate": 2.262e-06, "loss": 24.7166, "step": 11310 }, { "epoch": 
0.022867116198079324, "grad_norm": 375.2640075683594, "learning_rate": 2.2640000000000003e-06, "loss": 33.2573, "step": 11320 }, { "epoch": 0.02288731683076314, "grad_norm": 296.55914306640625, "learning_rate": 2.266e-06, "loss": 13.459, "step": 11330 }, { "epoch": 0.022907517463446957, "grad_norm": 572.7025756835938, "learning_rate": 2.268e-06, "loss": 28.0534, "step": 11340 }, { "epoch": 0.02292771809613077, "grad_norm": 154.33242797851562, "learning_rate": 2.2700000000000003e-06, "loss": 22.2405, "step": 11350 }, { "epoch": 0.022947918728814586, "grad_norm": 109.69579315185547, "learning_rate": 2.2720000000000004e-06, "loss": 17.0903, "step": 11360 }, { "epoch": 0.022968119361498403, "grad_norm": 554.3320922851562, "learning_rate": 2.274e-06, "loss": 31.1534, "step": 11370 }, { "epoch": 0.02298831999418222, "grad_norm": 1792.9173583984375, "learning_rate": 2.2760000000000003e-06, "loss": 40.4314, "step": 11380 }, { "epoch": 0.023008520626866032, "grad_norm": 451.7829895019531, "learning_rate": 2.278e-06, "loss": 32.3919, "step": 11390 }, { "epoch": 0.02302872125954985, "grad_norm": 329.4390563964844, "learning_rate": 2.28e-06, "loss": 16.4796, "step": 11400 }, { "epoch": 0.023048921892233665, "grad_norm": 1457.64208984375, "learning_rate": 2.282e-06, "loss": 33.6929, "step": 11410 }, { "epoch": 0.023069122524917482, "grad_norm": 706.9736938476562, "learning_rate": 2.284e-06, "loss": 19.0316, "step": 11420 }, { "epoch": 0.023089323157601295, "grad_norm": 41.44367218017578, "learning_rate": 2.2860000000000002e-06, "loss": 26.5766, "step": 11430 }, { "epoch": 0.02310952379028511, "grad_norm": 243.57997131347656, "learning_rate": 2.2880000000000004e-06, "loss": 19.6041, "step": 11440 }, { "epoch": 0.023129724422968928, "grad_norm": 826.9224853515625, "learning_rate": 2.29e-06, "loss": 36.845, "step": 11450 }, { "epoch": 0.023149925055652745, "grad_norm": 426.7275695800781, "learning_rate": 2.2920000000000002e-06, "loss": 12.6838, "step": 11460 }, { "epoch": 0.023170125688336558, "grad_norm": 825.5134887695312, "learning_rate": 2.294e-06, "loss": 31.3542, "step": 11470 }, { "epoch": 0.023190326321020374, "grad_norm": 189.77012634277344, "learning_rate": 2.296e-06, "loss": 22.3825, "step": 11480 }, { "epoch": 0.02321052695370419, "grad_norm": 769.2461547851562, "learning_rate": 2.2980000000000003e-06, "loss": 29.7142, "step": 11490 }, { "epoch": 0.023230727586388007, "grad_norm": 485.4276428222656, "learning_rate": 2.3000000000000004e-06, "loss": 30.7081, "step": 11500 }, { "epoch": 0.02325092821907182, "grad_norm": 922.8281860351562, "learning_rate": 2.302e-06, "loss": 41.9903, "step": 11510 }, { "epoch": 0.023271128851755637, "grad_norm": 1233.290283203125, "learning_rate": 2.3040000000000003e-06, "loss": 47.3183, "step": 11520 }, { "epoch": 0.023291329484439453, "grad_norm": 375.4042053222656, "learning_rate": 2.306e-06, "loss": 16.7128, "step": 11530 }, { "epoch": 0.02331153011712327, "grad_norm": 698.8474731445312, "learning_rate": 2.308e-06, "loss": 30.0822, "step": 11540 }, { "epoch": 0.023331730749807083, "grad_norm": 491.28009033203125, "learning_rate": 2.3100000000000003e-06, "loss": 20.6356, "step": 11550 }, { "epoch": 0.0233519313824909, "grad_norm": 284.2083740234375, "learning_rate": 2.312e-06, "loss": 29.2223, "step": 11560 }, { "epoch": 0.023372132015174716, "grad_norm": 423.8869323730469, "learning_rate": 2.314e-06, "loss": 26.4561, "step": 11570 }, { "epoch": 0.023392332647858533, "grad_norm": 66.48220825195312, "learning_rate": 2.3160000000000004e-06, "loss": 40.7723, 
"step": 11580 }, { "epoch": 0.023412533280542346, "grad_norm": 377.4560852050781, "learning_rate": 2.318e-06, "loss": 22.6986, "step": 11590 }, { "epoch": 0.023432733913226162, "grad_norm": 300.7611999511719, "learning_rate": 2.3200000000000002e-06, "loss": 34.0606, "step": 11600 }, { "epoch": 0.02345293454590998, "grad_norm": 524.640625, "learning_rate": 2.322e-06, "loss": 28.0742, "step": 11610 }, { "epoch": 0.023473135178593795, "grad_norm": 772.0435180664062, "learning_rate": 2.324e-06, "loss": 19.5605, "step": 11620 }, { "epoch": 0.023493335811277608, "grad_norm": 766.981201171875, "learning_rate": 2.3260000000000003e-06, "loss": 23.2606, "step": 11630 }, { "epoch": 0.023513536443961425, "grad_norm": 902.1786499023438, "learning_rate": 2.3280000000000004e-06, "loss": 36.3294, "step": 11640 }, { "epoch": 0.02353373707664524, "grad_norm": 569.1446533203125, "learning_rate": 2.33e-06, "loss": 23.0629, "step": 11650 }, { "epoch": 0.023553937709329058, "grad_norm": 199.93630981445312, "learning_rate": 2.3320000000000003e-06, "loss": 16.678, "step": 11660 }, { "epoch": 0.02357413834201287, "grad_norm": 188.4761505126953, "learning_rate": 2.334e-06, "loss": 37.7719, "step": 11670 }, { "epoch": 0.023594338974696687, "grad_norm": 416.677001953125, "learning_rate": 2.336e-06, "loss": 29.8293, "step": 11680 }, { "epoch": 0.023614539607380504, "grad_norm": 316.926513671875, "learning_rate": 2.3380000000000003e-06, "loss": 32.9385, "step": 11690 }, { "epoch": 0.02363474024006432, "grad_norm": 644.300048828125, "learning_rate": 2.3400000000000005e-06, "loss": 57.1091, "step": 11700 }, { "epoch": 0.023654940872748133, "grad_norm": 855.0596923828125, "learning_rate": 2.342e-06, "loss": 36.5547, "step": 11710 }, { "epoch": 0.02367514150543195, "grad_norm": 955.8478393554688, "learning_rate": 2.3440000000000003e-06, "loss": 32.1896, "step": 11720 }, { "epoch": 0.023695342138115767, "grad_norm": 567.6542358398438, "learning_rate": 2.346e-06, "loss": 21.2947, "step": 11730 }, { "epoch": 0.023715542770799583, "grad_norm": 826.7879638671875, "learning_rate": 2.3480000000000002e-06, "loss": 24.1162, "step": 11740 }, { "epoch": 0.023735743403483396, "grad_norm": 471.20343017578125, "learning_rate": 2.35e-06, "loss": 28.2968, "step": 11750 }, { "epoch": 0.023755944036167213, "grad_norm": 60.918678283691406, "learning_rate": 2.352e-06, "loss": 22.3088, "step": 11760 }, { "epoch": 0.02377614466885103, "grad_norm": 232.92462158203125, "learning_rate": 2.3540000000000002e-06, "loss": 24.5559, "step": 11770 }, { "epoch": 0.023796345301534846, "grad_norm": 991.3208618164062, "learning_rate": 2.3560000000000004e-06, "loss": 36.239, "step": 11780 }, { "epoch": 0.02381654593421866, "grad_norm": 2609.4541015625, "learning_rate": 2.358e-06, "loss": 54.3551, "step": 11790 }, { "epoch": 0.023836746566902475, "grad_norm": 1228.8785400390625, "learning_rate": 2.3600000000000003e-06, "loss": 37.026, "step": 11800 }, { "epoch": 0.023856947199586292, "grad_norm": 1025.142333984375, "learning_rate": 2.362e-06, "loss": 34.8462, "step": 11810 }, { "epoch": 0.02387714783227011, "grad_norm": 550.499755859375, "learning_rate": 2.364e-06, "loss": 28.3715, "step": 11820 }, { "epoch": 0.02389734846495392, "grad_norm": 510.32012939453125, "learning_rate": 2.3660000000000003e-06, "loss": 14.5547, "step": 11830 }, { "epoch": 0.023917549097637738, "grad_norm": 844.2149658203125, "learning_rate": 2.3680000000000005e-06, "loss": 13.7765, "step": 11840 }, { "epoch": 0.023937749730321554, "grad_norm": 225.61790466308594, "learning_rate": 
2.37e-06, "loss": 20.3906, "step": 11850 }, { "epoch": 0.02395795036300537, "grad_norm": 483.1874694824219, "learning_rate": 2.3720000000000003e-06, "loss": 22.1851, "step": 11860 }, { "epoch": 0.023978150995689184, "grad_norm": 462.12750244140625, "learning_rate": 2.374e-06, "loss": 29.5714, "step": 11870 }, { "epoch": 0.023998351628373, "grad_norm": 297.9912109375, "learning_rate": 2.376e-06, "loss": 16.9498, "step": 11880 }, { "epoch": 0.024018552261056817, "grad_norm": 686.6878662109375, "learning_rate": 2.3780000000000004e-06, "loss": 35.8019, "step": 11890 }, { "epoch": 0.024038752893740634, "grad_norm": 796.0972290039062, "learning_rate": 2.38e-06, "loss": 22.4394, "step": 11900 }, { "epoch": 0.024058953526424447, "grad_norm": 1740.537841796875, "learning_rate": 2.3820000000000002e-06, "loss": 27.8693, "step": 11910 }, { "epoch": 0.024079154159108263, "grad_norm": 504.74810791015625, "learning_rate": 2.3840000000000004e-06, "loss": 25.0214, "step": 11920 }, { "epoch": 0.02409935479179208, "grad_norm": 456.0522766113281, "learning_rate": 2.386e-06, "loss": 40.904, "step": 11930 }, { "epoch": 0.024119555424475896, "grad_norm": 195.16200256347656, "learning_rate": 2.3880000000000003e-06, "loss": 16.5891, "step": 11940 }, { "epoch": 0.02413975605715971, "grad_norm": 634.192626953125, "learning_rate": 2.39e-06, "loss": 35.2282, "step": 11950 }, { "epoch": 0.024159956689843526, "grad_norm": 404.48834228515625, "learning_rate": 2.392e-06, "loss": 20.9582, "step": 11960 }, { "epoch": 0.024180157322527342, "grad_norm": 280.0358581542969, "learning_rate": 2.3940000000000003e-06, "loss": 12.3714, "step": 11970 }, { "epoch": 0.02420035795521116, "grad_norm": 537.2835083007812, "learning_rate": 2.3960000000000004e-06, "loss": 13.5036, "step": 11980 }, { "epoch": 0.024220558587894972, "grad_norm": 683.5922241210938, "learning_rate": 2.398e-06, "loss": 26.1619, "step": 11990 }, { "epoch": 0.02424075922057879, "grad_norm": 380.0113220214844, "learning_rate": 2.4000000000000003e-06, "loss": 23.8011, "step": 12000 }, { "epoch": 0.024260959853262605, "grad_norm": 320.9659118652344, "learning_rate": 2.402e-06, "loss": 26.1275, "step": 12010 }, { "epoch": 0.02428116048594642, "grad_norm": 239.40464782714844, "learning_rate": 2.404e-06, "loss": 22.1058, "step": 12020 }, { "epoch": 0.024301361118630235, "grad_norm": 488.48895263671875, "learning_rate": 2.4060000000000003e-06, "loss": 27.6829, "step": 12030 }, { "epoch": 0.02432156175131405, "grad_norm": 677.6295776367188, "learning_rate": 2.408e-06, "loss": 26.9782, "step": 12040 }, { "epoch": 0.024341762383997868, "grad_norm": 706.8380126953125, "learning_rate": 2.4100000000000002e-06, "loss": 36.5065, "step": 12050 }, { "epoch": 0.024361963016681684, "grad_norm": 887.48486328125, "learning_rate": 2.4120000000000004e-06, "loss": 25.6709, "step": 12060 }, { "epoch": 0.024382163649365497, "grad_norm": 584.556396484375, "learning_rate": 2.414e-06, "loss": 16.778, "step": 12070 }, { "epoch": 0.024402364282049314, "grad_norm": 341.5478210449219, "learning_rate": 2.4160000000000002e-06, "loss": 20.7892, "step": 12080 }, { "epoch": 0.02442256491473313, "grad_norm": 372.71002197265625, "learning_rate": 2.418e-06, "loss": 30.1106, "step": 12090 }, { "epoch": 0.024442765547416947, "grad_norm": 443.1629333496094, "learning_rate": 2.42e-06, "loss": 18.8971, "step": 12100 }, { "epoch": 0.02446296618010076, "grad_norm": 303.4381103515625, "learning_rate": 2.4220000000000003e-06, "loss": 28.22, "step": 12110 }, { "epoch": 0.024483166812784576, "grad_norm": 
246.98573303222656, "learning_rate": 2.4240000000000004e-06, "loss": 16.5033, "step": 12120 }, { "epoch": 0.024503367445468393, "grad_norm": 21.39580535888672, "learning_rate": 2.426e-06, "loss": 35.4571, "step": 12130 }, { "epoch": 0.02452356807815221, "grad_norm": 454.01171875, "learning_rate": 2.428e-06, "loss": 32.0765, "step": 12140 }, { "epoch": 0.024543768710836023, "grad_norm": 529.3364868164062, "learning_rate": 2.43e-06, "loss": 23.7121, "step": 12150 }, { "epoch": 0.02456396934351984, "grad_norm": 1050.9063720703125, "learning_rate": 2.432e-06, "loss": 19.5663, "step": 12160 }, { "epoch": 0.024584169976203656, "grad_norm": 118.3066635131836, "learning_rate": 2.4340000000000003e-06, "loss": 34.4346, "step": 12170 }, { "epoch": 0.02460437060888747, "grad_norm": 412.7197570800781, "learning_rate": 2.4360000000000005e-06, "loss": 20.1538, "step": 12180 }, { "epoch": 0.024624571241571285, "grad_norm": 853.94384765625, "learning_rate": 2.438e-06, "loss": 36.14, "step": 12190 }, { "epoch": 0.0246447718742551, "grad_norm": 529.248291015625, "learning_rate": 2.4400000000000004e-06, "loss": 37.7406, "step": 12200 }, { "epoch": 0.024664972506938918, "grad_norm": 352.5622253417969, "learning_rate": 2.442e-06, "loss": 39.3457, "step": 12210 }, { "epoch": 0.02468517313962273, "grad_norm": 284.78564453125, "learning_rate": 2.4440000000000002e-06, "loss": 22.3211, "step": 12220 }, { "epoch": 0.024705373772306548, "grad_norm": 436.9090576171875, "learning_rate": 2.4460000000000004e-06, "loss": 71.9069, "step": 12230 }, { "epoch": 0.024725574404990364, "grad_norm": 496.68157958984375, "learning_rate": 2.448e-06, "loss": 23.0423, "step": 12240 }, { "epoch": 0.02474577503767418, "grad_norm": 768.3191528320312, "learning_rate": 2.4500000000000003e-06, "loss": 33.8757, "step": 12250 }, { "epoch": 0.024765975670357994, "grad_norm": 665.5794067382812, "learning_rate": 2.4520000000000004e-06, "loss": 24.1933, "step": 12260 }, { "epoch": 0.02478617630304181, "grad_norm": 678.7279663085938, "learning_rate": 2.454e-06, "loss": 23.873, "step": 12270 }, { "epoch": 0.024806376935725627, "grad_norm": 600.2070922851562, "learning_rate": 2.4560000000000003e-06, "loss": 39.3005, "step": 12280 }, { "epoch": 0.024826577568409443, "grad_norm": 462.02838134765625, "learning_rate": 2.458e-06, "loss": 40.3633, "step": 12290 }, { "epoch": 0.024846778201093257, "grad_norm": 276.6780700683594, "learning_rate": 2.46e-06, "loss": 20.4624, "step": 12300 }, { "epoch": 0.024866978833777073, "grad_norm": 259.2904357910156, "learning_rate": 2.4620000000000003e-06, "loss": 25.5969, "step": 12310 }, { "epoch": 0.02488717946646089, "grad_norm": 259.7370910644531, "learning_rate": 2.4640000000000005e-06, "loss": 36.7475, "step": 12320 }, { "epoch": 0.024907380099144706, "grad_norm": 374.19952392578125, "learning_rate": 2.466e-06, "loss": 35.6893, "step": 12330 }, { "epoch": 0.02492758073182852, "grad_norm": 353.00543212890625, "learning_rate": 2.468e-06, "loss": 23.8052, "step": 12340 }, { "epoch": 0.024947781364512336, "grad_norm": 1175.15771484375, "learning_rate": 2.47e-06, "loss": 28.6438, "step": 12350 }, { "epoch": 0.024967981997196152, "grad_norm": 258.8357849121094, "learning_rate": 2.4720000000000002e-06, "loss": 23.1932, "step": 12360 }, { "epoch": 0.02498818262987997, "grad_norm": 804.6754150390625, "learning_rate": 2.4740000000000004e-06, "loss": 34.3967, "step": 12370 }, { "epoch": 0.025008383262563782, "grad_norm": 720.6771850585938, "learning_rate": 2.476e-06, "loss": 20.0733, "step": 12380 }, { "epoch": 
0.0250285838952476, "grad_norm": 1006.309326171875, "learning_rate": 2.4780000000000002e-06, "loss": 25.1847, "step": 12390 }, { "epoch": 0.025048784527931415, "grad_norm": 500.6513366699219, "learning_rate": 2.4800000000000004e-06, "loss": 42.6183, "step": 12400 }, { "epoch": 0.02506898516061523, "grad_norm": 500.9101867675781, "learning_rate": 2.482e-06, "loss": 19.1311, "step": 12410 }, { "epoch": 0.025089185793299044, "grad_norm": 4534.8544921875, "learning_rate": 2.4840000000000003e-06, "loss": 39.3053, "step": 12420 }, { "epoch": 0.02510938642598286, "grad_norm": 861.6122436523438, "learning_rate": 2.486e-06, "loss": 37.6291, "step": 12430 }, { "epoch": 0.025129587058666678, "grad_norm": 218.59344482421875, "learning_rate": 2.488e-06, "loss": 23.8732, "step": 12440 }, { "epoch": 0.025149787691350494, "grad_norm": 515.483642578125, "learning_rate": 2.4900000000000003e-06, "loss": 25.6926, "step": 12450 }, { "epoch": 0.025169988324034307, "grad_norm": 160.76766967773438, "learning_rate": 2.4920000000000005e-06, "loss": 26.8572, "step": 12460 }, { "epoch": 0.025190188956718124, "grad_norm": 256.6075744628906, "learning_rate": 2.494e-06, "loss": 24.3597, "step": 12470 }, { "epoch": 0.02521038958940194, "grad_norm": 380.9504699707031, "learning_rate": 2.496e-06, "loss": 22.8597, "step": 12480 }, { "epoch": 0.025230590222085757, "grad_norm": 1375.69189453125, "learning_rate": 2.498e-06, "loss": 41.9291, "step": 12490 }, { "epoch": 0.02525079085476957, "grad_norm": 86.29317474365234, "learning_rate": 2.5e-06, "loss": 38.4579, "step": 12500 }, { "epoch": 0.025270991487453386, "grad_norm": 166.09983825683594, "learning_rate": 2.502e-06, "loss": 18.8884, "step": 12510 }, { "epoch": 0.025291192120137203, "grad_norm": 528.3165283203125, "learning_rate": 2.5040000000000005e-06, "loss": 37.1205, "step": 12520 }, { "epoch": 0.02531139275282102, "grad_norm": 620.2875366210938, "learning_rate": 2.5060000000000002e-06, "loss": 43.7914, "step": 12530 }, { "epoch": 0.025331593385504832, "grad_norm": 1003.9613037109375, "learning_rate": 2.5080000000000004e-06, "loss": 39.7105, "step": 12540 }, { "epoch": 0.02535179401818865, "grad_norm": 1062.6812744140625, "learning_rate": 2.51e-06, "loss": 30.7475, "step": 12550 }, { "epoch": 0.025371994650872465, "grad_norm": 968.7026977539062, "learning_rate": 2.512e-06, "loss": 29.899, "step": 12560 }, { "epoch": 0.025392195283556282, "grad_norm": 599.2593383789062, "learning_rate": 2.5140000000000004e-06, "loss": 15.0186, "step": 12570 }, { "epoch": 0.025412395916240095, "grad_norm": 342.73529052734375, "learning_rate": 2.516e-06, "loss": 16.4279, "step": 12580 }, { "epoch": 0.02543259654892391, "grad_norm": 2192.99462890625, "learning_rate": 2.5180000000000003e-06, "loss": 47.0873, "step": 12590 }, { "epoch": 0.025452797181607728, "grad_norm": 663.18896484375, "learning_rate": 2.52e-06, "loss": 30.0429, "step": 12600 }, { "epoch": 0.025472997814291545, "grad_norm": 222.49244689941406, "learning_rate": 2.522e-06, "loss": 22.5173, "step": 12610 }, { "epoch": 0.025493198446975358, "grad_norm": 169.51467895507812, "learning_rate": 2.5240000000000003e-06, "loss": 23.6477, "step": 12620 }, { "epoch": 0.025513399079659174, "grad_norm": 774.110595703125, "learning_rate": 2.526e-06, "loss": 21.803, "step": 12630 }, { "epoch": 0.02553359971234299, "grad_norm": 936.8486328125, "learning_rate": 2.5280000000000006e-06, "loss": 40.9788, "step": 12640 }, { "epoch": 0.025553800345026807, "grad_norm": 490.3465270996094, "learning_rate": 2.5300000000000003e-06, "loss": 38.5848, 
"step": 12650 }, { "epoch": 0.02557400097771062, "grad_norm": 962.7054443359375, "learning_rate": 2.532e-06, "loss": 29.9581, "step": 12660 }, { "epoch": 0.025594201610394437, "grad_norm": 298.3330383300781, "learning_rate": 2.5340000000000002e-06, "loss": 21.7127, "step": 12670 }, { "epoch": 0.025614402243078253, "grad_norm": 382.0990905761719, "learning_rate": 2.536e-06, "loss": 36.149, "step": 12680 }, { "epoch": 0.02563460287576207, "grad_norm": 657.50390625, "learning_rate": 2.5380000000000005e-06, "loss": 32.5378, "step": 12690 }, { "epoch": 0.025654803508445883, "grad_norm": 448.74298095703125, "learning_rate": 2.5400000000000002e-06, "loss": 30.5162, "step": 12700 }, { "epoch": 0.0256750041411297, "grad_norm": 600.510498046875, "learning_rate": 2.542e-06, "loss": 25.7998, "step": 12710 }, { "epoch": 0.025695204773813516, "grad_norm": 712.5263061523438, "learning_rate": 2.5440000000000005e-06, "loss": 31.6482, "step": 12720 }, { "epoch": 0.025715405406497333, "grad_norm": 610.2012939453125, "learning_rate": 2.5460000000000003e-06, "loss": 22.5416, "step": 12730 }, { "epoch": 0.025735606039181146, "grad_norm": 494.62847900390625, "learning_rate": 2.5480000000000004e-06, "loss": 26.9063, "step": 12740 }, { "epoch": 0.025755806671864962, "grad_norm": 426.4195556640625, "learning_rate": 2.55e-06, "loss": 36.5014, "step": 12750 }, { "epoch": 0.02577600730454878, "grad_norm": 796.3436889648438, "learning_rate": 2.552e-06, "loss": 29.9939, "step": 12760 }, { "epoch": 0.025796207937232595, "grad_norm": 763.7683715820312, "learning_rate": 2.5540000000000004e-06, "loss": 46.7902, "step": 12770 }, { "epoch": 0.025816408569916408, "grad_norm": 1770.569580078125, "learning_rate": 2.556e-06, "loss": 42.3702, "step": 12780 }, { "epoch": 0.025836609202600225, "grad_norm": 337.1780090332031, "learning_rate": 2.5580000000000003e-06, "loss": 27.2789, "step": 12790 }, { "epoch": 0.02585680983528404, "grad_norm": 154.74667358398438, "learning_rate": 2.56e-06, "loss": 13.85, "step": 12800 }, { "epoch": 0.025877010467967858, "grad_norm": 567.01416015625, "learning_rate": 2.562e-06, "loss": 30.453, "step": 12810 }, { "epoch": 0.02589721110065167, "grad_norm": 228.02456665039062, "learning_rate": 2.5640000000000004e-06, "loss": 17.5226, "step": 12820 }, { "epoch": 0.025917411733335487, "grad_norm": 331.27764892578125, "learning_rate": 2.566e-06, "loss": 20.7665, "step": 12830 }, { "epoch": 0.025937612366019304, "grad_norm": 730.0822143554688, "learning_rate": 2.568e-06, "loss": 22.1979, "step": 12840 }, { "epoch": 0.02595781299870312, "grad_norm": 770.920654296875, "learning_rate": 2.5700000000000004e-06, "loss": 25.3876, "step": 12850 }, { "epoch": 0.025978013631386934, "grad_norm": 241.62933349609375, "learning_rate": 2.572e-06, "loss": 31.7164, "step": 12860 }, { "epoch": 0.02599821426407075, "grad_norm": 306.0582580566406, "learning_rate": 2.5740000000000003e-06, "loss": 34.9088, "step": 12870 }, { "epoch": 0.026018414896754567, "grad_norm": 659.4343872070312, "learning_rate": 2.576e-06, "loss": 37.9334, "step": 12880 }, { "epoch": 0.026038615529438383, "grad_norm": 1271.260986328125, "learning_rate": 2.578e-06, "loss": 43.3547, "step": 12890 }, { "epoch": 0.026058816162122196, "grad_norm": 942.1395263671875, "learning_rate": 2.5800000000000003e-06, "loss": 26.849, "step": 12900 }, { "epoch": 0.026079016794806013, "grad_norm": 775.384521484375, "learning_rate": 2.582e-06, "loss": 18.4199, "step": 12910 }, { "epoch": 0.02609921742748983, "grad_norm": 668.6947631835938, "learning_rate": 
2.5840000000000006e-06, "loss": 25.0121, "step": 12920 }, { "epoch": 0.026119418060173646, "grad_norm": 288.4560546875, "learning_rate": 2.5860000000000003e-06, "loss": 28.1159, "step": 12930 }, { "epoch": 0.02613961869285746, "grad_norm": 372.0718688964844, "learning_rate": 2.588e-06, "loss": 17.4488, "step": 12940 }, { "epoch": 0.026159819325541275, "grad_norm": 392.7027893066406, "learning_rate": 2.59e-06, "loss": 35.8984, "step": 12950 }, { "epoch": 0.026180019958225092, "grad_norm": 491.2992248535156, "learning_rate": 2.592e-06, "loss": 29.4578, "step": 12960 }, { "epoch": 0.02620022059090891, "grad_norm": 556.0179443359375, "learning_rate": 2.5940000000000005e-06, "loss": 27.1833, "step": 12970 }, { "epoch": 0.02622042122359272, "grad_norm": 505.3508605957031, "learning_rate": 2.5960000000000002e-06, "loss": 21.2242, "step": 12980 }, { "epoch": 0.026240621856276538, "grad_norm": 477.8688049316406, "learning_rate": 2.598e-06, "loss": 32.3539, "step": 12990 }, { "epoch": 0.026260822488960354, "grad_norm": 437.2344970703125, "learning_rate": 2.6e-06, "loss": 31.2965, "step": 13000 }, { "epoch": 0.02628102312164417, "grad_norm": 441.76470947265625, "learning_rate": 2.6020000000000002e-06, "loss": 31.3963, "step": 13010 }, { "epoch": 0.026301223754327984, "grad_norm": 348.7921142578125, "learning_rate": 2.6040000000000004e-06, "loss": 26.3613, "step": 13020 }, { "epoch": 0.0263214243870118, "grad_norm": 863.64111328125, "learning_rate": 2.606e-06, "loss": 35.6339, "step": 13030 }, { "epoch": 0.026341625019695617, "grad_norm": 506.5108947753906, "learning_rate": 2.608e-06, "loss": 23.0733, "step": 13040 }, { "epoch": 0.026361825652379434, "grad_norm": 802.7584228515625, "learning_rate": 2.6100000000000004e-06, "loss": 41.8787, "step": 13050 }, { "epoch": 0.026382026285063247, "grad_norm": 1173.11279296875, "learning_rate": 2.612e-06, "loss": 32.2501, "step": 13060 }, { "epoch": 0.026402226917747063, "grad_norm": 412.14056396484375, "learning_rate": 2.6140000000000003e-06, "loss": 23.2096, "step": 13070 }, { "epoch": 0.02642242755043088, "grad_norm": 631.819091796875, "learning_rate": 2.616e-06, "loss": 36.8238, "step": 13080 }, { "epoch": 0.026442628183114696, "grad_norm": 1280.29345703125, "learning_rate": 2.618e-06, "loss": 19.2417, "step": 13090 }, { "epoch": 0.02646282881579851, "grad_norm": 919.0343017578125, "learning_rate": 2.6200000000000003e-06, "loss": 28.6367, "step": 13100 }, { "epoch": 0.026483029448482326, "grad_norm": 461.51678466796875, "learning_rate": 2.622e-06, "loss": 27.8108, "step": 13110 }, { "epoch": 0.026503230081166142, "grad_norm": 352.7019348144531, "learning_rate": 2.6240000000000006e-06, "loss": 24.0391, "step": 13120 }, { "epoch": 0.02652343071384996, "grad_norm": 114.39356994628906, "learning_rate": 2.6260000000000004e-06, "loss": 17.8535, "step": 13130 }, { "epoch": 0.026543631346533772, "grad_norm": 487.0043029785156, "learning_rate": 2.628e-06, "loss": 29.1434, "step": 13140 }, { "epoch": 0.02656383197921759, "grad_norm": 837.979736328125, "learning_rate": 2.6300000000000002e-06, "loss": 36.1627, "step": 13150 }, { "epoch": 0.026584032611901405, "grad_norm": 575.2749633789062, "learning_rate": 2.632e-06, "loss": 22.1858, "step": 13160 }, { "epoch": 0.02660423324458522, "grad_norm": 407.6467590332031, "learning_rate": 2.6340000000000005e-06, "loss": 22.8118, "step": 13170 }, { "epoch": 0.026624433877269035, "grad_norm": 386.979736328125, "learning_rate": 2.6360000000000003e-06, "loss": 17.3379, "step": 13180 }, { "epoch": 0.02664463450995285, "grad_norm": 
484.42340087890625, "learning_rate": 2.638e-06, "loss": 26.2426, "step": 13190 }, { "epoch": 0.026664835142636668, "grad_norm": 538.97998046875, "learning_rate": 2.64e-06, "loss": 33.0226, "step": 13200 }, { "epoch": 0.026685035775320484, "grad_norm": 1009.0739135742188, "learning_rate": 2.6420000000000003e-06, "loss": 29.435, "step": 13210 }, { "epoch": 0.026705236408004297, "grad_norm": 333.44110107421875, "learning_rate": 2.6440000000000004e-06, "loss": 32.6955, "step": 13220 }, { "epoch": 0.026725437040688114, "grad_norm": 1260.854248046875, "learning_rate": 2.646e-06, "loss": 15.8343, "step": 13230 }, { "epoch": 0.02674563767337193, "grad_norm": 466.25958251953125, "learning_rate": 2.648e-06, "loss": 30.9633, "step": 13240 }, { "epoch": 0.026765838306055747, "grad_norm": 1081.712646484375, "learning_rate": 2.6500000000000005e-06, "loss": 46.0157, "step": 13250 }, { "epoch": 0.02678603893873956, "grad_norm": 134.25177001953125, "learning_rate": 2.652e-06, "loss": 33.3005, "step": 13260 }, { "epoch": 0.026806239571423376, "grad_norm": 711.9703979492188, "learning_rate": 2.6540000000000003e-06, "loss": 24.0814, "step": 13270 }, { "epoch": 0.026826440204107193, "grad_norm": 453.7552490234375, "learning_rate": 2.656e-06, "loss": 20.5775, "step": 13280 }, { "epoch": 0.02684664083679101, "grad_norm": 1155.71435546875, "learning_rate": 2.6580000000000002e-06, "loss": 24.0799, "step": 13290 }, { "epoch": 0.026866841469474823, "grad_norm": 453.0130615234375, "learning_rate": 2.6600000000000004e-06, "loss": 17.9642, "step": 13300 }, { "epoch": 0.02688704210215864, "grad_norm": 157.45489501953125, "learning_rate": 2.662e-06, "loss": 15.6941, "step": 13310 }, { "epoch": 0.026907242734842456, "grad_norm": 676.5046997070312, "learning_rate": 2.6640000000000007e-06, "loss": 35.0601, "step": 13320 }, { "epoch": 0.026927443367526272, "grad_norm": 5.334859848022461, "learning_rate": 2.6660000000000004e-06, "loss": 24.3542, "step": 13330 }, { "epoch": 0.026947644000210085, "grad_norm": 446.80706787109375, "learning_rate": 2.668e-06, "loss": 32.5696, "step": 13340 }, { "epoch": 0.0269678446328939, "grad_norm": 753.0740356445312, "learning_rate": 2.6700000000000003e-06, "loss": 19.3994, "step": 13350 }, { "epoch": 0.026988045265577718, "grad_norm": 378.42303466796875, "learning_rate": 2.672e-06, "loss": 25.0429, "step": 13360 }, { "epoch": 0.027008245898261535, "grad_norm": 59.517295837402344, "learning_rate": 2.6740000000000006e-06, "loss": 28.762, "step": 13370 }, { "epoch": 0.027028446530945348, "grad_norm": 177.29518127441406, "learning_rate": 2.6760000000000003e-06, "loss": 20.5681, "step": 13380 }, { "epoch": 0.027048647163629164, "grad_norm": 259.7153015136719, "learning_rate": 2.678e-06, "loss": 37.4309, "step": 13390 }, { "epoch": 0.02706884779631298, "grad_norm": 1043.8868408203125, "learning_rate": 2.68e-06, "loss": 38.3263, "step": 13400 }, { "epoch": 0.027089048428996797, "grad_norm": 252.46734619140625, "learning_rate": 2.6820000000000003e-06, "loss": 29.4893, "step": 13410 }, { "epoch": 0.02710924906168061, "grad_norm": 494.4878234863281, "learning_rate": 2.6840000000000005e-06, "loss": 32.0994, "step": 13420 }, { "epoch": 0.027129449694364427, "grad_norm": 590.9215087890625, "learning_rate": 2.686e-06, "loss": 25.1901, "step": 13430 }, { "epoch": 0.027149650327048244, "grad_norm": 551.9838256835938, "learning_rate": 2.688e-06, "loss": 32.5244, "step": 13440 }, { "epoch": 0.02716985095973206, "grad_norm": 517.9137573242188, "learning_rate": 2.6900000000000005e-06, "loss": 30.3936, "step": 
13450 }, { "epoch": 0.027190051592415873, "grad_norm": 737.2365112304688, "learning_rate": 2.6920000000000002e-06, "loss": 47.1574, "step": 13460 }, { "epoch": 0.02721025222509969, "grad_norm": 670.0670776367188, "learning_rate": 2.694e-06, "loss": 59.7301, "step": 13470 }, { "epoch": 0.027230452857783506, "grad_norm": 657.5059204101562, "learning_rate": 2.696e-06, "loss": 12.9818, "step": 13480 }, { "epoch": 0.027250653490467323, "grad_norm": 977.0647583007812, "learning_rate": 2.6980000000000003e-06, "loss": 40.2026, "step": 13490 }, { "epoch": 0.027270854123151136, "grad_norm": 1071.2728271484375, "learning_rate": 2.7000000000000004e-06, "loss": 46.9622, "step": 13500 }, { "epoch": 0.027291054755834952, "grad_norm": 769.448974609375, "learning_rate": 2.702e-06, "loss": 25.3011, "step": 13510 }, { "epoch": 0.02731125538851877, "grad_norm": 624.0108032226562, "learning_rate": 2.704e-06, "loss": 33.9974, "step": 13520 }, { "epoch": 0.027331456021202585, "grad_norm": 886.1692504882812, "learning_rate": 2.7060000000000004e-06, "loss": 20.8574, "step": 13530 }, { "epoch": 0.0273516566538864, "grad_norm": 345.9872131347656, "learning_rate": 2.708e-06, "loss": 27.7531, "step": 13540 }, { "epoch": 0.027371857286570215, "grad_norm": 443.1385192871094, "learning_rate": 2.7100000000000003e-06, "loss": 34.9584, "step": 13550 }, { "epoch": 0.02739205791925403, "grad_norm": 355.1740417480469, "learning_rate": 2.712e-06, "loss": 27.923, "step": 13560 }, { "epoch": 0.027412258551937848, "grad_norm": 915.899169921875, "learning_rate": 2.7139999999999998e-06, "loss": 36.5688, "step": 13570 }, { "epoch": 0.02743245918462166, "grad_norm": 1138.944091796875, "learning_rate": 2.7160000000000003e-06, "loss": 21.1642, "step": 13580 }, { "epoch": 0.027452659817305478, "grad_norm": 641.07080078125, "learning_rate": 2.718e-06, "loss": 46.8754, "step": 13590 }, { "epoch": 0.027472860449989294, "grad_norm": 1295.2271728515625, "learning_rate": 2.7200000000000002e-06, "loss": 32.1504, "step": 13600 }, { "epoch": 0.02749306108267311, "grad_norm": 575.6077270507812, "learning_rate": 2.7220000000000004e-06, "loss": 31.1421, "step": 13610 }, { "epoch": 0.027513261715356924, "grad_norm": 651.1995239257812, "learning_rate": 2.724e-06, "loss": 21.735, "step": 13620 }, { "epoch": 0.02753346234804074, "grad_norm": 384.02783203125, "learning_rate": 2.7260000000000002e-06, "loss": 30.4357, "step": 13630 }, { "epoch": 0.027553662980724557, "grad_norm": 283.46966552734375, "learning_rate": 2.728e-06, "loss": 25.7516, "step": 13640 }, { "epoch": 0.027573863613408373, "grad_norm": 254.54684448242188, "learning_rate": 2.7300000000000005e-06, "loss": 19.1426, "step": 13650 }, { "epoch": 0.027594064246092186, "grad_norm": 926.5336303710938, "learning_rate": 2.7320000000000003e-06, "loss": 33.8098, "step": 13660 }, { "epoch": 0.027614264878776003, "grad_norm": 769.6925048828125, "learning_rate": 2.734e-06, "loss": 19.3071, "step": 13670 }, { "epoch": 0.02763446551145982, "grad_norm": 402.8377380371094, "learning_rate": 2.736e-06, "loss": 21.2851, "step": 13680 }, { "epoch": 0.027654666144143636, "grad_norm": 491.4398193359375, "learning_rate": 2.7380000000000003e-06, "loss": 34.1278, "step": 13690 }, { "epoch": 0.02767486677682745, "grad_norm": 312.103759765625, "learning_rate": 2.7400000000000004e-06, "loss": 32.734, "step": 13700 }, { "epoch": 0.027695067409511265, "grad_norm": 529.0984497070312, "learning_rate": 2.742e-06, "loss": 15.2163, "step": 13710 }, { "epoch": 0.027715268042195082, "grad_norm": 734.2744750976562, 
"learning_rate": 2.744e-06, "loss": 32.0383, "step": 13720 }, { "epoch": 0.0277354686748789, "grad_norm": 937.9981079101562, "learning_rate": 2.7460000000000005e-06, "loss": 32.932, "step": 13730 }, { "epoch": 0.02775566930756271, "grad_norm": 1043.4871826171875, "learning_rate": 2.748e-06, "loss": 24.3559, "step": 13740 }, { "epoch": 0.027775869940246528, "grad_norm": 875.7940063476562, "learning_rate": 2.7500000000000004e-06, "loss": 31.5296, "step": 13750 }, { "epoch": 0.027796070572930345, "grad_norm": 649.774169921875, "learning_rate": 2.752e-06, "loss": 40.3713, "step": 13760 }, { "epoch": 0.02781627120561416, "grad_norm": 550.9602661132812, "learning_rate": 2.754e-06, "loss": 51.3406, "step": 13770 }, { "epoch": 0.027836471838297974, "grad_norm": 777.0423583984375, "learning_rate": 2.7560000000000004e-06, "loss": 31.8327, "step": 13780 }, { "epoch": 0.02785667247098179, "grad_norm": 448.1114807128906, "learning_rate": 2.758e-06, "loss": 17.6592, "step": 13790 }, { "epoch": 0.027876873103665607, "grad_norm": 725.5538330078125, "learning_rate": 2.7600000000000003e-06, "loss": 26.8034, "step": 13800 }, { "epoch": 0.027897073736349424, "grad_norm": 722.1695556640625, "learning_rate": 2.7620000000000004e-06, "loss": 34.467, "step": 13810 }, { "epoch": 0.027917274369033237, "grad_norm": 517.4332885742188, "learning_rate": 2.764e-06, "loss": 15.6391, "step": 13820 }, { "epoch": 0.027937475001717053, "grad_norm": 458.1663818359375, "learning_rate": 2.7660000000000003e-06, "loss": 36.6212, "step": 13830 }, { "epoch": 0.02795767563440087, "grad_norm": 411.2251892089844, "learning_rate": 2.768e-06, "loss": 21.7761, "step": 13840 }, { "epoch": 0.027977876267084686, "grad_norm": 364.0091552734375, "learning_rate": 2.7700000000000006e-06, "loss": 22.7303, "step": 13850 }, { "epoch": 0.0279980768997685, "grad_norm": 614.5543212890625, "learning_rate": 2.7720000000000003e-06, "loss": 21.6346, "step": 13860 }, { "epoch": 0.028018277532452316, "grad_norm": 713.9307861328125, "learning_rate": 2.774e-06, "loss": 23.9566, "step": 13870 }, { "epoch": 0.028038478165136133, "grad_norm": 884.5711669921875, "learning_rate": 2.776e-06, "loss": 26.8261, "step": 13880 }, { "epoch": 0.02805867879781995, "grad_norm": 616.1358642578125, "learning_rate": 2.7780000000000003e-06, "loss": 27.573, "step": 13890 }, { "epoch": 0.028078879430503762, "grad_norm": 491.07427978515625, "learning_rate": 2.7800000000000005e-06, "loss": 24.4312, "step": 13900 }, { "epoch": 0.02809908006318758, "grad_norm": 291.7569580078125, "learning_rate": 2.7820000000000002e-06, "loss": 16.2176, "step": 13910 }, { "epoch": 0.028119280695871395, "grad_norm": 719.0975341796875, "learning_rate": 2.784e-06, "loss": 24.8783, "step": 13920 }, { "epoch": 0.02813948132855521, "grad_norm": 247.43075561523438, "learning_rate": 2.7860000000000005e-06, "loss": 12.6267, "step": 13930 }, { "epoch": 0.028159681961239025, "grad_norm": 0.0, "learning_rate": 2.7880000000000002e-06, "loss": 21.0015, "step": 13940 }, { "epoch": 0.02817988259392284, "grad_norm": 683.0343627929688, "learning_rate": 2.7900000000000004e-06, "loss": 36.7794, "step": 13950 }, { "epoch": 0.028200083226606658, "grad_norm": 2201.24267578125, "learning_rate": 2.792e-06, "loss": 26.4449, "step": 13960 }, { "epoch": 0.028220283859290474, "grad_norm": 907.6283569335938, "learning_rate": 2.794e-06, "loss": 24.2639, "step": 13970 }, { "epoch": 0.028240484491974287, "grad_norm": 647.8742065429688, "learning_rate": 2.7960000000000004e-06, "loss": 26.6286, "step": 13980 }, { "epoch": 
0.028260685124658104, "grad_norm": 434.18438720703125, "learning_rate": 2.798e-06, "loss": 20.2479, "step": 13990 }, { "epoch": 0.02828088575734192, "grad_norm": 572.0900268554688, "learning_rate": 2.8000000000000003e-06, "loss": 29.5283, "step": 14000 }, { "epoch": 0.028301086390025737, "grad_norm": 222.71372985839844, "learning_rate": 2.8020000000000004e-06, "loss": 31.4689, "step": 14010 }, { "epoch": 0.02832128702270955, "grad_norm": 832.9528198242188, "learning_rate": 2.804e-06, "loss": 27.296, "step": 14020 }, { "epoch": 0.028341487655393367, "grad_norm": 322.5573425292969, "learning_rate": 2.8060000000000003e-06, "loss": 41.7819, "step": 14030 }, { "epoch": 0.028361688288077183, "grad_norm": 436.2952575683594, "learning_rate": 2.808e-06, "loss": 31.179, "step": 14040 }, { "epoch": 0.028381888920761, "grad_norm": 245.27647399902344, "learning_rate": 2.8100000000000006e-06, "loss": 27.0722, "step": 14050 }, { "epoch": 0.028402089553444813, "grad_norm": 266.8973693847656, "learning_rate": 2.8120000000000004e-06, "loss": 18.3411, "step": 14060 }, { "epoch": 0.02842229018612863, "grad_norm": 362.5631103515625, "learning_rate": 2.814e-06, "loss": 30.4403, "step": 14070 }, { "epoch": 0.028442490818812446, "grad_norm": 571.2347412109375, "learning_rate": 2.8160000000000002e-06, "loss": 21.4641, "step": 14080 }, { "epoch": 0.028462691451496262, "grad_norm": 935.2764282226562, "learning_rate": 2.8180000000000004e-06, "loss": 29.7431, "step": 14090 }, { "epoch": 0.028482892084180075, "grad_norm": 447.9610290527344, "learning_rate": 2.82e-06, "loss": 36.1941, "step": 14100 }, { "epoch": 0.028503092716863892, "grad_norm": 363.7928771972656, "learning_rate": 2.8220000000000003e-06, "loss": 26.6371, "step": 14110 }, { "epoch": 0.02852329334954771, "grad_norm": 419.3346252441406, "learning_rate": 2.824e-06, "loss": 34.1951, "step": 14120 }, { "epoch": 0.028543493982231525, "grad_norm": 234.77806091308594, "learning_rate": 2.8260000000000006e-06, "loss": 9.7577, "step": 14130 }, { "epoch": 0.028563694614915338, "grad_norm": 385.75970458984375, "learning_rate": 2.8280000000000003e-06, "loss": 31.3418, "step": 14140 }, { "epoch": 0.028583895247599155, "grad_norm": 192.51187133789062, "learning_rate": 2.83e-06, "loss": 37.0611, "step": 14150 }, { "epoch": 0.02860409588028297, "grad_norm": 845.19921875, "learning_rate": 2.832e-06, "loss": 15.7974, "step": 14160 }, { "epoch": 0.028624296512966788, "grad_norm": 13.857260704040527, "learning_rate": 2.834e-06, "loss": 25.4985, "step": 14170 }, { "epoch": 0.0286444971456506, "grad_norm": 798.99560546875, "learning_rate": 2.8360000000000005e-06, "loss": 27.4345, "step": 14180 }, { "epoch": 0.028664697778334417, "grad_norm": 1094.974853515625, "learning_rate": 2.838e-06, "loss": 32.5372, "step": 14190 }, { "epoch": 0.028684898411018234, "grad_norm": 494.9541015625, "learning_rate": 2.84e-06, "loss": 39.5633, "step": 14200 }, { "epoch": 0.02870509904370205, "grad_norm": 525.530029296875, "learning_rate": 2.8420000000000005e-06, "loss": 26.2091, "step": 14210 }, { "epoch": 0.028725299676385863, "grad_norm": 350.001708984375, "learning_rate": 2.8440000000000002e-06, "loss": 23.9767, "step": 14220 }, { "epoch": 0.02874550030906968, "grad_norm": 285.36102294921875, "learning_rate": 2.8460000000000004e-06, "loss": 38.4315, "step": 14230 }, { "epoch": 0.028765700941753496, "grad_norm": 234.65621948242188, "learning_rate": 2.848e-06, "loss": 27.0473, "step": 14240 }, { "epoch": 0.028785901574437313, "grad_norm": 1140.964599609375, "learning_rate": 2.85e-06, "loss": 
32.4249, "step": 14250 }, { "epoch": 0.028806102207121126, "grad_norm": 476.35528564453125, "learning_rate": 2.8520000000000004e-06, "loss": 11.1549, "step": 14260 }, { "epoch": 0.028826302839804942, "grad_norm": 312.71795654296875, "learning_rate": 2.854e-06, "loss": 23.1731, "step": 14270 }, { "epoch": 0.02884650347248876, "grad_norm": 307.3836364746094, "learning_rate": 2.8560000000000003e-06, "loss": 47.9001, "step": 14280 }, { "epoch": 0.028866704105172575, "grad_norm": 635.443603515625, "learning_rate": 2.8580000000000004e-06, "loss": 45.4959, "step": 14290 }, { "epoch": 0.02888690473785639, "grad_norm": 976.892333984375, "learning_rate": 2.86e-06, "loss": 43.5277, "step": 14300 }, { "epoch": 0.028907105370540205, "grad_norm": 691.7721557617188, "learning_rate": 2.8620000000000003e-06, "loss": 35.6253, "step": 14310 }, { "epoch": 0.02892730600322402, "grad_norm": 777.8397827148438, "learning_rate": 2.864e-06, "loss": 40.4026, "step": 14320 }, { "epoch": 0.028947506635907838, "grad_norm": 521.339111328125, "learning_rate": 2.8660000000000006e-06, "loss": 15.8446, "step": 14330 }, { "epoch": 0.02896770726859165, "grad_norm": 925.6652221679688, "learning_rate": 2.8680000000000003e-06, "loss": 30.4705, "step": 14340 }, { "epoch": 0.028987907901275468, "grad_norm": 591.0034790039062, "learning_rate": 2.87e-06, "loss": 30.5676, "step": 14350 }, { "epoch": 0.029008108533959284, "grad_norm": 3101.965087890625, "learning_rate": 2.872e-06, "loss": 49.2806, "step": 14360 }, { "epoch": 0.0290283091666431, "grad_norm": 438.4816589355469, "learning_rate": 2.874e-06, "loss": 30.102, "step": 14370 }, { "epoch": 0.029048509799326914, "grad_norm": 684.9572143554688, "learning_rate": 2.8760000000000005e-06, "loss": 32.759, "step": 14380 }, { "epoch": 0.02906871043201073, "grad_norm": 810.0599365234375, "learning_rate": 2.8780000000000002e-06, "loss": 17.7539, "step": 14390 }, { "epoch": 0.029088911064694547, "grad_norm": 1185.997802734375, "learning_rate": 2.88e-06, "loss": 33.1232, "step": 14400 }, { "epoch": 0.029109111697378363, "grad_norm": 452.52032470703125, "learning_rate": 2.8820000000000005e-06, "loss": 17.2496, "step": 14410 }, { "epoch": 0.029129312330062176, "grad_norm": 801.4060668945312, "learning_rate": 2.8840000000000003e-06, "loss": 24.6073, "step": 14420 }, { "epoch": 0.029149512962745993, "grad_norm": 243.73663330078125, "learning_rate": 2.8860000000000004e-06, "loss": 19.0073, "step": 14430 }, { "epoch": 0.02916971359542981, "grad_norm": 244.5675506591797, "learning_rate": 2.888e-06, "loss": 32.802, "step": 14440 }, { "epoch": 0.029189914228113626, "grad_norm": 700.1507568359375, "learning_rate": 2.89e-06, "loss": 29.7115, "step": 14450 }, { "epoch": 0.02921011486079744, "grad_norm": 291.5823059082031, "learning_rate": 2.8920000000000004e-06, "loss": 26.6881, "step": 14460 }, { "epoch": 0.029230315493481256, "grad_norm": 175.30331420898438, "learning_rate": 2.894e-06, "loss": 17.5004, "step": 14470 }, { "epoch": 0.029250516126165072, "grad_norm": 677.0678100585938, "learning_rate": 2.8960000000000003e-06, "loss": 28.2859, "step": 14480 }, { "epoch": 0.02927071675884889, "grad_norm": 1238.733154296875, "learning_rate": 2.8980000000000005e-06, "loss": 36.7792, "step": 14490 }, { "epoch": 0.029290917391532702, "grad_norm": 314.10546875, "learning_rate": 2.9e-06, "loss": 29.563, "step": 14500 }, { "epoch": 0.02931111802421652, "grad_norm": 455.9222717285156, "learning_rate": 2.9020000000000003e-06, "loss": 21.9228, "step": 14510 }, { "epoch": 0.029331318656900335, "grad_norm": 
793.5556640625, "learning_rate": 2.904e-06, "loss": 17.6044, "step": 14520 }, { "epoch": 0.02935151928958415, "grad_norm": 481.8849182128906, "learning_rate": 2.9060000000000006e-06, "loss": 19.1541, "step": 14530 }, { "epoch": 0.029371719922267964, "grad_norm": 0.0, "learning_rate": 2.9080000000000004e-06, "loss": 20.9598, "step": 14540 }, { "epoch": 0.02939192055495178, "grad_norm": 744.2693481445312, "learning_rate": 2.91e-06, "loss": 31.1545, "step": 14550 }, { "epoch": 0.029412121187635597, "grad_norm": 337.4864196777344, "learning_rate": 2.9120000000000002e-06, "loss": 27.8448, "step": 14560 }, { "epoch": 0.029432321820319414, "grad_norm": 710.1856689453125, "learning_rate": 2.914e-06, "loss": 21.5089, "step": 14570 }, { "epoch": 0.029452522453003227, "grad_norm": 391.34686279296875, "learning_rate": 2.9160000000000005e-06, "loss": 45.8425, "step": 14580 }, { "epoch": 0.029472723085687044, "grad_norm": 736.681396484375, "learning_rate": 2.9180000000000003e-06, "loss": 20.4341, "step": 14590 }, { "epoch": 0.02949292371837086, "grad_norm": 806.5769653320312, "learning_rate": 2.92e-06, "loss": 22.0079, "step": 14600 }, { "epoch": 0.029513124351054677, "grad_norm": 203.32749938964844, "learning_rate": 2.9220000000000006e-06, "loss": 28.6116, "step": 14610 }, { "epoch": 0.02953332498373849, "grad_norm": 837.3760375976562, "learning_rate": 2.9240000000000003e-06, "loss": 29.4047, "step": 14620 }, { "epoch": 0.029553525616422306, "grad_norm": 671.6912841796875, "learning_rate": 2.9260000000000004e-06, "loss": 44.9515, "step": 14630 }, { "epoch": 0.029573726249106123, "grad_norm": 722.2235107421875, "learning_rate": 2.928e-06, "loss": 17.3497, "step": 14640 }, { "epoch": 0.02959392688178994, "grad_norm": 1134.066162109375, "learning_rate": 2.93e-06, "loss": 25.729, "step": 14650 }, { "epoch": 0.029614127514473752, "grad_norm": 6.874101638793945, "learning_rate": 2.9320000000000005e-06, "loss": 31.8318, "step": 14660 }, { "epoch": 0.02963432814715757, "grad_norm": 810.16845703125, "learning_rate": 2.934e-06, "loss": 26.2838, "step": 14670 }, { "epoch": 0.029654528779841385, "grad_norm": 908.3150024414062, "learning_rate": 2.9360000000000003e-06, "loss": 37.4466, "step": 14680 }, { "epoch": 0.029674729412525202, "grad_norm": 996.0183715820312, "learning_rate": 2.9380000000000005e-06, "loss": 27.5585, "step": 14690 }, { "epoch": 0.029694930045209015, "grad_norm": 613.7411499023438, "learning_rate": 2.9400000000000002e-06, "loss": 15.2366, "step": 14700 }, { "epoch": 0.02971513067789283, "grad_norm": 0.0, "learning_rate": 2.9420000000000004e-06, "loss": 10.9313, "step": 14710 }, { "epoch": 0.029735331310576648, "grad_norm": 396.75531005859375, "learning_rate": 2.944e-06, "loss": 32.1213, "step": 14720 }, { "epoch": 0.029755531943260465, "grad_norm": 1955.3218994140625, "learning_rate": 2.946e-06, "loss": 19.4884, "step": 14730 }, { "epoch": 0.029775732575944278, "grad_norm": 392.6688232421875, "learning_rate": 2.9480000000000004e-06, "loss": 21.4439, "step": 14740 }, { "epoch": 0.029795933208628094, "grad_norm": 1024.240234375, "learning_rate": 2.95e-06, "loss": 31.6278, "step": 14750 }, { "epoch": 0.02981613384131191, "grad_norm": 371.4592590332031, "learning_rate": 2.9520000000000003e-06, "loss": 18.927, "step": 14760 }, { "epoch": 0.029836334473995727, "grad_norm": 719.5804443359375, "learning_rate": 2.954e-06, "loss": 32.5111, "step": 14770 }, { "epoch": 0.02985653510667954, "grad_norm": 783.6981811523438, "learning_rate": 2.956e-06, "loss": 28.8026, "step": 14780 }, { "epoch": 
0.029876735739363357, "grad_norm": 463.20074462890625, "learning_rate": 2.9580000000000003e-06, "loss": 38.6224, "step": 14790 }, { "epoch": 0.029896936372047173, "grad_norm": 757.3056640625, "learning_rate": 2.96e-06, "loss": 47.2772, "step": 14800 }, { "epoch": 0.02991713700473099, "grad_norm": 981.7617797851562, "learning_rate": 2.9620000000000006e-06, "loss": 23.6404, "step": 14810 }, { "epoch": 0.029937337637414803, "grad_norm": 492.99432373046875, "learning_rate": 2.9640000000000003e-06, "loss": 27.6951, "step": 14820 }, { "epoch": 0.02995753827009862, "grad_norm": 1653.3358154296875, "learning_rate": 2.966e-06, "loss": 42.0704, "step": 14830 }, { "epoch": 0.029977738902782436, "grad_norm": 619.4223022460938, "learning_rate": 2.9680000000000002e-06, "loss": 31.683, "step": 14840 }, { "epoch": 0.029997939535466252, "grad_norm": 496.3249206542969, "learning_rate": 2.97e-06, "loss": 16.9797, "step": 14850 }, { "epoch": 0.030018140168150065, "grad_norm": 862.78271484375, "learning_rate": 2.9720000000000005e-06, "loss": 25.0164, "step": 14860 }, { "epoch": 0.030038340800833882, "grad_norm": 523.188720703125, "learning_rate": 2.9740000000000002e-06, "loss": 19.1842, "step": 14870 }, { "epoch": 0.0300585414335177, "grad_norm": 859.2904052734375, "learning_rate": 2.976e-06, "loss": 29.1439, "step": 14880 }, { "epoch": 0.030078742066201515, "grad_norm": 510.52191162109375, "learning_rate": 2.9780000000000005e-06, "loss": 44.8978, "step": 14890 }, { "epoch": 0.030098942698885328, "grad_norm": 478.2204284667969, "learning_rate": 2.9800000000000003e-06, "loss": 30.1172, "step": 14900 }, { "epoch": 0.030119143331569145, "grad_norm": 514.6478271484375, "learning_rate": 2.9820000000000004e-06, "loss": 21.5455, "step": 14910 }, { "epoch": 0.03013934396425296, "grad_norm": 939.9052734375, "learning_rate": 2.984e-06, "loss": 34.8996, "step": 14920 }, { "epoch": 0.030159544596936778, "grad_norm": 715.1705322265625, "learning_rate": 2.986e-06, "loss": 20.6811, "step": 14930 }, { "epoch": 0.03017974522962059, "grad_norm": 682.1227416992188, "learning_rate": 2.9880000000000004e-06, "loss": 33.3376, "step": 14940 }, { "epoch": 0.030199945862304407, "grad_norm": 198.76992797851562, "learning_rate": 2.99e-06, "loss": 35.4152, "step": 14950 }, { "epoch": 0.030220146494988224, "grad_norm": 260.5537109375, "learning_rate": 2.9920000000000003e-06, "loss": 34.4215, "step": 14960 }, { "epoch": 0.03024034712767204, "grad_norm": 765.5437622070312, "learning_rate": 2.994e-06, "loss": 33.299, "step": 14970 }, { "epoch": 0.030260547760355853, "grad_norm": 333.50396728515625, "learning_rate": 2.996e-06, "loss": 40.3484, "step": 14980 }, { "epoch": 0.03028074839303967, "grad_norm": 68.164794921875, "learning_rate": 2.9980000000000003e-06, "loss": 31.7485, "step": 14990 }, { "epoch": 0.030300949025723486, "grad_norm": 627.5897827148438, "learning_rate": 3e-06, "loss": 33.2965, "step": 15000 }, { "epoch": 0.030321149658407303, "grad_norm": 333.0770568847656, "learning_rate": 3.0020000000000006e-06, "loss": 27.5349, "step": 15010 }, { "epoch": 0.030341350291091116, "grad_norm": 201.7154998779297, "learning_rate": 3.0040000000000004e-06, "loss": 40.4812, "step": 15020 }, { "epoch": 0.030361550923774933, "grad_norm": 638.8067016601562, "learning_rate": 3.006e-06, "loss": 30.0714, "step": 15030 }, { "epoch": 0.03038175155645875, "grad_norm": 721.8273315429688, "learning_rate": 3.0080000000000003e-06, "loss": 29.557, "step": 15040 }, { "epoch": 0.030401952189142566, "grad_norm": 940.406005859375, "learning_rate": 3.01e-06, 
"loss": 30.875, "step": 15050 }, { "epoch": 0.03042215282182638, "grad_norm": 537.4491577148438, "learning_rate": 3.0120000000000006e-06, "loss": 27.1281, "step": 15060 }, { "epoch": 0.030442353454510195, "grad_norm": 349.7826843261719, "learning_rate": 3.0140000000000003e-06, "loss": 33.0032, "step": 15070 }, { "epoch": 0.030462554087194012, "grad_norm": 615.8168334960938, "learning_rate": 3.016e-06, "loss": 29.8807, "step": 15080 }, { "epoch": 0.03048275471987783, "grad_norm": 543.2507934570312, "learning_rate": 3.0180000000000006e-06, "loss": 24.4638, "step": 15090 }, { "epoch": 0.03050295535256164, "grad_norm": 1828.9852294921875, "learning_rate": 3.0200000000000003e-06, "loss": 22.2808, "step": 15100 }, { "epoch": 0.030523155985245458, "grad_norm": 460.4822692871094, "learning_rate": 3.0220000000000005e-06, "loss": 18.9824, "step": 15110 }, { "epoch": 0.030543356617929274, "grad_norm": 363.14569091796875, "learning_rate": 3.024e-06, "loss": 17.4815, "step": 15120 }, { "epoch": 0.03056355725061309, "grad_norm": 574.8248901367188, "learning_rate": 3.026e-06, "loss": 16.694, "step": 15130 }, { "epoch": 0.030583757883296904, "grad_norm": 619.8360595703125, "learning_rate": 3.0280000000000005e-06, "loss": 18.2632, "step": 15140 }, { "epoch": 0.03060395851598072, "grad_norm": 720.516357421875, "learning_rate": 3.0300000000000002e-06, "loss": 44.8752, "step": 15150 }, { "epoch": 0.030624159148664537, "grad_norm": 769.7462768554688, "learning_rate": 3.0320000000000004e-06, "loss": 29.0794, "step": 15160 }, { "epoch": 0.030644359781348354, "grad_norm": 860.6575317382812, "learning_rate": 3.034e-06, "loss": 42.3765, "step": 15170 }, { "epoch": 0.030664560414032167, "grad_norm": 443.2711486816406, "learning_rate": 3.0360000000000002e-06, "loss": 33.6403, "step": 15180 }, { "epoch": 0.030684761046715983, "grad_norm": 733.3158569335938, "learning_rate": 3.0380000000000004e-06, "loss": 27.6277, "step": 15190 }, { "epoch": 0.0307049616793998, "grad_norm": 787.5064697265625, "learning_rate": 3.04e-06, "loss": 37.7274, "step": 15200 }, { "epoch": 0.030725162312083613, "grad_norm": 2191.33349609375, "learning_rate": 3.0420000000000007e-06, "loss": 53.4131, "step": 15210 }, { "epoch": 0.03074536294476743, "grad_norm": 680.395751953125, "learning_rate": 3.0440000000000004e-06, "loss": 19.9371, "step": 15220 }, { "epoch": 0.030765563577451246, "grad_norm": 281.8812561035156, "learning_rate": 3.046e-06, "loss": 26.9937, "step": 15230 }, { "epoch": 0.030785764210135062, "grad_norm": 522.1564331054688, "learning_rate": 3.0480000000000003e-06, "loss": 24.629, "step": 15240 }, { "epoch": 0.030805964842818875, "grad_norm": 254.390869140625, "learning_rate": 3.05e-06, "loss": 37.8011, "step": 15250 }, { "epoch": 0.030826165475502692, "grad_norm": 156.7596893310547, "learning_rate": 3.0520000000000006e-06, "loss": 20.2772, "step": 15260 }, { "epoch": 0.03084636610818651, "grad_norm": 224.06796264648438, "learning_rate": 3.0540000000000003e-06, "loss": 24.4388, "step": 15270 }, { "epoch": 0.030866566740870325, "grad_norm": 270.6825866699219, "learning_rate": 3.056e-06, "loss": 21.0277, "step": 15280 }, { "epoch": 0.030886767373554138, "grad_norm": 1013.138916015625, "learning_rate": 3.0580000000000006e-06, "loss": 34.1167, "step": 15290 }, { "epoch": 0.030906968006237955, "grad_norm": 577.4921264648438, "learning_rate": 3.0600000000000003e-06, "loss": 27.4165, "step": 15300 }, { "epoch": 0.03092716863892177, "grad_norm": 218.55142211914062, "learning_rate": 3.0620000000000005e-06, "loss": 29.5243, "step": 15310 }, 
{ "epoch": 0.030947369271605588, "grad_norm": 683.9786987304688, "learning_rate": 3.0640000000000002e-06, "loss": 19.9124, "step": 15320 }, { "epoch": 0.0309675699042894, "grad_norm": 456.1734619140625, "learning_rate": 3.066e-06, "loss": 32.225, "step": 15330 }, { "epoch": 0.030987770536973217, "grad_norm": 379.7656555175781, "learning_rate": 3.0680000000000005e-06, "loss": 25.2169, "step": 15340 }, { "epoch": 0.031007971169657034, "grad_norm": 566.6279296875, "learning_rate": 3.0700000000000003e-06, "loss": 51.9398, "step": 15350 }, { "epoch": 0.03102817180234085, "grad_norm": 866.3634643554688, "learning_rate": 3.072e-06, "loss": 25.7627, "step": 15360 }, { "epoch": 0.031048372435024663, "grad_norm": 508.4942932128906, "learning_rate": 3.074e-06, "loss": 41.3464, "step": 15370 }, { "epoch": 0.03106857306770848, "grad_norm": 591.5121459960938, "learning_rate": 3.0760000000000003e-06, "loss": 23.6245, "step": 15380 }, { "epoch": 0.031088773700392296, "grad_norm": 600.5582275390625, "learning_rate": 3.0780000000000004e-06, "loss": 41.7437, "step": 15390 }, { "epoch": 0.031108974333076113, "grad_norm": 516.2938842773438, "learning_rate": 3.08e-06, "loss": 17.6149, "step": 15400 }, { "epoch": 0.031129174965759926, "grad_norm": 245.33395385742188, "learning_rate": 3.082e-06, "loss": 23.1306, "step": 15410 }, { "epoch": 0.031149375598443742, "grad_norm": 754.3231201171875, "learning_rate": 3.0840000000000005e-06, "loss": 42.3905, "step": 15420 }, { "epoch": 0.03116957623112756, "grad_norm": 183.8521728515625, "learning_rate": 3.086e-06, "loss": 26.3259, "step": 15430 }, { "epoch": 0.031189776863811376, "grad_norm": 543.6480712890625, "learning_rate": 3.0880000000000003e-06, "loss": 26.1311, "step": 15440 }, { "epoch": 0.03120997749649519, "grad_norm": 549.1738891601562, "learning_rate": 3.09e-06, "loss": 31.1268, "step": 15450 }, { "epoch": 0.031230178129179005, "grad_norm": 409.1370544433594, "learning_rate": 3.092e-06, "loss": 25.4154, "step": 15460 }, { "epoch": 0.03125037876186282, "grad_norm": 639.0555419921875, "learning_rate": 3.0940000000000004e-06, "loss": 20.0562, "step": 15470 }, { "epoch": 0.03127057939454664, "grad_norm": 562.7499389648438, "learning_rate": 3.096e-06, "loss": 19.9571, "step": 15480 }, { "epoch": 0.03129078002723045, "grad_norm": 588.2476196289062, "learning_rate": 3.0980000000000007e-06, "loss": 29.1326, "step": 15490 }, { "epoch": 0.03131098065991427, "grad_norm": 402.01629638671875, "learning_rate": 3.1000000000000004e-06, "loss": 42.9993, "step": 15500 }, { "epoch": 0.031331181292598084, "grad_norm": 464.4310607910156, "learning_rate": 3.102e-06, "loss": 28.3306, "step": 15510 }, { "epoch": 0.0313513819252819, "grad_norm": 1001.6248168945312, "learning_rate": 3.1040000000000003e-06, "loss": 27.8757, "step": 15520 }, { "epoch": 0.03137158255796572, "grad_norm": 291.364501953125, "learning_rate": 3.106e-06, "loss": 26.5913, "step": 15530 }, { "epoch": 0.03139178319064953, "grad_norm": 512.9525756835938, "learning_rate": 3.1080000000000006e-06, "loss": 22.5451, "step": 15540 }, { "epoch": 0.03141198382333334, "grad_norm": 583.1221313476562, "learning_rate": 3.1100000000000003e-06, "loss": 22.0143, "step": 15550 }, { "epoch": 0.03143218445601716, "grad_norm": 184.39239501953125, "learning_rate": 3.112e-06, "loss": 21.4702, "step": 15560 }, { "epoch": 0.031452385088700976, "grad_norm": 146.10752868652344, "learning_rate": 3.114e-06, "loss": 12.1912, "step": 15570 }, { "epoch": 0.031472585721384796, "grad_norm": 392.8891906738281, "learning_rate": 
3.1160000000000003e-06, "loss": 44.2547, "step": 15580 }, { "epoch": 0.03149278635406861, "grad_norm": 429.8520812988281, "learning_rate": 3.1180000000000005e-06, "loss": 25.8197, "step": 15590 }, { "epoch": 0.03151298698675242, "grad_norm": 991.6886596679688, "learning_rate": 3.12e-06, "loss": 24.9521, "step": 15600 }, { "epoch": 0.03153318761943624, "grad_norm": 614.9202270507812, "learning_rate": 3.122e-06, "loss": 24.8537, "step": 15610 }, { "epoch": 0.031553388252120056, "grad_norm": 645.7086181640625, "learning_rate": 3.1240000000000005e-06, "loss": 15.0873, "step": 15620 }, { "epoch": 0.03157358888480387, "grad_norm": 1097.1376953125, "learning_rate": 3.1260000000000002e-06, "loss": 26.1902, "step": 15630 }, { "epoch": 0.03159378951748769, "grad_norm": 916.2590942382812, "learning_rate": 3.1280000000000004e-06, "loss": 33.1488, "step": 15640 }, { "epoch": 0.0316139901501715, "grad_norm": 716.00341796875, "learning_rate": 3.13e-06, "loss": 23.5244, "step": 15650 }, { "epoch": 0.03163419078285532, "grad_norm": 329.9820861816406, "learning_rate": 3.132e-06, "loss": 24.3839, "step": 15660 }, { "epoch": 0.031654391415539135, "grad_norm": 434.7264099121094, "learning_rate": 3.1340000000000004e-06, "loss": 20.0705, "step": 15670 }, { "epoch": 0.03167459204822295, "grad_norm": 1124.01708984375, "learning_rate": 3.136e-06, "loss": 31.049, "step": 15680 }, { "epoch": 0.03169479268090677, "grad_norm": 966.0602416992188, "learning_rate": 3.1380000000000003e-06, "loss": 40.8942, "step": 15690 }, { "epoch": 0.03171499331359058, "grad_norm": 1354.5238037109375, "learning_rate": 3.1400000000000004e-06, "loss": 25.6929, "step": 15700 }, { "epoch": 0.031735193946274394, "grad_norm": 986.6156005859375, "learning_rate": 3.142e-06, "loss": 15.9512, "step": 15710 }, { "epoch": 0.031755394578958214, "grad_norm": 782.9063110351562, "learning_rate": 3.1440000000000003e-06, "loss": 43.8836, "step": 15720 }, { "epoch": 0.03177559521164203, "grad_norm": 897.754150390625, "learning_rate": 3.146e-06, "loss": 23.8829, "step": 15730 }, { "epoch": 0.03179579584432585, "grad_norm": 811.4805908203125, "learning_rate": 3.1480000000000006e-06, "loss": 27.5326, "step": 15740 }, { "epoch": 0.03181599647700966, "grad_norm": 261.16192626953125, "learning_rate": 3.1500000000000003e-06, "loss": 38.2281, "step": 15750 }, { "epoch": 0.03183619710969347, "grad_norm": 1244.820068359375, "learning_rate": 3.152e-06, "loss": 51.8557, "step": 15760 }, { "epoch": 0.03185639774237729, "grad_norm": 416.2852478027344, "learning_rate": 3.154e-06, "loss": 14.7294, "step": 15770 }, { "epoch": 0.031876598375061106, "grad_norm": 627.7940063476562, "learning_rate": 3.1560000000000004e-06, "loss": 28.2093, "step": 15780 }, { "epoch": 0.03189679900774492, "grad_norm": 628.6441040039062, "learning_rate": 3.1580000000000005e-06, "loss": 22.0732, "step": 15790 }, { "epoch": 0.03191699964042874, "grad_norm": 381.6886291503906, "learning_rate": 3.1600000000000002e-06, "loss": 14.0432, "step": 15800 }, { "epoch": 0.03193720027311255, "grad_norm": 430.179931640625, "learning_rate": 3.162e-06, "loss": 33.138, "step": 15810 }, { "epoch": 0.03195740090579637, "grad_norm": 562.0535888671875, "learning_rate": 3.1640000000000005e-06, "loss": 34.5442, "step": 15820 }, { "epoch": 0.031977601538480185, "grad_norm": 603.3442993164062, "learning_rate": 3.1660000000000003e-06, "loss": 21.5006, "step": 15830 }, { "epoch": 0.031997802171164, "grad_norm": 367.27880859375, "learning_rate": 3.1680000000000004e-06, "loss": 35.3885, "step": 15840 }, { "epoch": 
0.03201800280384782, "grad_norm": 612.0071411132812, "learning_rate": 3.17e-06, "loss": 33.6516, "step": 15850 }, { "epoch": 0.03203820343653163, "grad_norm": 335.4685974121094, "learning_rate": 3.172e-06, "loss": 31.1106, "step": 15860 }, { "epoch": 0.032058404069215445, "grad_norm": 356.3526306152344, "learning_rate": 3.1740000000000004e-06, "loss": 28.422, "step": 15870 }, { "epoch": 0.032078604701899265, "grad_norm": 0.0, "learning_rate": 3.176e-06, "loss": 21.6766, "step": 15880 }, { "epoch": 0.03209880533458308, "grad_norm": 1218.23583984375, "learning_rate": 3.1780000000000003e-06, "loss": 41.5568, "step": 15890 }, { "epoch": 0.0321190059672669, "grad_norm": 251.69009399414062, "learning_rate": 3.1800000000000005e-06, "loss": 38.6637, "step": 15900 }, { "epoch": 0.03213920659995071, "grad_norm": 393.4754333496094, "learning_rate": 3.182e-06, "loss": 57.3885, "step": 15910 }, { "epoch": 0.032159407232634524, "grad_norm": 527.3384399414062, "learning_rate": 3.1840000000000003e-06, "loss": 43.3291, "step": 15920 }, { "epoch": 0.032179607865318344, "grad_norm": 568.7590942382812, "learning_rate": 3.186e-06, "loss": 33.8822, "step": 15930 }, { "epoch": 0.03219980849800216, "grad_norm": 729.0355224609375, "learning_rate": 3.188e-06, "loss": 38.2755, "step": 15940 }, { "epoch": 0.03222000913068597, "grad_norm": 706.9700927734375, "learning_rate": 3.1900000000000004e-06, "loss": 27.8433, "step": 15950 }, { "epoch": 0.03224020976336979, "grad_norm": 757.6204833984375, "learning_rate": 3.192e-06, "loss": 42.868, "step": 15960 }, { "epoch": 0.0322604103960536, "grad_norm": 395.45770263671875, "learning_rate": 3.1940000000000003e-06, "loss": 22.6544, "step": 15970 }, { "epoch": 0.03228061102873742, "grad_norm": 357.60565185546875, "learning_rate": 3.1960000000000004e-06, "loss": 30.446, "step": 15980 }, { "epoch": 0.032300811661421236, "grad_norm": 440.5813293457031, "learning_rate": 3.198e-06, "loss": 38.8448, "step": 15990 }, { "epoch": 0.03232101229410505, "grad_norm": 292.3539733886719, "learning_rate": 3.2000000000000003e-06, "loss": 27.1719, "step": 16000 }, { "epoch": 0.03234121292678887, "grad_norm": 301.79400634765625, "learning_rate": 3.202e-06, "loss": 27.114, "step": 16010 }, { "epoch": 0.03236141355947268, "grad_norm": 477.06793212890625, "learning_rate": 3.2040000000000006e-06, "loss": 38.1142, "step": 16020 }, { "epoch": 0.032381614192156495, "grad_norm": 0.0, "learning_rate": 3.2060000000000003e-06, "loss": 27.6631, "step": 16030 }, { "epoch": 0.032401814824840315, "grad_norm": 891.1085815429688, "learning_rate": 3.208e-06, "loss": 44.7654, "step": 16040 }, { "epoch": 0.03242201545752413, "grad_norm": 272.23663330078125, "learning_rate": 3.21e-06, "loss": 44.5703, "step": 16050 }, { "epoch": 0.03244221609020795, "grad_norm": 58.88239288330078, "learning_rate": 3.212e-06, "loss": 32.7265, "step": 16060 }, { "epoch": 0.03246241672289176, "grad_norm": 791.5468139648438, "learning_rate": 3.2140000000000005e-06, "loss": 27.4122, "step": 16070 }, { "epoch": 0.032482617355575574, "grad_norm": 42.52566146850586, "learning_rate": 3.216e-06, "loss": 19.6953, "step": 16080 }, { "epoch": 0.032502817988259394, "grad_norm": 573.6635131835938, "learning_rate": 3.218e-06, "loss": 27.0275, "step": 16090 }, { "epoch": 0.03252301862094321, "grad_norm": 370.3996276855469, "learning_rate": 3.2200000000000005e-06, "loss": 17.8417, "step": 16100 }, { "epoch": 0.03254321925362702, "grad_norm": 424.79132080078125, "learning_rate": 3.2220000000000002e-06, "loss": 32.9271, "step": 16110 }, { "epoch": 
0.03256341988631084, "grad_norm": 591.5966796875, "learning_rate": 3.2240000000000004e-06, "loss": 52.1652, "step": 16120 }, { "epoch": 0.03258362051899465, "grad_norm": 472.8325500488281, "learning_rate": 3.226e-06, "loss": 21.4173, "step": 16130 }, { "epoch": 0.03260382115167847, "grad_norm": 1556.498046875, "learning_rate": 3.228e-06, "loss": 25.8709, "step": 16140 }, { "epoch": 0.032624021784362287, "grad_norm": 441.6474914550781, "learning_rate": 3.2300000000000004e-06, "loss": 34.7153, "step": 16150 }, { "epoch": 0.0326442224170461, "grad_norm": 221.67434692382812, "learning_rate": 3.232e-06, "loss": 23.3257, "step": 16160 }, { "epoch": 0.03266442304972992, "grad_norm": 0.0, "learning_rate": 3.2340000000000003e-06, "loss": 22.1946, "step": 16170 }, { "epoch": 0.03268462368241373, "grad_norm": 361.2557373046875, "learning_rate": 3.2360000000000004e-06, "loss": 22.125, "step": 16180 }, { "epoch": 0.032704824315097546, "grad_norm": 480.7691650390625, "learning_rate": 3.238e-06, "loss": 14.4723, "step": 16190 }, { "epoch": 0.032725024947781366, "grad_norm": 1278.60009765625, "learning_rate": 3.2400000000000003e-06, "loss": 30.1542, "step": 16200 }, { "epoch": 0.03274522558046518, "grad_norm": 427.308837890625, "learning_rate": 3.242e-06, "loss": 28.6297, "step": 16210 }, { "epoch": 0.032765426213149, "grad_norm": 690.6139526367188, "learning_rate": 3.2440000000000006e-06, "loss": 23.8278, "step": 16220 }, { "epoch": 0.03278562684583281, "grad_norm": 759.2803955078125, "learning_rate": 3.2460000000000003e-06, "loss": 37.0073, "step": 16230 }, { "epoch": 0.032805827478516625, "grad_norm": 487.7970275878906, "learning_rate": 3.248e-06, "loss": 21.3471, "step": 16240 }, { "epoch": 0.032826028111200445, "grad_norm": 638.1491088867188, "learning_rate": 3.2500000000000002e-06, "loss": 22.9298, "step": 16250 }, { "epoch": 0.03284622874388426, "grad_norm": 230.1177215576172, "learning_rate": 3.252e-06, "loss": 30.0071, "step": 16260 }, { "epoch": 0.03286642937656807, "grad_norm": 596.5241088867188, "learning_rate": 3.2540000000000005e-06, "loss": 17.0724, "step": 16270 }, { "epoch": 0.03288663000925189, "grad_norm": 181.33509826660156, "learning_rate": 3.2560000000000003e-06, "loss": 23.5193, "step": 16280 }, { "epoch": 0.032906830641935704, "grad_norm": 404.99420166015625, "learning_rate": 3.258e-06, "loss": 22.4026, "step": 16290 }, { "epoch": 0.032927031274619524, "grad_norm": 352.00689697265625, "learning_rate": 3.2600000000000006e-06, "loss": 18.9548, "step": 16300 }, { "epoch": 0.03294723190730334, "grad_norm": 602.6947021484375, "learning_rate": 3.2620000000000003e-06, "loss": 26.5133, "step": 16310 }, { "epoch": 0.03296743253998715, "grad_norm": 1009.1510009765625, "learning_rate": 3.2640000000000004e-06, "loss": 23.3099, "step": 16320 }, { "epoch": 0.03298763317267097, "grad_norm": 650.4998168945312, "learning_rate": 3.266e-06, "loss": 23.7321, "step": 16330 }, { "epoch": 0.03300783380535478, "grad_norm": 463.7991943359375, "learning_rate": 3.268e-06, "loss": 19.2079, "step": 16340 }, { "epoch": 0.033028034438038596, "grad_norm": 732.532958984375, "learning_rate": 3.2700000000000005e-06, "loss": 36.9818, "step": 16350 }, { "epoch": 0.033048235070722416, "grad_norm": 540.9727783203125, "learning_rate": 3.272e-06, "loss": 19.345, "step": 16360 }, { "epoch": 0.03306843570340623, "grad_norm": 566.6883544921875, "learning_rate": 3.2740000000000003e-06, "loss": 26.3954, "step": 16370 }, { "epoch": 0.03308863633609005, "grad_norm": 441.031982421875, "learning_rate": 3.2760000000000005e-06, 
"loss": 25.699, "step": 16380 }, { "epoch": 0.03310883696877386, "grad_norm": 437.2505187988281, "learning_rate": 3.278e-06, "loss": 22.3838, "step": 16390 }, { "epoch": 0.033129037601457675, "grad_norm": 839.2191162109375, "learning_rate": 3.2800000000000004e-06, "loss": 52.5256, "step": 16400 }, { "epoch": 0.033149238234141495, "grad_norm": 385.2565612792969, "learning_rate": 3.282e-06, "loss": 32.6833, "step": 16410 }, { "epoch": 0.03316943886682531, "grad_norm": 336.7561340332031, "learning_rate": 3.2840000000000007e-06, "loss": 26.1039, "step": 16420 }, { "epoch": 0.03318963949950912, "grad_norm": 154.6703338623047, "learning_rate": 3.2860000000000004e-06, "loss": 19.3196, "step": 16430 }, { "epoch": 0.03320984013219294, "grad_norm": 307.9013671875, "learning_rate": 3.288e-06, "loss": 21.57, "step": 16440 }, { "epoch": 0.033230040764876755, "grad_norm": 304.7125549316406, "learning_rate": 3.2900000000000003e-06, "loss": 26.7927, "step": 16450 }, { "epoch": 0.033250241397560575, "grad_norm": 709.2648315429688, "learning_rate": 3.292e-06, "loss": 23.7141, "step": 16460 }, { "epoch": 0.03327044203024439, "grad_norm": 161.2997589111328, "learning_rate": 3.2940000000000006e-06, "loss": 17.6274, "step": 16470 }, { "epoch": 0.0332906426629282, "grad_norm": 234.00225830078125, "learning_rate": 3.2960000000000003e-06, "loss": 24.9305, "step": 16480 }, { "epoch": 0.03331084329561202, "grad_norm": 266.7884826660156, "learning_rate": 3.298e-06, "loss": 17.2077, "step": 16490 }, { "epoch": 0.033331043928295834, "grad_norm": 459.37664794921875, "learning_rate": 3.3000000000000006e-06, "loss": 31.7385, "step": 16500 }, { "epoch": 0.03335124456097965, "grad_norm": 394.70391845703125, "learning_rate": 3.3020000000000003e-06, "loss": 36.4949, "step": 16510 }, { "epoch": 0.03337144519366347, "grad_norm": 261.3290100097656, "learning_rate": 3.3040000000000005e-06, "loss": 16.4725, "step": 16520 }, { "epoch": 0.03339164582634728, "grad_norm": 590.04931640625, "learning_rate": 3.306e-06, "loss": 25.0828, "step": 16530 }, { "epoch": 0.0334118464590311, "grad_norm": 1402.6011962890625, "learning_rate": 3.308e-06, "loss": 54.7112, "step": 16540 }, { "epoch": 0.03343204709171491, "grad_norm": 662.274658203125, "learning_rate": 3.3100000000000005e-06, "loss": 36.125, "step": 16550 }, { "epoch": 0.033452247724398726, "grad_norm": 477.1455383300781, "learning_rate": 3.3120000000000002e-06, "loss": 39.5076, "step": 16560 }, { "epoch": 0.033472448357082546, "grad_norm": 240.7284698486328, "learning_rate": 3.314e-06, "loss": 35.2164, "step": 16570 }, { "epoch": 0.03349264898976636, "grad_norm": 531.02685546875, "learning_rate": 3.3160000000000005e-06, "loss": 29.3164, "step": 16580 }, { "epoch": 0.03351284962245017, "grad_norm": 568.7428588867188, "learning_rate": 3.3180000000000003e-06, "loss": 24.5444, "step": 16590 }, { "epoch": 0.03353305025513399, "grad_norm": 236.66079711914062, "learning_rate": 3.3200000000000004e-06, "loss": 18.6516, "step": 16600 }, { "epoch": 0.033553250887817805, "grad_norm": 331.0774230957031, "learning_rate": 3.322e-06, "loss": 16.5033, "step": 16610 }, { "epoch": 0.033573451520501625, "grad_norm": 357.0018310546875, "learning_rate": 3.324e-06, "loss": 26.9735, "step": 16620 }, { "epoch": 0.03359365215318544, "grad_norm": 430.9911193847656, "learning_rate": 3.3260000000000004e-06, "loss": 28.2231, "step": 16630 }, { "epoch": 0.03361385278586925, "grad_norm": 956.1023559570312, "learning_rate": 3.328e-06, "loss": 45.2886, "step": 16640 }, { "epoch": 0.03363405341855307, "grad_norm": 
546.6738891601562, "learning_rate": 3.3300000000000003e-06, "loss": 30.9994, "step": 16650 }, { "epoch": 0.033654254051236884, "grad_norm": 175.43746948242188, "learning_rate": 3.332e-06, "loss": 20.7395, "step": 16660 }, { "epoch": 0.0336744546839207, "grad_norm": 673.0543823242188, "learning_rate": 3.334e-06, "loss": 29.1951, "step": 16670 }, { "epoch": 0.03369465531660452, "grad_norm": 93.5210189819336, "learning_rate": 3.3360000000000003e-06, "loss": 19.8843, "step": 16680 }, { "epoch": 0.03371485594928833, "grad_norm": 507.43133544921875, "learning_rate": 3.338e-06, "loss": 26.0732, "step": 16690 }, { "epoch": 0.03373505658197215, "grad_norm": 732.0919189453125, "learning_rate": 3.3400000000000006e-06, "loss": 39.2512, "step": 16700 }, { "epoch": 0.03375525721465596, "grad_norm": 207.01541137695312, "learning_rate": 3.3420000000000004e-06, "loss": 25.9437, "step": 16710 }, { "epoch": 0.033775457847339777, "grad_norm": 430.7888488769531, "learning_rate": 3.344e-06, "loss": 26.1944, "step": 16720 }, { "epoch": 0.033795658480023597, "grad_norm": 216.97750854492188, "learning_rate": 3.3460000000000002e-06, "loss": 17.1715, "step": 16730 }, { "epoch": 0.03381585911270741, "grad_norm": 896.1383056640625, "learning_rate": 3.348e-06, "loss": 27.9904, "step": 16740 }, { "epoch": 0.03383605974539122, "grad_norm": 424.6932678222656, "learning_rate": 3.3500000000000005e-06, "loss": 19.8765, "step": 16750 }, { "epoch": 0.03385626037807504, "grad_norm": 296.5496520996094, "learning_rate": 3.3520000000000003e-06, "loss": 14.7035, "step": 16760 }, { "epoch": 0.033876461010758856, "grad_norm": 694.663330078125, "learning_rate": 3.354e-06, "loss": 39.6883, "step": 16770 }, { "epoch": 0.033896661643442676, "grad_norm": 1065.7681884765625, "learning_rate": 3.3560000000000006e-06, "loss": 32.8346, "step": 16780 }, { "epoch": 0.03391686227612649, "grad_norm": 620.96826171875, "learning_rate": 3.3580000000000003e-06, "loss": 24.6682, "step": 16790 }, { "epoch": 0.0339370629088103, "grad_norm": 1143.7791748046875, "learning_rate": 3.3600000000000004e-06, "loss": 12.908, "step": 16800 }, { "epoch": 0.03395726354149412, "grad_norm": 414.8392333984375, "learning_rate": 3.362e-06, "loss": 36.5095, "step": 16810 }, { "epoch": 0.033977464174177935, "grad_norm": 129.9327850341797, "learning_rate": 3.364e-06, "loss": 25.4761, "step": 16820 }, { "epoch": 0.03399766480686175, "grad_norm": 928.8633422851562, "learning_rate": 3.3660000000000005e-06, "loss": 29.5998, "step": 16830 }, { "epoch": 0.03401786543954557, "grad_norm": 347.5965576171875, "learning_rate": 3.368e-06, "loss": 16.697, "step": 16840 }, { "epoch": 0.03403806607222938, "grad_norm": 429.3285827636719, "learning_rate": 3.3700000000000003e-06, "loss": 28.8614, "step": 16850 }, { "epoch": 0.0340582667049132, "grad_norm": 388.8118591308594, "learning_rate": 3.372e-06, "loss": 20.4107, "step": 16860 }, { "epoch": 0.034078467337597014, "grad_norm": 855.3677368164062, "learning_rate": 3.3740000000000002e-06, "loss": 32.0758, "step": 16870 }, { "epoch": 0.03409866797028083, "grad_norm": 424.2457580566406, "learning_rate": 3.3760000000000004e-06, "loss": 24.5486, "step": 16880 }, { "epoch": 0.03411886860296465, "grad_norm": 243.54652404785156, "learning_rate": 3.378e-06, "loss": 11.4537, "step": 16890 }, { "epoch": 0.03413906923564846, "grad_norm": 582.7598266601562, "learning_rate": 3.3800000000000007e-06, "loss": 23.3055, "step": 16900 }, { "epoch": 0.03415926986833227, "grad_norm": 443.8633117675781, "learning_rate": 3.3820000000000004e-06, "loss": 36.2278, 
"step": 16910 }, { "epoch": 0.03417947050101609, "grad_norm": 539.4478759765625, "learning_rate": 3.384e-06, "loss": 26.0477, "step": 16920 }, { "epoch": 0.034199671133699906, "grad_norm": 674.6696166992188, "learning_rate": 3.3860000000000003e-06, "loss": 26.3475, "step": 16930 }, { "epoch": 0.034219871766383726, "grad_norm": 360.42352294921875, "learning_rate": 3.388e-06, "loss": 22.5731, "step": 16940 }, { "epoch": 0.03424007239906754, "grad_norm": 218.48251342773438, "learning_rate": 3.3900000000000006e-06, "loss": 22.8484, "step": 16950 }, { "epoch": 0.03426027303175135, "grad_norm": 756.140380859375, "learning_rate": 3.3920000000000003e-06, "loss": 38.419, "step": 16960 }, { "epoch": 0.03428047366443517, "grad_norm": 429.0950012207031, "learning_rate": 3.394e-06, "loss": 55.9932, "step": 16970 }, { "epoch": 0.034300674297118985, "grad_norm": 429.5851135253906, "learning_rate": 3.3960000000000006e-06, "loss": 10.647, "step": 16980 }, { "epoch": 0.0343208749298028, "grad_norm": 314.9561462402344, "learning_rate": 3.3980000000000003e-06, "loss": 25.6604, "step": 16990 }, { "epoch": 0.03434107556248662, "grad_norm": 92.45370483398438, "learning_rate": 3.4000000000000005e-06, "loss": 13.6651, "step": 17000 }, { "epoch": 0.03436127619517043, "grad_norm": 1021.7738647460938, "learning_rate": 3.402e-06, "loss": 53.2146, "step": 17010 }, { "epoch": 0.03438147682785425, "grad_norm": 304.0646057128906, "learning_rate": 3.404e-06, "loss": 27.0317, "step": 17020 }, { "epoch": 0.034401677460538065, "grad_norm": 615.5056762695312, "learning_rate": 3.4060000000000005e-06, "loss": 21.2823, "step": 17030 }, { "epoch": 0.03442187809322188, "grad_norm": 178.4603729248047, "learning_rate": 3.4080000000000002e-06, "loss": 38.0386, "step": 17040 }, { "epoch": 0.0344420787259057, "grad_norm": 868.1829223632812, "learning_rate": 3.4100000000000004e-06, "loss": 31.8253, "step": 17050 }, { "epoch": 0.03446227935858951, "grad_norm": 365.075927734375, "learning_rate": 3.412e-06, "loss": 26.7497, "step": 17060 }, { "epoch": 0.034482479991273324, "grad_norm": 397.6850891113281, "learning_rate": 3.4140000000000003e-06, "loss": 20.5952, "step": 17070 }, { "epoch": 0.034502680623957144, "grad_norm": 1088.1448974609375, "learning_rate": 3.4160000000000004e-06, "loss": 29.8785, "step": 17080 }, { "epoch": 0.03452288125664096, "grad_norm": 1024.5538330078125, "learning_rate": 3.418e-06, "loss": 42.1487, "step": 17090 }, { "epoch": 0.03454308188932478, "grad_norm": 811.8573608398438, "learning_rate": 3.4200000000000007e-06, "loss": 35.5794, "step": 17100 }, { "epoch": 0.03456328252200859, "grad_norm": 488.4293212890625, "learning_rate": 3.4220000000000004e-06, "loss": 39.9756, "step": 17110 }, { "epoch": 0.0345834831546924, "grad_norm": 706.856201171875, "learning_rate": 3.424e-06, "loss": 16.4257, "step": 17120 }, { "epoch": 0.03460368378737622, "grad_norm": 500.7957458496094, "learning_rate": 3.4260000000000003e-06, "loss": 21.9832, "step": 17130 }, { "epoch": 0.034623884420060036, "grad_norm": 313.5267028808594, "learning_rate": 3.428e-06, "loss": 39.0117, "step": 17140 }, { "epoch": 0.03464408505274385, "grad_norm": 459.3201599121094, "learning_rate": 3.4300000000000006e-06, "loss": 26.1945, "step": 17150 }, { "epoch": 0.03466428568542767, "grad_norm": 694.1936645507812, "learning_rate": 3.4320000000000003e-06, "loss": 24.7299, "step": 17160 }, { "epoch": 0.03468448631811148, "grad_norm": 0.0, "learning_rate": 3.434e-06, "loss": 21.6834, "step": 17170 }, { "epoch": 0.0347046869507953, "grad_norm": 404.22015380859375, 
"learning_rate": 3.4360000000000006e-06, "loss": 22.3646, "step": 17180 }, { "epoch": 0.034724887583479115, "grad_norm": 912.0065307617188, "learning_rate": 3.4380000000000004e-06, "loss": 53.3527, "step": 17190 }, { "epoch": 0.03474508821616293, "grad_norm": 502.2011413574219, "learning_rate": 3.44e-06, "loss": 53.6234, "step": 17200 }, { "epoch": 0.03476528884884675, "grad_norm": 2277.644287109375, "learning_rate": 3.4420000000000002e-06, "loss": 46.4125, "step": 17210 }, { "epoch": 0.03478548948153056, "grad_norm": 428.8004455566406, "learning_rate": 3.444e-06, "loss": 22.3706, "step": 17220 }, { "epoch": 0.034805690114214374, "grad_norm": 797.9168090820312, "learning_rate": 3.4460000000000005e-06, "loss": 21.4223, "step": 17230 }, { "epoch": 0.034825890746898194, "grad_norm": 166.49786376953125, "learning_rate": 3.4480000000000003e-06, "loss": 19.3625, "step": 17240 }, { "epoch": 0.03484609137958201, "grad_norm": 639.303466796875, "learning_rate": 3.45e-06, "loss": 30.4237, "step": 17250 }, { "epoch": 0.03486629201226583, "grad_norm": 1690.0789794921875, "learning_rate": 3.452e-06, "loss": 31.5046, "step": 17260 }, { "epoch": 0.03488649264494964, "grad_norm": 234.71603393554688, "learning_rate": 3.4540000000000003e-06, "loss": 32.2156, "step": 17270 }, { "epoch": 0.034906693277633453, "grad_norm": 600.4459838867188, "learning_rate": 3.4560000000000005e-06, "loss": 34.8896, "step": 17280 }, { "epoch": 0.034926893910317273, "grad_norm": 702.6697998046875, "learning_rate": 3.458e-06, "loss": 23.0082, "step": 17290 }, { "epoch": 0.03494709454300109, "grad_norm": 317.1498107910156, "learning_rate": 3.46e-06, "loss": 19.3729, "step": 17300 }, { "epoch": 0.0349672951756849, "grad_norm": 749.3865966796875, "learning_rate": 3.4620000000000005e-06, "loss": 29.1623, "step": 17310 }, { "epoch": 0.03498749580836872, "grad_norm": 401.767822265625, "learning_rate": 3.464e-06, "loss": 20.5925, "step": 17320 }, { "epoch": 0.03500769644105253, "grad_norm": 470.0053405761719, "learning_rate": 3.4660000000000004e-06, "loss": 32.5431, "step": 17330 }, { "epoch": 0.03502789707373635, "grad_norm": 497.3592224121094, "learning_rate": 3.468e-06, "loss": 16.0818, "step": 17340 }, { "epoch": 0.035048097706420166, "grad_norm": 660.4264526367188, "learning_rate": 3.4700000000000002e-06, "loss": 18.0981, "step": 17350 }, { "epoch": 0.03506829833910398, "grad_norm": 625.1888427734375, "learning_rate": 3.4720000000000004e-06, "loss": 30.1002, "step": 17360 }, { "epoch": 0.0350884989717878, "grad_norm": 1246.976318359375, "learning_rate": 3.474e-06, "loss": 32.2393, "step": 17370 }, { "epoch": 0.03510869960447161, "grad_norm": 108.3302993774414, "learning_rate": 3.4760000000000007e-06, "loss": 8.8377, "step": 17380 }, { "epoch": 0.035128900237155425, "grad_norm": 718.4808959960938, "learning_rate": 3.4780000000000004e-06, "loss": 36.9861, "step": 17390 }, { "epoch": 0.035149100869839245, "grad_norm": 285.5974426269531, "learning_rate": 3.48e-06, "loss": 34.1444, "step": 17400 }, { "epoch": 0.03516930150252306, "grad_norm": 683.3985595703125, "learning_rate": 3.4820000000000003e-06, "loss": 17.0873, "step": 17410 }, { "epoch": 0.03518950213520688, "grad_norm": 893.44677734375, "learning_rate": 3.484e-06, "loss": 39.6395, "step": 17420 }, { "epoch": 0.03520970276789069, "grad_norm": 682.7764892578125, "learning_rate": 3.4860000000000006e-06, "loss": 75.2991, "step": 17430 }, { "epoch": 0.035229903400574504, "grad_norm": 647.171142578125, "learning_rate": 3.4880000000000003e-06, "loss": 15.9127, "step": 17440 }, { 
"epoch": 0.035250104033258324, "grad_norm": 618.6836547851562, "learning_rate": 3.49e-06, "loss": 26.5643, "step": 17450 }, { "epoch": 0.03527030466594214, "grad_norm": 348.82342529296875, "learning_rate": 3.492e-06, "loss": 32.2348, "step": 17460 }, { "epoch": 0.03529050529862595, "grad_norm": 591.755615234375, "learning_rate": 3.4940000000000003e-06, "loss": 38.7832, "step": 17470 }, { "epoch": 0.03531070593130977, "grad_norm": 855.585693359375, "learning_rate": 3.4960000000000005e-06, "loss": 37.9762, "step": 17480 }, { "epoch": 0.03533090656399358, "grad_norm": 319.1050109863281, "learning_rate": 3.4980000000000002e-06, "loss": 43.616, "step": 17490 }, { "epoch": 0.0353511071966774, "grad_norm": 951.2423706054688, "learning_rate": 3.5e-06, "loss": 19.7842, "step": 17500 }, { "epoch": 0.035371307829361216, "grad_norm": 605.2108154296875, "learning_rate": 3.5020000000000005e-06, "loss": 24.4234, "step": 17510 }, { "epoch": 0.03539150846204503, "grad_norm": 81.48918151855469, "learning_rate": 3.5040000000000002e-06, "loss": 23.4033, "step": 17520 }, { "epoch": 0.03541170909472885, "grad_norm": 352.0289001464844, "learning_rate": 3.5060000000000004e-06, "loss": 17.1986, "step": 17530 }, { "epoch": 0.03543190972741266, "grad_norm": 220.1028289794922, "learning_rate": 3.508e-06, "loss": 28.5471, "step": 17540 }, { "epoch": 0.035452110360096475, "grad_norm": 170.3765106201172, "learning_rate": 3.5100000000000003e-06, "loss": 29.259, "step": 17550 }, { "epoch": 0.035472310992780295, "grad_norm": 262.6505126953125, "learning_rate": 3.5120000000000004e-06, "loss": 18.0606, "step": 17560 }, { "epoch": 0.03549251162546411, "grad_norm": 424.7445068359375, "learning_rate": 3.514e-06, "loss": 26.9656, "step": 17570 }, { "epoch": 0.03551271225814793, "grad_norm": 1122.718994140625, "learning_rate": 3.5160000000000007e-06, "loss": 29.4671, "step": 17580 }, { "epoch": 0.03553291289083174, "grad_norm": 848.5406494140625, "learning_rate": 3.5180000000000005e-06, "loss": 28.6708, "step": 17590 }, { "epoch": 0.035553113523515555, "grad_norm": 661.636474609375, "learning_rate": 3.52e-06, "loss": 22.97, "step": 17600 }, { "epoch": 0.035573314156199375, "grad_norm": 2700.53515625, "learning_rate": 3.5220000000000003e-06, "loss": 62.9361, "step": 17610 }, { "epoch": 0.03559351478888319, "grad_norm": 1014.1585693359375, "learning_rate": 3.524e-06, "loss": 32.995, "step": 17620 }, { "epoch": 0.035613715421567, "grad_norm": 382.7838134765625, "learning_rate": 3.5260000000000006e-06, "loss": 20.5589, "step": 17630 }, { "epoch": 0.03563391605425082, "grad_norm": 455.59600830078125, "learning_rate": 3.5280000000000004e-06, "loss": 21.9954, "step": 17640 }, { "epoch": 0.035654116686934634, "grad_norm": 383.1478576660156, "learning_rate": 3.53e-06, "loss": 51.5302, "step": 17650 }, { "epoch": 0.035674317319618454, "grad_norm": 348.96173095703125, "learning_rate": 3.5320000000000002e-06, "loss": 9.3253, "step": 17660 }, { "epoch": 0.03569451795230227, "grad_norm": 456.492431640625, "learning_rate": 3.5340000000000004e-06, "loss": 17.8969, "step": 17670 }, { "epoch": 0.03571471858498608, "grad_norm": 391.4908447265625, "learning_rate": 3.5360000000000005e-06, "loss": 30.9601, "step": 17680 }, { "epoch": 0.0357349192176699, "grad_norm": 89.62699127197266, "learning_rate": 3.5380000000000003e-06, "loss": 23.1197, "step": 17690 }, { "epoch": 0.03575511985035371, "grad_norm": 8.72734260559082, "learning_rate": 3.54e-06, "loss": 27.353, "step": 17700 }, { "epoch": 0.035775320483037526, "grad_norm": 440.166259765625, 
"learning_rate": 3.5420000000000006e-06, "loss": 28.6035, "step": 17710 }, { "epoch": 0.035795521115721346, "grad_norm": 141.1392059326172, "learning_rate": 3.5440000000000003e-06, "loss": 23.6285, "step": 17720 }, { "epoch": 0.03581572174840516, "grad_norm": 1183.832275390625, "learning_rate": 3.5460000000000004e-06, "loss": 36.4731, "step": 17730 }, { "epoch": 0.03583592238108898, "grad_norm": 308.72686767578125, "learning_rate": 3.548e-06, "loss": 13.224, "step": 17740 }, { "epoch": 0.03585612301377279, "grad_norm": 625.1483154296875, "learning_rate": 3.5500000000000003e-06, "loss": 28.4961, "step": 17750 }, { "epoch": 0.035876323646456605, "grad_norm": 377.944091796875, "learning_rate": 3.5520000000000005e-06, "loss": 33.3541, "step": 17760 }, { "epoch": 0.035896524279140425, "grad_norm": 453.13470458984375, "learning_rate": 3.554e-06, "loss": 42.5362, "step": 17770 }, { "epoch": 0.03591672491182424, "grad_norm": 377.8801574707031, "learning_rate": 3.5560000000000008e-06, "loss": 23.4628, "step": 17780 }, { "epoch": 0.03593692554450805, "grad_norm": 64.29519653320312, "learning_rate": 3.5580000000000005e-06, "loss": 13.6451, "step": 17790 }, { "epoch": 0.03595712617719187, "grad_norm": 345.0412902832031, "learning_rate": 3.5600000000000002e-06, "loss": 44.6675, "step": 17800 }, { "epoch": 0.035977326809875684, "grad_norm": 381.2247619628906, "learning_rate": 3.5620000000000004e-06, "loss": 25.701, "step": 17810 }, { "epoch": 0.035997527442559504, "grad_norm": 1403.4259033203125, "learning_rate": 3.564e-06, "loss": 35.4093, "step": 17820 }, { "epoch": 0.03601772807524332, "grad_norm": 516.9909057617188, "learning_rate": 3.566e-06, "loss": 22.2979, "step": 17830 }, { "epoch": 0.03603792870792713, "grad_norm": 453.0965270996094, "learning_rate": 3.5680000000000004e-06, "loss": 23.0691, "step": 17840 }, { "epoch": 0.03605812934061095, "grad_norm": 1489.0709228515625, "learning_rate": 3.57e-06, "loss": 49.8878, "step": 17850 }, { "epoch": 0.036078329973294763, "grad_norm": 391.0494384765625, "learning_rate": 3.5720000000000003e-06, "loss": 20.696, "step": 17860 }, { "epoch": 0.03609853060597858, "grad_norm": 669.98779296875, "learning_rate": 3.5740000000000004e-06, "loss": 28.2965, "step": 17870 }, { "epoch": 0.0361187312386624, "grad_norm": 420.1402587890625, "learning_rate": 3.576e-06, "loss": 23.3762, "step": 17880 }, { "epoch": 0.03613893187134621, "grad_norm": 620.1629638671875, "learning_rate": 3.5780000000000003e-06, "loss": 18.8507, "step": 17890 }, { "epoch": 0.03615913250403003, "grad_norm": 995.0001831054688, "learning_rate": 3.58e-06, "loss": 48.6008, "step": 17900 }, { "epoch": 0.03617933313671384, "grad_norm": 569.3245239257812, "learning_rate": 3.5820000000000006e-06, "loss": 22.7512, "step": 17910 }, { "epoch": 0.036199533769397656, "grad_norm": 572.5311279296875, "learning_rate": 3.5840000000000003e-06, "loss": 42.9053, "step": 17920 }, { "epoch": 0.036219734402081476, "grad_norm": 473.06280517578125, "learning_rate": 3.586e-06, "loss": 23.6802, "step": 17930 }, { "epoch": 0.03623993503476529, "grad_norm": 416.5789794921875, "learning_rate": 3.588e-06, "loss": 37.7727, "step": 17940 }, { "epoch": 0.0362601356674491, "grad_norm": 627.5274658203125, "learning_rate": 3.5900000000000004e-06, "loss": 17.963, "step": 17950 }, { "epoch": 0.03628033630013292, "grad_norm": 332.6874694824219, "learning_rate": 3.5920000000000005e-06, "loss": 19.0672, "step": 17960 }, { "epoch": 0.036300536932816735, "grad_norm": 3338.54443359375, "learning_rate": 3.5940000000000002e-06, "loss": 
51.021, "step": 17970 }, { "epoch": 0.036320737565500555, "grad_norm": 842.3485107421875, "learning_rate": 3.596e-06, "loss": 34.2707, "step": 17980 }, { "epoch": 0.03634093819818437, "grad_norm": 299.7825927734375, "learning_rate": 3.5980000000000005e-06, "loss": 32.0701, "step": 17990 }, { "epoch": 0.03636113883086818, "grad_norm": 951.4468994140625, "learning_rate": 3.6000000000000003e-06, "loss": 25.0228, "step": 18000 }, { "epoch": 0.036381339463552, "grad_norm": 422.8774108886719, "learning_rate": 3.6020000000000004e-06, "loss": 30.886, "step": 18010 }, { "epoch": 0.036401540096235814, "grad_norm": 254.622314453125, "learning_rate": 3.604e-06, "loss": 25.8877, "step": 18020 }, { "epoch": 0.03642174072891963, "grad_norm": 163.25782775878906, "learning_rate": 3.606e-06, "loss": 30.17, "step": 18030 }, { "epoch": 0.03644194136160345, "grad_norm": 355.1414489746094, "learning_rate": 3.6080000000000004e-06, "loss": 19.5321, "step": 18040 }, { "epoch": 0.03646214199428726, "grad_norm": 604.4697265625, "learning_rate": 3.61e-06, "loss": 13.9627, "step": 18050 }, { "epoch": 0.03648234262697108, "grad_norm": 414.0849914550781, "learning_rate": 3.6120000000000003e-06, "loss": 9.15, "step": 18060 }, { "epoch": 0.03650254325965489, "grad_norm": 669.28369140625, "learning_rate": 3.6140000000000005e-06, "loss": 24.235, "step": 18070 }, { "epoch": 0.036522743892338706, "grad_norm": 644.69873046875, "learning_rate": 3.616e-06, "loss": 29.0484, "step": 18080 }, { "epoch": 0.036542944525022526, "grad_norm": 537.6671142578125, "learning_rate": 3.6180000000000003e-06, "loss": 21.0134, "step": 18090 }, { "epoch": 0.03656314515770634, "grad_norm": 423.848876953125, "learning_rate": 3.62e-06, "loss": 20.3401, "step": 18100 }, { "epoch": 0.03658334579039015, "grad_norm": 1117.3096923828125, "learning_rate": 3.6220000000000006e-06, "loss": 50.8774, "step": 18110 }, { "epoch": 0.03660354642307397, "grad_norm": 1382.8843994140625, "learning_rate": 3.6240000000000004e-06, "loss": 37.2023, "step": 18120 }, { "epoch": 0.036623747055757785, "grad_norm": 284.3473205566406, "learning_rate": 3.626e-06, "loss": 32.0322, "step": 18130 }, { "epoch": 0.036643947688441605, "grad_norm": 1064.8023681640625, "learning_rate": 3.6280000000000002e-06, "loss": 38.1642, "step": 18140 }, { "epoch": 0.03666414832112542, "grad_norm": 359.7071533203125, "learning_rate": 3.6300000000000004e-06, "loss": 25.7966, "step": 18150 }, { "epoch": 0.03668434895380923, "grad_norm": 543.3189086914062, "learning_rate": 3.6320000000000005e-06, "loss": 21.5021, "step": 18160 }, { "epoch": 0.03670454958649305, "grad_norm": 690.4579467773438, "learning_rate": 3.6340000000000003e-06, "loss": 22.0808, "step": 18170 }, { "epoch": 0.036724750219176865, "grad_norm": 29.11857032775879, "learning_rate": 3.636e-06, "loss": 21.0527, "step": 18180 }, { "epoch": 0.03674495085186068, "grad_norm": 218.865234375, "learning_rate": 3.6380000000000006e-06, "loss": 44.4114, "step": 18190 }, { "epoch": 0.0367651514845445, "grad_norm": 823.153076171875, "learning_rate": 3.6400000000000003e-06, "loss": 40.9089, "step": 18200 }, { "epoch": 0.03678535211722831, "grad_norm": 815.5130004882812, "learning_rate": 3.6420000000000005e-06, "loss": 27.5567, "step": 18210 }, { "epoch": 0.03680555274991213, "grad_norm": 357.49871826171875, "learning_rate": 3.644e-06, "loss": 12.2424, "step": 18220 }, { "epoch": 0.036825753382595944, "grad_norm": 865.4989624023438, "learning_rate": 3.646e-06, "loss": 22.4987, "step": 18230 }, { "epoch": 0.03684595401527976, "grad_norm": 
759.4170532226562, "learning_rate": 3.6480000000000005e-06, "loss": 24.3934, "step": 18240 }, { "epoch": 0.03686615464796358, "grad_norm": 542.9179077148438, "learning_rate": 3.65e-06, "loss": 20.8989, "step": 18250 }, { "epoch": 0.03688635528064739, "grad_norm": 210.0708465576172, "learning_rate": 3.6520000000000004e-06, "loss": 15.2844, "step": 18260 }, { "epoch": 0.0369065559133312, "grad_norm": 147.86729431152344, "learning_rate": 3.6540000000000005e-06, "loss": 24.368, "step": 18270 }, { "epoch": 0.03692675654601502, "grad_norm": 848.0860595703125, "learning_rate": 3.6560000000000002e-06, "loss": 15.2613, "step": 18280 }, { "epoch": 0.036946957178698836, "grad_norm": 471.3977355957031, "learning_rate": 3.6580000000000004e-06, "loss": 29.735, "step": 18290 }, { "epoch": 0.03696715781138265, "grad_norm": 221.9785614013672, "learning_rate": 3.66e-06, "loss": 19.94, "step": 18300 }, { "epoch": 0.03698735844406647, "grad_norm": 1696.908935546875, "learning_rate": 3.6620000000000007e-06, "loss": 60.9916, "step": 18310 }, { "epoch": 0.03700755907675028, "grad_norm": 803.4354248046875, "learning_rate": 3.6640000000000004e-06, "loss": 36.4354, "step": 18320 }, { "epoch": 0.0370277597094341, "grad_norm": 613.2241821289062, "learning_rate": 3.666e-06, "loss": 28.5093, "step": 18330 }, { "epoch": 0.037047960342117915, "grad_norm": 892.1907958984375, "learning_rate": 3.6680000000000003e-06, "loss": 34.7192, "step": 18340 }, { "epoch": 0.03706816097480173, "grad_norm": 288.34228515625, "learning_rate": 3.6700000000000004e-06, "loss": 17.6466, "step": 18350 }, { "epoch": 0.03708836160748555, "grad_norm": 946.3200073242188, "learning_rate": 3.6720000000000006e-06, "loss": 31.1397, "step": 18360 }, { "epoch": 0.03710856224016936, "grad_norm": 577.26416015625, "learning_rate": 3.6740000000000003e-06, "loss": 24.5398, "step": 18370 }, { "epoch": 0.037128762872853174, "grad_norm": 775.6914672851562, "learning_rate": 3.676e-06, "loss": 35.2331, "step": 18380 }, { "epoch": 0.037148963505536994, "grad_norm": 404.9434814453125, "learning_rate": 3.6780000000000006e-06, "loss": 32.2677, "step": 18390 }, { "epoch": 0.03716916413822081, "grad_norm": 995.0198364257812, "learning_rate": 3.6800000000000003e-06, "loss": 17.7147, "step": 18400 }, { "epoch": 0.03718936477090463, "grad_norm": 753.1340942382812, "learning_rate": 3.6820000000000005e-06, "loss": 17.9428, "step": 18410 }, { "epoch": 0.03720956540358844, "grad_norm": 2061.60888671875, "learning_rate": 3.6840000000000002e-06, "loss": 48.1814, "step": 18420 }, { "epoch": 0.037229766036272254, "grad_norm": 260.50567626953125, "learning_rate": 3.686e-06, "loss": 30.1308, "step": 18430 }, { "epoch": 0.037249966668956074, "grad_norm": 373.1856384277344, "learning_rate": 3.6880000000000005e-06, "loss": 19.3992, "step": 18440 }, { "epoch": 0.03727016730163989, "grad_norm": 483.49462890625, "learning_rate": 3.6900000000000002e-06, "loss": 18.4943, "step": 18450 }, { "epoch": 0.0372903679343237, "grad_norm": 266.2514953613281, "learning_rate": 3.692e-06, "loss": 24.6773, "step": 18460 }, { "epoch": 0.03731056856700752, "grad_norm": 536.02099609375, "learning_rate": 3.6940000000000005e-06, "loss": 21.7613, "step": 18470 }, { "epoch": 0.03733076919969133, "grad_norm": 332.672119140625, "learning_rate": 3.6960000000000003e-06, "loss": 32.3539, "step": 18480 }, { "epoch": 0.03735096983237515, "grad_norm": 308.9702453613281, "learning_rate": 3.6980000000000004e-06, "loss": 26.1693, "step": 18490 }, { "epoch": 0.037371170465058966, "grad_norm": 611.457763671875, 
"learning_rate": 3.7e-06, "loss": 27.8067, "step": 18500 }, { "epoch": 0.03739137109774278, "grad_norm": 357.3343505859375, "learning_rate": 3.702e-06, "loss": 16.9885, "step": 18510 }, { "epoch": 0.0374115717304266, "grad_norm": 1875.06591796875, "learning_rate": 3.7040000000000005e-06, "loss": 27.6352, "step": 18520 }, { "epoch": 0.03743177236311041, "grad_norm": 583.4190673828125, "learning_rate": 3.706e-06, "loss": 38.8018, "step": 18530 }, { "epoch": 0.037451972995794225, "grad_norm": 63.66743469238281, "learning_rate": 3.7080000000000003e-06, "loss": 23.2965, "step": 18540 }, { "epoch": 0.037472173628478045, "grad_norm": 179.1241912841797, "learning_rate": 3.7100000000000005e-06, "loss": 26.9773, "step": 18550 }, { "epoch": 0.03749237426116186, "grad_norm": 284.5545654296875, "learning_rate": 3.712e-06, "loss": 25.3305, "step": 18560 }, { "epoch": 0.03751257489384568, "grad_norm": 436.8062744140625, "learning_rate": 3.7140000000000004e-06, "loss": 54.7485, "step": 18570 }, { "epoch": 0.03753277552652949, "grad_norm": 298.08514404296875, "learning_rate": 3.716e-06, "loss": 26.955, "step": 18580 }, { "epoch": 0.037552976159213304, "grad_norm": 176.87818908691406, "learning_rate": 3.7180000000000007e-06, "loss": 24.4092, "step": 18590 }, { "epoch": 0.037573176791897124, "grad_norm": 442.5617980957031, "learning_rate": 3.7200000000000004e-06, "loss": 37.3868, "step": 18600 }, { "epoch": 0.03759337742458094, "grad_norm": 348.6301574707031, "learning_rate": 3.722e-06, "loss": 17.5085, "step": 18610 }, { "epoch": 0.03761357805726475, "grad_norm": 178.97604370117188, "learning_rate": 3.7240000000000003e-06, "loss": 27.4701, "step": 18620 }, { "epoch": 0.03763377868994857, "grad_norm": 574.3960571289062, "learning_rate": 3.726e-06, "loss": 26.0964, "step": 18630 }, { "epoch": 0.03765397932263238, "grad_norm": 278.8275146484375, "learning_rate": 3.7280000000000006e-06, "loss": 48.7265, "step": 18640 }, { "epoch": 0.0376741799553162, "grad_norm": 816.7874755859375, "learning_rate": 3.7300000000000003e-06, "loss": 43.5803, "step": 18650 }, { "epoch": 0.037694380588000016, "grad_norm": 453.5743713378906, "learning_rate": 3.732e-06, "loss": 33.0941, "step": 18660 }, { "epoch": 0.03771458122068383, "grad_norm": 672.2986450195312, "learning_rate": 3.7340000000000006e-06, "loss": 34.2774, "step": 18670 }, { "epoch": 0.03773478185336765, "grad_norm": 443.0143127441406, "learning_rate": 3.7360000000000003e-06, "loss": 20.1083, "step": 18680 }, { "epoch": 0.03775498248605146, "grad_norm": 176.78070068359375, "learning_rate": 3.7380000000000005e-06, "loss": 14.7314, "step": 18690 }, { "epoch": 0.037775183118735275, "grad_norm": 384.6247253417969, "learning_rate": 3.74e-06, "loss": 10.5879, "step": 18700 }, { "epoch": 0.037795383751419095, "grad_norm": 291.50128173828125, "learning_rate": 3.742e-06, "loss": 39.9164, "step": 18710 }, { "epoch": 0.03781558438410291, "grad_norm": 596.7171630859375, "learning_rate": 3.7440000000000005e-06, "loss": 38.8251, "step": 18720 }, { "epoch": 0.03783578501678673, "grad_norm": 798.280029296875, "learning_rate": 3.7460000000000002e-06, "loss": 18.4048, "step": 18730 }, { "epoch": 0.03785598564947054, "grad_norm": 673.656494140625, "learning_rate": 3.7480000000000004e-06, "loss": 25.538, "step": 18740 }, { "epoch": 0.037876186282154355, "grad_norm": 384.20733642578125, "learning_rate": 3.7500000000000005e-06, "loss": 30.774, "step": 18750 }, { "epoch": 0.037896386914838175, "grad_norm": 381.9043273925781, "learning_rate": 3.7520000000000002e-06, "loss": 34.7811, "step": 
18760 }, { "epoch": 0.03791658754752199, "grad_norm": 290.19525146484375, "learning_rate": 3.7540000000000004e-06, "loss": 17.331, "step": 18770 }, { "epoch": 0.0379367881802058, "grad_norm": 447.8214416503906, "learning_rate": 3.756e-06, "loss": 22.043, "step": 18780 }, { "epoch": 0.03795698881288962, "grad_norm": 715.4262084960938, "learning_rate": 3.7580000000000007e-06, "loss": 19.8474, "step": 18790 }, { "epoch": 0.037977189445573434, "grad_norm": 296.80535888671875, "learning_rate": 3.7600000000000004e-06, "loss": 26.4522, "step": 18800 }, { "epoch": 0.037997390078257254, "grad_norm": 809.5372314453125, "learning_rate": 3.762e-06, "loss": 27.2477, "step": 18810 }, { "epoch": 0.03801759071094107, "grad_norm": 95.88043975830078, "learning_rate": 3.7640000000000003e-06, "loss": 28.4378, "step": 18820 }, { "epoch": 0.03803779134362488, "grad_norm": 326.20733642578125, "learning_rate": 3.766e-06, "loss": 19.4922, "step": 18830 }, { "epoch": 0.0380579919763087, "grad_norm": 882.5034790039062, "learning_rate": 3.7680000000000006e-06, "loss": 27.9748, "step": 18840 }, { "epoch": 0.03807819260899251, "grad_norm": 727.4618530273438, "learning_rate": 3.7700000000000003e-06, "loss": 25.738, "step": 18850 }, { "epoch": 0.038098393241676326, "grad_norm": 28.941675186157227, "learning_rate": 3.772e-06, "loss": 19.6749, "step": 18860 }, { "epoch": 0.038118593874360146, "grad_norm": 410.4114990234375, "learning_rate": 3.7740000000000006e-06, "loss": 24.7041, "step": 18870 }, { "epoch": 0.03813879450704396, "grad_norm": 427.81427001953125, "learning_rate": 3.7760000000000004e-06, "loss": 16.8343, "step": 18880 }, { "epoch": 0.03815899513972778, "grad_norm": 459.8018798828125, "learning_rate": 3.7780000000000005e-06, "loss": 30.7509, "step": 18890 }, { "epoch": 0.03817919577241159, "grad_norm": 1102.4735107421875, "learning_rate": 3.7800000000000002e-06, "loss": 25.0651, "step": 18900 }, { "epoch": 0.038199396405095405, "grad_norm": 731.9132690429688, "learning_rate": 3.782e-06, "loss": 31.4007, "step": 18910 }, { "epoch": 0.038219597037779225, "grad_norm": 449.3602294921875, "learning_rate": 3.7840000000000005e-06, "loss": 26.8892, "step": 18920 }, { "epoch": 0.03823979767046304, "grad_norm": 1201.197509765625, "learning_rate": 3.7860000000000003e-06, "loss": 30.9846, "step": 18930 }, { "epoch": 0.03825999830314685, "grad_norm": 785.8412475585938, "learning_rate": 3.7880000000000004e-06, "loss": 18.5037, "step": 18940 }, { "epoch": 0.03828019893583067, "grad_norm": 443.92962646484375, "learning_rate": 3.79e-06, "loss": 37.715, "step": 18950 }, { "epoch": 0.038300399568514484, "grad_norm": 1796.4639892578125, "learning_rate": 3.7920000000000003e-06, "loss": 32.3944, "step": 18960 }, { "epoch": 0.038320600201198304, "grad_norm": 455.0303955078125, "learning_rate": 3.7940000000000004e-06, "loss": 25.4756, "step": 18970 }, { "epoch": 0.03834080083388212, "grad_norm": 473.8932800292969, "learning_rate": 3.796e-06, "loss": 18.867, "step": 18980 }, { "epoch": 0.03836100146656593, "grad_norm": 446.3844299316406, "learning_rate": 3.7980000000000007e-06, "loss": 23.4951, "step": 18990 }, { "epoch": 0.03838120209924975, "grad_norm": 151.72618103027344, "learning_rate": 3.8000000000000005e-06, "loss": 26.9601, "step": 19000 }, { "epoch": 0.038401402731933564, "grad_norm": 473.8542785644531, "learning_rate": 3.802e-06, "loss": 8.5569, "step": 19010 }, { "epoch": 0.03842160336461738, "grad_norm": 406.4317626953125, "learning_rate": 3.8040000000000003e-06, "loss": 33.9643, "step": 19020 }, { "epoch": 
0.0384418039973012, "grad_norm": 921.1906127929688, "learning_rate": 3.806e-06, "loss": 41.9266, "step": 19030 }, { "epoch": 0.03846200462998501, "grad_norm": 478.7823486328125, "learning_rate": 3.8080000000000006e-06, "loss": 27.2125, "step": 19040 }, { "epoch": 0.03848220526266883, "grad_norm": 218.63922119140625, "learning_rate": 3.8100000000000004e-06, "loss": 24.6184, "step": 19050 }, { "epoch": 0.03850240589535264, "grad_norm": 1037.8466796875, "learning_rate": 3.812e-06, "loss": 28.6672, "step": 19060 }, { "epoch": 0.038522606528036456, "grad_norm": 1018.7424926757812, "learning_rate": 3.8140000000000007e-06, "loss": 20.6852, "step": 19070 }, { "epoch": 0.038542807160720276, "grad_norm": 256.0588073730469, "learning_rate": 3.816e-06, "loss": 13.2979, "step": 19080 }, { "epoch": 0.03856300779340409, "grad_norm": 489.6903381347656, "learning_rate": 3.818e-06, "loss": 29.6457, "step": 19090 }, { "epoch": 0.0385832084260879, "grad_norm": 459.4139709472656, "learning_rate": 3.820000000000001e-06, "loss": 29.233, "step": 19100 }, { "epoch": 0.03860340905877172, "grad_norm": 996.184326171875, "learning_rate": 3.822e-06, "loss": 33.8466, "step": 19110 }, { "epoch": 0.038623609691455535, "grad_norm": 690.0992431640625, "learning_rate": 3.824e-06, "loss": 19.5614, "step": 19120 }, { "epoch": 0.038643810324139355, "grad_norm": 806.515869140625, "learning_rate": 3.826e-06, "loss": 19.1219, "step": 19130 }, { "epoch": 0.03866401095682317, "grad_norm": 422.1471862792969, "learning_rate": 3.8280000000000004e-06, "loss": 63.3176, "step": 19140 }, { "epoch": 0.03868421158950698, "grad_norm": 559.8536376953125, "learning_rate": 3.830000000000001e-06, "loss": 24.9242, "step": 19150 }, { "epoch": 0.0387044122221908, "grad_norm": 107.43516540527344, "learning_rate": 3.832e-06, "loss": 21.9817, "step": 19160 }, { "epoch": 0.038724612854874614, "grad_norm": 413.239013671875, "learning_rate": 3.834000000000001e-06, "loss": 12.4822, "step": 19170 }, { "epoch": 0.03874481348755843, "grad_norm": 283.6053161621094, "learning_rate": 3.836e-06, "loss": 35.2526, "step": 19180 }, { "epoch": 0.03876501412024225, "grad_norm": 451.2077941894531, "learning_rate": 3.838e-06, "loss": 14.6541, "step": 19190 }, { "epoch": 0.03878521475292606, "grad_norm": 488.7024230957031, "learning_rate": 3.8400000000000005e-06, "loss": 32.0479, "step": 19200 }, { "epoch": 0.03880541538560988, "grad_norm": 913.99755859375, "learning_rate": 3.842e-06, "loss": 24.4827, "step": 19210 }, { "epoch": 0.03882561601829369, "grad_norm": 368.6611022949219, "learning_rate": 3.844000000000001e-06, "loss": 22.7566, "step": 19220 }, { "epoch": 0.038845816650977506, "grad_norm": 582.5567016601562, "learning_rate": 3.846e-06, "loss": 18.064, "step": 19230 }, { "epoch": 0.038866017283661326, "grad_norm": 546.2643432617188, "learning_rate": 3.848e-06, "loss": 30.0649, "step": 19240 }, { "epoch": 0.03888621791634514, "grad_norm": 701.2210693359375, "learning_rate": 3.85e-06, "loss": 40.8547, "step": 19250 }, { "epoch": 0.03890641854902895, "grad_norm": 1697.3839111328125, "learning_rate": 3.8520000000000006e-06, "loss": 28.2806, "step": 19260 }, { "epoch": 0.03892661918171277, "grad_norm": 800.674560546875, "learning_rate": 3.854000000000001e-06, "loss": 25.5319, "step": 19270 }, { "epoch": 0.038946819814396585, "grad_norm": 330.03033447265625, "learning_rate": 3.856e-06, "loss": 25.4214, "step": 19280 }, { "epoch": 0.038967020447080405, "grad_norm": 159.86805725097656, "learning_rate": 3.858e-06, "loss": 15.4458, "step": 19290 }, { "epoch": 
0.03898722107976422, "grad_norm": 540.61474609375, "learning_rate": 3.86e-06, "loss": 37.4303, "step": 19300 }, { "epoch": 0.03900742171244803, "grad_norm": 577.3270263671875, "learning_rate": 3.8620000000000005e-06, "loss": 28.7745, "step": 19310 }, { "epoch": 0.03902762234513185, "grad_norm": 161.7196502685547, "learning_rate": 3.864000000000001e-06, "loss": 23.6797, "step": 19320 }, { "epoch": 0.039047822977815665, "grad_norm": 377.9838562011719, "learning_rate": 3.866e-06, "loss": 24.1954, "step": 19330 }, { "epoch": 0.03906802361049948, "grad_norm": 531.781005859375, "learning_rate": 3.868e-06, "loss": 25.0392, "step": 19340 }, { "epoch": 0.0390882242431833, "grad_norm": 175.0643310546875, "learning_rate": 3.87e-06, "loss": 26.4424, "step": 19350 }, { "epoch": 0.03910842487586711, "grad_norm": 490.6230163574219, "learning_rate": 3.872e-06, "loss": 30.3526, "step": 19360 }, { "epoch": 0.03912862550855093, "grad_norm": 653.54736328125, "learning_rate": 3.8740000000000005e-06, "loss": 36.0134, "step": 19370 }, { "epoch": 0.039148826141234744, "grad_norm": 785.630615234375, "learning_rate": 3.876000000000001e-06, "loss": 29.7962, "step": 19380 }, { "epoch": 0.03916902677391856, "grad_norm": 442.81591796875, "learning_rate": 3.878e-06, "loss": 29.3342, "step": 19390 }, { "epoch": 0.03918922740660238, "grad_norm": 284.63079833984375, "learning_rate": 3.88e-06, "loss": 33.5628, "step": 19400 }, { "epoch": 0.03920942803928619, "grad_norm": 1588.289306640625, "learning_rate": 3.882e-06, "loss": 41.3988, "step": 19410 }, { "epoch": 0.03922962867197, "grad_norm": 248.40194702148438, "learning_rate": 3.884e-06, "loss": 31.8697, "step": 19420 }, { "epoch": 0.03924982930465382, "grad_norm": 332.9631042480469, "learning_rate": 3.8860000000000006e-06, "loss": 21.1231, "step": 19430 }, { "epoch": 0.039270029937337636, "grad_norm": 978.757080078125, "learning_rate": 3.888e-06, "loss": 39.1596, "step": 19440 }, { "epoch": 0.039290230570021456, "grad_norm": 407.6144104003906, "learning_rate": 3.89e-06, "loss": 12.5934, "step": 19450 }, { "epoch": 0.03931043120270527, "grad_norm": 265.3919372558594, "learning_rate": 3.892e-06, "loss": 21.8525, "step": 19460 }, { "epoch": 0.03933063183538908, "grad_norm": 401.0389709472656, "learning_rate": 3.894e-06, "loss": 28.4071, "step": 19470 }, { "epoch": 0.0393508324680729, "grad_norm": 441.74005126953125, "learning_rate": 3.8960000000000005e-06, "loss": 18.6537, "step": 19480 }, { "epoch": 0.039371033100756715, "grad_norm": 791.5307006835938, "learning_rate": 3.898e-06, "loss": 21.6818, "step": 19490 }, { "epoch": 0.03939123373344053, "grad_norm": 941.1528930664062, "learning_rate": 3.900000000000001e-06, "loss": 36.7498, "step": 19500 }, { "epoch": 0.03941143436612435, "grad_norm": 551.3410034179688, "learning_rate": 3.902e-06, "loss": 28.612, "step": 19510 }, { "epoch": 0.03943163499880816, "grad_norm": 1068.077392578125, "learning_rate": 3.904e-06, "loss": 30.1779, "step": 19520 }, { "epoch": 0.03945183563149198, "grad_norm": 702.95263671875, "learning_rate": 3.906e-06, "loss": 12.9496, "step": 19530 }, { "epoch": 0.039472036264175794, "grad_norm": 757.014404296875, "learning_rate": 3.9080000000000005e-06, "loss": 39.9891, "step": 19540 }, { "epoch": 0.03949223689685961, "grad_norm": 745.016845703125, "learning_rate": 3.910000000000001e-06, "loss": 25.0764, "step": 19550 }, { "epoch": 0.03951243752954343, "grad_norm": 836.84912109375, "learning_rate": 3.912e-06, "loss": 11.6613, "step": 19560 }, { "epoch": 0.03953263816222724, "grad_norm": 751.01220703125, 
"learning_rate": 3.914000000000001e-06, "loss": 28.168, "step": 19570 }, { "epoch": 0.039552838794911054, "grad_norm": 424.0301208496094, "learning_rate": 3.916e-06, "loss": 37.3475, "step": 19580 }, { "epoch": 0.039573039427594874, "grad_norm": 500.0715026855469, "learning_rate": 3.9180000000000004e-06, "loss": 17.2219, "step": 19590 }, { "epoch": 0.03959324006027869, "grad_norm": 280.797607421875, "learning_rate": 3.920000000000001e-06, "loss": 24.3042, "step": 19600 }, { "epoch": 0.03961344069296251, "grad_norm": 421.28741455078125, "learning_rate": 3.922e-06, "loss": 51.7029, "step": 19610 }, { "epoch": 0.03963364132564632, "grad_norm": 663.9564208984375, "learning_rate": 3.924000000000001e-06, "loss": 25.3921, "step": 19620 }, { "epoch": 0.03965384195833013, "grad_norm": 618.3223876953125, "learning_rate": 3.926e-06, "loss": 22.4242, "step": 19630 }, { "epoch": 0.03967404259101395, "grad_norm": 374.6733093261719, "learning_rate": 3.928e-06, "loss": 27.1011, "step": 19640 }, { "epoch": 0.039694243223697766, "grad_norm": 702.5897827148438, "learning_rate": 3.9300000000000005e-06, "loss": 34.1476, "step": 19650 }, { "epoch": 0.03971444385638158, "grad_norm": 373.4047546386719, "learning_rate": 3.932000000000001e-06, "loss": 18.5248, "step": 19660 }, { "epoch": 0.0397346444890654, "grad_norm": 535.1874389648438, "learning_rate": 3.934000000000001e-06, "loss": 38.1432, "step": 19670 }, { "epoch": 0.03975484512174921, "grad_norm": 1303.091796875, "learning_rate": 3.936e-06, "loss": 26.7679, "step": 19680 }, { "epoch": 0.03977504575443303, "grad_norm": 1000.7589721679688, "learning_rate": 3.938e-06, "loss": 21.4519, "step": 19690 }, { "epoch": 0.039795246387116845, "grad_norm": 316.948486328125, "learning_rate": 3.94e-06, "loss": 21.0921, "step": 19700 }, { "epoch": 0.03981544701980066, "grad_norm": 554.349609375, "learning_rate": 3.9420000000000005e-06, "loss": 32.6151, "step": 19710 }, { "epoch": 0.03983564765248448, "grad_norm": 569.5929565429688, "learning_rate": 3.944e-06, "loss": 34.9574, "step": 19720 }, { "epoch": 0.03985584828516829, "grad_norm": 199.5447540283203, "learning_rate": 3.946e-06, "loss": 27.4674, "step": 19730 }, { "epoch": 0.039876048917852104, "grad_norm": 492.1817626953125, "learning_rate": 3.948e-06, "loss": 31.3917, "step": 19740 }, { "epoch": 0.039896249550535924, "grad_norm": 1029.6348876953125, "learning_rate": 3.95e-06, "loss": 24.2852, "step": 19750 }, { "epoch": 0.03991645018321974, "grad_norm": 380.540283203125, "learning_rate": 3.9520000000000004e-06, "loss": 16.6415, "step": 19760 }, { "epoch": 0.03993665081590356, "grad_norm": 436.20355224609375, "learning_rate": 3.954e-06, "loss": 15.9499, "step": 19770 }, { "epoch": 0.03995685144858737, "grad_norm": 396.8489990234375, "learning_rate": 3.956000000000001e-06, "loss": 19.8399, "step": 19780 }, { "epoch": 0.03997705208127118, "grad_norm": 664.4743041992188, "learning_rate": 3.958e-06, "loss": 21.8307, "step": 19790 }, { "epoch": 0.039997252713955, "grad_norm": 334.7715759277344, "learning_rate": 3.96e-06, "loss": 27.2744, "step": 19800 }, { "epoch": 0.040017453346638816, "grad_norm": 503.5437927246094, "learning_rate": 3.962e-06, "loss": 33.6565, "step": 19810 }, { "epoch": 0.04003765397932263, "grad_norm": 355.5002746582031, "learning_rate": 3.964e-06, "loss": 43.4189, "step": 19820 }, { "epoch": 0.04005785461200645, "grad_norm": 397.135009765625, "learning_rate": 3.966000000000001e-06, "loss": 32.1225, "step": 19830 }, { "epoch": 0.04007805524469026, "grad_norm": 1114.3900146484375, "learning_rate": 
3.968e-06, "loss": 27.9584, "step": 19840 }, { "epoch": 0.04009825587737408, "grad_norm": 305.44964599609375, "learning_rate": 3.97e-06, "loss": 29.9003, "step": 19850 }, { "epoch": 0.040118456510057895, "grad_norm": 609.80615234375, "learning_rate": 3.972e-06, "loss": 36.946, "step": 19860 }, { "epoch": 0.04013865714274171, "grad_norm": 6.036598205566406, "learning_rate": 3.974e-06, "loss": 9.1207, "step": 19870 }, { "epoch": 0.04015885777542553, "grad_norm": 352.4873962402344, "learning_rate": 3.9760000000000006e-06, "loss": 15.7391, "step": 19880 }, { "epoch": 0.04017905840810934, "grad_norm": 734.5411987304688, "learning_rate": 3.978e-06, "loss": 17.07, "step": 19890 }, { "epoch": 0.040199259040793155, "grad_norm": 532.0252685546875, "learning_rate": 3.980000000000001e-06, "loss": 31.1751, "step": 19900 }, { "epoch": 0.040219459673476975, "grad_norm": 639.2634887695312, "learning_rate": 3.982e-06, "loss": 41.4132, "step": 19910 }, { "epoch": 0.04023966030616079, "grad_norm": 328.55352783203125, "learning_rate": 3.984e-06, "loss": 16.7614, "step": 19920 }, { "epoch": 0.04025986093884461, "grad_norm": 298.96435546875, "learning_rate": 3.9860000000000005e-06, "loss": 44.6898, "step": 19930 }, { "epoch": 0.04028006157152842, "grad_norm": 578.205322265625, "learning_rate": 3.988000000000001e-06, "loss": 20.3982, "step": 19940 }, { "epoch": 0.040300262204212234, "grad_norm": 367.17327880859375, "learning_rate": 3.990000000000001e-06, "loss": 30.3627, "step": 19950 }, { "epoch": 0.040320462836896054, "grad_norm": 356.40936279296875, "learning_rate": 3.992e-06, "loss": 24.3473, "step": 19960 }, { "epoch": 0.04034066346957987, "grad_norm": 2358.637939453125, "learning_rate": 3.994e-06, "loss": 26.5293, "step": 19970 }, { "epoch": 0.04036086410226368, "grad_norm": 329.34173583984375, "learning_rate": 3.996e-06, "loss": 31.2316, "step": 19980 }, { "epoch": 0.0403810647349475, "grad_norm": 141.7036590576172, "learning_rate": 3.9980000000000005e-06, "loss": 18.3415, "step": 19990 }, { "epoch": 0.04040126536763131, "grad_norm": 246.96900939941406, "learning_rate": 4.000000000000001e-06, "loss": 21.3769, "step": 20000 }, { "epoch": 0.04042146600031513, "grad_norm": 492.83416748046875, "learning_rate": 4.002e-06, "loss": 22.297, "step": 20010 }, { "epoch": 0.040441666632998946, "grad_norm": 244.3651885986328, "learning_rate": 4.004e-06, "loss": 14.4005, "step": 20020 }, { "epoch": 0.04046186726568276, "grad_norm": 555.0216674804688, "learning_rate": 4.006e-06, "loss": 23.6152, "step": 20030 }, { "epoch": 0.04048206789836658, "grad_norm": 803.3456420898438, "learning_rate": 4.008e-06, "loss": 30.0964, "step": 20040 }, { "epoch": 0.04050226853105039, "grad_norm": 895.5619506835938, "learning_rate": 4.0100000000000006e-06, "loss": 28.1923, "step": 20050 }, { "epoch": 0.040522469163734205, "grad_norm": 160.98851013183594, "learning_rate": 4.012000000000001e-06, "loss": 41.5665, "step": 20060 }, { "epoch": 0.040542669796418025, "grad_norm": 487.1036682128906, "learning_rate": 4.014e-06, "loss": 29.195, "step": 20070 }, { "epoch": 0.04056287042910184, "grad_norm": 658.0955200195312, "learning_rate": 4.016e-06, "loss": 22.3123, "step": 20080 }, { "epoch": 0.04058307106178566, "grad_norm": 512.8436279296875, "learning_rate": 4.018e-06, "loss": 30.6884, "step": 20090 }, { "epoch": 0.04060327169446947, "grad_norm": 20.911073684692383, "learning_rate": 4.0200000000000005e-06, "loss": 14.7529, "step": 20100 }, { "epoch": 0.040623472327153284, "grad_norm": 220.57025146484375, "learning_rate": 
4.022000000000001e-06, "loss": 21.3989, "step": 20110 }, { "epoch": 0.040643672959837104, "grad_norm": 1230.2901611328125, "learning_rate": 4.024e-06, "loss": 22.7145, "step": 20120 }, { "epoch": 0.04066387359252092, "grad_norm": 375.266357421875, "learning_rate": 4.026e-06, "loss": 26.7046, "step": 20130 }, { "epoch": 0.04068407422520473, "grad_norm": 627.1782836914062, "learning_rate": 4.028e-06, "loss": 30.1456, "step": 20140 }, { "epoch": 0.04070427485788855, "grad_norm": 146.41339111328125, "learning_rate": 4.03e-06, "loss": 22.8548, "step": 20150 }, { "epoch": 0.040724475490572364, "grad_norm": 1083.543701171875, "learning_rate": 4.0320000000000005e-06, "loss": 37.469, "step": 20160 }, { "epoch": 0.040744676123256184, "grad_norm": 253.15589904785156, "learning_rate": 4.034e-06, "loss": 30.0814, "step": 20170 }, { "epoch": 0.04076487675594, "grad_norm": 460.3359069824219, "learning_rate": 4.036000000000001e-06, "loss": 38.5531, "step": 20180 }, { "epoch": 0.04078507738862381, "grad_norm": 923.38525390625, "learning_rate": 4.038e-06, "loss": 30.4211, "step": 20190 }, { "epoch": 0.04080527802130763, "grad_norm": 426.0430908203125, "learning_rate": 4.04e-06, "loss": 24.8516, "step": 20200 }, { "epoch": 0.04082547865399144, "grad_norm": 599.9994506835938, "learning_rate": 4.0420000000000004e-06, "loss": 24.7395, "step": 20210 }, { "epoch": 0.040845679286675256, "grad_norm": 1300.9736328125, "learning_rate": 4.044e-06, "loss": 30.9326, "step": 20220 }, { "epoch": 0.040865879919359076, "grad_norm": 230.04566955566406, "learning_rate": 4.046000000000001e-06, "loss": 13.643, "step": 20230 }, { "epoch": 0.04088608055204289, "grad_norm": 585.1691284179688, "learning_rate": 4.048e-06, "loss": 29.9727, "step": 20240 }, { "epoch": 0.04090628118472671, "grad_norm": 885.5676879882812, "learning_rate": 4.05e-06, "loss": 29.2169, "step": 20250 }, { "epoch": 0.04092648181741052, "grad_norm": 555.5040893554688, "learning_rate": 4.052e-06, "loss": 44.2696, "step": 20260 }, { "epoch": 0.040946682450094335, "grad_norm": 752.9579467773438, "learning_rate": 4.0540000000000005e-06, "loss": 21.3835, "step": 20270 }, { "epoch": 0.040966883082778155, "grad_norm": 697.6809692382812, "learning_rate": 4.056000000000001e-06, "loss": 30.0799, "step": 20280 }, { "epoch": 0.04098708371546197, "grad_norm": 551.8582763671875, "learning_rate": 4.058e-06, "loss": 27.5682, "step": 20290 }, { "epoch": 0.04100728434814578, "grad_norm": 641.0758666992188, "learning_rate": 4.060000000000001e-06, "loss": 23.9342, "step": 20300 }, { "epoch": 0.0410274849808296, "grad_norm": 747.5087280273438, "learning_rate": 4.062e-06, "loss": 30.3778, "step": 20310 }, { "epoch": 0.041047685613513414, "grad_norm": 1012.8115844726562, "learning_rate": 4.064e-06, "loss": 27.7297, "step": 20320 }, { "epoch": 0.041067886246197234, "grad_norm": 462.903076171875, "learning_rate": 4.0660000000000005e-06, "loss": 26.1605, "step": 20330 }, { "epoch": 0.04108808687888105, "grad_norm": 692.3569946289062, "learning_rate": 4.068000000000001e-06, "loss": 33.3362, "step": 20340 }, { "epoch": 0.04110828751156486, "grad_norm": 91.17338562011719, "learning_rate": 4.07e-06, "loss": 32.5818, "step": 20350 }, { "epoch": 0.04112848814424868, "grad_norm": 314.2921142578125, "learning_rate": 4.072e-06, "loss": 22.8175, "step": 20360 }, { "epoch": 0.04114868877693249, "grad_norm": 198.06179809570312, "learning_rate": 4.074e-06, "loss": 40.0786, "step": 20370 }, { "epoch": 0.041168889409616306, "grad_norm": 1603.201171875, "learning_rate": 4.0760000000000004e-06, "loss": 
38.0158, "step": 20380 }, { "epoch": 0.041189090042300126, "grad_norm": 375.8652038574219, "learning_rate": 4.078000000000001e-06, "loss": 19.2669, "step": 20390 }, { "epoch": 0.04120929067498394, "grad_norm": 589.5870361328125, "learning_rate": 4.08e-06, "loss": 21.8247, "step": 20400 }, { "epoch": 0.04122949130766776, "grad_norm": 775.0773315429688, "learning_rate": 4.082e-06, "loss": 27.2401, "step": 20410 }, { "epoch": 0.04124969194035157, "grad_norm": 568.6592407226562, "learning_rate": 4.084e-06, "loss": 21.7321, "step": 20420 }, { "epoch": 0.041269892573035385, "grad_norm": 523.823486328125, "learning_rate": 4.086e-06, "loss": 34.5144, "step": 20430 }, { "epoch": 0.041290093205719205, "grad_norm": 831.2176513671875, "learning_rate": 4.0880000000000005e-06, "loss": 32.7675, "step": 20440 }, { "epoch": 0.04131029383840302, "grad_norm": 158.96939086914062, "learning_rate": 4.09e-06, "loss": 19.2685, "step": 20450 }, { "epoch": 0.04133049447108683, "grad_norm": 521.3656005859375, "learning_rate": 4.092000000000001e-06, "loss": 17.2066, "step": 20460 }, { "epoch": 0.04135069510377065, "grad_norm": 206.03237915039062, "learning_rate": 4.094e-06, "loss": 23.385, "step": 20470 }, { "epoch": 0.041370895736454465, "grad_norm": 878.8639526367188, "learning_rate": 4.096e-06, "loss": 30.1263, "step": 20480 }, { "epoch": 0.041391096369138285, "grad_norm": 310.53326416015625, "learning_rate": 4.098e-06, "loss": 36.0104, "step": 20490 }, { "epoch": 0.0414112970018221, "grad_norm": 447.5044250488281, "learning_rate": 4.1e-06, "loss": 46.8563, "step": 20500 }, { "epoch": 0.04143149763450591, "grad_norm": 219.1395721435547, "learning_rate": 4.102000000000001e-06, "loss": 16.7209, "step": 20510 }, { "epoch": 0.04145169826718973, "grad_norm": 788.0994262695312, "learning_rate": 4.104e-06, "loss": 25.6534, "step": 20520 }, { "epoch": 0.041471898899873544, "grad_norm": 446.28533935546875, "learning_rate": 4.106e-06, "loss": 26.2097, "step": 20530 }, { "epoch": 0.04149209953255736, "grad_norm": 286.25164794921875, "learning_rate": 4.108e-06, "loss": 23.721, "step": 20540 }, { "epoch": 0.04151230016524118, "grad_norm": 1015.564208984375, "learning_rate": 4.1100000000000005e-06, "loss": 26.9486, "step": 20550 }, { "epoch": 0.04153250079792499, "grad_norm": 386.4916076660156, "learning_rate": 4.112000000000001e-06, "loss": 37.2864, "step": 20560 }, { "epoch": 0.04155270143060881, "grad_norm": 321.2320861816406, "learning_rate": 4.114e-06, "loss": 18.8058, "step": 20570 }, { "epoch": 0.04157290206329262, "grad_norm": 380.9525451660156, "learning_rate": 4.116000000000001e-06, "loss": 17.8627, "step": 20580 }, { "epoch": 0.041593102695976436, "grad_norm": 767.234619140625, "learning_rate": 4.118e-06, "loss": 31.6556, "step": 20590 }, { "epoch": 0.041613303328660256, "grad_norm": 434.169189453125, "learning_rate": 4.12e-06, "loss": 25.3457, "step": 20600 }, { "epoch": 0.04163350396134407, "grad_norm": 490.3370361328125, "learning_rate": 4.1220000000000005e-06, "loss": 17.2889, "step": 20610 }, { "epoch": 0.04165370459402788, "grad_norm": 480.8441467285156, "learning_rate": 4.124e-06, "loss": 26.1735, "step": 20620 }, { "epoch": 0.0416739052267117, "grad_norm": 275.5316162109375, "learning_rate": 4.126000000000001e-06, "loss": 27.8963, "step": 20630 }, { "epoch": 0.041694105859395515, "grad_norm": 215.60633850097656, "learning_rate": 4.128e-06, "loss": 36.1411, "step": 20640 }, { "epoch": 0.041714306492079335, "grad_norm": 645.2214965820312, "learning_rate": 4.13e-06, "loss": 22.818, "step": 20650 }, { "epoch": 
0.04173450712476315, "grad_norm": 339.7776184082031, "learning_rate": 4.132e-06, "loss": 12.7336, "step": 20660 }, { "epoch": 0.04175470775744696, "grad_norm": 540.5206298828125, "learning_rate": 4.1340000000000006e-06, "loss": 16.8377, "step": 20670 }, { "epoch": 0.04177490839013078, "grad_norm": 492.8855285644531, "learning_rate": 4.136000000000001e-06, "loss": 19.1989, "step": 20680 }, { "epoch": 0.041795109022814594, "grad_norm": 551.1146850585938, "learning_rate": 4.138e-06, "loss": 23.7704, "step": 20690 }, { "epoch": 0.04181530965549841, "grad_norm": 675.3240966796875, "learning_rate": 4.14e-06, "loss": 26.2867, "step": 20700 }, { "epoch": 0.04183551028818223, "grad_norm": 690.68505859375, "learning_rate": 4.142e-06, "loss": 18.1141, "step": 20710 }, { "epoch": 0.04185571092086604, "grad_norm": 486.8795166015625, "learning_rate": 4.1440000000000005e-06, "loss": 20.9204, "step": 20720 }, { "epoch": 0.04187591155354986, "grad_norm": 317.1226501464844, "learning_rate": 4.146000000000001e-06, "loss": 22.8506, "step": 20730 }, { "epoch": 0.041896112186233674, "grad_norm": 1490.643798828125, "learning_rate": 4.148000000000001e-06, "loss": 48.6961, "step": 20740 }, { "epoch": 0.04191631281891749, "grad_norm": 782.0410766601562, "learning_rate": 4.15e-06, "loss": 37.6037, "step": 20750 }, { "epoch": 0.04193651345160131, "grad_norm": 582.6185913085938, "learning_rate": 4.152e-06, "loss": 41.3482, "step": 20760 }, { "epoch": 0.04195671408428512, "grad_norm": 272.0113830566406, "learning_rate": 4.154e-06, "loss": 24.9711, "step": 20770 }, { "epoch": 0.04197691471696893, "grad_norm": 79.8502197265625, "learning_rate": 4.1560000000000005e-06, "loss": 24.9636, "step": 20780 }, { "epoch": 0.04199711534965275, "grad_norm": 540.5105590820312, "learning_rate": 4.158000000000001e-06, "loss": 32.4017, "step": 20790 }, { "epoch": 0.042017315982336566, "grad_norm": 407.08599853515625, "learning_rate": 4.16e-06, "loss": 25.5935, "step": 20800 }, { "epoch": 0.042037516615020386, "grad_norm": 374.0444641113281, "learning_rate": 4.162e-06, "loss": 31.7913, "step": 20810 }, { "epoch": 0.0420577172477042, "grad_norm": 336.9476318359375, "learning_rate": 4.164e-06, "loss": 17.5922, "step": 20820 }, { "epoch": 0.04207791788038801, "grad_norm": 281.6684875488281, "learning_rate": 4.1660000000000004e-06, "loss": 36.504, "step": 20830 }, { "epoch": 0.04209811851307183, "grad_norm": 1016.922119140625, "learning_rate": 4.168000000000001e-06, "loss": 16.7514, "step": 20840 }, { "epoch": 0.042118319145755645, "grad_norm": 651.076416015625, "learning_rate": 4.17e-06, "loss": 26.3376, "step": 20850 }, { "epoch": 0.04213851977843946, "grad_norm": 348.99554443359375, "learning_rate": 4.172000000000001e-06, "loss": 23.9043, "step": 20860 }, { "epoch": 0.04215872041112328, "grad_norm": 204.27989196777344, "learning_rate": 4.174e-06, "loss": 19.5862, "step": 20870 }, { "epoch": 0.04217892104380709, "grad_norm": 568.8946533203125, "learning_rate": 4.176e-06, "loss": 41.6269, "step": 20880 }, { "epoch": 0.04219912167649091, "grad_norm": 515.9381713867188, "learning_rate": 4.1780000000000005e-06, "loss": 33.0815, "step": 20890 }, { "epoch": 0.042219322309174724, "grad_norm": 437.7154846191406, "learning_rate": 4.18e-06, "loss": 23.53, "step": 20900 }, { "epoch": 0.04223952294185854, "grad_norm": 603.0464477539062, "learning_rate": 4.182000000000001e-06, "loss": 24.8234, "step": 20910 }, { "epoch": 0.04225972357454236, "grad_norm": 336.46868896484375, "learning_rate": 4.184e-06, "loss": 22.0417, "step": 20920 }, { "epoch": 
0.04227992420722617, "grad_norm": 297.6182556152344, "learning_rate": 4.186e-06, "loss": 27.2698, "step": 20930 }, { "epoch": 0.04230012483990998, "grad_norm": 258.5321044921875, "learning_rate": 4.188e-06, "loss": 12.9438, "step": 20940 }, { "epoch": 0.0423203254725938, "grad_norm": 378.1133117675781, "learning_rate": 4.1900000000000005e-06, "loss": 13.9638, "step": 20950 }, { "epoch": 0.042340526105277616, "grad_norm": 486.2174072265625, "learning_rate": 4.192000000000001e-06, "loss": 25.7613, "step": 20960 }, { "epoch": 0.042360726737961436, "grad_norm": 1096.19482421875, "learning_rate": 4.194e-06, "loss": 17.6337, "step": 20970 }, { "epoch": 0.04238092737064525, "grad_norm": 336.497314453125, "learning_rate": 4.196e-06, "loss": 40.8516, "step": 20980 }, { "epoch": 0.04240112800332906, "grad_norm": 255.76902770996094, "learning_rate": 4.198e-06, "loss": 23.0373, "step": 20990 }, { "epoch": 0.04242132863601288, "grad_norm": 757.0595092773438, "learning_rate": 4.2000000000000004e-06, "loss": 27.2249, "step": 21000 }, { "epoch": 0.042441529268696696, "grad_norm": 809.0731811523438, "learning_rate": 4.202000000000001e-06, "loss": 23.6627, "step": 21010 }, { "epoch": 0.04246172990138051, "grad_norm": 107.8954086303711, "learning_rate": 4.204e-06, "loss": 32.8102, "step": 21020 }, { "epoch": 0.04248193053406433, "grad_norm": 815.7806396484375, "learning_rate": 4.206e-06, "loss": 27.9359, "step": 21030 }, { "epoch": 0.04250213116674814, "grad_norm": 799.5923461914062, "learning_rate": 4.208e-06, "loss": 27.1343, "step": 21040 }, { "epoch": 0.04252233179943196, "grad_norm": 453.0689697265625, "learning_rate": 4.21e-06, "loss": 26.1435, "step": 21050 }, { "epoch": 0.042542532432115775, "grad_norm": 172.1396026611328, "learning_rate": 4.2120000000000005e-06, "loss": 20.6205, "step": 21060 }, { "epoch": 0.04256273306479959, "grad_norm": 497.14508056640625, "learning_rate": 4.214000000000001e-06, "loss": 23.1049, "step": 21070 }, { "epoch": 0.04258293369748341, "grad_norm": 343.8851318359375, "learning_rate": 4.216e-06, "loss": 26.9667, "step": 21080 }, { "epoch": 0.04260313433016722, "grad_norm": 127.44200897216797, "learning_rate": 4.218e-06, "loss": 21.4246, "step": 21090 }, { "epoch": 0.042623334962851034, "grad_norm": 117.90656280517578, "learning_rate": 4.22e-06, "loss": 14.2156, "step": 21100 }, { "epoch": 0.042643535595534854, "grad_norm": 997.7525024414062, "learning_rate": 4.222e-06, "loss": 20.8012, "step": 21110 }, { "epoch": 0.04266373622821867, "grad_norm": 814.050537109375, "learning_rate": 4.2240000000000006e-06, "loss": 22.8416, "step": 21120 }, { "epoch": 0.04268393686090249, "grad_norm": 757.04052734375, "learning_rate": 4.226e-06, "loss": 15.7824, "step": 21130 }, { "epoch": 0.0427041374935863, "grad_norm": 518.5238647460938, "learning_rate": 4.228000000000001e-06, "loss": 22.685, "step": 21140 }, { "epoch": 0.04272433812627011, "grad_norm": 719.8343505859375, "learning_rate": 4.23e-06, "loss": 25.4759, "step": 21150 }, { "epoch": 0.04274453875895393, "grad_norm": 431.3028869628906, "learning_rate": 4.232e-06, "loss": 22.5114, "step": 21160 }, { "epoch": 0.042764739391637746, "grad_norm": 621.0961303710938, "learning_rate": 4.2340000000000005e-06, "loss": 24.1686, "step": 21170 }, { "epoch": 0.04278494002432156, "grad_norm": 486.9888000488281, "learning_rate": 4.236e-06, "loss": 27.8239, "step": 21180 }, { "epoch": 0.04280514065700538, "grad_norm": 252.33909606933594, "learning_rate": 4.238000000000001e-06, "loss": 16.7194, "step": 21190 }, { "epoch": 0.04282534128968919, 
"grad_norm": 347.6598205566406, "learning_rate": 4.24e-06, "loss": 29.6472, "step": 21200 }, { "epoch": 0.04284554192237301, "grad_norm": 56.823944091796875, "learning_rate": 4.242e-06, "loss": 24.4186, "step": 21210 }, { "epoch": 0.042865742555056825, "grad_norm": 464.31768798828125, "learning_rate": 4.244e-06, "loss": 46.5157, "step": 21220 }, { "epoch": 0.04288594318774064, "grad_norm": 642.3323364257812, "learning_rate": 4.2460000000000005e-06, "loss": 37.0643, "step": 21230 }, { "epoch": 0.04290614382042446, "grad_norm": 822.0488891601562, "learning_rate": 4.248000000000001e-06, "loss": 30.2411, "step": 21240 }, { "epoch": 0.04292634445310827, "grad_norm": 329.6660461425781, "learning_rate": 4.25e-06, "loss": 20.6211, "step": 21250 }, { "epoch": 0.042946545085792084, "grad_norm": 846.5374145507812, "learning_rate": 4.252000000000001e-06, "loss": 22.8153, "step": 21260 }, { "epoch": 0.042966745718475904, "grad_norm": 179.9952850341797, "learning_rate": 4.254e-06, "loss": 25.1175, "step": 21270 }, { "epoch": 0.04298694635115972, "grad_norm": 972.83837890625, "learning_rate": 4.256e-06, "loss": 30.9154, "step": 21280 }, { "epoch": 0.04300714698384353, "grad_norm": 552.5194702148438, "learning_rate": 4.2580000000000006e-06, "loss": 24.2954, "step": 21290 }, { "epoch": 0.04302734761652735, "grad_norm": 539.9232177734375, "learning_rate": 4.26e-06, "loss": 24.2965, "step": 21300 }, { "epoch": 0.043047548249211164, "grad_norm": 679.17431640625, "learning_rate": 4.262000000000001e-06, "loss": 23.8783, "step": 21310 }, { "epoch": 0.043067748881894984, "grad_norm": 237.664306640625, "learning_rate": 4.264e-06, "loss": 28.1514, "step": 21320 }, { "epoch": 0.0430879495145788, "grad_norm": 342.8701171875, "learning_rate": 4.266e-06, "loss": 19.4848, "step": 21330 }, { "epoch": 0.04310815014726261, "grad_norm": 330.9326171875, "learning_rate": 4.2680000000000005e-06, "loss": 31.6348, "step": 21340 }, { "epoch": 0.04312835077994643, "grad_norm": 249.25387573242188, "learning_rate": 4.270000000000001e-06, "loss": 17.7517, "step": 21350 }, { "epoch": 0.04314855141263024, "grad_norm": 455.8252868652344, "learning_rate": 4.272000000000001e-06, "loss": 24.1304, "step": 21360 }, { "epoch": 0.043168752045314056, "grad_norm": 480.63372802734375, "learning_rate": 4.274e-06, "loss": 25.5798, "step": 21370 }, { "epoch": 0.043188952677997876, "grad_norm": 395.6570739746094, "learning_rate": 4.276e-06, "loss": 25.7934, "step": 21380 }, { "epoch": 0.04320915331068169, "grad_norm": 392.75115966796875, "learning_rate": 4.278e-06, "loss": 34.5832, "step": 21390 }, { "epoch": 0.04322935394336551, "grad_norm": 539.7963256835938, "learning_rate": 4.2800000000000005e-06, "loss": 25.2617, "step": 21400 }, { "epoch": 0.04324955457604932, "grad_norm": 310.9663391113281, "learning_rate": 4.282000000000001e-06, "loss": 28.5294, "step": 21410 }, { "epoch": 0.043269755208733135, "grad_norm": 250.9676971435547, "learning_rate": 4.284e-06, "loss": 32.0291, "step": 21420 }, { "epoch": 0.043289955841416955, "grad_norm": 267.46063232421875, "learning_rate": 4.286e-06, "loss": 35.4653, "step": 21430 }, { "epoch": 0.04331015647410077, "grad_norm": 443.68096923828125, "learning_rate": 4.288e-06, "loss": 22.1667, "step": 21440 }, { "epoch": 0.04333035710678458, "grad_norm": 655.0978393554688, "learning_rate": 4.2900000000000004e-06, "loss": 20.0005, "step": 21450 }, { "epoch": 0.0433505577394684, "grad_norm": 766.2430419921875, "learning_rate": 4.292000000000001e-06, "loss": 26.8155, "step": 21460 }, { "epoch": 0.043370758372152214, 
"grad_norm": 676.1296997070312, "learning_rate": 4.294000000000001e-06, "loss": 21.5991, "step": 21470 }, { "epoch": 0.043390959004836034, "grad_norm": 208.3021240234375, "learning_rate": 4.296e-06, "loss": 25.6596, "step": 21480 }, { "epoch": 0.04341115963751985, "grad_norm": 328.14739990234375, "learning_rate": 4.298e-06, "loss": 39.369, "step": 21490 }, { "epoch": 0.04343136027020366, "grad_norm": 18.049570083618164, "learning_rate": 4.3e-06, "loss": 32.681, "step": 21500 }, { "epoch": 0.04345156090288748, "grad_norm": 787.612548828125, "learning_rate": 4.3020000000000005e-06, "loss": 20.5986, "step": 21510 }, { "epoch": 0.04347176153557129, "grad_norm": 1166.1243896484375, "learning_rate": 4.304000000000001e-06, "loss": 59.2746, "step": 21520 }, { "epoch": 0.043491962168255106, "grad_norm": 529.6400756835938, "learning_rate": 4.306e-06, "loss": 23.4076, "step": 21530 }, { "epoch": 0.043512162800938926, "grad_norm": 445.4692077636719, "learning_rate": 4.308000000000001e-06, "loss": 31.6689, "step": 21540 }, { "epoch": 0.04353236343362274, "grad_norm": 295.2471618652344, "learning_rate": 4.31e-06, "loss": 22.4568, "step": 21550 }, { "epoch": 0.04355256406630656, "grad_norm": 414.5696716308594, "learning_rate": 4.312e-06, "loss": 13.6062, "step": 21560 }, { "epoch": 0.04357276469899037, "grad_norm": 62.93955612182617, "learning_rate": 4.3140000000000005e-06, "loss": 25.5571, "step": 21570 }, { "epoch": 0.043592965331674186, "grad_norm": 712.908447265625, "learning_rate": 4.316e-06, "loss": 38.6598, "step": 21580 }, { "epoch": 0.043613165964358006, "grad_norm": 766.0816650390625, "learning_rate": 4.318000000000001e-06, "loss": 27.1536, "step": 21590 }, { "epoch": 0.04363336659704182, "grad_norm": 401.24298095703125, "learning_rate": 4.32e-06, "loss": 26.8595, "step": 21600 }, { "epoch": 0.04365356722972563, "grad_norm": 594.0333862304688, "learning_rate": 4.322e-06, "loss": 28.8221, "step": 21610 }, { "epoch": 0.04367376786240945, "grad_norm": 216.5138397216797, "learning_rate": 4.3240000000000004e-06, "loss": 39.8338, "step": 21620 }, { "epoch": 0.043693968495093265, "grad_norm": 384.75299072265625, "learning_rate": 4.326000000000001e-06, "loss": 32.6422, "step": 21630 }, { "epoch": 0.043714169127777085, "grad_norm": 245.19192504882812, "learning_rate": 4.328000000000001e-06, "loss": 23.9303, "step": 21640 }, { "epoch": 0.0437343697604609, "grad_norm": 220.33888244628906, "learning_rate": 4.33e-06, "loss": 29.9623, "step": 21650 }, { "epoch": 0.04375457039314471, "grad_norm": 299.3458557128906, "learning_rate": 4.332e-06, "loss": 29.4365, "step": 21660 }, { "epoch": 0.04377477102582853, "grad_norm": 0.0, "learning_rate": 4.334e-06, "loss": 16.2625, "step": 21670 }, { "epoch": 0.043794971658512344, "grad_norm": 1265.8236083984375, "learning_rate": 4.3360000000000005e-06, "loss": 36.5488, "step": 21680 }, { "epoch": 0.04381517229119616, "grad_norm": 524.7084350585938, "learning_rate": 4.338000000000001e-06, "loss": 30.5848, "step": 21690 }, { "epoch": 0.04383537292387998, "grad_norm": 640.1143798828125, "learning_rate": 4.34e-06, "loss": 38.6122, "step": 21700 }, { "epoch": 0.04385557355656379, "grad_norm": 603.3218383789062, "learning_rate": 4.342e-06, "loss": 27.8987, "step": 21710 }, { "epoch": 0.04387577418924761, "grad_norm": 407.31451416015625, "learning_rate": 4.344e-06, "loss": 19.4499, "step": 21720 }, { "epoch": 0.04389597482193142, "grad_norm": 1363.579833984375, "learning_rate": 4.346e-06, "loss": 39.3889, "step": 21730 }, { "epoch": 0.043916175454615236, "grad_norm": 
367.12200927734375, "learning_rate": 4.3480000000000006e-06, "loss": 32.9657, "step": 21740 }, { "epoch": 0.043936376087299056, "grad_norm": 1004.193603515625, "learning_rate": 4.350000000000001e-06, "loss": 36.1381, "step": 21750 }, { "epoch": 0.04395657671998287, "grad_norm": 673.9029541015625, "learning_rate": 4.352e-06, "loss": 27.1183, "step": 21760 }, { "epoch": 0.04397677735266668, "grad_norm": 451.542724609375, "learning_rate": 4.354e-06, "loss": 25.8514, "step": 21770 }, { "epoch": 0.0439969779853505, "grad_norm": 283.10491943359375, "learning_rate": 4.356e-06, "loss": 28.4057, "step": 21780 }, { "epoch": 0.044017178618034315, "grad_norm": 492.3885803222656, "learning_rate": 4.3580000000000005e-06, "loss": 23.065, "step": 21790 }, { "epoch": 0.044037379250718135, "grad_norm": 499.5768737792969, "learning_rate": 4.360000000000001e-06, "loss": 29.2149, "step": 21800 }, { "epoch": 0.04405757988340195, "grad_norm": 185.2124481201172, "learning_rate": 4.362e-06, "loss": 19.6476, "step": 21810 }, { "epoch": 0.04407778051608576, "grad_norm": 826.69873046875, "learning_rate": 4.364e-06, "loss": 23.436, "step": 21820 }, { "epoch": 0.04409798114876958, "grad_norm": 1106.534912109375, "learning_rate": 4.366e-06, "loss": 29.2976, "step": 21830 }, { "epoch": 0.044118181781453394, "grad_norm": 445.1199035644531, "learning_rate": 4.368e-06, "loss": 18.8045, "step": 21840 }, { "epoch": 0.04413838241413721, "grad_norm": 593.8103637695312, "learning_rate": 4.3700000000000005e-06, "loss": 27.7079, "step": 21850 }, { "epoch": 0.04415858304682103, "grad_norm": 526.8739624023438, "learning_rate": 4.372e-06, "loss": 20.9647, "step": 21860 }, { "epoch": 0.04417878367950484, "grad_norm": 300.53143310546875, "learning_rate": 4.374000000000001e-06, "loss": 18.3609, "step": 21870 }, { "epoch": 0.04419898431218866, "grad_norm": 473.26971435546875, "learning_rate": 4.376e-06, "loss": 33.3246, "step": 21880 }, { "epoch": 0.044219184944872474, "grad_norm": 600.6439819335938, "learning_rate": 4.378e-06, "loss": 15.1279, "step": 21890 }, { "epoch": 0.04423938557755629, "grad_norm": 832.4254150390625, "learning_rate": 4.38e-06, "loss": 20.1099, "step": 21900 }, { "epoch": 0.04425958621024011, "grad_norm": 382.2060852050781, "learning_rate": 4.382e-06, "loss": 16.0935, "step": 21910 }, { "epoch": 0.04427978684292392, "grad_norm": 397.8320007324219, "learning_rate": 4.384000000000001e-06, "loss": 21.0683, "step": 21920 }, { "epoch": 0.04429998747560773, "grad_norm": 252.54330444335938, "learning_rate": 4.386e-06, "loss": 24.7409, "step": 21930 }, { "epoch": 0.04432018810829155, "grad_norm": 62.560665130615234, "learning_rate": 4.388e-06, "loss": 17.3035, "step": 21940 }, { "epoch": 0.044340388740975366, "grad_norm": 2055.7509765625, "learning_rate": 4.39e-06, "loss": 40.6105, "step": 21950 }, { "epoch": 0.044360589373659186, "grad_norm": 162.68014526367188, "learning_rate": 4.3920000000000005e-06, "loss": 18.1075, "step": 21960 }, { "epoch": 0.044380790006343, "grad_norm": 806.7841796875, "learning_rate": 4.394000000000001e-06, "loss": 47.3545, "step": 21970 }, { "epoch": 0.04440099063902681, "grad_norm": 114.22157287597656, "learning_rate": 4.396e-06, "loss": 40.4132, "step": 21980 }, { "epoch": 0.04442119127171063, "grad_norm": 971.5330200195312, "learning_rate": 4.398000000000001e-06, "loss": 33.0444, "step": 21990 }, { "epoch": 0.044441391904394445, "grad_norm": 77.30171203613281, "learning_rate": 4.4e-06, "loss": 15.9642, "step": 22000 }, { "epoch": 0.04446159253707826, "grad_norm": 634.365234375, 
"learning_rate": 4.402e-06, "loss": 30.1873, "step": 22010 }, { "epoch": 0.04448179316976208, "grad_norm": 859.5525512695312, "learning_rate": 4.4040000000000005e-06, "loss": 29.8951, "step": 22020 }, { "epoch": 0.04450199380244589, "grad_norm": 607.345458984375, "learning_rate": 4.406000000000001e-06, "loss": 33.4096, "step": 22030 }, { "epoch": 0.04452219443512971, "grad_norm": 912.7593383789062, "learning_rate": 4.408000000000001e-06, "loss": 22.3692, "step": 22040 }, { "epoch": 0.044542395067813524, "grad_norm": 617.1201171875, "learning_rate": 4.41e-06, "loss": 29.6909, "step": 22050 }, { "epoch": 0.04456259570049734, "grad_norm": 623.3720092773438, "learning_rate": 4.412e-06, "loss": 20.6357, "step": 22060 }, { "epoch": 0.04458279633318116, "grad_norm": 799.55078125, "learning_rate": 4.4140000000000004e-06, "loss": 34.0577, "step": 22070 }, { "epoch": 0.04460299696586497, "grad_norm": 437.06719970703125, "learning_rate": 4.416000000000001e-06, "loss": 28.1422, "step": 22080 }, { "epoch": 0.04462319759854878, "grad_norm": 342.1743469238281, "learning_rate": 4.418000000000001e-06, "loss": 25.9955, "step": 22090 }, { "epoch": 0.0446433982312326, "grad_norm": 270.8495178222656, "learning_rate": 4.42e-06, "loss": 18.9199, "step": 22100 }, { "epoch": 0.044663598863916416, "grad_norm": 357.5106506347656, "learning_rate": 4.422e-06, "loss": 20.252, "step": 22110 }, { "epoch": 0.044683799496600236, "grad_norm": 453.18798828125, "learning_rate": 4.424e-06, "loss": 15.3347, "step": 22120 }, { "epoch": 0.04470400012928405, "grad_norm": 189.09625244140625, "learning_rate": 4.4260000000000005e-06, "loss": 15.3332, "step": 22130 }, { "epoch": 0.04472420076196786, "grad_norm": 91.37895202636719, "learning_rate": 4.428000000000001e-06, "loss": 27.1369, "step": 22140 }, { "epoch": 0.04474440139465168, "grad_norm": 858.945068359375, "learning_rate": 4.430000000000001e-06, "loss": 31.7565, "step": 22150 }, { "epoch": 0.044764602027335496, "grad_norm": 432.4228515625, "learning_rate": 4.432e-06, "loss": 27.2611, "step": 22160 }, { "epoch": 0.04478480266001931, "grad_norm": 629.383056640625, "learning_rate": 4.434e-06, "loss": 23.4669, "step": 22170 }, { "epoch": 0.04480500329270313, "grad_norm": 695.8665771484375, "learning_rate": 4.436e-06, "loss": 19.5067, "step": 22180 }, { "epoch": 0.04482520392538694, "grad_norm": 321.7768859863281, "learning_rate": 4.438e-06, "loss": 17.0404, "step": 22190 }, { "epoch": 0.04484540455807076, "grad_norm": 662.2603759765625, "learning_rate": 4.440000000000001e-06, "loss": 53.422, "step": 22200 }, { "epoch": 0.044865605190754575, "grad_norm": 1252.63623046875, "learning_rate": 4.442e-06, "loss": 36.9159, "step": 22210 }, { "epoch": 0.04488580582343839, "grad_norm": 585.7415161132812, "learning_rate": 4.444e-06, "loss": 18.0069, "step": 22220 }, { "epoch": 0.04490600645612221, "grad_norm": 947.59375, "learning_rate": 4.446e-06, "loss": 37.0844, "step": 22230 }, { "epoch": 0.04492620708880602, "grad_norm": 551.6622314453125, "learning_rate": 4.4480000000000004e-06, "loss": 19.1681, "step": 22240 }, { "epoch": 0.044946407721489834, "grad_norm": 297.9669494628906, "learning_rate": 4.450000000000001e-06, "loss": 10.2288, "step": 22250 }, { "epoch": 0.044966608354173654, "grad_norm": 514.8941040039062, "learning_rate": 4.452e-06, "loss": 12.0541, "step": 22260 }, { "epoch": 0.04498680898685747, "grad_norm": 902.2230224609375, "learning_rate": 4.454000000000001e-06, "loss": 28.4228, "step": 22270 }, { "epoch": 0.04500700961954129, "grad_norm": 650.3019409179688, 
"learning_rate": 4.456e-06, "loss": 42.667, "step": 22280 }, { "epoch": 0.0450272102522251, "grad_norm": 913.4136962890625, "learning_rate": 4.458e-06, "loss": 42.5742, "step": 22290 }, { "epoch": 0.04504741088490891, "grad_norm": 507.68182373046875, "learning_rate": 4.4600000000000005e-06, "loss": 22.1308, "step": 22300 }, { "epoch": 0.04506761151759273, "grad_norm": 257.96783447265625, "learning_rate": 4.462e-06, "loss": 21.2215, "step": 22310 }, { "epoch": 0.045087812150276546, "grad_norm": 486.583740234375, "learning_rate": 4.464000000000001e-06, "loss": 34.8724, "step": 22320 }, { "epoch": 0.04510801278296036, "grad_norm": 419.0981750488281, "learning_rate": 4.466e-06, "loss": 26.7813, "step": 22330 }, { "epoch": 0.04512821341564418, "grad_norm": 341.603515625, "learning_rate": 4.468e-06, "loss": 18.4198, "step": 22340 }, { "epoch": 0.04514841404832799, "grad_norm": 207.7368621826172, "learning_rate": 4.47e-06, "loss": 30.2265, "step": 22350 }, { "epoch": 0.04516861468101181, "grad_norm": 744.3402709960938, "learning_rate": 4.4720000000000006e-06, "loss": 18.9786, "step": 22360 }, { "epoch": 0.045188815313695625, "grad_norm": 314.3277282714844, "learning_rate": 4.474000000000001e-06, "loss": 20.393, "step": 22370 }, { "epoch": 0.04520901594637944, "grad_norm": 600.7283935546875, "learning_rate": 4.476e-06, "loss": 23.1794, "step": 22380 }, { "epoch": 0.04522921657906326, "grad_norm": 220.0872039794922, "learning_rate": 4.478e-06, "loss": 36.213, "step": 22390 }, { "epoch": 0.04524941721174707, "grad_norm": 363.21209716796875, "learning_rate": 4.48e-06, "loss": 40.3618, "step": 22400 }, { "epoch": 0.045269617844430884, "grad_norm": 1180.115234375, "learning_rate": 4.4820000000000005e-06, "loss": 27.6139, "step": 22410 }, { "epoch": 0.045289818477114704, "grad_norm": 824.1642456054688, "learning_rate": 4.484000000000001e-06, "loss": 25.0623, "step": 22420 }, { "epoch": 0.04531001910979852, "grad_norm": 424.63665771484375, "learning_rate": 4.486000000000001e-06, "loss": 38.7007, "step": 22430 }, { "epoch": 0.04533021974248234, "grad_norm": 540.9491577148438, "learning_rate": 4.488e-06, "loss": 36.2141, "step": 22440 }, { "epoch": 0.04535042037516615, "grad_norm": 432.8280944824219, "learning_rate": 4.49e-06, "loss": 23.0217, "step": 22450 }, { "epoch": 0.045370621007849964, "grad_norm": 618.2141723632812, "learning_rate": 4.492e-06, "loss": 19.9363, "step": 22460 }, { "epoch": 0.045390821640533784, "grad_norm": 909.5447998046875, "learning_rate": 4.4940000000000005e-06, "loss": 32.4973, "step": 22470 }, { "epoch": 0.0454110222732176, "grad_norm": 932.4380493164062, "learning_rate": 4.496000000000001e-06, "loss": 36.2815, "step": 22480 }, { "epoch": 0.04543122290590141, "grad_norm": 619.4166870117188, "learning_rate": 4.498e-06, "loss": 26.1476, "step": 22490 }, { "epoch": 0.04545142353858523, "grad_norm": 292.8052978515625, "learning_rate": 4.5e-06, "loss": 23.0709, "step": 22500 }, { "epoch": 0.04547162417126904, "grad_norm": 911.8253784179688, "learning_rate": 4.502e-06, "loss": 27.5998, "step": 22510 }, { "epoch": 0.04549182480395286, "grad_norm": 658.0123291015625, "learning_rate": 4.504e-06, "loss": 30.1506, "step": 22520 }, { "epoch": 0.045512025436636676, "grad_norm": 512.3121948242188, "learning_rate": 4.5060000000000006e-06, "loss": 28.9311, "step": 22530 }, { "epoch": 0.04553222606932049, "grad_norm": 882.61328125, "learning_rate": 4.508e-06, "loss": 34.5642, "step": 22540 }, { "epoch": 0.04555242670200431, "grad_norm": 323.4769592285156, "learning_rate": 4.510000000000001e-06, 
"loss": 19.5451, "step": 22550 }, { "epoch": 0.04557262733468812, "grad_norm": 549.1060180664062, "learning_rate": 4.512e-06, "loss": 32.6389, "step": 22560 }, { "epoch": 0.045592827967371935, "grad_norm": 265.7471008300781, "learning_rate": 4.514e-06, "loss": 47.5208, "step": 22570 }, { "epoch": 0.045613028600055755, "grad_norm": 2399.81103515625, "learning_rate": 4.5160000000000005e-06, "loss": 34.385, "step": 22580 }, { "epoch": 0.04563322923273957, "grad_norm": 521.2456665039062, "learning_rate": 4.518e-06, "loss": 23.1079, "step": 22590 }, { "epoch": 0.04565342986542339, "grad_norm": 392.2706604003906, "learning_rate": 4.520000000000001e-06, "loss": 14.7095, "step": 22600 }, { "epoch": 0.0456736304981072, "grad_norm": 368.1333312988281, "learning_rate": 4.522e-06, "loss": 19.6538, "step": 22610 }, { "epoch": 0.045693831130791014, "grad_norm": 251.01956176757812, "learning_rate": 4.524e-06, "loss": 32.6746, "step": 22620 }, { "epoch": 0.045714031763474834, "grad_norm": 686.638671875, "learning_rate": 4.526e-06, "loss": 29.6795, "step": 22630 }, { "epoch": 0.04573423239615865, "grad_norm": 422.67034912109375, "learning_rate": 4.5280000000000005e-06, "loss": 24.4751, "step": 22640 }, { "epoch": 0.04575443302884246, "grad_norm": 505.2594909667969, "learning_rate": 4.530000000000001e-06, "loss": 42.698, "step": 22650 }, { "epoch": 0.04577463366152628, "grad_norm": 897.6318359375, "learning_rate": 4.532e-06, "loss": 15.6817, "step": 22660 }, { "epoch": 0.04579483429421009, "grad_norm": 484.6026611328125, "learning_rate": 4.534000000000001e-06, "loss": 21.0916, "step": 22670 }, { "epoch": 0.04581503492689391, "grad_norm": 864.950927734375, "learning_rate": 4.536e-06, "loss": 30.6341, "step": 22680 }, { "epoch": 0.045835235559577726, "grad_norm": 221.57028198242188, "learning_rate": 4.5380000000000004e-06, "loss": 34.7302, "step": 22690 }, { "epoch": 0.04585543619226154, "grad_norm": 19.70734977722168, "learning_rate": 4.540000000000001e-06, "loss": 32.5144, "step": 22700 }, { "epoch": 0.04587563682494536, "grad_norm": 577.8696899414062, "learning_rate": 4.542e-06, "loss": 36.3954, "step": 22710 }, { "epoch": 0.04589583745762917, "grad_norm": 774.3411865234375, "learning_rate": 4.544000000000001e-06, "loss": 37.3928, "step": 22720 }, { "epoch": 0.045916038090312986, "grad_norm": 290.1728515625, "learning_rate": 4.546e-06, "loss": 27.9667, "step": 22730 }, { "epoch": 0.045936238722996806, "grad_norm": 558.4825439453125, "learning_rate": 4.548e-06, "loss": 12.0116, "step": 22740 }, { "epoch": 0.04595643935568062, "grad_norm": 616.5971069335938, "learning_rate": 4.5500000000000005e-06, "loss": 23.0712, "step": 22750 }, { "epoch": 0.04597663998836444, "grad_norm": 227.1874542236328, "learning_rate": 4.552000000000001e-06, "loss": 24.5525, "step": 22760 }, { "epoch": 0.04599684062104825, "grad_norm": 110.66157531738281, "learning_rate": 4.554000000000001e-06, "loss": 34.7334, "step": 22770 }, { "epoch": 0.046017041253732065, "grad_norm": 309.23272705078125, "learning_rate": 4.556e-06, "loss": 25.0262, "step": 22780 }, { "epoch": 0.046037241886415885, "grad_norm": 504.0193786621094, "learning_rate": 4.558e-06, "loss": 19.2853, "step": 22790 }, { "epoch": 0.0460574425190997, "grad_norm": 295.9443054199219, "learning_rate": 4.56e-06, "loss": 30.692, "step": 22800 }, { "epoch": 0.04607764315178351, "grad_norm": 507.11151123046875, "learning_rate": 4.5620000000000005e-06, "loss": 25.5672, "step": 22810 }, { "epoch": 0.04609784378446733, "grad_norm": 558.9776611328125, "learning_rate": 4.564e-06, 
"loss": 23.321, "step": 22820 }, { "epoch": 0.046118044417151144, "grad_norm": 350.1549987792969, "learning_rate": 4.566000000000001e-06, "loss": 34.0735, "step": 22830 }, { "epoch": 0.046138245049834964, "grad_norm": 663.969970703125, "learning_rate": 4.568e-06, "loss": 29.8717, "step": 22840 }, { "epoch": 0.04615844568251878, "grad_norm": 124.18944549560547, "learning_rate": 4.57e-06, "loss": 24.4411, "step": 22850 }, { "epoch": 0.04617864631520259, "grad_norm": 773.56591796875, "learning_rate": 4.5720000000000004e-06, "loss": 31.0459, "step": 22860 }, { "epoch": 0.04619884694788641, "grad_norm": 1089.1876220703125, "learning_rate": 4.574e-06, "loss": 20.154, "step": 22870 }, { "epoch": 0.04621904758057022, "grad_norm": 657.5020751953125, "learning_rate": 4.576000000000001e-06, "loss": 30.6423, "step": 22880 }, { "epoch": 0.046239248213254036, "grad_norm": 285.689697265625, "learning_rate": 4.578e-06, "loss": 23.1769, "step": 22890 }, { "epoch": 0.046259448845937856, "grad_norm": 456.78521728515625, "learning_rate": 4.58e-06, "loss": 21.8306, "step": 22900 }, { "epoch": 0.04627964947862167, "grad_norm": 617.4722900390625, "learning_rate": 4.582e-06, "loss": 29.9266, "step": 22910 }, { "epoch": 0.04629985011130549, "grad_norm": 425.8707275390625, "learning_rate": 4.5840000000000005e-06, "loss": 26.158, "step": 22920 }, { "epoch": 0.0463200507439893, "grad_norm": 545.0387573242188, "learning_rate": 4.586000000000001e-06, "loss": 31.1487, "step": 22930 }, { "epoch": 0.046340251376673115, "grad_norm": 751.0917358398438, "learning_rate": 4.588e-06, "loss": 17.7194, "step": 22940 }, { "epoch": 0.046360452009356935, "grad_norm": 545.7406616210938, "learning_rate": 4.590000000000001e-06, "loss": 21.742, "step": 22950 }, { "epoch": 0.04638065264204075, "grad_norm": 634.4990844726562, "learning_rate": 4.592e-06, "loss": 40.033, "step": 22960 }, { "epoch": 0.04640085327472456, "grad_norm": 299.349365234375, "learning_rate": 4.594e-06, "loss": 23.369, "step": 22970 }, { "epoch": 0.04642105390740838, "grad_norm": 407.2045593261719, "learning_rate": 4.5960000000000006e-06, "loss": 22.4216, "step": 22980 }, { "epoch": 0.046441254540092194, "grad_norm": 361.1532897949219, "learning_rate": 4.598e-06, "loss": 20.676, "step": 22990 }, { "epoch": 0.046461455172776014, "grad_norm": 962.6221923828125, "learning_rate": 4.600000000000001e-06, "loss": 29.557, "step": 23000 }, { "epoch": 0.04648165580545983, "grad_norm": 514.6326293945312, "learning_rate": 4.602e-06, "loss": 29.2714, "step": 23010 }, { "epoch": 0.04650185643814364, "grad_norm": 532.843505859375, "learning_rate": 4.604e-06, "loss": 20.6181, "step": 23020 }, { "epoch": 0.04652205707082746, "grad_norm": 177.03302001953125, "learning_rate": 4.6060000000000005e-06, "loss": 28.7298, "step": 23030 }, { "epoch": 0.046542257703511274, "grad_norm": 214.79881286621094, "learning_rate": 4.608000000000001e-06, "loss": 18.3866, "step": 23040 }, { "epoch": 0.04656245833619509, "grad_norm": 517.9189453125, "learning_rate": 4.610000000000001e-06, "loss": 25.5109, "step": 23050 }, { "epoch": 0.04658265896887891, "grad_norm": 599.70068359375, "learning_rate": 4.612e-06, "loss": 22.2792, "step": 23060 }, { "epoch": 0.04660285960156272, "grad_norm": 98.75985717773438, "learning_rate": 4.614e-06, "loss": 34.458, "step": 23070 }, { "epoch": 0.04662306023424654, "grad_norm": 420.987548828125, "learning_rate": 4.616e-06, "loss": 20.0997, "step": 23080 }, { "epoch": 0.04664326086693035, "grad_norm": 691.1615600585938, "learning_rate": 4.6180000000000005e-06, "loss": 
20.6392, "step": 23090 }, { "epoch": 0.046663461499614166, "grad_norm": 637.1849365234375, "learning_rate": 4.620000000000001e-06, "loss": 31.1125, "step": 23100 }, { "epoch": 0.046683662132297986, "grad_norm": 816.533203125, "learning_rate": 4.622e-06, "loss": 46.0801, "step": 23110 }, { "epoch": 0.0467038627649818, "grad_norm": 549.866455078125, "learning_rate": 4.624e-06, "loss": 24.4189, "step": 23120 }, { "epoch": 0.04672406339766561, "grad_norm": 685.1592407226562, "learning_rate": 4.626e-06, "loss": 32.2336, "step": 23130 }, { "epoch": 0.04674426403034943, "grad_norm": 319.0460510253906, "learning_rate": 4.628e-06, "loss": 23.6327, "step": 23140 }, { "epoch": 0.046764464663033245, "grad_norm": 575.0809326171875, "learning_rate": 4.6300000000000006e-06, "loss": 19.5776, "step": 23150 }, { "epoch": 0.046784665295717065, "grad_norm": 328.0433349609375, "learning_rate": 4.632000000000001e-06, "loss": 11.1229, "step": 23160 }, { "epoch": 0.04680486592840088, "grad_norm": 133.4390411376953, "learning_rate": 4.634e-06, "loss": 22.5893, "step": 23170 }, { "epoch": 0.04682506656108469, "grad_norm": 399.1858825683594, "learning_rate": 4.636e-06, "loss": 33.8585, "step": 23180 }, { "epoch": 0.04684526719376851, "grad_norm": 256.7257080078125, "learning_rate": 4.638e-06, "loss": 22.3619, "step": 23190 }, { "epoch": 0.046865467826452324, "grad_norm": 544.70361328125, "learning_rate": 4.6400000000000005e-06, "loss": 19.1737, "step": 23200 }, { "epoch": 0.04688566845913614, "grad_norm": 787.859619140625, "learning_rate": 4.642000000000001e-06, "loss": 18.535, "step": 23210 }, { "epoch": 0.04690586909181996, "grad_norm": 411.52203369140625, "learning_rate": 4.644e-06, "loss": 10.0353, "step": 23220 }, { "epoch": 0.04692606972450377, "grad_norm": 543.5707397460938, "learning_rate": 4.646000000000001e-06, "loss": 22.038, "step": 23230 }, { "epoch": 0.04694627035718759, "grad_norm": 508.89617919921875, "learning_rate": 4.648e-06, "loss": 21.708, "step": 23240 }, { "epoch": 0.0469664709898714, "grad_norm": 1186.1513671875, "learning_rate": 4.65e-06, "loss": 35.502, "step": 23250 }, { "epoch": 0.046986671622555216, "grad_norm": 785.4786987304688, "learning_rate": 4.6520000000000005e-06, "loss": 23.0378, "step": 23260 }, { "epoch": 0.047006872255239036, "grad_norm": 331.0532531738281, "learning_rate": 4.654e-06, "loss": 37.4566, "step": 23270 }, { "epoch": 0.04702707288792285, "grad_norm": 323.4093017578125, "learning_rate": 4.656000000000001e-06, "loss": 16.0199, "step": 23280 }, { "epoch": 0.04704727352060666, "grad_norm": 464.79132080078125, "learning_rate": 4.658e-06, "loss": 29.0295, "step": 23290 }, { "epoch": 0.04706747415329048, "grad_norm": 634.6414794921875, "learning_rate": 4.66e-06, "loss": 23.0239, "step": 23300 }, { "epoch": 0.047087674785974296, "grad_norm": 317.646240234375, "learning_rate": 4.6620000000000004e-06, "loss": 13.0783, "step": 23310 }, { "epoch": 0.047107875418658116, "grad_norm": 131.8998260498047, "learning_rate": 4.664000000000001e-06, "loss": 22.0581, "step": 23320 }, { "epoch": 0.04712807605134193, "grad_norm": 505.8997497558594, "learning_rate": 4.666000000000001e-06, "loss": 15.9947, "step": 23330 }, { "epoch": 0.04714827668402574, "grad_norm": 349.62799072265625, "learning_rate": 4.668e-06, "loss": 23.1838, "step": 23340 }, { "epoch": 0.04716847731670956, "grad_norm": 415.26513671875, "learning_rate": 4.670000000000001e-06, "loss": 44.291, "step": 23350 }, { "epoch": 0.047188677949393375, "grad_norm": 591.033203125, "learning_rate": 4.672e-06, "loss": 62.548, "step": 
23360 }, { "epoch": 0.04720887858207719, "grad_norm": 685.7374877929688, "learning_rate": 4.6740000000000005e-06, "loss": 32.069, "step": 23370 }, { "epoch": 0.04722907921476101, "grad_norm": 864.9512329101562, "learning_rate": 4.676000000000001e-06, "loss": 62.1026, "step": 23380 }, { "epoch": 0.04724927984744482, "grad_norm": 923.1744995117188, "learning_rate": 4.678e-06, "loss": 27.0128, "step": 23390 }, { "epoch": 0.04726948048012864, "grad_norm": 353.6065673828125, "learning_rate": 4.680000000000001e-06, "loss": 19.9927, "step": 23400 }, { "epoch": 0.047289681112812454, "grad_norm": 1081.422119140625, "learning_rate": 4.682e-06, "loss": 22.436, "step": 23410 }, { "epoch": 0.04730988174549627, "grad_norm": 374.8186340332031, "learning_rate": 4.684e-06, "loss": 18.5459, "step": 23420 }, { "epoch": 0.04733008237818009, "grad_norm": 409.6661376953125, "learning_rate": 4.6860000000000005e-06, "loss": 27.5921, "step": 23430 }, { "epoch": 0.0473502830108639, "grad_norm": 544.1531372070312, "learning_rate": 4.688000000000001e-06, "loss": 20.9284, "step": 23440 }, { "epoch": 0.04737048364354771, "grad_norm": 724.7265014648438, "learning_rate": 4.69e-06, "loss": 35.8183, "step": 23450 }, { "epoch": 0.04739068427623153, "grad_norm": 342.1605224609375, "learning_rate": 4.692e-06, "loss": 19.5327, "step": 23460 }, { "epoch": 0.047410884908915346, "grad_norm": 850.796630859375, "learning_rate": 4.694e-06, "loss": 22.3838, "step": 23470 }, { "epoch": 0.047431085541599166, "grad_norm": 171.35581970214844, "learning_rate": 4.6960000000000004e-06, "loss": 21.14, "step": 23480 }, { "epoch": 0.04745128617428298, "grad_norm": 156.30577087402344, "learning_rate": 4.698000000000001e-06, "loss": 23.0832, "step": 23490 }, { "epoch": 0.04747148680696679, "grad_norm": 580.611083984375, "learning_rate": 4.7e-06, "loss": 29.439, "step": 23500 }, { "epoch": 0.04749168743965061, "grad_norm": 407.2830505371094, "learning_rate": 4.702e-06, "loss": 16.5055, "step": 23510 }, { "epoch": 0.047511888072334425, "grad_norm": 446.4722900390625, "learning_rate": 4.704e-06, "loss": 17.8526, "step": 23520 }, { "epoch": 0.04753208870501824, "grad_norm": 509.90594482421875, "learning_rate": 4.706e-06, "loss": 25.0278, "step": 23530 }, { "epoch": 0.04755228933770206, "grad_norm": 1361.8035888671875, "learning_rate": 4.7080000000000005e-06, "loss": 37.3306, "step": 23540 }, { "epoch": 0.04757248997038587, "grad_norm": 82.47450256347656, "learning_rate": 4.71e-06, "loss": 9.7433, "step": 23550 }, { "epoch": 0.04759269060306969, "grad_norm": 530.8025512695312, "learning_rate": 4.712000000000001e-06, "loss": 34.0402, "step": 23560 }, { "epoch": 0.047612891235753504, "grad_norm": 420.6856689453125, "learning_rate": 4.714e-06, "loss": 25.9908, "step": 23570 }, { "epoch": 0.04763309186843732, "grad_norm": 506.2064514160156, "learning_rate": 4.716e-06, "loss": 26.0172, "step": 23580 }, { "epoch": 0.04765329250112114, "grad_norm": 558.8671264648438, "learning_rate": 4.718e-06, "loss": 26.991, "step": 23590 }, { "epoch": 0.04767349313380495, "grad_norm": 188.64163208007812, "learning_rate": 4.7200000000000005e-06, "loss": 26.4591, "step": 23600 }, { "epoch": 0.047693693766488764, "grad_norm": 831.897216796875, "learning_rate": 4.722000000000001e-06, "loss": 27.7616, "step": 23610 }, { "epoch": 0.047713894399172584, "grad_norm": 247.0804901123047, "learning_rate": 4.724e-06, "loss": 32.929, "step": 23620 }, { "epoch": 0.0477340950318564, "grad_norm": 682.9000244140625, "learning_rate": 4.726000000000001e-06, "loss": 29.1023, "step": 23630 }, 
{ "epoch": 0.04775429566454022, "grad_norm": 476.44012451171875, "learning_rate": 4.728e-06, "loss": 23.3636, "step": 23640 }, { "epoch": 0.04777449629722403, "grad_norm": 297.98577880859375, "learning_rate": 4.7300000000000005e-06, "loss": 20.2331, "step": 23650 }, { "epoch": 0.04779469692990784, "grad_norm": 553.1089477539062, "learning_rate": 4.732000000000001e-06, "loss": 40.8277, "step": 23660 }, { "epoch": 0.04781489756259166, "grad_norm": 514.8232421875, "learning_rate": 4.734e-06, "loss": 36.9933, "step": 23670 }, { "epoch": 0.047835098195275476, "grad_norm": 709.031494140625, "learning_rate": 4.736000000000001e-06, "loss": 22.8072, "step": 23680 }, { "epoch": 0.04785529882795929, "grad_norm": 531.7922973632812, "learning_rate": 4.738e-06, "loss": 17.322, "step": 23690 }, { "epoch": 0.04787549946064311, "grad_norm": 525.7501831054688, "learning_rate": 4.74e-06, "loss": 26.0231, "step": 23700 }, { "epoch": 0.04789570009332692, "grad_norm": 724.34619140625, "learning_rate": 4.7420000000000005e-06, "loss": 21.1959, "step": 23710 }, { "epoch": 0.04791590072601074, "grad_norm": 863.712646484375, "learning_rate": 4.744000000000001e-06, "loss": 26.1296, "step": 23720 }, { "epoch": 0.047936101358694555, "grad_norm": 200.90933227539062, "learning_rate": 4.746000000000001e-06, "loss": 29.482, "step": 23730 }, { "epoch": 0.04795630199137837, "grad_norm": 465.8497009277344, "learning_rate": 4.748e-06, "loss": 34.7471, "step": 23740 }, { "epoch": 0.04797650262406219, "grad_norm": 517.1136474609375, "learning_rate": 4.75e-06, "loss": 29.8758, "step": 23750 }, { "epoch": 0.047996703256746, "grad_norm": 613.1840209960938, "learning_rate": 4.752e-06, "loss": 43.5371, "step": 23760 }, { "epoch": 0.048016903889429814, "grad_norm": 957.7688598632812, "learning_rate": 4.7540000000000006e-06, "loss": 22.8681, "step": 23770 }, { "epoch": 0.048037104522113634, "grad_norm": 539.5322265625, "learning_rate": 4.756000000000001e-06, "loss": 41.239, "step": 23780 }, { "epoch": 0.04805730515479745, "grad_norm": 495.3214416503906, "learning_rate": 4.758e-06, "loss": 24.3765, "step": 23790 }, { "epoch": 0.04807750578748127, "grad_norm": 4683.61572265625, "learning_rate": 4.76e-06, "loss": 50.0535, "step": 23800 }, { "epoch": 0.04809770642016508, "grad_norm": 551.005615234375, "learning_rate": 4.762e-06, "loss": 26.6315, "step": 23810 }, { "epoch": 0.04811790705284889, "grad_norm": 546.5647583007812, "learning_rate": 4.7640000000000005e-06, "loss": 21.7942, "step": 23820 }, { "epoch": 0.04813810768553271, "grad_norm": 450.4869689941406, "learning_rate": 4.766000000000001e-06, "loss": 14.371, "step": 23830 }, { "epoch": 0.048158308318216526, "grad_norm": 794.6976318359375, "learning_rate": 4.768000000000001e-06, "loss": 26.081, "step": 23840 }, { "epoch": 0.04817850895090034, "grad_norm": 412.3992919921875, "learning_rate": 4.77e-06, "loss": 25.6854, "step": 23850 }, { "epoch": 0.04819870958358416, "grad_norm": 484.59619140625, "learning_rate": 4.772e-06, "loss": 24.9677, "step": 23860 }, { "epoch": 0.04821891021626797, "grad_norm": 381.2744445800781, "learning_rate": 4.774e-06, "loss": 20.9764, "step": 23870 }, { "epoch": 0.04823911084895179, "grad_norm": 279.7530212402344, "learning_rate": 4.7760000000000005e-06, "loss": 34.305, "step": 23880 }, { "epoch": 0.048259311481635606, "grad_norm": 921.8336181640625, "learning_rate": 4.778000000000001e-06, "loss": 45.3881, "step": 23890 }, { "epoch": 0.04827951211431942, "grad_norm": 784.5956420898438, "learning_rate": 4.78e-06, "loss": 22.424, "step": 23900 }, { "epoch": 
0.04829971274700324, "grad_norm": 574.390380859375, "learning_rate": 4.782e-06, "loss": 23.9142, "step": 23910 }, { "epoch": 0.04831991337968705, "grad_norm": 546.51416015625, "learning_rate": 4.784e-06, "loss": 22.9982, "step": 23920 }, { "epoch": 0.048340114012370865, "grad_norm": 187.9654083251953, "learning_rate": 4.7860000000000004e-06, "loss": 13.9286, "step": 23930 }, { "epoch": 0.048360314645054685, "grad_norm": 343.1470947265625, "learning_rate": 4.7880000000000006e-06, "loss": 21.124, "step": 23940 }, { "epoch": 0.0483805152777385, "grad_norm": 603.2246704101562, "learning_rate": 4.79e-06, "loss": 27.6511, "step": 23950 }, { "epoch": 0.04840071591042232, "grad_norm": 338.4572448730469, "learning_rate": 4.792000000000001e-06, "loss": 28.9174, "step": 23960 }, { "epoch": 0.04842091654310613, "grad_norm": 942.7728271484375, "learning_rate": 4.794e-06, "loss": 23.8593, "step": 23970 }, { "epoch": 0.048441117175789944, "grad_norm": 911.1484375, "learning_rate": 4.796e-06, "loss": 53.2721, "step": 23980 }, { "epoch": 0.048461317808473764, "grad_norm": 342.5573425292969, "learning_rate": 4.7980000000000005e-06, "loss": 22.4286, "step": 23990 }, { "epoch": 0.04848151844115758, "grad_norm": 232.32217407226562, "learning_rate": 4.800000000000001e-06, "loss": 31.6875, "step": 24000 }, { "epoch": 0.04850171907384139, "grad_norm": 1032.619873046875, "learning_rate": 4.802000000000001e-06, "loss": 29.0297, "step": 24010 }, { "epoch": 0.04852191970652521, "grad_norm": 470.9522705078125, "learning_rate": 4.804e-06, "loss": 25.9012, "step": 24020 }, { "epoch": 0.04854212033920902, "grad_norm": 280.8020324707031, "learning_rate": 4.806000000000001e-06, "loss": 18.4737, "step": 24030 }, { "epoch": 0.04856232097189284, "grad_norm": 607.37158203125, "learning_rate": 4.808e-06, "loss": 15.5083, "step": 24040 }, { "epoch": 0.048582521604576656, "grad_norm": 206.01007080078125, "learning_rate": 4.8100000000000005e-06, "loss": 27.6266, "step": 24050 }, { "epoch": 0.04860272223726047, "grad_norm": 217.52645874023438, "learning_rate": 4.812000000000001e-06, "loss": 51.8305, "step": 24060 }, { "epoch": 0.04862292286994429, "grad_norm": 450.17938232421875, "learning_rate": 4.814e-06, "loss": 26.5461, "step": 24070 }, { "epoch": 0.0486431235026281, "grad_norm": 2200.3466796875, "learning_rate": 4.816e-06, "loss": 43.0113, "step": 24080 }, { "epoch": 0.048663324135311915, "grad_norm": 487.3904724121094, "learning_rate": 4.818e-06, "loss": 37.733, "step": 24090 }, { "epoch": 0.048683524767995735, "grad_norm": 150.5276336669922, "learning_rate": 4.8200000000000004e-06, "loss": 19.6755, "step": 24100 }, { "epoch": 0.04870372540067955, "grad_norm": 505.09893798828125, "learning_rate": 4.822000000000001e-06, "loss": 36.4785, "step": 24110 }, { "epoch": 0.04872392603336337, "grad_norm": 1320.265625, "learning_rate": 4.824000000000001e-06, "loss": 22.6899, "step": 24120 }, { "epoch": 0.04874412666604718, "grad_norm": 425.0314025878906, "learning_rate": 4.826e-06, "loss": 16.7171, "step": 24130 }, { "epoch": 0.048764327298730994, "grad_norm": 572.2984619140625, "learning_rate": 4.828e-06, "loss": 30.4576, "step": 24140 }, { "epoch": 0.048784527931414814, "grad_norm": 663.1596069335938, "learning_rate": 4.83e-06, "loss": 24.7246, "step": 24150 }, { "epoch": 0.04880472856409863, "grad_norm": 497.4083251953125, "learning_rate": 4.8320000000000005e-06, "loss": 23.1729, "step": 24160 }, { "epoch": 0.04882492919678244, "grad_norm": 407.599609375, "learning_rate": 4.834000000000001e-06, "loss": 33.097, "step": 24170 }, { 
"epoch": 0.04884512982946626, "grad_norm": 864.8488159179688, "learning_rate": 4.836e-06, "loss": 50.6225, "step": 24180 }, { "epoch": 0.048865330462150074, "grad_norm": 681.3571166992188, "learning_rate": 4.838e-06, "loss": 32.4285, "step": 24190 }, { "epoch": 0.048885531094833894, "grad_norm": 503.53436279296875, "learning_rate": 4.84e-06, "loss": 33.0225, "step": 24200 }, { "epoch": 0.04890573172751771, "grad_norm": 1192.0885009765625, "learning_rate": 4.842e-06, "loss": 30.067, "step": 24210 }, { "epoch": 0.04892593236020152, "grad_norm": 1693.944091796875, "learning_rate": 4.8440000000000005e-06, "loss": 30.9601, "step": 24220 }, { "epoch": 0.04894613299288534, "grad_norm": 1430.281005859375, "learning_rate": 4.846e-06, "loss": 31.5682, "step": 24230 }, { "epoch": 0.04896633362556915, "grad_norm": 862.6809692382812, "learning_rate": 4.848000000000001e-06, "loss": 27.8008, "step": 24240 }, { "epoch": 0.048986534258252966, "grad_norm": 631.509033203125, "learning_rate": 4.85e-06, "loss": 23.8648, "step": 24250 }, { "epoch": 0.049006734890936786, "grad_norm": 606.4420776367188, "learning_rate": 4.852e-06, "loss": 28.055, "step": 24260 }, { "epoch": 0.0490269355236206, "grad_norm": 348.8546142578125, "learning_rate": 4.8540000000000005e-06, "loss": 30.1481, "step": 24270 }, { "epoch": 0.04904713615630442, "grad_norm": 219.95846557617188, "learning_rate": 4.856e-06, "loss": 27.218, "step": 24280 }, { "epoch": 0.04906733678898823, "grad_norm": 160.05877685546875, "learning_rate": 4.858000000000001e-06, "loss": 31.1108, "step": 24290 }, { "epoch": 0.049087537421672045, "grad_norm": 630.1192626953125, "learning_rate": 4.86e-06, "loss": 33.9822, "step": 24300 }, { "epoch": 0.049107738054355865, "grad_norm": 575.1064453125, "learning_rate": 4.862e-06, "loss": 29.9274, "step": 24310 }, { "epoch": 0.04912793868703968, "grad_norm": 3472.619140625, "learning_rate": 4.864e-06, "loss": 46.5845, "step": 24320 }, { "epoch": 0.04914813931972349, "grad_norm": 1044.9791259765625, "learning_rate": 4.8660000000000005e-06, "loss": 38.5026, "step": 24330 }, { "epoch": 0.04916833995240731, "grad_norm": 49.58512496948242, "learning_rate": 4.868000000000001e-06, "loss": 24.5532, "step": 24340 }, { "epoch": 0.049188540585091124, "grad_norm": 404.6474304199219, "learning_rate": 4.87e-06, "loss": 35.5195, "step": 24350 }, { "epoch": 0.04920874121777494, "grad_norm": 975.424560546875, "learning_rate": 4.872000000000001e-06, "loss": 25.8168, "step": 24360 }, { "epoch": 0.04922894185045876, "grad_norm": 822.1325073242188, "learning_rate": 4.874e-06, "loss": 46.778, "step": 24370 }, { "epoch": 0.04924914248314257, "grad_norm": 615.5921630859375, "learning_rate": 4.876e-06, "loss": 20.9036, "step": 24380 }, { "epoch": 0.04926934311582639, "grad_norm": 437.8776550292969, "learning_rate": 4.8780000000000006e-06, "loss": 30.9095, "step": 24390 }, { "epoch": 0.0492895437485102, "grad_norm": 321.6142883300781, "learning_rate": 4.880000000000001e-06, "loss": 19.3862, "step": 24400 }, { "epoch": 0.049309744381194016, "grad_norm": 385.64056396484375, "learning_rate": 4.882000000000001e-06, "loss": 24.997, "step": 24410 }, { "epoch": 0.049329945013877836, "grad_norm": 735.8389892578125, "learning_rate": 4.884e-06, "loss": 29.0518, "step": 24420 }, { "epoch": 0.04935014564656165, "grad_norm": 293.65618896484375, "learning_rate": 4.886e-06, "loss": 23.6545, "step": 24430 }, { "epoch": 0.04937034627924546, "grad_norm": 531.4093017578125, "learning_rate": 4.8880000000000005e-06, "loss": 19.9625, "step": 24440 }, { "epoch": 
0.04939054691192928, "grad_norm": 592.0188598632812, "learning_rate": 4.890000000000001e-06, "loss": 19.9698, "step": 24450 }, { "epoch": 0.049410747544613096, "grad_norm": 1100.807861328125, "learning_rate": 4.892000000000001e-06, "loss": 42.1264, "step": 24460 }, { "epoch": 0.049430948177296916, "grad_norm": 1083.6982421875, "learning_rate": 4.894e-06, "loss": 40.0795, "step": 24470 }, { "epoch": 0.04945114880998073, "grad_norm": 481.7140808105469, "learning_rate": 4.896e-06, "loss": 25.7213, "step": 24480 }, { "epoch": 0.04947134944266454, "grad_norm": 319.0828552246094, "learning_rate": 4.898e-06, "loss": 48.0054, "step": 24490 }, { "epoch": 0.04949155007534836, "grad_norm": 708.9186401367188, "learning_rate": 4.9000000000000005e-06, "loss": 26.4516, "step": 24500 }, { "epoch": 0.049511750708032175, "grad_norm": 923.2367553710938, "learning_rate": 4.902000000000001e-06, "loss": 33.8592, "step": 24510 }, { "epoch": 0.04953195134071599, "grad_norm": 22.35181999206543, "learning_rate": 4.904000000000001e-06, "loss": 21.0809, "step": 24520 }, { "epoch": 0.04955215197339981, "grad_norm": 1950.263671875, "learning_rate": 4.906e-06, "loss": 23.3087, "step": 24530 }, { "epoch": 0.04957235260608362, "grad_norm": 792.556884765625, "learning_rate": 4.908e-06, "loss": 21.0367, "step": 24540 }, { "epoch": 0.04959255323876744, "grad_norm": 700.4295654296875, "learning_rate": 4.9100000000000004e-06, "loss": 15.7874, "step": 24550 }, { "epoch": 0.049612753871451254, "grad_norm": 370.0625, "learning_rate": 4.9120000000000006e-06, "loss": 31.146, "step": 24560 }, { "epoch": 0.04963295450413507, "grad_norm": 179.5491180419922, "learning_rate": 4.914000000000001e-06, "loss": 18.8988, "step": 24570 }, { "epoch": 0.04965315513681889, "grad_norm": 284.713623046875, "learning_rate": 4.916e-06, "loss": 15.084, "step": 24580 }, { "epoch": 0.0496733557695027, "grad_norm": 527.2337646484375, "learning_rate": 4.918e-06, "loss": 36.8712, "step": 24590 }, { "epoch": 0.04969355640218651, "grad_norm": 243.3169708251953, "learning_rate": 4.92e-06, "loss": 21.4062, "step": 24600 }, { "epoch": 0.04971375703487033, "grad_norm": 437.5592041015625, "learning_rate": 4.9220000000000005e-06, "loss": 18.7186, "step": 24610 }, { "epoch": 0.049733957667554146, "grad_norm": 833.3284301757812, "learning_rate": 4.924000000000001e-06, "loss": 23.0917, "step": 24620 }, { "epoch": 0.049754158300237966, "grad_norm": 586.6253662109375, "learning_rate": 4.926e-06, "loss": 13.9485, "step": 24630 }, { "epoch": 0.04977435893292178, "grad_norm": 841.759521484375, "learning_rate": 4.928000000000001e-06, "loss": 35.0147, "step": 24640 }, { "epoch": 0.04979455956560559, "grad_norm": 549.2539672851562, "learning_rate": 4.93e-06, "loss": 34.2477, "step": 24650 }, { "epoch": 0.04981476019828941, "grad_norm": 929.2364501953125, "learning_rate": 4.932e-06, "loss": 23.9616, "step": 24660 }, { "epoch": 0.049834960830973225, "grad_norm": 924.4048461914062, "learning_rate": 4.9340000000000005e-06, "loss": 50.0695, "step": 24670 }, { "epoch": 0.04985516146365704, "grad_norm": 229.69725036621094, "learning_rate": 4.936e-06, "loss": 28.8764, "step": 24680 }, { "epoch": 0.04987536209634086, "grad_norm": 772.1044921875, "learning_rate": 4.938000000000001e-06, "loss": 22.3637, "step": 24690 }, { "epoch": 0.04989556272902467, "grad_norm": 261.3887939453125, "learning_rate": 4.94e-06, "loss": 34.9425, "step": 24700 }, { "epoch": 0.04991576336170849, "grad_norm": 172.681396484375, "learning_rate": 4.942e-06, "loss": 17.9873, "step": 24710 }, { "epoch": 
0.049935963994392304, "grad_norm": 762.8919677734375, "learning_rate": 4.9440000000000004e-06, "loss": 37.2343, "step": 24720 }, { "epoch": 0.04995616462707612, "grad_norm": 111.18013763427734, "learning_rate": 4.946000000000001e-06, "loss": 30.2754, "step": 24730 }, { "epoch": 0.04997636525975994, "grad_norm": 475.6956481933594, "learning_rate": 4.948000000000001e-06, "loss": 24.3305, "step": 24740 }, { "epoch": 0.04999656589244375, "grad_norm": 369.7061767578125, "learning_rate": 4.95e-06, "loss": 25.6071, "step": 24750 }, { "epoch": 0.050016766525127564, "grad_norm": 313.71014404296875, "learning_rate": 4.952e-06, "loss": 38.6992, "step": 24760 }, { "epoch": 0.050036967157811384, "grad_norm": 1185.03857421875, "learning_rate": 4.954e-06, "loss": 42.283, "step": 24770 }, { "epoch": 0.0500571677904952, "grad_norm": 1039.0086669921875, "learning_rate": 4.9560000000000005e-06, "loss": 32.5469, "step": 24780 }, { "epoch": 0.05007736842317902, "grad_norm": 208.5982666015625, "learning_rate": 4.958000000000001e-06, "loss": 37.9008, "step": 24790 }, { "epoch": 0.05009756905586283, "grad_norm": 206.29354858398438, "learning_rate": 4.960000000000001e-06, "loss": 14.8712, "step": 24800 }, { "epoch": 0.05011776968854664, "grad_norm": 524.4188232421875, "learning_rate": 4.962e-06, "loss": 32.2982, "step": 24810 }, { "epoch": 0.05013797032123046, "grad_norm": 579.9808959960938, "learning_rate": 4.964e-06, "loss": 44.4891, "step": 24820 }, { "epoch": 0.050158170953914276, "grad_norm": 1486.5931396484375, "learning_rate": 4.966e-06, "loss": 29.96, "step": 24830 }, { "epoch": 0.05017837158659809, "grad_norm": 689.2452392578125, "learning_rate": 4.9680000000000005e-06, "loss": 30.3831, "step": 24840 }, { "epoch": 0.05019857221928191, "grad_norm": 681.37353515625, "learning_rate": 4.970000000000001e-06, "loss": 37.4051, "step": 24850 }, { "epoch": 0.05021877285196572, "grad_norm": 763.9005126953125, "learning_rate": 4.972e-06, "loss": 31.2682, "step": 24860 }, { "epoch": 0.05023897348464954, "grad_norm": 898.73828125, "learning_rate": 4.974e-06, "loss": 33.807, "step": 24870 }, { "epoch": 0.050259174117333355, "grad_norm": 997.1743774414062, "learning_rate": 4.976e-06, "loss": 32.9283, "step": 24880 }, { "epoch": 0.05027937475001717, "grad_norm": 238.991943359375, "learning_rate": 4.9780000000000005e-06, "loss": 28.0845, "step": 24890 }, { "epoch": 0.05029957538270099, "grad_norm": 625.0321655273438, "learning_rate": 4.980000000000001e-06, "loss": 19.4249, "step": 24900 }, { "epoch": 0.0503197760153848, "grad_norm": 310.7481994628906, "learning_rate": 4.982e-06, "loss": 19.4228, "step": 24910 }, { "epoch": 0.050339976648068614, "grad_norm": 370.5620422363281, "learning_rate": 4.984000000000001e-06, "loss": 30.3915, "step": 24920 }, { "epoch": 0.050360177280752434, "grad_norm": 211.54971313476562, "learning_rate": 4.986e-06, "loss": 10.3256, "step": 24930 }, { "epoch": 0.05038037791343625, "grad_norm": 2.3289971351623535, "learning_rate": 4.988e-06, "loss": 35.4139, "step": 24940 }, { "epoch": 0.05040057854612007, "grad_norm": 155.1433868408203, "learning_rate": 4.9900000000000005e-06, "loss": 26.2599, "step": 24950 }, { "epoch": 0.05042077917880388, "grad_norm": 20.515810012817383, "learning_rate": 4.992e-06, "loss": 30.6685, "step": 24960 }, { "epoch": 0.05044097981148769, "grad_norm": 538.8347778320312, "learning_rate": 4.994000000000001e-06, "loss": 34.4724, "step": 24970 }, { "epoch": 0.05046118044417151, "grad_norm": 293.76153564453125, "learning_rate": 4.996e-06, "loss": 29.6654, "step": 24980 }, { 
"epoch": 0.050481381076855326, "grad_norm": 297.9040222167969, "learning_rate": 4.998e-06, "loss": 33.8897, "step": 24990 }, { "epoch": 0.05050158170953914, "grad_norm": 362.0632629394531, "learning_rate": 5e-06, "loss": 12.0616, "step": 25000 }, { "epoch": 0.05052178234222296, "grad_norm": 240.0549774169922, "learning_rate": 5.0020000000000006e-06, "loss": 17.6489, "step": 25010 }, { "epoch": 0.05054198297490677, "grad_norm": 312.6167297363281, "learning_rate": 5.004e-06, "loss": 29.2342, "step": 25020 }, { "epoch": 0.05056218360759059, "grad_norm": 13.600961685180664, "learning_rate": 5.006000000000001e-06, "loss": 20.8579, "step": 25030 }, { "epoch": 0.050582384240274406, "grad_norm": 524.6953125, "learning_rate": 5.008000000000001e-06, "loss": 36.6206, "step": 25040 }, { "epoch": 0.05060258487295822, "grad_norm": 282.6379089355469, "learning_rate": 5.01e-06, "loss": 16.8534, "step": 25050 }, { "epoch": 0.05062278550564204, "grad_norm": 256.9676208496094, "learning_rate": 5.0120000000000005e-06, "loss": 22.1196, "step": 25060 }, { "epoch": 0.05064298613832585, "grad_norm": 330.0611572265625, "learning_rate": 5.014e-06, "loss": 18.613, "step": 25070 }, { "epoch": 0.050663186771009665, "grad_norm": 2463.804931640625, "learning_rate": 5.016000000000001e-06, "loss": 59.9745, "step": 25080 }, { "epoch": 0.050683387403693485, "grad_norm": 1185.2828369140625, "learning_rate": 5.018000000000001e-06, "loss": 39.1468, "step": 25090 }, { "epoch": 0.0507035880363773, "grad_norm": 601.2512817382812, "learning_rate": 5.02e-06, "loss": 13.1898, "step": 25100 }, { "epoch": 0.05072378866906112, "grad_norm": 360.78369140625, "learning_rate": 5.022e-06, "loss": 21.2782, "step": 25110 }, { "epoch": 0.05074398930174493, "grad_norm": 2.3611438274383545, "learning_rate": 5.024e-06, "loss": 30.023, "step": 25120 }, { "epoch": 0.050764189934428744, "grad_norm": 518.9421997070312, "learning_rate": 5.026000000000001e-06, "loss": 53.1602, "step": 25130 }, { "epoch": 0.050784390567112564, "grad_norm": 327.6961975097656, "learning_rate": 5.028000000000001e-06, "loss": 24.3413, "step": 25140 }, { "epoch": 0.05080459119979638, "grad_norm": 2383.177001953125, "learning_rate": 5.03e-06, "loss": 54.4062, "step": 25150 }, { "epoch": 0.05082479183248019, "grad_norm": 389.3732604980469, "learning_rate": 5.032e-06, "loss": 22.387, "step": 25160 }, { "epoch": 0.05084499246516401, "grad_norm": 52.716522216796875, "learning_rate": 5.0339999999999996e-06, "loss": 64.8972, "step": 25170 }, { "epoch": 0.05086519309784782, "grad_norm": 324.30316162109375, "learning_rate": 5.0360000000000006e-06, "loss": 25.4915, "step": 25180 }, { "epoch": 0.05088539373053164, "grad_norm": 478.4629211425781, "learning_rate": 5.038000000000001e-06, "loss": 16.3394, "step": 25190 }, { "epoch": 0.050905594363215456, "grad_norm": 552.5181274414062, "learning_rate": 5.04e-06, "loss": 20.6003, "step": 25200 }, { "epoch": 0.05092579499589927, "grad_norm": 479.90289306640625, "learning_rate": 5.042e-06, "loss": 18.2474, "step": 25210 }, { "epoch": 0.05094599562858309, "grad_norm": 612.0270385742188, "learning_rate": 5.044e-06, "loss": 27.702, "step": 25220 }, { "epoch": 0.0509661962612669, "grad_norm": 1635.6959228515625, "learning_rate": 5.0460000000000005e-06, "loss": 41.5422, "step": 25230 }, { "epoch": 0.050986396893950715, "grad_norm": 454.4499816894531, "learning_rate": 5.048000000000001e-06, "loss": 31.0758, "step": 25240 }, { "epoch": 0.051006597526634535, "grad_norm": 480.1141357421875, "learning_rate": 5.050000000000001e-06, "loss": 30.5071, 
"step": 25250 }, { "epoch": 0.05102679815931835, "grad_norm": 433.2947082519531, "learning_rate": 5.052e-06, "loss": 27.527, "step": 25260 }, { "epoch": 0.05104699879200217, "grad_norm": 594.5740356445312, "learning_rate": 5.054e-06, "loss": 32.5962, "step": 25270 }, { "epoch": 0.05106719942468598, "grad_norm": 227.5206756591797, "learning_rate": 5.056000000000001e-06, "loss": 24.3647, "step": 25280 }, { "epoch": 0.051087400057369795, "grad_norm": 50.3040657043457, "learning_rate": 5.0580000000000005e-06, "loss": 31.5149, "step": 25290 }, { "epoch": 0.051107600690053615, "grad_norm": 1063.2158203125, "learning_rate": 5.060000000000001e-06, "loss": 40.9574, "step": 25300 }, { "epoch": 0.05112780132273743, "grad_norm": 0.0, "learning_rate": 5.062e-06, "loss": 14.2233, "step": 25310 }, { "epoch": 0.05114800195542124, "grad_norm": 363.94091796875, "learning_rate": 5.064e-06, "loss": 25.1291, "step": 25320 }, { "epoch": 0.05116820258810506, "grad_norm": 220.59275817871094, "learning_rate": 5.066000000000001e-06, "loss": 26.7395, "step": 25330 }, { "epoch": 0.051188403220788874, "grad_norm": 968.5359497070312, "learning_rate": 5.0680000000000004e-06, "loss": 24.0608, "step": 25340 }, { "epoch": 0.051208603853472694, "grad_norm": 459.5111999511719, "learning_rate": 5.070000000000001e-06, "loss": 27.4477, "step": 25350 }, { "epoch": 0.05122880448615651, "grad_norm": 610.3914794921875, "learning_rate": 5.072e-06, "loss": 22.6033, "step": 25360 }, { "epoch": 0.05124900511884032, "grad_norm": 192.24658203125, "learning_rate": 5.074e-06, "loss": 18.4317, "step": 25370 }, { "epoch": 0.05126920575152414, "grad_norm": 610.5033569335938, "learning_rate": 5.076000000000001e-06, "loss": 35.5431, "step": 25380 }, { "epoch": 0.05128940638420795, "grad_norm": 458.8445739746094, "learning_rate": 5.078e-06, "loss": 23.0244, "step": 25390 }, { "epoch": 0.051309607016891766, "grad_norm": 78.25531005859375, "learning_rate": 5.0800000000000005e-06, "loss": 31.0752, "step": 25400 }, { "epoch": 0.051329807649575586, "grad_norm": 114.08269500732422, "learning_rate": 5.082000000000001e-06, "loss": 17.9002, "step": 25410 }, { "epoch": 0.0513500082822594, "grad_norm": 569.843994140625, "learning_rate": 5.084e-06, "loss": 30.2337, "step": 25420 }, { "epoch": 0.05137020891494322, "grad_norm": 561.10107421875, "learning_rate": 5.086000000000001e-06, "loss": 14.2098, "step": 25430 }, { "epoch": 0.05139040954762703, "grad_norm": 566.8888549804688, "learning_rate": 5.088000000000001e-06, "loss": 17.1384, "step": 25440 }, { "epoch": 0.051410610180310845, "grad_norm": 1191.8035888671875, "learning_rate": 5.09e-06, "loss": 26.895, "step": 25450 }, { "epoch": 0.051430810812994665, "grad_norm": 789.3928833007812, "learning_rate": 5.0920000000000005e-06, "loss": 30.4841, "step": 25460 }, { "epoch": 0.05145101144567848, "grad_norm": 739.6887817382812, "learning_rate": 5.094e-06, "loss": 22.1056, "step": 25470 }, { "epoch": 0.05147121207836229, "grad_norm": 736.8461303710938, "learning_rate": 5.096000000000001e-06, "loss": 24.0518, "step": 25480 }, { "epoch": 0.05149141271104611, "grad_norm": 150.58872985839844, "learning_rate": 5.098000000000001e-06, "loss": 31.6776, "step": 25490 }, { "epoch": 0.051511613343729924, "grad_norm": 275.2126159667969, "learning_rate": 5.1e-06, "loss": 14.6784, "step": 25500 }, { "epoch": 0.051531813976413744, "grad_norm": 519.9595336914062, "learning_rate": 5.1020000000000004e-06, "loss": 36.6511, "step": 25510 }, { "epoch": 0.05155201460909756, "grad_norm": 542.5691528320312, "learning_rate": 5.104e-06, 
"loss": 23.9241, "step": 25520 }, { "epoch": 0.05157221524178137, "grad_norm": 875.2158813476562, "learning_rate": 5.106000000000001e-06, "loss": 43.4997, "step": 25530 }, { "epoch": 0.05159241587446519, "grad_norm": 1521.4261474609375, "learning_rate": 5.108000000000001e-06, "loss": 45.2179, "step": 25540 }, { "epoch": 0.051612616507149, "grad_norm": 265.73333740234375, "learning_rate": 5.11e-06, "loss": 32.8597, "step": 25550 }, { "epoch": 0.051632817139832816, "grad_norm": 205.95445251464844, "learning_rate": 5.112e-06, "loss": 31.428, "step": 25560 }, { "epoch": 0.051653017772516636, "grad_norm": 840.8372192382812, "learning_rate": 5.114e-06, "loss": 42.3025, "step": 25570 }, { "epoch": 0.05167321840520045, "grad_norm": 357.9036560058594, "learning_rate": 5.116000000000001e-06, "loss": 34.0856, "step": 25580 }, { "epoch": 0.05169341903788427, "grad_norm": 545.6062622070312, "learning_rate": 5.118000000000001e-06, "loss": 60.5683, "step": 25590 }, { "epoch": 0.05171361967056808, "grad_norm": 466.7431335449219, "learning_rate": 5.12e-06, "loss": 19.1757, "step": 25600 }, { "epoch": 0.051733820303251896, "grad_norm": 368.5271301269531, "learning_rate": 5.122e-06, "loss": 40.7344, "step": 25610 }, { "epoch": 0.051754020935935716, "grad_norm": 164.5128936767578, "learning_rate": 5.124e-06, "loss": 46.2509, "step": 25620 }, { "epoch": 0.05177422156861953, "grad_norm": 1387.013427734375, "learning_rate": 5.126e-06, "loss": 30.2785, "step": 25630 }, { "epoch": 0.05179442220130334, "grad_norm": 488.8163757324219, "learning_rate": 5.128000000000001e-06, "loss": 23.5856, "step": 25640 }, { "epoch": 0.05181462283398716, "grad_norm": 358.9884033203125, "learning_rate": 5.130000000000001e-06, "loss": 44.6823, "step": 25650 }, { "epoch": 0.051834823466670975, "grad_norm": 455.64697265625, "learning_rate": 5.132e-06, "loss": 35.8214, "step": 25660 }, { "epoch": 0.051855024099354795, "grad_norm": 241.6067657470703, "learning_rate": 5.134e-06, "loss": 22.7348, "step": 25670 }, { "epoch": 0.05187522473203861, "grad_norm": 388.8199157714844, "learning_rate": 5.136e-06, "loss": 32.5241, "step": 25680 }, { "epoch": 0.05189542536472242, "grad_norm": 361.8164367675781, "learning_rate": 5.138000000000001e-06, "loss": 18.9053, "step": 25690 }, { "epoch": 0.05191562599740624, "grad_norm": 522.1819458007812, "learning_rate": 5.140000000000001e-06, "loss": 29.0187, "step": 25700 }, { "epoch": 0.051935826630090054, "grad_norm": 1628.5198974609375, "learning_rate": 5.142e-06, "loss": 25.0132, "step": 25710 }, { "epoch": 0.05195602726277387, "grad_norm": 340.24664306640625, "learning_rate": 5.144e-06, "loss": 17.7403, "step": 25720 }, { "epoch": 0.05197622789545769, "grad_norm": 355.582275390625, "learning_rate": 5.1459999999999995e-06, "loss": 27.1877, "step": 25730 }, { "epoch": 0.0519964285281415, "grad_norm": 778.4319458007812, "learning_rate": 5.1480000000000005e-06, "loss": 28.3525, "step": 25740 }, { "epoch": 0.05201662916082532, "grad_norm": 404.39862060546875, "learning_rate": 5.150000000000001e-06, "loss": 29.3748, "step": 25750 }, { "epoch": 0.05203682979350913, "grad_norm": 9.620052337646484, "learning_rate": 5.152e-06, "loss": 29.4001, "step": 25760 }, { "epoch": 0.052057030426192946, "grad_norm": 383.12115478515625, "learning_rate": 5.154e-06, "loss": 25.0565, "step": 25770 }, { "epoch": 0.052077231058876766, "grad_norm": 1093.5860595703125, "learning_rate": 5.156e-06, "loss": 24.3118, "step": 25780 }, { "epoch": 0.05209743169156058, "grad_norm": 761.9879150390625, "learning_rate": 5.158e-06, "loss": 
43.068, "step": 25790 }, { "epoch": 0.05211763232424439, "grad_norm": 204.88401794433594, "learning_rate": 5.1600000000000006e-06, "loss": 22.7605, "step": 25800 }, { "epoch": 0.05213783295692821, "grad_norm": 465.07574462890625, "learning_rate": 5.162000000000001e-06, "loss": 32.0228, "step": 25810 }, { "epoch": 0.052158033589612025, "grad_norm": 30.820846557617188, "learning_rate": 5.164e-06, "loss": 28.8089, "step": 25820 }, { "epoch": 0.052178234222295845, "grad_norm": 315.1690979003906, "learning_rate": 5.166e-06, "loss": 31.5778, "step": 25830 }, { "epoch": 0.05219843485497966, "grad_norm": 662.5512084960938, "learning_rate": 5.168000000000001e-06, "loss": 32.5336, "step": 25840 }, { "epoch": 0.05221863548766347, "grad_norm": 359.843017578125, "learning_rate": 5.1700000000000005e-06, "loss": 31.0579, "step": 25850 }, { "epoch": 0.05223883612034729, "grad_norm": 391.7662658691406, "learning_rate": 5.172000000000001e-06, "loss": 29.8749, "step": 25860 }, { "epoch": 0.052259036753031105, "grad_norm": 750.6495361328125, "learning_rate": 5.174e-06, "loss": 26.085, "step": 25870 }, { "epoch": 0.05227923738571492, "grad_norm": 456.728515625, "learning_rate": 5.176e-06, "loss": 27.9467, "step": 25880 }, { "epoch": 0.05229943801839874, "grad_norm": 314.82476806640625, "learning_rate": 5.178000000000001e-06, "loss": 25.5998, "step": 25890 }, { "epoch": 0.05231963865108255, "grad_norm": 866.3818969726562, "learning_rate": 5.18e-06, "loss": 39.6875, "step": 25900 }, { "epoch": 0.05233983928376637, "grad_norm": 504.4011535644531, "learning_rate": 5.1820000000000005e-06, "loss": 40.7531, "step": 25910 }, { "epoch": 0.052360039916450184, "grad_norm": 598.9783325195312, "learning_rate": 5.184e-06, "loss": 29.8502, "step": 25920 }, { "epoch": 0.052380240549134, "grad_norm": 439.1234130859375, "learning_rate": 5.186e-06, "loss": 26.7706, "step": 25930 }, { "epoch": 0.05240044118181782, "grad_norm": 307.6906433105469, "learning_rate": 5.188000000000001e-06, "loss": 21.9047, "step": 25940 }, { "epoch": 0.05242064181450163, "grad_norm": 665.8407592773438, "learning_rate": 5.19e-06, "loss": 13.5805, "step": 25950 }, { "epoch": 0.05244084244718544, "grad_norm": 433.3497009277344, "learning_rate": 5.1920000000000004e-06, "loss": 25.7976, "step": 25960 }, { "epoch": 0.05246104307986926, "grad_norm": 1.4457974433898926, "learning_rate": 5.194e-06, "loss": 12.8288, "step": 25970 }, { "epoch": 0.052481243712553076, "grad_norm": 427.34283447265625, "learning_rate": 5.196e-06, "loss": 23.3719, "step": 25980 }, { "epoch": 0.052501444345236896, "grad_norm": 48.45802688598633, "learning_rate": 5.198000000000001e-06, "loss": 33.9045, "step": 25990 }, { "epoch": 0.05252164497792071, "grad_norm": 507.5506896972656, "learning_rate": 5.2e-06, "loss": 19.1086, "step": 26000 }, { "epoch": 0.05254184561060452, "grad_norm": 601.8560791015625, "learning_rate": 5.202e-06, "loss": 22.5702, "step": 26010 }, { "epoch": 0.05256204624328834, "grad_norm": 856.8712768554688, "learning_rate": 5.2040000000000005e-06, "loss": 31.4809, "step": 26020 }, { "epoch": 0.052582246875972155, "grad_norm": 367.30010986328125, "learning_rate": 5.206e-06, "loss": 20.9368, "step": 26030 }, { "epoch": 0.05260244750865597, "grad_norm": 485.85498046875, "learning_rate": 5.208000000000001e-06, "loss": 14.6849, "step": 26040 }, { "epoch": 0.05262264814133979, "grad_norm": 573.361328125, "learning_rate": 5.210000000000001e-06, "loss": 15.6729, "step": 26050 }, { "epoch": 0.0526428487740236, "grad_norm": 273.30010986328125, "learning_rate": 5.212e-06, 
"loss": 33.1244, "step": 26060 }, { "epoch": 0.05266304940670742, "grad_norm": 379.7786560058594, "learning_rate": 5.214e-06, "loss": 22.4082, "step": 26070 }, { "epoch": 0.052683250039391234, "grad_norm": 784.4563598632812, "learning_rate": 5.216e-06, "loss": 25.6582, "step": 26080 }, { "epoch": 0.05270345067207505, "grad_norm": 449.5173645019531, "learning_rate": 5.218000000000001e-06, "loss": 25.8007, "step": 26090 }, { "epoch": 0.05272365130475887, "grad_norm": 201.3383026123047, "learning_rate": 5.220000000000001e-06, "loss": 10.4071, "step": 26100 }, { "epoch": 0.05274385193744268, "grad_norm": 658.666259765625, "learning_rate": 5.222e-06, "loss": 16.5268, "step": 26110 }, { "epoch": 0.05276405257012649, "grad_norm": 557.2667236328125, "learning_rate": 5.224e-06, "loss": 26.9591, "step": 26120 }, { "epoch": 0.05278425320281031, "grad_norm": 5351.18505859375, "learning_rate": 5.226e-06, "loss": 48.7041, "step": 26130 }, { "epoch": 0.052804453835494126, "grad_norm": 1389.563720703125, "learning_rate": 5.228000000000001e-06, "loss": 47.4646, "step": 26140 }, { "epoch": 0.052824654468177946, "grad_norm": 272.3363952636719, "learning_rate": 5.230000000000001e-06, "loss": 23.3891, "step": 26150 }, { "epoch": 0.05284485510086176, "grad_norm": 575.9677124023438, "learning_rate": 5.232e-06, "loss": 25.75, "step": 26160 }, { "epoch": 0.05286505573354557, "grad_norm": 689.4549560546875, "learning_rate": 5.234e-06, "loss": 31.6495, "step": 26170 }, { "epoch": 0.05288525636622939, "grad_norm": 23.626998901367188, "learning_rate": 5.236e-06, "loss": 10.3843, "step": 26180 }, { "epoch": 0.052905456998913206, "grad_norm": 392.9609069824219, "learning_rate": 5.2380000000000005e-06, "loss": 21.8561, "step": 26190 }, { "epoch": 0.05292565763159702, "grad_norm": 348.66900634765625, "learning_rate": 5.240000000000001e-06, "loss": 20.0394, "step": 26200 }, { "epoch": 0.05294585826428084, "grad_norm": 677.1149291992188, "learning_rate": 5.242000000000001e-06, "loss": 21.0038, "step": 26210 }, { "epoch": 0.05296605889696465, "grad_norm": 564.3142700195312, "learning_rate": 5.244e-06, "loss": 20.4818, "step": 26220 }, { "epoch": 0.05298625952964847, "grad_norm": 331.88494873046875, "learning_rate": 5.246e-06, "loss": 34.9781, "step": 26230 }, { "epoch": 0.053006460162332285, "grad_norm": 787.8898315429688, "learning_rate": 5.248000000000001e-06, "loss": 18.9843, "step": 26240 }, { "epoch": 0.0530266607950161, "grad_norm": 123.94518280029297, "learning_rate": 5.2500000000000006e-06, "loss": 43.6385, "step": 26250 }, { "epoch": 0.05304686142769992, "grad_norm": 96.2759017944336, "learning_rate": 5.252000000000001e-06, "loss": 30.9721, "step": 26260 }, { "epoch": 0.05306706206038373, "grad_norm": 746.7877197265625, "learning_rate": 5.254e-06, "loss": 35.5861, "step": 26270 }, { "epoch": 0.053087262693067544, "grad_norm": 134.33531188964844, "learning_rate": 5.256e-06, "loss": 16.4126, "step": 26280 }, { "epoch": 0.053107463325751364, "grad_norm": 256.2925109863281, "learning_rate": 5.258000000000001e-06, "loss": 23.2211, "step": 26290 }, { "epoch": 0.05312766395843518, "grad_norm": 440.4696960449219, "learning_rate": 5.2600000000000005e-06, "loss": 32.416, "step": 26300 }, { "epoch": 0.053147864591119, "grad_norm": 204.34536743164062, "learning_rate": 5.262000000000001e-06, "loss": 32.0368, "step": 26310 }, { "epoch": 0.05316806522380281, "grad_norm": 923.6111450195312, "learning_rate": 5.264e-06, "loss": 38.4226, "step": 26320 }, { "epoch": 0.05318826585648662, "grad_norm": 641.5208740234375, "learning_rate": 
5.266e-06, "loss": 26.3782, "step": 26330 }, { "epoch": 0.05320846648917044, "grad_norm": 368.361328125, "learning_rate": 5.268000000000001e-06, "loss": 24.4569, "step": 26340 }, { "epoch": 0.053228667121854256, "grad_norm": 1073.1708984375, "learning_rate": 5.27e-06, "loss": 42.5734, "step": 26350 }, { "epoch": 0.05324886775453807, "grad_norm": 1146.4925537109375, "learning_rate": 5.2720000000000005e-06, "loss": 43.8536, "step": 26360 }, { "epoch": 0.05326906838722189, "grad_norm": 216.10980224609375, "learning_rate": 5.274e-06, "loss": 22.712, "step": 26370 }, { "epoch": 0.0532892690199057, "grad_norm": 3.1001787185668945, "learning_rate": 5.276e-06, "loss": 13.0433, "step": 26380 }, { "epoch": 0.05330946965258952, "grad_norm": 931.7252197265625, "learning_rate": 5.278000000000001e-06, "loss": 17.813, "step": 26390 }, { "epoch": 0.053329670285273335, "grad_norm": 404.67572021484375, "learning_rate": 5.28e-06, "loss": 18.4228, "step": 26400 }, { "epoch": 0.05334987091795715, "grad_norm": 165.9590606689453, "learning_rate": 5.282e-06, "loss": 23.0529, "step": 26410 }, { "epoch": 0.05337007155064097, "grad_norm": 537.7362060546875, "learning_rate": 5.2840000000000006e-06, "loss": 43.9211, "step": 26420 }, { "epoch": 0.05339027218332478, "grad_norm": 621.5162963867188, "learning_rate": 5.286e-06, "loss": 18.1904, "step": 26430 }, { "epoch": 0.053410472816008595, "grad_norm": 333.20269775390625, "learning_rate": 5.288000000000001e-06, "loss": 19.8481, "step": 26440 }, { "epoch": 0.053430673448692415, "grad_norm": 3.1346991062164307, "learning_rate": 5.290000000000001e-06, "loss": 25.3854, "step": 26450 }, { "epoch": 0.05345087408137623, "grad_norm": 352.5599365234375, "learning_rate": 5.292e-06, "loss": 39.946, "step": 26460 }, { "epoch": 0.05347107471406005, "grad_norm": 556.1923828125, "learning_rate": 5.2940000000000005e-06, "loss": 40.7556, "step": 26470 }, { "epoch": 0.05349127534674386, "grad_norm": 610.7059936523438, "learning_rate": 5.296e-06, "loss": 14.8259, "step": 26480 }, { "epoch": 0.053511475979427674, "grad_norm": 366.200439453125, "learning_rate": 5.298000000000001e-06, "loss": 30.2629, "step": 26490 }, { "epoch": 0.053531676612111494, "grad_norm": 455.81463623046875, "learning_rate": 5.300000000000001e-06, "loss": 62.1175, "step": 26500 }, { "epoch": 0.05355187724479531, "grad_norm": 386.99066162109375, "learning_rate": 5.302e-06, "loss": 20.9097, "step": 26510 }, { "epoch": 0.05357207787747912, "grad_norm": 311.41229248046875, "learning_rate": 5.304e-06, "loss": 25.0126, "step": 26520 }, { "epoch": 0.05359227851016294, "grad_norm": 589.1319580078125, "learning_rate": 5.306e-06, "loss": 42.5743, "step": 26530 }, { "epoch": 0.05361247914284675, "grad_norm": 496.6630859375, "learning_rate": 5.308000000000001e-06, "loss": 9.1405, "step": 26540 }, { "epoch": 0.05363267977553057, "grad_norm": 729.2218017578125, "learning_rate": 5.310000000000001e-06, "loss": 31.1716, "step": 26550 }, { "epoch": 0.053652880408214386, "grad_norm": 49.607547760009766, "learning_rate": 5.312e-06, "loss": 28.6454, "step": 26560 }, { "epoch": 0.0536730810408982, "grad_norm": 479.96075439453125, "learning_rate": 5.314e-06, "loss": 20.7474, "step": 26570 }, { "epoch": 0.05369328167358202, "grad_norm": 851.2595825195312, "learning_rate": 5.3160000000000004e-06, "loss": 25.3047, "step": 26580 }, { "epoch": 0.05371348230626583, "grad_norm": 662.3512573242188, "learning_rate": 5.318000000000001e-06, "loss": 26.4715, "step": 26590 }, { "epoch": 0.053733682938949645, "grad_norm": 940.9957275390625, 
"learning_rate": 5.320000000000001e-06, "loss": 42.6483, "step": 26600 }, { "epoch": 0.053753883571633465, "grad_norm": 612.0733642578125, "learning_rate": 5.322000000000001e-06, "loss": 22.5218, "step": 26610 }, { "epoch": 0.05377408420431728, "grad_norm": 237.8662872314453, "learning_rate": 5.324e-06, "loss": 14.5526, "step": 26620 }, { "epoch": 0.0537942848370011, "grad_norm": 835.09423828125, "learning_rate": 5.326e-06, "loss": 26.9957, "step": 26630 }, { "epoch": 0.05381448546968491, "grad_norm": 222.43446350097656, "learning_rate": 5.328000000000001e-06, "loss": 15.0452, "step": 26640 }, { "epoch": 0.053834686102368724, "grad_norm": 700.3217163085938, "learning_rate": 5.330000000000001e-06, "loss": 39.1123, "step": 26650 }, { "epoch": 0.053854886735052544, "grad_norm": 337.627197265625, "learning_rate": 5.332000000000001e-06, "loss": 46.5012, "step": 26660 }, { "epoch": 0.05387508736773636, "grad_norm": 424.27667236328125, "learning_rate": 5.334e-06, "loss": 27.7794, "step": 26670 }, { "epoch": 0.05389528800042017, "grad_norm": 866.9254760742188, "learning_rate": 5.336e-06, "loss": 27.4633, "step": 26680 }, { "epoch": 0.05391548863310399, "grad_norm": 563.7420043945312, "learning_rate": 5.338000000000001e-06, "loss": 32.9408, "step": 26690 }, { "epoch": 0.0539356892657878, "grad_norm": 257.2831726074219, "learning_rate": 5.3400000000000005e-06, "loss": 16.7897, "step": 26700 }, { "epoch": 0.05395588989847162, "grad_norm": 591.7484130859375, "learning_rate": 5.342000000000001e-06, "loss": 39.6581, "step": 26710 }, { "epoch": 0.053976090531155436, "grad_norm": 792.9822387695312, "learning_rate": 5.344e-06, "loss": 24.9949, "step": 26720 }, { "epoch": 0.05399629116383925, "grad_norm": 868.6183471679688, "learning_rate": 5.346e-06, "loss": 22.9792, "step": 26730 }, { "epoch": 0.05401649179652307, "grad_norm": 615.1279296875, "learning_rate": 5.348000000000001e-06, "loss": 33.8277, "step": 26740 }, { "epoch": 0.05403669242920688, "grad_norm": 352.4540710449219, "learning_rate": 5.3500000000000004e-06, "loss": 15.8923, "step": 26750 }, { "epoch": 0.054056893061890696, "grad_norm": 951.6677856445312, "learning_rate": 5.352000000000001e-06, "loss": 30.66, "step": 26760 }, { "epoch": 0.054077093694574516, "grad_norm": 1502.649169921875, "learning_rate": 5.354e-06, "loss": 25.9936, "step": 26770 }, { "epoch": 0.05409729432725833, "grad_norm": 392.32305908203125, "learning_rate": 5.356e-06, "loss": 22.2297, "step": 26780 }, { "epoch": 0.05411749495994215, "grad_norm": 237.27383422851562, "learning_rate": 5.358000000000001e-06, "loss": 17.8372, "step": 26790 }, { "epoch": 0.05413769559262596, "grad_norm": 539.2459106445312, "learning_rate": 5.36e-06, "loss": 25.7624, "step": 26800 }, { "epoch": 0.054157896225309775, "grad_norm": 1028.1107177734375, "learning_rate": 5.3620000000000005e-06, "loss": 33.4106, "step": 26810 }, { "epoch": 0.054178096857993595, "grad_norm": 974.437255859375, "learning_rate": 5.364000000000001e-06, "loss": 20.8408, "step": 26820 }, { "epoch": 0.05419829749067741, "grad_norm": 327.8843078613281, "learning_rate": 5.366e-06, "loss": 24.7608, "step": 26830 }, { "epoch": 0.05421849812336122, "grad_norm": 437.9151306152344, "learning_rate": 5.368000000000001e-06, "loss": 39.364, "step": 26840 }, { "epoch": 0.05423869875604504, "grad_norm": 712.4976806640625, "learning_rate": 5.370000000000001e-06, "loss": 18.5657, "step": 26850 }, { "epoch": 0.054258899388728854, "grad_norm": 497.657470703125, "learning_rate": 5.372e-06, "loss": 15.1377, "step": 26860 }, { "epoch": 
0.054279100021412674, "grad_norm": 400.9642333984375, "learning_rate": 5.3740000000000006e-06, "loss": 27.0087, "step": 26870 }, { "epoch": 0.05429930065409649, "grad_norm": 445.33245849609375, "learning_rate": 5.376e-06, "loss": 26.6219, "step": 26880 }, { "epoch": 0.0543195012867803, "grad_norm": 434.8671875, "learning_rate": 5.378e-06, "loss": 39.3349, "step": 26890 }, { "epoch": 0.05433970191946412, "grad_norm": 150.58702087402344, "learning_rate": 5.380000000000001e-06, "loss": 27.9425, "step": 26900 }, { "epoch": 0.05435990255214793, "grad_norm": 894.13671875, "learning_rate": 5.382e-06, "loss": 22.3421, "step": 26910 }, { "epoch": 0.054380103184831746, "grad_norm": 230.39698791503906, "learning_rate": 5.3840000000000005e-06, "loss": 21.8025, "step": 26920 }, { "epoch": 0.054400303817515566, "grad_norm": 366.86859130859375, "learning_rate": 5.386e-06, "loss": 26.456, "step": 26930 }, { "epoch": 0.05442050445019938, "grad_norm": 938.0694580078125, "learning_rate": 5.388e-06, "loss": 30.2199, "step": 26940 }, { "epoch": 0.0544407050828832, "grad_norm": 464.6997985839844, "learning_rate": 5.390000000000001e-06, "loss": 22.3986, "step": 26950 }, { "epoch": 0.05446090571556701, "grad_norm": 477.46185302734375, "learning_rate": 5.392e-06, "loss": 32.1648, "step": 26960 }, { "epoch": 0.054481106348250825, "grad_norm": 951.6552124023438, "learning_rate": 5.394e-06, "loss": 38.6276, "step": 26970 }, { "epoch": 0.054501306980934645, "grad_norm": 219.8541717529297, "learning_rate": 5.3960000000000005e-06, "loss": 16.5651, "step": 26980 }, { "epoch": 0.05452150761361846, "grad_norm": 503.9372863769531, "learning_rate": 5.398e-06, "loss": 41.0035, "step": 26990 }, { "epoch": 0.05454170824630227, "grad_norm": 800.7157592773438, "learning_rate": 5.400000000000001e-06, "loss": 16.4245, "step": 27000 }, { "epoch": 0.05456190887898609, "grad_norm": 646.6304931640625, "learning_rate": 5.402000000000001e-06, "loss": 20.2078, "step": 27010 }, { "epoch": 0.054582109511669905, "grad_norm": 525.7156372070312, "learning_rate": 5.404e-06, "loss": 34.3341, "step": 27020 }, { "epoch": 0.054602310144353725, "grad_norm": 778.0181884765625, "learning_rate": 5.406e-06, "loss": 26.0942, "step": 27030 }, { "epoch": 0.05462251077703754, "grad_norm": 305.4831848144531, "learning_rate": 5.408e-06, "loss": 14.2851, "step": 27040 }, { "epoch": 0.05464271140972135, "grad_norm": 554.4490966796875, "learning_rate": 5.410000000000001e-06, "loss": 18.3887, "step": 27050 }, { "epoch": 0.05466291204240517, "grad_norm": 271.5789794921875, "learning_rate": 5.412000000000001e-06, "loss": 20.0249, "step": 27060 }, { "epoch": 0.054683112675088984, "grad_norm": 697.5492553710938, "learning_rate": 5.414e-06, "loss": 40.7504, "step": 27070 }, { "epoch": 0.0547033133077728, "grad_norm": 187.14932250976562, "learning_rate": 5.416e-06, "loss": 35.2618, "step": 27080 }, { "epoch": 0.05472351394045662, "grad_norm": 360.2887878417969, "learning_rate": 5.418e-06, "loss": 35.6201, "step": 27090 }, { "epoch": 0.05474371457314043, "grad_norm": 118.17030334472656, "learning_rate": 5.420000000000001e-06, "loss": 34.1158, "step": 27100 }, { "epoch": 0.05476391520582425, "grad_norm": 835.5042114257812, "learning_rate": 5.422000000000001e-06, "loss": 23.7981, "step": 27110 }, { "epoch": 0.05478411583850806, "grad_norm": 293.77093505859375, "learning_rate": 5.424e-06, "loss": 32.4919, "step": 27120 }, { "epoch": 0.054804316471191876, "grad_norm": 352.4538879394531, "learning_rate": 5.426e-06, "loss": 26.8516, "step": 27130 }, { "epoch": 
0.054824517103875696, "grad_norm": 221.325439453125, "learning_rate": 5.4279999999999995e-06, "loss": 34.1622, "step": 27140 }, { "epoch": 0.05484471773655951, "grad_norm": 590.2091064453125, "learning_rate": 5.4300000000000005e-06, "loss": 14.4413, "step": 27150 }, { "epoch": 0.05486491836924332, "grad_norm": 734.5629272460938, "learning_rate": 5.432000000000001e-06, "loss": 21.4118, "step": 27160 }, { "epoch": 0.05488511900192714, "grad_norm": 799.1691284179688, "learning_rate": 5.434e-06, "loss": 41.7156, "step": 27170 }, { "epoch": 0.054905319634610955, "grad_norm": 371.6385803222656, "learning_rate": 5.436e-06, "loss": 21.9291, "step": 27180 }, { "epoch": 0.054925520267294775, "grad_norm": 286.462646484375, "learning_rate": 5.438e-06, "loss": 14.176, "step": 27190 }, { "epoch": 0.05494572089997859, "grad_norm": 326.9037780761719, "learning_rate": 5.4400000000000004e-06, "loss": 28.3926, "step": 27200 }, { "epoch": 0.0549659215326624, "grad_norm": 1330.7659912109375, "learning_rate": 5.442000000000001e-06, "loss": 53.405, "step": 27210 }, { "epoch": 0.05498612216534622, "grad_norm": 321.39202880859375, "learning_rate": 5.444000000000001e-06, "loss": 26.6436, "step": 27220 }, { "epoch": 0.055006322798030034, "grad_norm": 347.4004211425781, "learning_rate": 5.446e-06, "loss": 26.5977, "step": 27230 }, { "epoch": 0.05502652343071385, "grad_norm": 205.0798797607422, "learning_rate": 5.448e-06, "loss": 13.3988, "step": 27240 }, { "epoch": 0.05504672406339767, "grad_norm": 429.0446472167969, "learning_rate": 5.450000000000001e-06, "loss": 37.5181, "step": 27250 }, { "epoch": 0.05506692469608148, "grad_norm": 1414.781494140625, "learning_rate": 5.4520000000000005e-06, "loss": 25.0189, "step": 27260 }, { "epoch": 0.0550871253287653, "grad_norm": 271.6662292480469, "learning_rate": 5.454000000000001e-06, "loss": 14.168, "step": 27270 }, { "epoch": 0.05510732596144911, "grad_norm": 670.027099609375, "learning_rate": 5.456e-06, "loss": 36.156, "step": 27280 }, { "epoch": 0.055127526594132926, "grad_norm": 621.5324096679688, "learning_rate": 5.458e-06, "loss": 31.7824, "step": 27290 }, { "epoch": 0.055147727226816746, "grad_norm": 553.9097290039062, "learning_rate": 5.460000000000001e-06, "loss": 36.1782, "step": 27300 }, { "epoch": 0.05516792785950056, "grad_norm": 89.8622817993164, "learning_rate": 5.462e-06, "loss": 13.8733, "step": 27310 }, { "epoch": 0.05518812849218437, "grad_norm": 298.27581787109375, "learning_rate": 5.4640000000000005e-06, "loss": 53.1346, "step": 27320 }, { "epoch": 0.05520832912486819, "grad_norm": 692.3800048828125, "learning_rate": 5.466e-06, "loss": 27.956, "step": 27330 }, { "epoch": 0.055228529757552006, "grad_norm": 474.23175048828125, "learning_rate": 5.468e-06, "loss": 19.6887, "step": 27340 }, { "epoch": 0.05524873039023582, "grad_norm": 925.652587890625, "learning_rate": 5.470000000000001e-06, "loss": 17.0159, "step": 27350 }, { "epoch": 0.05526893102291964, "grad_norm": 488.05560302734375, "learning_rate": 5.472e-06, "loss": 15.8511, "step": 27360 }, { "epoch": 0.05528913165560345, "grad_norm": 387.1439514160156, "learning_rate": 5.4740000000000004e-06, "loss": 33.8116, "step": 27370 }, { "epoch": 0.05530933228828727, "grad_norm": 455.251220703125, "learning_rate": 5.476000000000001e-06, "loss": 21.4182, "step": 27380 }, { "epoch": 0.055329532920971085, "grad_norm": 940.2066650390625, "learning_rate": 5.478e-06, "loss": 34.1586, "step": 27390 }, { "epoch": 0.0553497335536549, "grad_norm": 457.9208984375, "learning_rate": 5.480000000000001e-06, "loss": 
26.7798, "step": 27400 }, { "epoch": 0.05536993418633872, "grad_norm": 839.2479248046875, "learning_rate": 5.482000000000001e-06, "loss": 20.7492, "step": 27410 }, { "epoch": 0.05539013481902253, "grad_norm": 477.7880554199219, "learning_rate": 5.484e-06, "loss": 40.5622, "step": 27420 }, { "epoch": 0.055410335451706344, "grad_norm": 490.5758056640625, "learning_rate": 5.4860000000000005e-06, "loss": 27.7741, "step": 27430 }, { "epoch": 0.055430536084390164, "grad_norm": 643.51806640625, "learning_rate": 5.488e-06, "loss": 23.1479, "step": 27440 }, { "epoch": 0.05545073671707398, "grad_norm": 431.0497131347656, "learning_rate": 5.490000000000001e-06, "loss": 15.9826, "step": 27450 }, { "epoch": 0.0554709373497578, "grad_norm": 527.7308349609375, "learning_rate": 5.492000000000001e-06, "loss": 39.9755, "step": 27460 }, { "epoch": 0.05549113798244161, "grad_norm": 1432.783203125, "learning_rate": 5.494e-06, "loss": 30.6298, "step": 27470 }, { "epoch": 0.05551133861512542, "grad_norm": 804.2708129882812, "learning_rate": 5.496e-06, "loss": 28.3745, "step": 27480 }, { "epoch": 0.05553153924780924, "grad_norm": 501.12506103515625, "learning_rate": 5.498e-06, "loss": 11.7346, "step": 27490 }, { "epoch": 0.055551739880493056, "grad_norm": 718.4681396484375, "learning_rate": 5.500000000000001e-06, "loss": 34.8546, "step": 27500 }, { "epoch": 0.05557194051317687, "grad_norm": 392.0679626464844, "learning_rate": 5.502000000000001e-06, "loss": 22.2465, "step": 27510 }, { "epoch": 0.05559214114586069, "grad_norm": 1332.5615234375, "learning_rate": 5.504e-06, "loss": 29.1452, "step": 27520 }, { "epoch": 0.0556123417785445, "grad_norm": 188.90589904785156, "learning_rate": 5.506e-06, "loss": 13.0475, "step": 27530 }, { "epoch": 0.05563254241122832, "grad_norm": 598.53857421875, "learning_rate": 5.508e-06, "loss": 50.0424, "step": 27540 }, { "epoch": 0.055652743043912135, "grad_norm": 673.3407592773438, "learning_rate": 5.510000000000001e-06, "loss": 23.7318, "step": 27550 }, { "epoch": 0.05567294367659595, "grad_norm": 392.29791259765625, "learning_rate": 5.512000000000001e-06, "loss": 18.1459, "step": 27560 }, { "epoch": 0.05569314430927977, "grad_norm": 40.12874221801758, "learning_rate": 5.514e-06, "loss": 24.9282, "step": 27570 }, { "epoch": 0.05571334494196358, "grad_norm": 98.27652740478516, "learning_rate": 5.516e-06, "loss": 24.3814, "step": 27580 }, { "epoch": 0.055733545574647395, "grad_norm": 220.50205993652344, "learning_rate": 5.518e-06, "loss": 34.8111, "step": 27590 }, { "epoch": 0.055753746207331215, "grad_norm": 415.7088317871094, "learning_rate": 5.5200000000000005e-06, "loss": 31.3995, "step": 27600 }, { "epoch": 0.05577394684001503, "grad_norm": 536.6727905273438, "learning_rate": 5.522000000000001e-06, "loss": 18.9631, "step": 27610 }, { "epoch": 0.05579414747269885, "grad_norm": 312.55487060546875, "learning_rate": 5.524000000000001e-06, "loss": 16.6055, "step": 27620 }, { "epoch": 0.05581434810538266, "grad_norm": 1098.286376953125, "learning_rate": 5.526e-06, "loss": 32.6939, "step": 27630 }, { "epoch": 0.055834548738066474, "grad_norm": 935.418212890625, "learning_rate": 5.528e-06, "loss": 23.3494, "step": 27640 }, { "epoch": 0.055854749370750294, "grad_norm": 559.8922119140625, "learning_rate": 5.530000000000001e-06, "loss": 37.6899, "step": 27650 }, { "epoch": 0.05587495000343411, "grad_norm": 128.9083251953125, "learning_rate": 5.5320000000000006e-06, "loss": 20.8371, "step": 27660 }, { "epoch": 0.05589515063611792, "grad_norm": 861.5052490234375, "learning_rate": 
5.534000000000001e-06, "loss": 18.674, "step": 27670 }, { "epoch": 0.05591535126880174, "grad_norm": 359.53204345703125, "learning_rate": 5.536e-06, "loss": 27.9867, "step": 27680 }, { "epoch": 0.05593555190148555, "grad_norm": 724.4784545898438, "learning_rate": 5.538e-06, "loss": 22.9095, "step": 27690 }, { "epoch": 0.05595575253416937, "grad_norm": 403.0739440917969, "learning_rate": 5.540000000000001e-06, "loss": 28.841, "step": 27700 }, { "epoch": 0.055975953166853186, "grad_norm": 130.4065704345703, "learning_rate": 5.5420000000000005e-06, "loss": 22.1536, "step": 27710 }, { "epoch": 0.055996153799537, "grad_norm": 407.27777099609375, "learning_rate": 5.544000000000001e-06, "loss": 27.9353, "step": 27720 }, { "epoch": 0.05601635443222082, "grad_norm": 445.86962890625, "learning_rate": 5.546e-06, "loss": 32.5215, "step": 27730 }, { "epoch": 0.05603655506490463, "grad_norm": 201.13758850097656, "learning_rate": 5.548e-06, "loss": 33.0553, "step": 27740 }, { "epoch": 0.056056755697588445, "grad_norm": 782.3064575195312, "learning_rate": 5.550000000000001e-06, "loss": 26.2349, "step": 27750 }, { "epoch": 0.056076956330272265, "grad_norm": 640.4067993164062, "learning_rate": 5.552e-06, "loss": 30.5439, "step": 27760 }, { "epoch": 0.05609715696295608, "grad_norm": 15.612726211547852, "learning_rate": 5.5540000000000005e-06, "loss": 10.874, "step": 27770 }, { "epoch": 0.0561173575956399, "grad_norm": 514.4762573242188, "learning_rate": 5.556000000000001e-06, "loss": 21.3809, "step": 27780 }, { "epoch": 0.05613755822832371, "grad_norm": 238.87423706054688, "learning_rate": 5.558e-06, "loss": 14.5158, "step": 27790 }, { "epoch": 0.056157758861007524, "grad_norm": 221.79910278320312, "learning_rate": 5.560000000000001e-06, "loss": 30.4484, "step": 27800 }, { "epoch": 0.056177959493691344, "grad_norm": 451.6899108886719, "learning_rate": 5.562000000000001e-06, "loss": 18.7587, "step": 27810 }, { "epoch": 0.05619816012637516, "grad_norm": 189.8982391357422, "learning_rate": 5.5640000000000004e-06, "loss": 30.954, "step": 27820 }, { "epoch": 0.05621836075905897, "grad_norm": 1845.3804931640625, "learning_rate": 5.566000000000001e-06, "loss": 35.3793, "step": 27830 }, { "epoch": 0.05623856139174279, "grad_norm": 196.89622497558594, "learning_rate": 5.568e-06, "loss": 38.2027, "step": 27840 }, { "epoch": 0.0562587620244266, "grad_norm": 14.610710144042969, "learning_rate": 5.570000000000001e-06, "loss": 21.6815, "step": 27850 }, { "epoch": 0.05627896265711042, "grad_norm": 822.1419677734375, "learning_rate": 5.572000000000001e-06, "loss": 26.8906, "step": 27860 }, { "epoch": 0.056299163289794237, "grad_norm": 721.5017700195312, "learning_rate": 5.574e-06, "loss": 26.2475, "step": 27870 }, { "epoch": 0.05631936392247805, "grad_norm": 285.6672058105469, "learning_rate": 5.5760000000000005e-06, "loss": 17.6087, "step": 27880 }, { "epoch": 0.05633956455516187, "grad_norm": 549.2312622070312, "learning_rate": 5.578e-06, "loss": 25.9142, "step": 27890 }, { "epoch": 0.05635976518784568, "grad_norm": 359.40325927734375, "learning_rate": 5.580000000000001e-06, "loss": 23.0134, "step": 27900 }, { "epoch": 0.056379965820529496, "grad_norm": 310.5208435058594, "learning_rate": 5.582000000000001e-06, "loss": 25.5474, "step": 27910 }, { "epoch": 0.056400166453213316, "grad_norm": 298.7782287597656, "learning_rate": 5.584e-06, "loss": 21.7849, "step": 27920 }, { "epoch": 0.05642036708589713, "grad_norm": 388.15966796875, "learning_rate": 5.586e-06, "loss": 22.7005, "step": 27930 }, { "epoch": 0.05644056771858095, 
"grad_norm": 476.7641296386719, "learning_rate": 5.588e-06, "loss": 28.5089, "step": 27940 }, { "epoch": 0.05646076835126476, "grad_norm": 199.51100158691406, "learning_rate": 5.590000000000001e-06, "loss": 35.6762, "step": 27950 }, { "epoch": 0.056480968983948575, "grad_norm": 1052.156005859375, "learning_rate": 5.592000000000001e-06, "loss": 16.5051, "step": 27960 }, { "epoch": 0.056501169616632395, "grad_norm": 970.8603515625, "learning_rate": 5.594e-06, "loss": 42.4477, "step": 27970 }, { "epoch": 0.05652137024931621, "grad_norm": 60.73828887939453, "learning_rate": 5.596e-06, "loss": 28.0446, "step": 27980 }, { "epoch": 0.05654157088200002, "grad_norm": 277.9892272949219, "learning_rate": 5.5980000000000004e-06, "loss": 25.1444, "step": 27990 }, { "epoch": 0.05656177151468384, "grad_norm": 363.2925109863281, "learning_rate": 5.600000000000001e-06, "loss": 18.1469, "step": 28000 }, { "epoch": 0.056581972147367654, "grad_norm": 1076.0853271484375, "learning_rate": 5.602000000000001e-06, "loss": 52.504, "step": 28010 }, { "epoch": 0.056602172780051474, "grad_norm": 755.5100708007812, "learning_rate": 5.604000000000001e-06, "loss": 38.6469, "step": 28020 }, { "epoch": 0.05662237341273529, "grad_norm": 540.84033203125, "learning_rate": 5.606e-06, "loss": 24.8698, "step": 28030 }, { "epoch": 0.0566425740454191, "grad_norm": 660.2512817382812, "learning_rate": 5.608e-06, "loss": 30.3958, "step": 28040 }, { "epoch": 0.05666277467810292, "grad_norm": 417.4425048828125, "learning_rate": 5.610000000000001e-06, "loss": 26.1476, "step": 28050 }, { "epoch": 0.05668297531078673, "grad_norm": 597.3268432617188, "learning_rate": 5.612000000000001e-06, "loss": 20.3178, "step": 28060 }, { "epoch": 0.056703175943470546, "grad_norm": 592.311279296875, "learning_rate": 5.614000000000001e-06, "loss": 35.6682, "step": 28070 }, { "epoch": 0.056723376576154366, "grad_norm": 420.3757629394531, "learning_rate": 5.616e-06, "loss": 20.0005, "step": 28080 }, { "epoch": 0.05674357720883818, "grad_norm": 8.282299995422363, "learning_rate": 5.618e-06, "loss": 27.9005, "step": 28090 }, { "epoch": 0.056763777841522, "grad_norm": 512.5679321289062, "learning_rate": 5.620000000000001e-06, "loss": 28.1501, "step": 28100 }, { "epoch": 0.05678397847420581, "grad_norm": 176.4183807373047, "learning_rate": 5.6220000000000006e-06, "loss": 30.822, "step": 28110 }, { "epoch": 0.056804179106889625, "grad_norm": 391.804931640625, "learning_rate": 5.624000000000001e-06, "loss": 28.2099, "step": 28120 }, { "epoch": 0.056824379739573445, "grad_norm": 374.36712646484375, "learning_rate": 5.626e-06, "loss": 32.8329, "step": 28130 }, { "epoch": 0.05684458037225726, "grad_norm": 729.12158203125, "learning_rate": 5.628e-06, "loss": 26.7154, "step": 28140 }, { "epoch": 0.05686478100494107, "grad_norm": 1403.9803466796875, "learning_rate": 5.63e-06, "loss": 22.4779, "step": 28150 }, { "epoch": 0.05688498163762489, "grad_norm": 307.5541076660156, "learning_rate": 5.6320000000000005e-06, "loss": 15.9695, "step": 28160 }, { "epoch": 0.056905182270308705, "grad_norm": 62.129268646240234, "learning_rate": 5.634000000000001e-06, "loss": 26.3056, "step": 28170 }, { "epoch": 0.056925382902992525, "grad_norm": 1090.5374755859375, "learning_rate": 5.636000000000001e-06, "loss": 14.7648, "step": 28180 }, { "epoch": 0.05694558353567634, "grad_norm": 1130.8397216796875, "learning_rate": 5.638e-06, "loss": 29.261, "step": 28190 }, { "epoch": 0.05696578416836015, "grad_norm": 694.0996704101562, "learning_rate": 5.64e-06, "loss": 46.7732, "step": 28200 }, { 
"epoch": 0.05698598480104397, "grad_norm": 255.87850952148438, "learning_rate": 5.642000000000001e-06, "loss": 23.1361, "step": 28210 }, { "epoch": 0.057006185433727784, "grad_norm": 1019.9904174804688, "learning_rate": 5.6440000000000005e-06, "loss": 23.3889, "step": 28220 }, { "epoch": 0.0570263860664116, "grad_norm": 362.83929443359375, "learning_rate": 5.646000000000001e-06, "loss": 27.8546, "step": 28230 }, { "epoch": 0.05704658669909542, "grad_norm": 252.81332397460938, "learning_rate": 5.648e-06, "loss": 20.3522, "step": 28240 }, { "epoch": 0.05706678733177923, "grad_norm": 412.9476623535156, "learning_rate": 5.65e-06, "loss": 28.927, "step": 28250 }, { "epoch": 0.05708698796446305, "grad_norm": 492.25262451171875, "learning_rate": 5.652000000000001e-06, "loss": 24.6241, "step": 28260 }, { "epoch": 0.05710718859714686, "grad_norm": 1780.2408447265625, "learning_rate": 5.654e-06, "loss": 53.1644, "step": 28270 }, { "epoch": 0.057127389229830676, "grad_norm": 1033.7132568359375, "learning_rate": 5.6560000000000006e-06, "loss": 26.9228, "step": 28280 }, { "epoch": 0.057147589862514496, "grad_norm": 213.87942504882812, "learning_rate": 5.658e-06, "loss": 23.8045, "step": 28290 }, { "epoch": 0.05716779049519831, "grad_norm": 137.75071716308594, "learning_rate": 5.66e-06, "loss": 22.3189, "step": 28300 }, { "epoch": 0.05718799112788212, "grad_norm": 113.6178207397461, "learning_rate": 5.662000000000001e-06, "loss": 30.8098, "step": 28310 }, { "epoch": 0.05720819176056594, "grad_norm": 164.42572021484375, "learning_rate": 5.664e-06, "loss": 7.4192, "step": 28320 }, { "epoch": 0.057228392393249755, "grad_norm": 1245.4927978515625, "learning_rate": 5.6660000000000005e-06, "loss": 25.0924, "step": 28330 }, { "epoch": 0.057248593025933575, "grad_norm": 1074.7960205078125, "learning_rate": 5.668e-06, "loss": 31.0009, "step": 28340 }, { "epoch": 0.05726879365861739, "grad_norm": 379.931396484375, "learning_rate": 5.67e-06, "loss": 20.8451, "step": 28350 }, { "epoch": 0.0572889942913012, "grad_norm": 500.2812194824219, "learning_rate": 5.672000000000001e-06, "loss": 24.8504, "step": 28360 }, { "epoch": 0.05730919492398502, "grad_norm": 1326.780029296875, "learning_rate": 5.674e-06, "loss": 33.9426, "step": 28370 }, { "epoch": 0.057329395556668834, "grad_norm": 334.5964660644531, "learning_rate": 5.676e-06, "loss": 19.2081, "step": 28380 }, { "epoch": 0.05734959618935265, "grad_norm": 432.8052673339844, "learning_rate": 5.6780000000000005e-06, "loss": 14.1455, "step": 28390 }, { "epoch": 0.05736979682203647, "grad_norm": 398.933837890625, "learning_rate": 5.68e-06, "loss": 25.0682, "step": 28400 }, { "epoch": 0.05738999745472028, "grad_norm": 225.60511779785156, "learning_rate": 5.682000000000001e-06, "loss": 10.3286, "step": 28410 }, { "epoch": 0.0574101980874041, "grad_norm": 473.37274169921875, "learning_rate": 5.684000000000001e-06, "loss": 46.4121, "step": 28420 }, { "epoch": 0.05743039872008791, "grad_norm": 379.1407775878906, "learning_rate": 5.686e-06, "loss": 28.0916, "step": 28430 }, { "epoch": 0.057450599352771727, "grad_norm": 263.548828125, "learning_rate": 5.6880000000000004e-06, "loss": 15.1848, "step": 28440 }, { "epoch": 0.057470799985455547, "grad_norm": 196.68814086914062, "learning_rate": 5.69e-06, "loss": 18.4118, "step": 28450 }, { "epoch": 0.05749100061813936, "grad_norm": 345.37841796875, "learning_rate": 5.692000000000001e-06, "loss": 27.8511, "step": 28460 }, { "epoch": 0.05751120125082317, "grad_norm": 506.7391052246094, "learning_rate": 5.694000000000001e-06, "loss": 
39.8623, "step": 28470 }, { "epoch": 0.05753140188350699, "grad_norm": 420.26214599609375, "learning_rate": 5.696e-06, "loss": 24.7865, "step": 28480 }, { "epoch": 0.057551602516190806, "grad_norm": 0.0, "learning_rate": 5.698e-06, "loss": 24.2103, "step": 28490 }, { "epoch": 0.057571803148874626, "grad_norm": 754.0606689453125, "learning_rate": 5.7e-06, "loss": 28.7258, "step": 28500 }, { "epoch": 0.05759200378155844, "grad_norm": 399.36328125, "learning_rate": 5.702000000000001e-06, "loss": 31.9229, "step": 28510 }, { "epoch": 0.05761220441424225, "grad_norm": 570.1122436523438, "learning_rate": 5.704000000000001e-06, "loss": 30.0433, "step": 28520 }, { "epoch": 0.05763240504692607, "grad_norm": 348.0009765625, "learning_rate": 5.706e-06, "loss": 22.6739, "step": 28530 }, { "epoch": 0.057652605679609885, "grad_norm": 48.64787292480469, "learning_rate": 5.708e-06, "loss": 27.7151, "step": 28540 }, { "epoch": 0.0576728063122937, "grad_norm": 294.9898986816406, "learning_rate": 5.71e-06, "loss": 31.1165, "step": 28550 }, { "epoch": 0.05769300694497752, "grad_norm": 37.3386116027832, "learning_rate": 5.7120000000000005e-06, "loss": 31.529, "step": 28560 }, { "epoch": 0.05771320757766133, "grad_norm": 1066.211669921875, "learning_rate": 5.714000000000001e-06, "loss": 27.5422, "step": 28570 }, { "epoch": 0.05773340821034515, "grad_norm": 183.66064453125, "learning_rate": 5.716000000000001e-06, "loss": 44.7641, "step": 28580 }, { "epoch": 0.057753608843028964, "grad_norm": 352.4803771972656, "learning_rate": 5.718e-06, "loss": 28.767, "step": 28590 }, { "epoch": 0.05777380947571278, "grad_norm": 391.5818176269531, "learning_rate": 5.72e-06, "loss": 29.0731, "step": 28600 }, { "epoch": 0.0577940101083966, "grad_norm": 1137.9501953125, "learning_rate": 5.722000000000001e-06, "loss": 22.2734, "step": 28610 }, { "epoch": 0.05781421074108041, "grad_norm": 1351.6412353515625, "learning_rate": 5.724000000000001e-06, "loss": 29.5629, "step": 28620 }, { "epoch": 0.05783441137376422, "grad_norm": 258.6549072265625, "learning_rate": 5.726000000000001e-06, "loss": 22.9389, "step": 28630 }, { "epoch": 0.05785461200644804, "grad_norm": 661.7940063476562, "learning_rate": 5.728e-06, "loss": 20.8142, "step": 28640 }, { "epoch": 0.057874812639131856, "grad_norm": 267.9925231933594, "learning_rate": 5.73e-06, "loss": 17.6126, "step": 28650 }, { "epoch": 0.057895013271815676, "grad_norm": 1019.2171630859375, "learning_rate": 5.732000000000001e-06, "loss": 17.9472, "step": 28660 }, { "epoch": 0.05791521390449949, "grad_norm": 1014.6627197265625, "learning_rate": 5.7340000000000005e-06, "loss": 36.2276, "step": 28670 }, { "epoch": 0.0579354145371833, "grad_norm": 274.5294494628906, "learning_rate": 5.736000000000001e-06, "loss": 14.5732, "step": 28680 }, { "epoch": 0.05795561516986712, "grad_norm": 872.638916015625, "learning_rate": 5.738e-06, "loss": 26.672, "step": 28690 }, { "epoch": 0.057975815802550935, "grad_norm": 299.70025634765625, "learning_rate": 5.74e-06, "loss": 47.1673, "step": 28700 }, { "epoch": 0.05799601643523475, "grad_norm": 427.842041015625, "learning_rate": 5.742000000000001e-06, "loss": 33.3254, "step": 28710 }, { "epoch": 0.05801621706791857, "grad_norm": 760.5520629882812, "learning_rate": 5.744e-06, "loss": 31.6733, "step": 28720 }, { "epoch": 0.05803641770060238, "grad_norm": 713.591552734375, "learning_rate": 5.7460000000000006e-06, "loss": 22.4928, "step": 28730 }, { "epoch": 0.0580566183332862, "grad_norm": 800.7971801757812, "learning_rate": 5.748e-06, "loss": 25.0551, "step": 28740 
}, { "epoch": 0.058076818965970015, "grad_norm": 345.371826171875, "learning_rate": 5.75e-06, "loss": 27.9757, "step": 28750 }, { "epoch": 0.05809701959865383, "grad_norm": 607.2361450195312, "learning_rate": 5.752000000000001e-06, "loss": 35.9077, "step": 28760 }, { "epoch": 0.05811722023133765, "grad_norm": 549.5829467773438, "learning_rate": 5.754e-06, "loss": 30.5123, "step": 28770 }, { "epoch": 0.05813742086402146, "grad_norm": 1182.8603515625, "learning_rate": 5.7560000000000005e-06, "loss": 29.0875, "step": 28780 }, { "epoch": 0.058157621496705274, "grad_norm": 617.0576171875, "learning_rate": 5.758000000000001e-06, "loss": 24.269, "step": 28790 }, { "epoch": 0.058177822129389094, "grad_norm": 317.1587219238281, "learning_rate": 5.76e-06, "loss": 22.0337, "step": 28800 }, { "epoch": 0.05819802276207291, "grad_norm": 493.50311279296875, "learning_rate": 5.762000000000001e-06, "loss": 36.1685, "step": 28810 }, { "epoch": 0.05821822339475673, "grad_norm": 894.058837890625, "learning_rate": 5.764000000000001e-06, "loss": 33.8523, "step": 28820 }, { "epoch": 0.05823842402744054, "grad_norm": 114.72391510009766, "learning_rate": 5.766e-06, "loss": 27.9367, "step": 28830 }, { "epoch": 0.05825862466012435, "grad_norm": 566.5399169921875, "learning_rate": 5.7680000000000005e-06, "loss": 24.7258, "step": 28840 }, { "epoch": 0.05827882529280817, "grad_norm": 473.056396484375, "learning_rate": 5.77e-06, "loss": 20.1041, "step": 28850 }, { "epoch": 0.058299025925491986, "grad_norm": 318.22601318359375, "learning_rate": 5.772000000000001e-06, "loss": 15.9743, "step": 28860 }, { "epoch": 0.0583192265581758, "grad_norm": 528.20361328125, "learning_rate": 5.774000000000001e-06, "loss": 26.7739, "step": 28870 }, { "epoch": 0.05833942719085962, "grad_norm": 289.8639221191406, "learning_rate": 5.776e-06, "loss": 17.5716, "step": 28880 }, { "epoch": 0.05835962782354343, "grad_norm": 610.1603393554688, "learning_rate": 5.778e-06, "loss": 19.3612, "step": 28890 }, { "epoch": 0.05837982845622725, "grad_norm": 513.0499267578125, "learning_rate": 5.78e-06, "loss": 22.6479, "step": 28900 }, { "epoch": 0.058400029088911065, "grad_norm": 651.9042358398438, "learning_rate": 5.782000000000001e-06, "loss": 36.6599, "step": 28910 }, { "epoch": 0.05842022972159488, "grad_norm": 806.208740234375, "learning_rate": 5.784000000000001e-06, "loss": 21.1247, "step": 28920 }, { "epoch": 0.0584404303542787, "grad_norm": 488.8306884765625, "learning_rate": 5.786e-06, "loss": 20.5835, "step": 28930 }, { "epoch": 0.05846063098696251, "grad_norm": 274.180419921875, "learning_rate": 5.788e-06, "loss": 19.2631, "step": 28940 }, { "epoch": 0.058480831619646324, "grad_norm": 688.8336181640625, "learning_rate": 5.7900000000000005e-06, "loss": 40.8068, "step": 28950 }, { "epoch": 0.058501032252330144, "grad_norm": 472.7912902832031, "learning_rate": 5.792000000000001e-06, "loss": 60.7401, "step": 28960 }, { "epoch": 0.05852123288501396, "grad_norm": 250.30914306640625, "learning_rate": 5.794000000000001e-06, "loss": 21.1607, "step": 28970 }, { "epoch": 0.05854143351769778, "grad_norm": 152.17755126953125, "learning_rate": 5.796000000000001e-06, "loss": 37.4124, "step": 28980 }, { "epoch": 0.05856163415038159, "grad_norm": 401.095458984375, "learning_rate": 5.798e-06, "loss": 29.4962, "step": 28990 }, { "epoch": 0.058581834783065403, "grad_norm": 544.0389404296875, "learning_rate": 5.8e-06, "loss": 14.7385, "step": 29000 }, { "epoch": 0.058602035415749223, "grad_norm": 470.6712341308594, "learning_rate": 5.802000000000001e-06, "loss": 
17.5305, "step": 29010 }, { "epoch": 0.05862223604843304, "grad_norm": 594.8572998046875, "learning_rate": 5.804000000000001e-06, "loss": 18.4892, "step": 29020 }, { "epoch": 0.05864243668111685, "grad_norm": 411.4410400390625, "learning_rate": 5.806000000000001e-06, "loss": 32.5558, "step": 29030 }, { "epoch": 0.05866263731380067, "grad_norm": 666.5599365234375, "learning_rate": 5.808e-06, "loss": 29.7149, "step": 29040 }, { "epoch": 0.05868283794648448, "grad_norm": 1009.18603515625, "learning_rate": 5.81e-06, "loss": 29.5219, "step": 29050 }, { "epoch": 0.0587030385791683, "grad_norm": 312.2843017578125, "learning_rate": 5.812000000000001e-06, "loss": 24.1587, "step": 29060 }, { "epoch": 0.058723239211852116, "grad_norm": 183.3750457763672, "learning_rate": 5.814000000000001e-06, "loss": 25.3271, "step": 29070 }, { "epoch": 0.05874343984453593, "grad_norm": 323.680419921875, "learning_rate": 5.816000000000001e-06, "loss": 22.3888, "step": 29080 }, { "epoch": 0.05876364047721975, "grad_norm": 306.0316162109375, "learning_rate": 5.818e-06, "loss": 23.2418, "step": 29090 }, { "epoch": 0.05878384110990356, "grad_norm": 492.77154541015625, "learning_rate": 5.82e-06, "loss": 27.0235, "step": 29100 }, { "epoch": 0.058804041742587375, "grad_norm": 844.0591430664062, "learning_rate": 5.822000000000001e-06, "loss": 23.0761, "step": 29110 }, { "epoch": 0.058824242375271195, "grad_norm": 551.2830810546875, "learning_rate": 5.8240000000000005e-06, "loss": 30.2424, "step": 29120 }, { "epoch": 0.05884444300795501, "grad_norm": 521.5521850585938, "learning_rate": 5.826000000000001e-06, "loss": 24.9471, "step": 29130 }, { "epoch": 0.05886464364063883, "grad_norm": 288.21112060546875, "learning_rate": 5.828e-06, "loss": 15.9028, "step": 29140 }, { "epoch": 0.05888484427332264, "grad_norm": 757.6616821289062, "learning_rate": 5.83e-06, "loss": 23.7516, "step": 29150 }, { "epoch": 0.058905044906006454, "grad_norm": 628.8690795898438, "learning_rate": 5.832000000000001e-06, "loss": 15.357, "step": 29160 }, { "epoch": 0.058925245538690274, "grad_norm": 128.40367126464844, "learning_rate": 5.834e-06, "loss": 19.509, "step": 29170 }, { "epoch": 0.05894544617137409, "grad_norm": 635.416259765625, "learning_rate": 5.8360000000000005e-06, "loss": 22.5968, "step": 29180 }, { "epoch": 0.0589656468040579, "grad_norm": 470.54034423828125, "learning_rate": 5.838000000000001e-06, "loss": 32.891, "step": 29190 }, { "epoch": 0.05898584743674172, "grad_norm": 869.0327758789062, "learning_rate": 5.84e-06, "loss": 33.6656, "step": 29200 }, { "epoch": 0.05900604806942553, "grad_norm": 693.0562133789062, "learning_rate": 5.842000000000001e-06, "loss": 27.4441, "step": 29210 }, { "epoch": 0.05902624870210935, "grad_norm": 577.6492919921875, "learning_rate": 5.844000000000001e-06, "loss": 17.9136, "step": 29220 }, { "epoch": 0.059046449334793166, "grad_norm": 654.5208129882812, "learning_rate": 5.8460000000000004e-06, "loss": 30.9071, "step": 29230 }, { "epoch": 0.05906664996747698, "grad_norm": 454.5464172363281, "learning_rate": 5.848000000000001e-06, "loss": 16.8235, "step": 29240 }, { "epoch": 0.0590868506001608, "grad_norm": 664.4609375, "learning_rate": 5.85e-06, "loss": 14.7665, "step": 29250 }, { "epoch": 0.05910705123284461, "grad_norm": 202.01416015625, "learning_rate": 5.852000000000001e-06, "loss": 15.4509, "step": 29260 }, { "epoch": 0.059127251865528425, "grad_norm": 292.506103515625, "learning_rate": 5.854000000000001e-06, "loss": 31.813, "step": 29270 }, { "epoch": 0.059147452498212245, "grad_norm": 
258.66070556640625, "learning_rate": 5.856e-06, "loss": 20.3693, "step": 29280 }, { "epoch": 0.05916765313089606, "grad_norm": 467.66607666015625, "learning_rate": 5.8580000000000005e-06, "loss": 24.9156, "step": 29290 }, { "epoch": 0.05918785376357988, "grad_norm": 478.3450012207031, "learning_rate": 5.86e-06, "loss": 16.7802, "step": 29300 }, { "epoch": 0.05920805439626369, "grad_norm": 183.59373474121094, "learning_rate": 5.862000000000001e-06, "loss": 24.7533, "step": 29310 }, { "epoch": 0.059228255028947505, "grad_norm": 174.06256103515625, "learning_rate": 5.864000000000001e-06, "loss": 18.6498, "step": 29320 }, { "epoch": 0.059248455661631325, "grad_norm": 600.2014770507812, "learning_rate": 5.866e-06, "loss": 22.7986, "step": 29330 }, { "epoch": 0.05926865629431514, "grad_norm": 542.9607543945312, "learning_rate": 5.868e-06, "loss": 31.9283, "step": 29340 }, { "epoch": 0.05928885692699895, "grad_norm": 691.7803955078125, "learning_rate": 5.8700000000000005e-06, "loss": 26.8886, "step": 29350 }, { "epoch": 0.05930905755968277, "grad_norm": 85.30669403076172, "learning_rate": 5.872000000000001e-06, "loss": 34.907, "step": 29360 }, { "epoch": 0.059329258192366584, "grad_norm": 566.6461791992188, "learning_rate": 5.874000000000001e-06, "loss": 19.5393, "step": 29370 }, { "epoch": 0.059349458825050404, "grad_norm": 593.9346313476562, "learning_rate": 5.876000000000001e-06, "loss": 30.4646, "step": 29380 }, { "epoch": 0.05936965945773422, "grad_norm": 287.89715576171875, "learning_rate": 5.878e-06, "loss": 32.8259, "step": 29390 }, { "epoch": 0.05938986009041803, "grad_norm": 294.3763122558594, "learning_rate": 5.8800000000000005e-06, "loss": 38.1019, "step": 29400 }, { "epoch": 0.05941006072310185, "grad_norm": 315.1238098144531, "learning_rate": 5.882e-06, "loss": 27.3966, "step": 29410 }, { "epoch": 0.05943026135578566, "grad_norm": 270.62091064453125, "learning_rate": 5.884000000000001e-06, "loss": 25.031, "step": 29420 }, { "epoch": 0.059450461988469476, "grad_norm": 895.1936645507812, "learning_rate": 5.886000000000001e-06, "loss": 35.2896, "step": 29430 }, { "epoch": 0.059470662621153296, "grad_norm": 1002.9981079101562, "learning_rate": 5.888e-06, "loss": 23.6158, "step": 29440 }, { "epoch": 0.05949086325383711, "grad_norm": 1407.23876953125, "learning_rate": 5.89e-06, "loss": 29.6958, "step": 29450 }, { "epoch": 0.05951106388652093, "grad_norm": 439.24884033203125, "learning_rate": 5.892e-06, "loss": 28.9774, "step": 29460 }, { "epoch": 0.05953126451920474, "grad_norm": 423.88079833984375, "learning_rate": 5.894000000000001e-06, "loss": 21.4935, "step": 29470 }, { "epoch": 0.059551465151888555, "grad_norm": 906.2281494140625, "learning_rate": 5.896000000000001e-06, "loss": 26.5107, "step": 29480 }, { "epoch": 0.059571665784572375, "grad_norm": 526.7550048828125, "learning_rate": 5.898e-06, "loss": 14.423, "step": 29490 }, { "epoch": 0.05959186641725619, "grad_norm": 251.88986206054688, "learning_rate": 5.9e-06, "loss": 28.0242, "step": 29500 }, { "epoch": 0.05961206704994, "grad_norm": 336.952880859375, "learning_rate": 5.9019999999999996e-06, "loss": 28.3279, "step": 29510 }, { "epoch": 0.05963226768262382, "grad_norm": 618.0857543945312, "learning_rate": 5.9040000000000006e-06, "loss": 32.9698, "step": 29520 }, { "epoch": 0.059652468315307634, "grad_norm": 900.3553466796875, "learning_rate": 5.906000000000001e-06, "loss": 32.9725, "step": 29530 }, { "epoch": 0.059672668947991454, "grad_norm": 574.4266967773438, "learning_rate": 5.908e-06, "loss": 24.3543, "step": 29540 }, { 
"epoch": 0.05969286958067527, "grad_norm": 407.38482666015625, "learning_rate": 5.91e-06, "loss": 22.2853, "step": 29550 }, { "epoch": 0.05971307021335908, "grad_norm": 427.1022033691406, "learning_rate": 5.912e-06, "loss": 16.2504, "step": 29560 }, { "epoch": 0.0597332708460429, "grad_norm": 117.8338394165039, "learning_rate": 5.9140000000000005e-06, "loss": 19.4939, "step": 29570 }, { "epoch": 0.059753471478726713, "grad_norm": 780.7388916015625, "learning_rate": 5.916000000000001e-06, "loss": 36.2819, "step": 29580 }, { "epoch": 0.05977367211141053, "grad_norm": 1370.9173583984375, "learning_rate": 5.918000000000001e-06, "loss": 23.8863, "step": 29590 }, { "epoch": 0.05979387274409435, "grad_norm": 487.5325927734375, "learning_rate": 5.92e-06, "loss": 36.2767, "step": 29600 }, { "epoch": 0.05981407337677816, "grad_norm": 534.7539672851562, "learning_rate": 5.922e-06, "loss": 21.9024, "step": 29610 }, { "epoch": 0.05983427400946198, "grad_norm": 304.4093017578125, "learning_rate": 5.924000000000001e-06, "loss": 35.4952, "step": 29620 }, { "epoch": 0.05985447464214579, "grad_norm": 727.6774291992188, "learning_rate": 5.9260000000000005e-06, "loss": 45.8392, "step": 29630 }, { "epoch": 0.059874675274829606, "grad_norm": 239.7957763671875, "learning_rate": 5.928000000000001e-06, "loss": 16.7934, "step": 29640 }, { "epoch": 0.059894875907513426, "grad_norm": 315.3820495605469, "learning_rate": 5.93e-06, "loss": 23.4135, "step": 29650 }, { "epoch": 0.05991507654019724, "grad_norm": 520.0550537109375, "learning_rate": 5.932e-06, "loss": 20.385, "step": 29660 }, { "epoch": 0.05993527717288105, "grad_norm": 549.9379272460938, "learning_rate": 5.934000000000001e-06, "loss": 28.5402, "step": 29670 }, { "epoch": 0.05995547780556487, "grad_norm": 255.78863525390625, "learning_rate": 5.9360000000000004e-06, "loss": 22.6324, "step": 29680 }, { "epoch": 0.059975678438248685, "grad_norm": 328.67315673828125, "learning_rate": 5.9380000000000006e-06, "loss": 38.5843, "step": 29690 }, { "epoch": 0.059995879070932505, "grad_norm": 311.1226501464844, "learning_rate": 5.94e-06, "loss": 24.8409, "step": 29700 }, { "epoch": 0.06001607970361632, "grad_norm": 496.9958190917969, "learning_rate": 5.942e-06, "loss": 19.7917, "step": 29710 }, { "epoch": 0.06003628033630013, "grad_norm": 999.963623046875, "learning_rate": 5.944000000000001e-06, "loss": 24.5253, "step": 29720 }, { "epoch": 0.06005648096898395, "grad_norm": 1216.32470703125, "learning_rate": 5.946e-06, "loss": 29.5817, "step": 29730 }, { "epoch": 0.060076681601667764, "grad_norm": 298.50994873046875, "learning_rate": 5.9480000000000005e-06, "loss": 31.0966, "step": 29740 }, { "epoch": 0.06009688223435158, "grad_norm": 653.6453247070312, "learning_rate": 5.950000000000001e-06, "loss": 25.8964, "step": 29750 }, { "epoch": 0.0601170828670354, "grad_norm": 553.80078125, "learning_rate": 5.952e-06, "loss": 18.5255, "step": 29760 }, { "epoch": 0.06013728349971921, "grad_norm": 333.39764404296875, "learning_rate": 5.954000000000001e-06, "loss": 23.6987, "step": 29770 }, { "epoch": 0.06015748413240303, "grad_norm": 514.716064453125, "learning_rate": 5.956000000000001e-06, "loss": 28.1732, "step": 29780 }, { "epoch": 0.06017768476508684, "grad_norm": 538.5047607421875, "learning_rate": 5.958e-06, "loss": 29.9077, "step": 29790 }, { "epoch": 0.060197885397770656, "grad_norm": 434.8916931152344, "learning_rate": 5.9600000000000005e-06, "loss": 26.0934, "step": 29800 }, { "epoch": 0.060218086030454476, "grad_norm": 4665.8720703125, "learning_rate": 5.962e-06, 
"loss": 33.1499, "step": 29810 }, { "epoch": 0.06023828666313829, "grad_norm": 1068.4248046875, "learning_rate": 5.964000000000001e-06, "loss": 28.2813, "step": 29820 }, { "epoch": 0.0602584872958221, "grad_norm": 1261.244384765625, "learning_rate": 5.966000000000001e-06, "loss": 31.9004, "step": 29830 }, { "epoch": 0.06027868792850592, "grad_norm": 410.3675231933594, "learning_rate": 5.968e-06, "loss": 24.5451, "step": 29840 }, { "epoch": 0.060298888561189735, "grad_norm": 367.4793701171875, "learning_rate": 5.9700000000000004e-06, "loss": 20.9293, "step": 29850 }, { "epoch": 0.060319089193873555, "grad_norm": 807.4345092773438, "learning_rate": 5.972e-06, "loss": 36.9779, "step": 29860 }, { "epoch": 0.06033928982655737, "grad_norm": 1063.368896484375, "learning_rate": 5.974000000000001e-06, "loss": 31.3741, "step": 29870 }, { "epoch": 0.06035949045924118, "grad_norm": 320.45513916015625, "learning_rate": 5.976000000000001e-06, "loss": 14.5253, "step": 29880 }, { "epoch": 0.060379691091925, "grad_norm": 337.5420227050781, "learning_rate": 5.978e-06, "loss": 22.4189, "step": 29890 }, { "epoch": 0.060399891724608815, "grad_norm": 515.797607421875, "learning_rate": 5.98e-06, "loss": 28.1367, "step": 29900 }, { "epoch": 0.06042009235729263, "grad_norm": 351.6451721191406, "learning_rate": 5.982e-06, "loss": 16.7312, "step": 29910 }, { "epoch": 0.06044029298997645, "grad_norm": 226.80123901367188, "learning_rate": 5.984000000000001e-06, "loss": 19.0455, "step": 29920 }, { "epoch": 0.06046049362266026, "grad_norm": 372.4876403808594, "learning_rate": 5.986000000000001e-06, "loss": 45.9898, "step": 29930 }, { "epoch": 0.06048069425534408, "grad_norm": 748.814208984375, "learning_rate": 5.988e-06, "loss": 21.8749, "step": 29940 }, { "epoch": 0.060500894888027894, "grad_norm": 396.53118896484375, "learning_rate": 5.99e-06, "loss": 32.856, "step": 29950 }, { "epoch": 0.06052109552071171, "grad_norm": 220.9365692138672, "learning_rate": 5.992e-06, "loss": 24.7886, "step": 29960 }, { "epoch": 0.06054129615339553, "grad_norm": 139.06324768066406, "learning_rate": 5.9940000000000005e-06, "loss": 21.2531, "step": 29970 }, { "epoch": 0.06056149678607934, "grad_norm": 371.89813232421875, "learning_rate": 5.996000000000001e-06, "loss": 19.0003, "step": 29980 }, { "epoch": 0.06058169741876315, "grad_norm": 906.2913818359375, "learning_rate": 5.998000000000001e-06, "loss": 25.2816, "step": 29990 }, { "epoch": 0.06060189805144697, "grad_norm": 551.5726318359375, "learning_rate": 6e-06, "loss": 28.9025, "step": 30000 }, { "epoch": 0.060622098684130786, "grad_norm": 230.41876220703125, "learning_rate": 6.002e-06, "loss": 28.3692, "step": 30010 }, { "epoch": 0.060642299316814606, "grad_norm": 437.0330810546875, "learning_rate": 6.004000000000001e-06, "loss": 30.0274, "step": 30020 }, { "epoch": 0.06066249994949842, "grad_norm": 119.54045104980469, "learning_rate": 6.006000000000001e-06, "loss": 25.2697, "step": 30030 }, { "epoch": 0.06068270058218223, "grad_norm": 893.7681274414062, "learning_rate": 6.008000000000001e-06, "loss": 23.0002, "step": 30040 }, { "epoch": 0.06070290121486605, "grad_norm": 666.832275390625, "learning_rate": 6.01e-06, "loss": 21.5462, "step": 30050 }, { "epoch": 0.060723101847549865, "grad_norm": 364.53448486328125, "learning_rate": 6.012e-06, "loss": 31.678, "step": 30060 }, { "epoch": 0.06074330248023368, "grad_norm": 388.3205871582031, "learning_rate": 6.014000000000001e-06, "loss": 21.7389, "step": 30070 }, { "epoch": 0.0607635031129175, "grad_norm": 307.82366943359375, 
"learning_rate": 6.0160000000000005e-06, "loss": 23.023, "step": 30080 }, { "epoch": 0.06078370374560131, "grad_norm": 654.1393432617188, "learning_rate": 6.018000000000001e-06, "loss": 26.87, "step": 30090 }, { "epoch": 0.06080390437828513, "grad_norm": 377.3679504394531, "learning_rate": 6.02e-06, "loss": 26.1135, "step": 30100 }, { "epoch": 0.060824105010968944, "grad_norm": 1051.38720703125, "learning_rate": 6.022e-06, "loss": 31.1944, "step": 30110 }, { "epoch": 0.06084430564365276, "grad_norm": 176.04995727539062, "learning_rate": 6.024000000000001e-06, "loss": 17.6782, "step": 30120 }, { "epoch": 0.06086450627633658, "grad_norm": 398.30059814453125, "learning_rate": 6.026e-06, "loss": 36.5331, "step": 30130 }, { "epoch": 0.06088470690902039, "grad_norm": 442.10675048828125, "learning_rate": 6.0280000000000006e-06, "loss": 23.4857, "step": 30140 }, { "epoch": 0.060904907541704204, "grad_norm": 497.2967529296875, "learning_rate": 6.030000000000001e-06, "loss": 29.0141, "step": 30150 }, { "epoch": 0.060925108174388024, "grad_norm": 419.6001892089844, "learning_rate": 6.032e-06, "loss": 30.8048, "step": 30160 }, { "epoch": 0.06094530880707184, "grad_norm": 311.8349609375, "learning_rate": 6.034000000000001e-06, "loss": 21.1442, "step": 30170 }, { "epoch": 0.06096550943975566, "grad_norm": 1235.242431640625, "learning_rate": 6.036000000000001e-06, "loss": 35.1707, "step": 30180 }, { "epoch": 0.06098571007243947, "grad_norm": 920.2532348632812, "learning_rate": 6.0380000000000005e-06, "loss": 23.4196, "step": 30190 }, { "epoch": 0.06100591070512328, "grad_norm": 1023.5107421875, "learning_rate": 6.040000000000001e-06, "loss": 35.9319, "step": 30200 }, { "epoch": 0.0610261113378071, "grad_norm": 3.849163770675659, "learning_rate": 6.042e-06, "loss": 21.4135, "step": 30210 }, { "epoch": 0.061046311970490916, "grad_norm": 175.37745666503906, "learning_rate": 6.044000000000001e-06, "loss": 20.4032, "step": 30220 }, { "epoch": 0.06106651260317473, "grad_norm": 527.3060302734375, "learning_rate": 6.046000000000001e-06, "loss": 30.6245, "step": 30230 }, { "epoch": 0.06108671323585855, "grad_norm": 461.25970458984375, "learning_rate": 6.048e-06, "loss": 38.1651, "step": 30240 }, { "epoch": 0.06110691386854236, "grad_norm": 544.8977661132812, "learning_rate": 6.0500000000000005e-06, "loss": 13.7668, "step": 30250 }, { "epoch": 0.06112711450122618, "grad_norm": 530.8619384765625, "learning_rate": 6.052e-06, "loss": 19.7257, "step": 30260 }, { "epoch": 0.061147315133909995, "grad_norm": 527.7964477539062, "learning_rate": 6.054000000000001e-06, "loss": 32.3616, "step": 30270 }, { "epoch": 0.06116751576659381, "grad_norm": 1873.7520751953125, "learning_rate": 6.056000000000001e-06, "loss": 24.6919, "step": 30280 }, { "epoch": 0.06118771639927763, "grad_norm": 659.0830078125, "learning_rate": 6.058e-06, "loss": 23.7662, "step": 30290 }, { "epoch": 0.06120791703196144, "grad_norm": 380.70068359375, "learning_rate": 6.0600000000000004e-06, "loss": 35.3404, "step": 30300 }, { "epoch": 0.061228117664645254, "grad_norm": 907.1119384765625, "learning_rate": 6.062e-06, "loss": 47.9935, "step": 30310 }, { "epoch": 0.061248318297329074, "grad_norm": 571.4940795898438, "learning_rate": 6.064000000000001e-06, "loss": 26.3058, "step": 30320 }, { "epoch": 0.06126851893001289, "grad_norm": 566.0093383789062, "learning_rate": 6.066000000000001e-06, "loss": 14.3268, "step": 30330 }, { "epoch": 0.06128871956269671, "grad_norm": 629.2047119140625, "learning_rate": 6.068e-06, "loss": 34.8342, "step": 30340 }, { "epoch": 
0.06130892019538052, "grad_norm": 177.77943420410156, "learning_rate": 6.07e-06, "loss": 38.0346, "step": 30350 }, { "epoch": 0.06132912082806433, "grad_norm": 706.6749877929688, "learning_rate": 6.0720000000000005e-06, "loss": 33.0479, "step": 30360 }, { "epoch": 0.06134932146074815, "grad_norm": 424.2728576660156, "learning_rate": 6.074000000000001e-06, "loss": 30.1139, "step": 30370 }, { "epoch": 0.061369522093431966, "grad_norm": 693.5032958984375, "learning_rate": 6.076000000000001e-06, "loss": 20.3498, "step": 30380 }, { "epoch": 0.06138972272611578, "grad_norm": 748.1320190429688, "learning_rate": 6.078000000000001e-06, "loss": 28.7987, "step": 30390 }, { "epoch": 0.0614099233587996, "grad_norm": 350.1024475097656, "learning_rate": 6.08e-06, "loss": 34.3633, "step": 30400 }, { "epoch": 0.06143012399148341, "grad_norm": 470.6011657714844, "learning_rate": 6.082e-06, "loss": 26.6126, "step": 30410 }, { "epoch": 0.061450324624167225, "grad_norm": 171.24346923828125, "learning_rate": 6.084000000000001e-06, "loss": 29.7189, "step": 30420 }, { "epoch": 0.061470525256851045, "grad_norm": 281.1046447753906, "learning_rate": 6.086000000000001e-06, "loss": 28.274, "step": 30430 }, { "epoch": 0.06149072588953486, "grad_norm": 729.0284423828125, "learning_rate": 6.088000000000001e-06, "loss": 37.4033, "step": 30440 }, { "epoch": 0.06151092652221868, "grad_norm": 1197.5462646484375, "learning_rate": 6.09e-06, "loss": 21.4996, "step": 30450 }, { "epoch": 0.06153112715490249, "grad_norm": 180.23403930664062, "learning_rate": 6.092e-06, "loss": 11.861, "step": 30460 }, { "epoch": 0.061551327787586305, "grad_norm": 551.1353149414062, "learning_rate": 6.094000000000001e-06, "loss": 22.2927, "step": 30470 }, { "epoch": 0.061571528420270125, "grad_norm": 967.9293823242188, "learning_rate": 6.096000000000001e-06, "loss": 21.8577, "step": 30480 }, { "epoch": 0.06159172905295394, "grad_norm": 401.4379577636719, "learning_rate": 6.098000000000001e-06, "loss": 38.6864, "step": 30490 }, { "epoch": 0.06161192968563775, "grad_norm": 584.6387939453125, "learning_rate": 6.1e-06, "loss": 32.8549, "step": 30500 }, { "epoch": 0.06163213031832157, "grad_norm": 384.2839660644531, "learning_rate": 6.102e-06, "loss": 15.1533, "step": 30510 }, { "epoch": 0.061652330951005384, "grad_norm": 302.5103454589844, "learning_rate": 6.104000000000001e-06, "loss": 17.0309, "step": 30520 }, { "epoch": 0.061672531583689204, "grad_norm": 477.59912109375, "learning_rate": 6.1060000000000005e-06, "loss": 51.6508, "step": 30530 }, { "epoch": 0.06169273221637302, "grad_norm": 848.4017333984375, "learning_rate": 6.108000000000001e-06, "loss": 43.8784, "step": 30540 }, { "epoch": 0.06171293284905683, "grad_norm": 580.9694213867188, "learning_rate": 6.110000000000001e-06, "loss": 22.3975, "step": 30550 }, { "epoch": 0.06173313348174065, "grad_norm": 70.8717041015625, "learning_rate": 6.112e-06, "loss": 20.8196, "step": 30560 }, { "epoch": 0.06175333411442446, "grad_norm": 200.7247772216797, "learning_rate": 6.114000000000001e-06, "loss": 14.8975, "step": 30570 }, { "epoch": 0.061773534747108276, "grad_norm": 68.50727844238281, "learning_rate": 6.116000000000001e-06, "loss": 18.8313, "step": 30580 }, { "epoch": 0.061793735379792096, "grad_norm": 365.78631591796875, "learning_rate": 6.1180000000000005e-06, "loss": 31.1022, "step": 30590 }, { "epoch": 0.06181393601247591, "grad_norm": 924.5538330078125, "learning_rate": 6.120000000000001e-06, "loss": 18.5757, "step": 30600 }, { "epoch": 0.06183413664515973, "grad_norm": 309.0931091308594, 
"learning_rate": 6.122e-06, "loss": 26.9238, "step": 30610 }, { "epoch": 0.06185433727784354, "grad_norm": 982.596923828125, "learning_rate": 6.124000000000001e-06, "loss": 32.0771, "step": 30620 }, { "epoch": 0.061874537910527355, "grad_norm": 107.05025482177734, "learning_rate": 6.126000000000001e-06, "loss": 22.885, "step": 30630 }, { "epoch": 0.061894738543211175, "grad_norm": 861.4071044921875, "learning_rate": 6.1280000000000005e-06, "loss": 24.9972, "step": 30640 }, { "epoch": 0.06191493917589499, "grad_norm": 552.7598876953125, "learning_rate": 6.130000000000001e-06, "loss": 14.113, "step": 30650 }, { "epoch": 0.0619351398085788, "grad_norm": 980.1431884765625, "learning_rate": 6.132e-06, "loss": 16.5971, "step": 30660 }, { "epoch": 0.06195534044126262, "grad_norm": 354.41943359375, "learning_rate": 6.134e-06, "loss": 23.506, "step": 30670 }, { "epoch": 0.061975541073946434, "grad_norm": 525.1093139648438, "learning_rate": 6.136000000000001e-06, "loss": 45.2582, "step": 30680 }, { "epoch": 0.061995741706630254, "grad_norm": 1747.66748046875, "learning_rate": 6.138e-06, "loss": 28.076, "step": 30690 }, { "epoch": 0.06201594233931407, "grad_norm": 133.58242797851562, "learning_rate": 6.1400000000000005e-06, "loss": 29.0578, "step": 30700 }, { "epoch": 0.06203614297199788, "grad_norm": 971.960205078125, "learning_rate": 6.142e-06, "loss": 23.5747, "step": 30710 }, { "epoch": 0.0620563436046817, "grad_norm": 390.8254699707031, "learning_rate": 6.144e-06, "loss": 14.7553, "step": 30720 }, { "epoch": 0.062076544237365514, "grad_norm": 931.520751953125, "learning_rate": 6.146000000000001e-06, "loss": 15.7332, "step": 30730 }, { "epoch": 0.06209674487004933, "grad_norm": 725.7781372070312, "learning_rate": 6.148e-06, "loss": 27.3765, "step": 30740 }, { "epoch": 0.06211694550273315, "grad_norm": 184.05690002441406, "learning_rate": 6.15e-06, "loss": 27.3351, "step": 30750 }, { "epoch": 0.06213714613541696, "grad_norm": 147.2608184814453, "learning_rate": 6.1520000000000006e-06, "loss": 14.9159, "step": 30760 }, { "epoch": 0.06215734676810078, "grad_norm": 605.9739379882812, "learning_rate": 6.154e-06, "loss": 27.2257, "step": 30770 }, { "epoch": 0.06217754740078459, "grad_norm": 607.0244750976562, "learning_rate": 6.156000000000001e-06, "loss": 21.6824, "step": 30780 }, { "epoch": 0.062197748033468406, "grad_norm": 247.36895751953125, "learning_rate": 6.158000000000001e-06, "loss": 33.5357, "step": 30790 }, { "epoch": 0.062217948666152226, "grad_norm": 854.2710571289062, "learning_rate": 6.16e-06, "loss": 26.6299, "step": 30800 }, { "epoch": 0.06223814929883604, "grad_norm": 443.6426696777344, "learning_rate": 6.1620000000000005e-06, "loss": 26.0815, "step": 30810 }, { "epoch": 0.06225834993151985, "grad_norm": 189.7944793701172, "learning_rate": 6.164e-06, "loss": 34.9507, "step": 30820 }, { "epoch": 0.06227855056420367, "grad_norm": 1154.957275390625, "learning_rate": 6.166000000000001e-06, "loss": 30.0567, "step": 30830 }, { "epoch": 0.062298751196887485, "grad_norm": 879.346435546875, "learning_rate": 6.168000000000001e-06, "loss": 19.3368, "step": 30840 }, { "epoch": 0.062318951829571305, "grad_norm": 118.4106674194336, "learning_rate": 6.17e-06, "loss": 20.9169, "step": 30850 }, { "epoch": 0.06233915246225512, "grad_norm": 302.1161193847656, "learning_rate": 6.172e-06, "loss": 27.4503, "step": 30860 }, { "epoch": 0.06235935309493893, "grad_norm": 300.6957702636719, "learning_rate": 6.174e-06, "loss": 20.6476, "step": 30870 }, { "epoch": 0.06237955372762275, "grad_norm": 
860.6785888671875, "learning_rate": 6.176000000000001e-06, "loss": 50.3861, "step": 30880 }, { "epoch": 0.062399754360306564, "grad_norm": 398.2225036621094, "learning_rate": 6.178000000000001e-06, "loss": 15.2574, "step": 30890 }, { "epoch": 0.06241995499299038, "grad_norm": 388.7832336425781, "learning_rate": 6.18e-06, "loss": 33.1759, "step": 30900 }, { "epoch": 0.0624401556256742, "grad_norm": 1116.7479248046875, "learning_rate": 6.182e-06, "loss": 22.5325, "step": 30910 }, { "epoch": 0.06246035625835801, "grad_norm": 470.1541442871094, "learning_rate": 6.184e-06, "loss": 12.1148, "step": 30920 }, { "epoch": 0.06248055689104183, "grad_norm": 761.6276245117188, "learning_rate": 6.1860000000000006e-06, "loss": 28.2619, "step": 30930 }, { "epoch": 0.06250075752372564, "grad_norm": 312.6024475097656, "learning_rate": 6.188000000000001e-06, "loss": 25.9364, "step": 30940 }, { "epoch": 0.06252095815640946, "grad_norm": 79.20448303222656, "learning_rate": 6.190000000000001e-06, "loss": 30.2768, "step": 30950 }, { "epoch": 0.06254115878909328, "grad_norm": 838.1323852539062, "learning_rate": 6.192e-06, "loss": 19.8499, "step": 30960 }, { "epoch": 0.0625613594217771, "grad_norm": 205.89064025878906, "learning_rate": 6.194e-06, "loss": 31.2351, "step": 30970 }, { "epoch": 0.0625815600544609, "grad_norm": 652.1013793945312, "learning_rate": 6.196000000000001e-06, "loss": 29.7136, "step": 30980 }, { "epoch": 0.06260176068714472, "grad_norm": 473.4364318847656, "learning_rate": 6.198000000000001e-06, "loss": 15.5697, "step": 30990 }, { "epoch": 0.06262196131982854, "grad_norm": 803.982177734375, "learning_rate": 6.200000000000001e-06, "loss": 42.4467, "step": 31000 }, { "epoch": 0.06264216195251235, "grad_norm": 490.3743896484375, "learning_rate": 6.202e-06, "loss": 19.5958, "step": 31010 }, { "epoch": 0.06266236258519617, "grad_norm": 1799.8692626953125, "learning_rate": 6.204e-06, "loss": 29.0938, "step": 31020 }, { "epoch": 0.06268256321787999, "grad_norm": 395.7022705078125, "learning_rate": 6.206000000000001e-06, "loss": 14.4859, "step": 31030 }, { "epoch": 0.0627027638505638, "grad_norm": 780.4808349609375, "learning_rate": 6.2080000000000005e-06, "loss": 28.1231, "step": 31040 }, { "epoch": 0.06272296448324761, "grad_norm": 642.3253173828125, "learning_rate": 6.210000000000001e-06, "loss": 23.1921, "step": 31050 }, { "epoch": 0.06274316511593143, "grad_norm": 771.5405883789062, "learning_rate": 6.212e-06, "loss": 30.7311, "step": 31060 }, { "epoch": 0.06276336574861524, "grad_norm": 260.2312927246094, "learning_rate": 6.214e-06, "loss": 25.5861, "step": 31070 }, { "epoch": 0.06278356638129906, "grad_norm": 457.0404052734375, "learning_rate": 6.216000000000001e-06, "loss": 16.7189, "step": 31080 }, { "epoch": 0.06280376701398288, "grad_norm": 109.95401000976562, "learning_rate": 6.2180000000000004e-06, "loss": 19.15, "step": 31090 }, { "epoch": 0.06282396764666669, "grad_norm": 976.7431030273438, "learning_rate": 6.220000000000001e-06, "loss": 27.3597, "step": 31100 }, { "epoch": 0.0628441682793505, "grad_norm": 385.8603515625, "learning_rate": 6.222e-06, "loss": 27.3513, "step": 31110 }, { "epoch": 0.06286436891203433, "grad_norm": 527.4700927734375, "learning_rate": 6.224e-06, "loss": 29.6751, "step": 31120 }, { "epoch": 0.06288456954471815, "grad_norm": 1336.8475341796875, "learning_rate": 6.226000000000001e-06, "loss": 41.498, "step": 31130 }, { "epoch": 0.06290477017740195, "grad_norm": 305.7466735839844, "learning_rate": 6.228e-06, "loss": 25.7337, "step": 31140 }, { "epoch": 
0.06292497081008577, "grad_norm": 138.3699951171875, "learning_rate": 6.2300000000000005e-06, "loss": 34.1218, "step": 31150 }, { "epoch": 0.06294517144276959, "grad_norm": 585.0834350585938, "learning_rate": 6.232000000000001e-06, "loss": 19.9407, "step": 31160 }, { "epoch": 0.0629653720754534, "grad_norm": 746.5474243164062, "learning_rate": 6.234e-06, "loss": 51.4981, "step": 31170 }, { "epoch": 0.06298557270813722, "grad_norm": 270.2710266113281, "learning_rate": 6.236000000000001e-06, "loss": 36.1254, "step": 31180 }, { "epoch": 0.06300577334082104, "grad_norm": 1371.7850341796875, "learning_rate": 6.238000000000001e-06, "loss": 31.2997, "step": 31190 }, { "epoch": 0.06302597397350485, "grad_norm": 673.2770385742188, "learning_rate": 6.24e-06, "loss": 36.9119, "step": 31200 }, { "epoch": 0.06304617460618867, "grad_norm": 301.5875549316406, "learning_rate": 6.2420000000000005e-06, "loss": 14.8065, "step": 31210 }, { "epoch": 0.06306637523887249, "grad_norm": 581.1113891601562, "learning_rate": 6.244e-06, "loss": 32.8933, "step": 31220 }, { "epoch": 0.06308657587155629, "grad_norm": 321.9029846191406, "learning_rate": 6.246000000000001e-06, "loss": 25.5634, "step": 31230 }, { "epoch": 0.06310677650424011, "grad_norm": 990.6182250976562, "learning_rate": 6.248000000000001e-06, "loss": 26.876, "step": 31240 }, { "epoch": 0.06312697713692393, "grad_norm": 425.1790466308594, "learning_rate": 6.25e-06, "loss": 25.7884, "step": 31250 }, { "epoch": 0.06314717776960774, "grad_norm": 337.3599853515625, "learning_rate": 6.2520000000000004e-06, "loss": 30.3455, "step": 31260 }, { "epoch": 0.06316737840229156, "grad_norm": 2128.223876953125, "learning_rate": 6.254e-06, "loss": 52.7797, "step": 31270 }, { "epoch": 0.06318757903497538, "grad_norm": 400.2874450683594, "learning_rate": 6.256000000000001e-06, "loss": 17.9244, "step": 31280 }, { "epoch": 0.0632077796676592, "grad_norm": 628.6130981445312, "learning_rate": 6.258000000000001e-06, "loss": 29.7181, "step": 31290 }, { "epoch": 0.063227980300343, "grad_norm": 393.5443115234375, "learning_rate": 6.26e-06, "loss": 17.6657, "step": 31300 }, { "epoch": 0.06324818093302682, "grad_norm": 5.618537425994873, "learning_rate": 6.262e-06, "loss": 31.0318, "step": 31310 }, { "epoch": 0.06326838156571064, "grad_norm": 641.4444580078125, "learning_rate": 6.264e-06, "loss": 38.701, "step": 31320 }, { "epoch": 0.06328858219839445, "grad_norm": 293.72271728515625, "learning_rate": 6.266000000000001e-06, "loss": 27.233, "step": 31330 }, { "epoch": 0.06330878283107827, "grad_norm": 391.3696594238281, "learning_rate": 6.268000000000001e-06, "loss": 19.8116, "step": 31340 }, { "epoch": 0.06332898346376209, "grad_norm": 362.88311767578125, "learning_rate": 6.27e-06, "loss": 37.2613, "step": 31350 }, { "epoch": 0.0633491840964459, "grad_norm": 417.72015380859375, "learning_rate": 6.272e-06, "loss": 23.4655, "step": 31360 }, { "epoch": 0.06336938472912972, "grad_norm": 864.805419921875, "learning_rate": 6.274e-06, "loss": 38.7416, "step": 31370 }, { "epoch": 0.06338958536181354, "grad_norm": 366.3962707519531, "learning_rate": 6.2760000000000006e-06, "loss": 28.7714, "step": 31380 }, { "epoch": 0.06340978599449734, "grad_norm": 299.3522033691406, "learning_rate": 6.278000000000001e-06, "loss": 26.9056, "step": 31390 }, { "epoch": 0.06342998662718116, "grad_norm": 617.9619750976562, "learning_rate": 6.280000000000001e-06, "loss": 18.5786, "step": 31400 }, { "epoch": 0.06345018725986498, "grad_norm": 506.8660888671875, "learning_rate": 6.282e-06, "loss": 24.2889, 
"step": 31410 }, { "epoch": 0.06347038789254879, "grad_norm": 244.0608367919922, "learning_rate": 6.284e-06, "loss": 21.0116, "step": 31420 }, { "epoch": 0.06349058852523261, "grad_norm": 389.8390808105469, "learning_rate": 6.286000000000001e-06, "loss": 27.5439, "step": 31430 }, { "epoch": 0.06351078915791643, "grad_norm": 474.32275390625, "learning_rate": 6.288000000000001e-06, "loss": 37.2891, "step": 31440 }, { "epoch": 0.06353098979060025, "grad_norm": 518.6574096679688, "learning_rate": 6.290000000000001e-06, "loss": 23.395, "step": 31450 }, { "epoch": 0.06355119042328405, "grad_norm": 961.990234375, "learning_rate": 6.292e-06, "loss": 26.7638, "step": 31460 }, { "epoch": 0.06357139105596787, "grad_norm": 654.8812255859375, "learning_rate": 6.294e-06, "loss": 33.6812, "step": 31470 }, { "epoch": 0.0635915916886517, "grad_norm": 669.5548706054688, "learning_rate": 6.296000000000001e-06, "loss": 29.9715, "step": 31480 }, { "epoch": 0.0636117923213355, "grad_norm": 642.5733032226562, "learning_rate": 6.2980000000000005e-06, "loss": 30.399, "step": 31490 }, { "epoch": 0.06363199295401932, "grad_norm": 1096.3792724609375, "learning_rate": 6.300000000000001e-06, "loss": 27.0487, "step": 31500 }, { "epoch": 0.06365219358670314, "grad_norm": 182.9304962158203, "learning_rate": 6.302e-06, "loss": 28.001, "step": 31510 }, { "epoch": 0.06367239421938695, "grad_norm": 481.4287414550781, "learning_rate": 6.304e-06, "loss": 14.7878, "step": 31520 }, { "epoch": 0.06369259485207077, "grad_norm": 332.9795227050781, "learning_rate": 6.306000000000001e-06, "loss": 33.3002, "step": 31530 }, { "epoch": 0.06371279548475459, "grad_norm": 222.80126953125, "learning_rate": 6.308e-06, "loss": 15.4948, "step": 31540 }, { "epoch": 0.06373299611743839, "grad_norm": 375.0700378417969, "learning_rate": 6.3100000000000006e-06, "loss": 22.6265, "step": 31550 }, { "epoch": 0.06375319675012221, "grad_norm": 736.5299682617188, "learning_rate": 6.312000000000001e-06, "loss": 30.0435, "step": 31560 }, { "epoch": 0.06377339738280603, "grad_norm": 557.098876953125, "learning_rate": 6.314e-06, "loss": 17.332, "step": 31570 }, { "epoch": 0.06379359801548984, "grad_norm": 1109.0279541015625, "learning_rate": 6.316000000000001e-06, "loss": 29.9884, "step": 31580 }, { "epoch": 0.06381379864817366, "grad_norm": 246.1799774169922, "learning_rate": 6.318000000000001e-06, "loss": 20.189, "step": 31590 }, { "epoch": 0.06383399928085748, "grad_norm": 131.45822143554688, "learning_rate": 6.3200000000000005e-06, "loss": 41.6542, "step": 31600 }, { "epoch": 0.0638541999135413, "grad_norm": 508.4758605957031, "learning_rate": 6.322000000000001e-06, "loss": 27.1823, "step": 31610 }, { "epoch": 0.0638744005462251, "grad_norm": 616.266357421875, "learning_rate": 6.324e-06, "loss": 15.2132, "step": 31620 }, { "epoch": 0.06389460117890892, "grad_norm": 694.1072998046875, "learning_rate": 6.326000000000001e-06, "loss": 20.8863, "step": 31630 }, { "epoch": 0.06391480181159274, "grad_norm": 476.1229553222656, "learning_rate": 6.328000000000001e-06, "loss": 21.2325, "step": 31640 }, { "epoch": 0.06393500244427655, "grad_norm": 584.6588134765625, "learning_rate": 6.33e-06, "loss": 24.3116, "step": 31650 }, { "epoch": 0.06395520307696037, "grad_norm": 637.4476318359375, "learning_rate": 6.3320000000000005e-06, "loss": 33.3525, "step": 31660 }, { "epoch": 0.06397540370964419, "grad_norm": 428.1219482421875, "learning_rate": 6.334e-06, "loss": 4.7495, "step": 31670 }, { "epoch": 0.063995604342328, "grad_norm": 2742.7109375, "learning_rate": 
6.336000000000001e-06, "loss": 35.9556, "step": 31680 }, { "epoch": 0.06401580497501182, "grad_norm": 357.019287109375, "learning_rate": 6.338000000000001e-06, "loss": 24.6162, "step": 31690 }, { "epoch": 0.06403600560769564, "grad_norm": 378.7497863769531, "learning_rate": 6.34e-06, "loss": 28.2976, "step": 31700 }, { "epoch": 0.06405620624037944, "grad_norm": 616.9625854492188, "learning_rate": 6.3420000000000004e-06, "loss": 44.8492, "step": 31710 }, { "epoch": 0.06407640687306326, "grad_norm": 279.8904724121094, "learning_rate": 6.344e-06, "loss": 23.5007, "step": 31720 }, { "epoch": 0.06409660750574708, "grad_norm": 15.231189727783203, "learning_rate": 6.346000000000001e-06, "loss": 24.3203, "step": 31730 }, { "epoch": 0.06411680813843089, "grad_norm": 624.3145751953125, "learning_rate": 6.348000000000001e-06, "loss": 30.2905, "step": 31740 }, { "epoch": 0.06413700877111471, "grad_norm": 362.0500183105469, "learning_rate": 6.35e-06, "loss": 23.1855, "step": 31750 }, { "epoch": 0.06415720940379853, "grad_norm": 706.1433715820312, "learning_rate": 6.352e-06, "loss": 18.3678, "step": 31760 }, { "epoch": 0.06417741003648235, "grad_norm": 518.0941162109375, "learning_rate": 6.3540000000000005e-06, "loss": 43.9327, "step": 31770 }, { "epoch": 0.06419761066916616, "grad_norm": 395.8467102050781, "learning_rate": 6.356000000000001e-06, "loss": 42.6036, "step": 31780 }, { "epoch": 0.06421781130184998, "grad_norm": 283.4975891113281, "learning_rate": 6.358000000000001e-06, "loss": 19.784, "step": 31790 }, { "epoch": 0.0642380119345338, "grad_norm": 301.6070556640625, "learning_rate": 6.360000000000001e-06, "loss": 31.1704, "step": 31800 }, { "epoch": 0.0642582125672176, "grad_norm": 72.76226043701172, "learning_rate": 6.362e-06, "loss": 22.481, "step": 31810 }, { "epoch": 0.06427841319990142, "grad_norm": 1006.912353515625, "learning_rate": 6.364e-06, "loss": 21.1927, "step": 31820 }, { "epoch": 0.06429861383258524, "grad_norm": 550.6497802734375, "learning_rate": 6.366000000000001e-06, "loss": 46.4444, "step": 31830 }, { "epoch": 0.06431881446526905, "grad_norm": 496.5099792480469, "learning_rate": 6.368000000000001e-06, "loss": 8.5152, "step": 31840 }, { "epoch": 0.06433901509795287, "grad_norm": 731.7362670898438, "learning_rate": 6.370000000000001e-06, "loss": 31.8156, "step": 31850 }, { "epoch": 0.06435921573063669, "grad_norm": 705.6895751953125, "learning_rate": 6.372e-06, "loss": 31.1569, "step": 31860 }, { "epoch": 0.0643794163633205, "grad_norm": 208.77008056640625, "learning_rate": 6.374e-06, "loss": 31.0229, "step": 31870 }, { "epoch": 0.06439961699600431, "grad_norm": 233.83226013183594, "learning_rate": 6.376e-06, "loss": 17.1957, "step": 31880 }, { "epoch": 0.06441981762868813, "grad_norm": 683.5333251953125, "learning_rate": 6.378000000000001e-06, "loss": 17.1572, "step": 31890 }, { "epoch": 0.06444001826137194, "grad_norm": 219.8205108642578, "learning_rate": 6.380000000000001e-06, "loss": 23.2732, "step": 31900 }, { "epoch": 0.06446021889405576, "grad_norm": 104.92378997802734, "learning_rate": 6.382e-06, "loss": 22.6116, "step": 31910 }, { "epoch": 0.06448041952673958, "grad_norm": 391.84906005859375, "learning_rate": 6.384e-06, "loss": 26.0803, "step": 31920 }, { "epoch": 0.0645006201594234, "grad_norm": 468.4878845214844, "learning_rate": 6.386e-06, "loss": 26.0893, "step": 31930 }, { "epoch": 0.0645208207921072, "grad_norm": 485.8976745605469, "learning_rate": 6.3880000000000005e-06, "loss": 33.3459, "step": 31940 }, { "epoch": 0.06454102142479103, "grad_norm": 
492.9491271972656, "learning_rate": 6.390000000000001e-06, "loss": 20.5961, "step": 31950 }, { "epoch": 0.06456122205747485, "grad_norm": 346.7865905761719, "learning_rate": 6.392000000000001e-06, "loss": 28.4751, "step": 31960 }, { "epoch": 0.06458142269015865, "grad_norm": 1298.2874755859375, "learning_rate": 6.394e-06, "loss": 31.6345, "step": 31970 }, { "epoch": 0.06460162332284247, "grad_norm": 636.1900024414062, "learning_rate": 6.396e-06, "loss": 32.0049, "step": 31980 }, { "epoch": 0.06462182395552629, "grad_norm": 366.033203125, "learning_rate": 6.398000000000001e-06, "loss": 45.5496, "step": 31990 }, { "epoch": 0.0646420245882101, "grad_norm": 1303.1556396484375, "learning_rate": 6.4000000000000006e-06, "loss": 27.9838, "step": 32000 }, { "epoch": 0.06466222522089392, "grad_norm": 295.89105224609375, "learning_rate": 6.402000000000001e-06, "loss": 23.4524, "step": 32010 }, { "epoch": 0.06468242585357774, "grad_norm": 1682.515625, "learning_rate": 6.404e-06, "loss": 42.7231, "step": 32020 }, { "epoch": 0.06470262648626154, "grad_norm": 2316.4873046875, "learning_rate": 6.406e-06, "loss": 34.1805, "step": 32030 }, { "epoch": 0.06472282711894536, "grad_norm": 405.20916748046875, "learning_rate": 6.408000000000001e-06, "loss": 26.5095, "step": 32040 }, { "epoch": 0.06474302775162918, "grad_norm": 343.07684326171875, "learning_rate": 6.4100000000000005e-06, "loss": 28.8234, "step": 32050 }, { "epoch": 0.06476322838431299, "grad_norm": 927.8602294921875, "learning_rate": 6.412000000000001e-06, "loss": 32.3761, "step": 32060 }, { "epoch": 0.06478342901699681, "grad_norm": 832.3965454101562, "learning_rate": 6.414e-06, "loss": 35.6177, "step": 32070 }, { "epoch": 0.06480362964968063, "grad_norm": 385.98297119140625, "learning_rate": 6.416e-06, "loss": 40.5609, "step": 32080 }, { "epoch": 0.06482383028236445, "grad_norm": 680.3007202148438, "learning_rate": 6.418000000000001e-06, "loss": 18.3833, "step": 32090 }, { "epoch": 0.06484403091504826, "grad_norm": 706.6058349609375, "learning_rate": 6.42e-06, "loss": 32.0983, "step": 32100 }, { "epoch": 0.06486423154773208, "grad_norm": 458.92181396484375, "learning_rate": 6.4220000000000005e-06, "loss": 25.9199, "step": 32110 }, { "epoch": 0.0648844321804159, "grad_norm": 312.37945556640625, "learning_rate": 6.424e-06, "loss": 17.622, "step": 32120 }, { "epoch": 0.0649046328130997, "grad_norm": 419.53485107421875, "learning_rate": 6.426e-06, "loss": 38.1022, "step": 32130 }, { "epoch": 0.06492483344578352, "grad_norm": 449.0639343261719, "learning_rate": 6.428000000000001e-06, "loss": 22.688, "step": 32140 }, { "epoch": 0.06494503407846734, "grad_norm": 484.6932678222656, "learning_rate": 6.43e-06, "loss": 17.4194, "step": 32150 }, { "epoch": 0.06496523471115115, "grad_norm": 705.7068481445312, "learning_rate": 6.432e-06, "loss": 28.7297, "step": 32160 }, { "epoch": 0.06498543534383497, "grad_norm": 824.8680419921875, "learning_rate": 6.4340000000000006e-06, "loss": 32.2345, "step": 32170 }, { "epoch": 0.06500563597651879, "grad_norm": 481.2254333496094, "learning_rate": 6.436e-06, "loss": 20.6795, "step": 32180 }, { "epoch": 0.0650258366092026, "grad_norm": 523.8160400390625, "learning_rate": 6.438000000000001e-06, "loss": 21.3793, "step": 32190 }, { "epoch": 0.06504603724188641, "grad_norm": 1548.1605224609375, "learning_rate": 6.440000000000001e-06, "loss": 26.5394, "step": 32200 }, { "epoch": 0.06506623787457023, "grad_norm": 825.455078125, "learning_rate": 6.442e-06, "loss": 23.6385, "step": 32210 }, { "epoch": 0.06508643850725404, 
"grad_norm": 539.9469604492188, "learning_rate": 6.4440000000000005e-06, "loss": 26.1361, "step": 32220 }, { "epoch": 0.06510663913993786, "grad_norm": 320.2283020019531, "learning_rate": 6.446e-06, "loss": 26.6205, "step": 32230 }, { "epoch": 0.06512683977262168, "grad_norm": 509.67828369140625, "learning_rate": 6.448000000000001e-06, "loss": 20.657, "step": 32240 }, { "epoch": 0.0651470404053055, "grad_norm": 1097.630615234375, "learning_rate": 6.450000000000001e-06, "loss": 33.5726, "step": 32250 }, { "epoch": 0.0651672410379893, "grad_norm": 1908.3387451171875, "learning_rate": 6.452e-06, "loss": 33.8112, "step": 32260 }, { "epoch": 0.06518744167067313, "grad_norm": 439.6868591308594, "learning_rate": 6.454e-06, "loss": 30.5285, "step": 32270 }, { "epoch": 0.06520764230335695, "grad_norm": 438.2578430175781, "learning_rate": 6.456e-06, "loss": 37.5442, "step": 32280 }, { "epoch": 0.06522784293604075, "grad_norm": 481.5923156738281, "learning_rate": 6.458000000000001e-06, "loss": 28.0664, "step": 32290 }, { "epoch": 0.06524804356872457, "grad_norm": 774.6879272460938, "learning_rate": 6.460000000000001e-06, "loss": 24.0665, "step": 32300 }, { "epoch": 0.06526824420140839, "grad_norm": 291.2333679199219, "learning_rate": 6.462e-06, "loss": 18.9364, "step": 32310 }, { "epoch": 0.0652884448340922, "grad_norm": 1116.8297119140625, "learning_rate": 6.464e-06, "loss": 22.6748, "step": 32320 }, { "epoch": 0.06530864546677602, "grad_norm": 247.5875244140625, "learning_rate": 6.4660000000000004e-06, "loss": 29.2354, "step": 32330 }, { "epoch": 0.06532884609945984, "grad_norm": 364.7977600097656, "learning_rate": 6.468000000000001e-06, "loss": 29.4752, "step": 32340 }, { "epoch": 0.06534904673214365, "grad_norm": 442.0473327636719, "learning_rate": 6.470000000000001e-06, "loss": 22.6673, "step": 32350 }, { "epoch": 0.06536924736482747, "grad_norm": 288.97265625, "learning_rate": 6.472000000000001e-06, "loss": 20.4483, "step": 32360 }, { "epoch": 0.06538944799751129, "grad_norm": 332.4429626464844, "learning_rate": 6.474e-06, "loss": 45.8978, "step": 32370 }, { "epoch": 0.06540964863019509, "grad_norm": 511.8432922363281, "learning_rate": 6.476e-06, "loss": 29.034, "step": 32380 }, { "epoch": 0.06542984926287891, "grad_norm": 433.4559631347656, "learning_rate": 6.478000000000001e-06, "loss": 31.5313, "step": 32390 }, { "epoch": 0.06545004989556273, "grad_norm": 283.2999572753906, "learning_rate": 6.480000000000001e-06, "loss": 14.6274, "step": 32400 }, { "epoch": 0.06547025052824655, "grad_norm": 1162.132080078125, "learning_rate": 6.482000000000001e-06, "loss": 40.5958, "step": 32410 }, { "epoch": 0.06549045116093036, "grad_norm": 344.6202392578125, "learning_rate": 6.484e-06, "loss": 28.4084, "step": 32420 }, { "epoch": 0.06551065179361418, "grad_norm": 27.689971923828125, "learning_rate": 6.486e-06, "loss": 31.6779, "step": 32430 }, { "epoch": 0.065530852426298, "grad_norm": 364.23773193359375, "learning_rate": 6.488000000000001e-06, "loss": 21.4971, "step": 32440 }, { "epoch": 0.0655510530589818, "grad_norm": 1166.5234375, "learning_rate": 6.4900000000000005e-06, "loss": 37.5931, "step": 32450 }, { "epoch": 0.06557125369166562, "grad_norm": 428.5572204589844, "learning_rate": 6.492000000000001e-06, "loss": 27.2606, "step": 32460 }, { "epoch": 0.06559145432434944, "grad_norm": 1008.4483032226562, "learning_rate": 6.494e-06, "loss": 18.103, "step": 32470 }, { "epoch": 0.06561165495703325, "grad_norm": 712.0404052734375, "learning_rate": 6.496e-06, "loss": 36.1967, "step": 32480 }, { "epoch": 
0.06563185558971707, "grad_norm": 345.80810546875, "learning_rate": 6.498000000000001e-06, "loss": 28.1766, "step": 32490 }, { "epoch": 0.06565205622240089, "grad_norm": 260.66192626953125, "learning_rate": 6.5000000000000004e-06, "loss": 9.4512, "step": 32500 }, { "epoch": 0.0656722568550847, "grad_norm": 346.3865051269531, "learning_rate": 6.502000000000001e-06, "loss": 43.6116, "step": 32510 }, { "epoch": 0.06569245748776852, "grad_norm": 260.44671630859375, "learning_rate": 6.504e-06, "loss": 19.4687, "step": 32520 }, { "epoch": 0.06571265812045234, "grad_norm": 568.5048828125, "learning_rate": 6.506e-06, "loss": 28.7252, "step": 32530 }, { "epoch": 0.06573285875313614, "grad_norm": 391.73309326171875, "learning_rate": 6.508000000000001e-06, "loss": 38.2764, "step": 32540 }, { "epoch": 0.06575305938581996, "grad_norm": 367.7117004394531, "learning_rate": 6.51e-06, "loss": 48.0191, "step": 32550 }, { "epoch": 0.06577326001850378, "grad_norm": 991.9547119140625, "learning_rate": 6.5120000000000005e-06, "loss": 50.86, "step": 32560 }, { "epoch": 0.0657934606511876, "grad_norm": 620.3666381835938, "learning_rate": 6.514000000000001e-06, "loss": 24.1156, "step": 32570 }, { "epoch": 0.06581366128387141, "grad_norm": 265.3491516113281, "learning_rate": 6.516e-06, "loss": 31.6695, "step": 32580 }, { "epoch": 0.06583386191655523, "grad_norm": 793.039306640625, "learning_rate": 6.518000000000001e-06, "loss": 42.4639, "step": 32590 }, { "epoch": 0.06585406254923905, "grad_norm": 682.8428955078125, "learning_rate": 6.520000000000001e-06, "loss": 34.3652, "step": 32600 }, { "epoch": 0.06587426318192285, "grad_norm": 316.3118896484375, "learning_rate": 6.522e-06, "loss": 30.0757, "step": 32610 }, { "epoch": 0.06589446381460667, "grad_norm": 754.0253295898438, "learning_rate": 6.5240000000000006e-06, "loss": 21.8935, "step": 32620 }, { "epoch": 0.0659146644472905, "grad_norm": 1146.3603515625, "learning_rate": 6.526e-06, "loss": 31.3927, "step": 32630 }, { "epoch": 0.0659348650799743, "grad_norm": 555.3991088867188, "learning_rate": 6.528000000000001e-06, "loss": 21.5758, "step": 32640 }, { "epoch": 0.06595506571265812, "grad_norm": 314.7017517089844, "learning_rate": 6.530000000000001e-06, "loss": 38.0184, "step": 32650 }, { "epoch": 0.06597526634534194, "grad_norm": 259.7373962402344, "learning_rate": 6.532e-06, "loss": 29.7005, "step": 32660 }, { "epoch": 0.06599546697802575, "grad_norm": 671.1743774414062, "learning_rate": 6.5340000000000005e-06, "loss": 23.3408, "step": 32670 }, { "epoch": 0.06601566761070957, "grad_norm": 899.598876953125, "learning_rate": 6.536e-06, "loss": 32.6499, "step": 32680 }, { "epoch": 0.06603586824339339, "grad_norm": 207.99526977539062, "learning_rate": 6.538000000000001e-06, "loss": 40.8453, "step": 32690 }, { "epoch": 0.06605606887607719, "grad_norm": 759.3076782226562, "learning_rate": 6.540000000000001e-06, "loss": 32.5255, "step": 32700 }, { "epoch": 0.06607626950876101, "grad_norm": 131.973388671875, "learning_rate": 6.542e-06, "loss": 29.4312, "step": 32710 }, { "epoch": 0.06609647014144483, "grad_norm": 171.9645233154297, "learning_rate": 6.544e-06, "loss": 19.8939, "step": 32720 }, { "epoch": 0.06611667077412865, "grad_norm": 578.665283203125, "learning_rate": 6.5460000000000005e-06, "loss": 29.4161, "step": 32730 }, { "epoch": 0.06613687140681246, "grad_norm": 300.7922668457031, "learning_rate": 6.548000000000001e-06, "loss": 18.9477, "step": 32740 }, { "epoch": 0.06615707203949628, "grad_norm": 429.305419921875, "learning_rate": 6.550000000000001e-06, 
"loss": 27.5192, "step": 32750 }, { "epoch": 0.0661772726721801, "grad_norm": 475.9083251953125, "learning_rate": 6.552000000000001e-06, "loss": 29.6226, "step": 32760 }, { "epoch": 0.0661974733048639, "grad_norm": 243.16636657714844, "learning_rate": 6.554e-06, "loss": 29.2542, "step": 32770 }, { "epoch": 0.06621767393754772, "grad_norm": 330.93707275390625, "learning_rate": 6.556e-06, "loss": 25.9672, "step": 32780 }, { "epoch": 0.06623787457023154, "grad_norm": 633.3106079101562, "learning_rate": 6.558000000000001e-06, "loss": 16.0552, "step": 32790 }, { "epoch": 0.06625807520291535, "grad_norm": 676.6880493164062, "learning_rate": 6.560000000000001e-06, "loss": 25.8537, "step": 32800 }, { "epoch": 0.06627827583559917, "grad_norm": 398.6380310058594, "learning_rate": 6.562000000000001e-06, "loss": 23.9169, "step": 32810 }, { "epoch": 0.06629847646828299, "grad_norm": 1161.01171875, "learning_rate": 6.564e-06, "loss": 26.0018, "step": 32820 }, { "epoch": 0.0663186771009668, "grad_norm": 528.7507934570312, "learning_rate": 6.566e-06, "loss": 33.0478, "step": 32830 }, { "epoch": 0.06633887773365062, "grad_norm": 517.101806640625, "learning_rate": 6.568000000000001e-06, "loss": 16.322, "step": 32840 }, { "epoch": 0.06635907836633444, "grad_norm": 289.5611572265625, "learning_rate": 6.570000000000001e-06, "loss": 16.0017, "step": 32850 }, { "epoch": 0.06637927899901824, "grad_norm": 819.7901611328125, "learning_rate": 6.572000000000001e-06, "loss": 18.3419, "step": 32860 }, { "epoch": 0.06639947963170206, "grad_norm": 297.51336669921875, "learning_rate": 6.574e-06, "loss": 30.9874, "step": 32870 }, { "epoch": 0.06641968026438588, "grad_norm": 402.6764221191406, "learning_rate": 6.576e-06, "loss": 19.8634, "step": 32880 }, { "epoch": 0.0664398808970697, "grad_norm": 232.97459411621094, "learning_rate": 6.578000000000001e-06, "loss": 22.6124, "step": 32890 }, { "epoch": 0.06646008152975351, "grad_norm": 69.75105285644531, "learning_rate": 6.5800000000000005e-06, "loss": 17.1709, "step": 32900 }, { "epoch": 0.06648028216243733, "grad_norm": 387.6180725097656, "learning_rate": 6.582000000000001e-06, "loss": 19.689, "step": 32910 }, { "epoch": 0.06650048279512115, "grad_norm": 84.04486083984375, "learning_rate": 6.584e-06, "loss": 20.6682, "step": 32920 }, { "epoch": 0.06652068342780496, "grad_norm": 775.0733642578125, "learning_rate": 6.586e-06, "loss": 23.5076, "step": 32930 }, { "epoch": 0.06654088406048878, "grad_norm": 890.520751953125, "learning_rate": 6.588000000000001e-06, "loss": 29.0297, "step": 32940 }, { "epoch": 0.0665610846931726, "grad_norm": 527.3336791992188, "learning_rate": 6.5900000000000004e-06, "loss": 14.0379, "step": 32950 }, { "epoch": 0.0665812853258564, "grad_norm": 458.6322326660156, "learning_rate": 6.592000000000001e-06, "loss": 19.9573, "step": 32960 }, { "epoch": 0.06660148595854022, "grad_norm": 520.3087158203125, "learning_rate": 6.594000000000001e-06, "loss": 25.0068, "step": 32970 }, { "epoch": 0.06662168659122404, "grad_norm": 451.0412902832031, "learning_rate": 6.596e-06, "loss": 19.663, "step": 32980 }, { "epoch": 0.06664188722390785, "grad_norm": 375.05084228515625, "learning_rate": 6.598000000000001e-06, "loss": 19.3326, "step": 32990 }, { "epoch": 0.06666208785659167, "grad_norm": 502.0182189941406, "learning_rate": 6.600000000000001e-06, "loss": 33.7013, "step": 33000 }, { "epoch": 0.06668228848927549, "grad_norm": 208.86871337890625, "learning_rate": 6.6020000000000005e-06, "loss": 22.5661, "step": 33010 }, { "epoch": 0.0667024891219593, "grad_norm": 
312.8105163574219, "learning_rate": 6.604000000000001e-06, "loss": 18.6504, "step": 33020 }, { "epoch": 0.06672268975464311, "grad_norm": 385.7571716308594, "learning_rate": 6.606e-06, "loss": 18.8472, "step": 33030 }, { "epoch": 0.06674289038732693, "grad_norm": 104.6175765991211, "learning_rate": 6.608000000000001e-06, "loss": 42.2416, "step": 33040 }, { "epoch": 0.06676309102001075, "grad_norm": 884.9609375, "learning_rate": 6.610000000000001e-06, "loss": 31.9267, "step": 33050 }, { "epoch": 0.06678329165269456, "grad_norm": 300.7400817871094, "learning_rate": 6.612e-06, "loss": 14.5247, "step": 33060 }, { "epoch": 0.06680349228537838, "grad_norm": 644.3165283203125, "learning_rate": 6.6140000000000005e-06, "loss": 17.6598, "step": 33070 }, { "epoch": 0.0668236929180622, "grad_norm": 379.55096435546875, "learning_rate": 6.616e-06, "loss": 22.8861, "step": 33080 }, { "epoch": 0.066843893550746, "grad_norm": 1268.85888671875, "learning_rate": 6.618000000000001e-06, "loss": 33.9961, "step": 33090 }, { "epoch": 0.06686409418342983, "grad_norm": 794.061767578125, "learning_rate": 6.620000000000001e-06, "loss": 17.8204, "step": 33100 }, { "epoch": 0.06688429481611365, "grad_norm": 170.29322814941406, "learning_rate": 6.622e-06, "loss": 33.9078, "step": 33110 }, { "epoch": 0.06690449544879745, "grad_norm": 742.6322021484375, "learning_rate": 6.6240000000000004e-06, "loss": 17.3571, "step": 33120 }, { "epoch": 0.06692469608148127, "grad_norm": 141.3765869140625, "learning_rate": 6.626000000000001e-06, "loss": 28.4878, "step": 33130 }, { "epoch": 0.06694489671416509, "grad_norm": 505.18768310546875, "learning_rate": 6.628e-06, "loss": 23.0358, "step": 33140 }, { "epoch": 0.0669650973468489, "grad_norm": 922.85009765625, "learning_rate": 6.630000000000001e-06, "loss": 37.6132, "step": 33150 }, { "epoch": 0.06698529797953272, "grad_norm": 694.0914916992188, "learning_rate": 6.632000000000001e-06, "loss": 21.8531, "step": 33160 }, { "epoch": 0.06700549861221654, "grad_norm": 326.4519958496094, "learning_rate": 6.634e-06, "loss": 26.4938, "step": 33170 }, { "epoch": 0.06702569924490034, "grad_norm": 842.4217529296875, "learning_rate": 6.6360000000000005e-06, "loss": 38.9571, "step": 33180 }, { "epoch": 0.06704589987758416, "grad_norm": 392.56304931640625, "learning_rate": 6.638e-06, "loss": 47.6006, "step": 33190 }, { "epoch": 0.06706610051026798, "grad_norm": 550.9373779296875, "learning_rate": 6.640000000000001e-06, "loss": 23.8491, "step": 33200 }, { "epoch": 0.0670863011429518, "grad_norm": 589.4515991210938, "learning_rate": 6.642000000000001e-06, "loss": 32.0475, "step": 33210 }, { "epoch": 0.06710650177563561, "grad_norm": 214.31448364257812, "learning_rate": 6.644e-06, "loss": 24.7882, "step": 33220 }, { "epoch": 0.06712670240831943, "grad_norm": 458.9374694824219, "learning_rate": 6.646e-06, "loss": 29.4549, "step": 33230 }, { "epoch": 0.06714690304100325, "grad_norm": 716.244140625, "learning_rate": 6.648e-06, "loss": 29.5145, "step": 33240 }, { "epoch": 0.06716710367368706, "grad_norm": 407.79412841796875, "learning_rate": 6.650000000000001e-06, "loss": 31.9646, "step": 33250 }, { "epoch": 0.06718730430637088, "grad_norm": 821.392333984375, "learning_rate": 6.652000000000001e-06, "loss": 27.7069, "step": 33260 }, { "epoch": 0.0672075049390547, "grad_norm": 598.4210205078125, "learning_rate": 6.654e-06, "loss": 28.4975, "step": 33270 }, { "epoch": 0.0672277055717385, "grad_norm": 488.873046875, "learning_rate": 6.656e-06, "loss": 45.5605, "step": 33280 }, { "epoch": 0.06724790620442232, 
"grad_norm": 1378.087646484375, "learning_rate": 6.658e-06, "loss": 27.3599, "step": 33290 }, { "epoch": 0.06726810683710614, "grad_norm": 251.3562774658203, "learning_rate": 6.660000000000001e-06, "loss": 17.7338, "step": 33300 }, { "epoch": 0.06728830746978995, "grad_norm": 486.3319091796875, "learning_rate": 6.662000000000001e-06, "loss": 15.1403, "step": 33310 }, { "epoch": 0.06730850810247377, "grad_norm": 283.7904052734375, "learning_rate": 6.664e-06, "loss": 26.435, "step": 33320 }, { "epoch": 0.06732870873515759, "grad_norm": 448.2616882324219, "learning_rate": 6.666e-06, "loss": 26.329, "step": 33330 }, { "epoch": 0.0673489093678414, "grad_norm": 294.36798095703125, "learning_rate": 6.668e-06, "loss": 27.094, "step": 33340 }, { "epoch": 0.06736911000052521, "grad_norm": 496.7976989746094, "learning_rate": 6.6700000000000005e-06, "loss": 18.7006, "step": 33350 }, { "epoch": 0.06738931063320903, "grad_norm": 505.17828369140625, "learning_rate": 6.672000000000001e-06, "loss": 22.3869, "step": 33360 }, { "epoch": 0.06740951126589285, "grad_norm": 1472.1234130859375, "learning_rate": 6.674000000000001e-06, "loss": 20.5633, "step": 33370 }, { "epoch": 0.06742971189857666, "grad_norm": 226.0415802001953, "learning_rate": 6.676e-06, "loss": 33.7317, "step": 33380 }, { "epoch": 0.06744991253126048, "grad_norm": 208.1175079345703, "learning_rate": 6.678e-06, "loss": 26.1456, "step": 33390 }, { "epoch": 0.0674701131639443, "grad_norm": 288.8869323730469, "learning_rate": 6.680000000000001e-06, "loss": 23.2932, "step": 33400 }, { "epoch": 0.0674903137966281, "grad_norm": 67.0960922241211, "learning_rate": 6.6820000000000006e-06, "loss": 26.3463, "step": 33410 }, { "epoch": 0.06751051442931193, "grad_norm": 340.869140625, "learning_rate": 6.684000000000001e-06, "loss": 21.618, "step": 33420 }, { "epoch": 0.06753071506199575, "grad_norm": 330.3170471191406, "learning_rate": 6.686e-06, "loss": 28.7773, "step": 33430 }, { "epoch": 0.06755091569467955, "grad_norm": 529.8251953125, "learning_rate": 6.688e-06, "loss": 21.6966, "step": 33440 }, { "epoch": 0.06757111632736337, "grad_norm": 46.65123748779297, "learning_rate": 6.690000000000001e-06, "loss": 31.1109, "step": 33450 }, { "epoch": 0.06759131696004719, "grad_norm": 809.0994873046875, "learning_rate": 6.6920000000000005e-06, "loss": 24.3731, "step": 33460 }, { "epoch": 0.067611517592731, "grad_norm": 439.84765625, "learning_rate": 6.694000000000001e-06, "loss": 23.559, "step": 33470 }, { "epoch": 0.06763171822541482, "grad_norm": 334.27618408203125, "learning_rate": 6.696e-06, "loss": 23.3025, "step": 33480 }, { "epoch": 0.06765191885809864, "grad_norm": 465.69964599609375, "learning_rate": 6.698e-06, "loss": 25.1898, "step": 33490 }, { "epoch": 0.06767211949078245, "grad_norm": 409.4376525878906, "learning_rate": 6.700000000000001e-06, "loss": 18.741, "step": 33500 }, { "epoch": 0.06769232012346627, "grad_norm": 1168.0740966796875, "learning_rate": 6.702e-06, "loss": 29.5806, "step": 33510 }, { "epoch": 0.06771252075615009, "grad_norm": 647.8543701171875, "learning_rate": 6.7040000000000005e-06, "loss": 17.401, "step": 33520 }, { "epoch": 0.06773272138883389, "grad_norm": 760.6053466796875, "learning_rate": 6.706000000000001e-06, "loss": 23.0411, "step": 33530 }, { "epoch": 0.06775292202151771, "grad_norm": 522.3080444335938, "learning_rate": 6.708e-06, "loss": 21.4989, "step": 33540 }, { "epoch": 0.06777312265420153, "grad_norm": 568.3932495117188, "learning_rate": 6.710000000000001e-06, "loss": 28.6894, "step": 33550 }, { "epoch": 
0.06779332328688535, "grad_norm": 320.2074279785156, "learning_rate": 6.712000000000001e-06, "loss": 32.6937, "step": 33560 }, { "epoch": 0.06781352391956916, "grad_norm": 511.3542785644531, "learning_rate": 6.7140000000000004e-06, "loss": 22.9541, "step": 33570 }, { "epoch": 0.06783372455225298, "grad_norm": 508.94610595703125, "learning_rate": 6.716000000000001e-06, "loss": 32.7813, "step": 33580 }, { "epoch": 0.0678539251849368, "grad_norm": 158.2616729736328, "learning_rate": 6.718e-06, "loss": 29.6369, "step": 33590 }, { "epoch": 0.0678741258176206, "grad_norm": 801.9617309570312, "learning_rate": 6.720000000000001e-06, "loss": 26.601, "step": 33600 }, { "epoch": 0.06789432645030442, "grad_norm": 248.82423400878906, "learning_rate": 6.722000000000001e-06, "loss": 58.0029, "step": 33610 }, { "epoch": 0.06791452708298824, "grad_norm": 1176.00341796875, "learning_rate": 6.724e-06, "loss": 37.8949, "step": 33620 }, { "epoch": 0.06793472771567205, "grad_norm": 1805.271728515625, "learning_rate": 6.7260000000000005e-06, "loss": 34.2037, "step": 33630 }, { "epoch": 0.06795492834835587, "grad_norm": 403.9906311035156, "learning_rate": 6.728e-06, "loss": 22.5022, "step": 33640 }, { "epoch": 0.06797512898103969, "grad_norm": 635.4937133789062, "learning_rate": 6.730000000000001e-06, "loss": 23.3953, "step": 33650 }, { "epoch": 0.0679953296137235, "grad_norm": 622.2322998046875, "learning_rate": 6.732000000000001e-06, "loss": 23.7058, "step": 33660 }, { "epoch": 0.06801553024640732, "grad_norm": 362.6100158691406, "learning_rate": 6.734e-06, "loss": 17.6944, "step": 33670 }, { "epoch": 0.06803573087909114, "grad_norm": 689.5000610351562, "learning_rate": 6.736e-06, "loss": 43.9024, "step": 33680 }, { "epoch": 0.06805593151177494, "grad_norm": 245.59693908691406, "learning_rate": 6.738e-06, "loss": 24.8223, "step": 33690 }, { "epoch": 0.06807613214445876, "grad_norm": 1100.160400390625, "learning_rate": 6.740000000000001e-06, "loss": 30.857, "step": 33700 }, { "epoch": 0.06809633277714258, "grad_norm": 413.7121276855469, "learning_rate": 6.742000000000001e-06, "loss": 20.6621, "step": 33710 }, { "epoch": 0.0681165334098264, "grad_norm": 1544.26904296875, "learning_rate": 6.744e-06, "loss": 41.2719, "step": 33720 }, { "epoch": 0.06813673404251021, "grad_norm": 639.9199829101562, "learning_rate": 6.746e-06, "loss": 43.955, "step": 33730 }, { "epoch": 0.06815693467519403, "grad_norm": 280.11669921875, "learning_rate": 6.7480000000000004e-06, "loss": 27.9973, "step": 33740 }, { "epoch": 0.06817713530787785, "grad_norm": 436.47222900390625, "learning_rate": 6.750000000000001e-06, "loss": 19.7929, "step": 33750 }, { "epoch": 0.06819733594056165, "grad_norm": 411.2103576660156, "learning_rate": 6.752000000000001e-06, "loss": 31.768, "step": 33760 }, { "epoch": 0.06821753657324547, "grad_norm": 815.0191650390625, "learning_rate": 6.754000000000001e-06, "loss": 22.2022, "step": 33770 }, { "epoch": 0.0682377372059293, "grad_norm": 494.87969970703125, "learning_rate": 6.756e-06, "loss": 22.3034, "step": 33780 }, { "epoch": 0.0682579378386131, "grad_norm": 394.14605712890625, "learning_rate": 6.758e-06, "loss": 32.416, "step": 33790 }, { "epoch": 0.06827813847129692, "grad_norm": 46.48622512817383, "learning_rate": 6.760000000000001e-06, "loss": 31.4422, "step": 33800 }, { "epoch": 0.06829833910398074, "grad_norm": 711.196533203125, "learning_rate": 6.762000000000001e-06, "loss": 27.2625, "step": 33810 }, { "epoch": 0.06831853973666455, "grad_norm": 304.4548034667969, "learning_rate": 6.764000000000001e-06, 
"loss": 27.4918, "step": 33820 }, { "epoch": 0.06833874036934837, "grad_norm": 586.8805541992188, "learning_rate": 6.766e-06, "loss": 25.6049, "step": 33830 }, { "epoch": 0.06835894100203219, "grad_norm": 508.9487609863281, "learning_rate": 6.768e-06, "loss": 31.5067, "step": 33840 }, { "epoch": 0.06837914163471599, "grad_norm": 1170.845703125, "learning_rate": 6.770000000000001e-06, "loss": 39.1824, "step": 33850 }, { "epoch": 0.06839934226739981, "grad_norm": 595.26904296875, "learning_rate": 6.7720000000000006e-06, "loss": 17.4453, "step": 33860 }, { "epoch": 0.06841954290008363, "grad_norm": 496.1965026855469, "learning_rate": 6.774000000000001e-06, "loss": 33.8451, "step": 33870 }, { "epoch": 0.06843974353276745, "grad_norm": 356.40869140625, "learning_rate": 6.776e-06, "loss": 31.5227, "step": 33880 }, { "epoch": 0.06845994416545126, "grad_norm": 473.4547119140625, "learning_rate": 6.778e-06, "loss": 19.9006, "step": 33890 }, { "epoch": 0.06848014479813508, "grad_norm": 346.06463623046875, "learning_rate": 6.780000000000001e-06, "loss": 19.9775, "step": 33900 }, { "epoch": 0.0685003454308189, "grad_norm": 676.5089111328125, "learning_rate": 6.7820000000000005e-06, "loss": 43.2861, "step": 33910 }, { "epoch": 0.0685205460635027, "grad_norm": 493.92626953125, "learning_rate": 6.784000000000001e-06, "loss": 24.8858, "step": 33920 }, { "epoch": 0.06854074669618652, "grad_norm": 289.1258850097656, "learning_rate": 6.786000000000001e-06, "loss": 34.1276, "step": 33930 }, { "epoch": 0.06856094732887034, "grad_norm": 1374.667236328125, "learning_rate": 6.788e-06, "loss": 36.3009, "step": 33940 }, { "epoch": 0.06858114796155415, "grad_norm": 400.775146484375, "learning_rate": 6.790000000000001e-06, "loss": 21.4908, "step": 33950 }, { "epoch": 0.06860134859423797, "grad_norm": 117.53766632080078, "learning_rate": 6.792000000000001e-06, "loss": 29.6389, "step": 33960 }, { "epoch": 0.06862154922692179, "grad_norm": 472.3504638671875, "learning_rate": 6.7940000000000005e-06, "loss": 31.6954, "step": 33970 }, { "epoch": 0.0686417498596056, "grad_norm": 478.7676086425781, "learning_rate": 6.796000000000001e-06, "loss": 36.4441, "step": 33980 }, { "epoch": 0.06866195049228942, "grad_norm": 0.0, "learning_rate": 6.798e-06, "loss": 76.5987, "step": 33990 }, { "epoch": 0.06868215112497324, "grad_norm": 302.47833251953125, "learning_rate": 6.800000000000001e-06, "loss": 22.5997, "step": 34000 }, { "epoch": 0.06870235175765704, "grad_norm": 469.24676513671875, "learning_rate": 6.802000000000001e-06, "loss": 26.1139, "step": 34010 }, { "epoch": 0.06872255239034086, "grad_norm": 500.22796630859375, "learning_rate": 6.804e-06, "loss": 26.9558, "step": 34020 }, { "epoch": 0.06874275302302468, "grad_norm": 539.3820190429688, "learning_rate": 6.8060000000000006e-06, "loss": 28.284, "step": 34030 }, { "epoch": 0.0687629536557085, "grad_norm": 205.44790649414062, "learning_rate": 6.808e-06, "loss": 24.4893, "step": 34040 }, { "epoch": 0.06878315428839231, "grad_norm": 1270.7779541015625, "learning_rate": 6.810000000000001e-06, "loss": 28.4215, "step": 34050 }, { "epoch": 0.06880335492107613, "grad_norm": 177.77996826171875, "learning_rate": 6.812000000000001e-06, "loss": 28.7874, "step": 34060 }, { "epoch": 0.06882355555375995, "grad_norm": 363.94549560546875, "learning_rate": 6.814e-06, "loss": 27.8043, "step": 34070 }, { "epoch": 0.06884375618644376, "grad_norm": 990.454833984375, "learning_rate": 6.8160000000000005e-06, "loss": 31.9178, "step": 34080 }, { "epoch": 0.06886395681912758, "grad_norm": 
265.3861999511719, "learning_rate": 6.818e-06, "loss": 21.8929, "step": 34090 }, { "epoch": 0.0688841574518114, "grad_norm": 8.25837516784668, "learning_rate": 6.820000000000001e-06, "loss": 39.5831, "step": 34100 }, { "epoch": 0.0689043580844952, "grad_norm": 376.444580078125, "learning_rate": 6.822000000000001e-06, "loss": 24.0087, "step": 34110 }, { "epoch": 0.06892455871717902, "grad_norm": 737.147216796875, "learning_rate": 6.824e-06, "loss": 25.5048, "step": 34120 }, { "epoch": 0.06894475934986284, "grad_norm": 423.600341796875, "learning_rate": 6.826e-06, "loss": 13.3663, "step": 34130 }, { "epoch": 0.06896495998254665, "grad_norm": 554.6629028320312, "learning_rate": 6.8280000000000005e-06, "loss": 32.9468, "step": 34140 }, { "epoch": 0.06898516061523047, "grad_norm": 894.8736572265625, "learning_rate": 6.830000000000001e-06, "loss": 16.0512, "step": 34150 }, { "epoch": 0.06900536124791429, "grad_norm": 596.2340087890625, "learning_rate": 6.832000000000001e-06, "loss": 20.5294, "step": 34160 }, { "epoch": 0.0690255618805981, "grad_norm": 782.9882202148438, "learning_rate": 6.834000000000001e-06, "loss": 21.4276, "step": 34170 }, { "epoch": 0.06904576251328191, "grad_norm": 761.8718872070312, "learning_rate": 6.836e-06, "loss": 22.4803, "step": 34180 }, { "epoch": 0.06906596314596573, "grad_norm": 400.63726806640625, "learning_rate": 6.8380000000000004e-06, "loss": 26.6845, "step": 34190 }, { "epoch": 0.06908616377864955, "grad_norm": 430.4891357421875, "learning_rate": 6.8400000000000014e-06, "loss": 32.4808, "step": 34200 }, { "epoch": 0.06910636441133336, "grad_norm": 244.76751708984375, "learning_rate": 6.842000000000001e-06, "loss": 32.8935, "step": 34210 }, { "epoch": 0.06912656504401718, "grad_norm": 432.182861328125, "learning_rate": 6.844000000000001e-06, "loss": 32.0066, "step": 34220 }, { "epoch": 0.069146765676701, "grad_norm": 583.138671875, "learning_rate": 6.846e-06, "loss": 18.7571, "step": 34230 }, { "epoch": 0.0691669663093848, "grad_norm": 120.03465270996094, "learning_rate": 6.848e-06, "loss": 18.3551, "step": 34240 }, { "epoch": 0.06918716694206863, "grad_norm": 786.241943359375, "learning_rate": 6.850000000000001e-06, "loss": 28.8558, "step": 34250 }, { "epoch": 0.06920736757475245, "grad_norm": 479.44879150390625, "learning_rate": 6.852000000000001e-06, "loss": 28.4533, "step": 34260 }, { "epoch": 0.06922756820743625, "grad_norm": 770.8941040039062, "learning_rate": 6.854000000000001e-06, "loss": 28.8545, "step": 34270 }, { "epoch": 0.06924776884012007, "grad_norm": 744.919921875, "learning_rate": 6.856e-06, "loss": 29.4833, "step": 34280 }, { "epoch": 0.06926796947280389, "grad_norm": 441.5009460449219, "learning_rate": 6.858e-06, "loss": 32.4952, "step": 34290 }, { "epoch": 0.0692881701054877, "grad_norm": 2179.65625, "learning_rate": 6.860000000000001e-06, "loss": 24.934, "step": 34300 }, { "epoch": 0.06930837073817152, "grad_norm": 671.8490600585938, "learning_rate": 6.8620000000000005e-06, "loss": 25.6753, "step": 34310 }, { "epoch": 0.06932857137085534, "grad_norm": 768.4308471679688, "learning_rate": 6.864000000000001e-06, "loss": 24.2447, "step": 34320 }, { "epoch": 0.06934877200353914, "grad_norm": 901.4434814453125, "learning_rate": 6.866000000000001e-06, "loss": 44.3051, "step": 34330 }, { "epoch": 0.06936897263622296, "grad_norm": 268.15606689453125, "learning_rate": 6.868e-06, "loss": 16.9973, "step": 34340 }, { "epoch": 0.06938917326890678, "grad_norm": 617.8140869140625, "learning_rate": 6.870000000000001e-06, "loss": 21.9639, "step": 34350 }, { 
"epoch": 0.0694093739015906, "grad_norm": 1412.798095703125, "learning_rate": 6.872000000000001e-06, "loss": 40.11, "step": 34360 }, { "epoch": 0.06942957453427441, "grad_norm": 1573.531494140625, "learning_rate": 6.874000000000001e-06, "loss": 35.3232, "step": 34370 }, { "epoch": 0.06944977516695823, "grad_norm": 717.170654296875, "learning_rate": 6.876000000000001e-06, "loss": 32.4138, "step": 34380 }, { "epoch": 0.06946997579964205, "grad_norm": 529.8059692382812, "learning_rate": 6.878e-06, "loss": 30.7321, "step": 34390 }, { "epoch": 0.06949017643232586, "grad_norm": 435.5838623046875, "learning_rate": 6.88e-06, "loss": 17.2373, "step": 34400 }, { "epoch": 0.06951037706500968, "grad_norm": 550.3357543945312, "learning_rate": 6.882000000000001e-06, "loss": 28.1552, "step": 34410 }, { "epoch": 0.0695305776976935, "grad_norm": 620.171875, "learning_rate": 6.8840000000000005e-06, "loss": 17.8972, "step": 34420 }, { "epoch": 0.0695507783303773, "grad_norm": 333.9394836425781, "learning_rate": 6.886000000000001e-06, "loss": 25.1844, "step": 34430 }, { "epoch": 0.06957097896306112, "grad_norm": 400.9455261230469, "learning_rate": 6.888e-06, "loss": 35.5136, "step": 34440 }, { "epoch": 0.06959117959574494, "grad_norm": 664.9127807617188, "learning_rate": 6.89e-06, "loss": 35.7593, "step": 34450 }, { "epoch": 0.06961138022842875, "grad_norm": 761.8721923828125, "learning_rate": 6.892000000000001e-06, "loss": 26.314, "step": 34460 }, { "epoch": 0.06963158086111257, "grad_norm": 1453.0860595703125, "learning_rate": 6.894e-06, "loss": 26.4827, "step": 34470 }, { "epoch": 0.06965178149379639, "grad_norm": 151.14361572265625, "learning_rate": 6.8960000000000006e-06, "loss": 15.2546, "step": 34480 }, { "epoch": 0.0696719821264802, "grad_norm": 337.78021240234375, "learning_rate": 6.898e-06, "loss": 35.3916, "step": 34490 }, { "epoch": 0.06969218275916401, "grad_norm": 181.8362274169922, "learning_rate": 6.9e-06, "loss": 19.3162, "step": 34500 }, { "epoch": 0.06971238339184783, "grad_norm": 643.0160522460938, "learning_rate": 6.902000000000001e-06, "loss": 15.7593, "step": 34510 }, { "epoch": 0.06973258402453165, "grad_norm": 287.9972229003906, "learning_rate": 6.904e-06, "loss": 13.9202, "step": 34520 }, { "epoch": 0.06975278465721546, "grad_norm": 454.93878173828125, "learning_rate": 6.9060000000000005e-06, "loss": 20.3274, "step": 34530 }, { "epoch": 0.06977298528989928, "grad_norm": 279.34271240234375, "learning_rate": 6.908000000000001e-06, "loss": 25.0116, "step": 34540 }, { "epoch": 0.0697931859225831, "grad_norm": 1011.8807373046875, "learning_rate": 6.91e-06, "loss": 27.7776, "step": 34550 }, { "epoch": 0.06981338655526691, "grad_norm": 447.10052490234375, "learning_rate": 6.912000000000001e-06, "loss": 40.0177, "step": 34560 }, { "epoch": 0.06983358718795073, "grad_norm": 399.48968505859375, "learning_rate": 6.914000000000001e-06, "loss": 41.0893, "step": 34570 }, { "epoch": 0.06985378782063455, "grad_norm": 556.7623291015625, "learning_rate": 6.916e-06, "loss": 23.7829, "step": 34580 }, { "epoch": 0.06987398845331835, "grad_norm": 629.1818237304688, "learning_rate": 6.9180000000000005e-06, "loss": 25.9176, "step": 34590 }, { "epoch": 0.06989418908600217, "grad_norm": 825.7970581054688, "learning_rate": 6.92e-06, "loss": 33.1165, "step": 34600 }, { "epoch": 0.06991438971868599, "grad_norm": 309.82586669921875, "learning_rate": 6.922000000000001e-06, "loss": 25.6488, "step": 34610 }, { "epoch": 0.0699345903513698, "grad_norm": 297.77362060546875, "learning_rate": 6.924000000000001e-06, 
"loss": 30.7852, "step": 34620 }, { "epoch": 0.06995479098405362, "grad_norm": 648.6033325195312, "learning_rate": 6.926e-06, "loss": 31.115, "step": 34630 }, { "epoch": 0.06997499161673744, "grad_norm": 41.54879379272461, "learning_rate": 6.928e-06, "loss": 25.9116, "step": 34640 }, { "epoch": 0.06999519224942125, "grad_norm": 231.39988708496094, "learning_rate": 6.93e-06, "loss": 26.1055, "step": 34650 }, { "epoch": 0.07001539288210507, "grad_norm": 141.19129943847656, "learning_rate": 6.932000000000001e-06, "loss": 14.3568, "step": 34660 }, { "epoch": 0.07003559351478889, "grad_norm": 491.8395080566406, "learning_rate": 6.934000000000001e-06, "loss": 30.6901, "step": 34670 }, { "epoch": 0.0700557941474727, "grad_norm": 413.2252502441406, "learning_rate": 6.936e-06, "loss": 28.1796, "step": 34680 }, { "epoch": 0.07007599478015651, "grad_norm": 494.89678955078125, "learning_rate": 6.938e-06, "loss": 30.1756, "step": 34690 }, { "epoch": 0.07009619541284033, "grad_norm": 344.2539367675781, "learning_rate": 6.9400000000000005e-06, "loss": 25.5514, "step": 34700 }, { "epoch": 0.07011639604552415, "grad_norm": 1064.0335693359375, "learning_rate": 6.942000000000001e-06, "loss": 42.9528, "step": 34710 }, { "epoch": 0.07013659667820796, "grad_norm": 535.0779418945312, "learning_rate": 6.944000000000001e-06, "loss": 16.0364, "step": 34720 }, { "epoch": 0.07015679731089178, "grad_norm": 490.56683349609375, "learning_rate": 6.946000000000001e-06, "loss": 37.9951, "step": 34730 }, { "epoch": 0.0701769979435756, "grad_norm": 1186.7283935546875, "learning_rate": 6.948e-06, "loss": 44.0284, "step": 34740 }, { "epoch": 0.0701971985762594, "grad_norm": 765.0109252929688, "learning_rate": 6.95e-06, "loss": 37.0233, "step": 34750 }, { "epoch": 0.07021739920894322, "grad_norm": 861.0193481445312, "learning_rate": 6.952000000000001e-06, "loss": 34.5704, "step": 34760 }, { "epoch": 0.07023759984162704, "grad_norm": 221.90745544433594, "learning_rate": 6.954000000000001e-06, "loss": 20.0659, "step": 34770 }, { "epoch": 0.07025780047431085, "grad_norm": 145.42893981933594, "learning_rate": 6.956000000000001e-06, "loss": 24.7001, "step": 34780 }, { "epoch": 0.07027800110699467, "grad_norm": 274.7716979980469, "learning_rate": 6.958e-06, "loss": 17.1338, "step": 34790 }, { "epoch": 0.07029820173967849, "grad_norm": 202.82875061035156, "learning_rate": 6.96e-06, "loss": 13.7413, "step": 34800 }, { "epoch": 0.0703184023723623, "grad_norm": 153.72035217285156, "learning_rate": 6.962000000000001e-06, "loss": 41.7966, "step": 34810 }, { "epoch": 0.07033860300504612, "grad_norm": 124.36219787597656, "learning_rate": 6.964000000000001e-06, "loss": 31.4384, "step": 34820 }, { "epoch": 0.07035880363772994, "grad_norm": 485.8753356933594, "learning_rate": 6.966000000000001e-06, "loss": 34.8664, "step": 34830 }, { "epoch": 0.07037900427041376, "grad_norm": 839.91552734375, "learning_rate": 6.968e-06, "loss": 38.5283, "step": 34840 }, { "epoch": 0.07039920490309756, "grad_norm": 885.7486572265625, "learning_rate": 6.97e-06, "loss": 30.9444, "step": 34850 }, { "epoch": 0.07041940553578138, "grad_norm": 479.7201232910156, "learning_rate": 6.972000000000001e-06, "loss": 22.7172, "step": 34860 }, { "epoch": 0.0704396061684652, "grad_norm": 513.4949951171875, "learning_rate": 6.9740000000000005e-06, "loss": 17.2636, "step": 34870 }, { "epoch": 0.07045980680114901, "grad_norm": 555.849609375, "learning_rate": 6.976000000000001e-06, "loss": 24.6814, "step": 34880 }, { "epoch": 0.07048000743383283, "grad_norm": 395.6819152832031, 
"learning_rate": 6.978e-06, "loss": 14.5689, "step": 34890 }, { "epoch": 0.07050020806651665, "grad_norm": 256.9186706542969, "learning_rate": 6.98e-06, "loss": 21.4932, "step": 34900 }, { "epoch": 0.07052040869920045, "grad_norm": 403.16693115234375, "learning_rate": 6.982000000000001e-06, "loss": 33.3816, "step": 34910 }, { "epoch": 0.07054060933188427, "grad_norm": 724.1705322265625, "learning_rate": 6.984e-06, "loss": 16.0022, "step": 34920 }, { "epoch": 0.0705608099645681, "grad_norm": 353.1549987792969, "learning_rate": 6.9860000000000005e-06, "loss": 14.607, "step": 34930 }, { "epoch": 0.0705810105972519, "grad_norm": 2520.662109375, "learning_rate": 6.988000000000001e-06, "loss": 54.9936, "step": 34940 }, { "epoch": 0.07060121122993572, "grad_norm": 330.6929016113281, "learning_rate": 6.99e-06, "loss": 14.1727, "step": 34950 }, { "epoch": 0.07062141186261954, "grad_norm": 504.8486022949219, "learning_rate": 6.992000000000001e-06, "loss": 38.3436, "step": 34960 }, { "epoch": 0.07064161249530335, "grad_norm": 133.2992401123047, "learning_rate": 6.994000000000001e-06, "loss": 27.9458, "step": 34970 }, { "epoch": 0.07066181312798717, "grad_norm": 201.37075805664062, "learning_rate": 6.9960000000000004e-06, "loss": 17.002, "step": 34980 }, { "epoch": 0.07068201376067099, "grad_norm": 170.57139587402344, "learning_rate": 6.998000000000001e-06, "loss": 27.4595, "step": 34990 }, { "epoch": 0.0707022143933548, "grad_norm": 151.04176330566406, "learning_rate": 7e-06, "loss": 24.4173, "step": 35000 }, { "epoch": 0.07072241502603861, "grad_norm": 687.1515502929688, "learning_rate": 7.002000000000001e-06, "loss": 32.1208, "step": 35010 }, { "epoch": 0.07074261565872243, "grad_norm": 1763.760986328125, "learning_rate": 7.004000000000001e-06, "loss": 38.3857, "step": 35020 }, { "epoch": 0.07076281629140625, "grad_norm": 922.94287109375, "learning_rate": 7.006e-06, "loss": 24.6238, "step": 35030 }, { "epoch": 0.07078301692409006, "grad_norm": 73.7891616821289, "learning_rate": 7.0080000000000005e-06, "loss": 15.4841, "step": 35040 }, { "epoch": 0.07080321755677388, "grad_norm": 844.647216796875, "learning_rate": 7.01e-06, "loss": 31.8864, "step": 35050 }, { "epoch": 0.0708234181894577, "grad_norm": 832.1209106445312, "learning_rate": 7.012000000000001e-06, "loss": 20.9632, "step": 35060 }, { "epoch": 0.0708436188221415, "grad_norm": 629.44775390625, "learning_rate": 7.014000000000001e-06, "loss": 24.6583, "step": 35070 }, { "epoch": 0.07086381945482532, "grad_norm": 291.5755310058594, "learning_rate": 7.016e-06, "loss": 23.2967, "step": 35080 }, { "epoch": 0.07088402008750914, "grad_norm": 599.63916015625, "learning_rate": 7.018e-06, "loss": 34.7414, "step": 35090 }, { "epoch": 0.07090422072019295, "grad_norm": 700.6422729492188, "learning_rate": 7.0200000000000006e-06, "loss": 53.2773, "step": 35100 }, { "epoch": 0.07092442135287677, "grad_norm": 354.1465148925781, "learning_rate": 7.022000000000001e-06, "loss": 39.586, "step": 35110 }, { "epoch": 0.07094462198556059, "grad_norm": 316.0738525390625, "learning_rate": 7.024000000000001e-06, "loss": 33.9372, "step": 35120 }, { "epoch": 0.0709648226182444, "grad_norm": 215.59765625, "learning_rate": 7.026000000000001e-06, "loss": 18.8616, "step": 35130 }, { "epoch": 0.07098502325092822, "grad_norm": 479.970947265625, "learning_rate": 7.028e-06, "loss": 21.9399, "step": 35140 }, { "epoch": 0.07100522388361204, "grad_norm": 552.5803833007812, "learning_rate": 7.0300000000000005e-06, "loss": 26.6272, "step": 35150 }, { "epoch": 0.07102542451629586, 
"grad_norm": 471.1194763183594, "learning_rate": 7.0320000000000015e-06, "loss": 30.7594, "step": 35160 }, { "epoch": 0.07104562514897966, "grad_norm": 560.67431640625, "learning_rate": 7.034000000000001e-06, "loss": 16.5104, "step": 35170 }, { "epoch": 0.07106582578166348, "grad_norm": 297.8233947753906, "learning_rate": 7.036000000000001e-06, "loss": 36.3182, "step": 35180 }, { "epoch": 0.0710860264143473, "grad_norm": 770.4078979492188, "learning_rate": 7.038e-06, "loss": 33.7109, "step": 35190 }, { "epoch": 0.07110622704703111, "grad_norm": 431.954345703125, "learning_rate": 7.04e-06, "loss": 36.4231, "step": 35200 }, { "epoch": 0.07112642767971493, "grad_norm": 527.4671020507812, "learning_rate": 7.042000000000001e-06, "loss": 28.5493, "step": 35210 }, { "epoch": 0.07114662831239875, "grad_norm": 252.7281951904297, "learning_rate": 7.044000000000001e-06, "loss": 22.4609, "step": 35220 }, { "epoch": 0.07116682894508256, "grad_norm": 1668.7276611328125, "learning_rate": 7.046000000000001e-06, "loss": 33.1983, "step": 35230 }, { "epoch": 0.07118702957776638, "grad_norm": 0.0, "learning_rate": 7.048e-06, "loss": 33.8626, "step": 35240 }, { "epoch": 0.0712072302104502, "grad_norm": 98.54589080810547, "learning_rate": 7.05e-06, "loss": 25.6418, "step": 35250 }, { "epoch": 0.071227430843134, "grad_norm": 377.0586242675781, "learning_rate": 7.052000000000001e-06, "loss": 20.7303, "step": 35260 }, { "epoch": 0.07124763147581782, "grad_norm": 396.9246520996094, "learning_rate": 7.0540000000000006e-06, "loss": 24.3956, "step": 35270 }, { "epoch": 0.07126783210850164, "grad_norm": 313.577880859375, "learning_rate": 7.056000000000001e-06, "loss": 18.0199, "step": 35280 }, { "epoch": 0.07128803274118545, "grad_norm": 1487.6373291015625, "learning_rate": 7.058e-06, "loss": 27.6879, "step": 35290 }, { "epoch": 0.07130823337386927, "grad_norm": 543.5441284179688, "learning_rate": 7.06e-06, "loss": 36.6739, "step": 35300 }, { "epoch": 0.07132843400655309, "grad_norm": 402.0924072265625, "learning_rate": 7.062000000000001e-06, "loss": 17.9225, "step": 35310 }, { "epoch": 0.07134863463923691, "grad_norm": 373.1673889160156, "learning_rate": 7.0640000000000005e-06, "loss": 30.948, "step": 35320 }, { "epoch": 0.07136883527192071, "grad_norm": 1144.22900390625, "learning_rate": 7.066000000000001e-06, "loss": 28.6735, "step": 35330 }, { "epoch": 0.07138903590460453, "grad_norm": 1234.5067138671875, "learning_rate": 7.068000000000001e-06, "loss": 33.1719, "step": 35340 }, { "epoch": 0.07140923653728835, "grad_norm": 249.57534790039062, "learning_rate": 7.07e-06, "loss": 64.2075, "step": 35350 }, { "epoch": 0.07142943716997216, "grad_norm": 735.7301025390625, "learning_rate": 7.072000000000001e-06, "loss": 20.6333, "step": 35360 }, { "epoch": 0.07144963780265598, "grad_norm": 528.9682006835938, "learning_rate": 7.074000000000001e-06, "loss": 27.5707, "step": 35370 }, { "epoch": 0.0714698384353398, "grad_norm": 175.97882080078125, "learning_rate": 7.0760000000000005e-06, "loss": 32.1693, "step": 35380 }, { "epoch": 0.0714900390680236, "grad_norm": 267.4575500488281, "learning_rate": 7.078000000000001e-06, "loss": 26.42, "step": 35390 }, { "epoch": 0.07151023970070743, "grad_norm": 662.626708984375, "learning_rate": 7.08e-06, "loss": 22.6109, "step": 35400 }, { "epoch": 0.07153044033339125, "grad_norm": 953.2594604492188, "learning_rate": 7.082000000000001e-06, "loss": 30.7239, "step": 35410 }, { "epoch": 0.07155064096607505, "grad_norm": 174.0382080078125, "learning_rate": 7.084000000000001e-06, "loss": 
18.2796, "step": 35420 }, { "epoch": 0.07157084159875887, "grad_norm": 881.6152954101562, "learning_rate": 7.0860000000000004e-06, "loss": 24.4007, "step": 35430 }, { "epoch": 0.07159104223144269, "grad_norm": 658.0614624023438, "learning_rate": 7.088000000000001e-06, "loss": 20.0999, "step": 35440 }, { "epoch": 0.0716112428641265, "grad_norm": 332.91650390625, "learning_rate": 7.09e-06, "loss": 24.0056, "step": 35450 }, { "epoch": 0.07163144349681032, "grad_norm": 573.6510009765625, "learning_rate": 7.092000000000001e-06, "loss": 38.1989, "step": 35460 }, { "epoch": 0.07165164412949414, "grad_norm": 526.1616821289062, "learning_rate": 7.094000000000001e-06, "loss": 24.0661, "step": 35470 }, { "epoch": 0.07167184476217796, "grad_norm": 206.29298400878906, "learning_rate": 7.096e-06, "loss": 15.9931, "step": 35480 }, { "epoch": 0.07169204539486176, "grad_norm": 919.5665893554688, "learning_rate": 7.0980000000000005e-06, "loss": 24.3283, "step": 35490 }, { "epoch": 0.07171224602754558, "grad_norm": 810.0609741210938, "learning_rate": 7.100000000000001e-06, "loss": 19.033, "step": 35500 }, { "epoch": 0.0717324466602294, "grad_norm": 258.5408935546875, "learning_rate": 7.102000000000001e-06, "loss": 13.9003, "step": 35510 }, { "epoch": 0.07175264729291321, "grad_norm": 451.9220886230469, "learning_rate": 7.104000000000001e-06, "loss": 23.7401, "step": 35520 }, { "epoch": 0.07177284792559703, "grad_norm": 697.3738403320312, "learning_rate": 7.106000000000001e-06, "loss": 39.4503, "step": 35530 }, { "epoch": 0.07179304855828085, "grad_norm": 576.223388671875, "learning_rate": 7.108e-06, "loss": 40.5201, "step": 35540 }, { "epoch": 0.07181324919096466, "grad_norm": 150.14456176757812, "learning_rate": 7.1100000000000005e-06, "loss": 37.7105, "step": 35550 }, { "epoch": 0.07183344982364848, "grad_norm": 319.6799011230469, "learning_rate": 7.1120000000000015e-06, "loss": 23.044, "step": 35560 }, { "epoch": 0.0718536504563323, "grad_norm": 231.80503845214844, "learning_rate": 7.114000000000001e-06, "loss": 12.1974, "step": 35570 }, { "epoch": 0.0718738510890161, "grad_norm": 284.95037841796875, "learning_rate": 7.116000000000001e-06, "loss": 23.1263, "step": 35580 }, { "epoch": 0.07189405172169992, "grad_norm": 820.784423828125, "learning_rate": 7.118e-06, "loss": 30.4677, "step": 35590 }, { "epoch": 0.07191425235438374, "grad_norm": 461.59442138671875, "learning_rate": 7.1200000000000004e-06, "loss": 11.0868, "step": 35600 }, { "epoch": 0.07193445298706755, "grad_norm": 278.0670471191406, "learning_rate": 7.1220000000000014e-06, "loss": 21.7604, "step": 35610 }, { "epoch": 0.07195465361975137, "grad_norm": 696.705078125, "learning_rate": 7.124000000000001e-06, "loss": 16.4526, "step": 35620 }, { "epoch": 0.07197485425243519, "grad_norm": 852.6187744140625, "learning_rate": 7.126000000000001e-06, "loss": 28.0098, "step": 35630 }, { "epoch": 0.07199505488511901, "grad_norm": 47.386940002441406, "learning_rate": 7.128e-06, "loss": 23.8823, "step": 35640 }, { "epoch": 0.07201525551780281, "grad_norm": 398.6817321777344, "learning_rate": 7.13e-06, "loss": 22.399, "step": 35650 }, { "epoch": 0.07203545615048663, "grad_norm": 349.03173828125, "learning_rate": 7.132e-06, "loss": 21.1377, "step": 35660 }, { "epoch": 0.07205565678317045, "grad_norm": 340.30853271484375, "learning_rate": 7.134000000000001e-06, "loss": 19.7428, "step": 35670 }, { "epoch": 0.07207585741585426, "grad_norm": 534.1447143554688, "learning_rate": 7.136000000000001e-06, "loss": 36.1049, "step": 35680 }, { "epoch": 
0.07209605804853808, "grad_norm": 240.12698364257812, "learning_rate": 7.138e-06, "loss": 25.354, "step": 35690 }, { "epoch": 0.0721162586812219, "grad_norm": 668.1033935546875, "learning_rate": 7.14e-06, "loss": 33.0883, "step": 35700 }, { "epoch": 0.07213645931390571, "grad_norm": 303.3044128417969, "learning_rate": 7.142e-06, "loss": 24.0498, "step": 35710 }, { "epoch": 0.07215665994658953, "grad_norm": 401.3393249511719, "learning_rate": 7.1440000000000005e-06, "loss": 12.1863, "step": 35720 }, { "epoch": 0.07217686057927335, "grad_norm": 262.6009826660156, "learning_rate": 7.146000000000001e-06, "loss": 20.9297, "step": 35730 }, { "epoch": 0.07219706121195715, "grad_norm": 295.8729553222656, "learning_rate": 7.148000000000001e-06, "loss": 24.8015, "step": 35740 }, { "epoch": 0.07221726184464097, "grad_norm": 249.96107482910156, "learning_rate": 7.15e-06, "loss": 39.1403, "step": 35750 }, { "epoch": 0.0722374624773248, "grad_norm": 666.444091796875, "learning_rate": 7.152e-06, "loss": 25.2217, "step": 35760 }, { "epoch": 0.0722576631100086, "grad_norm": 302.3631896972656, "learning_rate": 7.154000000000001e-06, "loss": 28.7105, "step": 35770 }, { "epoch": 0.07227786374269242, "grad_norm": 661.980224609375, "learning_rate": 7.156000000000001e-06, "loss": 31.7649, "step": 35780 }, { "epoch": 0.07229806437537624, "grad_norm": 483.1505126953125, "learning_rate": 7.158000000000001e-06, "loss": 20.0861, "step": 35790 }, { "epoch": 0.07231826500806006, "grad_norm": 726.7387084960938, "learning_rate": 7.16e-06, "loss": 35.2117, "step": 35800 }, { "epoch": 0.07233846564074387, "grad_norm": 285.33734130859375, "learning_rate": 7.162e-06, "loss": 14.649, "step": 35810 }, { "epoch": 0.07235866627342769, "grad_norm": 552.3336791992188, "learning_rate": 7.164000000000001e-06, "loss": 33.3316, "step": 35820 }, { "epoch": 0.0723788669061115, "grad_norm": 341.7799072265625, "learning_rate": 7.1660000000000005e-06, "loss": 17.5601, "step": 35830 }, { "epoch": 0.07239906753879531, "grad_norm": 663.0716552734375, "learning_rate": 7.168000000000001e-06, "loss": 15.9708, "step": 35840 }, { "epoch": 0.07241926817147913, "grad_norm": 444.8297119140625, "learning_rate": 7.17e-06, "loss": 30.985, "step": 35850 }, { "epoch": 0.07243946880416295, "grad_norm": 179.97125244140625, "learning_rate": 7.172e-06, "loss": 18.7185, "step": 35860 }, { "epoch": 0.07245966943684676, "grad_norm": 562.4083251953125, "learning_rate": 7.174000000000001e-06, "loss": 50.2738, "step": 35870 }, { "epoch": 0.07247987006953058, "grad_norm": 1533.6148681640625, "learning_rate": 7.176e-06, "loss": 34.0883, "step": 35880 }, { "epoch": 0.0725000707022144, "grad_norm": 263.4617004394531, "learning_rate": 7.1780000000000006e-06, "loss": 13.4116, "step": 35890 }, { "epoch": 0.0725202713348982, "grad_norm": 1126.4736328125, "learning_rate": 7.180000000000001e-06, "loss": 40.3042, "step": 35900 }, { "epoch": 0.07254047196758202, "grad_norm": 796.5320434570312, "learning_rate": 7.182e-06, "loss": 33.9886, "step": 35910 }, { "epoch": 0.07256067260026584, "grad_norm": 769.1730346679688, "learning_rate": 7.184000000000001e-06, "loss": 33.7612, "step": 35920 }, { "epoch": 0.07258087323294965, "grad_norm": 232.10711669921875, "learning_rate": 7.186000000000001e-06, "loss": 29.5518, "step": 35930 }, { "epoch": 0.07260107386563347, "grad_norm": 311.0737609863281, "learning_rate": 7.1880000000000005e-06, "loss": 21.697, "step": 35940 }, { "epoch": 0.07262127449831729, "grad_norm": 411.9050598144531, "learning_rate": 7.190000000000001e-06, "loss": 
14.3407, "step": 35950 }, { "epoch": 0.07264147513100111, "grad_norm": 597.1206665039062, "learning_rate": 7.192e-06, "loss": 23.7892, "step": 35960 }, { "epoch": 0.07266167576368492, "grad_norm": 156.48336791992188, "learning_rate": 7.194000000000001e-06, "loss": 16.2914, "step": 35970 }, { "epoch": 0.07268187639636874, "grad_norm": 631.177001953125, "learning_rate": 7.196000000000001e-06, "loss": 30.2218, "step": 35980 }, { "epoch": 0.07270207702905256, "grad_norm": 348.7903747558594, "learning_rate": 7.198e-06, "loss": 14.2282, "step": 35990 }, { "epoch": 0.07272227766173636, "grad_norm": 1277.896484375, "learning_rate": 7.2000000000000005e-06, "loss": 35.9558, "step": 36000 }, { "epoch": 0.07274247829442018, "grad_norm": 936.1773071289062, "learning_rate": 7.202e-06, "loss": 36.1961, "step": 36010 }, { "epoch": 0.072762678927104, "grad_norm": 792.3339233398438, "learning_rate": 7.204000000000001e-06, "loss": 22.9023, "step": 36020 }, { "epoch": 0.07278287955978781, "grad_norm": 298.2560119628906, "learning_rate": 7.206000000000001e-06, "loss": 24.574, "step": 36030 }, { "epoch": 0.07280308019247163, "grad_norm": 499.6198425292969, "learning_rate": 7.208e-06, "loss": 17.3295, "step": 36040 }, { "epoch": 0.07282328082515545, "grad_norm": 446.3153381347656, "learning_rate": 7.2100000000000004e-06, "loss": 18.4231, "step": 36050 }, { "epoch": 0.07284348145783925, "grad_norm": 449.7459411621094, "learning_rate": 7.212e-06, "loss": 21.9795, "step": 36060 }, { "epoch": 0.07286368209052307, "grad_norm": 578.6666259765625, "learning_rate": 7.214000000000001e-06, "loss": 19.4359, "step": 36070 }, { "epoch": 0.0728838827232069, "grad_norm": 533.8427734375, "learning_rate": 7.216000000000001e-06, "loss": 15.0341, "step": 36080 }, { "epoch": 0.0729040833558907, "grad_norm": 628.3088989257812, "learning_rate": 7.218e-06, "loss": 24.2054, "step": 36090 }, { "epoch": 0.07292428398857452, "grad_norm": 297.69281005859375, "learning_rate": 7.22e-06, "loss": 33.3355, "step": 36100 }, { "epoch": 0.07294448462125834, "grad_norm": 297.65924072265625, "learning_rate": 7.2220000000000005e-06, "loss": 18.034, "step": 36110 }, { "epoch": 0.07296468525394216, "grad_norm": 1627.181396484375, "learning_rate": 7.224000000000001e-06, "loss": 37.0366, "step": 36120 }, { "epoch": 0.07298488588662597, "grad_norm": 278.7560119628906, "learning_rate": 7.226000000000001e-06, "loss": 20.4195, "step": 36130 }, { "epoch": 0.07300508651930979, "grad_norm": 490.5456848144531, "learning_rate": 7.228000000000001e-06, "loss": 22.7641, "step": 36140 }, { "epoch": 0.0730252871519936, "grad_norm": 352.07110595703125, "learning_rate": 7.23e-06, "loss": 23.3919, "step": 36150 }, { "epoch": 0.07304548778467741, "grad_norm": 635.0588989257812, "learning_rate": 7.232e-06, "loss": 14.4249, "step": 36160 }, { "epoch": 0.07306568841736123, "grad_norm": 655.6961669921875, "learning_rate": 7.234000000000001e-06, "loss": 21.8436, "step": 36170 }, { "epoch": 0.07308588905004505, "grad_norm": 426.8301086425781, "learning_rate": 7.236000000000001e-06, "loss": 31.068, "step": 36180 }, { "epoch": 0.07310608968272886, "grad_norm": 150.89291381835938, "learning_rate": 7.238000000000001e-06, "loss": 19.2602, "step": 36190 }, { "epoch": 0.07312629031541268, "grad_norm": 315.3755798339844, "learning_rate": 7.24e-06, "loss": 11.7244, "step": 36200 }, { "epoch": 0.0731464909480965, "grad_norm": 285.57879638671875, "learning_rate": 7.242e-06, "loss": 26.9657, "step": 36210 }, { "epoch": 0.0731666915807803, "grad_norm": 414.1665344238281, "learning_rate": 
7.244000000000001e-06, "loss": 23.4058, "step": 36220 }, { "epoch": 0.07318689221346412, "grad_norm": 256.0746154785156, "learning_rate": 7.246000000000001e-06, "loss": 28.3898, "step": 36230 }, { "epoch": 0.07320709284614794, "grad_norm": 397.8353271484375, "learning_rate": 7.248000000000001e-06, "loss": 22.2447, "step": 36240 }, { "epoch": 0.07322729347883175, "grad_norm": 1492.809814453125, "learning_rate": 7.25e-06, "loss": 20.0398, "step": 36250 }, { "epoch": 0.07324749411151557, "grad_norm": 957.6799926757812, "learning_rate": 7.252e-06, "loss": 39.4348, "step": 36260 }, { "epoch": 0.07326769474419939, "grad_norm": 912.636474609375, "learning_rate": 7.254000000000001e-06, "loss": 23.8131, "step": 36270 }, { "epoch": 0.07328789537688321, "grad_norm": 400.9259033203125, "learning_rate": 7.2560000000000005e-06, "loss": 26.8982, "step": 36280 }, { "epoch": 0.07330809600956702, "grad_norm": 912.023193359375, "learning_rate": 7.258000000000001e-06, "loss": 23.0974, "step": 36290 }, { "epoch": 0.07332829664225084, "grad_norm": 584.3211059570312, "learning_rate": 7.260000000000001e-06, "loss": 32.7051, "step": 36300 }, { "epoch": 0.07334849727493466, "grad_norm": 632.460693359375, "learning_rate": 7.262e-06, "loss": 36.9517, "step": 36310 }, { "epoch": 0.07336869790761846, "grad_norm": 358.9119873046875, "learning_rate": 7.264000000000001e-06, "loss": 21.1253, "step": 36320 }, { "epoch": 0.07338889854030228, "grad_norm": 192.64894104003906, "learning_rate": 7.266000000000001e-06, "loss": 35.9661, "step": 36330 }, { "epoch": 0.0734090991729861, "grad_norm": 820.314697265625, "learning_rate": 7.2680000000000005e-06, "loss": 28.4836, "step": 36340 }, { "epoch": 0.07342929980566991, "grad_norm": 559.251953125, "learning_rate": 7.270000000000001e-06, "loss": 20.2625, "step": 36350 }, { "epoch": 0.07344950043835373, "grad_norm": 406.0582580566406, "learning_rate": 7.272e-06, "loss": 22.5509, "step": 36360 }, { "epoch": 0.07346970107103755, "grad_norm": 671.7197875976562, "learning_rate": 7.274000000000001e-06, "loss": 30.905, "step": 36370 }, { "epoch": 0.07348990170372136, "grad_norm": 291.09039306640625, "learning_rate": 7.276000000000001e-06, "loss": 25.4618, "step": 36380 }, { "epoch": 0.07351010233640518, "grad_norm": 462.43133544921875, "learning_rate": 7.2780000000000005e-06, "loss": 30.4532, "step": 36390 }, { "epoch": 0.073530302969089, "grad_norm": 318.0280456542969, "learning_rate": 7.280000000000001e-06, "loss": 16.4905, "step": 36400 }, { "epoch": 0.0735505036017728, "grad_norm": 732.2247924804688, "learning_rate": 7.282e-06, "loss": 35.1043, "step": 36410 }, { "epoch": 0.07357070423445662, "grad_norm": 281.3781433105469, "learning_rate": 7.284000000000001e-06, "loss": 26.6184, "step": 36420 }, { "epoch": 0.07359090486714044, "grad_norm": 899.387939453125, "learning_rate": 7.286000000000001e-06, "loss": 30.4034, "step": 36430 }, { "epoch": 0.07361110549982426, "grad_norm": 637.453857421875, "learning_rate": 7.288e-06, "loss": 22.0351, "step": 36440 }, { "epoch": 0.07363130613250807, "grad_norm": 352.7566833496094, "learning_rate": 7.2900000000000005e-06, "loss": 13.5987, "step": 36450 }, { "epoch": 0.07365150676519189, "grad_norm": 1261.2196044921875, "learning_rate": 7.292e-06, "loss": 44.5814, "step": 36460 }, { "epoch": 0.07367170739787571, "grad_norm": 465.0794982910156, "learning_rate": 7.294000000000001e-06, "loss": 31.6626, "step": 36470 }, { "epoch": 0.07369190803055951, "grad_norm": 848.2520141601562, "learning_rate": 7.296000000000001e-06, "loss": 20.8244, "step": 36480 }, { 
"epoch": 0.07371210866324333, "grad_norm": 310.5050964355469, "learning_rate": 7.298e-06, "loss": 29.6421, "step": 36490 }, { "epoch": 0.07373230929592715, "grad_norm": 772.1388549804688, "learning_rate": 7.3e-06, "loss": 23.1746, "step": 36500 }, { "epoch": 0.07375250992861096, "grad_norm": 317.6028747558594, "learning_rate": 7.3020000000000006e-06, "loss": 20.4439, "step": 36510 }, { "epoch": 0.07377271056129478, "grad_norm": 456.0571594238281, "learning_rate": 7.304000000000001e-06, "loss": 22.3645, "step": 36520 }, { "epoch": 0.0737929111939786, "grad_norm": 543.1962280273438, "learning_rate": 7.306000000000001e-06, "loss": 25.9287, "step": 36530 }, { "epoch": 0.0738131118266624, "grad_norm": 459.4672546386719, "learning_rate": 7.308000000000001e-06, "loss": 46.0588, "step": 36540 }, { "epoch": 0.07383331245934623, "grad_norm": 68.5689926147461, "learning_rate": 7.31e-06, "loss": 31.2692, "step": 36550 }, { "epoch": 0.07385351309203005, "grad_norm": 289.3775634765625, "learning_rate": 7.3120000000000005e-06, "loss": 27.7922, "step": 36560 }, { "epoch": 0.07387371372471385, "grad_norm": 480.0758972167969, "learning_rate": 7.3140000000000015e-06, "loss": 48.8788, "step": 36570 }, { "epoch": 0.07389391435739767, "grad_norm": 232.5970001220703, "learning_rate": 7.316000000000001e-06, "loss": 23.6512, "step": 36580 }, { "epoch": 0.07391411499008149, "grad_norm": 491.4234924316406, "learning_rate": 7.318000000000001e-06, "loss": 27.9012, "step": 36590 }, { "epoch": 0.0739343156227653, "grad_norm": 1804.0177001953125, "learning_rate": 7.32e-06, "loss": 57.0006, "step": 36600 }, { "epoch": 0.07395451625544912, "grad_norm": 464.07696533203125, "learning_rate": 7.322e-06, "loss": 15.404, "step": 36610 }, { "epoch": 0.07397471688813294, "grad_norm": 552.944580078125, "learning_rate": 7.324000000000001e-06, "loss": 35.9958, "step": 36620 }, { "epoch": 0.07399491752081676, "grad_norm": 357.04803466796875, "learning_rate": 7.326000000000001e-06, "loss": 34.9606, "step": 36630 }, { "epoch": 0.07401511815350056, "grad_norm": 120.2250747680664, "learning_rate": 7.328000000000001e-06, "loss": 22.1038, "step": 36640 }, { "epoch": 0.07403531878618438, "grad_norm": 1244.16455078125, "learning_rate": 7.33e-06, "loss": 32.6981, "step": 36650 }, { "epoch": 0.0740555194188682, "grad_norm": 892.9081420898438, "learning_rate": 7.332e-06, "loss": 48.3434, "step": 36660 }, { "epoch": 0.07407572005155201, "grad_norm": 213.5165252685547, "learning_rate": 7.334000000000001e-06, "loss": 19.2829, "step": 36670 }, { "epoch": 0.07409592068423583, "grad_norm": 789.1298828125, "learning_rate": 7.3360000000000006e-06, "loss": 35.462, "step": 36680 }, { "epoch": 0.07411612131691965, "grad_norm": 417.73455810546875, "learning_rate": 7.338000000000001e-06, "loss": 26.294, "step": 36690 }, { "epoch": 0.07413632194960346, "grad_norm": 606.4683837890625, "learning_rate": 7.340000000000001e-06, "loss": 15.4818, "step": 36700 }, { "epoch": 0.07415652258228728, "grad_norm": 234.27586364746094, "learning_rate": 7.342e-06, "loss": 16.8294, "step": 36710 }, { "epoch": 0.0741767232149711, "grad_norm": 132.23135375976562, "learning_rate": 7.344000000000001e-06, "loss": 26.813, "step": 36720 }, { "epoch": 0.0741969238476549, "grad_norm": 132.92832946777344, "learning_rate": 7.346000000000001e-06, "loss": 35.4096, "step": 36730 }, { "epoch": 0.07421712448033872, "grad_norm": 393.9335021972656, "learning_rate": 7.348000000000001e-06, "loss": 47.1393, "step": 36740 }, { "epoch": 0.07423732511302254, "grad_norm": 359.0769958496094, 
"learning_rate": 7.350000000000001e-06, "loss": 24.7488, "step": 36750 }, { "epoch": 0.07425752574570635, "grad_norm": 415.5875244140625, "learning_rate": 7.352e-06, "loss": 25.986, "step": 36760 }, { "epoch": 0.07427772637839017, "grad_norm": 689.1558837890625, "learning_rate": 7.354000000000001e-06, "loss": 42.3704, "step": 36770 }, { "epoch": 0.07429792701107399, "grad_norm": 563.744140625, "learning_rate": 7.356000000000001e-06, "loss": 28.5835, "step": 36780 }, { "epoch": 0.07431812764375781, "grad_norm": 282.9549255371094, "learning_rate": 7.3580000000000005e-06, "loss": 34.0733, "step": 36790 }, { "epoch": 0.07433832827644161, "grad_norm": 133.72024536132812, "learning_rate": 7.360000000000001e-06, "loss": 15.5981, "step": 36800 }, { "epoch": 0.07435852890912543, "grad_norm": 118.42461395263672, "learning_rate": 7.362e-06, "loss": 43.5447, "step": 36810 }, { "epoch": 0.07437872954180925, "grad_norm": 428.64483642578125, "learning_rate": 7.364000000000001e-06, "loss": 45.605, "step": 36820 }, { "epoch": 0.07439893017449306, "grad_norm": 167.21322631835938, "learning_rate": 7.366000000000001e-06, "loss": 21.6644, "step": 36830 }, { "epoch": 0.07441913080717688, "grad_norm": 282.8342590332031, "learning_rate": 7.3680000000000004e-06, "loss": 15.7841, "step": 36840 }, { "epoch": 0.0744393314398607, "grad_norm": 996.6889038085938, "learning_rate": 7.370000000000001e-06, "loss": 25.6472, "step": 36850 }, { "epoch": 0.07445953207254451, "grad_norm": 406.68341064453125, "learning_rate": 7.372e-06, "loss": 28.4243, "step": 36860 }, { "epoch": 0.07447973270522833, "grad_norm": 260.7794189453125, "learning_rate": 7.374000000000001e-06, "loss": 7.6112, "step": 36870 }, { "epoch": 0.07449993333791215, "grad_norm": 580.6053466796875, "learning_rate": 7.376000000000001e-06, "loss": 20.1667, "step": 36880 }, { "epoch": 0.07452013397059595, "grad_norm": 344.7051086425781, "learning_rate": 7.378e-06, "loss": 19.748, "step": 36890 }, { "epoch": 0.07454033460327977, "grad_norm": 460.07928466796875, "learning_rate": 7.3800000000000005e-06, "loss": 25.7779, "step": 36900 }, { "epoch": 0.0745605352359636, "grad_norm": 302.0501708984375, "learning_rate": 7.382000000000001e-06, "loss": 25.0444, "step": 36910 }, { "epoch": 0.0745807358686474, "grad_norm": 419.9612731933594, "learning_rate": 7.384e-06, "loss": 33.317, "step": 36920 }, { "epoch": 0.07460093650133122, "grad_norm": 366.3216552734375, "learning_rate": 7.386000000000001e-06, "loss": 28.3039, "step": 36930 }, { "epoch": 0.07462113713401504, "grad_norm": 539.5303344726562, "learning_rate": 7.388000000000001e-06, "loss": 25.8313, "step": 36940 }, { "epoch": 0.07464133776669886, "grad_norm": 284.8318176269531, "learning_rate": 7.39e-06, "loss": 28.2287, "step": 36950 }, { "epoch": 0.07466153839938267, "grad_norm": 441.8493347167969, "learning_rate": 7.3920000000000005e-06, "loss": 15.1471, "step": 36960 }, { "epoch": 0.07468173903206649, "grad_norm": 221.10379028320312, "learning_rate": 7.394e-06, "loss": 21.8838, "step": 36970 }, { "epoch": 0.0747019396647503, "grad_norm": 668.7821655273438, "learning_rate": 7.396000000000001e-06, "loss": 29.002, "step": 36980 }, { "epoch": 0.07472214029743411, "grad_norm": 157.21408081054688, "learning_rate": 7.398000000000001e-06, "loss": 18.1363, "step": 36990 }, { "epoch": 0.07474234093011793, "grad_norm": 352.7605285644531, "learning_rate": 7.4e-06, "loss": 23.2426, "step": 37000 }, { "epoch": 0.07476254156280175, "grad_norm": 495.28564453125, "learning_rate": 7.4020000000000005e-06, "loss": 27.4683, "step": 
37010 }, { "epoch": 0.07478274219548556, "grad_norm": 372.5489501953125, "learning_rate": 7.404e-06, "loss": 19.4716, "step": 37020 }, { "epoch": 0.07480294282816938, "grad_norm": 595.1874389648438, "learning_rate": 7.406000000000001e-06, "loss": 17.6525, "step": 37030 }, { "epoch": 0.0748231434608532, "grad_norm": 270.90087890625, "learning_rate": 7.408000000000001e-06, "loss": 20.5332, "step": 37040 }, { "epoch": 0.074843344093537, "grad_norm": 218.74630737304688, "learning_rate": 7.41e-06, "loss": 31.4177, "step": 37050 }, { "epoch": 0.07486354472622082, "grad_norm": 651.9527587890625, "learning_rate": 7.412e-06, "loss": 13.3607, "step": 37060 }, { "epoch": 0.07488374535890464, "grad_norm": 434.46826171875, "learning_rate": 7.4140000000000005e-06, "loss": 28.5692, "step": 37070 }, { "epoch": 0.07490394599158845, "grad_norm": 726.3607177734375, "learning_rate": 7.416000000000001e-06, "loss": 39.4051, "step": 37080 }, { "epoch": 0.07492414662427227, "grad_norm": 281.404541015625, "learning_rate": 7.418000000000001e-06, "loss": 29.5586, "step": 37090 }, { "epoch": 0.07494434725695609, "grad_norm": 226.3792266845703, "learning_rate": 7.420000000000001e-06, "loss": 35.242, "step": 37100 }, { "epoch": 0.07496454788963991, "grad_norm": 130.22166442871094, "learning_rate": 7.422e-06, "loss": 19.7581, "step": 37110 }, { "epoch": 0.07498474852232372, "grad_norm": 604.023681640625, "learning_rate": 7.424e-06, "loss": 13.2463, "step": 37120 }, { "epoch": 0.07500494915500754, "grad_norm": 749.285400390625, "learning_rate": 7.426000000000001e-06, "loss": 25.0143, "step": 37130 }, { "epoch": 0.07502514978769136, "grad_norm": 455.9515380859375, "learning_rate": 7.428000000000001e-06, "loss": 22.274, "step": 37140 }, { "epoch": 0.07504535042037516, "grad_norm": 388.0310974121094, "learning_rate": 7.430000000000001e-06, "loss": 42.7054, "step": 37150 }, { "epoch": 0.07506555105305898, "grad_norm": 435.63006591796875, "learning_rate": 7.432e-06, "loss": 18.8218, "step": 37160 }, { "epoch": 0.0750857516857428, "grad_norm": 390.9641418457031, "learning_rate": 7.434e-06, "loss": 23.881, "step": 37170 }, { "epoch": 0.07510595231842661, "grad_norm": 200.30760192871094, "learning_rate": 7.436000000000001e-06, "loss": 9.9231, "step": 37180 }, { "epoch": 0.07512615295111043, "grad_norm": 1305.658935546875, "learning_rate": 7.438000000000001e-06, "loss": 49.221, "step": 37190 }, { "epoch": 0.07514635358379425, "grad_norm": 560.2576904296875, "learning_rate": 7.440000000000001e-06, "loss": 37.7084, "step": 37200 }, { "epoch": 0.07516655421647805, "grad_norm": 796.267333984375, "learning_rate": 7.442e-06, "loss": 37.0953, "step": 37210 }, { "epoch": 0.07518675484916187, "grad_norm": 212.97508239746094, "learning_rate": 7.444e-06, "loss": 25.6259, "step": 37220 }, { "epoch": 0.0752069554818457, "grad_norm": 151.27481079101562, "learning_rate": 7.446000000000001e-06, "loss": 21.353, "step": 37230 }, { "epoch": 0.0752271561145295, "grad_norm": 427.7612609863281, "learning_rate": 7.4480000000000005e-06, "loss": 28.3006, "step": 37240 }, { "epoch": 0.07524735674721332, "grad_norm": 374.7218017578125, "learning_rate": 7.450000000000001e-06, "loss": 21.3628, "step": 37250 }, { "epoch": 0.07526755737989714, "grad_norm": 285.76763916015625, "learning_rate": 7.452e-06, "loss": 26.1006, "step": 37260 }, { "epoch": 0.07528775801258096, "grad_norm": 399.39837646484375, "learning_rate": 7.454e-06, "loss": 25.5958, "step": 37270 }, { "epoch": 0.07530795864526477, "grad_norm": 261.0355224609375, "learning_rate": 
7.456000000000001e-06, "loss": 32.5744, "step": 37280 }, { "epoch": 0.07532815927794859, "grad_norm": 261.322998046875, "learning_rate": 7.458e-06, "loss": 15.9234, "step": 37290 }, { "epoch": 0.0753483599106324, "grad_norm": 595.6270141601562, "learning_rate": 7.4600000000000006e-06, "loss": 20.6847, "step": 37300 }, { "epoch": 0.07536856054331621, "grad_norm": 687.22900390625, "learning_rate": 7.462000000000001e-06, "loss": 23.3383, "step": 37310 }, { "epoch": 0.07538876117600003, "grad_norm": 506.7188415527344, "learning_rate": 7.464e-06, "loss": 36.2405, "step": 37320 }, { "epoch": 0.07540896180868385, "grad_norm": 164.11167907714844, "learning_rate": 7.466000000000001e-06, "loss": 27.747, "step": 37330 }, { "epoch": 0.07542916244136766, "grad_norm": 796.3577880859375, "learning_rate": 7.468000000000001e-06, "loss": 35.0851, "step": 37340 }, { "epoch": 0.07544936307405148, "grad_norm": 323.20159912109375, "learning_rate": 7.4700000000000005e-06, "loss": 24.5756, "step": 37350 }, { "epoch": 0.0754695637067353, "grad_norm": 509.2854309082031, "learning_rate": 7.472000000000001e-06, "loss": 18.1768, "step": 37360 }, { "epoch": 0.0754897643394191, "grad_norm": 312.6730651855469, "learning_rate": 7.474e-06, "loss": 17.1089, "step": 37370 }, { "epoch": 0.07550996497210292, "grad_norm": 584.5643920898438, "learning_rate": 7.476000000000001e-06, "loss": 17.1565, "step": 37380 }, { "epoch": 0.07553016560478674, "grad_norm": 795.0660400390625, "learning_rate": 7.478000000000001e-06, "loss": 29.7825, "step": 37390 }, { "epoch": 0.07555036623747055, "grad_norm": 894.1143798828125, "learning_rate": 7.48e-06, "loss": 21.5171, "step": 37400 }, { "epoch": 0.07557056687015437, "grad_norm": 1436.5306396484375, "learning_rate": 7.4820000000000005e-06, "loss": 31.2726, "step": 37410 }, { "epoch": 0.07559076750283819, "grad_norm": 465.63702392578125, "learning_rate": 7.484e-06, "loss": 59.6799, "step": 37420 }, { "epoch": 0.07561096813552201, "grad_norm": 656.0516357421875, "learning_rate": 7.486000000000001e-06, "loss": 30.0095, "step": 37430 }, { "epoch": 0.07563116876820582, "grad_norm": 269.5618896484375, "learning_rate": 7.488000000000001e-06, "loss": 33.9991, "step": 37440 }, { "epoch": 0.07565136940088964, "grad_norm": 410.20654296875, "learning_rate": 7.49e-06, "loss": 38.3308, "step": 37450 }, { "epoch": 0.07567157003357346, "grad_norm": 316.13446044921875, "learning_rate": 7.4920000000000004e-06, "loss": 23.907, "step": 37460 }, { "epoch": 0.07569177066625726, "grad_norm": 325.43585205078125, "learning_rate": 7.494000000000001e-06, "loss": 25.014, "step": 37470 }, { "epoch": 0.07571197129894108, "grad_norm": 317.10723876953125, "learning_rate": 7.496000000000001e-06, "loss": 25.0031, "step": 37480 }, { "epoch": 0.0757321719316249, "grad_norm": 154.67532348632812, "learning_rate": 7.498000000000001e-06, "loss": 14.9261, "step": 37490 }, { "epoch": 0.07575237256430871, "grad_norm": 840.64013671875, "learning_rate": 7.500000000000001e-06, "loss": 28.7005, "step": 37500 }, { "epoch": 0.07577257319699253, "grad_norm": 220.19874572753906, "learning_rate": 7.502e-06, "loss": 18.3881, "step": 37510 }, { "epoch": 0.07579277382967635, "grad_norm": 475.55853271484375, "learning_rate": 7.5040000000000005e-06, "loss": 23.9347, "step": 37520 }, { "epoch": 0.07581297446236016, "grad_norm": 700.2653198242188, "learning_rate": 7.506000000000001e-06, "loss": 25.2646, "step": 37530 }, { "epoch": 0.07583317509504398, "grad_norm": 204.63330078125, "learning_rate": 7.508000000000001e-06, "loss": 20.2139, "step": 37540 
}, { "epoch": 0.0758533757277278, "grad_norm": 661.1112060546875, "learning_rate": 7.510000000000001e-06, "loss": 14.1901, "step": 37550 }, { "epoch": 0.0758735763604116, "grad_norm": 962.9714965820312, "learning_rate": 7.512e-06, "loss": 27.8115, "step": 37560 }, { "epoch": 0.07589377699309542, "grad_norm": 300.95953369140625, "learning_rate": 7.514e-06, "loss": 17.9731, "step": 37570 }, { "epoch": 0.07591397762577924, "grad_norm": 0.0, "learning_rate": 7.516000000000001e-06, "loss": 16.0024, "step": 37580 }, { "epoch": 0.07593417825846306, "grad_norm": 1225.2772216796875, "learning_rate": 7.518000000000001e-06, "loss": 25.6451, "step": 37590 }, { "epoch": 0.07595437889114687, "grad_norm": 384.1285095214844, "learning_rate": 7.520000000000001e-06, "loss": 25.0455, "step": 37600 }, { "epoch": 0.07597457952383069, "grad_norm": 333.9071960449219, "learning_rate": 7.522e-06, "loss": 52.661, "step": 37610 }, { "epoch": 0.07599478015651451, "grad_norm": 674.0836791992188, "learning_rate": 7.524e-06, "loss": 31.7111, "step": 37620 }, { "epoch": 0.07601498078919831, "grad_norm": 640.943603515625, "learning_rate": 7.526000000000001e-06, "loss": 20.8616, "step": 37630 }, { "epoch": 0.07603518142188213, "grad_norm": 501.4905090332031, "learning_rate": 7.528000000000001e-06, "loss": 30.3123, "step": 37640 }, { "epoch": 0.07605538205456595, "grad_norm": 301.7007751464844, "learning_rate": 7.530000000000001e-06, "loss": 28.5057, "step": 37650 }, { "epoch": 0.07607558268724976, "grad_norm": 521.26953125, "learning_rate": 7.532e-06, "loss": 31.6623, "step": 37660 }, { "epoch": 0.07609578331993358, "grad_norm": 436.21356201171875, "learning_rate": 7.534e-06, "loss": 29.2193, "step": 37670 }, { "epoch": 0.0761159839526174, "grad_norm": 395.0469055175781, "learning_rate": 7.536000000000001e-06, "loss": 10.7322, "step": 37680 }, { "epoch": 0.0761361845853012, "grad_norm": 680.1278076171875, "learning_rate": 7.5380000000000005e-06, "loss": 26.2934, "step": 37690 }, { "epoch": 0.07615638521798503, "grad_norm": 171.3746337890625, "learning_rate": 7.540000000000001e-06, "loss": 28.6653, "step": 37700 }, { "epoch": 0.07617658585066885, "grad_norm": 638.10009765625, "learning_rate": 7.542000000000001e-06, "loss": 31.7623, "step": 37710 }, { "epoch": 0.07619678648335265, "grad_norm": 997.4443969726562, "learning_rate": 7.544e-06, "loss": 26.3061, "step": 37720 }, { "epoch": 0.07621698711603647, "grad_norm": 659.7255249023438, "learning_rate": 7.546000000000001e-06, "loss": 24.0984, "step": 37730 }, { "epoch": 0.07623718774872029, "grad_norm": 463.6721496582031, "learning_rate": 7.548000000000001e-06, "loss": 29.6272, "step": 37740 }, { "epoch": 0.07625738838140411, "grad_norm": 301.8927917480469, "learning_rate": 7.5500000000000006e-06, "loss": 23.1385, "step": 37750 }, { "epoch": 0.07627758901408792, "grad_norm": 422.1348876953125, "learning_rate": 7.552000000000001e-06, "loss": 21.7568, "step": 37760 }, { "epoch": 0.07629778964677174, "grad_norm": 290.8968200683594, "learning_rate": 7.554e-06, "loss": 26.4895, "step": 37770 }, { "epoch": 0.07631799027945556, "grad_norm": 453.95074462890625, "learning_rate": 7.556000000000001e-06, "loss": 24.2269, "step": 37780 }, { "epoch": 0.07633819091213936, "grad_norm": 1822.8984375, "learning_rate": 7.558000000000001e-06, "loss": 32.174, "step": 37790 }, { "epoch": 0.07635839154482318, "grad_norm": 482.6763000488281, "learning_rate": 7.5600000000000005e-06, "loss": 30.5826, "step": 37800 }, { "epoch": 0.076378592177507, "grad_norm": 306.6064147949219, "learning_rate": 
7.562000000000001e-06, "loss": 25.7277, "step": 37810 }, { "epoch": 0.07639879281019081, "grad_norm": 189.5863494873047, "learning_rate": 7.564e-06, "loss": 26.8762, "step": 37820 }, { "epoch": 0.07641899344287463, "grad_norm": 473.7322692871094, "learning_rate": 7.566000000000001e-06, "loss": 34.0479, "step": 37830 }, { "epoch": 0.07643919407555845, "grad_norm": 84.52055358886719, "learning_rate": 7.568000000000001e-06, "loss": 49.771, "step": 37840 }, { "epoch": 0.07645939470824226, "grad_norm": 611.2411499023438, "learning_rate": 7.57e-06, "loss": 20.2221, "step": 37850 }, { "epoch": 0.07647959534092608, "grad_norm": 550.1533813476562, "learning_rate": 7.5720000000000005e-06, "loss": 22.9262, "step": 37860 }, { "epoch": 0.0764997959736099, "grad_norm": 524.09521484375, "learning_rate": 7.574e-06, "loss": 19.1146, "step": 37870 }, { "epoch": 0.0765199966062937, "grad_norm": 432.2309265136719, "learning_rate": 7.576000000000001e-06, "loss": 19.8361, "step": 37880 }, { "epoch": 0.07654019723897752, "grad_norm": 1506.61328125, "learning_rate": 7.578000000000001e-06, "loss": 33.7366, "step": 37890 }, { "epoch": 0.07656039787166134, "grad_norm": 379.86492919921875, "learning_rate": 7.58e-06, "loss": 27.6723, "step": 37900 }, { "epoch": 0.07658059850434516, "grad_norm": 290.55078125, "learning_rate": 7.582e-06, "loss": 25.5951, "step": 37910 }, { "epoch": 0.07660079913702897, "grad_norm": 501.2511291503906, "learning_rate": 7.5840000000000006e-06, "loss": 31.3163, "step": 37920 }, { "epoch": 0.07662099976971279, "grad_norm": 277.7619934082031, "learning_rate": 7.586000000000001e-06, "loss": 25.5508, "step": 37930 }, { "epoch": 0.07664120040239661, "grad_norm": 225.18263244628906, "learning_rate": 7.588000000000001e-06, "loss": 14.8932, "step": 37940 }, { "epoch": 0.07666140103508041, "grad_norm": 688.5034790039062, "learning_rate": 7.590000000000001e-06, "loss": 30.3668, "step": 37950 }, { "epoch": 0.07668160166776423, "grad_norm": 1253.2955322265625, "learning_rate": 7.592e-06, "loss": 25.6947, "step": 37960 }, { "epoch": 0.07670180230044805, "grad_norm": 444.8907775878906, "learning_rate": 7.5940000000000005e-06, "loss": 31.8752, "step": 37970 }, { "epoch": 0.07672200293313186, "grad_norm": 1363.9368896484375, "learning_rate": 7.5960000000000015e-06, "loss": 37.3974, "step": 37980 }, { "epoch": 0.07674220356581568, "grad_norm": 808.9375, "learning_rate": 7.598000000000001e-06, "loss": 44.4144, "step": 37990 }, { "epoch": 0.0767624041984995, "grad_norm": 914.0721435546875, "learning_rate": 7.600000000000001e-06, "loss": 44.3968, "step": 38000 }, { "epoch": 0.07678260483118331, "grad_norm": 372.5037536621094, "learning_rate": 7.602e-06, "loss": 11.9006, "step": 38010 }, { "epoch": 0.07680280546386713, "grad_norm": 615.7632446289062, "learning_rate": 7.604e-06, "loss": 23.4646, "step": 38020 }, { "epoch": 0.07682300609655095, "grad_norm": 372.4645690917969, "learning_rate": 7.606000000000001e-06, "loss": 23.2694, "step": 38030 }, { "epoch": 0.07684320672923475, "grad_norm": 420.320068359375, "learning_rate": 7.608000000000001e-06, "loss": 26.0293, "step": 38040 }, { "epoch": 0.07686340736191857, "grad_norm": 64.26026153564453, "learning_rate": 7.610000000000001e-06, "loss": 14.9356, "step": 38050 }, { "epoch": 0.0768836079946024, "grad_norm": 496.4963073730469, "learning_rate": 7.612e-06, "loss": 29.7388, "step": 38060 }, { "epoch": 0.07690380862728621, "grad_norm": 507.73486328125, "learning_rate": 7.614e-06, "loss": 35.2588, "step": 38070 }, { "epoch": 0.07692400925997002, "grad_norm": 
749.43505859375, "learning_rate": 7.616000000000001e-06, "loss": 22.8871, "step": 38080 }, { "epoch": 0.07694420989265384, "grad_norm": 670.1237182617188, "learning_rate": 7.618000000000001e-06, "loss": 36.7901, "step": 38090 }, { "epoch": 0.07696441052533766, "grad_norm": 603.0145874023438, "learning_rate": 7.620000000000001e-06, "loss": 34.3796, "step": 38100 }, { "epoch": 0.07698461115802147, "grad_norm": 187.35418701171875, "learning_rate": 7.622000000000001e-06, "loss": 51.1629, "step": 38110 }, { "epoch": 0.07700481179070529, "grad_norm": 321.2225646972656, "learning_rate": 7.624e-06, "loss": 29.4901, "step": 38120 }, { "epoch": 0.0770250124233891, "grad_norm": 518.65478515625, "learning_rate": 7.626e-06, "loss": 17.8433, "step": 38130 }, { "epoch": 0.07704521305607291, "grad_norm": 843.0776977539062, "learning_rate": 7.628000000000001e-06, "loss": 29.3418, "step": 38140 }, { "epoch": 0.07706541368875673, "grad_norm": 927.7374877929688, "learning_rate": 7.630000000000001e-06, "loss": 28.081, "step": 38150 }, { "epoch": 0.07708561432144055, "grad_norm": 97.3183364868164, "learning_rate": 7.632e-06, "loss": 22.5968, "step": 38160 }, { "epoch": 0.07710581495412436, "grad_norm": 285.63079833984375, "learning_rate": 7.634e-06, "loss": 30.5677, "step": 38170 }, { "epoch": 0.07712601558680818, "grad_norm": 595.7236938476562, "learning_rate": 7.636e-06, "loss": 22.4367, "step": 38180 }, { "epoch": 0.077146216219492, "grad_norm": 415.005859375, "learning_rate": 7.638e-06, "loss": 19.8356, "step": 38190 }, { "epoch": 0.0771664168521758, "grad_norm": 306.705322265625, "learning_rate": 7.640000000000001e-06, "loss": 28.9574, "step": 38200 }, { "epoch": 0.07718661748485962, "grad_norm": 787.25439453125, "learning_rate": 7.642e-06, "loss": 30.5896, "step": 38210 }, { "epoch": 0.07720681811754344, "grad_norm": 534.6594848632812, "learning_rate": 7.644e-06, "loss": 18.7692, "step": 38220 }, { "epoch": 0.07722701875022726, "grad_norm": 585.4483032226562, "learning_rate": 7.646e-06, "loss": 15.8626, "step": 38230 }, { "epoch": 0.07724721938291107, "grad_norm": 363.90533447265625, "learning_rate": 7.648e-06, "loss": 25.8909, "step": 38240 }, { "epoch": 0.07726742001559489, "grad_norm": 1323.8687744140625, "learning_rate": 7.650000000000001e-06, "loss": 19.5509, "step": 38250 }, { "epoch": 0.07728762064827871, "grad_norm": 408.2150573730469, "learning_rate": 7.652e-06, "loss": 56.9264, "step": 38260 }, { "epoch": 0.07730782128096252, "grad_norm": 560.2208862304688, "learning_rate": 7.654e-06, "loss": 18.0374, "step": 38270 }, { "epoch": 0.07732802191364634, "grad_norm": 481.2164306640625, "learning_rate": 7.656000000000001e-06, "loss": 20.3647, "step": 38280 }, { "epoch": 0.07734822254633016, "grad_norm": 482.8931579589844, "learning_rate": 7.658e-06, "loss": 13.6266, "step": 38290 }, { "epoch": 0.07736842317901396, "grad_norm": 443.9976806640625, "learning_rate": 7.660000000000001e-06, "loss": 16.6408, "step": 38300 }, { "epoch": 0.07738862381169778, "grad_norm": 210.58482360839844, "learning_rate": 7.662e-06, "loss": 13.5715, "step": 38310 }, { "epoch": 0.0774088244443816, "grad_norm": 504.7493896484375, "learning_rate": 7.664e-06, "loss": 21.5304, "step": 38320 }, { "epoch": 0.07742902507706541, "grad_norm": 289.60467529296875, "learning_rate": 7.666e-06, "loss": 15.3804, "step": 38330 }, { "epoch": 0.07744922570974923, "grad_norm": 631.2286376953125, "learning_rate": 7.668000000000002e-06, "loss": 14.5907, "step": 38340 }, { "epoch": 0.07746942634243305, "grad_norm": 502.9691162109375, 
"learning_rate": 7.670000000000001e-06, "loss": 31.2539, "step": 38350 }, { "epoch": 0.07748962697511685, "grad_norm": 769.7857055664062, "learning_rate": 7.672e-06, "loss": 26.5301, "step": 38360 }, { "epoch": 0.07750982760780067, "grad_norm": 1897.988525390625, "learning_rate": 7.674e-06, "loss": 44.4017, "step": 38370 }, { "epoch": 0.0775300282404845, "grad_norm": 465.952880859375, "learning_rate": 7.676e-06, "loss": 37.5696, "step": 38380 }, { "epoch": 0.07755022887316831, "grad_norm": 1166.8406982421875, "learning_rate": 7.678000000000002e-06, "loss": 19.16, "step": 38390 }, { "epoch": 0.07757042950585212, "grad_norm": 434.853271484375, "learning_rate": 7.680000000000001e-06, "loss": 27.5619, "step": 38400 }, { "epoch": 0.07759063013853594, "grad_norm": 784.1865844726562, "learning_rate": 7.682e-06, "loss": 38.5985, "step": 38410 }, { "epoch": 0.07761083077121976, "grad_norm": 383.14056396484375, "learning_rate": 7.684e-06, "loss": 24.7993, "step": 38420 }, { "epoch": 0.07763103140390357, "grad_norm": 652.2393798828125, "learning_rate": 7.686e-06, "loss": 38.6169, "step": 38430 }, { "epoch": 0.07765123203658739, "grad_norm": 150.37486267089844, "learning_rate": 7.688000000000002e-06, "loss": 31.2974, "step": 38440 }, { "epoch": 0.0776714326692712, "grad_norm": 388.88787841796875, "learning_rate": 7.690000000000001e-06, "loss": 27.2446, "step": 38450 }, { "epoch": 0.07769163330195501, "grad_norm": 366.23748779296875, "learning_rate": 7.692e-06, "loss": 18.4109, "step": 38460 }, { "epoch": 0.07771183393463883, "grad_norm": 170.95111083984375, "learning_rate": 7.694e-06, "loss": 39.7728, "step": 38470 }, { "epoch": 0.07773203456732265, "grad_norm": 687.3958129882812, "learning_rate": 7.696e-06, "loss": 29.1284, "step": 38480 }, { "epoch": 0.07775223520000646, "grad_norm": 922.0366821289062, "learning_rate": 7.698000000000002e-06, "loss": 27.472, "step": 38490 }, { "epoch": 0.07777243583269028, "grad_norm": 536.3035888671875, "learning_rate": 7.7e-06, "loss": 20.7643, "step": 38500 }, { "epoch": 0.0777926364653741, "grad_norm": 902.5062866210938, "learning_rate": 7.702e-06, "loss": 37.1712, "step": 38510 }, { "epoch": 0.0778128370980579, "grad_norm": 434.46832275390625, "learning_rate": 7.704000000000001e-06, "loss": 34.8708, "step": 38520 }, { "epoch": 0.07783303773074172, "grad_norm": 1345.6851806640625, "learning_rate": 7.706e-06, "loss": 26.2673, "step": 38530 }, { "epoch": 0.07785323836342554, "grad_norm": 687.81591796875, "learning_rate": 7.708000000000001e-06, "loss": 30.508, "step": 38540 }, { "epoch": 0.07787343899610936, "grad_norm": 121.17589569091797, "learning_rate": 7.71e-06, "loss": 28.7651, "step": 38550 }, { "epoch": 0.07789363962879317, "grad_norm": 371.45782470703125, "learning_rate": 7.712e-06, "loss": 10.9951, "step": 38560 }, { "epoch": 0.07791384026147699, "grad_norm": 213.26759338378906, "learning_rate": 7.714000000000001e-06, "loss": 16.4614, "step": 38570 }, { "epoch": 0.07793404089416081, "grad_norm": 198.55184936523438, "learning_rate": 7.716e-06, "loss": 12.8972, "step": 38580 }, { "epoch": 0.07795424152684462, "grad_norm": 536.1241455078125, "learning_rate": 7.718000000000001e-06, "loss": 35.0614, "step": 38590 }, { "epoch": 0.07797444215952844, "grad_norm": 51.67824935913086, "learning_rate": 7.72e-06, "loss": 20.0434, "step": 38600 }, { "epoch": 0.07799464279221226, "grad_norm": 495.4625549316406, "learning_rate": 7.722e-06, "loss": 20.3598, "step": 38610 }, { "epoch": 0.07801484342489606, "grad_norm": 378.313232421875, "learning_rate": 
7.724000000000001e-06, "loss": 22.1983, "step": 38620 }, { "epoch": 0.07803504405757988, "grad_norm": 619.906005859375, "learning_rate": 7.726e-06, "loss": 33.8938, "step": 38630 }, { "epoch": 0.0780552446902637, "grad_norm": 631.1979370117188, "learning_rate": 7.728000000000001e-06, "loss": 58.8985, "step": 38640 }, { "epoch": 0.07807544532294751, "grad_norm": 437.4310607910156, "learning_rate": 7.73e-06, "loss": 30.0452, "step": 38650 }, { "epoch": 0.07809564595563133, "grad_norm": 698.7709350585938, "learning_rate": 7.732e-06, "loss": 40.7639, "step": 38660 }, { "epoch": 0.07811584658831515, "grad_norm": 877.6707153320312, "learning_rate": 7.734e-06, "loss": 32.1464, "step": 38670 }, { "epoch": 0.07813604722099896, "grad_norm": 1028.7607421875, "learning_rate": 7.736e-06, "loss": 35.8577, "step": 38680 }, { "epoch": 0.07815624785368278, "grad_norm": 375.9439392089844, "learning_rate": 7.738000000000001e-06, "loss": 22.3812, "step": 38690 }, { "epoch": 0.0781764484863666, "grad_norm": 413.5697021484375, "learning_rate": 7.74e-06, "loss": 23.9866, "step": 38700 }, { "epoch": 0.07819664911905042, "grad_norm": 441.6208801269531, "learning_rate": 7.742000000000001e-06, "loss": 31.8802, "step": 38710 }, { "epoch": 0.07821684975173422, "grad_norm": 555.9667358398438, "learning_rate": 7.744e-06, "loss": 42.2381, "step": 38720 }, { "epoch": 0.07823705038441804, "grad_norm": 448.4810485839844, "learning_rate": 7.746e-06, "loss": 25.8219, "step": 38730 }, { "epoch": 0.07825725101710186, "grad_norm": 741.141357421875, "learning_rate": 7.748000000000001e-06, "loss": 22.2343, "step": 38740 }, { "epoch": 0.07827745164978567, "grad_norm": 1138.6595458984375, "learning_rate": 7.75e-06, "loss": 34.1904, "step": 38750 }, { "epoch": 0.07829765228246949, "grad_norm": 892.67138671875, "learning_rate": 7.752000000000001e-06, "loss": 40.0788, "step": 38760 }, { "epoch": 0.07831785291515331, "grad_norm": 363.2459411621094, "learning_rate": 7.754e-06, "loss": 30.8245, "step": 38770 }, { "epoch": 0.07833805354783711, "grad_norm": 301.23675537109375, "learning_rate": 7.756e-06, "loss": 40.9477, "step": 38780 }, { "epoch": 0.07835825418052093, "grad_norm": 0.0, "learning_rate": 7.758000000000001e-06, "loss": 22.1821, "step": 38790 }, { "epoch": 0.07837845481320475, "grad_norm": 2011.7015380859375, "learning_rate": 7.76e-06, "loss": 50.0491, "step": 38800 }, { "epoch": 0.07839865544588856, "grad_norm": 141.40940856933594, "learning_rate": 7.762000000000001e-06, "loss": 27.8431, "step": 38810 }, { "epoch": 0.07841885607857238, "grad_norm": 507.6337890625, "learning_rate": 7.764e-06, "loss": 37.4765, "step": 38820 }, { "epoch": 0.0784390567112562, "grad_norm": 957.7791137695312, "learning_rate": 7.766e-06, "loss": 44.3315, "step": 38830 }, { "epoch": 0.07845925734394, "grad_norm": 348.3440856933594, "learning_rate": 7.768e-06, "loss": 19.1895, "step": 38840 }, { "epoch": 0.07847945797662383, "grad_norm": 295.80853271484375, "learning_rate": 7.77e-06, "loss": 19.3399, "step": 38850 }, { "epoch": 0.07849965860930765, "grad_norm": 333.095947265625, "learning_rate": 7.772000000000001e-06, "loss": 40.4358, "step": 38860 }, { "epoch": 0.07851985924199147, "grad_norm": 905.6741943359375, "learning_rate": 7.774e-06, "loss": 31.8227, "step": 38870 }, { "epoch": 0.07854005987467527, "grad_norm": 240.92953491210938, "learning_rate": 7.776e-06, "loss": 15.9759, "step": 38880 }, { "epoch": 0.07856026050735909, "grad_norm": 402.4757080078125, "learning_rate": 7.778e-06, "loss": 29.6348, "step": 38890 }, { "epoch": 
0.07858046114004291, "grad_norm": 0.0, "learning_rate": 7.78e-06, "loss": 30.8198, "step": 38900 }, { "epoch": 0.07860066177272672, "grad_norm": 806.3101196289062, "learning_rate": 7.782000000000001e-06, "loss": 20.9412, "step": 38910 }, { "epoch": 0.07862086240541054, "grad_norm": 77.98436737060547, "learning_rate": 7.784e-06, "loss": 18.515, "step": 38920 }, { "epoch": 0.07864106303809436, "grad_norm": 394.6458435058594, "learning_rate": 7.786e-06, "loss": 30.4633, "step": 38930 }, { "epoch": 0.07866126367077816, "grad_norm": 214.89698791503906, "learning_rate": 7.788e-06, "loss": 29.3289, "step": 38940 }, { "epoch": 0.07868146430346198, "grad_norm": 907.835693359375, "learning_rate": 7.790000000000002e-06, "loss": 21.0371, "step": 38950 }, { "epoch": 0.0787016649361458, "grad_norm": 1484.2606201171875, "learning_rate": 7.792000000000001e-06, "loss": 14.1647, "step": 38960 }, { "epoch": 0.07872186556882961, "grad_norm": 491.3181457519531, "learning_rate": 7.794e-06, "loss": 27.0969, "step": 38970 }, { "epoch": 0.07874206620151343, "grad_norm": 291.0534973144531, "learning_rate": 7.796e-06, "loss": 47.8539, "step": 38980 }, { "epoch": 0.07876226683419725, "grad_norm": 1481.9825439453125, "learning_rate": 7.798e-06, "loss": 29.2362, "step": 38990 }, { "epoch": 0.07878246746688106, "grad_norm": 337.2052917480469, "learning_rate": 7.800000000000002e-06, "loss": 24.4363, "step": 39000 }, { "epoch": 0.07880266809956488, "grad_norm": 1043.8558349609375, "learning_rate": 7.802000000000001e-06, "loss": 28.7056, "step": 39010 }, { "epoch": 0.0788228687322487, "grad_norm": 354.3676452636719, "learning_rate": 7.804e-06, "loss": 13.9814, "step": 39020 }, { "epoch": 0.07884306936493252, "grad_norm": 678.2699584960938, "learning_rate": 7.806e-06, "loss": 27.087, "step": 39030 }, { "epoch": 0.07886326999761632, "grad_norm": 736.5831298828125, "learning_rate": 7.808e-06, "loss": 23.6024, "step": 39040 }, { "epoch": 0.07888347063030014, "grad_norm": 760.6795654296875, "learning_rate": 7.810000000000001e-06, "loss": 22.7807, "step": 39050 }, { "epoch": 0.07890367126298396, "grad_norm": 691.625244140625, "learning_rate": 7.812e-06, "loss": 34.0978, "step": 39060 }, { "epoch": 0.07892387189566777, "grad_norm": 589.5587768554688, "learning_rate": 7.814e-06, "loss": 16.2141, "step": 39070 }, { "epoch": 0.07894407252835159, "grad_norm": 801.611328125, "learning_rate": 7.816000000000001e-06, "loss": 34.3691, "step": 39080 }, { "epoch": 0.07896427316103541, "grad_norm": 787.2601318359375, "learning_rate": 7.818e-06, "loss": 24.3882, "step": 39090 }, { "epoch": 0.07898447379371921, "grad_norm": 602.1470336914062, "learning_rate": 7.820000000000001e-06, "loss": 26.3296, "step": 39100 }, { "epoch": 0.07900467442640303, "grad_norm": 12.870512008666992, "learning_rate": 7.822e-06, "loss": 33.7834, "step": 39110 }, { "epoch": 0.07902487505908685, "grad_norm": 243.6926727294922, "learning_rate": 7.824e-06, "loss": 24.5471, "step": 39120 }, { "epoch": 0.07904507569177066, "grad_norm": 546.6156616210938, "learning_rate": 7.826000000000001e-06, "loss": 35.1595, "step": 39130 }, { "epoch": 0.07906527632445448, "grad_norm": 60.834861755371094, "learning_rate": 7.828000000000002e-06, "loss": 16.6797, "step": 39140 }, { "epoch": 0.0790854769571383, "grad_norm": 789.2298583984375, "learning_rate": 7.830000000000001e-06, "loss": 22.5289, "step": 39150 }, { "epoch": 0.07910567758982211, "grad_norm": 857.40380859375, "learning_rate": 7.832e-06, "loss": 17.3143, "step": 39160 }, { "epoch": 0.07912587822250593, "grad_norm": 
490.2104797363281, "learning_rate": 7.834e-06, "loss": 16.5558, "step": 39170 }, { "epoch": 0.07914607885518975, "grad_norm": 827.0983276367188, "learning_rate": 7.836000000000001e-06, "loss": 14.9396, "step": 39180 }, { "epoch": 0.07916627948787357, "grad_norm": 579.365966796875, "learning_rate": 7.838000000000002e-06, "loss": 27.6349, "step": 39190 }, { "epoch": 0.07918648012055737, "grad_norm": 382.62188720703125, "learning_rate": 7.840000000000001e-06, "loss": 47.7565, "step": 39200 }, { "epoch": 0.0792066807532412, "grad_norm": 320.55560302734375, "learning_rate": 7.842e-06, "loss": 19.2766, "step": 39210 }, { "epoch": 0.07922688138592501, "grad_norm": 399.2356262207031, "learning_rate": 7.844e-06, "loss": 32.178, "step": 39220 }, { "epoch": 0.07924708201860882, "grad_norm": 368.3410949707031, "learning_rate": 7.846e-06, "loss": 17.7341, "step": 39230 }, { "epoch": 0.07926728265129264, "grad_norm": 279.0954284667969, "learning_rate": 7.848000000000002e-06, "loss": 22.8076, "step": 39240 }, { "epoch": 0.07928748328397646, "grad_norm": 974.9163208007812, "learning_rate": 7.850000000000001e-06, "loss": 33.6329, "step": 39250 }, { "epoch": 0.07930768391666027, "grad_norm": 469.6398010253906, "learning_rate": 7.852e-06, "loss": 23.4467, "step": 39260 }, { "epoch": 0.07932788454934409, "grad_norm": 507.614990234375, "learning_rate": 7.854e-06, "loss": 26.2057, "step": 39270 }, { "epoch": 0.0793480851820279, "grad_norm": 944.2115478515625, "learning_rate": 7.856e-06, "loss": 30.0214, "step": 39280 }, { "epoch": 0.07936828581471171, "grad_norm": 562.4888305664062, "learning_rate": 7.858000000000002e-06, "loss": 24.1479, "step": 39290 }, { "epoch": 0.07938848644739553, "grad_norm": 451.3738098144531, "learning_rate": 7.860000000000001e-06, "loss": 19.7802, "step": 39300 }, { "epoch": 0.07940868708007935, "grad_norm": 360.2376403808594, "learning_rate": 7.862e-06, "loss": 24.9804, "step": 39310 }, { "epoch": 0.07942888771276316, "grad_norm": 306.64459228515625, "learning_rate": 7.864000000000001e-06, "loss": 20.8103, "step": 39320 }, { "epoch": 0.07944908834544698, "grad_norm": 688.0297241210938, "learning_rate": 7.866e-06, "loss": 26.7215, "step": 39330 }, { "epoch": 0.0794692889781308, "grad_norm": 685.5408325195312, "learning_rate": 7.868000000000002e-06, "loss": 33.9052, "step": 39340 }, { "epoch": 0.07948948961081462, "grad_norm": 216.55715942382812, "learning_rate": 7.870000000000001e-06, "loss": 18.1017, "step": 39350 }, { "epoch": 0.07950969024349842, "grad_norm": 402.8645935058594, "learning_rate": 7.872e-06, "loss": 24.513, "step": 39360 }, { "epoch": 0.07952989087618224, "grad_norm": 946.2855224609375, "learning_rate": 7.874000000000001e-06, "loss": 45.8529, "step": 39370 }, { "epoch": 0.07955009150886606, "grad_norm": 129.7798614501953, "learning_rate": 7.876e-06, "loss": 22.3973, "step": 39380 }, { "epoch": 0.07957029214154987, "grad_norm": 50.45730972290039, "learning_rate": 7.878e-06, "loss": 25.0302, "step": 39390 }, { "epoch": 0.07959049277423369, "grad_norm": 310.8236999511719, "learning_rate": 7.88e-06, "loss": 12.803, "step": 39400 }, { "epoch": 0.07961069340691751, "grad_norm": 492.551513671875, "learning_rate": 7.882e-06, "loss": 21.0392, "step": 39410 }, { "epoch": 0.07963089403960132, "grad_norm": 242.3144073486328, "learning_rate": 7.884000000000001e-06, "loss": 30.2967, "step": 39420 }, { "epoch": 0.07965109467228514, "grad_norm": 1357.2841796875, "learning_rate": 7.886e-06, "loss": 27.1293, "step": 39430 }, { "epoch": 0.07967129530496896, "grad_norm": 923.7060546875, 
"learning_rate": 7.888e-06, "loss": 18.4986, "step": 39440 }, { "epoch": 0.07969149593765276, "grad_norm": 380.8133544921875, "learning_rate": 7.89e-06, "loss": 29.5138, "step": 39450 }, { "epoch": 0.07971169657033658, "grad_norm": 702.3418579101562, "learning_rate": 7.892e-06, "loss": 36.8634, "step": 39460 }, { "epoch": 0.0797318972030204, "grad_norm": 795.5443725585938, "learning_rate": 7.894000000000001e-06, "loss": 34.3167, "step": 39470 }, { "epoch": 0.07975209783570421, "grad_norm": 436.4936218261719, "learning_rate": 7.896e-06, "loss": 18.4773, "step": 39480 }, { "epoch": 0.07977229846838803, "grad_norm": 289.3657531738281, "learning_rate": 7.898e-06, "loss": 18.9045, "step": 39490 }, { "epoch": 0.07979249910107185, "grad_norm": 681.8690185546875, "learning_rate": 7.9e-06, "loss": 21.1877, "step": 39500 }, { "epoch": 0.07981269973375565, "grad_norm": 562.03125, "learning_rate": 7.902000000000002e-06, "loss": 22.3586, "step": 39510 }, { "epoch": 0.07983290036643947, "grad_norm": 277.8089599609375, "learning_rate": 7.904000000000001e-06, "loss": 30.3118, "step": 39520 }, { "epoch": 0.0798531009991233, "grad_norm": 378.3243713378906, "learning_rate": 7.906e-06, "loss": 35.7515, "step": 39530 }, { "epoch": 0.07987330163180711, "grad_norm": 674.659912109375, "learning_rate": 7.908e-06, "loss": 24.888, "step": 39540 }, { "epoch": 0.07989350226449092, "grad_norm": 233.80078125, "learning_rate": 7.91e-06, "loss": 21.2106, "step": 39550 }, { "epoch": 0.07991370289717474, "grad_norm": 414.6405944824219, "learning_rate": 7.912000000000001e-06, "loss": 17.9159, "step": 39560 }, { "epoch": 0.07993390352985856, "grad_norm": 317.096435546875, "learning_rate": 7.914e-06, "loss": 19.6499, "step": 39570 }, { "epoch": 0.07995410416254237, "grad_norm": 554.5511474609375, "learning_rate": 7.916e-06, "loss": 19.308, "step": 39580 }, { "epoch": 0.07997430479522619, "grad_norm": 446.277587890625, "learning_rate": 7.918e-06, "loss": 17.1525, "step": 39590 }, { "epoch": 0.07999450542791, "grad_norm": 869.7025756835938, "learning_rate": 7.92e-06, "loss": 29.8401, "step": 39600 }, { "epoch": 0.08001470606059381, "grad_norm": 195.5814666748047, "learning_rate": 7.922000000000001e-06, "loss": 19.0599, "step": 39610 }, { "epoch": 0.08003490669327763, "grad_norm": 928.6428833007812, "learning_rate": 7.924e-06, "loss": 43.4772, "step": 39620 }, { "epoch": 0.08005510732596145, "grad_norm": 448.8506164550781, "learning_rate": 7.926e-06, "loss": 27.3181, "step": 39630 }, { "epoch": 0.08007530795864526, "grad_norm": 295.6615295410156, "learning_rate": 7.928e-06, "loss": 31.1969, "step": 39640 }, { "epoch": 0.08009550859132908, "grad_norm": 844.4463500976562, "learning_rate": 7.93e-06, "loss": 31.1826, "step": 39650 }, { "epoch": 0.0801157092240129, "grad_norm": 1275.2359619140625, "learning_rate": 7.932000000000001e-06, "loss": 60.3057, "step": 39660 }, { "epoch": 0.0801359098566967, "grad_norm": 669.5518188476562, "learning_rate": 7.934e-06, "loss": 32.1826, "step": 39670 }, { "epoch": 0.08015611048938052, "grad_norm": 370.1067810058594, "learning_rate": 7.936e-06, "loss": 48.7568, "step": 39680 }, { "epoch": 0.08017631112206434, "grad_norm": 85.74179077148438, "learning_rate": 7.938000000000001e-06, "loss": 32.242, "step": 39690 }, { "epoch": 0.08019651175474816, "grad_norm": 449.3262023925781, "learning_rate": 7.94e-06, "loss": 35.4012, "step": 39700 }, { "epoch": 0.08021671238743197, "grad_norm": 253.4001922607422, "learning_rate": 7.942000000000001e-06, "loss": 9.3638, "step": 39710 }, { "epoch": 
0.08023691302011579, "grad_norm": 427.928955078125, "learning_rate": 7.944e-06, "loss": 20.7755, "step": 39720 }, { "epoch": 0.08025711365279961, "grad_norm": 963.2529296875, "learning_rate": 7.946e-06, "loss": 20.6616, "step": 39730 }, { "epoch": 0.08027731428548342, "grad_norm": 557.8382568359375, "learning_rate": 7.948e-06, "loss": 19.3359, "step": 39740 }, { "epoch": 0.08029751491816724, "grad_norm": 347.09466552734375, "learning_rate": 7.950000000000002e-06, "loss": 36.1432, "step": 39750 }, { "epoch": 0.08031771555085106, "grad_norm": 204.5667724609375, "learning_rate": 7.952000000000001e-06, "loss": 28.8233, "step": 39760 }, { "epoch": 0.08033791618353486, "grad_norm": 401.3494873046875, "learning_rate": 7.954e-06, "loss": 28.2676, "step": 39770 }, { "epoch": 0.08035811681621868, "grad_norm": 305.70440673828125, "learning_rate": 7.956e-06, "loss": 37.6146, "step": 39780 }, { "epoch": 0.0803783174489025, "grad_norm": 255.9058837890625, "learning_rate": 7.958e-06, "loss": 41.6842, "step": 39790 }, { "epoch": 0.08039851808158631, "grad_norm": 766.6397705078125, "learning_rate": 7.960000000000002e-06, "loss": 13.5986, "step": 39800 }, { "epoch": 0.08041871871427013, "grad_norm": 427.0234069824219, "learning_rate": 7.962000000000001e-06, "loss": 40.9575, "step": 39810 }, { "epoch": 0.08043891934695395, "grad_norm": 841.5509033203125, "learning_rate": 7.964e-06, "loss": 58.3562, "step": 39820 }, { "epoch": 0.08045911997963776, "grad_norm": 283.16986083984375, "learning_rate": 7.966e-06, "loss": 18.2624, "step": 39830 }, { "epoch": 0.08047932061232158, "grad_norm": 504.2042236328125, "learning_rate": 7.968e-06, "loss": 22.2934, "step": 39840 }, { "epoch": 0.0804995212450054, "grad_norm": 275.0429992675781, "learning_rate": 7.970000000000002e-06, "loss": 21.0991, "step": 39850 }, { "epoch": 0.08051972187768922, "grad_norm": 710.8739624023438, "learning_rate": 7.972000000000001e-06, "loss": 18.4247, "step": 39860 }, { "epoch": 0.08053992251037302, "grad_norm": 728.702880859375, "learning_rate": 7.974e-06, "loss": 22.7606, "step": 39870 }, { "epoch": 0.08056012314305684, "grad_norm": 521.3509521484375, "learning_rate": 7.976000000000001e-06, "loss": 21.6127, "step": 39880 }, { "epoch": 0.08058032377574066, "grad_norm": 398.2906188964844, "learning_rate": 7.978e-06, "loss": 22.7838, "step": 39890 }, { "epoch": 0.08060052440842447, "grad_norm": 405.0233154296875, "learning_rate": 7.980000000000002e-06, "loss": 20.4245, "step": 39900 }, { "epoch": 0.08062072504110829, "grad_norm": 275.5707702636719, "learning_rate": 7.982e-06, "loss": 49.7182, "step": 39910 }, { "epoch": 0.08064092567379211, "grad_norm": 360.2571716308594, "learning_rate": 7.984e-06, "loss": 16.3727, "step": 39920 }, { "epoch": 0.08066112630647591, "grad_norm": 792.240478515625, "learning_rate": 7.986000000000001e-06, "loss": 40.9525, "step": 39930 }, { "epoch": 0.08068132693915973, "grad_norm": 495.4645690917969, "learning_rate": 7.988e-06, "loss": 41.8557, "step": 39940 }, { "epoch": 0.08070152757184355, "grad_norm": 78.95877838134766, "learning_rate": 7.990000000000001e-06, "loss": 31.4824, "step": 39950 }, { "epoch": 0.08072172820452736, "grad_norm": 834.5568237304688, "learning_rate": 7.992e-06, "loss": 31.6639, "step": 39960 }, { "epoch": 0.08074192883721118, "grad_norm": 1125.6707763671875, "learning_rate": 7.994e-06, "loss": 22.9925, "step": 39970 }, { "epoch": 0.080762129469895, "grad_norm": 341.31854248046875, "learning_rate": 7.996000000000001e-06, "loss": 20.0642, "step": 39980 }, { "epoch": 0.0807823301025788, 
"grad_norm": 537.1127319335938, "learning_rate": 7.998e-06, "loss": 39.629, "step": 39990 }, { "epoch": 0.08080253073526263, "grad_norm": 627.4376831054688, "learning_rate": 8.000000000000001e-06, "loss": 10.9614, "step": 40000 }, { "epoch": 0.08082273136794645, "grad_norm": 1759.805908203125, "learning_rate": 8.002e-06, "loss": 34.1295, "step": 40010 }, { "epoch": 0.08084293200063027, "grad_norm": 330.1541748046875, "learning_rate": 8.004e-06, "loss": 38.8135, "step": 40020 }, { "epoch": 0.08086313263331407, "grad_norm": 339.01983642578125, "learning_rate": 8.006000000000001e-06, "loss": 19.9088, "step": 40030 }, { "epoch": 0.08088333326599789, "grad_norm": 237.59225463867188, "learning_rate": 8.008e-06, "loss": 28.8385, "step": 40040 }, { "epoch": 0.08090353389868171, "grad_norm": 666.0283813476562, "learning_rate": 8.010000000000001e-06, "loss": 27.669, "step": 40050 }, { "epoch": 0.08092373453136552, "grad_norm": 650.1888427734375, "learning_rate": 8.012e-06, "loss": 34.3803, "step": 40060 }, { "epoch": 0.08094393516404934, "grad_norm": 311.5391845703125, "learning_rate": 8.014e-06, "loss": 41.4724, "step": 40070 }, { "epoch": 0.08096413579673316, "grad_norm": 422.935791015625, "learning_rate": 8.016e-06, "loss": 24.8886, "step": 40080 }, { "epoch": 0.08098433642941696, "grad_norm": 1059.035400390625, "learning_rate": 8.018e-06, "loss": 43.1621, "step": 40090 }, { "epoch": 0.08100453706210078, "grad_norm": 480.57464599609375, "learning_rate": 8.020000000000001e-06, "loss": 14.7568, "step": 40100 }, { "epoch": 0.0810247376947846, "grad_norm": 879.3117065429688, "learning_rate": 8.022e-06, "loss": 30.1447, "step": 40110 }, { "epoch": 0.08104493832746841, "grad_norm": 210.75437927246094, "learning_rate": 8.024000000000001e-06, "loss": 25.4639, "step": 40120 }, { "epoch": 0.08106513896015223, "grad_norm": 235.2727813720703, "learning_rate": 8.026e-06, "loss": 31.0911, "step": 40130 }, { "epoch": 0.08108533959283605, "grad_norm": 977.4114379882812, "learning_rate": 8.028e-06, "loss": 55.1275, "step": 40140 }, { "epoch": 0.08110554022551986, "grad_norm": 594.6533203125, "learning_rate": 8.030000000000001e-06, "loss": 24.6549, "step": 40150 }, { "epoch": 0.08112574085820368, "grad_norm": 637.489013671875, "learning_rate": 8.032e-06, "loss": 44.8955, "step": 40160 }, { "epoch": 0.0811459414908875, "grad_norm": 305.0829162597656, "learning_rate": 8.034000000000001e-06, "loss": 27.1901, "step": 40170 }, { "epoch": 0.08116614212357132, "grad_norm": 196.19229125976562, "learning_rate": 8.036e-06, "loss": 25.8945, "step": 40180 }, { "epoch": 0.08118634275625512, "grad_norm": 977.69580078125, "learning_rate": 8.038e-06, "loss": 27.8558, "step": 40190 }, { "epoch": 0.08120654338893894, "grad_norm": 780.17138671875, "learning_rate": 8.040000000000001e-06, "loss": 23.9422, "step": 40200 }, { "epoch": 0.08122674402162276, "grad_norm": 611.3806762695312, "learning_rate": 8.042e-06, "loss": 28.3511, "step": 40210 }, { "epoch": 0.08124694465430657, "grad_norm": 622.1024780273438, "learning_rate": 8.044000000000001e-06, "loss": 21.6604, "step": 40220 }, { "epoch": 0.08126714528699039, "grad_norm": 918.8472900390625, "learning_rate": 8.046e-06, "loss": 26.455, "step": 40230 }, { "epoch": 0.08128734591967421, "grad_norm": 341.0751037597656, "learning_rate": 8.048e-06, "loss": 28.0223, "step": 40240 }, { "epoch": 0.08130754655235801, "grad_norm": 465.3175964355469, "learning_rate": 8.050000000000001e-06, "loss": 29.9542, "step": 40250 }, { "epoch": 0.08132774718504183, "grad_norm": 136.3564453125, 
"learning_rate": 8.052e-06, "loss": 32.5239, "step": 40260 }, { "epoch": 0.08134794781772565, "grad_norm": 725.799072265625, "learning_rate": 8.054000000000001e-06, "loss": 43.8185, "step": 40270 }, { "epoch": 0.08136814845040946, "grad_norm": 1356.5460205078125, "learning_rate": 8.056e-06, "loss": 24.7778, "step": 40280 }, { "epoch": 0.08138834908309328, "grad_norm": 517.8104248046875, "learning_rate": 8.058e-06, "loss": 35.1107, "step": 40290 }, { "epoch": 0.0814085497157771, "grad_norm": 297.4921875, "learning_rate": 8.06e-06, "loss": 25.8174, "step": 40300 }, { "epoch": 0.08142875034846091, "grad_norm": 340.6484375, "learning_rate": 8.062000000000002e-06, "loss": 25.051, "step": 40310 }, { "epoch": 0.08144895098114473, "grad_norm": 443.00933837890625, "learning_rate": 8.064000000000001e-06, "loss": 20.8189, "step": 40320 }, { "epoch": 0.08146915161382855, "grad_norm": 438.75347900390625, "learning_rate": 8.066e-06, "loss": 22.4965, "step": 40330 }, { "epoch": 0.08148935224651237, "grad_norm": 481.3415222167969, "learning_rate": 8.068e-06, "loss": 23.0569, "step": 40340 }, { "epoch": 0.08150955287919617, "grad_norm": 1392.9268798828125, "learning_rate": 8.07e-06, "loss": 41.2481, "step": 40350 }, { "epoch": 0.08152975351188, "grad_norm": 249.26956176757812, "learning_rate": 8.072000000000002e-06, "loss": 20.832, "step": 40360 }, { "epoch": 0.08154995414456381, "grad_norm": 21843.982421875, "learning_rate": 8.074000000000001e-06, "loss": 163.6426, "step": 40370 }, { "epoch": 0.08157015477724762, "grad_norm": 975.552490234375, "learning_rate": 8.076e-06, "loss": 36.2325, "step": 40380 }, { "epoch": 0.08159035540993144, "grad_norm": 376.71392822265625, "learning_rate": 8.078e-06, "loss": 27.1425, "step": 40390 }, { "epoch": 0.08161055604261526, "grad_norm": 438.13592529296875, "learning_rate": 8.08e-06, "loss": 26.5299, "step": 40400 }, { "epoch": 0.08163075667529907, "grad_norm": 700.2650756835938, "learning_rate": 8.082000000000002e-06, "loss": 19.8693, "step": 40410 }, { "epoch": 0.08165095730798289, "grad_norm": 253.17955017089844, "learning_rate": 8.084000000000001e-06, "loss": 27.1577, "step": 40420 }, { "epoch": 0.0816711579406667, "grad_norm": 170.56356811523438, "learning_rate": 8.086e-06, "loss": 17.2469, "step": 40430 }, { "epoch": 0.08169135857335051, "grad_norm": 399.56024169921875, "learning_rate": 8.088e-06, "loss": 44.8697, "step": 40440 }, { "epoch": 0.08171155920603433, "grad_norm": 262.854736328125, "learning_rate": 8.09e-06, "loss": 41.7183, "step": 40450 }, { "epoch": 0.08173175983871815, "grad_norm": 2068.342041015625, "learning_rate": 8.092000000000001e-06, "loss": 30.9143, "step": 40460 }, { "epoch": 0.08175196047140196, "grad_norm": 0.0, "learning_rate": 8.094e-06, "loss": 24.21, "step": 40470 }, { "epoch": 0.08177216110408578, "grad_norm": 625.5025024414062, "learning_rate": 8.096e-06, "loss": 44.1145, "step": 40480 }, { "epoch": 0.0817923617367696, "grad_norm": 492.1724853515625, "learning_rate": 8.098000000000001e-06, "loss": 18.3544, "step": 40490 }, { "epoch": 0.08181256236945342, "grad_norm": 634.8037109375, "learning_rate": 8.1e-06, "loss": 24.2937, "step": 40500 }, { "epoch": 0.08183276300213722, "grad_norm": 666.6829833984375, "learning_rate": 8.102000000000001e-06, "loss": 26.6929, "step": 40510 }, { "epoch": 0.08185296363482104, "grad_norm": 754.0609130859375, "learning_rate": 8.104e-06, "loss": 28.598, "step": 40520 }, { "epoch": 0.08187316426750486, "grad_norm": 714.6564331054688, "learning_rate": 8.106e-06, "loss": 29.988, "step": 40530 }, { "epoch": 
0.08189336490018867, "grad_norm": 269.427001953125, "learning_rate": 8.108000000000001e-06, "loss": 21.3572, "step": 40540 }, { "epoch": 0.08191356553287249, "grad_norm": 756.2377319335938, "learning_rate": 8.110000000000002e-06, "loss": 22.3681, "step": 40550 }, { "epoch": 0.08193376616555631, "grad_norm": 526.88427734375, "learning_rate": 8.112000000000001e-06, "loss": 44.0786, "step": 40560 }, { "epoch": 0.08195396679824012, "grad_norm": 170.62673950195312, "learning_rate": 8.114e-06, "loss": 30.5698, "step": 40570 }, { "epoch": 0.08197416743092394, "grad_norm": 158.55050659179688, "learning_rate": 8.116e-06, "loss": 11.1812, "step": 40580 }, { "epoch": 0.08199436806360776, "grad_norm": 1319.344970703125, "learning_rate": 8.118000000000001e-06, "loss": 23.4195, "step": 40590 }, { "epoch": 0.08201456869629156, "grad_norm": 318.0577392578125, "learning_rate": 8.120000000000002e-06, "loss": 15.1838, "step": 40600 }, { "epoch": 0.08203476932897538, "grad_norm": 1007.6936645507812, "learning_rate": 8.122000000000001e-06, "loss": 42.0716, "step": 40610 }, { "epoch": 0.0820549699616592, "grad_norm": 743.8515014648438, "learning_rate": 8.124e-06, "loss": 32.5392, "step": 40620 }, { "epoch": 0.08207517059434301, "grad_norm": 240.04466247558594, "learning_rate": 8.126e-06, "loss": 11.0315, "step": 40630 }, { "epoch": 0.08209537122702683, "grad_norm": 562.1881103515625, "learning_rate": 8.128e-06, "loss": 33.486, "step": 40640 }, { "epoch": 0.08211557185971065, "grad_norm": 497.35400390625, "learning_rate": 8.13e-06, "loss": 31.8643, "step": 40650 }, { "epoch": 0.08213577249239447, "grad_norm": 762.3814086914062, "learning_rate": 8.132000000000001e-06, "loss": 21.6367, "step": 40660 }, { "epoch": 0.08215597312507827, "grad_norm": 468.392578125, "learning_rate": 8.134e-06, "loss": 23.6273, "step": 40670 }, { "epoch": 0.0821761737577621, "grad_norm": 641.936279296875, "learning_rate": 8.136000000000001e-06, "loss": 23.3873, "step": 40680 }, { "epoch": 0.08219637439044591, "grad_norm": 321.6557922363281, "learning_rate": 8.138e-06, "loss": 24.4871, "step": 40690 }, { "epoch": 0.08221657502312972, "grad_norm": 459.4052734375, "learning_rate": 8.14e-06, "loss": 25.6145, "step": 40700 }, { "epoch": 0.08223677565581354, "grad_norm": 196.30247497558594, "learning_rate": 8.142000000000001e-06, "loss": 27.1357, "step": 40710 }, { "epoch": 0.08225697628849736, "grad_norm": 385.11175537109375, "learning_rate": 8.144e-06, "loss": 24.2411, "step": 40720 }, { "epoch": 0.08227717692118117, "grad_norm": 504.1175537109375, "learning_rate": 8.146000000000001e-06, "loss": 43.324, "step": 40730 }, { "epoch": 0.08229737755386499, "grad_norm": 414.9750061035156, "learning_rate": 8.148e-06, "loss": 32.9271, "step": 40740 }, { "epoch": 0.0823175781865488, "grad_norm": 504.9588623046875, "learning_rate": 8.15e-06, "loss": 13.1979, "step": 40750 }, { "epoch": 0.08233777881923261, "grad_norm": 1631.643310546875, "learning_rate": 8.152000000000001e-06, "loss": 35.6901, "step": 40760 }, { "epoch": 0.08235797945191643, "grad_norm": 403.29083251953125, "learning_rate": 8.154e-06, "loss": 33.276, "step": 40770 }, { "epoch": 0.08237818008460025, "grad_norm": 369.1541748046875, "learning_rate": 8.156000000000001e-06, "loss": 25.6719, "step": 40780 }, { "epoch": 0.08239838071728406, "grad_norm": 772.0247802734375, "learning_rate": 8.158e-06, "loss": 32.0478, "step": 40790 }, { "epoch": 0.08241858134996788, "grad_norm": 72.48571014404297, "learning_rate": 8.16e-06, "loss": 23.0447, "step": 40800 }, { "epoch": 0.0824387819826517, 
"grad_norm": 208.5259246826172, "learning_rate": 8.162e-06, "loss": 16.0053, "step": 40810 }, { "epoch": 0.08245898261533552, "grad_norm": 603.0857543945312, "learning_rate": 8.164e-06, "loss": 23.1499, "step": 40820 }, { "epoch": 0.08247918324801932, "grad_norm": 1333.773193359375, "learning_rate": 8.166000000000001e-06, "loss": 43.6377, "step": 40830 }, { "epoch": 0.08249938388070314, "grad_norm": 736.5938720703125, "learning_rate": 8.168e-06, "loss": 29.6003, "step": 40840 }, { "epoch": 0.08251958451338696, "grad_norm": 653.9880981445312, "learning_rate": 8.17e-06, "loss": 22.1198, "step": 40850 }, { "epoch": 0.08253978514607077, "grad_norm": 335.3724365234375, "learning_rate": 8.172e-06, "loss": 25.5172, "step": 40860 }, { "epoch": 0.08255998577875459, "grad_norm": 306.0758972167969, "learning_rate": 8.174e-06, "loss": 22.6132, "step": 40870 }, { "epoch": 0.08258018641143841, "grad_norm": 241.45751953125, "learning_rate": 8.176000000000001e-06, "loss": 27.1, "step": 40880 }, { "epoch": 0.08260038704412222, "grad_norm": 831.9751586914062, "learning_rate": 8.178e-06, "loss": 22.5524, "step": 40890 }, { "epoch": 0.08262058767680604, "grad_norm": 880.759765625, "learning_rate": 8.18e-06, "loss": 29.0195, "step": 40900 }, { "epoch": 0.08264078830948986, "grad_norm": 794.4937744140625, "learning_rate": 8.182e-06, "loss": 21.1029, "step": 40910 }, { "epoch": 0.08266098894217366, "grad_norm": 198.36871337890625, "learning_rate": 8.184000000000002e-06, "loss": 35.888, "step": 40920 }, { "epoch": 0.08268118957485748, "grad_norm": 371.2920837402344, "learning_rate": 8.186000000000001e-06, "loss": 29.9417, "step": 40930 }, { "epoch": 0.0827013902075413, "grad_norm": 761.3952026367188, "learning_rate": 8.188e-06, "loss": 27.3552, "step": 40940 }, { "epoch": 0.08272159084022511, "grad_norm": 397.6307067871094, "learning_rate": 8.19e-06, "loss": 25.9441, "step": 40950 }, { "epoch": 0.08274179147290893, "grad_norm": 450.3551025390625, "learning_rate": 8.192e-06, "loss": 18.6319, "step": 40960 }, { "epoch": 0.08276199210559275, "grad_norm": 1369.5721435546875, "learning_rate": 8.194000000000002e-06, "loss": 43.8313, "step": 40970 }, { "epoch": 0.08278219273827657, "grad_norm": 1892.084716796875, "learning_rate": 8.196e-06, "loss": 28.8405, "step": 40980 }, { "epoch": 0.08280239337096038, "grad_norm": 391.256103515625, "learning_rate": 8.198e-06, "loss": 15.2958, "step": 40990 }, { "epoch": 0.0828225940036442, "grad_norm": 531.6309204101562, "learning_rate": 8.2e-06, "loss": 55.6875, "step": 41000 }, { "epoch": 0.08284279463632802, "grad_norm": 297.855224609375, "learning_rate": 8.202e-06, "loss": 57.8646, "step": 41010 }, { "epoch": 0.08286299526901182, "grad_norm": 479.3824157714844, "learning_rate": 8.204000000000001e-06, "loss": 21.4301, "step": 41020 }, { "epoch": 0.08288319590169564, "grad_norm": 607.3418579101562, "learning_rate": 8.206e-06, "loss": 18.8603, "step": 41030 }, { "epoch": 0.08290339653437946, "grad_norm": 906.0545043945312, "learning_rate": 8.208e-06, "loss": 19.5641, "step": 41040 }, { "epoch": 0.08292359716706327, "grad_norm": 424.09490966796875, "learning_rate": 8.210000000000001e-06, "loss": 27.5386, "step": 41050 }, { "epoch": 0.08294379779974709, "grad_norm": 570.6898803710938, "learning_rate": 8.212e-06, "loss": 42.5885, "step": 41060 }, { "epoch": 0.08296399843243091, "grad_norm": 418.614990234375, "learning_rate": 8.214000000000001e-06, "loss": 33.9309, "step": 41070 }, { "epoch": 0.08298419906511471, "grad_norm": 165.93856811523438, "learning_rate": 8.216e-06, "loss": 
9.762, "step": 41080 }, { "epoch": 0.08300439969779853, "grad_norm": 193.1525421142578, "learning_rate": 8.218e-06, "loss": 18.7477, "step": 41090 }, { "epoch": 0.08302460033048235, "grad_norm": 979.6331787109375, "learning_rate": 8.220000000000001e-06, "loss": 25.1848, "step": 41100 }, { "epoch": 0.08304480096316616, "grad_norm": 431.3668212890625, "learning_rate": 8.222000000000002e-06, "loss": 29.4433, "step": 41110 }, { "epoch": 0.08306500159584998, "grad_norm": 455.6502990722656, "learning_rate": 8.224000000000001e-06, "loss": 26.0422, "step": 41120 }, { "epoch": 0.0830852022285338, "grad_norm": 230.00115966796875, "learning_rate": 8.226e-06, "loss": 17.0421, "step": 41130 }, { "epoch": 0.08310540286121762, "grad_norm": 495.62384033203125, "learning_rate": 8.228e-06, "loss": 9.0154, "step": 41140 }, { "epoch": 0.08312560349390143, "grad_norm": 102.5909423828125, "learning_rate": 8.23e-06, "loss": 37.1204, "step": 41150 }, { "epoch": 0.08314580412658525, "grad_norm": 502.5970764160156, "learning_rate": 8.232000000000002e-06, "loss": 39.7973, "step": 41160 }, { "epoch": 0.08316600475926907, "grad_norm": 378.26171875, "learning_rate": 8.234000000000001e-06, "loss": 16.8596, "step": 41170 }, { "epoch": 0.08318620539195287, "grad_norm": 492.6539306640625, "learning_rate": 8.236e-06, "loss": 41.4776, "step": 41180 }, { "epoch": 0.08320640602463669, "grad_norm": 383.61334228515625, "learning_rate": 8.238e-06, "loss": 45.7282, "step": 41190 }, { "epoch": 0.08322660665732051, "grad_norm": 531.486083984375, "learning_rate": 8.24e-06, "loss": 34.7144, "step": 41200 }, { "epoch": 0.08324680729000432, "grad_norm": 308.7742004394531, "learning_rate": 8.242000000000002e-06, "loss": 52.3713, "step": 41210 }, { "epoch": 0.08326700792268814, "grad_norm": 354.1903991699219, "learning_rate": 8.244000000000001e-06, "loss": 37.182, "step": 41220 }, { "epoch": 0.08328720855537196, "grad_norm": 454.5882263183594, "learning_rate": 8.246e-06, "loss": 38.7506, "step": 41230 }, { "epoch": 0.08330740918805576, "grad_norm": 902.9432983398438, "learning_rate": 8.248e-06, "loss": 23.8521, "step": 41240 }, { "epoch": 0.08332760982073958, "grad_norm": 92.36296081542969, "learning_rate": 8.25e-06, "loss": 27.6562, "step": 41250 }, { "epoch": 0.0833478104534234, "grad_norm": 790.8438110351562, "learning_rate": 8.252000000000002e-06, "loss": 37.9746, "step": 41260 }, { "epoch": 0.08336801108610721, "grad_norm": 457.1481628417969, "learning_rate": 8.254000000000001e-06, "loss": 28.6681, "step": 41270 }, { "epoch": 0.08338821171879103, "grad_norm": 494.378173828125, "learning_rate": 8.256e-06, "loss": 25.9664, "step": 41280 }, { "epoch": 0.08340841235147485, "grad_norm": 270.1199951171875, "learning_rate": 8.258000000000001e-06, "loss": 15.3111, "step": 41290 }, { "epoch": 0.08342861298415867, "grad_norm": 325.8103942871094, "learning_rate": 8.26e-06, "loss": 31.758, "step": 41300 }, { "epoch": 0.08344881361684248, "grad_norm": 400.83905029296875, "learning_rate": 8.262000000000002e-06, "loss": 31.7576, "step": 41310 }, { "epoch": 0.0834690142495263, "grad_norm": 578.0596313476562, "learning_rate": 8.264e-06, "loss": 45.9719, "step": 41320 }, { "epoch": 0.08348921488221012, "grad_norm": 1000.2903442382812, "learning_rate": 8.266e-06, "loss": 41.1022, "step": 41330 }, { "epoch": 0.08350941551489392, "grad_norm": 926.373291015625, "learning_rate": 8.268000000000001e-06, "loss": 40.61, "step": 41340 }, { "epoch": 0.08352961614757774, "grad_norm": 606.2439575195312, "learning_rate": 8.27e-06, "loss": 18.0386, "step": 41350 }, 
{ "epoch": 0.08354981678026156, "grad_norm": 614.6911010742188, "learning_rate": 8.272000000000001e-06, "loss": 31.1184, "step": 41360 }, { "epoch": 0.08357001741294537, "grad_norm": 477.9070129394531, "learning_rate": 8.274e-06, "loss": 20.5355, "step": 41370 }, { "epoch": 0.08359021804562919, "grad_norm": 652.8970947265625, "learning_rate": 8.276e-06, "loss": 38.6469, "step": 41380 }, { "epoch": 0.08361041867831301, "grad_norm": 843.1055908203125, "learning_rate": 8.278000000000001e-06, "loss": 38.3296, "step": 41390 }, { "epoch": 0.08363061931099681, "grad_norm": 509.9186096191406, "learning_rate": 8.28e-06, "loss": 27.9005, "step": 41400 }, { "epoch": 0.08365081994368063, "grad_norm": 819.67041015625, "learning_rate": 8.282000000000001e-06, "loss": 46.2935, "step": 41410 }, { "epoch": 0.08367102057636445, "grad_norm": 179.84751892089844, "learning_rate": 8.284e-06, "loss": 21.5093, "step": 41420 }, { "epoch": 0.08369122120904826, "grad_norm": 1020.0496826171875, "learning_rate": 8.286e-06, "loss": 26.0462, "step": 41430 }, { "epoch": 0.08371142184173208, "grad_norm": 596.0717163085938, "learning_rate": 8.288000000000001e-06, "loss": 25.1455, "step": 41440 }, { "epoch": 0.0837316224744159, "grad_norm": 339.4415588378906, "learning_rate": 8.29e-06, "loss": 37.9272, "step": 41450 }, { "epoch": 0.08375182310709972, "grad_norm": 407.4787902832031, "learning_rate": 8.292000000000001e-06, "loss": 27.6669, "step": 41460 }, { "epoch": 0.08377202373978353, "grad_norm": 475.36175537109375, "learning_rate": 8.294e-06, "loss": 28.2954, "step": 41470 }, { "epoch": 0.08379222437246735, "grad_norm": 855.4002075195312, "learning_rate": 8.296000000000002e-06, "loss": 53.6953, "step": 41480 }, { "epoch": 0.08381242500515117, "grad_norm": 402.96697998046875, "learning_rate": 8.298000000000001e-06, "loss": 14.4904, "step": 41490 }, { "epoch": 0.08383262563783497, "grad_norm": 970.7698974609375, "learning_rate": 8.3e-06, "loss": 16.2845, "step": 41500 }, { "epoch": 0.0838528262705188, "grad_norm": 15.433819770812988, "learning_rate": 8.302000000000001e-06, "loss": 29.1261, "step": 41510 }, { "epoch": 0.08387302690320261, "grad_norm": 988.315673828125, "learning_rate": 8.304e-06, "loss": 28.2776, "step": 41520 }, { "epoch": 0.08389322753588642, "grad_norm": 191.4070281982422, "learning_rate": 8.306000000000001e-06, "loss": 12.7386, "step": 41530 }, { "epoch": 0.08391342816857024, "grad_norm": 406.33984375, "learning_rate": 8.308e-06, "loss": 30.4329, "step": 41540 }, { "epoch": 0.08393362880125406, "grad_norm": 2645.128662109375, "learning_rate": 8.31e-06, "loss": 32.924, "step": 41550 }, { "epoch": 0.08395382943393787, "grad_norm": 257.9555358886719, "learning_rate": 8.312000000000001e-06, "loss": 18.9842, "step": 41560 }, { "epoch": 0.08397403006662169, "grad_norm": 1188.1697998046875, "learning_rate": 8.314e-06, "loss": 29.7597, "step": 41570 }, { "epoch": 0.0839942306993055, "grad_norm": 760.7401733398438, "learning_rate": 8.316000000000001e-06, "loss": 26.4646, "step": 41580 }, { "epoch": 0.08401443133198931, "grad_norm": 395.8313293457031, "learning_rate": 8.318e-06, "loss": 27.8486, "step": 41590 }, { "epoch": 0.08403463196467313, "grad_norm": 300.6554870605469, "learning_rate": 8.32e-06, "loss": 52.6849, "step": 41600 }, { "epoch": 0.08405483259735695, "grad_norm": 352.93463134765625, "learning_rate": 8.322000000000001e-06, "loss": 35.2932, "step": 41610 }, { "epoch": 0.08407503323004077, "grad_norm": 1065.8187255859375, "learning_rate": 8.324e-06, "loss": 21.1793, "step": 41620 }, { "epoch": 
0.08409523386272458, "grad_norm": 468.0779724121094, "learning_rate": 8.326000000000001e-06, "loss": 16.2983, "step": 41630 }, { "epoch": 0.0841154344954084, "grad_norm": 404.51318359375, "learning_rate": 8.328e-06, "loss": 29.0949, "step": 41640 }, { "epoch": 0.08413563512809222, "grad_norm": 540.3251342773438, "learning_rate": 8.33e-06, "loss": 29.328, "step": 41650 }, { "epoch": 0.08415583576077602, "grad_norm": 335.04888916015625, "learning_rate": 8.332000000000001e-06, "loss": 19.6916, "step": 41660 }, { "epoch": 0.08417603639345984, "grad_norm": 330.9485168457031, "learning_rate": 8.334e-06, "loss": 13.4891, "step": 41670 }, { "epoch": 0.08419623702614366, "grad_norm": 615.0405883789062, "learning_rate": 8.336000000000001e-06, "loss": 45.3571, "step": 41680 }, { "epoch": 0.08421643765882747, "grad_norm": 740.5963745117188, "learning_rate": 8.338e-06, "loss": 19.0211, "step": 41690 }, { "epoch": 0.08423663829151129, "grad_norm": 243.19345092773438, "learning_rate": 8.34e-06, "loss": 15.5291, "step": 41700 }, { "epoch": 0.08425683892419511, "grad_norm": 326.7149963378906, "learning_rate": 8.342e-06, "loss": 22.8227, "step": 41710 }, { "epoch": 0.08427703955687892, "grad_norm": 531.5779418945312, "learning_rate": 8.344000000000002e-06, "loss": 17.4885, "step": 41720 }, { "epoch": 0.08429724018956274, "grad_norm": 329.50665283203125, "learning_rate": 8.346000000000001e-06, "loss": 17.6037, "step": 41730 }, { "epoch": 0.08431744082224656, "grad_norm": 404.5074157714844, "learning_rate": 8.348e-06, "loss": 21.7302, "step": 41740 }, { "epoch": 0.08433764145493036, "grad_norm": 2198.21484375, "learning_rate": 8.35e-06, "loss": 39.81, "step": 41750 }, { "epoch": 0.08435784208761418, "grad_norm": 482.79779052734375, "learning_rate": 8.352e-06, "loss": 27.2771, "step": 41760 }, { "epoch": 0.084378042720298, "grad_norm": 507.9755859375, "learning_rate": 8.354000000000002e-06, "loss": 31.3565, "step": 41770 }, { "epoch": 0.08439824335298182, "grad_norm": 497.9572448730469, "learning_rate": 8.356000000000001e-06, "loss": 15.4224, "step": 41780 }, { "epoch": 0.08441844398566563, "grad_norm": 1225.9915771484375, "learning_rate": 8.358e-06, "loss": 31.6797, "step": 41790 }, { "epoch": 0.08443864461834945, "grad_norm": 1556.139404296875, "learning_rate": 8.36e-06, "loss": 31.5198, "step": 41800 }, { "epoch": 0.08445884525103327, "grad_norm": 128.2238006591797, "learning_rate": 8.362e-06, "loss": 19.0833, "step": 41810 }, { "epoch": 0.08447904588371707, "grad_norm": 409.283935546875, "learning_rate": 8.364000000000002e-06, "loss": 66.9144, "step": 41820 }, { "epoch": 0.0844992465164009, "grad_norm": 339.4384460449219, "learning_rate": 8.366000000000001e-06, "loss": 31.6918, "step": 41830 }, { "epoch": 0.08451944714908471, "grad_norm": 182.897216796875, "learning_rate": 8.368e-06, "loss": 19.6857, "step": 41840 }, { "epoch": 0.08453964778176852, "grad_norm": 490.0975341796875, "learning_rate": 8.370000000000001e-06, "loss": 25.7417, "step": 41850 }, { "epoch": 0.08455984841445234, "grad_norm": 459.1452331542969, "learning_rate": 8.372e-06, "loss": 26.4446, "step": 41860 }, { "epoch": 0.08458004904713616, "grad_norm": 581.1602172851562, "learning_rate": 8.374000000000001e-06, "loss": 31.0742, "step": 41870 }, { "epoch": 0.08460024967981997, "grad_norm": 451.20562744140625, "learning_rate": 8.376e-06, "loss": 26.2722, "step": 41880 }, { "epoch": 0.08462045031250379, "grad_norm": 299.28204345703125, "learning_rate": 8.378e-06, "loss": 36.2561, "step": 41890 }, { "epoch": 0.0846406509451876, "grad_norm": 
705.5419311523438, "learning_rate": 8.380000000000001e-06, "loss": 27.1341, "step": 41900 }, { "epoch": 0.08466085157787141, "grad_norm": 602.3165283203125, "learning_rate": 8.382e-06, "loss": 17.7116, "step": 41910 }, { "epoch": 0.08468105221055523, "grad_norm": 337.2932434082031, "learning_rate": 8.384000000000001e-06, "loss": 24.8853, "step": 41920 }, { "epoch": 0.08470125284323905, "grad_norm": 604.894775390625, "learning_rate": 8.386e-06, "loss": 35.2355, "step": 41930 }, { "epoch": 0.08472145347592287, "grad_norm": 403.151123046875, "learning_rate": 8.388e-06, "loss": 25.5887, "step": 41940 }, { "epoch": 0.08474165410860668, "grad_norm": 304.0711669921875, "learning_rate": 8.390000000000001e-06, "loss": 14.1724, "step": 41950 }, { "epoch": 0.0847618547412905, "grad_norm": 502.6972961425781, "learning_rate": 8.392e-06, "loss": 28.5268, "step": 41960 }, { "epoch": 0.08478205537397432, "grad_norm": 705.1716918945312, "learning_rate": 8.394000000000001e-06, "loss": 30.1254, "step": 41970 }, { "epoch": 0.08480225600665812, "grad_norm": 112.1724853515625, "learning_rate": 8.396e-06, "loss": 28.2086, "step": 41980 }, { "epoch": 0.08482245663934194, "grad_norm": 685.7117919921875, "learning_rate": 8.398e-06, "loss": 25.5067, "step": 41990 }, { "epoch": 0.08484265727202576, "grad_norm": 233.19610595703125, "learning_rate": 8.400000000000001e-06, "loss": 19.437, "step": 42000 }, { "epoch": 0.08486285790470957, "grad_norm": 568.2144775390625, "learning_rate": 8.402e-06, "loss": 25.4206, "step": 42010 }, { "epoch": 0.08488305853739339, "grad_norm": 160.93862915039062, "learning_rate": 8.404000000000001e-06, "loss": 67.4517, "step": 42020 }, { "epoch": 0.08490325917007721, "grad_norm": 427.6259460449219, "learning_rate": 8.406e-06, "loss": 25.9073, "step": 42030 }, { "epoch": 0.08492345980276102, "grad_norm": 1021.2572631835938, "learning_rate": 8.408e-06, "loss": 48.5253, "step": 42040 }, { "epoch": 0.08494366043544484, "grad_norm": 609.3449096679688, "learning_rate": 8.41e-06, "loss": 23.9602, "step": 42050 }, { "epoch": 0.08496386106812866, "grad_norm": 300.64569091796875, "learning_rate": 8.412e-06, "loss": 20.4327, "step": 42060 }, { "epoch": 0.08498406170081246, "grad_norm": 641.7628784179688, "learning_rate": 8.414000000000001e-06, "loss": 20.2711, "step": 42070 }, { "epoch": 0.08500426233349628, "grad_norm": 452.14080810546875, "learning_rate": 8.416e-06, "loss": 19.057, "step": 42080 }, { "epoch": 0.0850244629661801, "grad_norm": 560.966064453125, "learning_rate": 8.418000000000001e-06, "loss": 20.311, "step": 42090 }, { "epoch": 0.08504466359886392, "grad_norm": 113.4162826538086, "learning_rate": 8.42e-06, "loss": 29.6927, "step": 42100 }, { "epoch": 0.08506486423154773, "grad_norm": 379.02099609375, "learning_rate": 8.422e-06, "loss": 25.708, "step": 42110 }, { "epoch": 0.08508506486423155, "grad_norm": 514.345703125, "learning_rate": 8.424000000000001e-06, "loss": 14.1769, "step": 42120 }, { "epoch": 0.08510526549691537, "grad_norm": 212.86944580078125, "learning_rate": 8.426e-06, "loss": 38.5426, "step": 42130 }, { "epoch": 0.08512546612959918, "grad_norm": 208.7589874267578, "learning_rate": 8.428000000000001e-06, "loss": 31.9014, "step": 42140 }, { "epoch": 0.085145666762283, "grad_norm": 830.7401123046875, "learning_rate": 8.43e-06, "loss": 20.2828, "step": 42150 }, { "epoch": 0.08516586739496682, "grad_norm": 324.7993469238281, "learning_rate": 8.432e-06, "loss": 32.6256, "step": 42160 }, { "epoch": 0.08518606802765062, "grad_norm": 223.50123596191406, "learning_rate": 
8.434000000000001e-06, "loss": 28.1592, "step": 42170 }, { "epoch": 0.08520626866033444, "grad_norm": 1005.3001098632812, "learning_rate": 8.436e-06, "loss": 29.8392, "step": 42180 }, { "epoch": 0.08522646929301826, "grad_norm": 510.07598876953125, "learning_rate": 8.438000000000001e-06, "loss": 29.9206, "step": 42190 }, { "epoch": 0.08524666992570207, "grad_norm": 157.23410034179688, "learning_rate": 8.44e-06, "loss": 20.0351, "step": 42200 }, { "epoch": 0.08526687055838589, "grad_norm": 873.9767456054688, "learning_rate": 8.442e-06, "loss": 36.1325, "step": 42210 }, { "epoch": 0.08528707119106971, "grad_norm": 298.45013427734375, "learning_rate": 8.444e-06, "loss": 21.6595, "step": 42220 }, { "epoch": 0.08530727182375351, "grad_norm": 1407.209716796875, "learning_rate": 8.446e-06, "loss": 37.8436, "step": 42230 }, { "epoch": 0.08532747245643733, "grad_norm": 272.6626892089844, "learning_rate": 8.448000000000001e-06, "loss": 20.1577, "step": 42240 }, { "epoch": 0.08534767308912115, "grad_norm": 602.26513671875, "learning_rate": 8.45e-06, "loss": 41.1474, "step": 42250 }, { "epoch": 0.08536787372180497, "grad_norm": 456.1973571777344, "learning_rate": 8.452e-06, "loss": 20.4341, "step": 42260 }, { "epoch": 0.08538807435448878, "grad_norm": 117.47479248046875, "learning_rate": 8.454e-06, "loss": 42.2305, "step": 42270 }, { "epoch": 0.0854082749871726, "grad_norm": 460.8744812011719, "learning_rate": 8.456000000000002e-06, "loss": 34.738, "step": 42280 }, { "epoch": 0.08542847561985642, "grad_norm": 72.98493194580078, "learning_rate": 8.458000000000001e-06, "loss": 28.1704, "step": 42290 }, { "epoch": 0.08544867625254023, "grad_norm": 652.7301025390625, "learning_rate": 8.46e-06, "loss": 16.1724, "step": 42300 }, { "epoch": 0.08546887688522405, "grad_norm": 89.0145492553711, "learning_rate": 8.462e-06, "loss": 27.9235, "step": 42310 }, { "epoch": 0.08548907751790787, "grad_norm": 408.6658935546875, "learning_rate": 8.464e-06, "loss": 15.6284, "step": 42320 }, { "epoch": 0.08550927815059167, "grad_norm": 62.5884895324707, "learning_rate": 8.466000000000002e-06, "loss": 30.4507, "step": 42330 }, { "epoch": 0.08552947878327549, "grad_norm": 414.3277587890625, "learning_rate": 8.468000000000001e-06, "loss": 20.8086, "step": 42340 }, { "epoch": 0.08554967941595931, "grad_norm": 403.589599609375, "learning_rate": 8.47e-06, "loss": 30.4291, "step": 42350 }, { "epoch": 0.08556988004864312, "grad_norm": 324.6241149902344, "learning_rate": 8.472e-06, "loss": 15.3373, "step": 42360 }, { "epoch": 0.08559008068132694, "grad_norm": 705.1282958984375, "learning_rate": 8.474e-06, "loss": 34.0037, "step": 42370 }, { "epoch": 0.08561028131401076, "grad_norm": 260.4348449707031, "learning_rate": 8.476000000000002e-06, "loss": 30.7474, "step": 42380 }, { "epoch": 0.08563048194669456, "grad_norm": 382.897705078125, "learning_rate": 8.478e-06, "loss": 35.5057, "step": 42390 }, { "epoch": 0.08565068257937838, "grad_norm": 1128.849853515625, "learning_rate": 8.48e-06, "loss": 21.7381, "step": 42400 }, { "epoch": 0.0856708832120622, "grad_norm": 723.4257202148438, "learning_rate": 8.482e-06, "loss": 44.8299, "step": 42410 }, { "epoch": 0.08569108384474602, "grad_norm": 4.07509183883667, "learning_rate": 8.484e-06, "loss": 22.9919, "step": 42420 }, { "epoch": 0.08571128447742983, "grad_norm": 623.5599365234375, "learning_rate": 8.486000000000001e-06, "loss": 35.1298, "step": 42430 }, { "epoch": 0.08573148511011365, "grad_norm": 4151.32666015625, "learning_rate": 8.488e-06, "loss": 52.9023, "step": 42440 }, { "epoch": 
0.08575168574279747, "grad_norm": 431.2698059082031, "learning_rate": 8.49e-06, "loss": 27.7349, "step": 42450 }, { "epoch": 0.08577188637548128, "grad_norm": 922.9758911132812, "learning_rate": 8.492000000000001e-06, "loss": 32.2484, "step": 42460 }, { "epoch": 0.0857920870081651, "grad_norm": 564.1514282226562, "learning_rate": 8.494e-06, "loss": 16.0605, "step": 42470 }, { "epoch": 0.08581228764084892, "grad_norm": 268.5475158691406, "learning_rate": 8.496000000000001e-06, "loss": 32.3987, "step": 42480 }, { "epoch": 0.08583248827353272, "grad_norm": 47.12990951538086, "learning_rate": 8.498e-06, "loss": 24.0045, "step": 42490 }, { "epoch": 0.08585268890621654, "grad_norm": 445.939453125, "learning_rate": 8.5e-06, "loss": 23.9519, "step": 42500 }, { "epoch": 0.08587288953890036, "grad_norm": 4.202223777770996, "learning_rate": 8.502000000000001e-06, "loss": 28.0244, "step": 42510 }, { "epoch": 0.08589309017158417, "grad_norm": 554.5347290039062, "learning_rate": 8.504000000000002e-06, "loss": 15.4307, "step": 42520 }, { "epoch": 0.08591329080426799, "grad_norm": 451.445556640625, "learning_rate": 8.506000000000001e-06, "loss": 23.211, "step": 42530 }, { "epoch": 0.08593349143695181, "grad_norm": 1390.3988037109375, "learning_rate": 8.508e-06, "loss": 25.8337, "step": 42540 }, { "epoch": 0.08595369206963561, "grad_norm": 660.3360595703125, "learning_rate": 8.51e-06, "loss": 43.6705, "step": 42550 }, { "epoch": 0.08597389270231943, "grad_norm": 749.7850952148438, "learning_rate": 8.512e-06, "loss": 37.2106, "step": 42560 }, { "epoch": 0.08599409333500325, "grad_norm": 137.21722412109375, "learning_rate": 8.514000000000002e-06, "loss": 46.5237, "step": 42570 }, { "epoch": 0.08601429396768706, "grad_norm": 347.36920166015625, "learning_rate": 8.516000000000001e-06, "loss": 30.8775, "step": 42580 }, { "epoch": 0.08603449460037088, "grad_norm": 0.0, "learning_rate": 8.518e-06, "loss": 15.3735, "step": 42590 }, { "epoch": 0.0860546952330547, "grad_norm": 262.8627624511719, "learning_rate": 8.52e-06, "loss": 29.8606, "step": 42600 }, { "epoch": 0.08607489586573852, "grad_norm": 183.59539794921875, "learning_rate": 8.522e-06, "loss": 21.1203, "step": 42610 }, { "epoch": 0.08609509649842233, "grad_norm": 1127.3863525390625, "learning_rate": 8.524000000000002e-06, "loss": 28.7105, "step": 42620 }, { "epoch": 0.08611529713110615, "grad_norm": 848.1657104492188, "learning_rate": 8.526000000000001e-06, "loss": 23.0514, "step": 42630 }, { "epoch": 0.08613549776378997, "grad_norm": 511.5576477050781, "learning_rate": 8.528e-06, "loss": 24.8539, "step": 42640 }, { "epoch": 0.08615569839647377, "grad_norm": 261.4380187988281, "learning_rate": 8.530000000000001e-06, "loss": 21.1395, "step": 42650 }, { "epoch": 0.0861758990291576, "grad_norm": 525.8989868164062, "learning_rate": 8.532e-06, "loss": 21.3154, "step": 42660 }, { "epoch": 0.08619609966184141, "grad_norm": 393.46014404296875, "learning_rate": 8.534000000000002e-06, "loss": 27.8852, "step": 42670 }, { "epoch": 0.08621630029452522, "grad_norm": 438.6636047363281, "learning_rate": 8.536000000000001e-06, "loss": 37.7932, "step": 42680 }, { "epoch": 0.08623650092720904, "grad_norm": 417.7564697265625, "learning_rate": 8.538e-06, "loss": 24.7863, "step": 42690 }, { "epoch": 0.08625670155989286, "grad_norm": 387.63525390625, "learning_rate": 8.540000000000001e-06, "loss": 19.4297, "step": 42700 }, { "epoch": 0.08627690219257667, "grad_norm": 145.06236267089844, "learning_rate": 8.542e-06, "loss": 28.4741, "step": 42710 }, { "epoch": 
0.08629710282526049, "grad_norm": 590.2205200195312, "learning_rate": 8.544000000000002e-06, "loss": 16.8451, "step": 42720 }, { "epoch": 0.0863173034579443, "grad_norm": 425.0194396972656, "learning_rate": 8.546000000000001e-06, "loss": 26.8013, "step": 42730 }, { "epoch": 0.08633750409062811, "grad_norm": 338.0776062011719, "learning_rate": 8.548e-06, "loss": 10.24, "step": 42740 }, { "epoch": 0.08635770472331193, "grad_norm": 435.5176086425781, "learning_rate": 8.550000000000001e-06, "loss": 18.2302, "step": 42750 }, { "epoch": 0.08637790535599575, "grad_norm": 366.2986755371094, "learning_rate": 8.552e-06, "loss": 27.9482, "step": 42760 }, { "epoch": 0.08639810598867957, "grad_norm": 741.6912231445312, "learning_rate": 8.554000000000001e-06, "loss": 22.2683, "step": 42770 }, { "epoch": 0.08641830662136338, "grad_norm": 501.5782165527344, "learning_rate": 8.556e-06, "loss": 27.7317, "step": 42780 }, { "epoch": 0.0864385072540472, "grad_norm": 475.4088134765625, "learning_rate": 8.558e-06, "loss": 39.8292, "step": 42790 }, { "epoch": 0.08645870788673102, "grad_norm": 1028.5577392578125, "learning_rate": 8.560000000000001e-06, "loss": 40.9841, "step": 42800 }, { "epoch": 0.08647890851941482, "grad_norm": 702.6845703125, "learning_rate": 8.562e-06, "loss": 29.6573, "step": 42810 }, { "epoch": 0.08649910915209864, "grad_norm": 397.6463317871094, "learning_rate": 8.564000000000001e-06, "loss": 39.3842, "step": 42820 }, { "epoch": 0.08651930978478246, "grad_norm": 340.2057800292969, "learning_rate": 8.566e-06, "loss": 31.5082, "step": 42830 }, { "epoch": 0.08653951041746627, "grad_norm": 237.69635009765625, "learning_rate": 8.568e-06, "loss": 19.8738, "step": 42840 }, { "epoch": 0.08655971105015009, "grad_norm": 706.278076171875, "learning_rate": 8.570000000000001e-06, "loss": 33.9298, "step": 42850 }, { "epoch": 0.08657991168283391, "grad_norm": 508.830078125, "learning_rate": 8.572e-06, "loss": 17.8937, "step": 42860 }, { "epoch": 0.08660011231551772, "grad_norm": 683.411865234375, "learning_rate": 8.574000000000001e-06, "loss": 30.3102, "step": 42870 }, { "epoch": 0.08662031294820154, "grad_norm": 1214.28466796875, "learning_rate": 8.576e-06, "loss": 39.2031, "step": 42880 }, { "epoch": 0.08664051358088536, "grad_norm": 468.9919738769531, "learning_rate": 8.578000000000002e-06, "loss": 15.042, "step": 42890 }, { "epoch": 0.08666071421356916, "grad_norm": 392.3851623535156, "learning_rate": 8.580000000000001e-06, "loss": 25.493, "step": 42900 }, { "epoch": 0.08668091484625298, "grad_norm": 111.14898681640625, "learning_rate": 8.582e-06, "loss": 20.3436, "step": 42910 }, { "epoch": 0.0867011154789368, "grad_norm": 811.585205078125, "learning_rate": 8.584000000000001e-06, "loss": 34.9878, "step": 42920 }, { "epoch": 0.08672131611162062, "grad_norm": 531.7177734375, "learning_rate": 8.586e-06, "loss": 29.4499, "step": 42930 }, { "epoch": 0.08674151674430443, "grad_norm": 1020.8329467773438, "learning_rate": 8.588000000000001e-06, "loss": 45.2873, "step": 42940 }, { "epoch": 0.08676171737698825, "grad_norm": 693.1109619140625, "learning_rate": 8.59e-06, "loss": 15.733, "step": 42950 }, { "epoch": 0.08678191800967207, "grad_norm": 175.2774658203125, "learning_rate": 8.592e-06, "loss": 12.659, "step": 42960 }, { "epoch": 0.08680211864235587, "grad_norm": 235.388427734375, "learning_rate": 8.594000000000001e-06, "loss": 27.075, "step": 42970 }, { "epoch": 0.0868223192750397, "grad_norm": 85.86357879638672, "learning_rate": 8.596e-06, "loss": 56.4613, "step": 42980 }, { "epoch": 
0.08684251990772351, "grad_norm": 545.3470458984375, "learning_rate": 8.598000000000001e-06, "loss": 23.8057, "step": 42990 }, { "epoch": 0.08686272054040732, "grad_norm": 344.33355712890625, "learning_rate": 8.6e-06, "loss": 19.7577, "step": 43000 }, { "epoch": 0.08688292117309114, "grad_norm": 144.74801635742188, "learning_rate": 8.602e-06, "loss": 16.6569, "step": 43010 }, { "epoch": 0.08690312180577496, "grad_norm": 292.4404296875, "learning_rate": 8.604000000000001e-06, "loss": 36.0593, "step": 43020 }, { "epoch": 0.08692332243845877, "grad_norm": 145.99012756347656, "learning_rate": 8.606e-06, "loss": 22.7391, "step": 43030 }, { "epoch": 0.08694352307114259, "grad_norm": 242.26412963867188, "learning_rate": 8.608000000000001e-06, "loss": 22.2086, "step": 43040 }, { "epoch": 0.0869637237038264, "grad_norm": 297.0873718261719, "learning_rate": 8.61e-06, "loss": 28.5126, "step": 43050 }, { "epoch": 0.08698392433651021, "grad_norm": 561.925537109375, "learning_rate": 8.612e-06, "loss": 30.4806, "step": 43060 }, { "epoch": 0.08700412496919403, "grad_norm": 459.6188659667969, "learning_rate": 8.614000000000001e-06, "loss": 17.2545, "step": 43070 }, { "epoch": 0.08702432560187785, "grad_norm": 876.9536743164062, "learning_rate": 8.616000000000002e-06, "loss": 17.9509, "step": 43080 }, { "epoch": 0.08704452623456167, "grad_norm": 157.70240783691406, "learning_rate": 8.618000000000001e-06, "loss": 27.4339, "step": 43090 }, { "epoch": 0.08706472686724548, "grad_norm": 452.8877868652344, "learning_rate": 8.62e-06, "loss": 32.1382, "step": 43100 }, { "epoch": 0.0870849274999293, "grad_norm": 113.4677963256836, "learning_rate": 8.622e-06, "loss": 35.2102, "step": 43110 }, { "epoch": 0.08710512813261312, "grad_norm": 404.09381103515625, "learning_rate": 8.624e-06, "loss": 20.1554, "step": 43120 }, { "epoch": 0.08712532876529692, "grad_norm": 451.4709167480469, "learning_rate": 8.626000000000002e-06, "loss": 18.2809, "step": 43130 }, { "epoch": 0.08714552939798074, "grad_norm": 440.0563049316406, "learning_rate": 8.628000000000001e-06, "loss": 7.4467, "step": 43140 }, { "epoch": 0.08716573003066456, "grad_norm": 855.1495361328125, "learning_rate": 8.63e-06, "loss": 35.828, "step": 43150 }, { "epoch": 0.08718593066334837, "grad_norm": 10.839653968811035, "learning_rate": 8.632e-06, "loss": 43.2081, "step": 43160 }, { "epoch": 0.08720613129603219, "grad_norm": 614.3381958007812, "learning_rate": 8.634e-06, "loss": 17.8169, "step": 43170 }, { "epoch": 0.08722633192871601, "grad_norm": 271.685791015625, "learning_rate": 8.636000000000002e-06, "loss": 31.9236, "step": 43180 }, { "epoch": 0.08724653256139982, "grad_norm": 554.6610717773438, "learning_rate": 8.638000000000001e-06, "loss": 30.3433, "step": 43190 }, { "epoch": 0.08726673319408364, "grad_norm": 785.8115844726562, "learning_rate": 8.64e-06, "loss": 42.5126, "step": 43200 }, { "epoch": 0.08728693382676746, "grad_norm": 818.6058959960938, "learning_rate": 8.642e-06, "loss": 24.9131, "step": 43210 }, { "epoch": 0.08730713445945126, "grad_norm": 333.1800231933594, "learning_rate": 8.644e-06, "loss": 19.2756, "step": 43220 }, { "epoch": 0.08732733509213508, "grad_norm": 167.22509765625, "learning_rate": 8.646000000000002e-06, "loss": 25.0911, "step": 43230 }, { "epoch": 0.0873475357248189, "grad_norm": 994.1484375, "learning_rate": 8.648000000000001e-06, "loss": 35.0122, "step": 43240 }, { "epoch": 0.08736773635750272, "grad_norm": 844.7338256835938, "learning_rate": 8.65e-06, "loss": 18.575, "step": 43250 }, { "epoch": 0.08738793699018653, 
"grad_norm": 176.0022735595703, "learning_rate": 8.652000000000001e-06, "loss": 48.7586, "step": 43260 }, { "epoch": 0.08740813762287035, "grad_norm": 178.6254119873047, "learning_rate": 8.654e-06, "loss": 18.2863, "step": 43270 }, { "epoch": 0.08742833825555417, "grad_norm": 541.4852905273438, "learning_rate": 8.656000000000001e-06, "loss": 18.9335, "step": 43280 }, { "epoch": 0.08744853888823798, "grad_norm": 424.3155822753906, "learning_rate": 8.658e-06, "loss": 27.0739, "step": 43290 }, { "epoch": 0.0874687395209218, "grad_norm": 598.650390625, "learning_rate": 8.66e-06, "loss": 25.5768, "step": 43300 }, { "epoch": 0.08748894015360562, "grad_norm": 760.1422729492188, "learning_rate": 8.662000000000001e-06, "loss": 28.0227, "step": 43310 }, { "epoch": 0.08750914078628942, "grad_norm": 464.99127197265625, "learning_rate": 8.664e-06, "loss": 19.4783, "step": 43320 }, { "epoch": 0.08752934141897324, "grad_norm": 110.35054016113281, "learning_rate": 8.666000000000001e-06, "loss": 26.5671, "step": 43330 }, { "epoch": 0.08754954205165706, "grad_norm": 261.06536865234375, "learning_rate": 8.668e-06, "loss": 33.3942, "step": 43340 }, { "epoch": 0.08756974268434087, "grad_norm": 342.5465393066406, "learning_rate": 8.67e-06, "loss": 18.9044, "step": 43350 }, { "epoch": 0.08758994331702469, "grad_norm": 270.8699951171875, "learning_rate": 8.672000000000001e-06, "loss": 20.0748, "step": 43360 }, { "epoch": 0.08761014394970851, "grad_norm": 489.7830810546875, "learning_rate": 8.674e-06, "loss": 24.418, "step": 43370 }, { "epoch": 0.08763034458239231, "grad_norm": 299.12762451171875, "learning_rate": 8.676000000000001e-06, "loss": 31.1323, "step": 43380 }, { "epoch": 0.08765054521507613, "grad_norm": 210.87306213378906, "learning_rate": 8.678e-06, "loss": 27.3704, "step": 43390 }, { "epoch": 0.08767074584775995, "grad_norm": 262.9859313964844, "learning_rate": 8.68e-06, "loss": 34.1434, "step": 43400 }, { "epoch": 0.08769094648044377, "grad_norm": 426.6127014160156, "learning_rate": 8.682000000000001e-06, "loss": 21.96, "step": 43410 }, { "epoch": 0.08771114711312758, "grad_norm": 794.215087890625, "learning_rate": 8.684e-06, "loss": 36.9314, "step": 43420 }, { "epoch": 0.0877313477458114, "grad_norm": 399.9016418457031, "learning_rate": 8.686000000000001e-06, "loss": 20.9496, "step": 43430 }, { "epoch": 0.08775154837849522, "grad_norm": 691.166259765625, "learning_rate": 8.688e-06, "loss": 38.0485, "step": 43440 }, { "epoch": 0.08777174901117903, "grad_norm": 711.8749389648438, "learning_rate": 8.690000000000002e-06, "loss": 25.7033, "step": 43450 }, { "epoch": 0.08779194964386285, "grad_norm": 171.9166717529297, "learning_rate": 8.692e-06, "loss": 21.509, "step": 43460 }, { "epoch": 0.08781215027654667, "grad_norm": 626.6830444335938, "learning_rate": 8.694e-06, "loss": 13.0115, "step": 43470 }, { "epoch": 0.08783235090923047, "grad_norm": 851.20654296875, "learning_rate": 8.696000000000001e-06, "loss": 18.5017, "step": 43480 }, { "epoch": 0.08785255154191429, "grad_norm": 569.1126708984375, "learning_rate": 8.698e-06, "loss": 23.5853, "step": 43490 }, { "epoch": 0.08787275217459811, "grad_norm": 538.6649780273438, "learning_rate": 8.700000000000001e-06, "loss": 20.7032, "step": 43500 }, { "epoch": 0.08789295280728192, "grad_norm": 783.0730590820312, "learning_rate": 8.702e-06, "loss": 35.5005, "step": 43510 }, { "epoch": 0.08791315343996574, "grad_norm": 371.6290283203125, "learning_rate": 8.704e-06, "loss": 17.9829, "step": 43520 }, { "epoch": 0.08793335407264956, "grad_norm": 243.0632781982422, 
"learning_rate": 8.706000000000001e-06, "loss": 25.7718, "step": 43530 }, { "epoch": 0.08795355470533336, "grad_norm": 428.3433532714844, "learning_rate": 8.708e-06, "loss": 22.0723, "step": 43540 }, { "epoch": 0.08797375533801718, "grad_norm": 359.037109375, "learning_rate": 8.710000000000001e-06, "loss": 38.0554, "step": 43550 }, { "epoch": 0.087993955970701, "grad_norm": 123.59037780761719, "learning_rate": 8.712e-06, "loss": 17.7862, "step": 43560 }, { "epoch": 0.08801415660338482, "grad_norm": 757.7338256835938, "learning_rate": 8.714e-06, "loss": 17.6592, "step": 43570 }, { "epoch": 0.08803435723606863, "grad_norm": 150.5660858154297, "learning_rate": 8.716000000000001e-06, "loss": 22.1696, "step": 43580 }, { "epoch": 0.08805455786875245, "grad_norm": 866.910400390625, "learning_rate": 8.718e-06, "loss": 30.1427, "step": 43590 }, { "epoch": 0.08807475850143627, "grad_norm": 533.5135498046875, "learning_rate": 8.720000000000001e-06, "loss": 37.1078, "step": 43600 }, { "epoch": 0.08809495913412008, "grad_norm": 1215.2125244140625, "learning_rate": 8.722e-06, "loss": 20.1203, "step": 43610 }, { "epoch": 0.0881151597668039, "grad_norm": 545.397216796875, "learning_rate": 8.724e-06, "loss": 31.156, "step": 43620 }, { "epoch": 0.08813536039948772, "grad_norm": 706.6424560546875, "learning_rate": 8.726e-06, "loss": 16.5895, "step": 43630 }, { "epoch": 0.08815556103217152, "grad_norm": 173.1048583984375, "learning_rate": 8.728e-06, "loss": 16.0207, "step": 43640 }, { "epoch": 0.08817576166485534, "grad_norm": 1318.9002685546875, "learning_rate": 8.730000000000001e-06, "loss": 21.7477, "step": 43650 }, { "epoch": 0.08819596229753916, "grad_norm": 1221.095947265625, "learning_rate": 8.732e-06, "loss": 41.428, "step": 43660 }, { "epoch": 0.08821616293022297, "grad_norm": 424.8775634765625, "learning_rate": 8.734e-06, "loss": 33.7021, "step": 43670 }, { "epoch": 0.08823636356290679, "grad_norm": 435.27850341796875, "learning_rate": 8.736e-06, "loss": 23.2997, "step": 43680 }, { "epoch": 0.08825656419559061, "grad_norm": 443.4166259765625, "learning_rate": 8.738000000000002e-06, "loss": 12.6361, "step": 43690 }, { "epoch": 0.08827676482827441, "grad_norm": 161.66542053222656, "learning_rate": 8.740000000000001e-06, "loss": 15.993, "step": 43700 }, { "epoch": 0.08829696546095823, "grad_norm": 612.4815063476562, "learning_rate": 8.742e-06, "loss": 32.946, "step": 43710 }, { "epoch": 0.08831716609364205, "grad_norm": 1040.73681640625, "learning_rate": 8.744e-06, "loss": 26.8302, "step": 43720 }, { "epoch": 0.08833736672632587, "grad_norm": 60.330177307128906, "learning_rate": 8.746e-06, "loss": 25.7003, "step": 43730 }, { "epoch": 0.08835756735900968, "grad_norm": 190.57351684570312, "learning_rate": 8.748000000000002e-06, "loss": 27.0377, "step": 43740 }, { "epoch": 0.0883777679916935, "grad_norm": 16.609949111938477, "learning_rate": 8.750000000000001e-06, "loss": 25.3611, "step": 43750 }, { "epoch": 0.08839796862437732, "grad_norm": 407.3550720214844, "learning_rate": 8.752e-06, "loss": 27.6014, "step": 43760 }, { "epoch": 0.08841816925706113, "grad_norm": 130.43447875976562, "learning_rate": 8.754e-06, "loss": 10.7957, "step": 43770 }, { "epoch": 0.08843836988974495, "grad_norm": 827.7708129882812, "learning_rate": 8.756e-06, "loss": 31.6983, "step": 43780 }, { "epoch": 0.08845857052242877, "grad_norm": 395.01715087890625, "learning_rate": 8.758000000000002e-06, "loss": 26.3091, "step": 43790 }, { "epoch": 0.08847877115511257, "grad_norm": 301.2219543457031, "learning_rate": 8.76e-06, "loss": 
28.6031, "step": 43800 }, { "epoch": 0.0884989717877964, "grad_norm": 660.8982543945312, "learning_rate": 8.762e-06, "loss": 45.3063, "step": 43810 }, { "epoch": 0.08851917242048021, "grad_norm": 533.4058227539062, "learning_rate": 8.764e-06, "loss": 27.4031, "step": 43820 }, { "epoch": 0.08853937305316402, "grad_norm": 337.8182067871094, "learning_rate": 8.766e-06, "loss": 14.2368, "step": 43830 }, { "epoch": 0.08855957368584784, "grad_norm": 611.1868286132812, "learning_rate": 8.768000000000001e-06, "loss": 31.2573, "step": 43840 }, { "epoch": 0.08857977431853166, "grad_norm": 775.6557006835938, "learning_rate": 8.77e-06, "loss": 20.9313, "step": 43850 }, { "epoch": 0.08859997495121547, "grad_norm": 1237.54248046875, "learning_rate": 8.772e-06, "loss": 31.3704, "step": 43860 }, { "epoch": 0.08862017558389929, "grad_norm": 626.9205932617188, "learning_rate": 8.774000000000001e-06, "loss": 39.7188, "step": 43870 }, { "epoch": 0.0886403762165831, "grad_norm": 137.8606414794922, "learning_rate": 8.776e-06, "loss": 27.5984, "step": 43880 }, { "epoch": 0.08866057684926693, "grad_norm": 1082.872802734375, "learning_rate": 8.778000000000001e-06, "loss": 18.0964, "step": 43890 }, { "epoch": 0.08868077748195073, "grad_norm": 220.36451721191406, "learning_rate": 8.78e-06, "loss": 34.1153, "step": 43900 }, { "epoch": 0.08870097811463455, "grad_norm": 650.5850219726562, "learning_rate": 8.782e-06, "loss": 40.8374, "step": 43910 }, { "epoch": 0.08872117874731837, "grad_norm": 736.3362426757812, "learning_rate": 8.784000000000001e-06, "loss": 33.0653, "step": 43920 }, { "epoch": 0.08874137938000218, "grad_norm": 182.11184692382812, "learning_rate": 8.786000000000002e-06, "loss": 36.9502, "step": 43930 }, { "epoch": 0.088761580012686, "grad_norm": 204.7152099609375, "learning_rate": 8.788000000000001e-06, "loss": 32.2236, "step": 43940 }, { "epoch": 0.08878178064536982, "grad_norm": 664.1483154296875, "learning_rate": 8.79e-06, "loss": 29.3924, "step": 43950 }, { "epoch": 0.08880198127805362, "grad_norm": 474.9621276855469, "learning_rate": 8.792e-06, "loss": 16.2539, "step": 43960 }, { "epoch": 0.08882218191073744, "grad_norm": 1068.8729248046875, "learning_rate": 8.794e-06, "loss": 21.6826, "step": 43970 }, { "epoch": 0.08884238254342126, "grad_norm": 1158.564697265625, "learning_rate": 8.796000000000002e-06, "loss": 30.4319, "step": 43980 }, { "epoch": 0.08886258317610507, "grad_norm": 57.3450927734375, "learning_rate": 8.798000000000001e-06, "loss": 22.6598, "step": 43990 }, { "epoch": 0.08888278380878889, "grad_norm": 529.1062622070312, "learning_rate": 8.8e-06, "loss": 28.3954, "step": 44000 }, { "epoch": 0.08890298444147271, "grad_norm": 371.1905517578125, "learning_rate": 8.802e-06, "loss": 38.479, "step": 44010 }, { "epoch": 0.08892318507415652, "grad_norm": 220.77362060546875, "learning_rate": 8.804e-06, "loss": 18.6284, "step": 44020 }, { "epoch": 0.08894338570684034, "grad_norm": 539.539794921875, "learning_rate": 8.806000000000002e-06, "loss": 12.7973, "step": 44030 }, { "epoch": 0.08896358633952416, "grad_norm": 599.9065551757812, "learning_rate": 8.808000000000001e-06, "loss": 35.6135, "step": 44040 }, { "epoch": 0.08898378697220798, "grad_norm": 423.3573913574219, "learning_rate": 8.81e-06, "loss": 24.9562, "step": 44050 }, { "epoch": 0.08900398760489178, "grad_norm": 571.5018920898438, "learning_rate": 8.812000000000001e-06, "loss": 36.8214, "step": 44060 }, { "epoch": 0.0890241882375756, "grad_norm": 542.1673583984375, "learning_rate": 8.814e-06, "loss": 18.4148, "step": 44070 }, { 
"epoch": 0.08904438887025942, "grad_norm": 347.5661926269531, "learning_rate": 8.816000000000002e-06, "loss": 28.4541, "step": 44080 }, { "epoch": 0.08906458950294323, "grad_norm": 434.1124572753906, "learning_rate": 8.818000000000001e-06, "loss": 16.2437, "step": 44090 }, { "epoch": 0.08908479013562705, "grad_norm": 646.8975830078125, "learning_rate": 8.82e-06, "loss": 23.2977, "step": 44100 }, { "epoch": 0.08910499076831087, "grad_norm": 631.682861328125, "learning_rate": 8.822000000000001e-06, "loss": 30.6054, "step": 44110 }, { "epoch": 0.08912519140099467, "grad_norm": 479.34423828125, "learning_rate": 8.824e-06, "loss": 27.6769, "step": 44120 }, { "epoch": 0.0891453920336785, "grad_norm": 130.42416381835938, "learning_rate": 8.826000000000002e-06, "loss": 16.2841, "step": 44130 }, { "epoch": 0.08916559266636231, "grad_norm": 679.4235229492188, "learning_rate": 8.828000000000001e-06, "loss": 29.5048, "step": 44140 }, { "epoch": 0.08918579329904612, "grad_norm": 255.19798278808594, "learning_rate": 8.83e-06, "loss": 10.4203, "step": 44150 }, { "epoch": 0.08920599393172994, "grad_norm": 2336.6416015625, "learning_rate": 8.832000000000001e-06, "loss": 52.0455, "step": 44160 }, { "epoch": 0.08922619456441376, "grad_norm": 904.8772583007812, "learning_rate": 8.834e-06, "loss": 44.5716, "step": 44170 }, { "epoch": 0.08924639519709757, "grad_norm": 1653.8470458984375, "learning_rate": 8.836000000000001e-06, "loss": 33.5973, "step": 44180 }, { "epoch": 0.08926659582978139, "grad_norm": 799.48583984375, "learning_rate": 8.838e-06, "loss": 37.314, "step": 44190 }, { "epoch": 0.0892867964624652, "grad_norm": 363.776123046875, "learning_rate": 8.84e-06, "loss": 23.4966, "step": 44200 }, { "epoch": 0.08930699709514903, "grad_norm": 515.052490234375, "learning_rate": 8.842000000000001e-06, "loss": 23.1721, "step": 44210 }, { "epoch": 0.08932719772783283, "grad_norm": 1712.2266845703125, "learning_rate": 8.844e-06, "loss": 38.6416, "step": 44220 }, { "epoch": 0.08934739836051665, "grad_norm": 997.6633911132812, "learning_rate": 8.846000000000001e-06, "loss": 27.9869, "step": 44230 }, { "epoch": 0.08936759899320047, "grad_norm": 181.0132293701172, "learning_rate": 8.848e-06, "loss": 22.755, "step": 44240 }, { "epoch": 0.08938779962588428, "grad_norm": 417.53985595703125, "learning_rate": 8.85e-06, "loss": 28.4482, "step": 44250 }, { "epoch": 0.0894080002585681, "grad_norm": 661.7625122070312, "learning_rate": 8.852000000000001e-06, "loss": 32.031, "step": 44260 }, { "epoch": 0.08942820089125192, "grad_norm": 171.3567352294922, "learning_rate": 8.854e-06, "loss": 20.9981, "step": 44270 }, { "epoch": 0.08944840152393572, "grad_norm": 565.4457397460938, "learning_rate": 8.856000000000001e-06, "loss": 19.9229, "step": 44280 }, { "epoch": 0.08946860215661954, "grad_norm": 1008.0512084960938, "learning_rate": 8.858e-06, "loss": 23.9828, "step": 44290 }, { "epoch": 0.08948880278930336, "grad_norm": 650.6478881835938, "learning_rate": 8.860000000000002e-06, "loss": 32.9885, "step": 44300 }, { "epoch": 0.08950900342198717, "grad_norm": 236.9464569091797, "learning_rate": 8.862000000000001e-06, "loss": 14.9416, "step": 44310 }, { "epoch": 0.08952920405467099, "grad_norm": 485.0042419433594, "learning_rate": 8.864e-06, "loss": 21.3128, "step": 44320 }, { "epoch": 0.08954940468735481, "grad_norm": 665.9822998046875, "learning_rate": 8.866000000000001e-06, "loss": 30.5664, "step": 44330 }, { "epoch": 0.08956960532003862, "grad_norm": 575.6888427734375, "learning_rate": 8.868e-06, "loss": 23.7571, "step": 44340 }, 
{ "epoch": 0.08958980595272244, "grad_norm": 369.822265625, "learning_rate": 8.870000000000001e-06, "loss": 21.7342, "step": 44350 }, { "epoch": 0.08961000658540626, "grad_norm": 434.0180969238281, "learning_rate": 8.872e-06, "loss": 22.0393, "step": 44360 }, { "epoch": 0.08963020721809008, "grad_norm": 124.84725189208984, "learning_rate": 8.874e-06, "loss": 20.3464, "step": 44370 }, { "epoch": 0.08965040785077388, "grad_norm": 658.8175048828125, "learning_rate": 8.876e-06, "loss": 51.8665, "step": 44380 }, { "epoch": 0.0896706084834577, "grad_norm": 400.6201477050781, "learning_rate": 8.878e-06, "loss": 36.426, "step": 44390 }, { "epoch": 0.08969080911614152, "grad_norm": 450.5768127441406, "learning_rate": 8.880000000000001e-06, "loss": 39.3117, "step": 44400 }, { "epoch": 0.08971100974882533, "grad_norm": 505.92724609375, "learning_rate": 8.882e-06, "loss": 19.8241, "step": 44410 }, { "epoch": 0.08973121038150915, "grad_norm": 537.7205200195312, "learning_rate": 8.884e-06, "loss": 27.0366, "step": 44420 }, { "epoch": 0.08975141101419297, "grad_norm": 1432.2957763671875, "learning_rate": 8.886000000000001e-06, "loss": 49.6572, "step": 44430 }, { "epoch": 0.08977161164687678, "grad_norm": 87.47107696533203, "learning_rate": 8.888e-06, "loss": 40.1563, "step": 44440 }, { "epoch": 0.0897918122795606, "grad_norm": 417.109619140625, "learning_rate": 8.890000000000001e-06, "loss": 20.4187, "step": 44450 }, { "epoch": 0.08981201291224442, "grad_norm": 192.43753051757812, "learning_rate": 8.892e-06, "loss": 10.9749, "step": 44460 }, { "epoch": 0.08983221354492822, "grad_norm": 501.7425231933594, "learning_rate": 8.894e-06, "loss": 32.2478, "step": 44470 }, { "epoch": 0.08985241417761204, "grad_norm": 219.19515991210938, "learning_rate": 8.896000000000001e-06, "loss": 41.9935, "step": 44480 }, { "epoch": 0.08987261481029586, "grad_norm": 1015.3475952148438, "learning_rate": 8.898000000000002e-06, "loss": 28.0073, "step": 44490 }, { "epoch": 0.08989281544297967, "grad_norm": 63.530372619628906, "learning_rate": 8.900000000000001e-06, "loss": 27.8875, "step": 44500 }, { "epoch": 0.08991301607566349, "grad_norm": 193.50613403320312, "learning_rate": 8.902e-06, "loss": 17.7265, "step": 44510 }, { "epoch": 0.08993321670834731, "grad_norm": 235.31927490234375, "learning_rate": 8.904e-06, "loss": 25.7365, "step": 44520 }, { "epoch": 0.08995341734103113, "grad_norm": 942.6410522460938, "learning_rate": 8.906e-06, "loss": 21.3539, "step": 44530 }, { "epoch": 0.08997361797371493, "grad_norm": 334.50787353515625, "learning_rate": 8.908000000000002e-06, "loss": 23.9226, "step": 44540 }, { "epoch": 0.08999381860639875, "grad_norm": 394.564697265625, "learning_rate": 8.910000000000001e-06, "loss": 27.8762, "step": 44550 }, { "epoch": 0.09001401923908257, "grad_norm": 385.86102294921875, "learning_rate": 8.912e-06, "loss": 12.1461, "step": 44560 }, { "epoch": 0.09003421987176638, "grad_norm": 266.55340576171875, "learning_rate": 8.914e-06, "loss": 28.2976, "step": 44570 }, { "epoch": 0.0900544205044502, "grad_norm": 353.8782958984375, "learning_rate": 8.916e-06, "loss": 54.0394, "step": 44580 }, { "epoch": 0.09007462113713402, "grad_norm": 118.74738311767578, "learning_rate": 8.918000000000002e-06, "loss": 38.6842, "step": 44590 }, { "epoch": 0.09009482176981783, "grad_norm": 520.180908203125, "learning_rate": 8.920000000000001e-06, "loss": 38.7813, "step": 44600 }, { "epoch": 0.09011502240250165, "grad_norm": 1197.62353515625, "learning_rate": 8.922e-06, "loss": 28.3833, "step": 44610 }, { "epoch": 
0.09013522303518547, "grad_norm": 563.7283325195312, "learning_rate": 8.924e-06, "loss": 18.2278, "step": 44620 }, { "epoch": 0.09015542366786927, "grad_norm": 544.5486450195312, "learning_rate": 8.926e-06, "loss": 33.8533, "step": 44630 }, { "epoch": 0.09017562430055309, "grad_norm": 568.8088989257812, "learning_rate": 8.928000000000002e-06, "loss": 42.4741, "step": 44640 }, { "epoch": 0.09019582493323691, "grad_norm": 683.1576538085938, "learning_rate": 8.930000000000001e-06, "loss": 28.4008, "step": 44650 }, { "epoch": 0.09021602556592072, "grad_norm": 246.2177276611328, "learning_rate": 8.932e-06, "loss": 28.559, "step": 44660 }, { "epoch": 0.09023622619860454, "grad_norm": 644.2553100585938, "learning_rate": 8.934000000000001e-06, "loss": 18.8847, "step": 44670 }, { "epoch": 0.09025642683128836, "grad_norm": 1007.45947265625, "learning_rate": 8.936e-06, "loss": 33.1327, "step": 44680 }, { "epoch": 0.09027662746397218, "grad_norm": 695.0758666992188, "learning_rate": 8.938000000000001e-06, "loss": 25.8596, "step": 44690 }, { "epoch": 0.09029682809665598, "grad_norm": 173.3174285888672, "learning_rate": 8.94e-06, "loss": 27.8275, "step": 44700 }, { "epoch": 0.0903170287293398, "grad_norm": 156.8406219482422, "learning_rate": 8.942e-06, "loss": 30.0689, "step": 44710 }, { "epoch": 0.09033722936202362, "grad_norm": 312.4314880371094, "learning_rate": 8.944000000000001e-06, "loss": 20.6215, "step": 44720 }, { "epoch": 0.09035742999470743, "grad_norm": 1031.1641845703125, "learning_rate": 8.946e-06, "loss": 30.0523, "step": 44730 }, { "epoch": 0.09037763062739125, "grad_norm": 594.5155029296875, "learning_rate": 8.948000000000001e-06, "loss": 24.2484, "step": 44740 }, { "epoch": 0.09039783126007507, "grad_norm": 305.5896301269531, "learning_rate": 8.95e-06, "loss": 26.1327, "step": 44750 }, { "epoch": 0.09041803189275888, "grad_norm": 357.19189453125, "learning_rate": 8.952e-06, "loss": 23.2766, "step": 44760 }, { "epoch": 0.0904382325254427, "grad_norm": 378.5004577636719, "learning_rate": 8.954000000000001e-06, "loss": 24.1598, "step": 44770 }, { "epoch": 0.09045843315812652, "grad_norm": 571.3589477539062, "learning_rate": 8.956e-06, "loss": 43.4971, "step": 44780 }, { "epoch": 0.09047863379081032, "grad_norm": 250.39462280273438, "learning_rate": 8.958000000000001e-06, "loss": 21.941, "step": 44790 }, { "epoch": 0.09049883442349414, "grad_norm": 633.5438232421875, "learning_rate": 8.96e-06, "loss": 23.1309, "step": 44800 }, { "epoch": 0.09051903505617796, "grad_norm": 424.7410583496094, "learning_rate": 8.962e-06, "loss": 23.7086, "step": 44810 }, { "epoch": 0.09053923568886177, "grad_norm": 197.0968780517578, "learning_rate": 8.964000000000001e-06, "loss": 23.5309, "step": 44820 }, { "epoch": 0.09055943632154559, "grad_norm": 293.7751159667969, "learning_rate": 8.966e-06, "loss": 28.0905, "step": 44830 }, { "epoch": 0.09057963695422941, "grad_norm": 91.09911346435547, "learning_rate": 8.968000000000001e-06, "loss": 20.4776, "step": 44840 }, { "epoch": 0.09059983758691323, "grad_norm": 668.6318969726562, "learning_rate": 8.97e-06, "loss": 25.6164, "step": 44850 }, { "epoch": 0.09062003821959703, "grad_norm": 216.250732421875, "learning_rate": 8.972000000000002e-06, "loss": 20.6183, "step": 44860 }, { "epoch": 0.09064023885228085, "grad_norm": 90.79194641113281, "learning_rate": 8.974e-06, "loss": 29.3862, "step": 44870 }, { "epoch": 0.09066043948496467, "grad_norm": 927.6483764648438, "learning_rate": 8.976e-06, "loss": 20.3835, "step": 44880 }, { "epoch": 0.09068064011764848, 
"grad_norm": 1245.59375, "learning_rate": 8.978000000000001e-06, "loss": 36.7677, "step": 44890 }, { "epoch": 0.0907008407503323, "grad_norm": 653.4105834960938, "learning_rate": 8.98e-06, "loss": 31.7304, "step": 44900 }, { "epoch": 0.09072104138301612, "grad_norm": 239.88685607910156, "learning_rate": 8.982000000000001e-06, "loss": 30.5177, "step": 44910 }, { "epoch": 0.09074124201569993, "grad_norm": 1254.9267578125, "learning_rate": 8.984e-06, "loss": 45.9213, "step": 44920 }, { "epoch": 0.09076144264838375, "grad_norm": 237.966064453125, "learning_rate": 8.986e-06, "loss": 30.8989, "step": 44930 }, { "epoch": 0.09078164328106757, "grad_norm": 495.72808837890625, "learning_rate": 8.988000000000001e-06, "loss": 33.639, "step": 44940 }, { "epoch": 0.09080184391375137, "grad_norm": 653.3446044921875, "learning_rate": 8.99e-06, "loss": 36.9416, "step": 44950 }, { "epoch": 0.0908220445464352, "grad_norm": 40.03471755981445, "learning_rate": 8.992000000000001e-06, "loss": 15.2221, "step": 44960 }, { "epoch": 0.09084224517911901, "grad_norm": 666.4488525390625, "learning_rate": 8.994e-06, "loss": 29.3564, "step": 44970 }, { "epoch": 0.09086244581180282, "grad_norm": 497.86419677734375, "learning_rate": 8.996e-06, "loss": 29.4009, "step": 44980 }, { "epoch": 0.09088264644448664, "grad_norm": 230.97999572753906, "learning_rate": 8.998000000000001e-06, "loss": 17.3159, "step": 44990 }, { "epoch": 0.09090284707717046, "grad_norm": 1031.1046142578125, "learning_rate": 9e-06, "loss": 18.6158, "step": 45000 }, { "epoch": 0.09092304770985428, "grad_norm": 1001.5123901367188, "learning_rate": 9.002000000000001e-06, "loss": 17.3041, "step": 45010 }, { "epoch": 0.09094324834253809, "grad_norm": 458.33746337890625, "learning_rate": 9.004e-06, "loss": 26.3967, "step": 45020 }, { "epoch": 0.0909634489752219, "grad_norm": 464.9259948730469, "learning_rate": 9.006e-06, "loss": 18.7797, "step": 45030 }, { "epoch": 0.09098364960790573, "grad_norm": 177.4857177734375, "learning_rate": 9.008e-06, "loss": 20.0804, "step": 45040 }, { "epoch": 0.09100385024058953, "grad_norm": 186.34747314453125, "learning_rate": 9.01e-06, "loss": 25.6041, "step": 45050 }, { "epoch": 0.09102405087327335, "grad_norm": 156.54200744628906, "learning_rate": 9.012000000000001e-06, "loss": 14.926, "step": 45060 }, { "epoch": 0.09104425150595717, "grad_norm": 1313.6944580078125, "learning_rate": 9.014e-06, "loss": 28.0868, "step": 45070 }, { "epoch": 0.09106445213864098, "grad_norm": 1131.9775390625, "learning_rate": 9.016e-06, "loss": 32.2958, "step": 45080 }, { "epoch": 0.0910846527713248, "grad_norm": 423.86138916015625, "learning_rate": 9.018e-06, "loss": 20.0051, "step": 45090 }, { "epoch": 0.09110485340400862, "grad_norm": 537.714599609375, "learning_rate": 9.020000000000002e-06, "loss": 18.3262, "step": 45100 }, { "epoch": 0.09112505403669242, "grad_norm": 1961.3375244140625, "learning_rate": 9.022000000000001e-06, "loss": 23.6604, "step": 45110 }, { "epoch": 0.09114525466937624, "grad_norm": 424.4392395019531, "learning_rate": 9.024e-06, "loss": 45.3459, "step": 45120 }, { "epoch": 0.09116545530206006, "grad_norm": 536.6264038085938, "learning_rate": 9.026e-06, "loss": 41.8265, "step": 45130 }, { "epoch": 0.09118565593474387, "grad_norm": 195.9641571044922, "learning_rate": 9.028e-06, "loss": 29.6634, "step": 45140 }, { "epoch": 0.09120585656742769, "grad_norm": 561.6903076171875, "learning_rate": 9.030000000000002e-06, "loss": 34.5336, "step": 45150 }, { "epoch": 0.09122605720011151, "grad_norm": 404.424560546875, 
"learning_rate": 9.032000000000001e-06, "loss": 30.3016, "step": 45160 }, { "epoch": 0.09124625783279533, "grad_norm": 404.8960266113281, "learning_rate": 9.034e-06, "loss": 23.3106, "step": 45170 }, { "epoch": 0.09126645846547914, "grad_norm": 227.19601440429688, "learning_rate": 9.036e-06, "loss": 13.8677, "step": 45180 }, { "epoch": 0.09128665909816296, "grad_norm": 1348.8074951171875, "learning_rate": 9.038e-06, "loss": 40.3152, "step": 45190 }, { "epoch": 0.09130685973084678, "grad_norm": 285.83380126953125, "learning_rate": 9.040000000000002e-06, "loss": 26.9267, "step": 45200 }, { "epoch": 0.09132706036353058, "grad_norm": 1111.412109375, "learning_rate": 9.042e-06, "loss": 17.8624, "step": 45210 }, { "epoch": 0.0913472609962144, "grad_norm": 252.94573974609375, "learning_rate": 9.044e-06, "loss": 33.217, "step": 45220 }, { "epoch": 0.09136746162889822, "grad_norm": 122.52388763427734, "learning_rate": 9.046000000000001e-06, "loss": 27.4571, "step": 45230 }, { "epoch": 0.09138766226158203, "grad_norm": 1031.7841796875, "learning_rate": 9.048e-06, "loss": 31.5857, "step": 45240 }, { "epoch": 0.09140786289426585, "grad_norm": 288.2206726074219, "learning_rate": 9.050000000000001e-06, "loss": 34.444, "step": 45250 }, { "epoch": 0.09142806352694967, "grad_norm": 503.2816162109375, "learning_rate": 9.052e-06, "loss": 23.9389, "step": 45260 }, { "epoch": 0.09144826415963347, "grad_norm": 79.8081283569336, "learning_rate": 9.054e-06, "loss": 36.626, "step": 45270 }, { "epoch": 0.0914684647923173, "grad_norm": 74.55158233642578, "learning_rate": 9.056000000000001e-06, "loss": 15.2105, "step": 45280 }, { "epoch": 0.09148866542500111, "grad_norm": 408.1317443847656, "learning_rate": 9.058000000000002e-06, "loss": 26.4177, "step": 45290 }, { "epoch": 0.09150886605768492, "grad_norm": 183.99171447753906, "learning_rate": 9.060000000000001e-06, "loss": 28.0801, "step": 45300 }, { "epoch": 0.09152906669036874, "grad_norm": 711.5023803710938, "learning_rate": 9.062e-06, "loss": 44.5351, "step": 45310 }, { "epoch": 0.09154926732305256, "grad_norm": 684.1775512695312, "learning_rate": 9.064e-06, "loss": 23.5863, "step": 45320 }, { "epoch": 0.09156946795573638, "grad_norm": 288.3268127441406, "learning_rate": 9.066000000000001e-06, "loss": 20.7055, "step": 45330 }, { "epoch": 0.09158966858842019, "grad_norm": 383.0359802246094, "learning_rate": 9.068000000000002e-06, "loss": 32.7892, "step": 45340 }, { "epoch": 0.091609869221104, "grad_norm": 602.8131103515625, "learning_rate": 9.070000000000001e-06, "loss": 30.4119, "step": 45350 }, { "epoch": 0.09163006985378783, "grad_norm": 874.2801513671875, "learning_rate": 9.072e-06, "loss": 16.9795, "step": 45360 }, { "epoch": 0.09165027048647163, "grad_norm": 43.25394058227539, "learning_rate": 9.074e-06, "loss": 23.7794, "step": 45370 }, { "epoch": 0.09167047111915545, "grad_norm": 591.4110107421875, "learning_rate": 9.076000000000001e-06, "loss": 17.5977, "step": 45380 }, { "epoch": 0.09169067175183927, "grad_norm": 294.79351806640625, "learning_rate": 9.078000000000002e-06, "loss": 43.4651, "step": 45390 }, { "epoch": 0.09171087238452308, "grad_norm": 596.6688232421875, "learning_rate": 9.080000000000001e-06, "loss": 21.6313, "step": 45400 }, { "epoch": 0.0917310730172069, "grad_norm": 666.8948364257812, "learning_rate": 9.082e-06, "loss": 24.9612, "step": 45410 }, { "epoch": 0.09175127364989072, "grad_norm": 1062.8319091796875, "learning_rate": 9.084e-06, "loss": 35.6636, "step": 45420 }, { "epoch": 0.09177147428257452, "grad_norm": 472.1858825683594, 
"learning_rate": 9.086e-06, "loss": 21.0453, "step": 45430 }, { "epoch": 0.09179167491525834, "grad_norm": 104.99319458007812, "learning_rate": 9.088000000000002e-06, "loss": 22.8547, "step": 45440 }, { "epoch": 0.09181187554794216, "grad_norm": 627.6539916992188, "learning_rate": 9.090000000000001e-06, "loss": 39.3272, "step": 45450 }, { "epoch": 0.09183207618062597, "grad_norm": 1194.921630859375, "learning_rate": 9.092e-06, "loss": 35.9621, "step": 45460 }, { "epoch": 0.09185227681330979, "grad_norm": 295.4495849609375, "learning_rate": 9.094000000000001e-06, "loss": 33.5286, "step": 45470 }, { "epoch": 0.09187247744599361, "grad_norm": 303.76641845703125, "learning_rate": 9.096e-06, "loss": 16.3219, "step": 45480 }, { "epoch": 0.09189267807867743, "grad_norm": 308.8945617675781, "learning_rate": 9.098000000000002e-06, "loss": 11.375, "step": 45490 }, { "epoch": 0.09191287871136124, "grad_norm": 264.4830322265625, "learning_rate": 9.100000000000001e-06, "loss": 17.8894, "step": 45500 }, { "epoch": 0.09193307934404506, "grad_norm": 859.7493896484375, "learning_rate": 9.102e-06, "loss": 28.4209, "step": 45510 }, { "epoch": 0.09195327997672888, "grad_norm": 277.9159240722656, "learning_rate": 9.104000000000001e-06, "loss": 31.0762, "step": 45520 }, { "epoch": 0.09197348060941268, "grad_norm": 779.0860595703125, "learning_rate": 9.106e-06, "loss": 32.6603, "step": 45530 }, { "epoch": 0.0919936812420965, "grad_norm": 305.54998779296875, "learning_rate": 9.108000000000002e-06, "loss": 23.2495, "step": 45540 }, { "epoch": 0.09201388187478032, "grad_norm": 478.4544982910156, "learning_rate": 9.110000000000001e-06, "loss": 25.226, "step": 45550 }, { "epoch": 0.09203408250746413, "grad_norm": 795.5175170898438, "learning_rate": 9.112e-06, "loss": 24.1799, "step": 45560 }, { "epoch": 0.09205428314014795, "grad_norm": 347.9759216308594, "learning_rate": 9.114000000000001e-06, "loss": 18.2694, "step": 45570 }, { "epoch": 0.09207448377283177, "grad_norm": 580.8516235351562, "learning_rate": 9.116e-06, "loss": 14.3688, "step": 45580 }, { "epoch": 0.09209468440551558, "grad_norm": 573.6776733398438, "learning_rate": 9.118000000000001e-06, "loss": 30.0077, "step": 45590 }, { "epoch": 0.0921148850381994, "grad_norm": 477.5769348144531, "learning_rate": 9.12e-06, "loss": 21.5053, "step": 45600 }, { "epoch": 0.09213508567088322, "grad_norm": 404.3766174316406, "learning_rate": 9.122e-06, "loss": 40.801, "step": 45610 }, { "epoch": 0.09215528630356702, "grad_norm": 420.88336181640625, "learning_rate": 9.124000000000001e-06, "loss": 20.7538, "step": 45620 }, { "epoch": 0.09217548693625084, "grad_norm": 280.4460144042969, "learning_rate": 9.126e-06, "loss": 29.4988, "step": 45630 }, { "epoch": 0.09219568756893466, "grad_norm": 323.4939880371094, "learning_rate": 9.128e-06, "loss": 27.4168, "step": 45640 }, { "epoch": 0.09221588820161847, "grad_norm": 575.996826171875, "learning_rate": 9.13e-06, "loss": 20.6319, "step": 45650 }, { "epoch": 0.09223608883430229, "grad_norm": 690.1239013671875, "learning_rate": 9.132000000000002e-06, "loss": 21.5123, "step": 45660 }, { "epoch": 0.09225628946698611, "grad_norm": 2189.362060546875, "learning_rate": 9.134000000000001e-06, "loss": 32.8427, "step": 45670 }, { "epoch": 0.09227649009966993, "grad_norm": 559.661865234375, "learning_rate": 9.136e-06, "loss": 23.2627, "step": 45680 }, { "epoch": 0.09229669073235373, "grad_norm": 852.9085083007812, "learning_rate": 9.138e-06, "loss": 21.4998, "step": 45690 }, { "epoch": 0.09231689136503755, "grad_norm": 159.3247528076172, 
"learning_rate": 9.14e-06, "loss": 23.1891, "step": 45700 }, { "epoch": 0.09233709199772137, "grad_norm": 492.49896240234375, "learning_rate": 9.142000000000002e-06, "loss": 33.5397, "step": 45710 }, { "epoch": 0.09235729263040518, "grad_norm": 287.7591247558594, "learning_rate": 9.144000000000001e-06, "loss": 35.0218, "step": 45720 }, { "epoch": 0.092377493263089, "grad_norm": 223.2801055908203, "learning_rate": 9.146e-06, "loss": 15.031, "step": 45730 }, { "epoch": 0.09239769389577282, "grad_norm": 201.246826171875, "learning_rate": 9.148e-06, "loss": 27.2919, "step": 45740 }, { "epoch": 0.09241789452845663, "grad_norm": 520.645751953125, "learning_rate": 9.15e-06, "loss": 20.69, "step": 45750 }, { "epoch": 0.09243809516114045, "grad_norm": 602.8200073242188, "learning_rate": 9.152000000000001e-06, "loss": 11.4767, "step": 45760 }, { "epoch": 0.09245829579382427, "grad_norm": 380.5906982421875, "learning_rate": 9.154e-06, "loss": 21.1936, "step": 45770 }, { "epoch": 0.09247849642650807, "grad_norm": 442.836181640625, "learning_rate": 9.156e-06, "loss": 43.0645, "step": 45780 }, { "epoch": 0.09249869705919189, "grad_norm": 434.8324890136719, "learning_rate": 9.158e-06, "loss": 22.0832, "step": 45790 }, { "epoch": 0.09251889769187571, "grad_norm": 540.1774291992188, "learning_rate": 9.16e-06, "loss": 24.301, "step": 45800 }, { "epoch": 0.09253909832455952, "grad_norm": 759.1398315429688, "learning_rate": 9.162000000000001e-06, "loss": 38.7936, "step": 45810 }, { "epoch": 0.09255929895724334, "grad_norm": 196.523681640625, "learning_rate": 9.164e-06, "loss": 29.7429, "step": 45820 }, { "epoch": 0.09257949958992716, "grad_norm": 374.3005676269531, "learning_rate": 9.166e-06, "loss": 28.2329, "step": 45830 }, { "epoch": 0.09259970022261098, "grad_norm": 242.35548400878906, "learning_rate": 9.168000000000001e-06, "loss": 37.0264, "step": 45840 }, { "epoch": 0.09261990085529478, "grad_norm": 605.6915893554688, "learning_rate": 9.17e-06, "loss": 51.7208, "step": 45850 }, { "epoch": 0.0926401014879786, "grad_norm": 581.0315551757812, "learning_rate": 9.172000000000001e-06, "loss": 32.1027, "step": 45860 }, { "epoch": 0.09266030212066242, "grad_norm": 395.7412414550781, "learning_rate": 9.174e-06, "loss": 31.1553, "step": 45870 }, { "epoch": 0.09268050275334623, "grad_norm": 244.5458221435547, "learning_rate": 9.176e-06, "loss": 17.0078, "step": 45880 }, { "epoch": 0.09270070338603005, "grad_norm": 1080.458984375, "learning_rate": 9.178000000000001e-06, "loss": 23.8676, "step": 45890 }, { "epoch": 0.09272090401871387, "grad_norm": 505.7313232421875, "learning_rate": 9.180000000000002e-06, "loss": 21.9754, "step": 45900 }, { "epoch": 0.09274110465139768, "grad_norm": 414.4971618652344, "learning_rate": 9.182000000000001e-06, "loss": 24.1529, "step": 45910 }, { "epoch": 0.0927613052840815, "grad_norm": 373.2899169921875, "learning_rate": 9.184e-06, "loss": 30.3035, "step": 45920 }, { "epoch": 0.09278150591676532, "grad_norm": 712.4485473632812, "learning_rate": 9.186e-06, "loss": 32.8662, "step": 45930 }, { "epoch": 0.09280170654944912, "grad_norm": 501.9930419921875, "learning_rate": 9.188e-06, "loss": 22.7343, "step": 45940 }, { "epoch": 0.09282190718213294, "grad_norm": 407.44189453125, "learning_rate": 9.190000000000002e-06, "loss": 21.3925, "step": 45950 }, { "epoch": 0.09284210781481676, "grad_norm": 778.7462768554688, "learning_rate": 9.192000000000001e-06, "loss": 17.9243, "step": 45960 }, { "epoch": 0.09286230844750057, "grad_norm": 452.5445251464844, "learning_rate": 9.194e-06, "loss": 
21.0491, "step": 45970 }, { "epoch": 0.09288250908018439, "grad_norm": 513.406005859375, "learning_rate": 9.196e-06, "loss": 33.8423, "step": 45980 }, { "epoch": 0.09290270971286821, "grad_norm": 500.4651794433594, "learning_rate": 9.198e-06, "loss": 26.1923, "step": 45990 }, { "epoch": 0.09292291034555203, "grad_norm": 803.8782348632812, "learning_rate": 9.200000000000002e-06, "loss": 16.747, "step": 46000 }, { "epoch": 0.09294311097823584, "grad_norm": 263.3955078125, "learning_rate": 9.202000000000001e-06, "loss": 19.4754, "step": 46010 }, { "epoch": 0.09296331161091966, "grad_norm": 863.2661743164062, "learning_rate": 9.204e-06, "loss": 26.7707, "step": 46020 }, { "epoch": 0.09298351224360348, "grad_norm": 619.3812255859375, "learning_rate": 9.206000000000001e-06, "loss": 17.0884, "step": 46030 }, { "epoch": 0.09300371287628728, "grad_norm": 764.2350463867188, "learning_rate": 9.208e-06, "loss": 19.7395, "step": 46040 }, { "epoch": 0.0930239135089711, "grad_norm": 128.36607360839844, "learning_rate": 9.210000000000002e-06, "loss": 24.6796, "step": 46050 }, { "epoch": 0.09304411414165492, "grad_norm": 430.3720703125, "learning_rate": 9.212000000000001e-06, "loss": 36.0097, "step": 46060 }, { "epoch": 0.09306431477433873, "grad_norm": 1156.9661865234375, "learning_rate": 9.214e-06, "loss": 35.3924, "step": 46070 }, { "epoch": 0.09308451540702255, "grad_norm": 588.6331787109375, "learning_rate": 9.216000000000001e-06, "loss": 23.5478, "step": 46080 }, { "epoch": 0.09310471603970637, "grad_norm": 320.4897766113281, "learning_rate": 9.218e-06, "loss": 110.5782, "step": 46090 }, { "epoch": 0.09312491667239017, "grad_norm": 401.3931579589844, "learning_rate": 9.220000000000002e-06, "loss": 31.4041, "step": 46100 }, { "epoch": 0.093145117305074, "grad_norm": 779.8281860351562, "learning_rate": 9.222e-06, "loss": 20.6462, "step": 46110 }, { "epoch": 0.09316531793775781, "grad_norm": 177.2711944580078, "learning_rate": 9.224e-06, "loss": 28.436, "step": 46120 }, { "epoch": 0.09318551857044162, "grad_norm": 483.94451904296875, "learning_rate": 9.226000000000001e-06, "loss": 23.6505, "step": 46130 }, { "epoch": 0.09320571920312544, "grad_norm": 930.6395874023438, "learning_rate": 9.228e-06, "loss": 22.6424, "step": 46140 }, { "epoch": 0.09322591983580926, "grad_norm": 684.120849609375, "learning_rate": 9.230000000000001e-06, "loss": 29.6059, "step": 46150 }, { "epoch": 0.09324612046849308, "grad_norm": 332.86712646484375, "learning_rate": 9.232e-06, "loss": 33.6476, "step": 46160 }, { "epoch": 0.09326632110117689, "grad_norm": 692.9288330078125, "learning_rate": 9.234e-06, "loss": 41.6462, "step": 46170 }, { "epoch": 0.0932865217338607, "grad_norm": 852.34326171875, "learning_rate": 9.236000000000001e-06, "loss": 29.988, "step": 46180 }, { "epoch": 0.09330672236654453, "grad_norm": 610.2581176757812, "learning_rate": 9.238e-06, "loss": 35.6251, "step": 46190 }, { "epoch": 0.09332692299922833, "grad_norm": 218.29176330566406, "learning_rate": 9.240000000000001e-06, "loss": 12.5734, "step": 46200 }, { "epoch": 0.09334712363191215, "grad_norm": 421.0220947265625, "learning_rate": 9.242e-06, "loss": 28.3077, "step": 46210 }, { "epoch": 0.09336732426459597, "grad_norm": 361.57086181640625, "learning_rate": 9.244e-06, "loss": 27.5079, "step": 46220 }, { "epoch": 0.09338752489727978, "grad_norm": 703.63525390625, "learning_rate": 9.246000000000001e-06, "loss": 19.9812, "step": 46230 }, { "epoch": 0.0934077255299636, "grad_norm": 85.08383178710938, "learning_rate": 9.248e-06, "loss": 17.4826, "step": 
46240 }, { "epoch": 0.09342792616264742, "grad_norm": 6.185848236083984, "learning_rate": 9.250000000000001e-06, "loss": 28.1113, "step": 46250 }, { "epoch": 0.09344812679533122, "grad_norm": 1017.949951171875, "learning_rate": 9.252e-06, "loss": 33.4018, "step": 46260 }, { "epoch": 0.09346832742801504, "grad_norm": 175.9057159423828, "learning_rate": 9.254000000000002e-06, "loss": 26.9791, "step": 46270 }, { "epoch": 0.09348852806069886, "grad_norm": 471.81842041015625, "learning_rate": 9.256e-06, "loss": 18.282, "step": 46280 }, { "epoch": 0.09350872869338267, "grad_norm": 694.974609375, "learning_rate": 9.258e-06, "loss": 37.9084, "step": 46290 }, { "epoch": 0.09352892932606649, "grad_norm": 1199.8182373046875, "learning_rate": 9.260000000000001e-06, "loss": 33.2196, "step": 46300 }, { "epoch": 0.09354912995875031, "grad_norm": 324.40020751953125, "learning_rate": 9.262e-06, "loss": 24.762, "step": 46310 }, { "epoch": 0.09356933059143413, "grad_norm": 673.99951171875, "learning_rate": 9.264000000000001e-06, "loss": 28.9734, "step": 46320 }, { "epoch": 0.09358953122411794, "grad_norm": 576.8762817382812, "learning_rate": 9.266e-06, "loss": 30.6695, "step": 46330 }, { "epoch": 0.09360973185680176, "grad_norm": 605.3089599609375, "learning_rate": 9.268e-06, "loss": 18.2871, "step": 46340 }, { "epoch": 0.09362993248948558, "grad_norm": 347.7716979980469, "learning_rate": 9.270000000000001e-06, "loss": 25.4771, "step": 46350 }, { "epoch": 0.09365013312216938, "grad_norm": 317.8143310546875, "learning_rate": 9.272e-06, "loss": 30.3004, "step": 46360 }, { "epoch": 0.0936703337548532, "grad_norm": 277.15118408203125, "learning_rate": 9.274000000000001e-06, "loss": 16.8318, "step": 46370 }, { "epoch": 0.09369053438753702, "grad_norm": 454.1064453125, "learning_rate": 9.276e-06, "loss": 47.5036, "step": 46380 }, { "epoch": 0.09371073502022083, "grad_norm": 611.6060791015625, "learning_rate": 9.278e-06, "loss": 28.1561, "step": 46390 }, { "epoch": 0.09373093565290465, "grad_norm": 836.2199096679688, "learning_rate": 9.280000000000001e-06, "loss": 34.3666, "step": 46400 }, { "epoch": 0.09375113628558847, "grad_norm": 332.7566223144531, "learning_rate": 9.282e-06, "loss": 22.9057, "step": 46410 }, { "epoch": 0.09377133691827227, "grad_norm": 728.076171875, "learning_rate": 9.284000000000001e-06, "loss": 42.9878, "step": 46420 }, { "epoch": 0.0937915375509561, "grad_norm": 1112.4063720703125, "learning_rate": 9.286e-06, "loss": 33.806, "step": 46430 }, { "epoch": 0.09381173818363991, "grad_norm": 353.80694580078125, "learning_rate": 9.288e-06, "loss": 26.4135, "step": 46440 }, { "epoch": 0.09383193881632372, "grad_norm": 497.838134765625, "learning_rate": 9.29e-06, "loss": 64.492, "step": 46450 }, { "epoch": 0.09385213944900754, "grad_norm": 262.32928466796875, "learning_rate": 9.292000000000002e-06, "loss": 35.9133, "step": 46460 }, { "epoch": 0.09387234008169136, "grad_norm": 176.88645935058594, "learning_rate": 9.294000000000001e-06, "loss": 28.2898, "step": 46470 }, { "epoch": 0.09389254071437518, "grad_norm": 167.89064025878906, "learning_rate": 9.296e-06, "loss": 7.748, "step": 46480 }, { "epoch": 0.09391274134705899, "grad_norm": 33.82958221435547, "learning_rate": 9.298e-06, "loss": 8.3599, "step": 46490 }, { "epoch": 0.0939329419797428, "grad_norm": 691.0979614257812, "learning_rate": 9.3e-06, "loss": 17.1772, "step": 46500 }, { "epoch": 0.09395314261242663, "grad_norm": 238.71844482421875, "learning_rate": 9.302000000000002e-06, "loss": 26.17, "step": 46510 }, { "epoch": 
0.09397334324511043, "grad_norm": 457.35858154296875, "learning_rate": 9.304000000000001e-06, "loss": 23.229, "step": 46520 }, { "epoch": 0.09399354387779425, "grad_norm": 219.3545379638672, "learning_rate": 9.306e-06, "loss": 13.5186, "step": 46530 }, { "epoch": 0.09401374451047807, "grad_norm": 238.89414978027344, "learning_rate": 9.308e-06, "loss": 14.7598, "step": 46540 }, { "epoch": 0.09403394514316188, "grad_norm": 729.1282348632812, "learning_rate": 9.31e-06, "loss": 31.1767, "step": 46550 }, { "epoch": 0.0940541457758457, "grad_norm": 317.5440673828125, "learning_rate": 9.312000000000002e-06, "loss": 18.8898, "step": 46560 }, { "epoch": 0.09407434640852952, "grad_norm": 462.8973083496094, "learning_rate": 9.314000000000001e-06, "loss": 20.6372, "step": 46570 }, { "epoch": 0.09409454704121333, "grad_norm": 206.12710571289062, "learning_rate": 9.316e-06, "loss": 14.3806, "step": 46580 }, { "epoch": 0.09411474767389715, "grad_norm": 1026.1854248046875, "learning_rate": 9.318e-06, "loss": 29.5411, "step": 46590 }, { "epoch": 0.09413494830658097, "grad_norm": 604.3740234375, "learning_rate": 9.32e-06, "loss": 15.6268, "step": 46600 }, { "epoch": 0.09415514893926477, "grad_norm": 230.26522827148438, "learning_rate": 9.322000000000002e-06, "loss": 15.1666, "step": 46610 }, { "epoch": 0.09417534957194859, "grad_norm": 204.7043914794922, "learning_rate": 9.324000000000001e-06, "loss": 17.7947, "step": 46620 }, { "epoch": 0.09419555020463241, "grad_norm": 1054.12255859375, "learning_rate": 9.326e-06, "loss": 35.5986, "step": 46630 }, { "epoch": 0.09421575083731623, "grad_norm": 1029.0006103515625, "learning_rate": 9.328000000000001e-06, "loss": 29.8678, "step": 46640 }, { "epoch": 0.09423595147000004, "grad_norm": 311.5683288574219, "learning_rate": 9.33e-06, "loss": 35.0463, "step": 46650 }, { "epoch": 0.09425615210268386, "grad_norm": 88.19459533691406, "learning_rate": 9.332000000000001e-06, "loss": 19.4599, "step": 46660 }, { "epoch": 0.09427635273536768, "grad_norm": 1075.1103515625, "learning_rate": 9.334e-06, "loss": 35.3525, "step": 46670 }, { "epoch": 0.09429655336805148, "grad_norm": 277.8233947753906, "learning_rate": 9.336e-06, "loss": 9.9575, "step": 46680 }, { "epoch": 0.0943167540007353, "grad_norm": 454.8833923339844, "learning_rate": 9.338000000000001e-06, "loss": 18.5828, "step": 46690 }, { "epoch": 0.09433695463341912, "grad_norm": 599.8738403320312, "learning_rate": 9.340000000000002e-06, "loss": 18.3383, "step": 46700 }, { "epoch": 0.09435715526610293, "grad_norm": 1908.4217529296875, "learning_rate": 9.342000000000001e-06, "loss": 29.3225, "step": 46710 }, { "epoch": 0.09437735589878675, "grad_norm": 559.5934448242188, "learning_rate": 9.344e-06, "loss": 14.9467, "step": 46720 }, { "epoch": 0.09439755653147057, "grad_norm": 132.86239624023438, "learning_rate": 9.346e-06, "loss": 28.6968, "step": 46730 }, { "epoch": 0.09441775716415438, "grad_norm": 344.94012451171875, "learning_rate": 9.348000000000001e-06, "loss": 14.2309, "step": 46740 }, { "epoch": 0.0944379577968382, "grad_norm": 701.255859375, "learning_rate": 9.350000000000002e-06, "loss": 20.1366, "step": 46750 }, { "epoch": 0.09445815842952202, "grad_norm": 436.1933288574219, "learning_rate": 9.352000000000001e-06, "loss": 39.7034, "step": 46760 }, { "epoch": 0.09447835906220582, "grad_norm": 484.61767578125, "learning_rate": 9.354e-06, "loss": 17.1705, "step": 46770 }, { "epoch": 0.09449855969488964, "grad_norm": 287.7072448730469, "learning_rate": 9.356e-06, "loss": 13.5601, "step": 46780 }, { "epoch": 
0.09451876032757346, "grad_norm": 528.2890625, "learning_rate": 9.358000000000001e-06, "loss": 28.1622, "step": 46790 }, { "epoch": 0.09453896096025728, "grad_norm": 734.2177734375, "learning_rate": 9.360000000000002e-06, "loss": 42.813, "step": 46800 }, { "epoch": 0.09455916159294109, "grad_norm": 280.38079833984375, "learning_rate": 9.362000000000001e-06, "loss": 26.2074, "step": 46810 }, { "epoch": 0.09457936222562491, "grad_norm": 533.17529296875, "learning_rate": 9.364e-06, "loss": 27.4847, "step": 46820 }, { "epoch": 0.09459956285830873, "grad_norm": 298.01092529296875, "learning_rate": 9.366000000000001e-06, "loss": 20.3961, "step": 46830 }, { "epoch": 0.09461976349099253, "grad_norm": 490.7327575683594, "learning_rate": 9.368e-06, "loss": 20.4112, "step": 46840 }, { "epoch": 0.09463996412367635, "grad_norm": 352.32666015625, "learning_rate": 9.370000000000002e-06, "loss": 14.1502, "step": 46850 }, { "epoch": 0.09466016475636017, "grad_norm": 244.9453125, "learning_rate": 9.372000000000001e-06, "loss": 15.6283, "step": 46860 }, { "epoch": 0.09468036538904398, "grad_norm": 1122.0040283203125, "learning_rate": 9.374e-06, "loss": 30.7566, "step": 46870 }, { "epoch": 0.0947005660217278, "grad_norm": 325.7804870605469, "learning_rate": 9.376000000000001e-06, "loss": 28.0684, "step": 46880 }, { "epoch": 0.09472076665441162, "grad_norm": 490.7943115234375, "learning_rate": 9.378e-06, "loss": 23.9426, "step": 46890 }, { "epoch": 0.09474096728709543, "grad_norm": 305.4912414550781, "learning_rate": 9.38e-06, "loss": 39.7157, "step": 46900 }, { "epoch": 0.09476116791977925, "grad_norm": 268.6631164550781, "learning_rate": 9.382000000000001e-06, "loss": 27.5846, "step": 46910 }, { "epoch": 0.09478136855246307, "grad_norm": 17.4284725189209, "learning_rate": 9.384e-06, "loss": 13.5578, "step": 46920 }, { "epoch": 0.09480156918514687, "grad_norm": 496.9639587402344, "learning_rate": 9.386000000000001e-06, "loss": 16.1514, "step": 46930 }, { "epoch": 0.09482176981783069, "grad_norm": 1134.2791748046875, "learning_rate": 9.388e-06, "loss": 31.3823, "step": 46940 }, { "epoch": 0.09484197045051451, "grad_norm": 445.1515808105469, "learning_rate": 9.39e-06, "loss": 17.084, "step": 46950 }, { "epoch": 0.09486217108319833, "grad_norm": 311.2286682128906, "learning_rate": 9.392000000000001e-06, "loss": 19.7756, "step": 46960 }, { "epoch": 0.09488237171588214, "grad_norm": 1450.7786865234375, "learning_rate": 9.394e-06, "loss": 33.0113, "step": 46970 }, { "epoch": 0.09490257234856596, "grad_norm": 1356.1912841796875, "learning_rate": 9.396000000000001e-06, "loss": 40.1539, "step": 46980 }, { "epoch": 0.09492277298124978, "grad_norm": 563.09912109375, "learning_rate": 9.398e-06, "loss": 32.9458, "step": 46990 }, { "epoch": 0.09494297361393358, "grad_norm": 822.3256225585938, "learning_rate": 9.4e-06, "loss": 34.4446, "step": 47000 }, { "epoch": 0.0949631742466174, "grad_norm": 885.6978759765625, "learning_rate": 9.402e-06, "loss": 22.9885, "step": 47010 }, { "epoch": 0.09498337487930122, "grad_norm": 535.3980102539062, "learning_rate": 9.404e-06, "loss": 27.7092, "step": 47020 }, { "epoch": 0.09500357551198503, "grad_norm": 529.09619140625, "learning_rate": 9.406000000000001e-06, "loss": 31.665, "step": 47030 }, { "epoch": 0.09502377614466885, "grad_norm": 475.2674255371094, "learning_rate": 9.408e-06, "loss": 29.4506, "step": 47040 }, { "epoch": 0.09504397677735267, "grad_norm": 451.7756652832031, "learning_rate": 9.41e-06, "loss": 36.2947, "step": 47050 }, { "epoch": 0.09506417741003648, "grad_norm": 
88.78792572021484, "learning_rate": 9.412e-06, "loss": 11.6324, "step": 47060 }, { "epoch": 0.0950843780427203, "grad_norm": 658.9125366210938, "learning_rate": 9.414000000000002e-06, "loss": 22.183, "step": 47070 }, { "epoch": 0.09510457867540412, "grad_norm": 284.6681823730469, "learning_rate": 9.416000000000001e-06, "loss": 13.6106, "step": 47080 }, { "epoch": 0.09512477930808792, "grad_norm": 410.4359436035156, "learning_rate": 9.418e-06, "loss": 36.8253, "step": 47090 }, { "epoch": 0.09514497994077174, "grad_norm": 983.0914306640625, "learning_rate": 9.42e-06, "loss": 19.7314, "step": 47100 }, { "epoch": 0.09516518057345556, "grad_norm": 245.94956970214844, "learning_rate": 9.422e-06, "loss": 28.235, "step": 47110 }, { "epoch": 0.09518538120613938, "grad_norm": 606.5446166992188, "learning_rate": 9.424000000000002e-06, "loss": 16.4744, "step": 47120 }, { "epoch": 0.09520558183882319, "grad_norm": 252.76507568359375, "learning_rate": 9.426000000000001e-06, "loss": 18.8907, "step": 47130 }, { "epoch": 0.09522578247150701, "grad_norm": 486.3790283203125, "learning_rate": 9.428e-06, "loss": 27.5937, "step": 47140 }, { "epoch": 0.09524598310419083, "grad_norm": 640.2138671875, "learning_rate": 9.43e-06, "loss": 32.7064, "step": 47150 }, { "epoch": 0.09526618373687464, "grad_norm": 300.470458984375, "learning_rate": 9.432e-06, "loss": 14.2572, "step": 47160 }, { "epoch": 0.09528638436955846, "grad_norm": 368.8597717285156, "learning_rate": 9.434000000000001e-06, "loss": 13.106, "step": 47170 }, { "epoch": 0.09530658500224228, "grad_norm": 1.5726017951965332, "learning_rate": 9.436e-06, "loss": 20.2675, "step": 47180 }, { "epoch": 0.09532678563492608, "grad_norm": 236.24609375, "learning_rate": 9.438e-06, "loss": 23.3547, "step": 47190 }, { "epoch": 0.0953469862676099, "grad_norm": 131.8438262939453, "learning_rate": 9.440000000000001e-06, "loss": 36.904, "step": 47200 }, { "epoch": 0.09536718690029372, "grad_norm": 385.8686218261719, "learning_rate": 9.442e-06, "loss": 22.8752, "step": 47210 }, { "epoch": 0.09538738753297753, "grad_norm": 147.48646545410156, "learning_rate": 9.444000000000001e-06, "loss": 27.6462, "step": 47220 }, { "epoch": 0.09540758816566135, "grad_norm": 641.9337158203125, "learning_rate": 9.446e-06, "loss": 32.553, "step": 47230 }, { "epoch": 0.09542778879834517, "grad_norm": 379.5395812988281, "learning_rate": 9.448e-06, "loss": 29.307, "step": 47240 }, { "epoch": 0.09544798943102897, "grad_norm": 647.9698486328125, "learning_rate": 9.450000000000001e-06, "loss": 44.1172, "step": 47250 }, { "epoch": 0.0954681900637128, "grad_norm": 380.27032470703125, "learning_rate": 9.452000000000002e-06, "loss": 32.1924, "step": 47260 }, { "epoch": 0.09548839069639661, "grad_norm": 618.8623657226562, "learning_rate": 9.454000000000001e-06, "loss": 20.6941, "step": 47270 }, { "epoch": 0.09550859132908043, "grad_norm": 189.7655792236328, "learning_rate": 9.456e-06, "loss": 22.5058, "step": 47280 }, { "epoch": 0.09552879196176424, "grad_norm": 615.1441650390625, "learning_rate": 9.458e-06, "loss": 38.724, "step": 47290 }, { "epoch": 0.09554899259444806, "grad_norm": 760.5025634765625, "learning_rate": 9.460000000000001e-06, "loss": 42.7108, "step": 47300 }, { "epoch": 0.09556919322713188, "grad_norm": 254.2874298095703, "learning_rate": 9.462000000000002e-06, "loss": 19.7814, "step": 47310 }, { "epoch": 0.09558939385981569, "grad_norm": 364.7778015136719, "learning_rate": 9.464000000000001e-06, "loss": 24.5197, "step": 47320 }, { "epoch": 0.0956095944924995, "grad_norm": 
336.9996643066406, "learning_rate": 9.466e-06, "loss": 13.6541, "step": 47330 }, { "epoch": 0.09562979512518333, "grad_norm": 495.0628967285156, "learning_rate": 9.468e-06, "loss": 36.6066, "step": 47340 }, { "epoch": 0.09564999575786713, "grad_norm": 320.690673828125, "learning_rate": 9.47e-06, "loss": 41.1295, "step": 47350 }, { "epoch": 0.09567019639055095, "grad_norm": 56.88438415527344, "learning_rate": 9.472000000000002e-06, "loss": 32.3294, "step": 47360 }, { "epoch": 0.09569039702323477, "grad_norm": 865.4175415039062, "learning_rate": 9.474000000000001e-06, "loss": 27.6809, "step": 47370 }, { "epoch": 0.09571059765591858, "grad_norm": 591.1318359375, "learning_rate": 9.476e-06, "loss": 30.3608, "step": 47380 }, { "epoch": 0.0957307982886024, "grad_norm": 485.3149108886719, "learning_rate": 9.478e-06, "loss": 30.1645, "step": 47390 }, { "epoch": 0.09575099892128622, "grad_norm": 582.667236328125, "learning_rate": 9.48e-06, "loss": 31.4972, "step": 47400 }, { "epoch": 0.09577119955397002, "grad_norm": 919.2569580078125, "learning_rate": 9.482000000000002e-06, "loss": 37.2458, "step": 47410 }, { "epoch": 0.09579140018665384, "grad_norm": 324.873291015625, "learning_rate": 9.484000000000001e-06, "loss": 21.6334, "step": 47420 }, { "epoch": 0.09581160081933766, "grad_norm": 403.7833251953125, "learning_rate": 9.486e-06, "loss": 30.9771, "step": 47430 }, { "epoch": 0.09583180145202148, "grad_norm": 845.0510864257812, "learning_rate": 9.488000000000001e-06, "loss": 20.9893, "step": 47440 }, { "epoch": 0.09585200208470529, "grad_norm": 737.4741821289062, "learning_rate": 9.49e-06, "loss": 33.6122, "step": 47450 }, { "epoch": 0.09587220271738911, "grad_norm": 3.5464887619018555, "learning_rate": 9.492000000000002e-06, "loss": 22.8622, "step": 47460 }, { "epoch": 0.09589240335007293, "grad_norm": 250.9477081298828, "learning_rate": 9.494000000000001e-06, "loss": 42.5499, "step": 47470 }, { "epoch": 0.09591260398275674, "grad_norm": 897.3517456054688, "learning_rate": 9.496e-06, "loss": 30.6094, "step": 47480 }, { "epoch": 0.09593280461544056, "grad_norm": 1824.970458984375, "learning_rate": 9.498000000000001e-06, "loss": 29.0285, "step": 47490 }, { "epoch": 0.09595300524812438, "grad_norm": 762.5152587890625, "learning_rate": 9.5e-06, "loss": 26.8512, "step": 47500 }, { "epoch": 0.09597320588080818, "grad_norm": 190.81185913085938, "learning_rate": 9.502000000000002e-06, "loss": 20.1303, "step": 47510 }, { "epoch": 0.095993406513492, "grad_norm": 286.0945129394531, "learning_rate": 9.504e-06, "loss": 27.1819, "step": 47520 }, { "epoch": 0.09601360714617582, "grad_norm": 300.3119812011719, "learning_rate": 9.506e-06, "loss": 39.6307, "step": 47530 }, { "epoch": 0.09603380777885963, "grad_norm": 256.6836853027344, "learning_rate": 9.508000000000001e-06, "loss": 32.6281, "step": 47540 }, { "epoch": 0.09605400841154345, "grad_norm": 520.3702392578125, "learning_rate": 9.51e-06, "loss": 26.2266, "step": 47550 }, { "epoch": 0.09607420904422727, "grad_norm": 511.2731628417969, "learning_rate": 9.512000000000001e-06, "loss": 19.3785, "step": 47560 }, { "epoch": 0.09609440967691107, "grad_norm": 657.1405029296875, "learning_rate": 9.514e-06, "loss": 40.0813, "step": 47570 }, { "epoch": 0.0961146103095949, "grad_norm": 645.1378173828125, "learning_rate": 9.516e-06, "loss": 30.9545, "step": 47580 }, { "epoch": 0.09613481094227871, "grad_norm": 378.3953857421875, "learning_rate": 9.518000000000001e-06, "loss": 21.1613, "step": 47590 }, { "epoch": 0.09615501157496253, "grad_norm": 228.1564178466797, 
"learning_rate": 9.52e-06, "loss": 34.5154, "step": 47600 }, { "epoch": 0.09617521220764634, "grad_norm": 860.0980224609375, "learning_rate": 9.522000000000001e-06, "loss": 23.0088, "step": 47610 }, { "epoch": 0.09619541284033016, "grad_norm": 343.0500793457031, "learning_rate": 9.524e-06, "loss": 24.8239, "step": 47620 }, { "epoch": 0.09621561347301398, "grad_norm": 300.5077209472656, "learning_rate": 9.526000000000002e-06, "loss": 23.6022, "step": 47630 }, { "epoch": 0.09623581410569779, "grad_norm": 355.74566650390625, "learning_rate": 9.528000000000001e-06, "loss": 29.7588, "step": 47640 }, { "epoch": 0.0962560147383816, "grad_norm": 646.3258056640625, "learning_rate": 9.53e-06, "loss": 28.0084, "step": 47650 }, { "epoch": 0.09627621537106543, "grad_norm": 455.2993469238281, "learning_rate": 9.532000000000001e-06, "loss": 49.2897, "step": 47660 }, { "epoch": 0.09629641600374923, "grad_norm": 1240.07080078125, "learning_rate": 9.534e-06, "loss": 30.8765, "step": 47670 }, { "epoch": 0.09631661663643305, "grad_norm": 223.4470672607422, "learning_rate": 9.536000000000002e-06, "loss": 32.9669, "step": 47680 }, { "epoch": 0.09633681726911687, "grad_norm": 366.6746826171875, "learning_rate": 9.538e-06, "loss": 12.4205, "step": 47690 }, { "epoch": 0.09635701790180068, "grad_norm": 607.6683349609375, "learning_rate": 9.54e-06, "loss": 17.9753, "step": 47700 }, { "epoch": 0.0963772185344845, "grad_norm": 476.2154235839844, "learning_rate": 9.542000000000001e-06, "loss": 29.2031, "step": 47710 }, { "epoch": 0.09639741916716832, "grad_norm": 614.3005981445312, "learning_rate": 9.544e-06, "loss": 32.7907, "step": 47720 }, { "epoch": 0.09641761979985213, "grad_norm": 329.02471923828125, "learning_rate": 9.546000000000001e-06, "loss": 26.8656, "step": 47730 }, { "epoch": 0.09643782043253595, "grad_norm": 46.64715576171875, "learning_rate": 9.548e-06, "loss": 19.0038, "step": 47740 }, { "epoch": 0.09645802106521977, "grad_norm": 140.94720458984375, "learning_rate": 9.55e-06, "loss": 22.3911, "step": 47750 }, { "epoch": 0.09647822169790359, "grad_norm": 324.6895751953125, "learning_rate": 9.552000000000001e-06, "loss": 14.5786, "step": 47760 }, { "epoch": 0.09649842233058739, "grad_norm": 750.6205444335938, "learning_rate": 9.554e-06, "loss": 26.3296, "step": 47770 }, { "epoch": 0.09651862296327121, "grad_norm": 253.6141815185547, "learning_rate": 9.556000000000001e-06, "loss": 24.6459, "step": 47780 }, { "epoch": 0.09653882359595503, "grad_norm": 1342.5201416015625, "learning_rate": 9.558e-06, "loss": 40.2704, "step": 47790 }, { "epoch": 0.09655902422863884, "grad_norm": 89.08769226074219, "learning_rate": 9.56e-06, "loss": 30.5612, "step": 47800 }, { "epoch": 0.09657922486132266, "grad_norm": 513.7152099609375, "learning_rate": 9.562000000000001e-06, "loss": 30.5212, "step": 47810 }, { "epoch": 0.09659942549400648, "grad_norm": 381.00433349609375, "learning_rate": 9.564e-06, "loss": 48.6877, "step": 47820 }, { "epoch": 0.09661962612669028, "grad_norm": 431.2875061035156, "learning_rate": 9.566000000000001e-06, "loss": 16.3693, "step": 47830 }, { "epoch": 0.0966398267593741, "grad_norm": 1027.247314453125, "learning_rate": 9.568e-06, "loss": 19.0051, "step": 47840 }, { "epoch": 0.09666002739205792, "grad_norm": 960.1653442382812, "learning_rate": 9.57e-06, "loss": 43.8949, "step": 47850 }, { "epoch": 0.09668022802474173, "grad_norm": 407.0733642578125, "learning_rate": 9.572000000000001e-06, "loss": 25.9826, "step": 47860 }, { "epoch": 0.09670042865742555, "grad_norm": 415.5325927734375, 
"learning_rate": 9.574000000000002e-06, "loss": 29.5783, "step": 47870 }, { "epoch": 0.09672062929010937, "grad_norm": 389.2325439453125, "learning_rate": 9.576000000000001e-06, "loss": 31.4837, "step": 47880 }, { "epoch": 0.09674082992279318, "grad_norm": 363.0729064941406, "learning_rate": 9.578e-06, "loss": 24.4001, "step": 47890 }, { "epoch": 0.096761030555477, "grad_norm": 274.0908508300781, "learning_rate": 9.58e-06, "loss": 23.3419, "step": 47900 }, { "epoch": 0.09678123118816082, "grad_norm": 610.1469116210938, "learning_rate": 9.582e-06, "loss": 35.8652, "step": 47910 }, { "epoch": 0.09680143182084464, "grad_norm": 581.1511840820312, "learning_rate": 9.584000000000002e-06, "loss": 17.0714, "step": 47920 }, { "epoch": 0.09682163245352844, "grad_norm": 97.17230224609375, "learning_rate": 9.586000000000001e-06, "loss": 31.8159, "step": 47930 }, { "epoch": 0.09684183308621226, "grad_norm": 383.70404052734375, "learning_rate": 9.588e-06, "loss": 30.4987, "step": 47940 }, { "epoch": 0.09686203371889608, "grad_norm": 444.5631103515625, "learning_rate": 9.59e-06, "loss": 25.1242, "step": 47950 }, { "epoch": 0.09688223435157989, "grad_norm": 145.23377990722656, "learning_rate": 9.592e-06, "loss": 10.8214, "step": 47960 }, { "epoch": 0.09690243498426371, "grad_norm": 360.3565979003906, "learning_rate": 9.594000000000002e-06, "loss": 27.0414, "step": 47970 }, { "epoch": 0.09692263561694753, "grad_norm": 454.5389709472656, "learning_rate": 9.596000000000001e-06, "loss": 22.0487, "step": 47980 }, { "epoch": 0.09694283624963133, "grad_norm": 232.68057250976562, "learning_rate": 9.598e-06, "loss": 37.0938, "step": 47990 }, { "epoch": 0.09696303688231515, "grad_norm": 759.0354614257812, "learning_rate": 9.600000000000001e-06, "loss": 31.7083, "step": 48000 }, { "epoch": 0.09698323751499897, "grad_norm": 329.7867431640625, "learning_rate": 9.602e-06, "loss": 31.8868, "step": 48010 }, { "epoch": 0.09700343814768278, "grad_norm": 214.38702392578125, "learning_rate": 9.604000000000002e-06, "loss": 35.0172, "step": 48020 }, { "epoch": 0.0970236387803666, "grad_norm": 392.5238342285156, "learning_rate": 9.606000000000001e-06, "loss": 46.4812, "step": 48030 }, { "epoch": 0.09704383941305042, "grad_norm": 628.2894287109375, "learning_rate": 9.608e-06, "loss": 16.1298, "step": 48040 }, { "epoch": 0.09706404004573423, "grad_norm": 550.8941650390625, "learning_rate": 9.610000000000001e-06, "loss": 28.6235, "step": 48050 }, { "epoch": 0.09708424067841805, "grad_norm": 400.3697204589844, "learning_rate": 9.612000000000002e-06, "loss": 30.4452, "step": 48060 }, { "epoch": 0.09710444131110187, "grad_norm": 95.51148223876953, "learning_rate": 9.614000000000001e-06, "loss": 22.6477, "step": 48070 }, { "epoch": 0.09712464194378569, "grad_norm": 159.8924560546875, "learning_rate": 9.616e-06, "loss": 27.6692, "step": 48080 }, { "epoch": 0.09714484257646949, "grad_norm": 180.40538024902344, "learning_rate": 9.618e-06, "loss": 18.7072, "step": 48090 }, { "epoch": 0.09716504320915331, "grad_norm": 945.0508422851562, "learning_rate": 9.620000000000001e-06, "loss": 30.841, "step": 48100 }, { "epoch": 0.09718524384183713, "grad_norm": 666.922119140625, "learning_rate": 9.622000000000002e-06, "loss": 28.6417, "step": 48110 }, { "epoch": 0.09720544447452094, "grad_norm": 468.8777770996094, "learning_rate": 9.624000000000001e-06, "loss": 16.8679, "step": 48120 }, { "epoch": 0.09722564510720476, "grad_norm": 426.8685302734375, "learning_rate": 9.626e-06, "loss": 16.4289, "step": 48130 }, { "epoch": 0.09724584573988858, 
"grad_norm": 123.73139953613281, "learning_rate": 9.628e-06, "loss": 11.6659, "step": 48140 }, { "epoch": 0.09726604637257238, "grad_norm": 1088.898681640625, "learning_rate": 9.630000000000001e-06, "loss": 42.8518, "step": 48150 }, { "epoch": 0.0972862470052562, "grad_norm": 595.4813842773438, "learning_rate": 9.632e-06, "loss": 26.0703, "step": 48160 }, { "epoch": 0.09730644763794002, "grad_norm": 534.6911010742188, "learning_rate": 9.634000000000001e-06, "loss": 26.4292, "step": 48170 }, { "epoch": 0.09732664827062383, "grad_norm": 337.2037658691406, "learning_rate": 9.636e-06, "loss": 14.135, "step": 48180 }, { "epoch": 0.09734684890330765, "grad_norm": 302.7720642089844, "learning_rate": 9.638e-06, "loss": 16.6161, "step": 48190 }, { "epoch": 0.09736704953599147, "grad_norm": 383.7428283691406, "learning_rate": 9.640000000000001e-06, "loss": 56.4775, "step": 48200 }, { "epoch": 0.09738725016867528, "grad_norm": 401.3696594238281, "learning_rate": 9.642e-06, "loss": 26.5211, "step": 48210 }, { "epoch": 0.0974074508013591, "grad_norm": 501.5325622558594, "learning_rate": 9.644000000000001e-06, "loss": 20.5206, "step": 48220 }, { "epoch": 0.09742765143404292, "grad_norm": 993.3955688476562, "learning_rate": 9.646e-06, "loss": 31.1039, "step": 48230 }, { "epoch": 0.09744785206672674, "grad_norm": 249.63485717773438, "learning_rate": 9.648000000000001e-06, "loss": 18.1888, "step": 48240 }, { "epoch": 0.09746805269941054, "grad_norm": 142.93650817871094, "learning_rate": 9.65e-06, "loss": 25.8998, "step": 48250 }, { "epoch": 0.09748825333209436, "grad_norm": 346.6017761230469, "learning_rate": 9.652e-06, "loss": 28.8821, "step": 48260 }, { "epoch": 0.09750845396477818, "grad_norm": 139.22000122070312, "learning_rate": 9.654000000000001e-06, "loss": 17.9235, "step": 48270 }, { "epoch": 0.09752865459746199, "grad_norm": 592.041015625, "learning_rate": 9.656e-06, "loss": 36.8627, "step": 48280 }, { "epoch": 0.09754885523014581, "grad_norm": 1026.140625, "learning_rate": 9.658000000000001e-06, "loss": 28.4813, "step": 48290 }, { "epoch": 0.09756905586282963, "grad_norm": 800.9178466796875, "learning_rate": 9.66e-06, "loss": 21.5535, "step": 48300 }, { "epoch": 0.09758925649551344, "grad_norm": 222.7467498779297, "learning_rate": 9.662e-06, "loss": 22.0214, "step": 48310 }, { "epoch": 0.09760945712819726, "grad_norm": 229.6143035888672, "learning_rate": 9.664000000000001e-06, "loss": 24.3152, "step": 48320 }, { "epoch": 0.09762965776088108, "grad_norm": 124.33726501464844, "learning_rate": 9.666e-06, "loss": 23.7983, "step": 48330 }, { "epoch": 0.09764985839356488, "grad_norm": 685.0302124023438, "learning_rate": 9.668000000000001e-06, "loss": 19.4996, "step": 48340 }, { "epoch": 0.0976700590262487, "grad_norm": 650.8430786132812, "learning_rate": 9.67e-06, "loss": 38.9991, "step": 48350 }, { "epoch": 0.09769025965893252, "grad_norm": 792.5812377929688, "learning_rate": 9.672e-06, "loss": 33.9084, "step": 48360 }, { "epoch": 0.09771046029161633, "grad_norm": 352.05487060546875, "learning_rate": 9.674000000000001e-06, "loss": 24.5777, "step": 48370 }, { "epoch": 0.09773066092430015, "grad_norm": 399.4421081542969, "learning_rate": 9.676e-06, "loss": 29.6903, "step": 48380 }, { "epoch": 0.09775086155698397, "grad_norm": 282.4613952636719, "learning_rate": 9.678000000000001e-06, "loss": 27.1341, "step": 48390 }, { "epoch": 0.09777106218966779, "grad_norm": 875.9047241210938, "learning_rate": 9.68e-06, "loss": 17.4331, "step": 48400 }, { "epoch": 0.0977912628223516, "grad_norm": 238.08753967285156, 
"learning_rate": 9.682e-06, "loss": 41.6776, "step": 48410 }, { "epoch": 0.09781146345503541, "grad_norm": 381.57763671875, "learning_rate": 9.684e-06, "loss": 28.2582, "step": 48420 }, { "epoch": 0.09783166408771923, "grad_norm": 740.5884399414062, "learning_rate": 9.686000000000002e-06, "loss": 32.7431, "step": 48430 }, { "epoch": 0.09785186472040304, "grad_norm": 491.7377014160156, "learning_rate": 9.688000000000001e-06, "loss": 20.9476, "step": 48440 }, { "epoch": 0.09787206535308686, "grad_norm": 746.2936401367188, "learning_rate": 9.69e-06, "loss": 30.073, "step": 48450 }, { "epoch": 0.09789226598577068, "grad_norm": 35.32424545288086, "learning_rate": 9.692e-06, "loss": 41.2601, "step": 48460 }, { "epoch": 0.09791246661845449, "grad_norm": 313.6356506347656, "learning_rate": 9.694e-06, "loss": 15.1328, "step": 48470 }, { "epoch": 0.0979326672511383, "grad_norm": 622.7384033203125, "learning_rate": 9.696000000000002e-06, "loss": 16.9361, "step": 48480 }, { "epoch": 0.09795286788382213, "grad_norm": 444.7352294921875, "learning_rate": 9.698000000000001e-06, "loss": 15.3796, "step": 48490 }, { "epoch": 0.09797306851650593, "grad_norm": 309.7575378417969, "learning_rate": 9.7e-06, "loss": 79.6284, "step": 48500 }, { "epoch": 0.09799326914918975, "grad_norm": 1072.44921875, "learning_rate": 9.702e-06, "loss": 43.3716, "step": 48510 }, { "epoch": 0.09801346978187357, "grad_norm": 436.8084411621094, "learning_rate": 9.704e-06, "loss": 39.6096, "step": 48520 }, { "epoch": 0.09803367041455738, "grad_norm": 649.1705322265625, "learning_rate": 9.706000000000002e-06, "loss": 26.3654, "step": 48530 }, { "epoch": 0.0980538710472412, "grad_norm": 322.66290283203125, "learning_rate": 9.708000000000001e-06, "loss": 23.6526, "step": 48540 }, { "epoch": 0.09807407167992502, "grad_norm": 325.04425048828125, "learning_rate": 9.71e-06, "loss": 25.3373, "step": 48550 }, { "epoch": 0.09809427231260884, "grad_norm": 1282.7777099609375, "learning_rate": 9.712e-06, "loss": 18.9983, "step": 48560 }, { "epoch": 0.09811447294529264, "grad_norm": 436.2747802734375, "learning_rate": 9.714e-06, "loss": 25.8153, "step": 48570 }, { "epoch": 0.09813467357797646, "grad_norm": 450.4263000488281, "learning_rate": 9.716000000000002e-06, "loss": 20.8123, "step": 48580 }, { "epoch": 0.09815487421066028, "grad_norm": 256.3553771972656, "learning_rate": 9.718e-06, "loss": 22.8999, "step": 48590 }, { "epoch": 0.09817507484334409, "grad_norm": 310.2727355957031, "learning_rate": 9.72e-06, "loss": 20.5452, "step": 48600 }, { "epoch": 0.09819527547602791, "grad_norm": 443.9215087890625, "learning_rate": 9.722000000000001e-06, "loss": 22.8119, "step": 48610 }, { "epoch": 0.09821547610871173, "grad_norm": 273.1202087402344, "learning_rate": 9.724e-06, "loss": 26.6364, "step": 48620 }, { "epoch": 0.09823567674139554, "grad_norm": 335.86328125, "learning_rate": 9.726000000000001e-06, "loss": 25.182, "step": 48630 }, { "epoch": 0.09825587737407936, "grad_norm": 287.7085266113281, "learning_rate": 9.728e-06, "loss": 20.7065, "step": 48640 }, { "epoch": 0.09827607800676318, "grad_norm": 658.7157592773438, "learning_rate": 9.73e-06, "loss": 14.5838, "step": 48650 }, { "epoch": 0.09829627863944698, "grad_norm": 614.1793823242188, "learning_rate": 9.732000000000001e-06, "loss": 23.6119, "step": 48660 }, { "epoch": 0.0983164792721308, "grad_norm": 591.2699584960938, "learning_rate": 9.734000000000002e-06, "loss": 14.9975, "step": 48670 }, { "epoch": 0.09833667990481462, "grad_norm": 191.0492401123047, "learning_rate": 9.736000000000001e-06, 
"loss": 14.8718, "step": 48680 }, { "epoch": 0.09835688053749843, "grad_norm": 739.43359375, "learning_rate": 9.738e-06, "loss": 22.9545, "step": 48690 }, { "epoch": 0.09837708117018225, "grad_norm": 464.5688781738281, "learning_rate": 9.74e-06, "loss": 13.4298, "step": 48700 }, { "epoch": 0.09839728180286607, "grad_norm": 756.2413940429688, "learning_rate": 9.742000000000001e-06, "loss": 45.1545, "step": 48710 }, { "epoch": 0.09841748243554987, "grad_norm": 219.85523986816406, "learning_rate": 9.744000000000002e-06, "loss": 34.1919, "step": 48720 }, { "epoch": 0.0984376830682337, "grad_norm": 631.513671875, "learning_rate": 9.746000000000001e-06, "loss": 20.2011, "step": 48730 }, { "epoch": 0.09845788370091751, "grad_norm": 432.5674743652344, "learning_rate": 9.748e-06, "loss": 30.0944, "step": 48740 }, { "epoch": 0.09847808433360133, "grad_norm": 647.7390747070312, "learning_rate": 9.75e-06, "loss": 38.3661, "step": 48750 }, { "epoch": 0.09849828496628514, "grad_norm": 410.4930725097656, "learning_rate": 9.752e-06, "loss": 29.9325, "step": 48760 }, { "epoch": 0.09851848559896896, "grad_norm": 470.92840576171875, "learning_rate": 9.754000000000002e-06, "loss": 35.1719, "step": 48770 }, { "epoch": 0.09853868623165278, "grad_norm": 953.4794921875, "learning_rate": 9.756000000000001e-06, "loss": 36.2353, "step": 48780 }, { "epoch": 0.09855888686433659, "grad_norm": 471.443359375, "learning_rate": 9.758e-06, "loss": 33.5306, "step": 48790 }, { "epoch": 0.0985790874970204, "grad_norm": 820.6240234375, "learning_rate": 9.760000000000001e-06, "loss": 24.8495, "step": 48800 }, { "epoch": 0.09859928812970423, "grad_norm": 463.1524658203125, "learning_rate": 9.762e-06, "loss": 22.0579, "step": 48810 }, { "epoch": 0.09861948876238803, "grad_norm": 1104.513916015625, "learning_rate": 9.764000000000002e-06, "loss": 47.9236, "step": 48820 }, { "epoch": 0.09863968939507185, "grad_norm": 345.3257141113281, "learning_rate": 9.766000000000001e-06, "loss": 20.4036, "step": 48830 }, { "epoch": 0.09865989002775567, "grad_norm": 549.0986328125, "learning_rate": 9.768e-06, "loss": 34.3462, "step": 48840 }, { "epoch": 0.09868009066043948, "grad_norm": 277.656982421875, "learning_rate": 9.770000000000001e-06, "loss": 14.8656, "step": 48850 }, { "epoch": 0.0987002912931233, "grad_norm": 465.69207763671875, "learning_rate": 9.772e-06, "loss": 17.3637, "step": 48860 }, { "epoch": 0.09872049192580712, "grad_norm": 1081.8680419921875, "learning_rate": 9.774000000000002e-06, "loss": 44.3744, "step": 48870 }, { "epoch": 0.09874069255849093, "grad_norm": 158.52818298339844, "learning_rate": 9.776000000000001e-06, "loss": 33.0443, "step": 48880 }, { "epoch": 0.09876089319117475, "grad_norm": 1199.7681884765625, "learning_rate": 9.778e-06, "loss": 29.7864, "step": 48890 }, { "epoch": 0.09878109382385857, "grad_norm": 692.0853881835938, "learning_rate": 9.780000000000001e-06, "loss": 40.4721, "step": 48900 }, { "epoch": 0.09880129445654239, "grad_norm": 753.58740234375, "learning_rate": 9.782e-06, "loss": 28.6318, "step": 48910 }, { "epoch": 0.09882149508922619, "grad_norm": 319.7926940917969, "learning_rate": 9.784000000000002e-06, "loss": 34.901, "step": 48920 }, { "epoch": 0.09884169572191001, "grad_norm": 278.90948486328125, "learning_rate": 9.786e-06, "loss": 35.2243, "step": 48930 }, { "epoch": 0.09886189635459383, "grad_norm": 888.979736328125, "learning_rate": 9.788e-06, "loss": 21.1623, "step": 48940 }, { "epoch": 0.09888209698727764, "grad_norm": 50.69482421875, "learning_rate": 9.790000000000001e-06, "loss": 
19.2865, "step": 48950 }, { "epoch": 0.09890229761996146, "grad_norm": 271.34930419921875, "learning_rate": 9.792e-06, "loss": 14.6149, "step": 48960 }, { "epoch": 0.09892249825264528, "grad_norm": 865.9595336914062, "learning_rate": 9.794000000000001e-06, "loss": 22.4335, "step": 48970 }, { "epoch": 0.09894269888532908, "grad_norm": 556.2496948242188, "learning_rate": 9.796e-06, "loss": 54.3076, "step": 48980 }, { "epoch": 0.0989628995180129, "grad_norm": 735.6205444335938, "learning_rate": 9.798e-06, "loss": 38.0693, "step": 48990 }, { "epoch": 0.09898310015069672, "grad_norm": 426.1153259277344, "learning_rate": 9.800000000000001e-06, "loss": 39.0343, "step": 49000 }, { "epoch": 0.09900330078338053, "grad_norm": 471.3280334472656, "learning_rate": 9.802e-06, "loss": 26.5112, "step": 49010 }, { "epoch": 0.09902350141606435, "grad_norm": 684.482421875, "learning_rate": 9.804000000000001e-06, "loss": 26.193, "step": 49020 }, { "epoch": 0.09904370204874817, "grad_norm": 1121.659423828125, "learning_rate": 9.806e-06, "loss": 33.4643, "step": 49030 }, { "epoch": 0.09906390268143198, "grad_norm": 200.65771484375, "learning_rate": 9.808000000000002e-06, "loss": 16.047, "step": 49040 }, { "epoch": 0.0990841033141158, "grad_norm": 884.4088134765625, "learning_rate": 9.810000000000001e-06, "loss": 36.1623, "step": 49050 }, { "epoch": 0.09910430394679962, "grad_norm": 194.87034606933594, "learning_rate": 9.812e-06, "loss": 19.7563, "step": 49060 }, { "epoch": 0.09912450457948344, "grad_norm": 558.3735961914062, "learning_rate": 9.814000000000001e-06, "loss": 16.634, "step": 49070 }, { "epoch": 0.09914470521216724, "grad_norm": 298.5070495605469, "learning_rate": 9.816e-06, "loss": 31.8815, "step": 49080 }, { "epoch": 0.09916490584485106, "grad_norm": 371.0187072753906, "learning_rate": 9.818000000000002e-06, "loss": 22.5085, "step": 49090 }, { "epoch": 0.09918510647753488, "grad_norm": 807.2926025390625, "learning_rate": 9.820000000000001e-06, "loss": 25.0996, "step": 49100 }, { "epoch": 0.09920530711021869, "grad_norm": 630.2256469726562, "learning_rate": 9.822e-06, "loss": 27.6292, "step": 49110 }, { "epoch": 0.09922550774290251, "grad_norm": 741.7303466796875, "learning_rate": 9.824000000000001e-06, "loss": 29.3892, "step": 49120 }, { "epoch": 0.09924570837558633, "grad_norm": 972.7586059570312, "learning_rate": 9.826e-06, "loss": 49.3759, "step": 49130 }, { "epoch": 0.09926590900827013, "grad_norm": 659.7006225585938, "learning_rate": 9.828000000000001e-06, "loss": 30.6737, "step": 49140 }, { "epoch": 0.09928610964095395, "grad_norm": 323.7527160644531, "learning_rate": 9.83e-06, "loss": 21.8791, "step": 49150 }, { "epoch": 0.09930631027363777, "grad_norm": 181.38067626953125, "learning_rate": 9.832e-06, "loss": 15.8764, "step": 49160 }, { "epoch": 0.09932651090632158, "grad_norm": 531.7208862304688, "learning_rate": 9.834000000000001e-06, "loss": 51.3417, "step": 49170 }, { "epoch": 0.0993467115390054, "grad_norm": 471.9506530761719, "learning_rate": 9.836e-06, "loss": 33.242, "step": 49180 }, { "epoch": 0.09936691217168922, "grad_norm": 215.51206970214844, "learning_rate": 9.838000000000001e-06, "loss": 41.7949, "step": 49190 }, { "epoch": 0.09938711280437303, "grad_norm": 1006.3677368164062, "learning_rate": 9.84e-06, "loss": 22.6436, "step": 49200 }, { "epoch": 0.09940731343705685, "grad_norm": 322.4917297363281, "learning_rate": 9.842e-06, "loss": 15.0789, "step": 49210 }, { "epoch": 0.09942751406974067, "grad_norm": 309.1917724609375, "learning_rate": 9.844000000000001e-06, "loss": 
29.4168, "step": 49220 }, { "epoch": 0.09944771470242449, "grad_norm": 712.6861572265625, "learning_rate": 9.846000000000002e-06, "loss": 16.7173, "step": 49230 }, { "epoch": 0.09946791533510829, "grad_norm": 174.32557678222656, "learning_rate": 9.848000000000001e-06, "loss": 19.2307, "step": 49240 }, { "epoch": 0.09948811596779211, "grad_norm": 589.4677734375, "learning_rate": 9.85e-06, "loss": 22.0556, "step": 49250 }, { "epoch": 0.09950831660047593, "grad_norm": 193.81019592285156, "learning_rate": 9.852e-06, "loss": 17.3796, "step": 49260 }, { "epoch": 0.09952851723315974, "grad_norm": 647.8886108398438, "learning_rate": 9.854000000000001e-06, "loss": 27.9435, "step": 49270 }, { "epoch": 0.09954871786584356, "grad_norm": 348.2982177734375, "learning_rate": 9.856000000000002e-06, "loss": 25.0838, "step": 49280 }, { "epoch": 0.09956891849852738, "grad_norm": 203.4016571044922, "learning_rate": 9.858000000000001e-06, "loss": 20.925, "step": 49290 }, { "epoch": 0.09958911913121118, "grad_norm": 454.58380126953125, "learning_rate": 9.86e-06, "loss": 14.073, "step": 49300 }, { "epoch": 0.099609319763895, "grad_norm": 470.5152893066406, "learning_rate": 9.862e-06, "loss": 20.2741, "step": 49310 }, { "epoch": 0.09962952039657882, "grad_norm": 651.3986206054688, "learning_rate": 9.864e-06, "loss": 33.504, "step": 49320 }, { "epoch": 0.09964972102926263, "grad_norm": 140.67408752441406, "learning_rate": 9.866000000000002e-06, "loss": 24.0097, "step": 49330 }, { "epoch": 0.09966992166194645, "grad_norm": 414.4718322753906, "learning_rate": 9.868000000000001e-06, "loss": 48.8509, "step": 49340 }, { "epoch": 0.09969012229463027, "grad_norm": 113.15275573730469, "learning_rate": 9.87e-06, "loss": 28.0613, "step": 49350 }, { "epoch": 0.09971032292731408, "grad_norm": 1159.5601806640625, "learning_rate": 9.872e-06, "loss": 28.8047, "step": 49360 }, { "epoch": 0.0997305235599979, "grad_norm": 609.1572265625, "learning_rate": 9.874e-06, "loss": 35.9614, "step": 49370 }, { "epoch": 0.09975072419268172, "grad_norm": 290.0668029785156, "learning_rate": 9.876000000000002e-06, "loss": 29.4886, "step": 49380 }, { "epoch": 0.09977092482536554, "grad_norm": 580.041015625, "learning_rate": 9.878000000000001e-06, "loss": 21.3514, "step": 49390 }, { "epoch": 0.09979112545804934, "grad_norm": 476.08868408203125, "learning_rate": 9.88e-06, "loss": 12.8899, "step": 49400 }, { "epoch": 0.09981132609073316, "grad_norm": 284.8551330566406, "learning_rate": 9.882000000000001e-06, "loss": 31.1479, "step": 49410 }, { "epoch": 0.09983152672341698, "grad_norm": 244.68206787109375, "learning_rate": 9.884e-06, "loss": 23.8315, "step": 49420 }, { "epoch": 0.09985172735610079, "grad_norm": 907.8687133789062, "learning_rate": 9.886000000000002e-06, "loss": 30.9991, "step": 49430 }, { "epoch": 0.09987192798878461, "grad_norm": 513.1172485351562, "learning_rate": 9.888000000000001e-06, "loss": 21.5931, "step": 49440 }, { "epoch": 0.09989212862146843, "grad_norm": 338.56011962890625, "learning_rate": 9.89e-06, "loss": 26.8022, "step": 49450 }, { "epoch": 0.09991232925415224, "grad_norm": 185.15675354003906, "learning_rate": 9.892000000000001e-06, "loss": 28.307, "step": 49460 }, { "epoch": 0.09993252988683606, "grad_norm": 904.0110473632812, "learning_rate": 9.894e-06, "loss": 37.4639, "step": 49470 }, { "epoch": 0.09995273051951988, "grad_norm": 521.9727172851562, "learning_rate": 9.896000000000001e-06, "loss": 25.5981, "step": 49480 }, { "epoch": 0.09997293115220368, "grad_norm": 1088.702880859375, "learning_rate": 9.898e-06, 
"loss": 28.4142, "step": 49490 }, { "epoch": 0.0999931317848875, "grad_norm": 861.6896362304688, "learning_rate": 9.9e-06, "loss": 37.6479, "step": 49500 }, { "epoch": 0.10001333241757132, "grad_norm": 1178.4163818359375, "learning_rate": 9.902000000000001e-06, "loss": 28.324, "step": 49510 }, { "epoch": 0.10003353305025513, "grad_norm": 389.5504150390625, "learning_rate": 9.904e-06, "loss": 15.2446, "step": 49520 }, { "epoch": 0.10005373368293895, "grad_norm": 87.75468444824219, "learning_rate": 9.906000000000001e-06, "loss": 18.3448, "step": 49530 }, { "epoch": 0.10007393431562277, "grad_norm": 485.5669250488281, "learning_rate": 9.908e-06, "loss": 23.2733, "step": 49540 }, { "epoch": 0.10009413494830659, "grad_norm": 325.63507080078125, "learning_rate": 9.91e-06, "loss": 14.1606, "step": 49550 }, { "epoch": 0.1001143355809904, "grad_norm": 516.7115478515625, "learning_rate": 9.912000000000001e-06, "loss": 20.5059, "step": 49560 }, { "epoch": 0.10013453621367421, "grad_norm": 232.55116271972656, "learning_rate": 9.914e-06, "loss": 40.8113, "step": 49570 }, { "epoch": 0.10015473684635803, "grad_norm": 1050.576416015625, "learning_rate": 9.916000000000001e-06, "loss": 28.4415, "step": 49580 }, { "epoch": 0.10017493747904184, "grad_norm": 224.6217498779297, "learning_rate": 9.918e-06, "loss": 21.9504, "step": 49590 }, { "epoch": 0.10019513811172566, "grad_norm": 691.5042724609375, "learning_rate": 9.920000000000002e-06, "loss": 36.9765, "step": 49600 }, { "epoch": 0.10021533874440948, "grad_norm": 529.460205078125, "learning_rate": 9.922000000000001e-06, "loss": 30.945, "step": 49610 }, { "epoch": 0.10023553937709329, "grad_norm": 744.3157348632812, "learning_rate": 9.924e-06, "loss": 28.8601, "step": 49620 }, { "epoch": 0.1002557400097771, "grad_norm": 417.7217712402344, "learning_rate": 9.926000000000001e-06, "loss": 32.3139, "step": 49630 }, { "epoch": 0.10027594064246093, "grad_norm": 457.09521484375, "learning_rate": 9.928e-06, "loss": 34.9598, "step": 49640 }, { "epoch": 0.10029614127514473, "grad_norm": 300.70135498046875, "learning_rate": 9.930000000000001e-06, "loss": 36.5921, "step": 49650 }, { "epoch": 0.10031634190782855, "grad_norm": 692.780517578125, "learning_rate": 9.932e-06, "loss": 51.0492, "step": 49660 }, { "epoch": 0.10033654254051237, "grad_norm": 630.0022583007812, "learning_rate": 9.934e-06, "loss": 25.7311, "step": 49670 }, { "epoch": 0.10035674317319618, "grad_norm": 484.6986999511719, "learning_rate": 9.936000000000001e-06, "loss": 38.4831, "step": 49680 }, { "epoch": 0.10037694380588, "grad_norm": 374.391357421875, "learning_rate": 9.938e-06, "loss": 26.8847, "step": 49690 }, { "epoch": 0.10039714443856382, "grad_norm": 257.7834167480469, "learning_rate": 9.940000000000001e-06, "loss": 17.869, "step": 49700 }, { "epoch": 0.10041734507124764, "grad_norm": 170.61480712890625, "learning_rate": 9.942e-06, "loss": 56.4838, "step": 49710 }, { "epoch": 0.10043754570393144, "grad_norm": 676.6981201171875, "learning_rate": 9.944e-06, "loss": 24.0504, "step": 49720 }, { "epoch": 0.10045774633661526, "grad_norm": 375.68365478515625, "learning_rate": 9.946000000000001e-06, "loss": 29.4494, "step": 49730 }, { "epoch": 0.10047794696929908, "grad_norm": 500.6487121582031, "learning_rate": 9.948e-06, "loss": 36.1562, "step": 49740 }, { "epoch": 0.10049814760198289, "grad_norm": 536.2442626953125, "learning_rate": 9.950000000000001e-06, "loss": 30.0837, "step": 49750 }, { "epoch": 0.10051834823466671, "grad_norm": 451.24169921875, "learning_rate": 9.952e-06, "loss": 26.6665, 
"step": 49760 }, { "epoch": 0.10053854886735053, "grad_norm": 143.89686584472656, "learning_rate": 9.954e-06, "loss": 22.8259, "step": 49770 }, { "epoch": 0.10055874950003434, "grad_norm": 847.1919555664062, "learning_rate": 9.956000000000001e-06, "loss": 23.5676, "step": 49780 }, { "epoch": 0.10057895013271816, "grad_norm": 378.9535827636719, "learning_rate": 9.958e-06, "loss": 33.596, "step": 49790 }, { "epoch": 0.10059915076540198, "grad_norm": 350.87176513671875, "learning_rate": 9.960000000000001e-06, "loss": 21.7925, "step": 49800 }, { "epoch": 0.10061935139808578, "grad_norm": 517.0904541015625, "learning_rate": 9.962e-06, "loss": 17.3267, "step": 49810 }, { "epoch": 0.1006395520307696, "grad_norm": 170.994140625, "learning_rate": 9.964e-06, "loss": 23.7536, "step": 49820 }, { "epoch": 0.10065975266345342, "grad_norm": 305.552001953125, "learning_rate": 9.966e-06, "loss": 18.693, "step": 49830 }, { "epoch": 0.10067995329613723, "grad_norm": 470.3461608886719, "learning_rate": 9.968000000000002e-06, "loss": 39.8488, "step": 49840 }, { "epoch": 0.10070015392882105, "grad_norm": 524.2937622070312, "learning_rate": 9.970000000000001e-06, "loss": 18.4456, "step": 49850 }, { "epoch": 0.10072035456150487, "grad_norm": 369.7236633300781, "learning_rate": 9.972e-06, "loss": 26.828, "step": 49860 }, { "epoch": 0.10074055519418869, "grad_norm": 463.5006408691406, "learning_rate": 9.974e-06, "loss": 26.2836, "step": 49870 }, { "epoch": 0.1007607558268725, "grad_norm": 629.5526733398438, "learning_rate": 9.976e-06, "loss": 24.2549, "step": 49880 }, { "epoch": 0.10078095645955631, "grad_norm": 384.2488098144531, "learning_rate": 9.978000000000002e-06, "loss": 19.898, "step": 49890 }, { "epoch": 0.10080115709224013, "grad_norm": 799.72119140625, "learning_rate": 9.980000000000001e-06, "loss": 25.2737, "step": 49900 }, { "epoch": 0.10082135772492394, "grad_norm": 441.5334777832031, "learning_rate": 9.982e-06, "loss": 31.0727, "step": 49910 }, { "epoch": 0.10084155835760776, "grad_norm": 276.5469665527344, "learning_rate": 9.984e-06, "loss": 18.8078, "step": 49920 }, { "epoch": 0.10086175899029158, "grad_norm": 187.44979858398438, "learning_rate": 9.986e-06, "loss": 15.4224, "step": 49930 }, { "epoch": 0.10088195962297539, "grad_norm": 361.1304931640625, "learning_rate": 9.988000000000002e-06, "loss": 16.9527, "step": 49940 }, { "epoch": 0.1009021602556592, "grad_norm": 191.9720916748047, "learning_rate": 9.990000000000001e-06, "loss": 34.9568, "step": 49950 }, { "epoch": 0.10092236088834303, "grad_norm": 8.192609786987305, "learning_rate": 9.992e-06, "loss": 39.4295, "step": 49960 }, { "epoch": 0.10094256152102683, "grad_norm": 300.62615966796875, "learning_rate": 9.994000000000001e-06, "loss": 12.9227, "step": 49970 }, { "epoch": 0.10096276215371065, "grad_norm": 494.94549560546875, "learning_rate": 9.996e-06, "loss": 42.9506, "step": 49980 }, { "epoch": 0.10098296278639447, "grad_norm": 412.6275634765625, "learning_rate": 9.998000000000002e-06, "loss": 28.3969, "step": 49990 }, { "epoch": 0.10100316341907828, "grad_norm": 99.5644302368164, "learning_rate": 1e-05, "loss": 27.8822, "step": 50000 }, { "epoch": 0.1010233640517621, "grad_norm": 362.5364990234375, "learning_rate": 9.999999987815305e-06, "loss": 29.5299, "step": 50010 }, { "epoch": 0.10104356468444592, "grad_norm": 5027.0400390625, "learning_rate": 9.999999951261215e-06, "loss": 62.4795, "step": 50020 }, { "epoch": 0.10106376531712974, "grad_norm": 354.19964599609375, "learning_rate": 9.99999989033773e-06, "loss": 36.6571, "step": 50030 
}, { "epoch": 0.10108396594981355, "grad_norm": 725.4144897460938, "learning_rate": 9.999999805044853e-06, "loss": 49.3792, "step": 50040 }, { "epoch": 0.10110416658249737, "grad_norm": 221.52777099609375, "learning_rate": 9.999999695382584e-06, "loss": 31.6822, "step": 50050 }, { "epoch": 0.10112436721518119, "grad_norm": 853.5789794921875, "learning_rate": 9.999999561350923e-06, "loss": 17.717, "step": 50060 }, { "epoch": 0.10114456784786499, "grad_norm": 897.6527709960938, "learning_rate": 9.99999940294987e-06, "loss": 21.7384, "step": 50070 }, { "epoch": 0.10116476848054881, "grad_norm": 411.7169189453125, "learning_rate": 9.999999220179426e-06, "loss": 22.1829, "step": 50080 }, { "epoch": 0.10118496911323263, "grad_norm": 250.9145050048828, "learning_rate": 9.999999013039593e-06, "loss": 18.3321, "step": 50090 }, { "epoch": 0.10120516974591644, "grad_norm": 736.3198852539062, "learning_rate": 9.999998781530372e-06, "loss": 37.9785, "step": 50100 }, { "epoch": 0.10122537037860026, "grad_norm": 568.701904296875, "learning_rate": 9.999998525651761e-06, "loss": 25.465, "step": 50110 }, { "epoch": 0.10124557101128408, "grad_norm": 870.9944458007812, "learning_rate": 9.999998245403766e-06, "loss": 27.752, "step": 50120 }, { "epoch": 0.10126577164396788, "grad_norm": 518.2539672851562, "learning_rate": 9.999997940786385e-06, "loss": 24.4236, "step": 50130 }, { "epoch": 0.1012859722766517, "grad_norm": 261.4921569824219, "learning_rate": 9.99999761179962e-06, "loss": 28.2899, "step": 50140 }, { "epoch": 0.10130617290933552, "grad_norm": 390.89501953125, "learning_rate": 9.999997258443473e-06, "loss": 18.5972, "step": 50150 }, { "epoch": 0.10132637354201933, "grad_norm": 375.1386413574219, "learning_rate": 9.999996880717946e-06, "loss": 28.7425, "step": 50160 }, { "epoch": 0.10134657417470315, "grad_norm": 156.5116424560547, "learning_rate": 9.999996478623041e-06, "loss": 41.3474, "step": 50170 }, { "epoch": 0.10136677480738697, "grad_norm": 1503.359375, "learning_rate": 9.99999605215876e-06, "loss": 22.6597, "step": 50180 }, { "epoch": 0.10138697544007079, "grad_norm": 205.34335327148438, "learning_rate": 9.999995601325104e-06, "loss": 26.2105, "step": 50190 }, { "epoch": 0.1014071760727546, "grad_norm": 1484.55126953125, "learning_rate": 9.999995126122076e-06, "loss": 27.1298, "step": 50200 }, { "epoch": 0.10142737670543842, "grad_norm": 1214.3726806640625, "learning_rate": 9.999994626549678e-06, "loss": 49.6199, "step": 50210 }, { "epoch": 0.10144757733812224, "grad_norm": 79.13058471679688, "learning_rate": 9.999994102607912e-06, "loss": 19.2485, "step": 50220 }, { "epoch": 0.10146777797080604, "grad_norm": 451.7241516113281, "learning_rate": 9.999993554296783e-06, "loss": 26.9545, "step": 50230 }, { "epoch": 0.10148797860348986, "grad_norm": 281.6031494140625, "learning_rate": 9.999992981616292e-06, "loss": 29.894, "step": 50240 }, { "epoch": 0.10150817923617368, "grad_norm": 297.3249816894531, "learning_rate": 9.99999238456644e-06, "loss": 21.369, "step": 50250 }, { "epoch": 0.10152837986885749, "grad_norm": 95.88583374023438, "learning_rate": 9.999991763147232e-06, "loss": 24.8606, "step": 50260 }, { "epoch": 0.10154858050154131, "grad_norm": 1134.8065185546875, "learning_rate": 9.99999111735867e-06, "loss": 55.5175, "step": 50270 }, { "epoch": 0.10156878113422513, "grad_norm": 288.083740234375, "learning_rate": 9.999990447200758e-06, "loss": 32.7888, "step": 50280 }, { "epoch": 0.10158898176690893, "grad_norm": 329.77276611328125, "learning_rate": 9.9999897526735e-06, "loss": 28.8469, 
"step": 50290 }, { "epoch": 0.10160918239959275, "grad_norm": 308.0667419433594, "learning_rate": 9.999989033776898e-06, "loss": 40.5289, "step": 50300 }, { "epoch": 0.10162938303227657, "grad_norm": 1404.3338623046875, "learning_rate": 9.999988290510955e-06, "loss": 38.1056, "step": 50310 }, { "epoch": 0.10164958366496038, "grad_norm": 679.6793212890625, "learning_rate": 9.999987522875676e-06, "loss": 24.7506, "step": 50320 }, { "epoch": 0.1016697842976442, "grad_norm": 589.7515869140625, "learning_rate": 9.999986730871065e-06, "loss": 31.0373, "step": 50330 }, { "epoch": 0.10168998493032802, "grad_norm": 1030.587646484375, "learning_rate": 9.999985914497124e-06, "loss": 41.3364, "step": 50340 }, { "epoch": 0.10171018556301184, "grad_norm": 253.97445678710938, "learning_rate": 9.999985073753857e-06, "loss": 21.784, "step": 50350 }, { "epoch": 0.10173038619569565, "grad_norm": 803.2162475585938, "learning_rate": 9.999984208641271e-06, "loss": 31.6272, "step": 50360 }, { "epoch": 0.10175058682837947, "grad_norm": 313.1042175292969, "learning_rate": 9.999983319159368e-06, "loss": 24.4944, "step": 50370 }, { "epoch": 0.10177078746106329, "grad_norm": 444.9187927246094, "learning_rate": 9.999982405308154e-06, "loss": 26.4927, "step": 50380 }, { "epoch": 0.10179098809374709, "grad_norm": 1321.82421875, "learning_rate": 9.999981467087629e-06, "loss": 35.5587, "step": 50390 }, { "epoch": 0.10181118872643091, "grad_norm": 391.11090087890625, "learning_rate": 9.999980504497803e-06, "loss": 18.7048, "step": 50400 }, { "epoch": 0.10183138935911473, "grad_norm": 337.62158203125, "learning_rate": 9.999979517538677e-06, "loss": 22.3681, "step": 50410 }, { "epoch": 0.10185158999179854, "grad_norm": 601.7754516601562, "learning_rate": 9.99997850621026e-06, "loss": 32.052, "step": 50420 }, { "epoch": 0.10187179062448236, "grad_norm": 412.9033203125, "learning_rate": 9.999977470512551e-06, "loss": 17.4484, "step": 50430 }, { "epoch": 0.10189199125716618, "grad_norm": 209.8957977294922, "learning_rate": 9.999976410445563e-06, "loss": 11.9061, "step": 50440 }, { "epoch": 0.10191219188984998, "grad_norm": 570.1461181640625, "learning_rate": 9.999975326009292e-06, "loss": 26.4283, "step": 50450 }, { "epoch": 0.1019323925225338, "grad_norm": 583.421630859375, "learning_rate": 9.999974217203749e-06, "loss": 34.1035, "step": 50460 }, { "epoch": 0.10195259315521762, "grad_norm": 680.7811889648438, "learning_rate": 9.999973084028938e-06, "loss": 44.3453, "step": 50470 }, { "epoch": 0.10197279378790143, "grad_norm": 4006.479736328125, "learning_rate": 9.999971926484865e-06, "loss": 56.8407, "step": 50480 }, { "epoch": 0.10199299442058525, "grad_norm": 249.22564697265625, "learning_rate": 9.999970744571534e-06, "loss": 25.6443, "step": 50490 }, { "epoch": 0.10201319505326907, "grad_norm": 551.4610595703125, "learning_rate": 9.999969538288953e-06, "loss": 16.5304, "step": 50500 }, { "epoch": 0.10203339568595289, "grad_norm": 375.3378601074219, "learning_rate": 9.999968307637127e-06, "loss": 27.6524, "step": 50510 }, { "epoch": 0.1020535963186367, "grad_norm": 436.2530822753906, "learning_rate": 9.999967052616061e-06, "loss": 26.4405, "step": 50520 }, { "epoch": 0.10207379695132052, "grad_norm": 317.2707214355469, "learning_rate": 9.999965773225762e-06, "loss": 48.2838, "step": 50530 }, { "epoch": 0.10209399758400434, "grad_norm": 534.0542602539062, "learning_rate": 9.999964469466236e-06, "loss": 19.6013, "step": 50540 }, { "epoch": 0.10211419821668814, "grad_norm": 306.4703674316406, "learning_rate": 
9.999963141337493e-06, "loss": 26.0738, "step": 50550 }, { "epoch": 0.10213439884937196, "grad_norm": 255.9512176513672, "learning_rate": 9.999961788839533e-06, "loss": 25.5566, "step": 50560 }, { "epoch": 0.10215459948205578, "grad_norm": 515.935791015625, "learning_rate": 9.999960411972366e-06, "loss": 26.5733, "step": 50570 }, { "epoch": 0.10217480011473959, "grad_norm": 255.52371215820312, "learning_rate": 9.999959010735997e-06, "loss": 17.7823, "step": 50580 }, { "epoch": 0.10219500074742341, "grad_norm": 451.1026916503906, "learning_rate": 9.999957585130438e-06, "loss": 26.8705, "step": 50590 }, { "epoch": 0.10221520138010723, "grad_norm": 106.65933990478516, "learning_rate": 9.999956135155688e-06, "loss": 16.0425, "step": 50600 }, { "epoch": 0.10223540201279104, "grad_norm": 511.0063781738281, "learning_rate": 9.999954660811761e-06, "loss": 26.8811, "step": 50610 }, { "epoch": 0.10225560264547486, "grad_norm": 331.65325927734375, "learning_rate": 9.99995316209866e-06, "loss": 28.7006, "step": 50620 }, { "epoch": 0.10227580327815868, "grad_norm": 638.6454467773438, "learning_rate": 9.999951639016396e-06, "loss": 43.5507, "step": 50630 }, { "epoch": 0.10229600391084248, "grad_norm": 381.807861328125, "learning_rate": 9.999950091564972e-06, "loss": 29.0359, "step": 50640 }, { "epoch": 0.1023162045435263, "grad_norm": 1509.6329345703125, "learning_rate": 9.999948519744397e-06, "loss": 29.2152, "step": 50650 }, { "epoch": 0.10233640517621012, "grad_norm": 591.4840698242188, "learning_rate": 9.999946923554681e-06, "loss": 33.325, "step": 50660 }, { "epoch": 0.10235660580889394, "grad_norm": 924.4262084960938, "learning_rate": 9.99994530299583e-06, "loss": 45.0304, "step": 50670 }, { "epoch": 0.10237680644157775, "grad_norm": 101.5724868774414, "learning_rate": 9.99994365806785e-06, "loss": 28.9632, "step": 50680 }, { "epoch": 0.10239700707426157, "grad_norm": 1574.7281494140625, "learning_rate": 9.999941988770754e-06, "loss": 32.7535, "step": 50690 }, { "epoch": 0.10241720770694539, "grad_norm": 59.04460906982422, "learning_rate": 9.999940295104546e-06, "loss": 24.2862, "step": 50700 }, { "epoch": 0.1024374083396292, "grad_norm": 1890.5078125, "learning_rate": 9.999938577069235e-06, "loss": 36.4641, "step": 50710 }, { "epoch": 0.10245760897231301, "grad_norm": 362.4627990722656, "learning_rate": 9.99993683466483e-06, "loss": 17.6841, "step": 50720 }, { "epoch": 0.10247780960499683, "grad_norm": 637.8155517578125, "learning_rate": 9.999935067891339e-06, "loss": 36.074, "step": 50730 }, { "epoch": 0.10249801023768064, "grad_norm": 397.3134460449219, "learning_rate": 9.999933276748772e-06, "loss": 31.947, "step": 50740 }, { "epoch": 0.10251821087036446, "grad_norm": 405.8807067871094, "learning_rate": 9.999931461237135e-06, "loss": 42.5062, "step": 50750 }, { "epoch": 0.10253841150304828, "grad_norm": 437.52545166015625, "learning_rate": 9.99992962135644e-06, "loss": 24.7354, "step": 50760 }, { "epoch": 0.10255861213573209, "grad_norm": 597.0206298828125, "learning_rate": 9.999927757106693e-06, "loss": 22.6693, "step": 50770 }, { "epoch": 0.1025788127684159, "grad_norm": 541.7193603515625, "learning_rate": 9.999925868487905e-06, "loss": 16.0042, "step": 50780 }, { "epoch": 0.10259901340109973, "grad_norm": 262.95062255859375, "learning_rate": 9.999923955500085e-06, "loss": 42.3814, "step": 50790 }, { "epoch": 0.10261921403378353, "grad_norm": 9.531939506530762, "learning_rate": 9.999922018143242e-06, "loss": 35.0652, "step": 50800 }, { "epoch": 0.10263941466646735, "grad_norm": 
460.79962158203125, "learning_rate": 9.999920056417385e-06, "loss": 22.1603, "step": 50810 }, { "epoch": 0.10265961529915117, "grad_norm": 277.33734130859375, "learning_rate": 9.999918070322525e-06, "loss": 27.1832, "step": 50820 }, { "epoch": 0.10267981593183499, "grad_norm": 694.1124877929688, "learning_rate": 9.999916059858669e-06, "loss": 18.5083, "step": 50830 }, { "epoch": 0.1027000165645188, "grad_norm": 479.1972961425781, "learning_rate": 9.999914025025831e-06, "loss": 40.0288, "step": 50840 }, { "epoch": 0.10272021719720262, "grad_norm": 374.9565734863281, "learning_rate": 9.999911965824018e-06, "loss": 20.3209, "step": 50850 }, { "epoch": 0.10274041782988644, "grad_norm": 387.45050048828125, "learning_rate": 9.99990988225324e-06, "loss": 13.5861, "step": 50860 }, { "epoch": 0.10276061846257024, "grad_norm": 0.3691152036190033, "learning_rate": 9.999907774313507e-06, "loss": 35.716, "step": 50870 }, { "epoch": 0.10278081909525406, "grad_norm": 792.8024291992188, "learning_rate": 9.99990564200483e-06, "loss": 29.3103, "step": 50880 }, { "epoch": 0.10280101972793788, "grad_norm": 0.0, "learning_rate": 9.999903485327221e-06, "loss": 24.5385, "step": 50890 }, { "epoch": 0.10282122036062169, "grad_norm": 558.761474609375, "learning_rate": 9.999901304280686e-06, "loss": 29.9813, "step": 50900 }, { "epoch": 0.10284142099330551, "grad_norm": 453.54083251953125, "learning_rate": 9.99989909886524e-06, "loss": 25.5813, "step": 50910 }, { "epoch": 0.10286162162598933, "grad_norm": 423.0571594238281, "learning_rate": 9.999896869080893e-06, "loss": 35.28, "step": 50920 }, { "epoch": 0.10288182225867314, "grad_norm": 456.1601257324219, "learning_rate": 9.999894614927655e-06, "loss": 38.2854, "step": 50930 }, { "epoch": 0.10290202289135696, "grad_norm": 271.1506042480469, "learning_rate": 9.999892336405534e-06, "loss": 24.9651, "step": 50940 }, { "epoch": 0.10292222352404078, "grad_norm": 677.3641357421875, "learning_rate": 9.999890033514547e-06, "loss": 15.468, "step": 50950 }, { "epoch": 0.10294242415672458, "grad_norm": 1117.5863037109375, "learning_rate": 9.999887706254703e-06, "loss": 23.983, "step": 50960 }, { "epoch": 0.1029626247894084, "grad_norm": 292.31304931640625, "learning_rate": 9.999885354626011e-06, "loss": 19.6384, "step": 50970 }, { "epoch": 0.10298282542209222, "grad_norm": 214.32498168945312, "learning_rate": 9.999882978628485e-06, "loss": 16.6767, "step": 50980 }, { "epoch": 0.10300302605477604, "grad_norm": 1378.0211181640625, "learning_rate": 9.999880578262135e-06, "loss": 36.312, "step": 50990 }, { "epoch": 0.10302322668745985, "grad_norm": 745.3373413085938, "learning_rate": 9.999878153526974e-06, "loss": 43.9458, "step": 51000 }, { "epoch": 0.10304342732014367, "grad_norm": 415.322265625, "learning_rate": 9.999875704423015e-06, "loss": 20.623, "step": 51010 }, { "epoch": 0.10306362795282749, "grad_norm": 57.3155517578125, "learning_rate": 9.999873230950265e-06, "loss": 25.575, "step": 51020 }, { "epoch": 0.1030838285855113, "grad_norm": 103.42644500732422, "learning_rate": 9.99987073310874e-06, "loss": 41.701, "step": 51030 }, { "epoch": 0.10310402921819511, "grad_norm": 331.5990295410156, "learning_rate": 9.999868210898454e-06, "loss": 20.5087, "step": 51040 }, { "epoch": 0.10312422985087893, "grad_norm": 333.0093994140625, "learning_rate": 9.999865664319414e-06, "loss": 49.9883, "step": 51050 }, { "epoch": 0.10314443048356274, "grad_norm": 185.40383911132812, "learning_rate": 9.999863093371638e-06, "loss": 28.2973, "step": 51060 }, { "epoch": 0.10316463111624656, 
"grad_norm": 212.54566955566406, "learning_rate": 9.999860498055134e-06, "loss": 25.8497, "step": 51070 }, { "epoch": 0.10318483174893038, "grad_norm": 758.4007568359375, "learning_rate": 9.999857878369917e-06, "loss": 49.4042, "step": 51080 }, { "epoch": 0.10320503238161419, "grad_norm": 395.51055908203125, "learning_rate": 9.999855234315997e-06, "loss": 24.5271, "step": 51090 }, { "epoch": 0.103225233014298, "grad_norm": 550.81298828125, "learning_rate": 9.99985256589339e-06, "loss": 43.8895, "step": 51100 }, { "epoch": 0.10324543364698183, "grad_norm": 79.79631805419922, "learning_rate": 9.999849873102108e-06, "loss": 16.611, "step": 51110 }, { "epoch": 0.10326563427966563, "grad_norm": 463.4939270019531, "learning_rate": 9.999847155942165e-06, "loss": 17.6845, "step": 51120 }, { "epoch": 0.10328583491234945, "grad_norm": 754.7512817382812, "learning_rate": 9.999844414413574e-06, "loss": 25.539, "step": 51130 }, { "epoch": 0.10330603554503327, "grad_norm": 0.0, "learning_rate": 9.999841648516347e-06, "loss": 14.2443, "step": 51140 }, { "epoch": 0.10332623617771709, "grad_norm": 267.25244140625, "learning_rate": 9.999838858250497e-06, "loss": 21.5823, "step": 51150 }, { "epoch": 0.1033464368104009, "grad_norm": 285.90228271484375, "learning_rate": 9.99983604361604e-06, "loss": 22.5378, "step": 51160 }, { "epoch": 0.10336663744308472, "grad_norm": 921.9609985351562, "learning_rate": 9.999833204612988e-06, "loss": 43.638, "step": 51170 }, { "epoch": 0.10338683807576854, "grad_norm": 650.6875610351562, "learning_rate": 9.999830341241354e-06, "loss": 23.4214, "step": 51180 }, { "epoch": 0.10340703870845235, "grad_norm": 594.7377319335938, "learning_rate": 9.999827453501156e-06, "loss": 29.7626, "step": 51190 }, { "epoch": 0.10342723934113617, "grad_norm": 707.9284057617188, "learning_rate": 9.999824541392404e-06, "loss": 21.3611, "step": 51200 }, { "epoch": 0.10344743997381999, "grad_norm": 590.6197509765625, "learning_rate": 9.999821604915114e-06, "loss": 26.3745, "step": 51210 }, { "epoch": 0.10346764060650379, "grad_norm": 358.73663330078125, "learning_rate": 9.999818644069299e-06, "loss": 17.7493, "step": 51220 }, { "epoch": 0.10348784123918761, "grad_norm": 354.9080505371094, "learning_rate": 9.999815658854976e-06, "loss": 19.0474, "step": 51230 }, { "epoch": 0.10350804187187143, "grad_norm": 677.2418823242188, "learning_rate": 9.999812649272157e-06, "loss": 24.1071, "step": 51240 }, { "epoch": 0.10352824250455524, "grad_norm": 390.850341796875, "learning_rate": 9.999809615320857e-06, "loss": 27.0657, "step": 51250 }, { "epoch": 0.10354844313723906, "grad_norm": 764.44580078125, "learning_rate": 9.999806557001092e-06, "loss": 32.6188, "step": 51260 }, { "epoch": 0.10356864376992288, "grad_norm": 365.0505676269531, "learning_rate": 9.999803474312877e-06, "loss": 41.0896, "step": 51270 }, { "epoch": 0.10358884440260668, "grad_norm": 994.4592895507812, "learning_rate": 9.999800367256225e-06, "loss": 40.0048, "step": 51280 }, { "epoch": 0.1036090450352905, "grad_norm": 393.38275146484375, "learning_rate": 9.999797235831153e-06, "loss": 34.3346, "step": 51290 }, { "epoch": 0.10362924566797432, "grad_norm": 161.1769561767578, "learning_rate": 9.999794080037675e-06, "loss": 25.0284, "step": 51300 }, { "epoch": 0.10364944630065814, "grad_norm": 1198.1754150390625, "learning_rate": 9.999790899875807e-06, "loss": 28.4299, "step": 51310 }, { "epoch": 0.10366964693334195, "grad_norm": 631.53759765625, "learning_rate": 9.999787695345565e-06, "loss": 27.3778, "step": 51320 }, { "epoch": 
0.10368984756602577, "grad_norm": 743.1617431640625, "learning_rate": 9.999784466446965e-06, "loss": 31.1852, "step": 51330 }, { "epoch": 0.10371004819870959, "grad_norm": 666.7760009765625, "learning_rate": 9.99978121318002e-06, "loss": 17.261, "step": 51340 }, { "epoch": 0.1037302488313934, "grad_norm": 412.00347900390625, "learning_rate": 9.99977793554475e-06, "loss": 10.3803, "step": 51350 }, { "epoch": 0.10375044946407722, "grad_norm": 355.3598327636719, "learning_rate": 9.999774633541169e-06, "loss": 20.7727, "step": 51360 }, { "epoch": 0.10377065009676104, "grad_norm": 631.0842895507812, "learning_rate": 9.999771307169291e-06, "loss": 28.0426, "step": 51370 }, { "epoch": 0.10379085072944484, "grad_norm": 382.2905578613281, "learning_rate": 9.999767956429135e-06, "loss": 25.7704, "step": 51380 }, { "epoch": 0.10381105136212866, "grad_norm": 224.6883544921875, "learning_rate": 9.999764581320714e-06, "loss": 32.9772, "step": 51390 }, { "epoch": 0.10383125199481248, "grad_norm": 2052.6513671875, "learning_rate": 9.99976118184405e-06, "loss": 28.7195, "step": 51400 }, { "epoch": 0.10385145262749629, "grad_norm": 920.0391845703125, "learning_rate": 9.999757757999155e-06, "loss": 22.5864, "step": 51410 }, { "epoch": 0.10387165326018011, "grad_norm": 1105.635009765625, "learning_rate": 9.999754309786047e-06, "loss": 39.2646, "step": 51420 }, { "epoch": 0.10389185389286393, "grad_norm": 618.7783813476562, "learning_rate": 9.999750837204743e-06, "loss": 23.2488, "step": 51430 }, { "epoch": 0.10391205452554773, "grad_norm": 17.139188766479492, "learning_rate": 9.99974734025526e-06, "loss": 14.946, "step": 51440 }, { "epoch": 0.10393225515823155, "grad_norm": 938.8400268554688, "learning_rate": 9.999743818937614e-06, "loss": 32.3364, "step": 51450 }, { "epoch": 0.10395245579091537, "grad_norm": 339.00128173828125, "learning_rate": 9.999740273251824e-06, "loss": 52.0854, "step": 51460 }, { "epoch": 0.1039726564235992, "grad_norm": 901.957275390625, "learning_rate": 9.999736703197907e-06, "loss": 23.8989, "step": 51470 }, { "epoch": 0.103992857056283, "grad_norm": 620.7359008789062, "learning_rate": 9.999733108775878e-06, "loss": 29.3632, "step": 51480 }, { "epoch": 0.10401305768896682, "grad_norm": 580.5281372070312, "learning_rate": 9.999729489985757e-06, "loss": 19.642, "step": 51490 }, { "epoch": 0.10403325832165064, "grad_norm": 936.9527587890625, "learning_rate": 9.999725846827562e-06, "loss": 32.875, "step": 51500 }, { "epoch": 0.10405345895433445, "grad_norm": 901.7499389648438, "learning_rate": 9.999722179301309e-06, "loss": 29.5425, "step": 51510 }, { "epoch": 0.10407365958701827, "grad_norm": 1585.744384765625, "learning_rate": 9.999718487407015e-06, "loss": 38.8731, "step": 51520 }, { "epoch": 0.10409386021970209, "grad_norm": 512.9431762695312, "learning_rate": 9.9997147711447e-06, "loss": 19.3307, "step": 51530 }, { "epoch": 0.10411406085238589, "grad_norm": 902.8880615234375, "learning_rate": 9.999711030514383e-06, "loss": 33.5718, "step": 51540 }, { "epoch": 0.10413426148506971, "grad_norm": 299.1571350097656, "learning_rate": 9.99970726551608e-06, "loss": 16.439, "step": 51550 }, { "epoch": 0.10415446211775353, "grad_norm": 574.4469604492188, "learning_rate": 9.999703476149808e-06, "loss": 23.9682, "step": 51560 }, { "epoch": 0.10417466275043734, "grad_norm": 555.4169921875, "learning_rate": 9.999699662415592e-06, "loss": 27.2174, "step": 51570 }, { "epoch": 0.10419486338312116, "grad_norm": 558.3908081054688, "learning_rate": 9.999695824313443e-06, "loss": 39.1974, "step": 51580 
}, { "epoch": 0.10421506401580498, "grad_norm": 407.7419738769531, "learning_rate": 9.999691961843385e-06, "loss": 28.7544, "step": 51590 }, { "epoch": 0.10423526464848878, "grad_norm": 324.6638488769531, "learning_rate": 9.999688075005434e-06, "loss": 39.8842, "step": 51600 }, { "epoch": 0.1042554652811726, "grad_norm": 339.9502258300781, "learning_rate": 9.999684163799609e-06, "loss": 21.535, "step": 51610 }, { "epoch": 0.10427566591385642, "grad_norm": 338.2267150878906, "learning_rate": 9.99968022822593e-06, "loss": 17.7734, "step": 51620 }, { "epoch": 0.10429586654654023, "grad_norm": 398.8443603515625, "learning_rate": 9.999676268284416e-06, "loss": 17.9831, "step": 51630 }, { "epoch": 0.10431606717922405, "grad_norm": 372.1466979980469, "learning_rate": 9.999672283975085e-06, "loss": 47.2209, "step": 51640 }, { "epoch": 0.10433626781190787, "grad_norm": 289.52178955078125, "learning_rate": 9.99966827529796e-06, "loss": 46.3143, "step": 51650 }, { "epoch": 0.10435646844459169, "grad_norm": 84.61372375488281, "learning_rate": 9.999664242253058e-06, "loss": 20.6101, "step": 51660 }, { "epoch": 0.1043766690772755, "grad_norm": 750.5768432617188, "learning_rate": 9.999660184840398e-06, "loss": 47.7543, "step": 51670 }, { "epoch": 0.10439686970995932, "grad_norm": 398.7869567871094, "learning_rate": 9.999656103060001e-06, "loss": 22.8918, "step": 51680 }, { "epoch": 0.10441707034264314, "grad_norm": 359.3421325683594, "learning_rate": 9.999651996911886e-06, "loss": 31.5459, "step": 51690 }, { "epoch": 0.10443727097532694, "grad_norm": 285.4722595214844, "learning_rate": 9.999647866396073e-06, "loss": 16.909, "step": 51700 }, { "epoch": 0.10445747160801076, "grad_norm": 381.8937072753906, "learning_rate": 9.999643711512586e-06, "loss": 27.6456, "step": 51710 }, { "epoch": 0.10447767224069458, "grad_norm": 206.19906616210938, "learning_rate": 9.999639532261438e-06, "loss": 23.2891, "step": 51720 }, { "epoch": 0.10449787287337839, "grad_norm": 1186.94482421875, "learning_rate": 9.999635328642655e-06, "loss": 30.3152, "step": 51730 }, { "epoch": 0.10451807350606221, "grad_norm": 0.0, "learning_rate": 9.999631100656255e-06, "loss": 24.8379, "step": 51740 }, { "epoch": 0.10453827413874603, "grad_norm": 702.4994506835938, "learning_rate": 9.999626848302261e-06, "loss": 25.1743, "step": 51750 }, { "epoch": 0.10455847477142984, "grad_norm": 430.71099853515625, "learning_rate": 9.99962257158069e-06, "loss": 41.395, "step": 51760 }, { "epoch": 0.10457867540411366, "grad_norm": 786.567626953125, "learning_rate": 9.999618270491567e-06, "loss": 21.2917, "step": 51770 }, { "epoch": 0.10459887603679748, "grad_norm": 536.7110595703125, "learning_rate": 9.999613945034909e-06, "loss": 40.1751, "step": 51780 }, { "epoch": 0.10461907666948128, "grad_norm": 436.41864013671875, "learning_rate": 9.999609595210743e-06, "loss": 11.29, "step": 51790 }, { "epoch": 0.1046392773021651, "grad_norm": 682.1253051757812, "learning_rate": 9.999605221019082e-06, "loss": 48.1011, "step": 51800 }, { "epoch": 0.10465947793484892, "grad_norm": 248.62008666992188, "learning_rate": 9.999600822459952e-06, "loss": 15.4513, "step": 51810 }, { "epoch": 0.10467967856753274, "grad_norm": 7.036900997161865, "learning_rate": 9.999596399533375e-06, "loss": 34.9343, "step": 51820 }, { "epoch": 0.10469987920021655, "grad_norm": 166.2686767578125, "learning_rate": 9.999591952239371e-06, "loss": 14.706, "step": 51830 }, { "epoch": 0.10472007983290037, "grad_norm": 692.9573974609375, "learning_rate": 9.999587480577964e-06, "loss": 23.2436, 
"step": 51840 }, { "epoch": 0.10474028046558419, "grad_norm": 597.7660522460938, "learning_rate": 9.999582984549172e-06, "loss": 25.2128, "step": 51850 }, { "epoch": 0.104760481098268, "grad_norm": 51.638824462890625, "learning_rate": 9.99957846415302e-06, "loss": 15.9829, "step": 51860 }, { "epoch": 0.10478068173095181, "grad_norm": 1591.6973876953125, "learning_rate": 9.999573919389527e-06, "loss": 34.3104, "step": 51870 }, { "epoch": 0.10480088236363563, "grad_norm": 325.8369140625, "learning_rate": 9.999569350258717e-06, "loss": 35.8628, "step": 51880 }, { "epoch": 0.10482108299631944, "grad_norm": 702.3936157226562, "learning_rate": 9.999564756760616e-06, "loss": 37.2984, "step": 51890 }, { "epoch": 0.10484128362900326, "grad_norm": 442.11151123046875, "learning_rate": 9.999560138895238e-06, "loss": 31.7938, "step": 51900 }, { "epoch": 0.10486148426168708, "grad_norm": 658.4376831054688, "learning_rate": 9.999555496662614e-06, "loss": 24.6095, "step": 51910 }, { "epoch": 0.10488168489437089, "grad_norm": 486.6987609863281, "learning_rate": 9.999550830062762e-06, "loss": 30.5503, "step": 51920 }, { "epoch": 0.1049018855270547, "grad_norm": 486.70977783203125, "learning_rate": 9.999546139095706e-06, "loss": 30.3326, "step": 51930 }, { "epoch": 0.10492208615973853, "grad_norm": 426.9347839355469, "learning_rate": 9.999541423761468e-06, "loss": 26.62, "step": 51940 }, { "epoch": 0.10494228679242233, "grad_norm": 310.9057312011719, "learning_rate": 9.999536684060071e-06, "loss": 31.6233, "step": 51950 }, { "epoch": 0.10496248742510615, "grad_norm": 875.162109375, "learning_rate": 9.999531919991538e-06, "loss": 31.4422, "step": 51960 }, { "epoch": 0.10498268805778997, "grad_norm": 545.2814331054688, "learning_rate": 9.999527131555894e-06, "loss": 24.9442, "step": 51970 }, { "epoch": 0.10500288869047379, "grad_norm": 427.1497802734375, "learning_rate": 9.99952231875316e-06, "loss": 25.8614, "step": 51980 }, { "epoch": 0.1050230893231576, "grad_norm": 203.9998321533203, "learning_rate": 9.999517481583363e-06, "loss": 27.2882, "step": 51990 }, { "epoch": 0.10504328995584142, "grad_norm": 637.4127197265625, "learning_rate": 9.999512620046523e-06, "loss": 19.4995, "step": 52000 }, { "epoch": 0.10506349058852524, "grad_norm": 1.447941541671753, "learning_rate": 9.999507734142663e-06, "loss": 24.0484, "step": 52010 }, { "epoch": 0.10508369122120904, "grad_norm": 347.05963134765625, "learning_rate": 9.999502823871809e-06, "loss": 11.7482, "step": 52020 }, { "epoch": 0.10510389185389286, "grad_norm": 711.4403076171875, "learning_rate": 9.999497889233987e-06, "loss": 13.4801, "step": 52030 }, { "epoch": 0.10512409248657668, "grad_norm": 358.95794677734375, "learning_rate": 9.999492930229217e-06, "loss": 19.8398, "step": 52040 }, { "epoch": 0.10514429311926049, "grad_norm": 154.77569580078125, "learning_rate": 9.999487946857526e-06, "loss": 27.7863, "step": 52050 }, { "epoch": 0.10516449375194431, "grad_norm": 385.1726379394531, "learning_rate": 9.999482939118936e-06, "loss": 20.5073, "step": 52060 }, { "epoch": 0.10518469438462813, "grad_norm": 1505.1348876953125, "learning_rate": 9.999477907013473e-06, "loss": 39.0965, "step": 52070 }, { "epoch": 0.10520489501731194, "grad_norm": 717.062255859375, "learning_rate": 9.999472850541161e-06, "loss": 28.785, "step": 52080 }, { "epoch": 0.10522509564999576, "grad_norm": 666.9161987304688, "learning_rate": 9.999467769702023e-06, "loss": 24.6605, "step": 52090 }, { "epoch": 0.10524529628267958, "grad_norm": 605.6522216796875, "learning_rate": 
9.999462664496088e-06, "loss": 18.0582, "step": 52100 }, { "epoch": 0.10526549691536338, "grad_norm": 1095.1883544921875, "learning_rate": 9.999457534923377e-06, "loss": 25.2785, "step": 52110 }, { "epoch": 0.1052856975480472, "grad_norm": 414.3034973144531, "learning_rate": 9.999452380983915e-06, "loss": 29.4141, "step": 52120 }, { "epoch": 0.10530589818073102, "grad_norm": 270.92645263671875, "learning_rate": 9.999447202677732e-06, "loss": 25.1118, "step": 52130 }, { "epoch": 0.10532609881341484, "grad_norm": 488.3146667480469, "learning_rate": 9.999442000004848e-06, "loss": 22.3528, "step": 52140 }, { "epoch": 0.10534629944609865, "grad_norm": 535.1305541992188, "learning_rate": 9.99943677296529e-06, "loss": 33.6538, "step": 52150 }, { "epoch": 0.10536650007878247, "grad_norm": 386.5953063964844, "learning_rate": 9.999431521559081e-06, "loss": 33.8812, "step": 52160 }, { "epoch": 0.10538670071146629, "grad_norm": 412.7962341308594, "learning_rate": 9.999426245786253e-06, "loss": 14.0402, "step": 52170 }, { "epoch": 0.1054069013441501, "grad_norm": 908.8006591796875, "learning_rate": 9.999420945646828e-06, "loss": 36.5653, "step": 52180 }, { "epoch": 0.10542710197683391, "grad_norm": 831.282470703125, "learning_rate": 9.99941562114083e-06, "loss": 13.5727, "step": 52190 }, { "epoch": 0.10544730260951773, "grad_norm": 193.27737426757812, "learning_rate": 9.999410272268285e-06, "loss": 17.8289, "step": 52200 }, { "epoch": 0.10546750324220154, "grad_norm": 293.7889099121094, "learning_rate": 9.999404899029222e-06, "loss": 23.4262, "step": 52210 }, { "epoch": 0.10548770387488536, "grad_norm": 384.21270751953125, "learning_rate": 9.999399501423667e-06, "loss": 22.6356, "step": 52220 }, { "epoch": 0.10550790450756918, "grad_norm": 239.36883544921875, "learning_rate": 9.999394079451643e-06, "loss": 20.3355, "step": 52230 }, { "epoch": 0.10552810514025299, "grad_norm": 570.4447021484375, "learning_rate": 9.99938863311318e-06, "loss": 15.0387, "step": 52240 }, { "epoch": 0.1055483057729368, "grad_norm": 514.3821411132812, "learning_rate": 9.999383162408303e-06, "loss": 19.0036, "step": 52250 }, { "epoch": 0.10556850640562063, "grad_norm": 615.8590698242188, "learning_rate": 9.99937766733704e-06, "loss": 22.2815, "step": 52260 }, { "epoch": 0.10558870703830443, "grad_norm": 272.0684509277344, "learning_rate": 9.999372147899416e-06, "loss": 11.1711, "step": 52270 }, { "epoch": 0.10560890767098825, "grad_norm": 31.58615493774414, "learning_rate": 9.999366604095458e-06, "loss": 44.4439, "step": 52280 }, { "epoch": 0.10562910830367207, "grad_norm": 1436.013916015625, "learning_rate": 9.999361035925193e-06, "loss": 43.548, "step": 52290 }, { "epoch": 0.10564930893635589, "grad_norm": 1929.2962646484375, "learning_rate": 9.999355443388649e-06, "loss": 40.7206, "step": 52300 }, { "epoch": 0.1056695095690397, "grad_norm": 864.4912109375, "learning_rate": 9.999349826485854e-06, "loss": 24.2701, "step": 52310 }, { "epoch": 0.10568971020172352, "grad_norm": 441.25213623046875, "learning_rate": 9.999344185216833e-06, "loss": 31.1885, "step": 52320 }, { "epoch": 0.10570991083440734, "grad_norm": 1056.015380859375, "learning_rate": 9.999338519581616e-06, "loss": 31.1782, "step": 52330 }, { "epoch": 0.10573011146709115, "grad_norm": 326.104248046875, "learning_rate": 9.999332829580227e-06, "loss": 26.628, "step": 52340 }, { "epoch": 0.10575031209977497, "grad_norm": 393.9754638671875, "learning_rate": 9.999327115212698e-06, "loss": 18.3491, "step": 52350 }, { "epoch": 0.10577051273245879, "grad_norm": 
493.7698059082031, "learning_rate": 9.999321376479054e-06, "loss": 22.4783, "step": 52360 }, { "epoch": 0.10579071336514259, "grad_norm": 261.75701904296875, "learning_rate": 9.999315613379326e-06, "loss": 20.0119, "step": 52370 }, { "epoch": 0.10581091399782641, "grad_norm": 341.0134582519531, "learning_rate": 9.999309825913538e-06, "loss": 21.6089, "step": 52380 }, { "epoch": 0.10583111463051023, "grad_norm": 845.4539184570312, "learning_rate": 9.999304014081721e-06, "loss": 26.0644, "step": 52390 }, { "epoch": 0.10585131526319404, "grad_norm": 114.71834564208984, "learning_rate": 9.999298177883902e-06, "loss": 22.7964, "step": 52400 }, { "epoch": 0.10587151589587786, "grad_norm": 818.751953125, "learning_rate": 9.999292317320112e-06, "loss": 12.1773, "step": 52410 }, { "epoch": 0.10589171652856168, "grad_norm": 947.8588256835938, "learning_rate": 9.999286432390376e-06, "loss": 39.9747, "step": 52420 }, { "epoch": 0.10591191716124548, "grad_norm": 605.8237915039062, "learning_rate": 9.999280523094724e-06, "loss": 33.8493, "step": 52430 }, { "epoch": 0.1059321177939293, "grad_norm": 116.42375183105469, "learning_rate": 9.999274589433186e-06, "loss": 34.6618, "step": 52440 }, { "epoch": 0.10595231842661312, "grad_norm": 577.7481689453125, "learning_rate": 9.99926863140579e-06, "loss": 27.7989, "step": 52450 }, { "epoch": 0.10597251905929694, "grad_norm": 676.259521484375, "learning_rate": 9.999262649012564e-06, "loss": 23.1021, "step": 52460 }, { "epoch": 0.10599271969198075, "grad_norm": 573.849853515625, "learning_rate": 9.99925664225354e-06, "loss": 11.2289, "step": 52470 }, { "epoch": 0.10601292032466457, "grad_norm": 659.0547485351562, "learning_rate": 9.999250611128743e-06, "loss": 28.9521, "step": 52480 }, { "epoch": 0.10603312095734839, "grad_norm": 242.6114044189453, "learning_rate": 9.999244555638205e-06, "loss": 12.3119, "step": 52490 }, { "epoch": 0.1060533215900322, "grad_norm": 634.4039306640625, "learning_rate": 9.999238475781957e-06, "loss": 11.1968, "step": 52500 }, { "epoch": 0.10607352222271602, "grad_norm": 402.59869384765625, "learning_rate": 9.999232371560027e-06, "loss": 15.2867, "step": 52510 }, { "epoch": 0.10609372285539984, "grad_norm": 417.6640319824219, "learning_rate": 9.999226242972445e-06, "loss": 11.4818, "step": 52520 }, { "epoch": 0.10611392348808364, "grad_norm": 556.7757568359375, "learning_rate": 9.999220090019238e-06, "loss": 28.6786, "step": 52530 }, { "epoch": 0.10613412412076746, "grad_norm": 875.1763305664062, "learning_rate": 9.99921391270044e-06, "loss": 33.0905, "step": 52540 }, { "epoch": 0.10615432475345128, "grad_norm": 915.5540161132812, "learning_rate": 9.999207711016081e-06, "loss": 17.076, "step": 52550 }, { "epoch": 0.10617452538613509, "grad_norm": 349.79315185546875, "learning_rate": 9.999201484966188e-06, "loss": 17.2544, "step": 52560 }, { "epoch": 0.10619472601881891, "grad_norm": 351.5311584472656, "learning_rate": 9.999195234550796e-06, "loss": 37.0559, "step": 52570 }, { "epoch": 0.10621492665150273, "grad_norm": 649.7620849609375, "learning_rate": 9.99918895976993e-06, "loss": 39.2075, "step": 52580 }, { "epoch": 0.10623512728418653, "grad_norm": 445.8486633300781, "learning_rate": 9.999182660623625e-06, "loss": 26.4072, "step": 52590 }, { "epoch": 0.10625532791687035, "grad_norm": 693.0591430664062, "learning_rate": 9.999176337111908e-06, "loss": 33.9964, "step": 52600 }, { "epoch": 0.10627552854955417, "grad_norm": 428.8168640136719, "learning_rate": 9.999169989234815e-06, "loss": 19.2667, "step": 52610 }, { "epoch": 
0.106295729182238, "grad_norm": 706.9661865234375, "learning_rate": 9.999163616992371e-06, "loss": 27.4699, "step": 52620 }, { "epoch": 0.1063159298149218, "grad_norm": 27.181907653808594, "learning_rate": 9.999157220384612e-06, "loss": 11.5111, "step": 52630 }, { "epoch": 0.10633613044760562, "grad_norm": 1255.19873046875, "learning_rate": 9.999150799411565e-06, "loss": 23.6398, "step": 52640 }, { "epoch": 0.10635633108028944, "grad_norm": 1140.2137451171875, "learning_rate": 9.999144354073264e-06, "loss": 41.7556, "step": 52650 }, { "epoch": 0.10637653171297325, "grad_norm": 892.744384765625, "learning_rate": 9.999137884369741e-06, "loss": 36.8008, "step": 52660 }, { "epoch": 0.10639673234565707, "grad_norm": 619.9985961914062, "learning_rate": 9.999131390301027e-06, "loss": 21.2839, "step": 52670 }, { "epoch": 0.10641693297834089, "grad_norm": 673.101806640625, "learning_rate": 9.99912487186715e-06, "loss": 29.2515, "step": 52680 }, { "epoch": 0.10643713361102469, "grad_norm": 835.369384765625, "learning_rate": 9.999118329068148e-06, "loss": 24.7376, "step": 52690 }, { "epoch": 0.10645733424370851, "grad_norm": 894.3941650390625, "learning_rate": 9.999111761904046e-06, "loss": 41.1717, "step": 52700 }, { "epoch": 0.10647753487639233, "grad_norm": 427.3544921875, "learning_rate": 9.999105170374881e-06, "loss": 19.4941, "step": 52710 }, { "epoch": 0.10649773550907614, "grad_norm": 622.7033081054688, "learning_rate": 9.999098554480685e-06, "loss": 35.2149, "step": 52720 }, { "epoch": 0.10651793614175996, "grad_norm": 0.0, "learning_rate": 9.999091914221487e-06, "loss": 21.8452, "step": 52730 }, { "epoch": 0.10653813677444378, "grad_norm": 916.897705078125, "learning_rate": 9.999085249597322e-06, "loss": 17.9101, "step": 52740 }, { "epoch": 0.10655833740712758, "grad_norm": 294.47430419921875, "learning_rate": 9.999078560608221e-06, "loss": 30.3524, "step": 52750 }, { "epoch": 0.1065785380398114, "grad_norm": 421.6717834472656, "learning_rate": 9.999071847254219e-06, "loss": 19.8002, "step": 52760 }, { "epoch": 0.10659873867249522, "grad_norm": 1147.113037109375, "learning_rate": 9.999065109535346e-06, "loss": 35.8034, "step": 52770 }, { "epoch": 0.10661893930517904, "grad_norm": 404.02435302734375, "learning_rate": 9.999058347451638e-06, "loss": 37.1147, "step": 52780 }, { "epoch": 0.10663913993786285, "grad_norm": 252.00086975097656, "learning_rate": 9.999051561003124e-06, "loss": 28.5985, "step": 52790 }, { "epoch": 0.10665934057054667, "grad_norm": 530.6477661132812, "learning_rate": 9.99904475018984e-06, "loss": 15.8119, "step": 52800 }, { "epoch": 0.10667954120323049, "grad_norm": 23.36237144470215, "learning_rate": 9.999037915011819e-06, "loss": 26.4043, "step": 52810 }, { "epoch": 0.1066997418359143, "grad_norm": 336.9685363769531, "learning_rate": 9.999031055469091e-06, "loss": 24.8935, "step": 52820 }, { "epoch": 0.10671994246859812, "grad_norm": 243.57879638671875, "learning_rate": 9.999024171561693e-06, "loss": 31.3197, "step": 52830 }, { "epoch": 0.10674014310128194, "grad_norm": 294.1635437011719, "learning_rate": 9.999017263289656e-06, "loss": 52.6235, "step": 52840 }, { "epoch": 0.10676034373396574, "grad_norm": 929.2692260742188, "learning_rate": 9.999010330653019e-06, "loss": 23.1442, "step": 52850 }, { "epoch": 0.10678054436664956, "grad_norm": 376.0462951660156, "learning_rate": 9.999003373651809e-06, "loss": 27.5916, "step": 52860 }, { "epoch": 0.10680074499933338, "grad_norm": 150.8600616455078, "learning_rate": 9.998996392286062e-06, "loss": 25.589, "step": 52870 }, { 
"epoch": 0.10682094563201719, "grad_norm": 474.697265625, "learning_rate": 9.998989386555815e-06, "loss": 42.2781, "step": 52880 }, { "epoch": 0.10684114626470101, "grad_norm": 419.69866943359375, "learning_rate": 9.9989823564611e-06, "loss": 25.1389, "step": 52890 }, { "epoch": 0.10686134689738483, "grad_norm": 993.33203125, "learning_rate": 9.99897530200195e-06, "loss": 34.6098, "step": 52900 }, { "epoch": 0.10688154753006864, "grad_norm": 916.4841918945312, "learning_rate": 9.998968223178402e-06, "loss": 28.6282, "step": 52910 }, { "epoch": 0.10690174816275246, "grad_norm": 377.6288146972656, "learning_rate": 9.99896111999049e-06, "loss": 24.0976, "step": 52920 }, { "epoch": 0.10692194879543628, "grad_norm": 520.31005859375, "learning_rate": 9.998953992438245e-06, "loss": 21.7018, "step": 52930 }, { "epoch": 0.1069421494281201, "grad_norm": 13.560802459716797, "learning_rate": 9.998946840521706e-06, "loss": 19.4946, "step": 52940 }, { "epoch": 0.1069623500608039, "grad_norm": 363.0177001953125, "learning_rate": 9.998939664240908e-06, "loss": 28.6526, "step": 52950 }, { "epoch": 0.10698255069348772, "grad_norm": 833.1908569335938, "learning_rate": 9.998932463595882e-06, "loss": 27.0875, "step": 52960 }, { "epoch": 0.10700275132617154, "grad_norm": 210.7505340576172, "learning_rate": 9.998925238586666e-06, "loss": 29.3727, "step": 52970 }, { "epoch": 0.10702295195885535, "grad_norm": 1198.724853515625, "learning_rate": 9.998917989213296e-06, "loss": 35.7954, "step": 52980 }, { "epoch": 0.10704315259153917, "grad_norm": 161.1515655517578, "learning_rate": 9.998910715475804e-06, "loss": 20.7784, "step": 52990 }, { "epoch": 0.10706335322422299, "grad_norm": 343.9988098144531, "learning_rate": 9.998903417374228e-06, "loss": 30.7323, "step": 53000 }, { "epoch": 0.1070835538569068, "grad_norm": 633.6296997070312, "learning_rate": 9.998896094908603e-06, "loss": 41.7541, "step": 53010 }, { "epoch": 0.10710375448959061, "grad_norm": 1106.44287109375, "learning_rate": 9.998888748078966e-06, "loss": 30.7732, "step": 53020 }, { "epoch": 0.10712395512227443, "grad_norm": 206.07020568847656, "learning_rate": 9.99888137688535e-06, "loss": 21.4664, "step": 53030 }, { "epoch": 0.10714415575495824, "grad_norm": 63.42051315307617, "learning_rate": 9.998873981327795e-06, "loss": 15.0017, "step": 53040 }, { "epoch": 0.10716435638764206, "grad_norm": 336.19708251953125, "learning_rate": 9.998866561406331e-06, "loss": 42.9714, "step": 53050 }, { "epoch": 0.10718455702032588, "grad_norm": 1005.9963989257812, "learning_rate": 9.998859117121e-06, "loss": 20.2886, "step": 53060 }, { "epoch": 0.10720475765300969, "grad_norm": 536.5841674804688, "learning_rate": 9.998851648471834e-06, "loss": 25.2781, "step": 53070 }, { "epoch": 0.1072249582856935, "grad_norm": 568.0372924804688, "learning_rate": 9.998844155458873e-06, "loss": 27.6063, "step": 53080 }, { "epoch": 0.10724515891837733, "grad_norm": 491.802490234375, "learning_rate": 9.998836638082152e-06, "loss": 18.594, "step": 53090 }, { "epoch": 0.10726535955106115, "grad_norm": 441.60296630859375, "learning_rate": 9.998829096341706e-06, "loss": 24.2671, "step": 53100 }, { "epoch": 0.10728556018374495, "grad_norm": 473.935546875, "learning_rate": 9.998821530237576e-06, "loss": 32.2329, "step": 53110 }, { "epoch": 0.10730576081642877, "grad_norm": 698.8916015625, "learning_rate": 9.998813939769794e-06, "loss": 19.6332, "step": 53120 }, { "epoch": 0.10732596144911259, "grad_norm": 762.2566528320312, "learning_rate": 9.9988063249384e-06, "loss": 31.1006, "step": 53130 
}, { "epoch": 0.1073461620817964, "grad_norm": 819.5429077148438, "learning_rate": 9.99879868574343e-06, "loss": 35.1782, "step": 53140 }, { "epoch": 0.10736636271448022, "grad_norm": 969.3384399414062, "learning_rate": 9.998791022184921e-06, "loss": 49.248, "step": 53150 }, { "epoch": 0.10738656334716404, "grad_norm": 376.130615234375, "learning_rate": 9.998783334262911e-06, "loss": 30.3146, "step": 53160 }, { "epoch": 0.10740676397984784, "grad_norm": 422.056640625, "learning_rate": 9.998775621977438e-06, "loss": 21.0312, "step": 53170 }, { "epoch": 0.10742696461253166, "grad_norm": 239.6740264892578, "learning_rate": 9.998767885328538e-06, "loss": 48.1807, "step": 53180 }, { "epoch": 0.10744716524521548, "grad_norm": 499.3955993652344, "learning_rate": 9.99876012431625e-06, "loss": 30.1279, "step": 53190 }, { "epoch": 0.10746736587789929, "grad_norm": 1409.747314453125, "learning_rate": 9.998752338940612e-06, "loss": 32.9902, "step": 53200 }, { "epoch": 0.10748756651058311, "grad_norm": 380.8999328613281, "learning_rate": 9.99874452920166e-06, "loss": 22.1534, "step": 53210 }, { "epoch": 0.10750776714326693, "grad_norm": 833.2760009765625, "learning_rate": 9.998736695099434e-06, "loss": 30.3201, "step": 53220 }, { "epoch": 0.10752796777595074, "grad_norm": 20.92523193359375, "learning_rate": 9.998728836633972e-06, "loss": 23.5144, "step": 53230 }, { "epoch": 0.10754816840863456, "grad_norm": 172.36753845214844, "learning_rate": 9.998720953805312e-06, "loss": 9.3381, "step": 53240 }, { "epoch": 0.10756836904131838, "grad_norm": 600.3418579101562, "learning_rate": 9.998713046613492e-06, "loss": 23.8562, "step": 53250 }, { "epoch": 0.1075885696740022, "grad_norm": 426.4175720214844, "learning_rate": 9.998705115058552e-06, "loss": 21.8772, "step": 53260 }, { "epoch": 0.107608770306686, "grad_norm": 863.323974609375, "learning_rate": 9.998697159140528e-06, "loss": 22.8647, "step": 53270 }, { "epoch": 0.10762897093936982, "grad_norm": 672.56005859375, "learning_rate": 9.998689178859461e-06, "loss": 47.7858, "step": 53280 }, { "epoch": 0.10764917157205364, "grad_norm": 302.8929138183594, "learning_rate": 9.99868117421539e-06, "loss": 12.609, "step": 53290 }, { "epoch": 0.10766937220473745, "grad_norm": 1255.6273193359375, "learning_rate": 9.998673145208351e-06, "loss": 22.6348, "step": 53300 }, { "epoch": 0.10768957283742127, "grad_norm": 475.1684875488281, "learning_rate": 9.998665091838386e-06, "loss": 33.2957, "step": 53310 }, { "epoch": 0.10770977347010509, "grad_norm": 845.2681884765625, "learning_rate": 9.998657014105535e-06, "loss": 34.4305, "step": 53320 }, { "epoch": 0.1077299741027889, "grad_norm": 612.3766479492188, "learning_rate": 9.998648912009835e-06, "loss": 29.1685, "step": 53330 }, { "epoch": 0.10775017473547271, "grad_norm": 1290.07958984375, "learning_rate": 9.998640785551327e-06, "loss": 29.3782, "step": 53340 }, { "epoch": 0.10777037536815653, "grad_norm": 537.4452514648438, "learning_rate": 9.99863263473005e-06, "loss": 23.3855, "step": 53350 }, { "epoch": 0.10779057600084034, "grad_norm": 746.5062866210938, "learning_rate": 9.998624459546043e-06, "loss": 16.1965, "step": 53360 }, { "epoch": 0.10781077663352416, "grad_norm": 312.06500244140625, "learning_rate": 9.998616259999348e-06, "loss": 26.0713, "step": 53370 }, { "epoch": 0.10783097726620798, "grad_norm": 1012.008056640625, "learning_rate": 9.998608036090003e-06, "loss": 22.3234, "step": 53380 }, { "epoch": 0.10785117789889179, "grad_norm": 272.9735412597656, "learning_rate": 9.998599787818048e-06, "loss": 31.0834, 
"step": 53390 }, { "epoch": 0.1078713785315756, "grad_norm": 673.2590942382812, "learning_rate": 9.998591515183524e-06, "loss": 25.4174, "step": 53400 }, { "epoch": 0.10789157916425943, "grad_norm": 318.61651611328125, "learning_rate": 9.998583218186471e-06, "loss": 17.2329, "step": 53410 }, { "epoch": 0.10791177979694325, "grad_norm": 539.6445922851562, "learning_rate": 9.998574896826931e-06, "loss": 28.4101, "step": 53420 }, { "epoch": 0.10793198042962705, "grad_norm": 844.5818481445312, "learning_rate": 9.998566551104943e-06, "loss": 21.043, "step": 53430 }, { "epoch": 0.10795218106231087, "grad_norm": 463.2301940917969, "learning_rate": 9.998558181020547e-06, "loss": 37.8564, "step": 53440 }, { "epoch": 0.10797238169499469, "grad_norm": 87.23060607910156, "learning_rate": 9.998549786573785e-06, "loss": 22.3368, "step": 53450 }, { "epoch": 0.1079925823276785, "grad_norm": 630.809326171875, "learning_rate": 9.998541367764699e-06, "loss": 39.6557, "step": 53460 }, { "epoch": 0.10801278296036232, "grad_norm": 790.4534912109375, "learning_rate": 9.998532924593327e-06, "loss": 29.6731, "step": 53470 }, { "epoch": 0.10803298359304614, "grad_norm": 419.2471008300781, "learning_rate": 9.99852445705971e-06, "loss": 36.6505, "step": 53480 }, { "epoch": 0.10805318422572995, "grad_norm": 75.427978515625, "learning_rate": 9.998515965163894e-06, "loss": 12.1146, "step": 53490 }, { "epoch": 0.10807338485841377, "grad_norm": 83.66301727294922, "learning_rate": 9.998507448905917e-06, "loss": 39.2989, "step": 53500 }, { "epoch": 0.10809358549109759, "grad_norm": 391.13385009765625, "learning_rate": 9.99849890828582e-06, "loss": 20.2477, "step": 53510 }, { "epoch": 0.10811378612378139, "grad_norm": 490.6471252441406, "learning_rate": 9.998490343303646e-06, "loss": 43.6847, "step": 53520 }, { "epoch": 0.10813398675646521, "grad_norm": 599.6272583007812, "learning_rate": 9.998481753959436e-06, "loss": 37.4203, "step": 53530 }, { "epoch": 0.10815418738914903, "grad_norm": 155.78521728515625, "learning_rate": 9.998473140253234e-06, "loss": 27.2147, "step": 53540 }, { "epoch": 0.10817438802183284, "grad_norm": 479.9679260253906, "learning_rate": 9.998464502185076e-06, "loss": 39.7745, "step": 53550 }, { "epoch": 0.10819458865451666, "grad_norm": 572.0115356445312, "learning_rate": 9.998455839755013e-06, "loss": 26.7621, "step": 53560 }, { "epoch": 0.10821478928720048, "grad_norm": 535.1430053710938, "learning_rate": 9.99844715296308e-06, "loss": 26.889, "step": 53570 }, { "epoch": 0.1082349899198843, "grad_norm": 422.1188049316406, "learning_rate": 9.998438441809322e-06, "loss": 33.705, "step": 53580 }, { "epoch": 0.1082551905525681, "grad_norm": 357.9285888671875, "learning_rate": 9.998429706293781e-06, "loss": 28.6368, "step": 53590 }, { "epoch": 0.10827539118525192, "grad_norm": 109.74182891845703, "learning_rate": 9.9984209464165e-06, "loss": 20.9994, "step": 53600 }, { "epoch": 0.10829559181793574, "grad_norm": 334.81854248046875, "learning_rate": 9.998412162177523e-06, "loss": 21.9419, "step": 53610 }, { "epoch": 0.10831579245061955, "grad_norm": 549.0813598632812, "learning_rate": 9.99840335357689e-06, "loss": 28.1463, "step": 53620 }, { "epoch": 0.10833599308330337, "grad_norm": 441.5652770996094, "learning_rate": 9.998394520614645e-06, "loss": 23.2583, "step": 53630 }, { "epoch": 0.10835619371598719, "grad_norm": 401.61724853515625, "learning_rate": 9.998385663290833e-06, "loss": 24.7862, "step": 53640 }, { "epoch": 0.108376394348671, "grad_norm": 549.4804077148438, "learning_rate": 
9.998376781605493e-06, "loss": 28.995, "step": 53650 }, { "epoch": 0.10839659498135482, "grad_norm": 1153.8587646484375, "learning_rate": 9.998367875558673e-06, "loss": 35.7791, "step": 53660 }, { "epoch": 0.10841679561403864, "grad_norm": 320.6040344238281, "learning_rate": 9.998358945150412e-06, "loss": 13.7571, "step": 53670 }, { "epoch": 0.10843699624672244, "grad_norm": 478.5079345703125, "learning_rate": 9.998349990380757e-06, "loss": 26.84, "step": 53680 }, { "epoch": 0.10845719687940626, "grad_norm": 282.97344970703125, "learning_rate": 9.998341011249752e-06, "loss": 23.3799, "step": 53690 }, { "epoch": 0.10847739751209008, "grad_norm": 136.7860565185547, "learning_rate": 9.998332007757436e-06, "loss": 23.2608, "step": 53700 }, { "epoch": 0.10849759814477389, "grad_norm": 452.0782775878906, "learning_rate": 9.998322979903859e-06, "loss": 18.1503, "step": 53710 }, { "epoch": 0.10851779877745771, "grad_norm": 145.5153350830078, "learning_rate": 9.99831392768906e-06, "loss": 33.076, "step": 53720 }, { "epoch": 0.10853799941014153, "grad_norm": 559.6956787109375, "learning_rate": 9.998304851113086e-06, "loss": 19.8302, "step": 53730 }, { "epoch": 0.10855820004282535, "grad_norm": 395.3185119628906, "learning_rate": 9.99829575017598e-06, "loss": 16.7486, "step": 53740 }, { "epoch": 0.10857840067550915, "grad_norm": 1397.56103515625, "learning_rate": 9.998286624877786e-06, "loss": 31.4005, "step": 53750 }, { "epoch": 0.10859860130819297, "grad_norm": 927.7950439453125, "learning_rate": 9.998277475218552e-06, "loss": 21.003, "step": 53760 }, { "epoch": 0.1086188019408768, "grad_norm": 304.82440185546875, "learning_rate": 9.998268301198317e-06, "loss": 28.2366, "step": 53770 }, { "epoch": 0.1086390025735606, "grad_norm": 485.1670837402344, "learning_rate": 9.99825910281713e-06, "loss": 20.6205, "step": 53780 }, { "epoch": 0.10865920320624442, "grad_norm": 731.8318481445312, "learning_rate": 9.998249880075033e-06, "loss": 34.5541, "step": 53790 }, { "epoch": 0.10867940383892824, "grad_norm": 604.5245361328125, "learning_rate": 9.998240632972073e-06, "loss": 55.5603, "step": 53800 }, { "epoch": 0.10869960447161205, "grad_norm": 246.02989196777344, "learning_rate": 9.998231361508295e-06, "loss": 45.6582, "step": 53810 }, { "epoch": 0.10871980510429587, "grad_norm": 198.7808380126953, "learning_rate": 9.998222065683743e-06, "loss": 33.1657, "step": 53820 }, { "epoch": 0.10874000573697969, "grad_norm": 500.4750671386719, "learning_rate": 9.998212745498464e-06, "loss": 20.7875, "step": 53830 }, { "epoch": 0.10876020636966349, "grad_norm": 19.5983943939209, "learning_rate": 9.9982034009525e-06, "loss": 23.3247, "step": 53840 }, { "epoch": 0.10878040700234731, "grad_norm": 623.8035888671875, "learning_rate": 9.9981940320459e-06, "loss": 39.7996, "step": 53850 }, { "epoch": 0.10880060763503113, "grad_norm": 161.05856323242188, "learning_rate": 9.998184638778708e-06, "loss": 15.9248, "step": 53860 }, { "epoch": 0.10882080826771494, "grad_norm": 612.73681640625, "learning_rate": 9.99817522115097e-06, "loss": 26.1079, "step": 53870 }, { "epoch": 0.10884100890039876, "grad_norm": 522.5286865234375, "learning_rate": 9.998165779162734e-06, "loss": 14.3749, "step": 53880 }, { "epoch": 0.10886120953308258, "grad_norm": 1359.5078125, "learning_rate": 9.998156312814043e-06, "loss": 32.4587, "step": 53890 }, { "epoch": 0.1088814101657664, "grad_norm": 127.57379913330078, "learning_rate": 9.998146822104943e-06, "loss": 26.3984, "step": 53900 }, { "epoch": 0.1089016107984502, "grad_norm": 600.6903076171875, 
"learning_rate": 9.998137307035486e-06, "loss": 23.3385, "step": 53910 }, { "epoch": 0.10892181143113402, "grad_norm": 444.3834533691406, "learning_rate": 9.99812776760571e-06, "loss": 16.6408, "step": 53920 }, { "epoch": 0.10894201206381784, "grad_norm": 877.210693359375, "learning_rate": 9.998118203815666e-06, "loss": 31.8723, "step": 53930 }, { "epoch": 0.10896221269650165, "grad_norm": 181.5795135498047, "learning_rate": 9.9981086156654e-06, "loss": 24.42, "step": 53940 }, { "epoch": 0.10898241332918547, "grad_norm": 507.66552734375, "learning_rate": 9.99809900315496e-06, "loss": 19.5684, "step": 53950 }, { "epoch": 0.10900261396186929, "grad_norm": 19.269437789916992, "learning_rate": 9.998089366284392e-06, "loss": 37.1751, "step": 53960 }, { "epoch": 0.1090228145945531, "grad_norm": 168.76394653320312, "learning_rate": 9.99807970505374e-06, "loss": 42.0356, "step": 53970 }, { "epoch": 0.10904301522723692, "grad_norm": 190.10775756835938, "learning_rate": 9.998070019463055e-06, "loss": 18.8945, "step": 53980 }, { "epoch": 0.10906321585992074, "grad_norm": 312.4748840332031, "learning_rate": 9.998060309512384e-06, "loss": 14.506, "step": 53990 }, { "epoch": 0.10908341649260454, "grad_norm": 254.49879455566406, "learning_rate": 9.998050575201772e-06, "loss": 22.7319, "step": 54000 }, { "epoch": 0.10910361712528836, "grad_norm": 532.451171875, "learning_rate": 9.99804081653127e-06, "loss": 26.7482, "step": 54010 }, { "epoch": 0.10912381775797218, "grad_norm": 229.19528198242188, "learning_rate": 9.99803103350092e-06, "loss": 35.1213, "step": 54020 }, { "epoch": 0.10914401839065599, "grad_norm": 440.79376220703125, "learning_rate": 9.998021226110775e-06, "loss": 26.4451, "step": 54030 }, { "epoch": 0.10916421902333981, "grad_norm": 519.68359375, "learning_rate": 9.99801139436088e-06, "loss": 30.7062, "step": 54040 }, { "epoch": 0.10918441965602363, "grad_norm": 55.21887969970703, "learning_rate": 9.998001538251283e-06, "loss": 42.4805, "step": 54050 }, { "epoch": 0.10920462028870745, "grad_norm": 317.5483093261719, "learning_rate": 9.997991657782033e-06, "loss": 31.2201, "step": 54060 }, { "epoch": 0.10922482092139126, "grad_norm": 345.548828125, "learning_rate": 9.997981752953179e-06, "loss": 25.0568, "step": 54070 }, { "epoch": 0.10924502155407508, "grad_norm": 359.0416259765625, "learning_rate": 9.997971823764766e-06, "loss": 21.3692, "step": 54080 }, { "epoch": 0.1092652221867589, "grad_norm": 595.4906005859375, "learning_rate": 9.997961870216849e-06, "loss": 35.6632, "step": 54090 }, { "epoch": 0.1092854228194427, "grad_norm": 575.0725708007812, "learning_rate": 9.997951892309468e-06, "loss": 28.2221, "step": 54100 }, { "epoch": 0.10930562345212652, "grad_norm": 317.45599365234375, "learning_rate": 9.997941890042677e-06, "loss": 22.9554, "step": 54110 }, { "epoch": 0.10932582408481034, "grad_norm": 75.43408966064453, "learning_rate": 9.997931863416522e-06, "loss": 24.0662, "step": 54120 }, { "epoch": 0.10934602471749415, "grad_norm": 599.4276733398438, "learning_rate": 9.997921812431055e-06, "loss": 31.3656, "step": 54130 }, { "epoch": 0.10936622535017797, "grad_norm": 584.6956176757812, "learning_rate": 9.997911737086322e-06, "loss": 25.7612, "step": 54140 }, { "epoch": 0.10938642598286179, "grad_norm": 881.17529296875, "learning_rate": 9.997901637382375e-06, "loss": 35.9875, "step": 54150 }, { "epoch": 0.1094066266155456, "grad_norm": 802.3683471679688, "learning_rate": 9.99789151331926e-06, "loss": 23.1203, "step": 54160 }, { "epoch": 0.10942682724822941, "grad_norm": 
1032.8818359375, "learning_rate": 9.997881364897028e-06, "loss": 31.3608, "step": 54170 }, { "epoch": 0.10944702788091323, "grad_norm": 646.2077026367188, "learning_rate": 9.99787119211573e-06, "loss": 13.592, "step": 54180 }, { "epoch": 0.10946722851359704, "grad_norm": 403.6358947753906, "learning_rate": 9.997860994975412e-06, "loss": 49.6401, "step": 54190 }, { "epoch": 0.10948742914628086, "grad_norm": 436.64154052734375, "learning_rate": 9.997850773476126e-06, "loss": 56.8283, "step": 54200 }, { "epoch": 0.10950762977896468, "grad_norm": 477.2749328613281, "learning_rate": 9.997840527617921e-06, "loss": 18.9835, "step": 54210 }, { "epoch": 0.1095278304116485, "grad_norm": 686.4449462890625, "learning_rate": 9.99783025740085e-06, "loss": 17.5475, "step": 54220 }, { "epoch": 0.1095480310443323, "grad_norm": 216.48248291015625, "learning_rate": 9.997819962824958e-06, "loss": 8.0905, "step": 54230 }, { "epoch": 0.10956823167701613, "grad_norm": 912.3517456054688, "learning_rate": 9.9978096438903e-06, "loss": 23.9246, "step": 54240 }, { "epoch": 0.10958843230969995, "grad_norm": 164.28170776367188, "learning_rate": 9.997799300596921e-06, "loss": 23.017, "step": 54250 }, { "epoch": 0.10960863294238375, "grad_norm": 504.7520751953125, "learning_rate": 9.997788932944877e-06, "loss": 23.6303, "step": 54260 }, { "epoch": 0.10962883357506757, "grad_norm": 206.88621520996094, "learning_rate": 9.997778540934213e-06, "loss": 14.847, "step": 54270 }, { "epoch": 0.10964903420775139, "grad_norm": 454.28759765625, "learning_rate": 9.997768124564986e-06, "loss": 38.5703, "step": 54280 }, { "epoch": 0.1096692348404352, "grad_norm": 222.45407104492188, "learning_rate": 9.997757683837242e-06, "loss": 19.3966, "step": 54290 }, { "epoch": 0.10968943547311902, "grad_norm": 229.19483947753906, "learning_rate": 9.997747218751032e-06, "loss": 28.7323, "step": 54300 }, { "epoch": 0.10970963610580284, "grad_norm": 463.9140930175781, "learning_rate": 9.997736729306409e-06, "loss": 19.2052, "step": 54310 }, { "epoch": 0.10972983673848664, "grad_norm": 145.03977966308594, "learning_rate": 9.997726215503422e-06, "loss": 34.755, "step": 54320 }, { "epoch": 0.10975003737117046, "grad_norm": 101.87061309814453, "learning_rate": 9.997715677342126e-06, "loss": 41.6257, "step": 54330 }, { "epoch": 0.10977023800385428, "grad_norm": 725.406494140625, "learning_rate": 9.99770511482257e-06, "loss": 33.6832, "step": 54340 }, { "epoch": 0.10979043863653809, "grad_norm": 580.8824462890625, "learning_rate": 9.997694527944804e-06, "loss": 29.1013, "step": 54350 }, { "epoch": 0.10981063926922191, "grad_norm": 512.6138916015625, "learning_rate": 9.99768391670888e-06, "loss": 12.6474, "step": 54360 }, { "epoch": 0.10983083990190573, "grad_norm": 195.0771942138672, "learning_rate": 9.997673281114852e-06, "loss": 18.8358, "step": 54370 }, { "epoch": 0.10985104053458955, "grad_norm": 817.9515380859375, "learning_rate": 9.99766262116277e-06, "loss": 30.0784, "step": 54380 }, { "epoch": 0.10987124116727336, "grad_norm": 674.0523681640625, "learning_rate": 9.997651936852689e-06, "loss": 18.0687, "step": 54390 }, { "epoch": 0.10989144179995718, "grad_norm": 369.1578674316406, "learning_rate": 9.997641228184656e-06, "loss": 19.4317, "step": 54400 }, { "epoch": 0.109911642432641, "grad_norm": 512.1553344726562, "learning_rate": 9.997630495158728e-06, "loss": 29.9607, "step": 54410 }, { "epoch": 0.1099318430653248, "grad_norm": 334.9314270019531, "learning_rate": 9.997619737774953e-06, "loss": 22.4586, "step": 54420 }, { "epoch": 
0.10995204369800862, "grad_norm": 0.5988953113555908, "learning_rate": 9.997608956033386e-06, "loss": 15.379, "step": 54430 }, { "epoch": 0.10997224433069244, "grad_norm": 1061.28662109375, "learning_rate": 9.99759814993408e-06, "loss": 49.6255, "step": 54440 }, { "epoch": 0.10999244496337625, "grad_norm": 1052.1668701171875, "learning_rate": 9.997587319477084e-06, "loss": 22.5256, "step": 54450 }, { "epoch": 0.11001264559606007, "grad_norm": 403.6893005371094, "learning_rate": 9.997576464662458e-06, "loss": 17.6924, "step": 54460 }, { "epoch": 0.11003284622874389, "grad_norm": 272.0291442871094, "learning_rate": 9.997565585490247e-06, "loss": 27.3708, "step": 54470 }, { "epoch": 0.1100530468614277, "grad_norm": 632.6046142578125, "learning_rate": 9.997554681960508e-06, "loss": 22.4342, "step": 54480 }, { "epoch": 0.11007324749411151, "grad_norm": 52.98223114013672, "learning_rate": 9.997543754073295e-06, "loss": 13.4893, "step": 54490 }, { "epoch": 0.11009344812679533, "grad_norm": 391.6419677734375, "learning_rate": 9.997532801828659e-06, "loss": 18.883, "step": 54500 }, { "epoch": 0.11011364875947914, "grad_norm": 259.5841369628906, "learning_rate": 9.997521825226654e-06, "loss": 37.2853, "step": 54510 }, { "epoch": 0.11013384939216296, "grad_norm": 824.7269287109375, "learning_rate": 9.997510824267334e-06, "loss": 23.0775, "step": 54520 }, { "epoch": 0.11015405002484678, "grad_norm": 314.40289306640625, "learning_rate": 9.997499798950752e-06, "loss": 27.897, "step": 54530 }, { "epoch": 0.1101742506575306, "grad_norm": 372.8232116699219, "learning_rate": 9.997488749276962e-06, "loss": 28.0352, "step": 54540 }, { "epoch": 0.1101944512902144, "grad_norm": 238.98486328125, "learning_rate": 9.997477675246018e-06, "loss": 15.9825, "step": 54550 }, { "epoch": 0.11021465192289823, "grad_norm": 268.8155822753906, "learning_rate": 9.997466576857974e-06, "loss": 28.761, "step": 54560 }, { "epoch": 0.11023485255558205, "grad_norm": 372.1516418457031, "learning_rate": 9.997455454112885e-06, "loss": 20.2901, "step": 54570 }, { "epoch": 0.11025505318826585, "grad_norm": 884.8719482421875, "learning_rate": 9.997444307010804e-06, "loss": 30.1917, "step": 54580 }, { "epoch": 0.11027525382094967, "grad_norm": 562.89404296875, "learning_rate": 9.997433135551786e-06, "loss": 45.1124, "step": 54590 }, { "epoch": 0.11029545445363349, "grad_norm": 471.60125732421875, "learning_rate": 9.997421939735885e-06, "loss": 16.8417, "step": 54600 }, { "epoch": 0.1103156550863173, "grad_norm": 163.50860595703125, "learning_rate": 9.997410719563155e-06, "loss": 16.828, "step": 54610 }, { "epoch": 0.11033585571900112, "grad_norm": 351.6030578613281, "learning_rate": 9.997399475033648e-06, "loss": 18.1173, "step": 54620 }, { "epoch": 0.11035605635168494, "grad_norm": 441.3740539550781, "learning_rate": 9.997388206147427e-06, "loss": 53.553, "step": 54630 }, { "epoch": 0.11037625698436875, "grad_norm": 431.582275390625, "learning_rate": 9.99737691290454e-06, "loss": 21.1727, "step": 54640 }, { "epoch": 0.11039645761705257, "grad_norm": 929.2691650390625, "learning_rate": 9.997365595305045e-06, "loss": 32.7812, "step": 54650 }, { "epoch": 0.11041665824973639, "grad_norm": 606.33935546875, "learning_rate": 9.997354253348994e-06, "loss": 20.2651, "step": 54660 }, { "epoch": 0.11043685888242019, "grad_norm": 936.4393920898438, "learning_rate": 9.997342887036446e-06, "loss": 29.7989, "step": 54670 }, { "epoch": 0.11045705951510401, "grad_norm": 1194.6771240234375, "learning_rate": 9.997331496367455e-06, "loss": 42.4737, "step": 
54680 }, { "epoch": 0.11047726014778783, "grad_norm": 923.7783813476562, "learning_rate": 9.997320081342076e-06, "loss": 34.2305, "step": 54690 }, { "epoch": 0.11049746078047164, "grad_norm": 231.43601989746094, "learning_rate": 9.997308641960365e-06, "loss": 43.9614, "step": 54700 }, { "epoch": 0.11051766141315546, "grad_norm": 400.3169860839844, "learning_rate": 9.997297178222378e-06, "loss": 46.262, "step": 54710 }, { "epoch": 0.11053786204583928, "grad_norm": 156.56224060058594, "learning_rate": 9.997285690128172e-06, "loss": 22.7581, "step": 54720 }, { "epoch": 0.1105580626785231, "grad_norm": 332.6772766113281, "learning_rate": 9.997274177677799e-06, "loss": 22.8242, "step": 54730 }, { "epoch": 0.1105782633112069, "grad_norm": 432.33856201171875, "learning_rate": 9.997262640871319e-06, "loss": 45.6695, "step": 54740 }, { "epoch": 0.11059846394389072, "grad_norm": 222.8570556640625, "learning_rate": 9.997251079708788e-06, "loss": 14.8523, "step": 54750 }, { "epoch": 0.11061866457657454, "grad_norm": 334.9029846191406, "learning_rate": 9.997239494190258e-06, "loss": 30.4605, "step": 54760 }, { "epoch": 0.11063886520925835, "grad_norm": 446.7870178222656, "learning_rate": 9.997227884315792e-06, "loss": 29.3506, "step": 54770 }, { "epoch": 0.11065906584194217, "grad_norm": 456.64947509765625, "learning_rate": 9.997216250085441e-06, "loss": 23.3379, "step": 54780 }, { "epoch": 0.11067926647462599, "grad_norm": 900.9299926757812, "learning_rate": 9.997204591499266e-06, "loss": 20.6617, "step": 54790 }, { "epoch": 0.1106994671073098, "grad_norm": 494.7242431640625, "learning_rate": 9.997192908557322e-06, "loss": 34.7412, "step": 54800 }, { "epoch": 0.11071966773999362, "grad_norm": 474.8592529296875, "learning_rate": 9.997181201259664e-06, "loss": 28.8738, "step": 54810 }, { "epoch": 0.11073986837267744, "grad_norm": 1227.4732666015625, "learning_rate": 9.997169469606353e-06, "loss": 31.113, "step": 54820 }, { "epoch": 0.11076006900536124, "grad_norm": 375.9781799316406, "learning_rate": 9.997157713597444e-06, "loss": 19.6979, "step": 54830 }, { "epoch": 0.11078026963804506, "grad_norm": 57.54928207397461, "learning_rate": 9.997145933232994e-06, "loss": 51.4527, "step": 54840 }, { "epoch": 0.11080047027072888, "grad_norm": 1278.0533447265625, "learning_rate": 9.99713412851306e-06, "loss": 25.5548, "step": 54850 }, { "epoch": 0.11082067090341269, "grad_norm": 505.0701599121094, "learning_rate": 9.9971222994377e-06, "loss": 27.7517, "step": 54860 }, { "epoch": 0.11084087153609651, "grad_norm": 332.50238037109375, "learning_rate": 9.997110446006974e-06, "loss": 19.6809, "step": 54870 }, { "epoch": 0.11086107216878033, "grad_norm": 26.537817001342773, "learning_rate": 9.997098568220937e-06, "loss": 21.3314, "step": 54880 }, { "epoch": 0.11088127280146415, "grad_norm": 660.31982421875, "learning_rate": 9.997086666079647e-06, "loss": 39.6938, "step": 54890 }, { "epoch": 0.11090147343414795, "grad_norm": 132.39810180664062, "learning_rate": 9.997074739583162e-06, "loss": 26.4871, "step": 54900 }, { "epoch": 0.11092167406683177, "grad_norm": 183.94454956054688, "learning_rate": 9.997062788731541e-06, "loss": 41.0132, "step": 54910 }, { "epoch": 0.1109418746995156, "grad_norm": 370.9226989746094, "learning_rate": 9.997050813524843e-06, "loss": 17.3327, "step": 54920 }, { "epoch": 0.1109620753321994, "grad_norm": 385.3973083496094, "learning_rate": 9.997038813963126e-06, "loss": 33.9201, "step": 54930 }, { "epoch": 0.11098227596488322, "grad_norm": 227.87315368652344, "learning_rate": 
9.997026790046446e-06, "loss": 25.1385, "step": 54940 }, { "epoch": 0.11100247659756704, "grad_norm": 280.9164123535156, "learning_rate": 9.997014741774866e-06, "loss": 32.4668, "step": 54950 }, { "epoch": 0.11102267723025085, "grad_norm": 86.0960693359375, "learning_rate": 9.99700266914844e-06, "loss": 24.8734, "step": 54960 }, { "epoch": 0.11104287786293467, "grad_norm": 495.7022399902344, "learning_rate": 9.996990572167229e-06, "loss": 23.1751, "step": 54970 }, { "epoch": 0.11106307849561849, "grad_norm": 260.6387939453125, "learning_rate": 9.996978450831293e-06, "loss": 20.1213, "step": 54980 }, { "epoch": 0.11108327912830229, "grad_norm": 1072.140380859375, "learning_rate": 9.99696630514069e-06, "loss": 45.5128, "step": 54990 }, { "epoch": 0.11110347976098611, "grad_norm": 288.9633483886719, "learning_rate": 9.99695413509548e-06, "loss": 12.2324, "step": 55000 }, { "epoch": 0.11112368039366993, "grad_norm": 443.3056945800781, "learning_rate": 9.99694194069572e-06, "loss": 15.8432, "step": 55010 }, { "epoch": 0.11114388102635374, "grad_norm": 394.9388427734375, "learning_rate": 9.996929721941472e-06, "loss": 17.3093, "step": 55020 }, { "epoch": 0.11116408165903756, "grad_norm": 652.3133544921875, "learning_rate": 9.996917478832794e-06, "loss": 27.1608, "step": 55030 }, { "epoch": 0.11118428229172138, "grad_norm": 6.787502288818359, "learning_rate": 9.996905211369748e-06, "loss": 23.7932, "step": 55040 }, { "epoch": 0.1112044829244052, "grad_norm": 278.0809020996094, "learning_rate": 9.99689291955239e-06, "loss": 18.0862, "step": 55050 }, { "epoch": 0.111224683557089, "grad_norm": 312.7170104980469, "learning_rate": 9.996880603380784e-06, "loss": 29.128, "step": 55060 }, { "epoch": 0.11124488418977282, "grad_norm": 130.94741821289062, "learning_rate": 9.996868262854986e-06, "loss": 19.5875, "step": 55070 }, { "epoch": 0.11126508482245664, "grad_norm": 140.8140106201172, "learning_rate": 9.996855897975058e-06, "loss": 34.391, "step": 55080 }, { "epoch": 0.11128528545514045, "grad_norm": 270.0653076171875, "learning_rate": 9.996843508741061e-06, "loss": 36.159, "step": 55090 }, { "epoch": 0.11130548608782427, "grad_norm": 353.35723876953125, "learning_rate": 9.996831095153054e-06, "loss": 25.6871, "step": 55100 }, { "epoch": 0.11132568672050809, "grad_norm": 827.9155883789062, "learning_rate": 9.9968186572111e-06, "loss": 17.5517, "step": 55110 }, { "epoch": 0.1113458873531919, "grad_norm": 444.61322021484375, "learning_rate": 9.996806194915258e-06, "loss": 17.5781, "step": 55120 }, { "epoch": 0.11136608798587572, "grad_norm": 321.96417236328125, "learning_rate": 9.996793708265586e-06, "loss": 21.4721, "step": 55130 }, { "epoch": 0.11138628861855954, "grad_norm": 632.9940185546875, "learning_rate": 9.99678119726215e-06, "loss": 43.9916, "step": 55140 }, { "epoch": 0.11140648925124334, "grad_norm": 361.72259521484375, "learning_rate": 9.996768661905008e-06, "loss": 26.2405, "step": 55150 }, { "epoch": 0.11142668988392716, "grad_norm": 176.63058471679688, "learning_rate": 9.996756102194222e-06, "loss": 25.3955, "step": 55160 }, { "epoch": 0.11144689051661098, "grad_norm": 364.80621337890625, "learning_rate": 9.996743518129852e-06, "loss": 27.8307, "step": 55170 }, { "epoch": 0.11146709114929479, "grad_norm": 623.612060546875, "learning_rate": 9.99673090971196e-06, "loss": 21.1618, "step": 55180 }, { "epoch": 0.11148729178197861, "grad_norm": 511.4942321777344, "learning_rate": 9.996718276940608e-06, "loss": 51.635, "step": 55190 }, { "epoch": 0.11150749241466243, "grad_norm": 
248.29827880859375, "learning_rate": 9.996705619815857e-06, "loss": 33.6349, "step": 55200 }, { "epoch": 0.11152769304734625, "grad_norm": 206.81057739257812, "learning_rate": 9.996692938337768e-06, "loss": 30.27, "step": 55210 }, { "epoch": 0.11154789368003006, "grad_norm": 1038.8233642578125, "learning_rate": 9.996680232506404e-06, "loss": 27.2952, "step": 55220 }, { "epoch": 0.11156809431271388, "grad_norm": 291.82684326171875, "learning_rate": 9.996667502321829e-06, "loss": 33.1123, "step": 55230 }, { "epoch": 0.1115882949453977, "grad_norm": 355.9356384277344, "learning_rate": 9.9966547477841e-06, "loss": 40.046, "step": 55240 }, { "epoch": 0.1116084955780815, "grad_norm": 349.36199951171875, "learning_rate": 9.996641968893281e-06, "loss": 31.7898, "step": 55250 }, { "epoch": 0.11162869621076532, "grad_norm": 279.517578125, "learning_rate": 9.996629165649437e-06, "loss": 19.8619, "step": 55260 }, { "epoch": 0.11164889684344914, "grad_norm": 702.2809448242188, "learning_rate": 9.996616338052629e-06, "loss": 19.4479, "step": 55270 }, { "epoch": 0.11166909747613295, "grad_norm": 339.9565124511719, "learning_rate": 9.996603486102918e-06, "loss": 17.744, "step": 55280 }, { "epoch": 0.11168929810881677, "grad_norm": 548.6559448242188, "learning_rate": 9.996590609800367e-06, "loss": 36.1667, "step": 55290 }, { "epoch": 0.11170949874150059, "grad_norm": 188.1508331298828, "learning_rate": 9.99657770914504e-06, "loss": 25.0498, "step": 55300 }, { "epoch": 0.1117296993741844, "grad_norm": 133.46206665039062, "learning_rate": 9.996564784137e-06, "loss": 21.7022, "step": 55310 }, { "epoch": 0.11174990000686821, "grad_norm": 527.6439208984375, "learning_rate": 9.996551834776309e-06, "loss": 29.3728, "step": 55320 }, { "epoch": 0.11177010063955203, "grad_norm": 766.2464599609375, "learning_rate": 9.996538861063029e-06, "loss": 22.1587, "step": 55330 }, { "epoch": 0.11179030127223584, "grad_norm": 281.0527038574219, "learning_rate": 9.996525862997225e-06, "loss": 14.2926, "step": 55340 }, { "epoch": 0.11181050190491966, "grad_norm": 428.09002685546875, "learning_rate": 9.99651284057896e-06, "loss": 23.0039, "step": 55350 }, { "epoch": 0.11183070253760348, "grad_norm": 353.99432373046875, "learning_rate": 9.9964997938083e-06, "loss": 23.1277, "step": 55360 }, { "epoch": 0.1118509031702873, "grad_norm": 177.79042053222656, "learning_rate": 9.996486722685302e-06, "loss": 27.636, "step": 55370 }, { "epoch": 0.1118711038029711, "grad_norm": 402.3406982421875, "learning_rate": 9.996473627210035e-06, "loss": 17.3252, "step": 55380 }, { "epoch": 0.11189130443565493, "grad_norm": 906.8685302734375, "learning_rate": 9.996460507382563e-06, "loss": 21.174, "step": 55390 }, { "epoch": 0.11191150506833875, "grad_norm": 865.0765991210938, "learning_rate": 9.996447363202947e-06, "loss": 30.8987, "step": 55400 }, { "epoch": 0.11193170570102255, "grad_norm": 144.87705993652344, "learning_rate": 9.996434194671254e-06, "loss": 21.6505, "step": 55410 }, { "epoch": 0.11195190633370637, "grad_norm": 489.4412536621094, "learning_rate": 9.996421001787545e-06, "loss": 23.8128, "step": 55420 }, { "epoch": 0.11197210696639019, "grad_norm": 224.87892150878906, "learning_rate": 9.996407784551888e-06, "loss": 20.8717, "step": 55430 }, { "epoch": 0.111992307599074, "grad_norm": 768.6514282226562, "learning_rate": 9.996394542964343e-06, "loss": 15.8672, "step": 55440 }, { "epoch": 0.11201250823175782, "grad_norm": 360.9457702636719, "learning_rate": 9.996381277024978e-06, "loss": 24.7542, "step": 55450 }, { "epoch": 
0.11203270886444164, "grad_norm": 1402.239990234375, "learning_rate": 9.996367986733857e-06, "loss": 45.6634, "step": 55460 }, { "epoch": 0.11205290949712544, "grad_norm": 317.9265441894531, "learning_rate": 9.996354672091044e-06, "loss": 29.3877, "step": 55470 }, { "epoch": 0.11207311012980926, "grad_norm": 490.77606201171875, "learning_rate": 9.996341333096606e-06, "loss": 21.8087, "step": 55480 }, { "epoch": 0.11209331076249308, "grad_norm": 421.822509765625, "learning_rate": 9.996327969750605e-06, "loss": 34.6557, "step": 55490 }, { "epoch": 0.11211351139517689, "grad_norm": 159.26327514648438, "learning_rate": 9.996314582053106e-06, "loss": 22.0908, "step": 55500 }, { "epoch": 0.11213371202786071, "grad_norm": 531.8242797851562, "learning_rate": 9.996301170004179e-06, "loss": 26.5158, "step": 55510 }, { "epoch": 0.11215391266054453, "grad_norm": 328.302490234375, "learning_rate": 9.996287733603883e-06, "loss": 15.4036, "step": 55520 }, { "epoch": 0.11217411329322835, "grad_norm": 70.45991516113281, "learning_rate": 9.996274272852289e-06, "loss": 21.8658, "step": 55530 }, { "epoch": 0.11219431392591216, "grad_norm": 393.1010437011719, "learning_rate": 9.996260787749457e-06, "loss": 15.8029, "step": 55540 }, { "epoch": 0.11221451455859598, "grad_norm": 866.2672729492188, "learning_rate": 9.99624727829546e-06, "loss": 22.1823, "step": 55550 }, { "epoch": 0.1122347151912798, "grad_norm": 341.88909912109375, "learning_rate": 9.996233744490356e-06, "loss": 18.3481, "step": 55560 }, { "epoch": 0.1122549158239636, "grad_norm": 340.0164489746094, "learning_rate": 9.996220186334217e-06, "loss": 16.4936, "step": 55570 }, { "epoch": 0.11227511645664742, "grad_norm": 381.4293518066406, "learning_rate": 9.996206603827105e-06, "loss": 21.0376, "step": 55580 }, { "epoch": 0.11229531708933124, "grad_norm": 1209.41845703125, "learning_rate": 9.99619299696909e-06, "loss": 46.2751, "step": 55590 }, { "epoch": 0.11231551772201505, "grad_norm": 558.7429809570312, "learning_rate": 9.996179365760235e-06, "loss": 32.1158, "step": 55600 }, { "epoch": 0.11233571835469887, "grad_norm": 306.48089599609375, "learning_rate": 9.996165710200607e-06, "loss": 34.9552, "step": 55610 }, { "epoch": 0.11235591898738269, "grad_norm": 435.9257507324219, "learning_rate": 9.996152030290276e-06, "loss": 25.7519, "step": 55620 }, { "epoch": 0.1123761196200665, "grad_norm": 270.9410095214844, "learning_rate": 9.996138326029303e-06, "loss": 37.5972, "step": 55630 }, { "epoch": 0.11239632025275031, "grad_norm": 586.7861938476562, "learning_rate": 9.99612459741776e-06, "loss": 11.5343, "step": 55640 }, { "epoch": 0.11241652088543413, "grad_norm": 965.9428100585938, "learning_rate": 9.99611084445571e-06, "loss": 34.3882, "step": 55650 }, { "epoch": 0.11243672151811794, "grad_norm": 423.5049133300781, "learning_rate": 9.996097067143223e-06, "loss": 19.8324, "step": 55660 }, { "epoch": 0.11245692215080176, "grad_norm": 628.5164794921875, "learning_rate": 9.996083265480366e-06, "loss": 15.5108, "step": 55670 }, { "epoch": 0.11247712278348558, "grad_norm": 750.0355224609375, "learning_rate": 9.996069439467203e-06, "loss": 14.4539, "step": 55680 }, { "epoch": 0.1124973234161694, "grad_norm": 312.1278991699219, "learning_rate": 9.996055589103804e-06, "loss": 41.3886, "step": 55690 }, { "epoch": 0.1125175240488532, "grad_norm": 176.88836669921875, "learning_rate": 9.996041714390235e-06, "loss": 24.272, "step": 55700 }, { "epoch": 0.11253772468153703, "grad_norm": 282.3443908691406, "learning_rate": 9.996027815326565e-06, "loss": 29.1884, 
"step": 55710 }, { "epoch": 0.11255792531422085, "grad_norm": 459.432861328125, "learning_rate": 9.996013891912862e-06, "loss": 30.6273, "step": 55720 }, { "epoch": 0.11257812594690465, "grad_norm": 69.33020782470703, "learning_rate": 9.995999944149192e-06, "loss": 24.3404, "step": 55730 }, { "epoch": 0.11259832657958847, "grad_norm": 468.4703063964844, "learning_rate": 9.995985972035626e-06, "loss": 22.6565, "step": 55740 }, { "epoch": 0.11261852721227229, "grad_norm": 227.947265625, "learning_rate": 9.995971975572231e-06, "loss": 19.853, "step": 55750 }, { "epoch": 0.1126387278449561, "grad_norm": 875.2319946289062, "learning_rate": 9.995957954759073e-06, "loss": 30.5711, "step": 55760 }, { "epoch": 0.11265892847763992, "grad_norm": 507.3000793457031, "learning_rate": 9.995943909596222e-06, "loss": 27.3444, "step": 55770 }, { "epoch": 0.11267912911032374, "grad_norm": 289.5107421875, "learning_rate": 9.995929840083746e-06, "loss": 15.7379, "step": 55780 }, { "epoch": 0.11269932974300755, "grad_norm": 469.662353515625, "learning_rate": 9.995915746221715e-06, "loss": 40.0659, "step": 55790 }, { "epoch": 0.11271953037569137, "grad_norm": 589.2631225585938, "learning_rate": 9.995901628010196e-06, "loss": 20.8513, "step": 55800 }, { "epoch": 0.11273973100837519, "grad_norm": 828.27001953125, "learning_rate": 9.995887485449257e-06, "loss": 25.4181, "step": 55810 }, { "epoch": 0.11275993164105899, "grad_norm": 356.7001647949219, "learning_rate": 9.99587331853897e-06, "loss": 12.0291, "step": 55820 }, { "epoch": 0.11278013227374281, "grad_norm": 361.2005615234375, "learning_rate": 9.995859127279402e-06, "loss": 28.8626, "step": 55830 }, { "epoch": 0.11280033290642663, "grad_norm": 738.162353515625, "learning_rate": 9.995844911670623e-06, "loss": 30.355, "step": 55840 }, { "epoch": 0.11282053353911045, "grad_norm": 478.655029296875, "learning_rate": 9.995830671712701e-06, "loss": 31.6947, "step": 55850 }, { "epoch": 0.11284073417179426, "grad_norm": 278.16937255859375, "learning_rate": 9.995816407405708e-06, "loss": 17.1322, "step": 55860 }, { "epoch": 0.11286093480447808, "grad_norm": 565.4407958984375, "learning_rate": 9.995802118749708e-06, "loss": 13.4129, "step": 55870 }, { "epoch": 0.1128811354371619, "grad_norm": 794.0328369140625, "learning_rate": 9.995787805744778e-06, "loss": 36.1648, "step": 55880 }, { "epoch": 0.1129013360698457, "grad_norm": 404.40130615234375, "learning_rate": 9.995773468390983e-06, "loss": 51.3268, "step": 55890 }, { "epoch": 0.11292153670252952, "grad_norm": 479.6914978027344, "learning_rate": 9.995759106688394e-06, "loss": 42.5575, "step": 55900 }, { "epoch": 0.11294173733521334, "grad_norm": 426.2121276855469, "learning_rate": 9.99574472063708e-06, "loss": 25.6598, "step": 55910 }, { "epoch": 0.11296193796789715, "grad_norm": 378.1146240234375, "learning_rate": 9.995730310237113e-06, "loss": 16.542, "step": 55920 }, { "epoch": 0.11298213860058097, "grad_norm": 993.0328369140625, "learning_rate": 9.995715875488563e-06, "loss": 29.6863, "step": 55930 }, { "epoch": 0.11300233923326479, "grad_norm": 480.3172912597656, "learning_rate": 9.9957014163915e-06, "loss": 20.4958, "step": 55940 }, { "epoch": 0.1130225398659486, "grad_norm": 461.8092346191406, "learning_rate": 9.995686932945993e-06, "loss": 21.4223, "step": 55950 }, { "epoch": 0.11304274049863242, "grad_norm": 1219.12744140625, "learning_rate": 9.995672425152115e-06, "loss": 20.2931, "step": 55960 }, { "epoch": 0.11306294113131624, "grad_norm": 276.2924499511719, "learning_rate": 9.995657893009933e-06, 
"loss": 18.137, "step": 55970 }, { "epoch": 0.11308314176400004, "grad_norm": 500.78607177734375, "learning_rate": 9.995643336519523e-06, "loss": 71.622, "step": 55980 }, { "epoch": 0.11310334239668386, "grad_norm": 8.226629257202148, "learning_rate": 9.995628755680952e-06, "loss": 14.3438, "step": 55990 }, { "epoch": 0.11312354302936768, "grad_norm": 1123.1558837890625, "learning_rate": 9.995614150494293e-06, "loss": 32.6038, "step": 56000 }, { "epoch": 0.1131437436620515, "grad_norm": 306.28533935546875, "learning_rate": 9.995599520959615e-06, "loss": 22.0771, "step": 56010 }, { "epoch": 0.11316394429473531, "grad_norm": 290.10418701171875, "learning_rate": 9.995584867076994e-06, "loss": 33.6507, "step": 56020 }, { "epoch": 0.11318414492741913, "grad_norm": 321.5469055175781, "learning_rate": 9.995570188846495e-06, "loss": 19.5831, "step": 56030 }, { "epoch": 0.11320434556010295, "grad_norm": 2160.419189453125, "learning_rate": 9.995555486268193e-06, "loss": 23.1755, "step": 56040 }, { "epoch": 0.11322454619278675, "grad_norm": 494.45135498046875, "learning_rate": 9.995540759342161e-06, "loss": 24.4019, "step": 56050 }, { "epoch": 0.11324474682547057, "grad_norm": 220.35263061523438, "learning_rate": 9.995526008068469e-06, "loss": 33.1868, "step": 56060 }, { "epoch": 0.1132649474581544, "grad_norm": 943.328369140625, "learning_rate": 9.995511232447189e-06, "loss": 39.2561, "step": 56070 }, { "epoch": 0.1132851480908382, "grad_norm": 840.2725219726562, "learning_rate": 9.995496432478392e-06, "loss": 20.5088, "step": 56080 }, { "epoch": 0.11330534872352202, "grad_norm": 215.2857666015625, "learning_rate": 9.995481608162154e-06, "loss": 32.3851, "step": 56090 }, { "epoch": 0.11332554935620584, "grad_norm": 862.8607177734375, "learning_rate": 9.995466759498543e-06, "loss": 40.5288, "step": 56100 }, { "epoch": 0.11334574998888965, "grad_norm": 469.4685363769531, "learning_rate": 9.995451886487632e-06, "loss": 23.1097, "step": 56110 }, { "epoch": 0.11336595062157347, "grad_norm": 288.13287353515625, "learning_rate": 9.995436989129495e-06, "loss": 12.7289, "step": 56120 }, { "epoch": 0.11338615125425729, "grad_norm": 537.4690551757812, "learning_rate": 9.995422067424203e-06, "loss": 19.3094, "step": 56130 }, { "epoch": 0.11340635188694109, "grad_norm": 481.13201904296875, "learning_rate": 9.995407121371832e-06, "loss": 60.8726, "step": 56140 }, { "epoch": 0.11342655251962491, "grad_norm": 704.1107788085938, "learning_rate": 9.995392150972451e-06, "loss": 20.7292, "step": 56150 }, { "epoch": 0.11344675315230873, "grad_norm": 87.8348159790039, "learning_rate": 9.995377156226133e-06, "loss": 16.0251, "step": 56160 }, { "epoch": 0.11346695378499255, "grad_norm": 465.2171936035156, "learning_rate": 9.995362137132956e-06, "loss": 19.0864, "step": 56170 }, { "epoch": 0.11348715441767636, "grad_norm": 502.523193359375, "learning_rate": 9.995347093692987e-06, "loss": 51.2079, "step": 56180 }, { "epoch": 0.11350735505036018, "grad_norm": 278.1914978027344, "learning_rate": 9.995332025906304e-06, "loss": 17.8252, "step": 56190 }, { "epoch": 0.113527555683044, "grad_norm": 663.8987426757812, "learning_rate": 9.995316933772978e-06, "loss": 39.5888, "step": 56200 }, { "epoch": 0.1135477563157278, "grad_norm": 186.1903076171875, "learning_rate": 9.995301817293084e-06, "loss": 22.6794, "step": 56210 }, { "epoch": 0.11356795694841162, "grad_norm": 361.7859191894531, "learning_rate": 9.995286676466694e-06, "loss": 23.041, "step": 56220 }, { "epoch": 0.11358815758109544, "grad_norm": 83.45796203613281, 
"learning_rate": 9.995271511293881e-06, "loss": 28.4525, "step": 56230 }, { "epoch": 0.11360835821377925, "grad_norm": 630.6253662109375, "learning_rate": 9.995256321774722e-06, "loss": 22.4465, "step": 56240 }, { "epoch": 0.11362855884646307, "grad_norm": 1938.2076416015625, "learning_rate": 9.99524110790929e-06, "loss": 26.8141, "step": 56250 }, { "epoch": 0.11364875947914689, "grad_norm": 202.69126892089844, "learning_rate": 9.995225869697657e-06, "loss": 18.5333, "step": 56260 }, { "epoch": 0.1136689601118307, "grad_norm": 724.8658447265625, "learning_rate": 9.9952106071399e-06, "loss": 31.2706, "step": 56270 }, { "epoch": 0.11368916074451452, "grad_norm": 337.84869384765625, "learning_rate": 9.995195320236093e-06, "loss": 15.1961, "step": 56280 }, { "epoch": 0.11370936137719834, "grad_norm": 447.12237548828125, "learning_rate": 9.995180008986309e-06, "loss": 23.9454, "step": 56290 }, { "epoch": 0.11372956200988214, "grad_norm": 175.07080078125, "learning_rate": 9.995164673390624e-06, "loss": 15.8106, "step": 56300 }, { "epoch": 0.11374976264256596, "grad_norm": 708.8894653320312, "learning_rate": 9.995149313449114e-06, "loss": 27.8332, "step": 56310 }, { "epoch": 0.11376996327524978, "grad_norm": 524.0888671875, "learning_rate": 9.995133929161848e-06, "loss": 29.217, "step": 56320 }, { "epoch": 0.1137901639079336, "grad_norm": 191.51504516601562, "learning_rate": 9.995118520528908e-06, "loss": 24.1241, "step": 56330 }, { "epoch": 0.11381036454061741, "grad_norm": 293.5033264160156, "learning_rate": 9.995103087550366e-06, "loss": 37.2772, "step": 56340 }, { "epoch": 0.11383056517330123, "grad_norm": 759.746337890625, "learning_rate": 9.995087630226295e-06, "loss": 20.8201, "step": 56350 }, { "epoch": 0.11385076580598505, "grad_norm": 1000.7098388671875, "learning_rate": 9.995072148556776e-06, "loss": 37.3082, "step": 56360 }, { "epoch": 0.11387096643866886, "grad_norm": 338.8851318359375, "learning_rate": 9.995056642541879e-06, "loss": 28.0065, "step": 56370 }, { "epoch": 0.11389116707135268, "grad_norm": 513.4716796875, "learning_rate": 9.995041112181683e-06, "loss": 23.1701, "step": 56380 }, { "epoch": 0.1139113677040365, "grad_norm": 482.89727783203125, "learning_rate": 9.99502555747626e-06, "loss": 21.6878, "step": 56390 }, { "epoch": 0.1139315683367203, "grad_norm": 181.3020782470703, "learning_rate": 9.995009978425692e-06, "loss": 23.6508, "step": 56400 }, { "epoch": 0.11395176896940412, "grad_norm": 275.6293029785156, "learning_rate": 9.994994375030048e-06, "loss": 16.3618, "step": 56410 }, { "epoch": 0.11397196960208794, "grad_norm": 504.3753967285156, "learning_rate": 9.994978747289408e-06, "loss": 26.1073, "step": 56420 }, { "epoch": 0.11399217023477175, "grad_norm": 264.54803466796875, "learning_rate": 9.994963095203849e-06, "loss": 21.6772, "step": 56430 }, { "epoch": 0.11401237086745557, "grad_norm": 701.0449829101562, "learning_rate": 9.994947418773445e-06, "loss": 18.9392, "step": 56440 }, { "epoch": 0.11403257150013939, "grad_norm": 58.76970672607422, "learning_rate": 9.994931717998272e-06, "loss": 23.6807, "step": 56450 }, { "epoch": 0.1140527721328232, "grad_norm": 554.5676879882812, "learning_rate": 9.99491599287841e-06, "loss": 25.2369, "step": 56460 }, { "epoch": 0.11407297276550701, "grad_norm": 892.4497680664062, "learning_rate": 9.99490024341393e-06, "loss": 34.5214, "step": 56470 }, { "epoch": 0.11409317339819083, "grad_norm": 662.4153442382812, "learning_rate": 9.994884469604913e-06, "loss": 26.8336, "step": 56480 }, { "epoch": 0.11411337403087465, 
"grad_norm": 685.477294921875, "learning_rate": 9.994868671451436e-06, "loss": 19.0529, "step": 56490 }, { "epoch": 0.11413357466355846, "grad_norm": 465.7191467285156, "learning_rate": 9.994852848953574e-06, "loss": 30.9412, "step": 56500 }, { "epoch": 0.11415377529624228, "grad_norm": 403.6457824707031, "learning_rate": 9.994837002111407e-06, "loss": 24.6486, "step": 56510 }, { "epoch": 0.1141739759289261, "grad_norm": 743.2261962890625, "learning_rate": 9.994821130925007e-06, "loss": 22.5384, "step": 56520 }, { "epoch": 0.1141941765616099, "grad_norm": 684.90576171875, "learning_rate": 9.994805235394456e-06, "loss": 26.7867, "step": 56530 }, { "epoch": 0.11421437719429373, "grad_norm": 160.59158325195312, "learning_rate": 9.99478931551983e-06, "loss": 34.1178, "step": 56540 }, { "epoch": 0.11423457782697755, "grad_norm": 474.1269226074219, "learning_rate": 9.994773371301208e-06, "loss": 36.5215, "step": 56550 }, { "epoch": 0.11425477845966135, "grad_norm": 1074.53271484375, "learning_rate": 9.994757402738666e-06, "loss": 42.4489, "step": 56560 }, { "epoch": 0.11427497909234517, "grad_norm": 258.2461853027344, "learning_rate": 9.99474140983228e-06, "loss": 28.1245, "step": 56570 }, { "epoch": 0.11429517972502899, "grad_norm": 411.3861999511719, "learning_rate": 9.994725392582132e-06, "loss": 21.3379, "step": 56580 }, { "epoch": 0.1143153803577128, "grad_norm": 444.5906677246094, "learning_rate": 9.994709350988299e-06, "loss": 13.2252, "step": 56590 }, { "epoch": 0.11433558099039662, "grad_norm": 307.0662841796875, "learning_rate": 9.994693285050858e-06, "loss": 20.8745, "step": 56600 }, { "epoch": 0.11435578162308044, "grad_norm": 433.30780029296875, "learning_rate": 9.994677194769886e-06, "loss": 24.0843, "step": 56610 }, { "epoch": 0.11437598225576424, "grad_norm": 271.6571960449219, "learning_rate": 9.994661080145464e-06, "loss": 33.2398, "step": 56620 }, { "epoch": 0.11439618288844806, "grad_norm": 338.8603210449219, "learning_rate": 9.99464494117767e-06, "loss": 32.869, "step": 56630 }, { "epoch": 0.11441638352113188, "grad_norm": 358.50103759765625, "learning_rate": 9.994628777866582e-06, "loss": 21.8971, "step": 56640 }, { "epoch": 0.1144365841538157, "grad_norm": 423.21978759765625, "learning_rate": 9.99461259021228e-06, "loss": 31.6401, "step": 56650 }, { "epoch": 0.11445678478649951, "grad_norm": 133.86451721191406, "learning_rate": 9.99459637821484e-06, "loss": 21.5203, "step": 56660 }, { "epoch": 0.11447698541918333, "grad_norm": 179.37725830078125, "learning_rate": 9.994580141874345e-06, "loss": 22.2574, "step": 56670 }, { "epoch": 0.11449718605186715, "grad_norm": 458.38720703125, "learning_rate": 9.994563881190874e-06, "loss": 16.016, "step": 56680 }, { "epoch": 0.11451738668455096, "grad_norm": 234.48512268066406, "learning_rate": 9.9945475961645e-06, "loss": 36.4717, "step": 56690 }, { "epoch": 0.11453758731723478, "grad_norm": 906.83837890625, "learning_rate": 9.994531286795309e-06, "loss": 34.8452, "step": 56700 }, { "epoch": 0.1145577879499186, "grad_norm": 293.6820068359375, "learning_rate": 9.994514953083379e-06, "loss": 18.3865, "step": 56710 }, { "epoch": 0.1145779885826024, "grad_norm": 399.5999450683594, "learning_rate": 9.994498595028787e-06, "loss": 15.497, "step": 56720 }, { "epoch": 0.11459818921528622, "grad_norm": 321.9813232421875, "learning_rate": 9.994482212631616e-06, "loss": 22.6184, "step": 56730 }, { "epoch": 0.11461838984797004, "grad_norm": 612.1865844726562, "learning_rate": 9.994465805891944e-06, "loss": 33.9304, "step": 56740 }, { "epoch": 
0.11463859048065385, "grad_norm": 490.2412414550781, "learning_rate": 9.994449374809851e-06, "loss": 26.3115, "step": 56750 }, { "epoch": 0.11465879111333767, "grad_norm": 564.2010498046875, "learning_rate": 9.994432919385417e-06, "loss": 22.0646, "step": 56760 }, { "epoch": 0.11467899174602149, "grad_norm": 1255.3232421875, "learning_rate": 9.994416439618723e-06, "loss": 32.4541, "step": 56770 }, { "epoch": 0.1146991923787053, "grad_norm": 463.0643310546875, "learning_rate": 9.994399935509851e-06, "loss": 25.6335, "step": 56780 }, { "epoch": 0.11471939301138911, "grad_norm": 411.6350402832031, "learning_rate": 9.994383407058878e-06, "loss": 21.3374, "step": 56790 }, { "epoch": 0.11473959364407293, "grad_norm": 190.25454711914062, "learning_rate": 9.994366854265886e-06, "loss": 35.2417, "step": 56800 }, { "epoch": 0.11475979427675675, "grad_norm": 499.181396484375, "learning_rate": 9.994350277130956e-06, "loss": 27.2036, "step": 56810 }, { "epoch": 0.11477999490944056, "grad_norm": 353.4414367675781, "learning_rate": 9.994333675654169e-06, "loss": 31.7005, "step": 56820 }, { "epoch": 0.11480019554212438, "grad_norm": 1147.8031005859375, "learning_rate": 9.994317049835604e-06, "loss": 36.4782, "step": 56830 }, { "epoch": 0.1148203961748082, "grad_norm": 738.7057495117188, "learning_rate": 9.994300399675342e-06, "loss": 18.1057, "step": 56840 }, { "epoch": 0.114840596807492, "grad_norm": 61.7659797668457, "learning_rate": 9.994283725173468e-06, "loss": 14.8604, "step": 56850 }, { "epoch": 0.11486079744017583, "grad_norm": 513.6361694335938, "learning_rate": 9.994267026330063e-06, "loss": 26.7803, "step": 56860 }, { "epoch": 0.11488099807285965, "grad_norm": 395.6575622558594, "learning_rate": 9.994250303145203e-06, "loss": 29.9968, "step": 56870 }, { "epoch": 0.11490119870554345, "grad_norm": 477.5645446777344, "learning_rate": 9.994233555618973e-06, "loss": 31.5064, "step": 56880 }, { "epoch": 0.11492139933822727, "grad_norm": 823.677978515625, "learning_rate": 9.994216783751457e-06, "loss": 20.4399, "step": 56890 }, { "epoch": 0.11494159997091109, "grad_norm": 697.7006225585938, "learning_rate": 9.99419998754273e-06, "loss": 19.6363, "step": 56900 }, { "epoch": 0.1149618006035949, "grad_norm": 309.27020263671875, "learning_rate": 9.99418316699288e-06, "loss": 15.9355, "step": 56910 }, { "epoch": 0.11498200123627872, "grad_norm": 765.7940063476562, "learning_rate": 9.994166322101988e-06, "loss": 23.612, "step": 56920 }, { "epoch": 0.11500220186896254, "grad_norm": 422.702392578125, "learning_rate": 9.994149452870133e-06, "loss": 21.7339, "step": 56930 }, { "epoch": 0.11502240250164635, "grad_norm": 143.65939331054688, "learning_rate": 9.9941325592974e-06, "loss": 26.656, "step": 56940 }, { "epoch": 0.11504260313433017, "grad_norm": 522.1123657226562, "learning_rate": 9.994115641383872e-06, "loss": 16.4517, "step": 56950 }, { "epoch": 0.11506280376701399, "grad_norm": 439.05224609375, "learning_rate": 9.994098699129628e-06, "loss": 25.0762, "step": 56960 }, { "epoch": 0.1150830043996978, "grad_norm": 448.45843505859375, "learning_rate": 9.994081732534755e-06, "loss": 31.6349, "step": 56970 }, { "epoch": 0.11510320503238161, "grad_norm": 285.8194274902344, "learning_rate": 9.994064741599332e-06, "loss": 33.3975, "step": 56980 }, { "epoch": 0.11512340566506543, "grad_norm": 248.9784393310547, "learning_rate": 9.994047726323442e-06, "loss": 45.9833, "step": 56990 }, { "epoch": 0.11514360629774925, "grad_norm": 389.7409362792969, "learning_rate": 9.994030686707171e-06, "loss": 21.8294, "step": 
57000 }, { "epoch": 0.11516380693043306, "grad_norm": 615.904052734375, "learning_rate": 9.9940136227506e-06, "loss": 22.9632, "step": 57010 }, { "epoch": 0.11518400756311688, "grad_norm": 621.622314453125, "learning_rate": 9.993996534453812e-06, "loss": 33.6485, "step": 57020 }, { "epoch": 0.1152042081958007, "grad_norm": 725.99365234375, "learning_rate": 9.993979421816889e-06, "loss": 24.6697, "step": 57030 }, { "epoch": 0.1152244088284845, "grad_norm": 476.81597900390625, "learning_rate": 9.993962284839918e-06, "loss": 11.6037, "step": 57040 }, { "epoch": 0.11524460946116832, "grad_norm": 514.463623046875, "learning_rate": 9.99394512352298e-06, "loss": 17.1566, "step": 57050 }, { "epoch": 0.11526481009385214, "grad_norm": 104.52167510986328, "learning_rate": 9.993927937866158e-06, "loss": 17.8265, "step": 57060 }, { "epoch": 0.11528501072653595, "grad_norm": 642.501220703125, "learning_rate": 9.993910727869538e-06, "loss": 11.5884, "step": 57070 }, { "epoch": 0.11530521135921977, "grad_norm": 1070.10546875, "learning_rate": 9.993893493533203e-06, "loss": 33.6258, "step": 57080 }, { "epoch": 0.11532541199190359, "grad_norm": 349.5815124511719, "learning_rate": 9.993876234857236e-06, "loss": 26.892, "step": 57090 }, { "epoch": 0.1153456126245874, "grad_norm": 879.8796997070312, "learning_rate": 9.993858951841724e-06, "loss": 32.2006, "step": 57100 }, { "epoch": 0.11536581325727122, "grad_norm": 424.421875, "learning_rate": 9.993841644486747e-06, "loss": 21.2678, "step": 57110 }, { "epoch": 0.11538601388995504, "grad_norm": 462.0297546386719, "learning_rate": 9.993824312792393e-06, "loss": 19.2433, "step": 57120 }, { "epoch": 0.11540621452263886, "grad_norm": 856.4939575195312, "learning_rate": 9.993806956758743e-06, "loss": 31.9198, "step": 57130 }, { "epoch": 0.11542641515532266, "grad_norm": 427.4967346191406, "learning_rate": 9.993789576385884e-06, "loss": 24.4099, "step": 57140 }, { "epoch": 0.11544661578800648, "grad_norm": 440.6112060546875, "learning_rate": 9.993772171673901e-06, "loss": 23.8275, "step": 57150 }, { "epoch": 0.1154668164206903, "grad_norm": 592.0247192382812, "learning_rate": 9.993754742622879e-06, "loss": 22.3119, "step": 57160 }, { "epoch": 0.11548701705337411, "grad_norm": 569.0758056640625, "learning_rate": 9.993737289232902e-06, "loss": 25.2769, "step": 57170 }, { "epoch": 0.11550721768605793, "grad_norm": 488.72186279296875, "learning_rate": 9.993719811504053e-06, "loss": 46.9328, "step": 57180 }, { "epoch": 0.11552741831874175, "grad_norm": 543.4966430664062, "learning_rate": 9.993702309436419e-06, "loss": 32.973, "step": 57190 }, { "epoch": 0.11554761895142555, "grad_norm": 421.9209289550781, "learning_rate": 9.99368478303009e-06, "loss": 29.8, "step": 57200 }, { "epoch": 0.11556781958410937, "grad_norm": 626.4269409179688, "learning_rate": 9.993667232285142e-06, "loss": 27.3487, "step": 57210 }, { "epoch": 0.1155880202167932, "grad_norm": 692.6672973632812, "learning_rate": 9.993649657201669e-06, "loss": 38.2158, "step": 57220 }, { "epoch": 0.115608220849477, "grad_norm": 570.1915893554688, "learning_rate": 9.993632057779752e-06, "loss": 27.0207, "step": 57230 }, { "epoch": 0.11562842148216082, "grad_norm": 467.6327209472656, "learning_rate": 9.993614434019476e-06, "loss": 22.598, "step": 57240 }, { "epoch": 0.11564862211484464, "grad_norm": 325.28057861328125, "learning_rate": 9.993596785920932e-06, "loss": 19.8026, "step": 57250 }, { "epoch": 0.11566882274752845, "grad_norm": 511.07354736328125, "learning_rate": 9.993579113484202e-06, "loss": 15.4554, 
"step": 57260 }, { "epoch": 0.11568902338021227, "grad_norm": 673.0833740234375, "learning_rate": 9.993561416709372e-06, "loss": 20.6629, "step": 57270 }, { "epoch": 0.11570922401289609, "grad_norm": 536.3285522460938, "learning_rate": 9.99354369559653e-06, "loss": 15.0465, "step": 57280 }, { "epoch": 0.1157294246455799, "grad_norm": 496.5762634277344, "learning_rate": 9.993525950145761e-06, "loss": 43.6122, "step": 57290 }, { "epoch": 0.11574962527826371, "grad_norm": 224.7476806640625, "learning_rate": 9.993508180357154e-06, "loss": 30.9923, "step": 57300 }, { "epoch": 0.11576982591094753, "grad_norm": 402.7610168457031, "learning_rate": 9.993490386230793e-06, "loss": 16.9395, "step": 57310 }, { "epoch": 0.11579002654363135, "grad_norm": 367.4598693847656, "learning_rate": 9.993472567766764e-06, "loss": 38.8323, "step": 57320 }, { "epoch": 0.11581022717631516, "grad_norm": 364.91204833984375, "learning_rate": 9.993454724965157e-06, "loss": 41.1314, "step": 57330 }, { "epoch": 0.11583042780899898, "grad_norm": 645.4959106445312, "learning_rate": 9.993436857826058e-06, "loss": 20.6863, "step": 57340 }, { "epoch": 0.1158506284416828, "grad_norm": 1384.5391845703125, "learning_rate": 9.993418966349551e-06, "loss": 42.2686, "step": 57350 }, { "epoch": 0.1158708290743666, "grad_norm": 139.28945922851562, "learning_rate": 9.993401050535726e-06, "loss": 22.8077, "step": 57360 }, { "epoch": 0.11589102970705042, "grad_norm": 650.4097290039062, "learning_rate": 9.993383110384673e-06, "loss": 22.7724, "step": 57370 }, { "epoch": 0.11591123033973424, "grad_norm": 778.6038208007812, "learning_rate": 9.993365145896473e-06, "loss": 39.9742, "step": 57380 }, { "epoch": 0.11593143097241805, "grad_norm": 345.47930908203125, "learning_rate": 9.993347157071218e-06, "loss": 39.1161, "step": 57390 }, { "epoch": 0.11595163160510187, "grad_norm": 597.97900390625, "learning_rate": 9.993329143908994e-06, "loss": 18.4554, "step": 57400 }, { "epoch": 0.11597183223778569, "grad_norm": 596.5072021484375, "learning_rate": 9.993311106409891e-06, "loss": 42.1834, "step": 57410 }, { "epoch": 0.1159920328704695, "grad_norm": 614.6201782226562, "learning_rate": 9.993293044573995e-06, "loss": 24.9577, "step": 57420 }, { "epoch": 0.11601223350315332, "grad_norm": 279.330078125, "learning_rate": 9.993274958401392e-06, "loss": 15.4448, "step": 57430 }, { "epoch": 0.11603243413583714, "grad_norm": 958.8892211914062, "learning_rate": 9.993256847892175e-06, "loss": 38.1558, "step": 57440 }, { "epoch": 0.11605263476852096, "grad_norm": 280.5309753417969, "learning_rate": 9.993238713046428e-06, "loss": 24.53, "step": 57450 }, { "epoch": 0.11607283540120476, "grad_norm": 600.3644409179688, "learning_rate": 9.993220553864242e-06, "loss": 20.4499, "step": 57460 }, { "epoch": 0.11609303603388858, "grad_norm": 562.6834106445312, "learning_rate": 9.993202370345705e-06, "loss": 33.8889, "step": 57470 }, { "epoch": 0.1161132366665724, "grad_norm": 548.0901489257812, "learning_rate": 9.993184162490903e-06, "loss": 29.6577, "step": 57480 }, { "epoch": 0.11613343729925621, "grad_norm": 2284.564208984375, "learning_rate": 9.99316593029993e-06, "loss": 16.7488, "step": 57490 }, { "epoch": 0.11615363793194003, "grad_norm": 303.5821838378906, "learning_rate": 9.993147673772869e-06, "loss": 12.0639, "step": 57500 }, { "epoch": 0.11617383856462385, "grad_norm": 66.35529327392578, "learning_rate": 9.993129392909814e-06, "loss": 12.7584, "step": 57510 }, { "epoch": 0.11619403919730766, "grad_norm": 562.1385498046875, "learning_rate": 
9.993111087710852e-06, "loss": 27.5785, "step": 57520 }, { "epoch": 0.11621423982999148, "grad_norm": 539.2239990234375, "learning_rate": 9.993092758176071e-06, "loss": 27.2716, "step": 57530 }, { "epoch": 0.1162344404626753, "grad_norm": 614.6983032226562, "learning_rate": 9.993074404305563e-06, "loss": 42.4461, "step": 57540 }, { "epoch": 0.1162546410953591, "grad_norm": 294.503662109375, "learning_rate": 9.993056026099415e-06, "loss": 19.2669, "step": 57550 }, { "epoch": 0.11627484172804292, "grad_norm": 1092.808837890625, "learning_rate": 9.993037623557716e-06, "loss": 27.4644, "step": 57560 }, { "epoch": 0.11629504236072674, "grad_norm": 371.67474365234375, "learning_rate": 9.993019196680558e-06, "loss": 29.2138, "step": 57570 }, { "epoch": 0.11631524299341055, "grad_norm": 180.21095275878906, "learning_rate": 9.993000745468031e-06, "loss": 31.5071, "step": 57580 }, { "epoch": 0.11633544362609437, "grad_norm": 300.315673828125, "learning_rate": 9.992982269920223e-06, "loss": 34.1091, "step": 57590 }, { "epoch": 0.11635564425877819, "grad_norm": 456.3833312988281, "learning_rate": 9.992963770037227e-06, "loss": 33.3168, "step": 57600 }, { "epoch": 0.11637584489146201, "grad_norm": 561.9912719726562, "learning_rate": 9.99294524581913e-06, "loss": 24.6334, "step": 57610 }, { "epoch": 0.11639604552414581, "grad_norm": 923.3316650390625, "learning_rate": 9.992926697266023e-06, "loss": 32.0695, "step": 57620 }, { "epoch": 0.11641624615682963, "grad_norm": 271.27056884765625, "learning_rate": 9.992908124377997e-06, "loss": 24.4795, "step": 57630 }, { "epoch": 0.11643644678951345, "grad_norm": 366.7196960449219, "learning_rate": 9.992889527155143e-06, "loss": 12.7371, "step": 57640 }, { "epoch": 0.11645664742219726, "grad_norm": 556.1275634765625, "learning_rate": 9.992870905597549e-06, "loss": 29.0126, "step": 57650 }, { "epoch": 0.11647684805488108, "grad_norm": 286.32623291015625, "learning_rate": 9.99285225970531e-06, "loss": 12.455, "step": 57660 }, { "epoch": 0.1164970486875649, "grad_norm": 1047.10791015625, "learning_rate": 9.992833589478513e-06, "loss": 33.5002, "step": 57670 }, { "epoch": 0.1165172493202487, "grad_norm": 158.2174835205078, "learning_rate": 9.992814894917251e-06, "loss": 30.4101, "step": 57680 }, { "epoch": 0.11653744995293253, "grad_norm": 427.68572998046875, "learning_rate": 9.992796176021616e-06, "loss": 29.2073, "step": 57690 }, { "epoch": 0.11655765058561635, "grad_norm": 769.3206176757812, "learning_rate": 9.992777432791697e-06, "loss": 30.5703, "step": 57700 }, { "epoch": 0.11657785121830015, "grad_norm": 341.97418212890625, "learning_rate": 9.992758665227586e-06, "loss": 20.2494, "step": 57710 }, { "epoch": 0.11659805185098397, "grad_norm": 262.6230773925781, "learning_rate": 9.992739873329375e-06, "loss": 19.6372, "step": 57720 }, { "epoch": 0.11661825248366779, "grad_norm": 458.0865478515625, "learning_rate": 9.992721057097157e-06, "loss": 24.1996, "step": 57730 }, { "epoch": 0.1166384531163516, "grad_norm": 0.0, "learning_rate": 9.99270221653102e-06, "loss": 26.2471, "step": 57740 }, { "epoch": 0.11665865374903542, "grad_norm": 513.4843139648438, "learning_rate": 9.99268335163106e-06, "loss": 13.6434, "step": 57750 }, { "epoch": 0.11667885438171924, "grad_norm": 678.3419189453125, "learning_rate": 9.992664462397366e-06, "loss": 25.4183, "step": 57760 }, { "epoch": 0.11669905501440304, "grad_norm": 431.51934814453125, "learning_rate": 9.99264554883003e-06, "loss": 39.4392, "step": 57770 }, { "epoch": 0.11671925564708686, "grad_norm": 1319.972412109375, 
"learning_rate": 9.992626610929146e-06, "loss": 38.1995, "step": 57780 }, { "epoch": 0.11673945627977068, "grad_norm": 961.5303344726562, "learning_rate": 9.992607648694805e-06, "loss": 40.5593, "step": 57790 }, { "epoch": 0.1167596569124545, "grad_norm": 1148.9764404296875, "learning_rate": 9.9925886621271e-06, "loss": 41.0508, "step": 57800 }, { "epoch": 0.11677985754513831, "grad_norm": 970.7742309570312, "learning_rate": 9.992569651226123e-06, "loss": 33.3816, "step": 57810 }, { "epoch": 0.11680005817782213, "grad_norm": 494.38726806640625, "learning_rate": 9.992550615991968e-06, "loss": 24.0766, "step": 57820 }, { "epoch": 0.11682025881050595, "grad_norm": 767.6143798828125, "learning_rate": 9.992531556424726e-06, "loss": 23.0109, "step": 57830 }, { "epoch": 0.11684045944318976, "grad_norm": 1003.689697265625, "learning_rate": 9.992512472524491e-06, "loss": 26.832, "step": 57840 }, { "epoch": 0.11686066007587358, "grad_norm": 314.8854064941406, "learning_rate": 9.992493364291356e-06, "loss": 17.4941, "step": 57850 }, { "epoch": 0.1168808607085574, "grad_norm": 2019.5692138671875, "learning_rate": 9.992474231725412e-06, "loss": 12.1528, "step": 57860 }, { "epoch": 0.1169010613412412, "grad_norm": 562.8046264648438, "learning_rate": 9.992455074826757e-06, "loss": 37.6524, "step": 57870 }, { "epoch": 0.11692126197392502, "grad_norm": 376.6202392578125, "learning_rate": 9.99243589359548e-06, "loss": 20.6366, "step": 57880 }, { "epoch": 0.11694146260660884, "grad_norm": 657.8515625, "learning_rate": 9.992416688031676e-06, "loss": 59.3229, "step": 57890 }, { "epoch": 0.11696166323929265, "grad_norm": 494.7339172363281, "learning_rate": 9.992397458135438e-06, "loss": 25.8328, "step": 57900 }, { "epoch": 0.11698186387197647, "grad_norm": 543.6424560546875, "learning_rate": 9.992378203906862e-06, "loss": 44.8141, "step": 57910 }, { "epoch": 0.11700206450466029, "grad_norm": 639.5101928710938, "learning_rate": 9.99235892534604e-06, "loss": 17.6983, "step": 57920 }, { "epoch": 0.1170222651373441, "grad_norm": 370.078125, "learning_rate": 9.992339622453065e-06, "loss": 18.3153, "step": 57930 }, { "epoch": 0.11704246577002791, "grad_norm": 717.491943359375, "learning_rate": 9.992320295228032e-06, "loss": 17.3251, "step": 57940 }, { "epoch": 0.11706266640271173, "grad_norm": 760.3424072265625, "learning_rate": 9.992300943671035e-06, "loss": 28.1888, "step": 57950 }, { "epoch": 0.11708286703539555, "grad_norm": 713.1371459960938, "learning_rate": 9.99228156778217e-06, "loss": 34.6133, "step": 57960 }, { "epoch": 0.11710306766807936, "grad_norm": 922.029541015625, "learning_rate": 9.99226216756153e-06, "loss": 32.47, "step": 57970 }, { "epoch": 0.11712326830076318, "grad_norm": 440.2043762207031, "learning_rate": 9.99224274300921e-06, "loss": 23.6248, "step": 57980 }, { "epoch": 0.117143468933447, "grad_norm": 435.0998840332031, "learning_rate": 9.992223294125303e-06, "loss": 32.5848, "step": 57990 }, { "epoch": 0.11716366956613081, "grad_norm": 367.6516418457031, "learning_rate": 9.992203820909906e-06, "loss": 15.29, "step": 58000 }, { "epoch": 0.11718387019881463, "grad_norm": 1535.766845703125, "learning_rate": 9.992184323363112e-06, "loss": 34.3091, "step": 58010 }, { "epoch": 0.11720407083149845, "grad_norm": 151.68701171875, "learning_rate": 9.992164801485018e-06, "loss": 15.6894, "step": 58020 }, { "epoch": 0.11722427146418225, "grad_norm": 664.869140625, "learning_rate": 9.992145255275718e-06, "loss": 43.3754, "step": 58030 }, { "epoch": 0.11724447209686607, "grad_norm": 286.3050842285156, 
"learning_rate": 9.99212568473531e-06, "loss": 41.8678, "step": 58040 }, { "epoch": 0.1172646727295499, "grad_norm": 387.75909423828125, "learning_rate": 9.992106089863884e-06, "loss": 20.4063, "step": 58050 }, { "epoch": 0.1172848733622337, "grad_norm": 555.0672607421875, "learning_rate": 9.992086470661537e-06, "loss": 21.1624, "step": 58060 }, { "epoch": 0.11730507399491752, "grad_norm": 337.3147888183594, "learning_rate": 9.992066827128368e-06, "loss": 34.3024, "step": 58070 }, { "epoch": 0.11732527462760134, "grad_norm": 368.8452453613281, "learning_rate": 9.992047159264472e-06, "loss": 28.2762, "step": 58080 }, { "epoch": 0.11734547526028515, "grad_norm": 2197.8544921875, "learning_rate": 9.992027467069943e-06, "loss": 50.4354, "step": 58090 }, { "epoch": 0.11736567589296897, "grad_norm": 304.2486572265625, "learning_rate": 9.992007750544876e-06, "loss": 23.3722, "step": 58100 }, { "epoch": 0.11738587652565279, "grad_norm": 185.19680786132812, "learning_rate": 9.99198800968937e-06, "loss": 14.933, "step": 58110 }, { "epoch": 0.1174060771583366, "grad_norm": 280.1153259277344, "learning_rate": 9.991968244503519e-06, "loss": 16.9798, "step": 58120 }, { "epoch": 0.11742627779102041, "grad_norm": 76.7415542602539, "learning_rate": 9.991948454987422e-06, "loss": 31.7434, "step": 58130 }, { "epoch": 0.11744647842370423, "grad_norm": 420.1647033691406, "learning_rate": 9.99192864114117e-06, "loss": 24.274, "step": 58140 }, { "epoch": 0.11746667905638805, "grad_norm": 495.7421569824219, "learning_rate": 9.991908802964867e-06, "loss": 23.7836, "step": 58150 }, { "epoch": 0.11748687968907186, "grad_norm": 703.0047607421875, "learning_rate": 9.991888940458605e-06, "loss": 49.2084, "step": 58160 }, { "epoch": 0.11750708032175568, "grad_norm": 1381.2279052734375, "learning_rate": 9.99186905362248e-06, "loss": 26.2378, "step": 58170 }, { "epoch": 0.1175272809544395, "grad_norm": 596.7677001953125, "learning_rate": 9.991849142456593e-06, "loss": 28.4683, "step": 58180 }, { "epoch": 0.1175474815871233, "grad_norm": 591.5968017578125, "learning_rate": 9.991829206961038e-06, "loss": 21.3083, "step": 58190 }, { "epoch": 0.11756768221980712, "grad_norm": 319.48431396484375, "learning_rate": 9.991809247135912e-06, "loss": 17.7571, "step": 58200 }, { "epoch": 0.11758788285249094, "grad_norm": 525.16064453125, "learning_rate": 9.991789262981314e-06, "loss": 21.6295, "step": 58210 }, { "epoch": 0.11760808348517475, "grad_norm": 647.814208984375, "learning_rate": 9.99176925449734e-06, "loss": 27.8297, "step": 58220 }, { "epoch": 0.11762828411785857, "grad_norm": 604.1672973632812, "learning_rate": 9.991749221684088e-06, "loss": 30.7615, "step": 58230 }, { "epoch": 0.11764848475054239, "grad_norm": 349.92083740234375, "learning_rate": 9.991729164541656e-06, "loss": 12.1612, "step": 58240 }, { "epoch": 0.1176686853832262, "grad_norm": 504.90301513671875, "learning_rate": 9.991709083070143e-06, "loss": 27.4712, "step": 58250 }, { "epoch": 0.11768888601591002, "grad_norm": 342.3115234375, "learning_rate": 9.991688977269643e-06, "loss": 13.135, "step": 58260 }, { "epoch": 0.11770908664859384, "grad_norm": 734.429443359375, "learning_rate": 9.991668847140258e-06, "loss": 27.3773, "step": 58270 }, { "epoch": 0.11772928728127766, "grad_norm": 343.8572082519531, "learning_rate": 9.991648692682083e-06, "loss": 27.6731, "step": 58280 }, { "epoch": 0.11774948791396146, "grad_norm": 927.5995483398438, "learning_rate": 9.99162851389522e-06, "loss": 37.6311, "step": 58290 }, { "epoch": 0.11776968854664528, "grad_norm": 
367.6170654296875, "learning_rate": 9.991608310779762e-06, "loss": 20.4994, "step": 58300 }, { "epoch": 0.1177898891793291, "grad_norm": 320.6072998046875, "learning_rate": 9.991588083335812e-06, "loss": 19.2313, "step": 58310 }, { "epoch": 0.11781008981201291, "grad_norm": 23.609176635742188, "learning_rate": 9.991567831563468e-06, "loss": 17.761, "step": 58320 }, { "epoch": 0.11783029044469673, "grad_norm": 727.7561645507812, "learning_rate": 9.991547555462825e-06, "loss": 49.5363, "step": 58330 }, { "epoch": 0.11785049107738055, "grad_norm": 802.432373046875, "learning_rate": 9.991527255033988e-06, "loss": 29.4928, "step": 58340 }, { "epoch": 0.11787069171006435, "grad_norm": 270.4449768066406, "learning_rate": 9.99150693027705e-06, "loss": 21.9631, "step": 58350 }, { "epoch": 0.11789089234274817, "grad_norm": 470.1295471191406, "learning_rate": 9.991486581192115e-06, "loss": 28.0967, "step": 58360 }, { "epoch": 0.117911092975432, "grad_norm": 301.1944885253906, "learning_rate": 9.991466207779279e-06, "loss": 33.0549, "step": 58370 }, { "epoch": 0.1179312936081158, "grad_norm": 477.31915283203125, "learning_rate": 9.99144581003864e-06, "loss": 59.8705, "step": 58380 }, { "epoch": 0.11795149424079962, "grad_norm": 351.3213806152344, "learning_rate": 9.991425387970301e-06, "loss": 16.9762, "step": 58390 }, { "epoch": 0.11797169487348344, "grad_norm": 351.16082763671875, "learning_rate": 9.99140494157436e-06, "loss": 20.5696, "step": 58400 }, { "epoch": 0.11799189550616725, "grad_norm": 512.7572631835938, "learning_rate": 9.991384470850918e-06, "loss": 29.0036, "step": 58410 }, { "epoch": 0.11801209613885107, "grad_norm": 1209.859375, "learning_rate": 9.991363975800073e-06, "loss": 38.2597, "step": 58420 }, { "epoch": 0.11803229677153489, "grad_norm": 84.26258087158203, "learning_rate": 9.991343456421923e-06, "loss": 22.3503, "step": 58430 }, { "epoch": 0.1180524974042187, "grad_norm": 389.1960144042969, "learning_rate": 9.991322912716572e-06, "loss": 24.8009, "step": 58440 }, { "epoch": 0.11807269803690251, "grad_norm": 231.02340698242188, "learning_rate": 9.99130234468412e-06, "loss": 31.0219, "step": 58450 }, { "epoch": 0.11809289866958633, "grad_norm": 334.5686950683594, "learning_rate": 9.991281752324664e-06, "loss": 36.9654, "step": 58460 }, { "epoch": 0.11811309930227015, "grad_norm": 690.6244506835938, "learning_rate": 9.991261135638307e-06, "loss": 33.6558, "step": 58470 }, { "epoch": 0.11813329993495396, "grad_norm": 1505.632568359375, "learning_rate": 9.991240494625147e-06, "loss": 31.5277, "step": 58480 }, { "epoch": 0.11815350056763778, "grad_norm": 481.1512145996094, "learning_rate": 9.991219829285287e-06, "loss": 25.099, "step": 58490 }, { "epoch": 0.1181737012003216, "grad_norm": 308.86309814453125, "learning_rate": 9.991199139618828e-06, "loss": 27.3802, "step": 58500 }, { "epoch": 0.1181939018330054, "grad_norm": 517.23974609375, "learning_rate": 9.991178425625869e-06, "loss": 20.0147, "step": 58510 }, { "epoch": 0.11821410246568922, "grad_norm": 603.149169921875, "learning_rate": 9.99115768730651e-06, "loss": 18.7056, "step": 58520 }, { "epoch": 0.11823430309837304, "grad_norm": 542.2094116210938, "learning_rate": 9.991136924660856e-06, "loss": 25.3252, "step": 58530 }, { "epoch": 0.11825450373105685, "grad_norm": 313.8913879394531, "learning_rate": 9.991116137689006e-06, "loss": 25.2412, "step": 58540 }, { "epoch": 0.11827470436374067, "grad_norm": 169.6969451904297, "learning_rate": 9.991095326391061e-06, "loss": 25.847, "step": 58550 }, { "epoch": 0.11829490499642449, 
"grad_norm": 474.6405944824219, "learning_rate": 9.99107449076712e-06, "loss": 36.8564, "step": 58560 }, { "epoch": 0.1183151056291083, "grad_norm": 104.40969848632812, "learning_rate": 9.99105363081729e-06, "loss": 31.2089, "step": 58570 }, { "epoch": 0.11833530626179212, "grad_norm": 732.682373046875, "learning_rate": 9.99103274654167e-06, "loss": 38.1569, "step": 58580 }, { "epoch": 0.11835550689447594, "grad_norm": 895.7142944335938, "learning_rate": 9.99101183794036e-06, "loss": 28.7475, "step": 58590 }, { "epoch": 0.11837570752715976, "grad_norm": 870.6466064453125, "learning_rate": 9.990990905013466e-06, "loss": 38.2412, "step": 58600 }, { "epoch": 0.11839590815984356, "grad_norm": 542.204345703125, "learning_rate": 9.990969947761087e-06, "loss": 44.1003, "step": 58610 }, { "epoch": 0.11841610879252738, "grad_norm": 331.6095886230469, "learning_rate": 9.990948966183324e-06, "loss": 17.5997, "step": 58620 }, { "epoch": 0.1184363094252112, "grad_norm": 720.2861328125, "learning_rate": 9.990927960280283e-06, "loss": 18.8987, "step": 58630 }, { "epoch": 0.11845651005789501, "grad_norm": 239.30694580078125, "learning_rate": 9.990906930052065e-06, "loss": 26.3439, "step": 58640 }, { "epoch": 0.11847671069057883, "grad_norm": 426.04010009765625, "learning_rate": 9.99088587549877e-06, "loss": 19.9495, "step": 58650 }, { "epoch": 0.11849691132326265, "grad_norm": 659.5116577148438, "learning_rate": 9.990864796620503e-06, "loss": 34.0379, "step": 58660 }, { "epoch": 0.11851711195594646, "grad_norm": 318.29913330078125, "learning_rate": 9.990843693417366e-06, "loss": 33.4272, "step": 58670 }, { "epoch": 0.11853731258863028, "grad_norm": 1562.17578125, "learning_rate": 9.990822565889464e-06, "loss": 39.8931, "step": 58680 }, { "epoch": 0.1185575132213141, "grad_norm": 414.65179443359375, "learning_rate": 9.990801414036896e-06, "loss": 27.6652, "step": 58690 }, { "epoch": 0.1185777138539979, "grad_norm": 619.0970458984375, "learning_rate": 9.99078023785977e-06, "loss": 34.0049, "step": 58700 }, { "epoch": 0.11859791448668172, "grad_norm": 70.22889709472656, "learning_rate": 9.990759037358184e-06, "loss": 21.8346, "step": 58710 }, { "epoch": 0.11861811511936554, "grad_norm": 615.2586669921875, "learning_rate": 9.990737812532245e-06, "loss": 38.4461, "step": 58720 }, { "epoch": 0.11863831575204935, "grad_norm": 209.21034240722656, "learning_rate": 9.990716563382055e-06, "loss": 26.0752, "step": 58730 }, { "epoch": 0.11865851638473317, "grad_norm": 538.8511352539062, "learning_rate": 9.990695289907716e-06, "loss": 23.6841, "step": 58740 }, { "epoch": 0.11867871701741699, "grad_norm": 438.6738586425781, "learning_rate": 9.990673992109335e-06, "loss": 20.516, "step": 58750 }, { "epoch": 0.11869891765010081, "grad_norm": 688.6759033203125, "learning_rate": 9.990652669987016e-06, "loss": 29.0761, "step": 58760 }, { "epoch": 0.11871911828278461, "grad_norm": 660.4488525390625, "learning_rate": 9.990631323540858e-06, "loss": 38.8158, "step": 58770 }, { "epoch": 0.11873931891546843, "grad_norm": 245.68350219726562, "learning_rate": 9.990609952770969e-06, "loss": 26.0449, "step": 58780 }, { "epoch": 0.11875951954815225, "grad_norm": 518.3926391601562, "learning_rate": 9.990588557677454e-06, "loss": 21.3319, "step": 58790 }, { "epoch": 0.11877972018083606, "grad_norm": 302.8959655761719, "learning_rate": 9.990567138260414e-06, "loss": 25.8163, "step": 58800 }, { "epoch": 0.11879992081351988, "grad_norm": 538.0042724609375, "learning_rate": 9.990545694519956e-06, "loss": 20.2308, "step": 58810 }, { "epoch": 
0.1188201214462037, "grad_norm": 324.6055603027344, "learning_rate": 9.990524226456182e-06, "loss": 17.885, "step": 58820 }, { "epoch": 0.1188403220788875, "grad_norm": 277.4670715332031, "learning_rate": 9.9905027340692e-06, "loss": 21.5367, "step": 58830 }, { "epoch": 0.11886052271157133, "grad_norm": 645.9742431640625, "learning_rate": 9.990481217359112e-06, "loss": 28.8911, "step": 58840 }, { "epoch": 0.11888072334425515, "grad_norm": 592.8202514648438, "learning_rate": 9.990459676326025e-06, "loss": 36.6906, "step": 58850 }, { "epoch": 0.11890092397693895, "grad_norm": 239.2350616455078, "learning_rate": 9.990438110970043e-06, "loss": 34.2834, "step": 58860 }, { "epoch": 0.11892112460962277, "grad_norm": 448.14385986328125, "learning_rate": 9.990416521291268e-06, "loss": 30.5244, "step": 58870 }, { "epoch": 0.11894132524230659, "grad_norm": 569.7626953125, "learning_rate": 9.990394907289811e-06, "loss": 17.9717, "step": 58880 }, { "epoch": 0.1189615258749904, "grad_norm": 212.79605102539062, "learning_rate": 9.990373268965773e-06, "loss": 24.8917, "step": 58890 }, { "epoch": 0.11898172650767422, "grad_norm": 324.95703125, "learning_rate": 9.990351606319261e-06, "loss": 23.7205, "step": 58900 }, { "epoch": 0.11900192714035804, "grad_norm": 168.9674530029297, "learning_rate": 9.990329919350382e-06, "loss": 26.6645, "step": 58910 }, { "epoch": 0.11902212777304186, "grad_norm": 445.2579040527344, "learning_rate": 9.990308208059239e-06, "loss": 29.4624, "step": 58920 }, { "epoch": 0.11904232840572566, "grad_norm": 312.6114501953125, "learning_rate": 9.990286472445938e-06, "loss": 33.1878, "step": 58930 }, { "epoch": 0.11906252903840948, "grad_norm": 316.3656311035156, "learning_rate": 9.990264712510586e-06, "loss": 20.1057, "step": 58940 }, { "epoch": 0.1190827296710933, "grad_norm": 761.0764770507812, "learning_rate": 9.990242928253291e-06, "loss": 26.4023, "step": 58950 }, { "epoch": 0.11910293030377711, "grad_norm": 697.7257690429688, "learning_rate": 9.990221119674157e-06, "loss": 26.1063, "step": 58960 }, { "epoch": 0.11912313093646093, "grad_norm": 316.9278259277344, "learning_rate": 9.99019928677329e-06, "loss": 30.1808, "step": 58970 }, { "epoch": 0.11914333156914475, "grad_norm": 1050.027099609375, "learning_rate": 9.990177429550797e-06, "loss": 39.0877, "step": 58980 }, { "epoch": 0.11916353220182856, "grad_norm": 1752.5679931640625, "learning_rate": 9.990155548006783e-06, "loss": 46.105, "step": 58990 }, { "epoch": 0.11918373283451238, "grad_norm": 439.0074768066406, "learning_rate": 9.990133642141359e-06, "loss": 20.7704, "step": 59000 }, { "epoch": 0.1192039334671962, "grad_norm": 903.7244873046875, "learning_rate": 9.990111711954626e-06, "loss": 26.862, "step": 59010 }, { "epoch": 0.11922413409988, "grad_norm": 458.92303466796875, "learning_rate": 9.990089757446697e-06, "loss": 27.5504, "step": 59020 }, { "epoch": 0.11924433473256382, "grad_norm": 698.0676879882812, "learning_rate": 9.990067778617672e-06, "loss": 25.8991, "step": 59030 }, { "epoch": 0.11926453536524764, "grad_norm": 370.335693359375, "learning_rate": 9.990045775467664e-06, "loss": 17.0434, "step": 59040 }, { "epoch": 0.11928473599793145, "grad_norm": 358.4537658691406, "learning_rate": 9.990023747996778e-06, "loss": 41.7143, "step": 59050 }, { "epoch": 0.11930493663061527, "grad_norm": 152.5941619873047, "learning_rate": 9.990001696205121e-06, "loss": 28.2509, "step": 59060 }, { "epoch": 0.11932513726329909, "grad_norm": 308.4674987792969, "learning_rate": 9.989979620092802e-06, "loss": 41.3298, "step": 59070 
}, { "epoch": 0.11934533789598291, "grad_norm": 1057.538818359375, "learning_rate": 9.989957519659926e-06, "loss": 21.2202, "step": 59080 }, { "epoch": 0.11936553852866671, "grad_norm": 96.51363372802734, "learning_rate": 9.989935394906602e-06, "loss": 15.6763, "step": 59090 }, { "epoch": 0.11938573916135053, "grad_norm": 492.85235595703125, "learning_rate": 9.98991324583294e-06, "loss": 25.7886, "step": 59100 }, { "epoch": 0.11940593979403435, "grad_norm": 351.7673645019531, "learning_rate": 9.989891072439045e-06, "loss": 16.7681, "step": 59110 }, { "epoch": 0.11942614042671816, "grad_norm": 160.32521057128906, "learning_rate": 9.989868874725026e-06, "loss": 27.9819, "step": 59120 }, { "epoch": 0.11944634105940198, "grad_norm": 373.262939453125, "learning_rate": 9.989846652690992e-06, "loss": 38.1606, "step": 59130 }, { "epoch": 0.1194665416920858, "grad_norm": 405.1363220214844, "learning_rate": 9.989824406337049e-06, "loss": 16.6895, "step": 59140 }, { "epoch": 0.11948674232476961, "grad_norm": 0.0, "learning_rate": 9.989802135663308e-06, "loss": 25.7655, "step": 59150 }, { "epoch": 0.11950694295745343, "grad_norm": 384.34661865234375, "learning_rate": 9.989779840669878e-06, "loss": 16.8252, "step": 59160 }, { "epoch": 0.11952714359013725, "grad_norm": 223.25277709960938, "learning_rate": 9.989757521356864e-06, "loss": 29.3963, "step": 59170 }, { "epoch": 0.11954734422282105, "grad_norm": 473.12060546875, "learning_rate": 9.989735177724378e-06, "loss": 27.4276, "step": 59180 }, { "epoch": 0.11956754485550487, "grad_norm": 427.2732238769531, "learning_rate": 9.989712809772528e-06, "loss": 12.1941, "step": 59190 }, { "epoch": 0.1195877454881887, "grad_norm": 679.8904418945312, "learning_rate": 9.989690417501423e-06, "loss": 21.767, "step": 59200 }, { "epoch": 0.1196079461208725, "grad_norm": 848.4871215820312, "learning_rate": 9.989668000911173e-06, "loss": 37.814, "step": 59210 }, { "epoch": 0.11962814675355632, "grad_norm": 1310.9892578125, "learning_rate": 9.989645560001884e-06, "loss": 34.3109, "step": 59220 }, { "epoch": 0.11964834738624014, "grad_norm": 901.0699462890625, "learning_rate": 9.989623094773669e-06, "loss": 40.5445, "step": 59230 }, { "epoch": 0.11966854801892396, "grad_norm": 418.48486328125, "learning_rate": 9.989600605226637e-06, "loss": 13.6698, "step": 59240 }, { "epoch": 0.11968874865160777, "grad_norm": 536.8137817382812, "learning_rate": 9.989578091360896e-06, "loss": 27.8379, "step": 59250 }, { "epoch": 0.11970894928429159, "grad_norm": 376.47186279296875, "learning_rate": 9.989555553176556e-06, "loss": 27.6799, "step": 59260 }, { "epoch": 0.1197291499169754, "grad_norm": 207.98965454101562, "learning_rate": 9.989532990673729e-06, "loss": 19.1201, "step": 59270 }, { "epoch": 0.11974935054965921, "grad_norm": 511.27935791015625, "learning_rate": 9.989510403852521e-06, "loss": 16.5542, "step": 59280 }, { "epoch": 0.11976955118234303, "grad_norm": 453.9069519042969, "learning_rate": 9.989487792713045e-06, "loss": 34.6967, "step": 59290 }, { "epoch": 0.11978975181502685, "grad_norm": 347.8300476074219, "learning_rate": 9.989465157255413e-06, "loss": 56.5665, "step": 59300 }, { "epoch": 0.11980995244771066, "grad_norm": 910.8221435546875, "learning_rate": 9.98944249747973e-06, "loss": 23.1538, "step": 59310 }, { "epoch": 0.11983015308039448, "grad_norm": 453.1932067871094, "learning_rate": 9.989419813386112e-06, "loss": 14.0581, "step": 59320 }, { "epoch": 0.1198503537130783, "grad_norm": 909.9956665039062, "learning_rate": 9.989397104974665e-06, "loss": 41.8085, 
"step": 59330 }, { "epoch": 0.1198705543457621, "grad_norm": 358.1769714355469, "learning_rate": 9.989374372245503e-06, "loss": 17.7476, "step": 59340 }, { "epoch": 0.11989075497844592, "grad_norm": 4.939453601837158, "learning_rate": 9.989351615198734e-06, "loss": 22.7722, "step": 59350 }, { "epoch": 0.11991095561112974, "grad_norm": 536.285400390625, "learning_rate": 9.989328833834472e-06, "loss": 35.5165, "step": 59360 }, { "epoch": 0.11993115624381355, "grad_norm": 133.91326904296875, "learning_rate": 9.989306028152825e-06, "loss": 30.8306, "step": 59370 }, { "epoch": 0.11995135687649737, "grad_norm": 473.4412536621094, "learning_rate": 9.989283198153908e-06, "loss": 26.01, "step": 59380 }, { "epoch": 0.11997155750918119, "grad_norm": 402.10003662109375, "learning_rate": 9.989260343837827e-06, "loss": 35.8979, "step": 59390 }, { "epoch": 0.11999175814186501, "grad_norm": 619.9006958007812, "learning_rate": 9.989237465204698e-06, "loss": 24.5981, "step": 59400 }, { "epoch": 0.12001195877454882, "grad_norm": 230.40646362304688, "learning_rate": 9.989214562254628e-06, "loss": 20.0228, "step": 59410 }, { "epoch": 0.12003215940723264, "grad_norm": 986.6119995117188, "learning_rate": 9.989191634987734e-06, "loss": 45.3377, "step": 59420 }, { "epoch": 0.12005236003991646, "grad_norm": 708.2041015625, "learning_rate": 9.989168683404125e-06, "loss": 31.9151, "step": 59430 }, { "epoch": 0.12007256067260026, "grad_norm": 770.5503540039062, "learning_rate": 9.98914570750391e-06, "loss": 31.5929, "step": 59440 }, { "epoch": 0.12009276130528408, "grad_norm": 893.4546508789062, "learning_rate": 9.98912270728721e-06, "loss": 27.7154, "step": 59450 }, { "epoch": 0.1201129619379679, "grad_norm": 554.7857666015625, "learning_rate": 9.989099682754125e-06, "loss": 31.6928, "step": 59460 }, { "epoch": 0.12013316257065171, "grad_norm": 255.66104125976562, "learning_rate": 9.989076633904775e-06, "loss": 21.1733, "step": 59470 }, { "epoch": 0.12015336320333553, "grad_norm": 572.1908569335938, "learning_rate": 9.989053560739272e-06, "loss": 30.5591, "step": 59480 }, { "epoch": 0.12017356383601935, "grad_norm": 202.6723175048828, "learning_rate": 9.989030463257726e-06, "loss": 23.6414, "step": 59490 }, { "epoch": 0.12019376446870315, "grad_norm": 424.7135009765625, "learning_rate": 9.989007341460251e-06, "loss": 19.5265, "step": 59500 }, { "epoch": 0.12021396510138697, "grad_norm": 145.0359344482422, "learning_rate": 9.98898419534696e-06, "loss": 33.8042, "step": 59510 }, { "epoch": 0.1202341657340708, "grad_norm": 337.57708740234375, "learning_rate": 9.988961024917963e-06, "loss": 19.0491, "step": 59520 }, { "epoch": 0.1202543663667546, "grad_norm": 806.2337036132812, "learning_rate": 9.988937830173376e-06, "loss": 25.364, "step": 59530 }, { "epoch": 0.12027456699943842, "grad_norm": 343.5975036621094, "learning_rate": 9.988914611113311e-06, "loss": 34.3596, "step": 59540 }, { "epoch": 0.12029476763212224, "grad_norm": 233.7205810546875, "learning_rate": 9.988891367737882e-06, "loss": 23.9101, "step": 59550 }, { "epoch": 0.12031496826480606, "grad_norm": 336.2087707519531, "learning_rate": 9.988868100047203e-06, "loss": 27.152, "step": 59560 }, { "epoch": 0.12033516889748987, "grad_norm": 626.9401245117188, "learning_rate": 9.988844808041382e-06, "loss": 24.9398, "step": 59570 }, { "epoch": 0.12035536953017369, "grad_norm": 83.66886901855469, "learning_rate": 9.98882149172054e-06, "loss": 15.7853, "step": 59580 }, { "epoch": 0.1203755701628575, "grad_norm": 285.5963439941406, "learning_rate": 
9.988798151084783e-06, "loss": 20.7018, "step": 59590 }, { "epoch": 0.12039577079554131, "grad_norm": 113.7260971069336, "learning_rate": 9.988774786134235e-06, "loss": 27.5119, "step": 59600 }, { "epoch": 0.12041597142822513, "grad_norm": 674.3125, "learning_rate": 9.988751396869e-06, "loss": 21.0328, "step": 59610 }, { "epoch": 0.12043617206090895, "grad_norm": 297.72686767578125, "learning_rate": 9.988727983289195e-06, "loss": 15.3047, "step": 59620 }, { "epoch": 0.12045637269359276, "grad_norm": 1073.702880859375, "learning_rate": 9.988704545394936e-06, "loss": 36.0673, "step": 59630 }, { "epoch": 0.12047657332627658, "grad_norm": 434.8298034667969, "learning_rate": 9.988681083186336e-06, "loss": 22.7055, "step": 59640 }, { "epoch": 0.1204967739589604, "grad_norm": 218.23529052734375, "learning_rate": 9.988657596663509e-06, "loss": 30.0182, "step": 59650 }, { "epoch": 0.1205169745916442, "grad_norm": 245.4269561767578, "learning_rate": 9.988634085826571e-06, "loss": 22.8707, "step": 59660 }, { "epoch": 0.12053717522432802, "grad_norm": 857.5812377929688, "learning_rate": 9.988610550675635e-06, "loss": 22.2752, "step": 59670 }, { "epoch": 0.12055737585701184, "grad_norm": 861.4116821289062, "learning_rate": 9.988586991210816e-06, "loss": 26.9759, "step": 59680 }, { "epoch": 0.12057757648969565, "grad_norm": 266.3482666015625, "learning_rate": 9.98856340743223e-06, "loss": 22.2715, "step": 59690 }, { "epoch": 0.12059777712237947, "grad_norm": 893.9590454101562, "learning_rate": 9.988539799339989e-06, "loss": 19.9834, "step": 59700 }, { "epoch": 0.12061797775506329, "grad_norm": 336.1539306640625, "learning_rate": 9.988516166934212e-06, "loss": 21.4351, "step": 59710 }, { "epoch": 0.12063817838774711, "grad_norm": 277.47705078125, "learning_rate": 9.988492510215011e-06, "loss": 12.952, "step": 59720 }, { "epoch": 0.12065837902043092, "grad_norm": 474.82122802734375, "learning_rate": 9.988468829182504e-06, "loss": 17.7442, "step": 59730 }, { "epoch": 0.12067857965311474, "grad_norm": 399.83392333984375, "learning_rate": 9.988445123836804e-06, "loss": 24.6492, "step": 59740 }, { "epoch": 0.12069878028579856, "grad_norm": 361.17340087890625, "learning_rate": 9.988421394178027e-06, "loss": 23.9565, "step": 59750 }, { "epoch": 0.12071898091848236, "grad_norm": 385.287109375, "learning_rate": 9.98839764020629e-06, "loss": 19.6916, "step": 59760 }, { "epoch": 0.12073918155116618, "grad_norm": 229.69644165039062, "learning_rate": 9.988373861921708e-06, "loss": 18.5205, "step": 59770 }, { "epoch": 0.12075938218385, "grad_norm": 1031.325927734375, "learning_rate": 9.988350059324396e-06, "loss": 39.4992, "step": 59780 }, { "epoch": 0.12077958281653381, "grad_norm": 408.28875732421875, "learning_rate": 9.988326232414472e-06, "loss": 32.7715, "step": 59790 }, { "epoch": 0.12079978344921763, "grad_norm": 407.96453857421875, "learning_rate": 9.98830238119205e-06, "loss": 18.9229, "step": 59800 }, { "epoch": 0.12081998408190145, "grad_norm": 478.8912048339844, "learning_rate": 9.988278505657247e-06, "loss": 35.969, "step": 59810 }, { "epoch": 0.12084018471458526, "grad_norm": 551.2542114257812, "learning_rate": 9.98825460581018e-06, "loss": 30.9293, "step": 59820 }, { "epoch": 0.12086038534726908, "grad_norm": 959.1485595703125, "learning_rate": 9.988230681650964e-06, "loss": 41.9438, "step": 59830 }, { "epoch": 0.1208805859799529, "grad_norm": 390.9014587402344, "learning_rate": 9.988206733179718e-06, "loss": 20.6948, "step": 59840 }, { "epoch": 0.1209007866126367, "grad_norm": 562.6345825195312, 
"learning_rate": 9.988182760396557e-06, "loss": 71.4276, "step": 59850 }, { "epoch": 0.12092098724532052, "grad_norm": 431.55218505859375, "learning_rate": 9.988158763301598e-06, "loss": 31.5547, "step": 59860 }, { "epoch": 0.12094118787800434, "grad_norm": 429.47686767578125, "learning_rate": 9.988134741894959e-06, "loss": 25.3229, "step": 59870 }, { "epoch": 0.12096138851068816, "grad_norm": 431.9924011230469, "learning_rate": 9.988110696176756e-06, "loss": 27.9693, "step": 59880 }, { "epoch": 0.12098158914337197, "grad_norm": 1132.8892822265625, "learning_rate": 9.988086626147107e-06, "loss": 50.1092, "step": 59890 }, { "epoch": 0.12100178977605579, "grad_norm": 326.2806396484375, "learning_rate": 9.988062531806127e-06, "loss": 24.4289, "step": 59900 }, { "epoch": 0.12102199040873961, "grad_norm": 238.85772705078125, "learning_rate": 9.988038413153936e-06, "loss": 34.2657, "step": 59910 }, { "epoch": 0.12104219104142341, "grad_norm": 1077.6678466796875, "learning_rate": 9.988014270190652e-06, "loss": 31.9235, "step": 59920 }, { "epoch": 0.12106239167410723, "grad_norm": 399.2193298339844, "learning_rate": 9.98799010291639e-06, "loss": 32.658, "step": 59930 }, { "epoch": 0.12108259230679105, "grad_norm": 67.97364044189453, "learning_rate": 9.987965911331268e-06, "loss": 22.4463, "step": 59940 }, { "epoch": 0.12110279293947486, "grad_norm": 682.7759399414062, "learning_rate": 9.987941695435409e-06, "loss": 25.9808, "step": 59950 }, { "epoch": 0.12112299357215868, "grad_norm": 673.4269409179688, "learning_rate": 9.987917455228924e-06, "loss": 26.4, "step": 59960 }, { "epoch": 0.1211431942048425, "grad_norm": 634.286865234375, "learning_rate": 9.987893190711935e-06, "loss": 13.4664, "step": 59970 }, { "epoch": 0.1211633948375263, "grad_norm": 449.27093505859375, "learning_rate": 9.987868901884558e-06, "loss": 31.3343, "step": 59980 }, { "epoch": 0.12118359547021013, "grad_norm": 37.370670318603516, "learning_rate": 9.987844588746916e-06, "loss": 27.4201, "step": 59990 }, { "epoch": 0.12120379610289395, "grad_norm": 800.6913452148438, "learning_rate": 9.987820251299121e-06, "loss": 45.0441, "step": 60000 }, { "epoch": 0.12122399673557775, "grad_norm": 472.5890197753906, "learning_rate": 9.987795889541298e-06, "loss": 19.9383, "step": 60010 }, { "epoch": 0.12124419736826157, "grad_norm": 215.47775268554688, "learning_rate": 9.987771503473562e-06, "loss": 25.8273, "step": 60020 }, { "epoch": 0.12126439800094539, "grad_norm": 548.9321899414062, "learning_rate": 9.987747093096032e-06, "loss": 32.3031, "step": 60030 }, { "epoch": 0.12128459863362921, "grad_norm": 481.9915771484375, "learning_rate": 9.987722658408828e-06, "loss": 30.0601, "step": 60040 }, { "epoch": 0.12130479926631302, "grad_norm": 268.7188415527344, "learning_rate": 9.98769819941207e-06, "loss": 17.8035, "step": 60050 }, { "epoch": 0.12132499989899684, "grad_norm": 696.6322021484375, "learning_rate": 9.987673716105874e-06, "loss": 40.5163, "step": 60060 }, { "epoch": 0.12134520053168066, "grad_norm": 440.85699462890625, "learning_rate": 9.987649208490361e-06, "loss": 27.2829, "step": 60070 }, { "epoch": 0.12136540116436446, "grad_norm": 438.387451171875, "learning_rate": 9.987624676565652e-06, "loss": 24.1422, "step": 60080 }, { "epoch": 0.12138560179704828, "grad_norm": 432.260009765625, "learning_rate": 9.987600120331864e-06, "loss": 33.1364, "step": 60090 }, { "epoch": 0.1214058024297321, "grad_norm": 470.2272033691406, "learning_rate": 9.987575539789119e-06, "loss": 32.1774, "step": 60100 }, { "epoch": 0.12142600306241591, 
"grad_norm": 510.7843322753906, "learning_rate": 9.987550934937536e-06, "loss": 22.3902, "step": 60110 }, { "epoch": 0.12144620369509973, "grad_norm": 294.6884765625, "learning_rate": 9.987526305777234e-06, "loss": 45.5376, "step": 60120 }, { "epoch": 0.12146640432778355, "grad_norm": 465.04290771484375, "learning_rate": 9.987501652308333e-06, "loss": 23.2083, "step": 60130 }, { "epoch": 0.12148660496046736, "grad_norm": 308.0381774902344, "learning_rate": 9.987476974530957e-06, "loss": 37.3375, "step": 60140 }, { "epoch": 0.12150680559315118, "grad_norm": 290.26446533203125, "learning_rate": 9.98745227244522e-06, "loss": 24.6941, "step": 60150 }, { "epoch": 0.121527006225835, "grad_norm": 9.64521598815918, "learning_rate": 9.987427546051246e-06, "loss": 46.0543, "step": 60160 }, { "epoch": 0.1215472068585188, "grad_norm": 141.52389526367188, "learning_rate": 9.987402795349154e-06, "loss": 18.1899, "step": 60170 }, { "epoch": 0.12156740749120262, "grad_norm": 1335.341552734375, "learning_rate": 9.987378020339069e-06, "loss": 33.4883, "step": 60180 }, { "epoch": 0.12158760812388644, "grad_norm": 771.3448486328125, "learning_rate": 9.987353221021106e-06, "loss": 21.3794, "step": 60190 }, { "epoch": 0.12160780875657026, "grad_norm": 264.4566345214844, "learning_rate": 9.987328397395389e-06, "loss": 15.827, "step": 60200 }, { "epoch": 0.12162800938925407, "grad_norm": 343.1784362792969, "learning_rate": 9.987303549462038e-06, "loss": 26.9193, "step": 60210 }, { "epoch": 0.12164821002193789, "grad_norm": 879.5808715820312, "learning_rate": 9.987278677221174e-06, "loss": 19.3837, "step": 60220 }, { "epoch": 0.12166841065462171, "grad_norm": 271.2762451171875, "learning_rate": 9.987253780672918e-06, "loss": 25.6372, "step": 60230 }, { "epoch": 0.12168861128730551, "grad_norm": 431.32720947265625, "learning_rate": 9.987228859817395e-06, "loss": 38.1591, "step": 60240 }, { "epoch": 0.12170881191998933, "grad_norm": 414.6575012207031, "learning_rate": 9.987203914654721e-06, "loss": 24.6773, "step": 60250 }, { "epoch": 0.12172901255267315, "grad_norm": 0.0, "learning_rate": 9.987178945185019e-06, "loss": 18.7729, "step": 60260 }, { "epoch": 0.12174921318535696, "grad_norm": 275.6715393066406, "learning_rate": 9.987153951408414e-06, "loss": 26.2396, "step": 60270 }, { "epoch": 0.12176941381804078, "grad_norm": 370.8847961425781, "learning_rate": 9.987128933325025e-06, "loss": 20.9022, "step": 60280 }, { "epoch": 0.1217896144507246, "grad_norm": 157.69837951660156, "learning_rate": 9.987103890934974e-06, "loss": 25.6948, "step": 60290 }, { "epoch": 0.12180981508340841, "grad_norm": 628.7579956054688, "learning_rate": 9.987078824238384e-06, "loss": 28.3776, "step": 60300 }, { "epoch": 0.12183001571609223, "grad_norm": 1117.4095458984375, "learning_rate": 9.987053733235376e-06, "loss": 37.893, "step": 60310 }, { "epoch": 0.12185021634877605, "grad_norm": 498.21563720703125, "learning_rate": 9.987028617926074e-06, "loss": 34.8712, "step": 60320 }, { "epoch": 0.12187041698145985, "grad_norm": 13.446024894714355, "learning_rate": 9.987003478310597e-06, "loss": 36.502, "step": 60330 }, { "epoch": 0.12189061761414367, "grad_norm": 266.8991394042969, "learning_rate": 9.986978314389071e-06, "loss": 22.1401, "step": 60340 }, { "epoch": 0.1219108182468275, "grad_norm": 373.22625732421875, "learning_rate": 9.98695312616162e-06, "loss": 20.9709, "step": 60350 }, { "epoch": 0.12193101887951131, "grad_norm": 419.4221496582031, "learning_rate": 9.986927913628361e-06, "loss": 27.2048, "step": 60360 }, { "epoch": 
0.12195121951219512, "grad_norm": 540.9066772460938, "learning_rate": 9.986902676789421e-06, "loss": 22.5147, "step": 60370 }, { "epoch": 0.12197142014487894, "grad_norm": 384.9314880371094, "learning_rate": 9.986877415644925e-06, "loss": 38.0179, "step": 60380 }, { "epoch": 0.12199162077756276, "grad_norm": 708.5804443359375, "learning_rate": 9.98685213019499e-06, "loss": 21.5257, "step": 60390 }, { "epoch": 0.12201182141024657, "grad_norm": 278.7950439453125, "learning_rate": 9.986826820439743e-06, "loss": 20.9915, "step": 60400 }, { "epoch": 0.12203202204293039, "grad_norm": 258.7886962890625, "learning_rate": 9.986801486379307e-06, "loss": 22.9166, "step": 60410 }, { "epoch": 0.1220522226756142, "grad_norm": 470.2723083496094, "learning_rate": 9.986776128013807e-06, "loss": 18.895, "step": 60420 }, { "epoch": 0.12207242330829801, "grad_norm": 332.79156494140625, "learning_rate": 9.986750745343363e-06, "loss": 22.5629, "step": 60430 }, { "epoch": 0.12209262394098183, "grad_norm": 585.84814453125, "learning_rate": 9.986725338368103e-06, "loss": 21.4241, "step": 60440 }, { "epoch": 0.12211282457366565, "grad_norm": 286.4283752441406, "learning_rate": 9.986699907088147e-06, "loss": 16.1121, "step": 60450 }, { "epoch": 0.12213302520634946, "grad_norm": 5481.30712890625, "learning_rate": 9.986674451503619e-06, "loss": 51.051, "step": 60460 }, { "epoch": 0.12215322583903328, "grad_norm": 128.4597625732422, "learning_rate": 9.986648971614646e-06, "loss": 22.5577, "step": 60470 }, { "epoch": 0.1221734264717171, "grad_norm": 968.1907348632812, "learning_rate": 9.98662346742135e-06, "loss": 17.0717, "step": 60480 }, { "epoch": 0.1221936271044009, "grad_norm": 1540.86474609375, "learning_rate": 9.986597938923859e-06, "loss": 22.1102, "step": 60490 }, { "epoch": 0.12221382773708472, "grad_norm": 95.4626693725586, "learning_rate": 9.98657238612229e-06, "loss": 12.8485, "step": 60500 }, { "epoch": 0.12223402836976854, "grad_norm": 428.83221435546875, "learning_rate": 9.986546809016775e-06, "loss": 28.4717, "step": 60510 }, { "epoch": 0.12225422900245236, "grad_norm": 233.34690856933594, "learning_rate": 9.986521207607436e-06, "loss": 29.036, "step": 60520 }, { "epoch": 0.12227442963513617, "grad_norm": 314.431396484375, "learning_rate": 9.986495581894396e-06, "loss": 17.1531, "step": 60530 }, { "epoch": 0.12229463026781999, "grad_norm": 357.11761474609375, "learning_rate": 9.986469931877781e-06, "loss": 14.6935, "step": 60540 }, { "epoch": 0.12231483090050381, "grad_norm": 377.8204650878906, "learning_rate": 9.986444257557717e-06, "loss": 16.1743, "step": 60550 }, { "epoch": 0.12233503153318762, "grad_norm": 251.5559539794922, "learning_rate": 9.986418558934329e-06, "loss": 18.553, "step": 60560 }, { "epoch": 0.12235523216587144, "grad_norm": 278.3482360839844, "learning_rate": 9.98639283600774e-06, "loss": 26.0725, "step": 60570 }, { "epoch": 0.12237543279855526, "grad_norm": 127.29243469238281, "learning_rate": 9.98636708877808e-06, "loss": 15.9861, "step": 60580 }, { "epoch": 0.12239563343123906, "grad_norm": 461.56890869140625, "learning_rate": 9.986341317245469e-06, "loss": 21.9128, "step": 60590 }, { "epoch": 0.12241583406392288, "grad_norm": 438.2317199707031, "learning_rate": 9.986315521410035e-06, "loss": 22.8366, "step": 60600 }, { "epoch": 0.1224360346966067, "grad_norm": 911.2008056640625, "learning_rate": 9.986289701271905e-06, "loss": 41.1762, "step": 60610 }, { "epoch": 0.12245623532929051, "grad_norm": 937.158203125, "learning_rate": 9.986263856831204e-06, "loss": 39.3684, "step": 
60620 }, { "epoch": 0.12247643596197433, "grad_norm": 120.9735107421875, "learning_rate": 9.986237988088059e-06, "loss": 22.2186, "step": 60630 }, { "epoch": 0.12249663659465815, "grad_norm": 405.4740295410156, "learning_rate": 9.986212095042593e-06, "loss": 27.3225, "step": 60640 }, { "epoch": 0.12251683722734195, "grad_norm": 889.656494140625, "learning_rate": 9.986186177694935e-06, "loss": 35.7576, "step": 60650 }, { "epoch": 0.12253703786002577, "grad_norm": 352.78173828125, "learning_rate": 9.986160236045207e-06, "loss": 25.7411, "step": 60660 }, { "epoch": 0.1225572384927096, "grad_norm": 382.47137451171875, "learning_rate": 9.986134270093542e-06, "loss": 33.1577, "step": 60670 }, { "epoch": 0.12257743912539341, "grad_norm": 947.0201416015625, "learning_rate": 9.986108279840063e-06, "loss": 28.6087, "step": 60680 }, { "epoch": 0.12259763975807722, "grad_norm": 1204.2515869140625, "learning_rate": 9.986082265284896e-06, "loss": 25.0962, "step": 60690 }, { "epoch": 0.12261784039076104, "grad_norm": 963.8109741210938, "learning_rate": 9.98605622642817e-06, "loss": 20.0314, "step": 60700 }, { "epoch": 0.12263804102344486, "grad_norm": 320.7643127441406, "learning_rate": 9.986030163270011e-06, "loss": 30.8012, "step": 60710 }, { "epoch": 0.12265824165612867, "grad_norm": 361.7972412109375, "learning_rate": 9.986004075810543e-06, "loss": 40.871, "step": 60720 }, { "epoch": 0.12267844228881249, "grad_norm": 666.5037231445312, "learning_rate": 9.985977964049898e-06, "loss": 38.1172, "step": 60730 }, { "epoch": 0.1226986429214963, "grad_norm": 725.2588500976562, "learning_rate": 9.9859518279882e-06, "loss": 32.248, "step": 60740 }, { "epoch": 0.12271884355418011, "grad_norm": 773.1401977539062, "learning_rate": 9.985925667625581e-06, "loss": 24.3105, "step": 60750 }, { "epoch": 0.12273904418686393, "grad_norm": 326.5883483886719, "learning_rate": 9.98589948296216e-06, "loss": 18.8559, "step": 60760 }, { "epoch": 0.12275924481954775, "grad_norm": 1008.8342895507812, "learning_rate": 9.985873273998072e-06, "loss": 28.795, "step": 60770 }, { "epoch": 0.12277944545223156, "grad_norm": 273.88427734375, "learning_rate": 9.985847040733442e-06, "loss": 20.6629, "step": 60780 }, { "epoch": 0.12279964608491538, "grad_norm": 200.18011474609375, "learning_rate": 9.9858207831684e-06, "loss": 34.8831, "step": 60790 }, { "epoch": 0.1228198467175992, "grad_norm": 386.1703796386719, "learning_rate": 9.98579450130307e-06, "loss": 23.9608, "step": 60800 }, { "epoch": 0.122840047350283, "grad_norm": 409.2703552246094, "learning_rate": 9.985768195137585e-06, "loss": 24.2317, "step": 60810 }, { "epoch": 0.12286024798296682, "grad_norm": 231.22320556640625, "learning_rate": 9.985741864672067e-06, "loss": 24.0767, "step": 60820 }, { "epoch": 0.12288044861565064, "grad_norm": 154.06040954589844, "learning_rate": 9.985715509906649e-06, "loss": 19.945, "step": 60830 }, { "epoch": 0.12290064924833445, "grad_norm": 389.05035400390625, "learning_rate": 9.985689130841459e-06, "loss": 12.9584, "step": 60840 }, { "epoch": 0.12292084988101827, "grad_norm": 656.5178833007812, "learning_rate": 9.985662727476625e-06, "loss": 27.0256, "step": 60850 }, { "epoch": 0.12294105051370209, "grad_norm": 851.6620483398438, "learning_rate": 9.985636299812275e-06, "loss": 16.2623, "step": 60860 }, { "epoch": 0.12296125114638591, "grad_norm": 497.35540771484375, "learning_rate": 9.98560984784854e-06, "loss": 31.88, "step": 60870 }, { "epoch": 0.12298145177906972, "grad_norm": 720.6923828125, "learning_rate": 9.985583371585544e-06, "loss": 
22.5373, "step": 60880 }, { "epoch": 0.12300165241175354, "grad_norm": 2.283111095428467, "learning_rate": 9.98555687102342e-06, "loss": 12.7178, "step": 60890 }, { "epoch": 0.12302185304443736, "grad_norm": 215.31703186035156, "learning_rate": 9.9855303461623e-06, "loss": 15.6389, "step": 60900 }, { "epoch": 0.12304205367712116, "grad_norm": 410.4704284667969, "learning_rate": 9.985503797002307e-06, "loss": 22.4629, "step": 60910 }, { "epoch": 0.12306225430980498, "grad_norm": 411.4508972167969, "learning_rate": 9.985477223543574e-06, "loss": 25.9603, "step": 60920 }, { "epoch": 0.1230824549424888, "grad_norm": 344.9388732910156, "learning_rate": 9.985450625786228e-06, "loss": 35.1812, "step": 60930 }, { "epoch": 0.12310265557517261, "grad_norm": 679.7549438476562, "learning_rate": 9.985424003730403e-06, "loss": 21.9696, "step": 60940 }, { "epoch": 0.12312285620785643, "grad_norm": 101.80624389648438, "learning_rate": 9.985397357376224e-06, "loss": 25.8398, "step": 60950 }, { "epoch": 0.12314305684054025, "grad_norm": 431.5050354003906, "learning_rate": 9.985370686723823e-06, "loss": 35.3801, "step": 60960 }, { "epoch": 0.12316325747322406, "grad_norm": 225.48330688476562, "learning_rate": 9.985343991773331e-06, "loss": 52.8736, "step": 60970 }, { "epoch": 0.12318345810590788, "grad_norm": 608.2294921875, "learning_rate": 9.985317272524876e-06, "loss": 29.2429, "step": 60980 }, { "epoch": 0.1232036587385917, "grad_norm": 345.0340881347656, "learning_rate": 9.98529052897859e-06, "loss": 40.7269, "step": 60990 }, { "epoch": 0.1232238593712755, "grad_norm": 175.9382781982422, "learning_rate": 9.985263761134602e-06, "loss": 19.7594, "step": 61000 }, { "epoch": 0.12324406000395932, "grad_norm": 617.882080078125, "learning_rate": 9.985236968993044e-06, "loss": 27.3067, "step": 61010 }, { "epoch": 0.12326426063664314, "grad_norm": 990.5284423828125, "learning_rate": 9.985210152554045e-06, "loss": 41.4177, "step": 61020 }, { "epoch": 0.12328446126932696, "grad_norm": 390.6434631347656, "learning_rate": 9.985183311817736e-06, "loss": 29.425, "step": 61030 }, { "epoch": 0.12330466190201077, "grad_norm": 545.95703125, "learning_rate": 9.985156446784249e-06, "loss": 27.7012, "step": 61040 }, { "epoch": 0.12332486253469459, "grad_norm": 392.1247863769531, "learning_rate": 9.985129557453714e-06, "loss": 13.2131, "step": 61050 }, { "epoch": 0.12334506316737841, "grad_norm": 934.265625, "learning_rate": 9.985102643826261e-06, "loss": 29.2276, "step": 61060 }, { "epoch": 0.12336526380006221, "grad_norm": 246.337890625, "learning_rate": 9.985075705902024e-06, "loss": 33.959, "step": 61070 }, { "epoch": 0.12338546443274603, "grad_norm": 411.26983642578125, "learning_rate": 9.985048743681131e-06, "loss": 18.7968, "step": 61080 }, { "epoch": 0.12340566506542985, "grad_norm": 231.3650665283203, "learning_rate": 9.985021757163715e-06, "loss": 37.9589, "step": 61090 }, { "epoch": 0.12342586569811366, "grad_norm": 535.2852172851562, "learning_rate": 9.98499474634991e-06, "loss": 26.4339, "step": 61100 }, { "epoch": 0.12344606633079748, "grad_norm": 128.99632263183594, "learning_rate": 9.984967711239844e-06, "loss": 19.9055, "step": 61110 }, { "epoch": 0.1234662669634813, "grad_norm": 528.6619262695312, "learning_rate": 9.984940651833648e-06, "loss": 21.8287, "step": 61120 }, { "epoch": 0.1234864675961651, "grad_norm": 272.9593505859375, "learning_rate": 9.984913568131458e-06, "loss": 28.5915, "step": 61130 }, { "epoch": 0.12350666822884893, "grad_norm": 574.6172485351562, "learning_rate": 9.984886460133403e-06, 
"loss": 19.0841, "step": 61140 }, { "epoch": 0.12352686886153275, "grad_norm": 263.6935729980469, "learning_rate": 9.984859327839617e-06, "loss": 26.3418, "step": 61150 }, { "epoch": 0.12354706949421655, "grad_norm": 36.87807846069336, "learning_rate": 9.98483217125023e-06, "loss": 23.3879, "step": 61160 }, { "epoch": 0.12356727012690037, "grad_norm": 109.2344741821289, "learning_rate": 9.984804990365376e-06, "loss": 13.9505, "step": 61170 }, { "epoch": 0.12358747075958419, "grad_norm": 798.1170654296875, "learning_rate": 9.984777785185188e-06, "loss": 32.6037, "step": 61180 }, { "epoch": 0.12360767139226801, "grad_norm": 117.23961639404297, "learning_rate": 9.984750555709797e-06, "loss": 24.9973, "step": 61190 }, { "epoch": 0.12362787202495182, "grad_norm": 25.17947006225586, "learning_rate": 9.984723301939337e-06, "loss": 15.0977, "step": 61200 }, { "epoch": 0.12364807265763564, "grad_norm": 751.7405395507812, "learning_rate": 9.984696023873939e-06, "loss": 24.9332, "step": 61210 }, { "epoch": 0.12366827329031946, "grad_norm": 631.7305908203125, "learning_rate": 9.984668721513737e-06, "loss": 25.5583, "step": 61220 }, { "epoch": 0.12368847392300326, "grad_norm": 189.65731811523438, "learning_rate": 9.984641394858865e-06, "loss": 31.7349, "step": 61230 }, { "epoch": 0.12370867455568708, "grad_norm": 864.2401733398438, "learning_rate": 9.984614043909455e-06, "loss": 41.5382, "step": 61240 }, { "epoch": 0.1237288751883709, "grad_norm": 474.04248046875, "learning_rate": 9.984586668665641e-06, "loss": 22.3956, "step": 61250 }, { "epoch": 0.12374907582105471, "grad_norm": 389.1614685058594, "learning_rate": 9.984559269127557e-06, "loss": 37.3944, "step": 61260 }, { "epoch": 0.12376927645373853, "grad_norm": 271.9932861328125, "learning_rate": 9.984531845295333e-06, "loss": 40.0991, "step": 61270 }, { "epoch": 0.12378947708642235, "grad_norm": 236.9116668701172, "learning_rate": 9.984504397169107e-06, "loss": 30.0676, "step": 61280 }, { "epoch": 0.12380967771910616, "grad_norm": 349.3979187011719, "learning_rate": 9.984476924749011e-06, "loss": 25.6765, "step": 61290 }, { "epoch": 0.12382987835178998, "grad_norm": 796.3491821289062, "learning_rate": 9.98444942803518e-06, "loss": 30.176, "step": 61300 }, { "epoch": 0.1238500789844738, "grad_norm": 377.94488525390625, "learning_rate": 9.984421907027747e-06, "loss": 17.7391, "step": 61310 }, { "epoch": 0.1238702796171576, "grad_norm": 386.1169128417969, "learning_rate": 9.984394361726844e-06, "loss": 24.3696, "step": 61320 }, { "epoch": 0.12389048024984142, "grad_norm": 349.8980712890625, "learning_rate": 9.98436679213261e-06, "loss": 15.9381, "step": 61330 }, { "epoch": 0.12391068088252524, "grad_norm": 294.2398986816406, "learning_rate": 9.984339198245175e-06, "loss": 20.0689, "step": 61340 }, { "epoch": 0.12393088151520906, "grad_norm": 570.2796630859375, "learning_rate": 9.984311580064676e-06, "loss": 19.928, "step": 61350 }, { "epoch": 0.12395108214789287, "grad_norm": 382.2208557128906, "learning_rate": 9.984283937591246e-06, "loss": 21.401, "step": 61360 }, { "epoch": 0.12397128278057669, "grad_norm": 213.10166931152344, "learning_rate": 9.98425627082502e-06, "loss": 39.4499, "step": 61370 }, { "epoch": 0.12399148341326051, "grad_norm": 287.1283264160156, "learning_rate": 9.984228579766136e-06, "loss": 26.1791, "step": 61380 }, { "epoch": 0.12401168404594431, "grad_norm": 476.8355712890625, "learning_rate": 9.984200864414726e-06, "loss": 17.3141, "step": 61390 }, { "epoch": 0.12403188467862813, "grad_norm": 283.53021240234375, 
"learning_rate": 9.984173124770924e-06, "loss": 25.9638, "step": 61400 }, { "epoch": 0.12405208531131195, "grad_norm": 553.5846557617188, "learning_rate": 9.984145360834868e-06, "loss": 17.2837, "step": 61410 }, { "epoch": 0.12407228594399576, "grad_norm": 697.6364135742188, "learning_rate": 9.984117572606691e-06, "loss": 22.7716, "step": 61420 }, { "epoch": 0.12409248657667958, "grad_norm": 444.0830078125, "learning_rate": 9.984089760086531e-06, "loss": 20.1147, "step": 61430 }, { "epoch": 0.1241126872093634, "grad_norm": 849.0206298828125, "learning_rate": 9.98406192327452e-06, "loss": 21.7118, "step": 61440 }, { "epoch": 0.12413288784204721, "grad_norm": 439.1556396484375, "learning_rate": 9.984034062170796e-06, "loss": 17.6189, "step": 61450 }, { "epoch": 0.12415308847473103, "grad_norm": 125.74227142333984, "learning_rate": 9.984006176775496e-06, "loss": 10.3332, "step": 61460 }, { "epoch": 0.12417328910741485, "grad_norm": 601.2088623046875, "learning_rate": 9.983978267088753e-06, "loss": 28.9244, "step": 61470 }, { "epoch": 0.12419348974009865, "grad_norm": 554.8843994140625, "learning_rate": 9.983950333110705e-06, "loss": 35.1721, "step": 61480 }, { "epoch": 0.12421369037278247, "grad_norm": 937.4196166992188, "learning_rate": 9.983922374841488e-06, "loss": 23.9458, "step": 61490 }, { "epoch": 0.1242338910054663, "grad_norm": 1045.7427978515625, "learning_rate": 9.983894392281237e-06, "loss": 16.7227, "step": 61500 }, { "epoch": 0.12425409163815011, "grad_norm": 490.57745361328125, "learning_rate": 9.98386638543009e-06, "loss": 21.1804, "step": 61510 }, { "epoch": 0.12427429227083392, "grad_norm": 618.8065185546875, "learning_rate": 9.983838354288181e-06, "loss": 18.1042, "step": 61520 }, { "epoch": 0.12429449290351774, "grad_norm": 363.8397216796875, "learning_rate": 9.98381029885565e-06, "loss": 16.5094, "step": 61530 }, { "epoch": 0.12431469353620156, "grad_norm": 390.1875, "learning_rate": 9.983782219132631e-06, "loss": 30.3959, "step": 61540 }, { "epoch": 0.12433489416888537, "grad_norm": 624.716552734375, "learning_rate": 9.983754115119262e-06, "loss": 18.5887, "step": 61550 }, { "epoch": 0.12435509480156919, "grad_norm": 233.31919860839844, "learning_rate": 9.983725986815682e-06, "loss": 29.8712, "step": 61560 }, { "epoch": 0.124375295434253, "grad_norm": 617.6629638671875, "learning_rate": 9.983697834222024e-06, "loss": 19.6594, "step": 61570 }, { "epoch": 0.12439549606693681, "grad_norm": 1028.4312744140625, "learning_rate": 9.983669657338425e-06, "loss": 34.8874, "step": 61580 }, { "epoch": 0.12441569669962063, "grad_norm": 407.6793212890625, "learning_rate": 9.98364145616503e-06, "loss": 28.1744, "step": 61590 }, { "epoch": 0.12443589733230445, "grad_norm": 901.1602783203125, "learning_rate": 9.983613230701967e-06, "loss": 29.0466, "step": 61600 }, { "epoch": 0.12445609796498826, "grad_norm": 1.035197138786316, "learning_rate": 9.98358498094938e-06, "loss": 9.143, "step": 61610 }, { "epoch": 0.12447629859767208, "grad_norm": 651.5478515625, "learning_rate": 9.983556706907401e-06, "loss": 29.3895, "step": 61620 }, { "epoch": 0.1244964992303559, "grad_norm": 241.96258544921875, "learning_rate": 9.983528408576173e-06, "loss": 22.3464, "step": 61630 }, { "epoch": 0.1245166998630397, "grad_norm": 303.60888671875, "learning_rate": 9.983500085955833e-06, "loss": 20.8418, "step": 61640 }, { "epoch": 0.12453690049572352, "grad_norm": 122.80511474609375, "learning_rate": 9.983471739046515e-06, "loss": 24.85, "step": 61650 }, { "epoch": 0.12455710112840734, "grad_norm": 
281.5797424316406, "learning_rate": 9.983443367848363e-06, "loss": 26.0335, "step": 61660 }, { "epoch": 0.12457730176109116, "grad_norm": 522.9718627929688, "learning_rate": 9.98341497236151e-06, "loss": 31.9752, "step": 61670 }, { "epoch": 0.12459750239377497, "grad_norm": 588.1405029296875, "learning_rate": 9.9833865525861e-06, "loss": 25.7646, "step": 61680 }, { "epoch": 0.12461770302645879, "grad_norm": 466.3359069824219, "learning_rate": 9.983358108522266e-06, "loss": 16.9239, "step": 61690 }, { "epoch": 0.12463790365914261, "grad_norm": 493.8813781738281, "learning_rate": 9.98332964017015e-06, "loss": 19.6411, "step": 61700 }, { "epoch": 0.12465810429182642, "grad_norm": 640.7128295898438, "learning_rate": 9.98330114752989e-06, "loss": 33.6878, "step": 61710 }, { "epoch": 0.12467830492451024, "grad_norm": 43.688926696777344, "learning_rate": 9.983272630601624e-06, "loss": 20.7413, "step": 61720 }, { "epoch": 0.12469850555719406, "grad_norm": 632.6639404296875, "learning_rate": 9.983244089385491e-06, "loss": 29.1099, "step": 61730 }, { "epoch": 0.12471870618987786, "grad_norm": 153.4970245361328, "learning_rate": 9.98321552388163e-06, "loss": 20.4657, "step": 61740 }, { "epoch": 0.12473890682256168, "grad_norm": 3450.285888671875, "learning_rate": 9.983186934090183e-06, "loss": 25.8551, "step": 61750 }, { "epoch": 0.1247591074552455, "grad_norm": 504.5535888671875, "learning_rate": 9.983158320011288e-06, "loss": 16.0905, "step": 61760 }, { "epoch": 0.12477930808792931, "grad_norm": 757.3421630859375, "learning_rate": 9.983129681645082e-06, "loss": 41.4387, "step": 61770 }, { "epoch": 0.12479950872061313, "grad_norm": 647.8482055664062, "learning_rate": 9.983101018991706e-06, "loss": 52.0292, "step": 61780 }, { "epoch": 0.12481970935329695, "grad_norm": 446.3780517578125, "learning_rate": 9.9830723320513e-06, "loss": 18.6124, "step": 61790 }, { "epoch": 0.12483990998598075, "grad_norm": 786.84619140625, "learning_rate": 9.983043620824005e-06, "loss": 18.3116, "step": 61800 }, { "epoch": 0.12486011061866457, "grad_norm": 768.5186767578125, "learning_rate": 9.983014885309959e-06, "loss": 28.8062, "step": 61810 }, { "epoch": 0.1248803112513484, "grad_norm": 104.13092803955078, "learning_rate": 9.982986125509303e-06, "loss": 23.444, "step": 61820 }, { "epoch": 0.12490051188403221, "grad_norm": 358.8445739746094, "learning_rate": 9.982957341422177e-06, "loss": 20.2794, "step": 61830 }, { "epoch": 0.12492071251671602, "grad_norm": 301.2618103027344, "learning_rate": 9.982928533048722e-06, "loss": 23.7256, "step": 61840 }, { "epoch": 0.12494091314939984, "grad_norm": 330.6232604980469, "learning_rate": 9.982899700389077e-06, "loss": 23.1953, "step": 61850 }, { "epoch": 0.12496111378208366, "grad_norm": 403.6541442871094, "learning_rate": 9.982870843443381e-06, "loss": 18.7548, "step": 61860 }, { "epoch": 0.12498131441476747, "grad_norm": 914.1959838867188, "learning_rate": 9.98284196221178e-06, "loss": 52.2723, "step": 61870 }, { "epoch": 0.12500151504745127, "grad_norm": 83.53597259521484, "learning_rate": 9.982813056694411e-06, "loss": 22.2355, "step": 61880 }, { "epoch": 0.1250217156801351, "grad_norm": 223.97230529785156, "learning_rate": 9.982784126891416e-06, "loss": 25.4685, "step": 61890 }, { "epoch": 0.1250419163128189, "grad_norm": 347.29119873046875, "learning_rate": 9.982755172802933e-06, "loss": 16.4986, "step": 61900 }, { "epoch": 0.12506211694550273, "grad_norm": 619.5742797851562, "learning_rate": 9.98272619442911e-06, "loss": 80.9853, "step": 61910 }, { "epoch": 
0.12508231757818655, "grad_norm": 507.7166442871094, "learning_rate": 9.982697191770079e-06, "loss": 39.1105, "step": 61920 }, { "epoch": 0.12510251821087037, "grad_norm": 338.95648193359375, "learning_rate": 9.982668164825989e-06, "loss": 36.0701, "step": 61930 }, { "epoch": 0.1251227188435542, "grad_norm": 301.35870361328125, "learning_rate": 9.982639113596978e-06, "loss": 18.2, "step": 61940 }, { "epoch": 0.12514291947623798, "grad_norm": 194.94728088378906, "learning_rate": 9.982610038083188e-06, "loss": 20.8123, "step": 61950 }, { "epoch": 0.1251631201089218, "grad_norm": 499.0058288574219, "learning_rate": 9.98258093828476e-06, "loss": 28.3766, "step": 61960 }, { "epoch": 0.12518332074160562, "grad_norm": 532.4949951171875, "learning_rate": 9.98255181420184e-06, "loss": 22.134, "step": 61970 }, { "epoch": 0.12520352137428944, "grad_norm": 259.8731689453125, "learning_rate": 9.982522665834565e-06, "loss": 27.6904, "step": 61980 }, { "epoch": 0.12522372200697326, "grad_norm": 523.75927734375, "learning_rate": 9.982493493183079e-06, "loss": 33.2929, "step": 61990 }, { "epoch": 0.12524392263965708, "grad_norm": 442.4575500488281, "learning_rate": 9.982464296247523e-06, "loss": 22.7982, "step": 62000 }, { "epoch": 0.12526412327234088, "grad_norm": 357.35003662109375, "learning_rate": 9.98243507502804e-06, "loss": 25.7578, "step": 62010 }, { "epoch": 0.1252843239050247, "grad_norm": 140.2523651123047, "learning_rate": 9.982405829524774e-06, "loss": 15.0906, "step": 62020 }, { "epoch": 0.12530452453770852, "grad_norm": 661.8172607421875, "learning_rate": 9.982376559737866e-06, "loss": 22.1402, "step": 62030 }, { "epoch": 0.12532472517039234, "grad_norm": 529.1638793945312, "learning_rate": 9.982347265667459e-06, "loss": 41.1321, "step": 62040 }, { "epoch": 0.12534492580307616, "grad_norm": 210.56182861328125, "learning_rate": 9.982317947313695e-06, "loss": 21.2127, "step": 62050 }, { "epoch": 0.12536512643575998, "grad_norm": 278.6200866699219, "learning_rate": 9.982288604676719e-06, "loss": 30.7348, "step": 62060 }, { "epoch": 0.1253853270684438, "grad_norm": 654.5919799804688, "learning_rate": 9.982259237756674e-06, "loss": 20.0652, "step": 62070 }, { "epoch": 0.1254055277011276, "grad_norm": 1227.6817626953125, "learning_rate": 9.982229846553698e-06, "loss": 49.6078, "step": 62080 }, { "epoch": 0.1254257283338114, "grad_norm": 528.229736328125, "learning_rate": 9.982200431067939e-06, "loss": 17.7291, "step": 62090 }, { "epoch": 0.12544592896649523, "grad_norm": 511.2461242675781, "learning_rate": 9.98217099129954e-06, "loss": 23.0345, "step": 62100 }, { "epoch": 0.12546612959917905, "grad_norm": 539.4857177734375, "learning_rate": 9.982141527248646e-06, "loss": 17.4074, "step": 62110 }, { "epoch": 0.12548633023186287, "grad_norm": 269.706787109375, "learning_rate": 9.982112038915394e-06, "loss": 32.9321, "step": 62120 }, { "epoch": 0.1255065308645467, "grad_norm": 617.9381713867188, "learning_rate": 9.982082526299935e-06, "loss": 20.269, "step": 62130 }, { "epoch": 0.12552673149723048, "grad_norm": 715.280517578125, "learning_rate": 9.98205298940241e-06, "loss": 24.2262, "step": 62140 }, { "epoch": 0.1255469321299143, "grad_norm": 23.387699127197266, "learning_rate": 9.982023428222963e-06, "loss": 17.8983, "step": 62150 }, { "epoch": 0.12556713276259812, "grad_norm": 752.984130859375, "learning_rate": 9.981993842761737e-06, "loss": 35.768, "step": 62160 }, { "epoch": 0.12558733339528194, "grad_norm": 755.310302734375, "learning_rate": 9.981964233018877e-06, "loss": 37.8018, "step": 62170 
}, { "epoch": 0.12560753402796576, "grad_norm": 482.14447021484375, "learning_rate": 9.981934598994529e-06, "loss": 31.3451, "step": 62180 }, { "epoch": 0.12562773466064958, "grad_norm": 426.26519775390625, "learning_rate": 9.981904940688836e-06, "loss": 14.7094, "step": 62190 }, { "epoch": 0.12564793529333337, "grad_norm": 317.7471923828125, "learning_rate": 9.981875258101944e-06, "loss": 45.4545, "step": 62200 }, { "epoch": 0.1256681359260172, "grad_norm": 453.5595397949219, "learning_rate": 9.981845551233993e-06, "loss": 26.3621, "step": 62210 }, { "epoch": 0.125688336558701, "grad_norm": 694.0244140625, "learning_rate": 9.981815820085132e-06, "loss": 20.3197, "step": 62220 }, { "epoch": 0.12570853719138483, "grad_norm": 382.5671691894531, "learning_rate": 9.981786064655505e-06, "loss": 24.1285, "step": 62230 }, { "epoch": 0.12572873782406865, "grad_norm": 571.6986694335938, "learning_rate": 9.981756284945256e-06, "loss": 22.9406, "step": 62240 }, { "epoch": 0.12574893845675247, "grad_norm": 1450.048583984375, "learning_rate": 9.981726480954532e-06, "loss": 26.0896, "step": 62250 }, { "epoch": 0.1257691390894363, "grad_norm": 6.673811912536621, "learning_rate": 9.981696652683479e-06, "loss": 26.3327, "step": 62260 }, { "epoch": 0.12578933972212009, "grad_norm": 528.0861206054688, "learning_rate": 9.98166680013224e-06, "loss": 27.3872, "step": 62270 }, { "epoch": 0.1258095403548039, "grad_norm": 413.5461120605469, "learning_rate": 9.981636923300959e-06, "loss": 23.6593, "step": 62280 }, { "epoch": 0.12582974098748773, "grad_norm": 643.4320068359375, "learning_rate": 9.981607022189785e-06, "loss": 21.158, "step": 62290 }, { "epoch": 0.12584994162017155, "grad_norm": 147.05723571777344, "learning_rate": 9.981577096798864e-06, "loss": 25.1463, "step": 62300 }, { "epoch": 0.12587014225285537, "grad_norm": 261.19879150390625, "learning_rate": 9.981547147128338e-06, "loss": 29.367, "step": 62310 }, { "epoch": 0.12589034288553919, "grad_norm": 550.4620971679688, "learning_rate": 9.981517173178357e-06, "loss": 29.0102, "step": 62320 }, { "epoch": 0.12591054351822298, "grad_norm": 1298.7689208984375, "learning_rate": 9.981487174949065e-06, "loss": 26.1776, "step": 62330 }, { "epoch": 0.1259307441509068, "grad_norm": 797.3565063476562, "learning_rate": 9.98145715244061e-06, "loss": 24.0866, "step": 62340 }, { "epoch": 0.12595094478359062, "grad_norm": 371.12225341796875, "learning_rate": 9.981427105653135e-06, "loss": 11.8349, "step": 62350 }, { "epoch": 0.12597114541627444, "grad_norm": 324.1166687011719, "learning_rate": 9.981397034586789e-06, "loss": 16.4646, "step": 62360 }, { "epoch": 0.12599134604895826, "grad_norm": 239.66323852539062, "learning_rate": 9.981366939241719e-06, "loss": 18.3616, "step": 62370 }, { "epoch": 0.12601154668164208, "grad_norm": 237.30917358398438, "learning_rate": 9.98133681961807e-06, "loss": 16.7278, "step": 62380 }, { "epoch": 0.1260317473143259, "grad_norm": 569.3565673828125, "learning_rate": 9.981306675715989e-06, "loss": 31.6287, "step": 62390 }, { "epoch": 0.1260519479470097, "grad_norm": 778.6282958984375, "learning_rate": 9.981276507535625e-06, "loss": 28.1746, "step": 62400 }, { "epoch": 0.1260721485796935, "grad_norm": 1418.2340087890625, "learning_rate": 9.981246315077123e-06, "loss": 22.1584, "step": 62410 }, { "epoch": 0.12609234921237733, "grad_norm": 351.3438415527344, "learning_rate": 9.98121609834063e-06, "loss": 20.2334, "step": 62420 }, { "epoch": 0.12611254984506115, "grad_norm": 361.52587890625, "learning_rate": 9.981185857326292e-06, "loss": 
29.948, "step": 62430 }, { "epoch": 0.12613275047774497, "grad_norm": 1249.1368408203125, "learning_rate": 9.98115559203426e-06, "loss": 49.3923, "step": 62440 }, { "epoch": 0.1261529511104288, "grad_norm": 133.21397399902344, "learning_rate": 9.981125302464681e-06, "loss": 36.9209, "step": 62450 }, { "epoch": 0.12617315174311258, "grad_norm": 401.4458923339844, "learning_rate": 9.9810949886177e-06, "loss": 15.0456, "step": 62460 }, { "epoch": 0.1261933523757964, "grad_norm": 708.7898559570312, "learning_rate": 9.981064650493466e-06, "loss": 18.3516, "step": 62470 }, { "epoch": 0.12621355300848022, "grad_norm": 446.0447082519531, "learning_rate": 9.981034288092129e-06, "loss": 14.9286, "step": 62480 }, { "epoch": 0.12623375364116404, "grad_norm": 444.21429443359375, "learning_rate": 9.981003901413833e-06, "loss": 27.0375, "step": 62490 }, { "epoch": 0.12625395427384786, "grad_norm": 848.4797973632812, "learning_rate": 9.980973490458728e-06, "loss": 26.718, "step": 62500 }, { "epoch": 0.12627415490653168, "grad_norm": 399.71966552734375, "learning_rate": 9.980943055226964e-06, "loss": 35.966, "step": 62510 }, { "epoch": 0.12629435553921547, "grad_norm": 382.6466979980469, "learning_rate": 9.980912595718686e-06, "loss": 49.3784, "step": 62520 }, { "epoch": 0.1263145561718993, "grad_norm": 497.79144287109375, "learning_rate": 9.980882111934046e-06, "loss": 22.7074, "step": 62530 }, { "epoch": 0.12633475680458311, "grad_norm": 277.94525146484375, "learning_rate": 9.980851603873189e-06, "loss": 11.5644, "step": 62540 }, { "epoch": 0.12635495743726693, "grad_norm": 538.9913330078125, "learning_rate": 9.980821071536266e-06, "loss": 16.5249, "step": 62550 }, { "epoch": 0.12637515806995075, "grad_norm": 309.7591247558594, "learning_rate": 9.980790514923425e-06, "loss": 16.1624, "step": 62560 }, { "epoch": 0.12639535870263457, "grad_norm": 750.3809204101562, "learning_rate": 9.980759934034816e-06, "loss": 36.466, "step": 62570 }, { "epoch": 0.1264155593353184, "grad_norm": 282.3537902832031, "learning_rate": 9.980729328870586e-06, "loss": 25.2453, "step": 62580 }, { "epoch": 0.1264357599680022, "grad_norm": 766.6907958984375, "learning_rate": 9.980698699430884e-06, "loss": 27.8084, "step": 62590 }, { "epoch": 0.126455960600686, "grad_norm": 303.53924560546875, "learning_rate": 9.980668045715864e-06, "loss": 16.3529, "step": 62600 }, { "epoch": 0.12647616123336983, "grad_norm": 388.064453125, "learning_rate": 9.98063736772567e-06, "loss": 25.9673, "step": 62610 }, { "epoch": 0.12649636186605365, "grad_norm": 583.8735961914062, "learning_rate": 9.980606665460453e-06, "loss": 27.4928, "step": 62620 }, { "epoch": 0.12651656249873747, "grad_norm": 440.026611328125, "learning_rate": 9.980575938920364e-06, "loss": 14.5656, "step": 62630 }, { "epoch": 0.1265367631314213, "grad_norm": 146.12265014648438, "learning_rate": 9.980545188105553e-06, "loss": 35.6076, "step": 62640 }, { "epoch": 0.12655696376410508, "grad_norm": 385.4832458496094, "learning_rate": 9.980514413016167e-06, "loss": 14.4751, "step": 62650 }, { "epoch": 0.1265771643967889, "grad_norm": 463.3867492675781, "learning_rate": 9.980483613652359e-06, "loss": 14.9639, "step": 62660 }, { "epoch": 0.12659736502947272, "grad_norm": 459.55169677734375, "learning_rate": 9.980452790014278e-06, "loss": 16.1644, "step": 62670 }, { "epoch": 0.12661756566215654, "grad_norm": 587.0556640625, "learning_rate": 9.980421942102075e-06, "loss": 29.3284, "step": 62680 }, { "epoch": 0.12663776629484036, "grad_norm": 805.0811767578125, "learning_rate": 
9.980391069915897e-06, "loss": 12.8979, "step": 62690 }, { "epoch": 0.12665796692752418, "grad_norm": 100.18392944335938, "learning_rate": 9.980360173455899e-06, "loss": 25.0022, "step": 62700 }, { "epoch": 0.126678167560208, "grad_norm": 463.1624755859375, "learning_rate": 9.980329252722227e-06, "loss": 14.9295, "step": 62710 }, { "epoch": 0.1266983681928918, "grad_norm": 406.43902587890625, "learning_rate": 9.980298307715038e-06, "loss": 29.2952, "step": 62720 }, { "epoch": 0.1267185688255756, "grad_norm": 508.7016296386719, "learning_rate": 9.980267338434477e-06, "loss": 34.4754, "step": 62730 }, { "epoch": 0.12673876945825943, "grad_norm": 99.78828430175781, "learning_rate": 9.980236344880696e-06, "loss": 15.8014, "step": 62740 }, { "epoch": 0.12675897009094325, "grad_norm": 783.4483032226562, "learning_rate": 9.98020532705385e-06, "loss": 32.7593, "step": 62750 }, { "epoch": 0.12677917072362707, "grad_norm": 648.4140014648438, "learning_rate": 9.980174284954084e-06, "loss": 44.6443, "step": 62760 }, { "epoch": 0.1267993713563109, "grad_norm": 341.7405090332031, "learning_rate": 9.980143218581555e-06, "loss": 34.7895, "step": 62770 }, { "epoch": 0.12681957198899468, "grad_norm": 1091.858154296875, "learning_rate": 9.98011212793641e-06, "loss": 33.4063, "step": 62780 }, { "epoch": 0.1268397726216785, "grad_norm": 216.94027709960938, "learning_rate": 9.980081013018804e-06, "loss": 15.5937, "step": 62790 }, { "epoch": 0.12685997325436232, "grad_norm": 202.47886657714844, "learning_rate": 9.980049873828887e-06, "loss": 16.3564, "step": 62800 }, { "epoch": 0.12688017388704614, "grad_norm": 545.5491333007812, "learning_rate": 9.98001871036681e-06, "loss": 34.0013, "step": 62810 }, { "epoch": 0.12690037451972996, "grad_norm": 1135.317626953125, "learning_rate": 9.979987522632727e-06, "loss": 36.4949, "step": 62820 }, { "epoch": 0.12692057515241378, "grad_norm": 727.2149047851562, "learning_rate": 9.979956310626788e-06, "loss": 27.9218, "step": 62830 }, { "epoch": 0.12694077578509758, "grad_norm": 78.84790802001953, "learning_rate": 9.979925074349146e-06, "loss": 26.28, "step": 62840 }, { "epoch": 0.1269609764177814, "grad_norm": 331.7414245605469, "learning_rate": 9.979893813799953e-06, "loss": 14.5715, "step": 62850 }, { "epoch": 0.12698117705046522, "grad_norm": 116.44114685058594, "learning_rate": 9.979862528979362e-06, "loss": 50.3006, "step": 62860 }, { "epoch": 0.12700137768314904, "grad_norm": 532.8187866210938, "learning_rate": 9.979831219887526e-06, "loss": 21.8745, "step": 62870 }, { "epoch": 0.12702157831583286, "grad_norm": 470.2156066894531, "learning_rate": 9.979799886524594e-06, "loss": 15.5994, "step": 62880 }, { "epoch": 0.12704177894851668, "grad_norm": 854.6575317382812, "learning_rate": 9.979768528890725e-06, "loss": 35.0675, "step": 62890 }, { "epoch": 0.1270619795812005, "grad_norm": 1332.267578125, "learning_rate": 9.979737146986064e-06, "loss": 40.4353, "step": 62900 }, { "epoch": 0.1270821802138843, "grad_norm": 223.40878295898438, "learning_rate": 9.979705740810771e-06, "loss": 21.3494, "step": 62910 }, { "epoch": 0.1271023808465681, "grad_norm": 792.7653198242188, "learning_rate": 9.979674310364996e-06, "loss": 25.4134, "step": 62920 }, { "epoch": 0.12712258147925193, "grad_norm": 511.9507751464844, "learning_rate": 9.979642855648892e-06, "loss": 20.9992, "step": 62930 }, { "epoch": 0.12714278211193575, "grad_norm": 477.951416015625, "learning_rate": 9.979611376662613e-06, "loss": 32.2239, "step": 62940 }, { "epoch": 0.12716298274461957, "grad_norm": 362.9677734375, 
"learning_rate": 9.97957987340631e-06, "loss": 19.6855, "step": 62950 }, { "epoch": 0.1271831833773034, "grad_norm": 836.57373046875, "learning_rate": 9.979548345880142e-06, "loss": 25.1571, "step": 62960 }, { "epoch": 0.12720338400998718, "grad_norm": 803.3682250976562, "learning_rate": 9.979516794084256e-06, "loss": 24.3883, "step": 62970 }, { "epoch": 0.127223584642671, "grad_norm": 614.2709350585938, "learning_rate": 9.97948521801881e-06, "loss": 29.8649, "step": 62980 }, { "epoch": 0.12724378527535482, "grad_norm": 376.4046936035156, "learning_rate": 9.979453617683958e-06, "loss": 17.8293, "step": 62990 }, { "epoch": 0.12726398590803864, "grad_norm": 491.6141357421875, "learning_rate": 9.979421993079853e-06, "loss": 23.357, "step": 63000 }, { "epoch": 0.12728418654072246, "grad_norm": 759.8665161132812, "learning_rate": 9.979390344206648e-06, "loss": 44.4881, "step": 63010 }, { "epoch": 0.12730438717340628, "grad_norm": 520.8623657226562, "learning_rate": 9.9793586710645e-06, "loss": 24.3983, "step": 63020 }, { "epoch": 0.1273245878060901, "grad_norm": 1215.4954833984375, "learning_rate": 9.97932697365356e-06, "loss": 22.5193, "step": 63030 }, { "epoch": 0.1273447884387739, "grad_norm": 676.3529663085938, "learning_rate": 9.979295251973986e-06, "loss": 21.0263, "step": 63040 }, { "epoch": 0.1273649890714577, "grad_norm": 123.64913940429688, "learning_rate": 9.97926350602593e-06, "loss": 35.7433, "step": 63050 }, { "epoch": 0.12738518970414153, "grad_norm": 516.5016479492188, "learning_rate": 9.979231735809546e-06, "loss": 15.5689, "step": 63060 }, { "epoch": 0.12740539033682535, "grad_norm": 415.07916259765625, "learning_rate": 9.979199941324994e-06, "loss": 25.6038, "step": 63070 }, { "epoch": 0.12742559096950917, "grad_norm": 245.9202880859375, "learning_rate": 9.979168122572422e-06, "loss": 49.7665, "step": 63080 }, { "epoch": 0.127445791602193, "grad_norm": 489.91802978515625, "learning_rate": 9.97913627955199e-06, "loss": 36.2062, "step": 63090 }, { "epoch": 0.12746599223487678, "grad_norm": 432.1904296875, "learning_rate": 9.979104412263851e-06, "loss": 12.2549, "step": 63100 }, { "epoch": 0.1274861928675606, "grad_norm": 1187.847900390625, "learning_rate": 9.979072520708162e-06, "loss": 49.9682, "step": 63110 }, { "epoch": 0.12750639350024442, "grad_norm": 222.1941680908203, "learning_rate": 9.979040604885077e-06, "loss": 16.5459, "step": 63120 }, { "epoch": 0.12752659413292824, "grad_norm": 416.6627502441406, "learning_rate": 9.979008664794751e-06, "loss": 24.3047, "step": 63130 }, { "epoch": 0.12754679476561206, "grad_norm": 560.9697875976562, "learning_rate": 9.978976700437341e-06, "loss": 42.9497, "step": 63140 }, { "epoch": 0.12756699539829588, "grad_norm": 936.8536376953125, "learning_rate": 9.978944711813003e-06, "loss": 25.2848, "step": 63150 }, { "epoch": 0.12758719603097968, "grad_norm": 513.5052490234375, "learning_rate": 9.978912698921892e-06, "loss": 24.7976, "step": 63160 }, { "epoch": 0.1276073966636635, "grad_norm": 493.39495849609375, "learning_rate": 9.978880661764166e-06, "loss": 27.8852, "step": 63170 }, { "epoch": 0.12762759729634732, "grad_norm": 1059.8115234375, "learning_rate": 9.978848600339978e-06, "loss": 26.1442, "step": 63180 }, { "epoch": 0.12764779792903114, "grad_norm": 272.6844787597656, "learning_rate": 9.978816514649486e-06, "loss": 18.1199, "step": 63190 }, { "epoch": 0.12766799856171496, "grad_norm": 409.2892150878906, "learning_rate": 9.978784404692847e-06, "loss": 15.1925, "step": 63200 }, { "epoch": 0.12768819919439878, "grad_norm": 
542.8263549804688, "learning_rate": 9.978752270470216e-06, "loss": 28.0719, "step": 63210 }, { "epoch": 0.1277083998270826, "grad_norm": 213.36155700683594, "learning_rate": 9.97872011198175e-06, "loss": 21.9098, "step": 63220 }, { "epoch": 0.1277286004597664, "grad_norm": 448.73211669921875, "learning_rate": 9.978687929227606e-06, "loss": 12.8645, "step": 63230 }, { "epoch": 0.1277488010924502, "grad_norm": 156.6364288330078, "learning_rate": 9.97865572220794e-06, "loss": 39.7648, "step": 63240 }, { "epoch": 0.12776900172513403, "grad_norm": 245.63671875, "learning_rate": 9.978623490922913e-06, "loss": 30.4392, "step": 63250 }, { "epoch": 0.12778920235781785, "grad_norm": 526.5847778320312, "learning_rate": 9.978591235372675e-06, "loss": 27.034, "step": 63260 }, { "epoch": 0.12780940299050167, "grad_norm": 855.2476806640625, "learning_rate": 9.97855895555739e-06, "loss": 40.838, "step": 63270 }, { "epoch": 0.1278296036231855, "grad_norm": 755.0592041015625, "learning_rate": 9.978526651477211e-06, "loss": 21.5641, "step": 63280 }, { "epoch": 0.12784980425586928, "grad_norm": 696.832763671875, "learning_rate": 9.978494323132296e-06, "loss": 36.0213, "step": 63290 }, { "epoch": 0.1278700048885531, "grad_norm": 617.84130859375, "learning_rate": 9.978461970522807e-06, "loss": 40.2178, "step": 63300 }, { "epoch": 0.12789020552123692, "grad_norm": 611.9417114257812, "learning_rate": 9.978429593648894e-06, "loss": 31.6474, "step": 63310 }, { "epoch": 0.12791040615392074, "grad_norm": 738.6346435546875, "learning_rate": 9.978397192510722e-06, "loss": 44.4837, "step": 63320 }, { "epoch": 0.12793060678660456, "grad_norm": 576.538330078125, "learning_rate": 9.978364767108444e-06, "loss": 23.9101, "step": 63330 }, { "epoch": 0.12795080741928838, "grad_norm": 486.086181640625, "learning_rate": 9.97833231744222e-06, "loss": 35.1615, "step": 63340 }, { "epoch": 0.1279710080519722, "grad_norm": 596.9443359375, "learning_rate": 9.97829984351221e-06, "loss": 16.0704, "step": 63350 }, { "epoch": 0.127991208684656, "grad_norm": 423.5945739746094, "learning_rate": 9.978267345318569e-06, "loss": 24.7456, "step": 63360 }, { "epoch": 0.1280114093173398, "grad_norm": 267.3250732421875, "learning_rate": 9.978234822861456e-06, "loss": 17.0896, "step": 63370 }, { "epoch": 0.12803160995002363, "grad_norm": 861.3472290039062, "learning_rate": 9.978202276141032e-06, "loss": 42.2953, "step": 63380 }, { "epoch": 0.12805181058270745, "grad_norm": 272.4585876464844, "learning_rate": 9.978169705157455e-06, "loss": 28.3199, "step": 63390 }, { "epoch": 0.12807201121539127, "grad_norm": 191.0371551513672, "learning_rate": 9.97813710991088e-06, "loss": 12.2191, "step": 63400 }, { "epoch": 0.1280922118480751, "grad_norm": 767.1546020507812, "learning_rate": 9.978104490401468e-06, "loss": 28.2204, "step": 63410 }, { "epoch": 0.12811241248075889, "grad_norm": 939.4093627929688, "learning_rate": 9.978071846629381e-06, "loss": 16.1536, "step": 63420 }, { "epoch": 0.1281326131134427, "grad_norm": 300.4186096191406, "learning_rate": 9.978039178594774e-06, "loss": 25.2184, "step": 63430 }, { "epoch": 0.12815281374612653, "grad_norm": 396.7921447753906, "learning_rate": 9.978006486297808e-06, "loss": 31.1113, "step": 63440 }, { "epoch": 0.12817301437881035, "grad_norm": 381.0914001464844, "learning_rate": 9.977973769738642e-06, "loss": 20.6207, "step": 63450 }, { "epoch": 0.12819321501149417, "grad_norm": 422.78662109375, "learning_rate": 9.977941028917436e-06, "loss": 30.2763, "step": 63460 }, { "epoch": 0.12821341564417799, 
"grad_norm": 509.16619873046875, "learning_rate": 9.977908263834348e-06, "loss": 47.3221, "step": 63470 }, { "epoch": 0.12823361627686178, "grad_norm": 5963.0517578125, "learning_rate": 9.97787547448954e-06, "loss": 38.5918, "step": 63480 }, { "epoch": 0.1282538169095456, "grad_norm": 472.1371765136719, "learning_rate": 9.977842660883172e-06, "loss": 17.5362, "step": 63490 }, { "epoch": 0.12827401754222942, "grad_norm": 688.972900390625, "learning_rate": 9.9778098230154e-06, "loss": 36.3663, "step": 63500 }, { "epoch": 0.12829421817491324, "grad_norm": 708.9854736328125, "learning_rate": 9.97777696088639e-06, "loss": 36.2051, "step": 63510 }, { "epoch": 0.12831441880759706, "grad_norm": 469.16461181640625, "learning_rate": 9.977744074496297e-06, "loss": 29.461, "step": 63520 }, { "epoch": 0.12833461944028088, "grad_norm": 111.1149673461914, "learning_rate": 9.97771116384528e-06, "loss": 16.2335, "step": 63530 }, { "epoch": 0.1283548200729647, "grad_norm": 853.3607177734375, "learning_rate": 9.977678228933508e-06, "loss": 38.7535, "step": 63540 }, { "epoch": 0.1283750207056485, "grad_norm": 373.10369873046875, "learning_rate": 9.977645269761131e-06, "loss": 13.5213, "step": 63550 }, { "epoch": 0.1283952213383323, "grad_norm": 445.2317810058594, "learning_rate": 9.977612286328317e-06, "loss": 18.522, "step": 63560 }, { "epoch": 0.12841542197101613, "grad_norm": 432.3799743652344, "learning_rate": 9.977579278635225e-06, "loss": 23.454, "step": 63570 }, { "epoch": 0.12843562260369995, "grad_norm": 113.14830017089844, "learning_rate": 9.977546246682015e-06, "loss": 18.0668, "step": 63580 }, { "epoch": 0.12845582323638377, "grad_norm": 644.360595703125, "learning_rate": 9.977513190468848e-06, "loss": 30.9097, "step": 63590 }, { "epoch": 0.1284760238690676, "grad_norm": 473.98876953125, "learning_rate": 9.977480109995886e-06, "loss": 16.7546, "step": 63600 }, { "epoch": 0.12849622450175138, "grad_norm": 153.7622833251953, "learning_rate": 9.977447005263289e-06, "loss": 22.1556, "step": 63610 }, { "epoch": 0.1285164251344352, "grad_norm": 905.6984252929688, "learning_rate": 9.97741387627122e-06, "loss": 30.4328, "step": 63620 }, { "epoch": 0.12853662576711902, "grad_norm": 665.6696166992188, "learning_rate": 9.977380723019838e-06, "loss": 21.1389, "step": 63630 }, { "epoch": 0.12855682639980284, "grad_norm": 173.6195526123047, "learning_rate": 9.977347545509307e-06, "loss": 29.5608, "step": 63640 }, { "epoch": 0.12857702703248666, "grad_norm": 340.8911437988281, "learning_rate": 9.977314343739785e-06, "loss": 26.3831, "step": 63650 }, { "epoch": 0.12859722766517048, "grad_norm": 622.4423828125, "learning_rate": 9.97728111771144e-06, "loss": 19.2024, "step": 63660 }, { "epoch": 0.1286174282978543, "grad_norm": 1370.3134765625, "learning_rate": 9.97724786742443e-06, "loss": 35.2596, "step": 63670 }, { "epoch": 0.1286376289305381, "grad_norm": 574.2199096679688, "learning_rate": 9.977214592878917e-06, "loss": 21.3078, "step": 63680 }, { "epoch": 0.12865782956322191, "grad_norm": 389.3919677734375, "learning_rate": 9.977181294075063e-06, "loss": 26.5428, "step": 63690 }, { "epoch": 0.12867803019590573, "grad_norm": 382.49951171875, "learning_rate": 9.977147971013033e-06, "loss": 27.7318, "step": 63700 }, { "epoch": 0.12869823082858955, "grad_norm": 169.2600860595703, "learning_rate": 9.977114623692985e-06, "loss": 20.7903, "step": 63710 }, { "epoch": 0.12871843146127337, "grad_norm": 247.0108642578125, "learning_rate": 9.977081252115085e-06, "loss": 19.3662, "step": 63720 }, { "epoch": 
0.1287386320939572, "grad_norm": 111.87555694580078, "learning_rate": 9.977047856279496e-06, "loss": 20.1174, "step": 63730 }, { "epoch": 0.128758832726641, "grad_norm": 472.9613952636719, "learning_rate": 9.977014436186377e-06, "loss": 14.0777, "step": 63740 }, { "epoch": 0.1287790333593248, "grad_norm": 267.59625244140625, "learning_rate": 9.976980991835896e-06, "loss": 28.9651, "step": 63750 }, { "epoch": 0.12879923399200863, "grad_norm": 580.331298828125, "learning_rate": 9.97694752322821e-06, "loss": 14.1914, "step": 63760 }, { "epoch": 0.12881943462469245, "grad_norm": 190.30702209472656, "learning_rate": 9.976914030363488e-06, "loss": 22.6187, "step": 63770 }, { "epoch": 0.12883963525737627, "grad_norm": 502.5259704589844, "learning_rate": 9.976880513241889e-06, "loss": 23.2208, "step": 63780 }, { "epoch": 0.1288598358900601, "grad_norm": 8.196840286254883, "learning_rate": 9.976846971863579e-06, "loss": 24.0969, "step": 63790 }, { "epoch": 0.12888003652274388, "grad_norm": 593.9136352539062, "learning_rate": 9.97681340622872e-06, "loss": 34.7351, "step": 63800 }, { "epoch": 0.1289002371554277, "grad_norm": 153.56385803222656, "learning_rate": 9.976779816337476e-06, "loss": 13.862, "step": 63810 }, { "epoch": 0.12892043778811152, "grad_norm": 849.2635498046875, "learning_rate": 9.976746202190012e-06, "loss": 23.3059, "step": 63820 }, { "epoch": 0.12894063842079534, "grad_norm": 373.9201965332031, "learning_rate": 9.97671256378649e-06, "loss": 16.4901, "step": 63830 }, { "epoch": 0.12896083905347916, "grad_norm": 494.0238342285156, "learning_rate": 9.976678901127074e-06, "loss": 34.1576, "step": 63840 }, { "epoch": 0.12898103968616298, "grad_norm": 39.347312927246094, "learning_rate": 9.976645214211929e-06, "loss": 26.6505, "step": 63850 }, { "epoch": 0.1290012403188468, "grad_norm": 414.5834655761719, "learning_rate": 9.976611503041218e-06, "loss": 36.6754, "step": 63860 }, { "epoch": 0.1290214409515306, "grad_norm": 803.5025024414062, "learning_rate": 9.976577767615108e-06, "loss": 22.6195, "step": 63870 }, { "epoch": 0.1290416415842144, "grad_norm": 374.4903259277344, "learning_rate": 9.97654400793376e-06, "loss": 14.9365, "step": 63880 }, { "epoch": 0.12906184221689823, "grad_norm": 419.9744567871094, "learning_rate": 9.97651022399734e-06, "loss": 24.2052, "step": 63890 }, { "epoch": 0.12908204284958205, "grad_norm": 553.0997314453125, "learning_rate": 9.976476415806013e-06, "loss": 24.3125, "step": 63900 }, { "epoch": 0.12910224348226587, "grad_norm": 273.9212951660156, "learning_rate": 9.976442583359944e-06, "loss": 18.0074, "step": 63910 }, { "epoch": 0.1291224441149497, "grad_norm": 304.9971923828125, "learning_rate": 9.976408726659296e-06, "loss": 19.2274, "step": 63920 }, { "epoch": 0.12914264474763348, "grad_norm": 256.17962646484375, "learning_rate": 9.976374845704238e-06, "loss": 29.1594, "step": 63930 }, { "epoch": 0.1291628453803173, "grad_norm": 272.98468017578125, "learning_rate": 9.976340940494931e-06, "loss": 15.6594, "step": 63940 }, { "epoch": 0.12918304601300112, "grad_norm": 390.11083984375, "learning_rate": 9.976307011031542e-06, "loss": 14.2196, "step": 63950 }, { "epoch": 0.12920324664568494, "grad_norm": 174.08494567871094, "learning_rate": 9.976273057314236e-06, "loss": 14.4673, "step": 63960 }, { "epoch": 0.12922344727836876, "grad_norm": 425.7914733886719, "learning_rate": 9.97623907934318e-06, "loss": 23.7006, "step": 63970 }, { "epoch": 0.12924364791105258, "grad_norm": 1210.1116943359375, "learning_rate": 9.976205077118536e-06, "loss": 36.7556, 
"step": 63980 }, { "epoch": 0.12926384854373638, "grad_norm": 631.6767578125, "learning_rate": 9.976171050640473e-06, "loss": 14.5007, "step": 63990 }, { "epoch": 0.1292840491764202, "grad_norm": 1304.597412109375, "learning_rate": 9.976136999909156e-06, "loss": 42.4553, "step": 64000 }, { "epoch": 0.12930424980910402, "grad_norm": 374.9949645996094, "learning_rate": 9.976102924924752e-06, "loss": 21.9549, "step": 64010 }, { "epoch": 0.12932445044178784, "grad_norm": 504.45379638671875, "learning_rate": 9.976068825687424e-06, "loss": 53.4435, "step": 64020 }, { "epoch": 0.12934465107447166, "grad_norm": 1374.239501953125, "learning_rate": 9.97603470219734e-06, "loss": 35.7616, "step": 64030 }, { "epoch": 0.12936485170715548, "grad_norm": 1206.2969970703125, "learning_rate": 9.976000554454668e-06, "loss": 33.7899, "step": 64040 }, { "epoch": 0.1293850523398393, "grad_norm": 494.3724365234375, "learning_rate": 9.975966382459571e-06, "loss": 25.3993, "step": 64050 }, { "epoch": 0.1294052529725231, "grad_norm": 606.0910034179688, "learning_rate": 9.975932186212217e-06, "loss": 21.95, "step": 64060 }, { "epoch": 0.1294254536052069, "grad_norm": 281.4843444824219, "learning_rate": 9.975897965712777e-06, "loss": 30.7947, "step": 64070 }, { "epoch": 0.12944565423789073, "grad_norm": 342.4183654785156, "learning_rate": 9.975863720961411e-06, "loss": 25.8221, "step": 64080 }, { "epoch": 0.12946585487057455, "grad_norm": 411.2066345214844, "learning_rate": 9.975829451958288e-06, "loss": 16.7347, "step": 64090 }, { "epoch": 0.12948605550325837, "grad_norm": 1058.9154052734375, "learning_rate": 9.975795158703576e-06, "loss": 20.1638, "step": 64100 }, { "epoch": 0.1295062561359422, "grad_norm": 440.6236572265625, "learning_rate": 9.975760841197443e-06, "loss": 17.8365, "step": 64110 }, { "epoch": 0.12952645676862598, "grad_norm": 1042.24072265625, "learning_rate": 9.975726499440055e-06, "loss": 33.1461, "step": 64120 }, { "epoch": 0.1295466574013098, "grad_norm": 714.8013916015625, "learning_rate": 9.975692133431579e-06, "loss": 21.159, "step": 64130 }, { "epoch": 0.12956685803399362, "grad_norm": 693.3626098632812, "learning_rate": 9.975657743172182e-06, "loss": 33.558, "step": 64140 }, { "epoch": 0.12958705866667744, "grad_norm": 453.12518310546875, "learning_rate": 9.975623328662036e-06, "loss": 41.4328, "step": 64150 }, { "epoch": 0.12960725929936126, "grad_norm": 252.20648193359375, "learning_rate": 9.975588889901302e-06, "loss": 44.4843, "step": 64160 }, { "epoch": 0.12962745993204508, "grad_norm": 361.7718505859375, "learning_rate": 9.975554426890152e-06, "loss": 29.4922, "step": 64170 }, { "epoch": 0.1296476605647289, "grad_norm": 407.3759460449219, "learning_rate": 9.975519939628754e-06, "loss": 18.3133, "step": 64180 }, { "epoch": 0.1296678611974127, "grad_norm": 848.3601684570312, "learning_rate": 9.975485428117276e-06, "loss": 28.4552, "step": 64190 }, { "epoch": 0.1296880618300965, "grad_norm": 806.0908203125, "learning_rate": 9.975450892355882e-06, "loss": 25.4261, "step": 64200 }, { "epoch": 0.12970826246278033, "grad_norm": 502.7238464355469, "learning_rate": 9.975416332344747e-06, "loss": 14.6552, "step": 64210 }, { "epoch": 0.12972846309546415, "grad_norm": 248.8984832763672, "learning_rate": 9.975381748084035e-06, "loss": 22.231, "step": 64220 }, { "epoch": 0.12974866372814797, "grad_norm": 625.7821044921875, "learning_rate": 9.975347139573917e-06, "loss": 25.4415, "step": 64230 }, { "epoch": 0.1297688643608318, "grad_norm": 1019.3402709960938, "learning_rate": 9.97531250681456e-06, 
"loss": 27.9808, "step": 64240 }, { "epoch": 0.12978906499351558, "grad_norm": 89.6280517578125, "learning_rate": 9.975277849806133e-06, "loss": 20.8346, "step": 64250 }, { "epoch": 0.1298092656261994, "grad_norm": 483.666259765625, "learning_rate": 9.975243168548804e-06, "loss": 22.905, "step": 64260 }, { "epoch": 0.12982946625888322, "grad_norm": 325.27740478515625, "learning_rate": 9.975208463042745e-06, "loss": 15.8422, "step": 64270 }, { "epoch": 0.12984966689156704, "grad_norm": 131.23312377929688, "learning_rate": 9.975173733288122e-06, "loss": 10.4955, "step": 64280 }, { "epoch": 0.12986986752425086, "grad_norm": 442.3735046386719, "learning_rate": 9.975138979285107e-06, "loss": 13.1356, "step": 64290 }, { "epoch": 0.12989006815693468, "grad_norm": 340.0762634277344, "learning_rate": 9.975104201033868e-06, "loss": 23.6785, "step": 64300 }, { "epoch": 0.12991026878961848, "grad_norm": 655.1138305664062, "learning_rate": 9.975069398534574e-06, "loss": 20.8663, "step": 64310 }, { "epoch": 0.1299304694223023, "grad_norm": 326.4592590332031, "learning_rate": 9.975034571787394e-06, "loss": 46.7844, "step": 64320 }, { "epoch": 0.12995067005498612, "grad_norm": 664.6102905273438, "learning_rate": 9.9749997207925e-06, "loss": 52.0087, "step": 64330 }, { "epoch": 0.12997087068766994, "grad_norm": 1101.6463623046875, "learning_rate": 9.974964845550062e-06, "loss": 36.4772, "step": 64340 }, { "epoch": 0.12999107132035376, "grad_norm": 372.61932373046875, "learning_rate": 9.974929946060246e-06, "loss": 25.8474, "step": 64350 }, { "epoch": 0.13001127195303758, "grad_norm": 405.5989074707031, "learning_rate": 9.974895022323226e-06, "loss": 23.0418, "step": 64360 }, { "epoch": 0.1300314725857214, "grad_norm": 154.89332580566406, "learning_rate": 9.974860074339173e-06, "loss": 25.3169, "step": 64370 }, { "epoch": 0.1300516732184052, "grad_norm": 984.13720703125, "learning_rate": 9.974825102108251e-06, "loss": 29.0367, "step": 64380 }, { "epoch": 0.130071873851089, "grad_norm": 707.0607299804688, "learning_rate": 9.974790105630639e-06, "loss": 23.0206, "step": 64390 }, { "epoch": 0.13009207448377283, "grad_norm": 607.7217407226562, "learning_rate": 9.974755084906503e-06, "loss": 22.579, "step": 64400 }, { "epoch": 0.13011227511645665, "grad_norm": 210.3593292236328, "learning_rate": 9.974720039936012e-06, "loss": 31.0888, "step": 64410 }, { "epoch": 0.13013247574914047, "grad_norm": 390.9085693359375, "learning_rate": 9.97468497071934e-06, "loss": 18.0068, "step": 64420 }, { "epoch": 0.1301526763818243, "grad_norm": 544.2200317382812, "learning_rate": 9.974649877256657e-06, "loss": 33.2558, "step": 64430 }, { "epoch": 0.13017287701450808, "grad_norm": 478.2790222167969, "learning_rate": 9.974614759548133e-06, "loss": 21.6782, "step": 64440 }, { "epoch": 0.1301930776471919, "grad_norm": 1261.015869140625, "learning_rate": 9.97457961759394e-06, "loss": 35.9209, "step": 64450 }, { "epoch": 0.13021327827987572, "grad_norm": 182.3002166748047, "learning_rate": 9.97454445139425e-06, "loss": 28.252, "step": 64460 }, { "epoch": 0.13023347891255954, "grad_norm": 259.7107238769531, "learning_rate": 9.974509260949233e-06, "loss": 22.5172, "step": 64470 }, { "epoch": 0.13025367954524336, "grad_norm": 923.8392944335938, "learning_rate": 9.97447404625906e-06, "loss": 24.7331, "step": 64480 }, { "epoch": 0.13027388017792718, "grad_norm": 728.4458618164062, "learning_rate": 9.974438807323907e-06, "loss": 30.6729, "step": 64490 }, { "epoch": 0.130294080810611, "grad_norm": 342.2673645019531, "learning_rate": 
9.974403544143942e-06, "loss": 27.3647, "step": 64500 }, { "epoch": 0.1303142814432948, "grad_norm": 1431.777099609375, "learning_rate": 9.974368256719335e-06, "loss": 17.8087, "step": 64510 }, { "epoch": 0.1303344820759786, "grad_norm": 767.341796875, "learning_rate": 9.974332945050263e-06, "loss": 16.4807, "step": 64520 }, { "epoch": 0.13035468270866243, "grad_norm": 351.86724853515625, "learning_rate": 9.974297609136895e-06, "loss": 12.8071, "step": 64530 }, { "epoch": 0.13037488334134625, "grad_norm": 826.7744750976562, "learning_rate": 9.974262248979402e-06, "loss": 22.566, "step": 64540 }, { "epoch": 0.13039508397403007, "grad_norm": 854.947021484375, "learning_rate": 9.97422686457796e-06, "loss": 27.3139, "step": 64550 }, { "epoch": 0.1304152846067139, "grad_norm": 271.3603820800781, "learning_rate": 9.97419145593274e-06, "loss": 25.9834, "step": 64560 }, { "epoch": 0.13043548523939769, "grad_norm": 244.5535125732422, "learning_rate": 9.974156023043912e-06, "loss": 39.2915, "step": 64570 }, { "epoch": 0.1304556858720815, "grad_norm": 781.3843994140625, "learning_rate": 9.974120565911653e-06, "loss": 30.444, "step": 64580 }, { "epoch": 0.13047588650476533, "grad_norm": 391.172119140625, "learning_rate": 9.974085084536132e-06, "loss": 19.9876, "step": 64590 }, { "epoch": 0.13049608713744915, "grad_norm": 277.1730041503906, "learning_rate": 9.974049578917524e-06, "loss": 10.8678, "step": 64600 }, { "epoch": 0.13051628777013297, "grad_norm": 488.2147216796875, "learning_rate": 9.974014049056003e-06, "loss": 38.7072, "step": 64610 }, { "epoch": 0.13053648840281679, "grad_norm": 635.7302856445312, "learning_rate": 9.973978494951739e-06, "loss": 23.2721, "step": 64620 }, { "epoch": 0.13055668903550058, "grad_norm": 249.53155517578125, "learning_rate": 9.973942916604907e-06, "loss": 20.0015, "step": 64630 }, { "epoch": 0.1305768896681844, "grad_norm": 248.79937744140625, "learning_rate": 9.973907314015682e-06, "loss": 33.9802, "step": 64640 }, { "epoch": 0.13059709030086822, "grad_norm": 326.5070495605469, "learning_rate": 9.973871687184234e-06, "loss": 29.0385, "step": 64650 }, { "epoch": 0.13061729093355204, "grad_norm": 515.3168334960938, "learning_rate": 9.97383603611074e-06, "loss": 37.1464, "step": 64660 }, { "epoch": 0.13063749156623586, "grad_norm": 392.71917724609375, "learning_rate": 9.973800360795372e-06, "loss": 20.8531, "step": 64670 }, { "epoch": 0.13065769219891968, "grad_norm": 380.54180908203125, "learning_rate": 9.973764661238306e-06, "loss": 14.8502, "step": 64680 }, { "epoch": 0.1306778928316035, "grad_norm": 634.4970703125, "learning_rate": 9.973728937439714e-06, "loss": 19.7428, "step": 64690 }, { "epoch": 0.1306980934642873, "grad_norm": 1988.073974609375, "learning_rate": 9.973693189399767e-06, "loss": 27.3053, "step": 64700 }, { "epoch": 0.1307182940969711, "grad_norm": 496.10345458984375, "learning_rate": 9.973657417118646e-06, "loss": 30.6424, "step": 64710 }, { "epoch": 0.13073849472965493, "grad_norm": 1122.1827392578125, "learning_rate": 9.97362162059652e-06, "loss": 54.9974, "step": 64720 }, { "epoch": 0.13075869536233875, "grad_norm": 236.85916137695312, "learning_rate": 9.973585799833567e-06, "loss": 19.5452, "step": 64730 }, { "epoch": 0.13077889599502257, "grad_norm": 589.6627807617188, "learning_rate": 9.97354995482996e-06, "loss": 15.7837, "step": 64740 }, { "epoch": 0.1307990966277064, "grad_norm": 297.345703125, "learning_rate": 9.973514085585871e-06, "loss": 31.8557, "step": 64750 }, { "epoch": 0.13081929726039018, "grad_norm": 140.3568878173828, 
"learning_rate": 9.97347819210148e-06, "loss": 23.0842, "step": 64760 }, { "epoch": 0.130839497893074, "grad_norm": 843.7931518554688, "learning_rate": 9.973442274376958e-06, "loss": 18.599, "step": 64770 }, { "epoch": 0.13085969852575782, "grad_norm": 771.300048828125, "learning_rate": 9.973406332412484e-06, "loss": 30.0886, "step": 64780 }, { "epoch": 0.13087989915844164, "grad_norm": 551.4595336914062, "learning_rate": 9.97337036620823e-06, "loss": 35.5825, "step": 64790 }, { "epoch": 0.13090009979112546, "grad_norm": 511.57830810546875, "learning_rate": 9.973334375764372e-06, "loss": 29.4059, "step": 64800 }, { "epoch": 0.13092030042380928, "grad_norm": 481.27239990234375, "learning_rate": 9.973298361081083e-06, "loss": 29.5845, "step": 64810 }, { "epoch": 0.1309405010564931, "grad_norm": 1035.55810546875, "learning_rate": 9.973262322158544e-06, "loss": 41.9542, "step": 64820 }, { "epoch": 0.1309607016891769, "grad_norm": 915.2047729492188, "learning_rate": 9.973226258996926e-06, "loss": 19.5643, "step": 64830 }, { "epoch": 0.13098090232186071, "grad_norm": 910.8612670898438, "learning_rate": 9.973190171596407e-06, "loss": 30.1054, "step": 64840 }, { "epoch": 0.13100110295454453, "grad_norm": 542.8687133789062, "learning_rate": 9.973154059957162e-06, "loss": 20.7429, "step": 64850 }, { "epoch": 0.13102130358722835, "grad_norm": 548.01904296875, "learning_rate": 9.973117924079367e-06, "loss": 32.7375, "step": 64860 }, { "epoch": 0.13104150421991217, "grad_norm": 599.3499145507812, "learning_rate": 9.973081763963199e-06, "loss": 18.564, "step": 64870 }, { "epoch": 0.131061704852596, "grad_norm": 360.1961975097656, "learning_rate": 9.973045579608834e-06, "loss": 23.948, "step": 64880 }, { "epoch": 0.1310819054852798, "grad_norm": 4.230323314666748, "learning_rate": 9.973009371016447e-06, "loss": 21.289, "step": 64890 }, { "epoch": 0.1311021061179636, "grad_norm": 274.7096252441406, "learning_rate": 9.972973138186217e-06, "loss": 28.9408, "step": 64900 }, { "epoch": 0.13112230675064743, "grad_norm": 829.801025390625, "learning_rate": 9.972936881118318e-06, "loss": 48.0795, "step": 64910 }, { "epoch": 0.13114250738333125, "grad_norm": 677.6478881835938, "learning_rate": 9.972900599812928e-06, "loss": 22.2759, "step": 64920 }, { "epoch": 0.13116270801601507, "grad_norm": 723.1565551757812, "learning_rate": 9.972864294270224e-06, "loss": 34.0883, "step": 64930 }, { "epoch": 0.1311829086486989, "grad_norm": 311.7381591796875, "learning_rate": 9.972827964490382e-06, "loss": 29.4282, "step": 64940 }, { "epoch": 0.13120310928138268, "grad_norm": 153.3361358642578, "learning_rate": 9.972791610473578e-06, "loss": 17.1192, "step": 64950 }, { "epoch": 0.1312233099140665, "grad_norm": 489.56915283203125, "learning_rate": 9.972755232219992e-06, "loss": 18.815, "step": 64960 }, { "epoch": 0.13124351054675032, "grad_norm": 193.79891967773438, "learning_rate": 9.972718829729802e-06, "loss": 23.3586, "step": 64970 }, { "epoch": 0.13126371117943414, "grad_norm": 437.9062194824219, "learning_rate": 9.972682403003182e-06, "loss": 30.5258, "step": 64980 }, { "epoch": 0.13128391181211796, "grad_norm": 2628.37548828125, "learning_rate": 9.972645952040311e-06, "loss": 43.4292, "step": 64990 }, { "epoch": 0.13130411244480178, "grad_norm": 966.9459228515625, "learning_rate": 9.972609476841368e-06, "loss": 30.0858, "step": 65000 }, { "epoch": 0.1313243130774856, "grad_norm": 589.35888671875, "learning_rate": 9.972572977406527e-06, "loss": 18.7885, "step": 65010 }, { "epoch": 0.1313445137101694, "grad_norm": 
691.9627685546875, "learning_rate": 9.97253645373597e-06, "loss": 39.7634, "step": 65020 }, { "epoch": 0.1313647143428532, "grad_norm": 296.74078369140625, "learning_rate": 9.972499905829874e-06, "loss": 20.8549, "step": 65030 }, { "epoch": 0.13138491497553703, "grad_norm": 345.5241394042969, "learning_rate": 9.972463333688416e-06, "loss": 22.2938, "step": 65040 }, { "epoch": 0.13140511560822085, "grad_norm": 407.33001708984375, "learning_rate": 9.972426737311775e-06, "loss": 80.9497, "step": 65050 }, { "epoch": 0.13142531624090467, "grad_norm": 222.73822021484375, "learning_rate": 9.972390116700128e-06, "loss": 26.0368, "step": 65060 }, { "epoch": 0.1314455168735885, "grad_norm": 840.1920776367188, "learning_rate": 9.972353471853655e-06, "loss": 39.2992, "step": 65070 }, { "epoch": 0.13146571750627228, "grad_norm": 670.8307495117188, "learning_rate": 9.972316802772536e-06, "loss": 35.0744, "step": 65080 }, { "epoch": 0.1314859181389561, "grad_norm": 267.5872802734375, "learning_rate": 9.972280109456946e-06, "loss": 15.6043, "step": 65090 }, { "epoch": 0.13150611877163992, "grad_norm": 318.8644104003906, "learning_rate": 9.972243391907068e-06, "loss": 22.2779, "step": 65100 }, { "epoch": 0.13152631940432374, "grad_norm": 190.1090087890625, "learning_rate": 9.972206650123077e-06, "loss": 25.1073, "step": 65110 }, { "epoch": 0.13154652003700756, "grad_norm": 1195.5474853515625, "learning_rate": 9.972169884105155e-06, "loss": 51.1655, "step": 65120 }, { "epoch": 0.13156672066969138, "grad_norm": 401.41473388671875, "learning_rate": 9.972133093853477e-06, "loss": 23.0753, "step": 65130 }, { "epoch": 0.1315869213023752, "grad_norm": 6.660244941711426, "learning_rate": 9.972096279368228e-06, "loss": 24.8062, "step": 65140 }, { "epoch": 0.131607121935059, "grad_norm": 705.7124633789062, "learning_rate": 9.972059440649584e-06, "loss": 15.3535, "step": 65150 }, { "epoch": 0.13162732256774282, "grad_norm": 452.8949890136719, "learning_rate": 9.972022577697726e-06, "loss": 22.1648, "step": 65160 }, { "epoch": 0.13164752320042664, "grad_norm": 213.5918426513672, "learning_rate": 9.971985690512834e-06, "loss": 29.4293, "step": 65170 }, { "epoch": 0.13166772383311046, "grad_norm": 435.3036804199219, "learning_rate": 9.971948779095084e-06, "loss": 24.4588, "step": 65180 }, { "epoch": 0.13168792446579428, "grad_norm": 391.5770568847656, "learning_rate": 9.97191184344466e-06, "loss": 16.859, "step": 65190 }, { "epoch": 0.1317081250984781, "grad_norm": 375.3039245605469, "learning_rate": 9.97187488356174e-06, "loss": 30.387, "step": 65200 }, { "epoch": 0.1317283257311619, "grad_norm": 898.590087890625, "learning_rate": 9.971837899446505e-06, "loss": 21.8392, "step": 65210 }, { "epoch": 0.1317485263638457, "grad_norm": 195.2491912841797, "learning_rate": 9.971800891099137e-06, "loss": 34.7816, "step": 65220 }, { "epoch": 0.13176872699652953, "grad_norm": 457.738525390625, "learning_rate": 9.971763858519812e-06, "loss": 22.7771, "step": 65230 }, { "epoch": 0.13178892762921335, "grad_norm": 596.8607177734375, "learning_rate": 9.971726801708715e-06, "loss": 37.7583, "step": 65240 }, { "epoch": 0.13180912826189717, "grad_norm": 175.02639770507812, "learning_rate": 9.971689720666024e-06, "loss": 25.4858, "step": 65250 }, { "epoch": 0.131829328894581, "grad_norm": 393.55499267578125, "learning_rate": 9.97165261539192e-06, "loss": 34.8065, "step": 65260 }, { "epoch": 0.13184952952726478, "grad_norm": 489.8519287109375, "learning_rate": 9.971615485886583e-06, "loss": 26.5333, "step": 65270 }, { "epoch": 
0.1318697301599486, "grad_norm": 23.220930099487305, "learning_rate": 9.971578332150197e-06, "loss": 27.3347, "step": 65280 }, { "epoch": 0.13188993079263242, "grad_norm": 729.2762451171875, "learning_rate": 9.97154115418294e-06, "loss": 31.484, "step": 65290 }, { "epoch": 0.13191013142531624, "grad_norm": 443.9441223144531, "learning_rate": 9.971503951984996e-06, "loss": 26.0966, "step": 65300 }, { "epoch": 0.13193033205800006, "grad_norm": 804.5036010742188, "learning_rate": 9.971466725556542e-06, "loss": 29.2958, "step": 65310 }, { "epoch": 0.13195053269068388, "grad_norm": 887.5077514648438, "learning_rate": 9.971429474897765e-06, "loss": 29.7262, "step": 65320 }, { "epoch": 0.1319707333233677, "grad_norm": 244.98841857910156, "learning_rate": 9.971392200008842e-06, "loss": 17.1568, "step": 65330 }, { "epoch": 0.1319909339560515, "grad_norm": 267.2784423828125, "learning_rate": 9.971354900889955e-06, "loss": 26.1696, "step": 65340 }, { "epoch": 0.1320111345887353, "grad_norm": 1662.8267822265625, "learning_rate": 9.97131757754129e-06, "loss": 21.3175, "step": 65350 }, { "epoch": 0.13203133522141913, "grad_norm": 412.65484619140625, "learning_rate": 9.971280229963026e-06, "loss": 20.529, "step": 65360 }, { "epoch": 0.13205153585410295, "grad_norm": 1063.288330078125, "learning_rate": 9.971242858155344e-06, "loss": 23.6185, "step": 65370 }, { "epoch": 0.13207173648678677, "grad_norm": 390.1273498535156, "learning_rate": 9.971205462118427e-06, "loss": 27.4855, "step": 65380 }, { "epoch": 0.1320919371194706, "grad_norm": 226.97308349609375, "learning_rate": 9.971168041852456e-06, "loss": 23.0615, "step": 65390 }, { "epoch": 0.13211213775215438, "grad_norm": 207.3575439453125, "learning_rate": 9.971130597357618e-06, "loss": 19.5395, "step": 65400 }, { "epoch": 0.1321323383848382, "grad_norm": 101.44998931884766, "learning_rate": 9.97109312863409e-06, "loss": 15.154, "step": 65410 }, { "epoch": 0.13215253901752202, "grad_norm": 793.5848388671875, "learning_rate": 9.971055635682059e-06, "loss": 36.1369, "step": 65420 }, { "epoch": 0.13217273965020584, "grad_norm": 382.6589660644531, "learning_rate": 9.971018118501706e-06, "loss": 31.8379, "step": 65430 }, { "epoch": 0.13219294028288966, "grad_norm": 679.6033325195312, "learning_rate": 9.970980577093212e-06, "loss": 30.3324, "step": 65440 }, { "epoch": 0.13221314091557348, "grad_norm": 203.62864685058594, "learning_rate": 9.970943011456762e-06, "loss": 21.4908, "step": 65450 }, { "epoch": 0.1322333415482573, "grad_norm": 303.47442626953125, "learning_rate": 9.970905421592538e-06, "loss": 24.6235, "step": 65460 }, { "epoch": 0.1322535421809411, "grad_norm": 98.95892333984375, "learning_rate": 9.970867807500725e-06, "loss": 17.3525, "step": 65470 }, { "epoch": 0.13227374281362492, "grad_norm": 339.5857849121094, "learning_rate": 9.970830169181504e-06, "loss": 23.0931, "step": 65480 }, { "epoch": 0.13229394344630874, "grad_norm": 434.018798828125, "learning_rate": 9.97079250663506e-06, "loss": 21.3534, "step": 65490 }, { "epoch": 0.13231414407899256, "grad_norm": 775.967041015625, "learning_rate": 9.970754819861577e-06, "loss": 38.1361, "step": 65500 }, { "epoch": 0.13233434471167638, "grad_norm": 294.7892761230469, "learning_rate": 9.97071710886124e-06, "loss": 41.3853, "step": 65510 }, { "epoch": 0.1323545453443602, "grad_norm": 288.3537902832031, "learning_rate": 9.970679373634227e-06, "loss": 23.1883, "step": 65520 }, { "epoch": 0.132374745977044, "grad_norm": 668.0358276367188, "learning_rate": 9.970641614180727e-06, "loss": 28.7483, "step": 
65530 }, { "epoch": 0.1323949466097278, "grad_norm": 594.7171630859375, "learning_rate": 9.970603830500923e-06, "loss": 24.1122, "step": 65540 }, { "epoch": 0.13241514724241163, "grad_norm": 376.3564453125, "learning_rate": 9.970566022594996e-06, "loss": 15.4006, "step": 65550 }, { "epoch": 0.13243534787509545, "grad_norm": 929.987548828125, "learning_rate": 9.970528190463136e-06, "loss": 26.8913, "step": 65560 }, { "epoch": 0.13245554850777927, "grad_norm": 254.50448608398438, "learning_rate": 9.970490334105525e-06, "loss": 28.261, "step": 65570 }, { "epoch": 0.1324757491404631, "grad_norm": 626.42822265625, "learning_rate": 9.970452453522344e-06, "loss": 41.1738, "step": 65580 }, { "epoch": 0.13249594977314688, "grad_norm": 231.1833953857422, "learning_rate": 9.970414548713783e-06, "loss": 19.4747, "step": 65590 }, { "epoch": 0.1325161504058307, "grad_norm": 348.5114440917969, "learning_rate": 9.970376619680024e-06, "loss": 26.3212, "step": 65600 }, { "epoch": 0.13253635103851452, "grad_norm": 442.5945129394531, "learning_rate": 9.970338666421251e-06, "loss": 23.4635, "step": 65610 }, { "epoch": 0.13255655167119834, "grad_norm": 1400.3162841796875, "learning_rate": 9.970300688937651e-06, "loss": 51.6835, "step": 65620 }, { "epoch": 0.13257675230388216, "grad_norm": 606.9361572265625, "learning_rate": 9.970262687229409e-06, "loss": 30.7905, "step": 65630 }, { "epoch": 0.13259695293656598, "grad_norm": 134.6631622314453, "learning_rate": 9.970224661296708e-06, "loss": 12.5447, "step": 65640 }, { "epoch": 0.1326171535692498, "grad_norm": 940.1969604492188, "learning_rate": 9.970186611139736e-06, "loss": 19.8421, "step": 65650 }, { "epoch": 0.1326373542019336, "grad_norm": 123.28846740722656, "learning_rate": 9.970148536758678e-06, "loss": 29.372, "step": 65660 }, { "epoch": 0.1326575548346174, "grad_norm": 271.18011474609375, "learning_rate": 9.970110438153717e-06, "loss": 55.829, "step": 65670 }, { "epoch": 0.13267775546730123, "grad_norm": 936.2969970703125, "learning_rate": 9.970072315325041e-06, "loss": 37.135, "step": 65680 }, { "epoch": 0.13269795609998505, "grad_norm": 471.40313720703125, "learning_rate": 9.970034168272835e-06, "loss": 30.9649, "step": 65690 }, { "epoch": 0.13271815673266887, "grad_norm": 281.6181335449219, "learning_rate": 9.969995996997285e-06, "loss": 45.4568, "step": 65700 }, { "epoch": 0.1327383573653527, "grad_norm": 494.97882080078125, "learning_rate": 9.96995780149858e-06, "loss": 17.6761, "step": 65710 }, { "epoch": 0.13275855799803649, "grad_norm": 258.3773193359375, "learning_rate": 9.969919581776902e-06, "loss": 58.6749, "step": 65720 }, { "epoch": 0.1327787586307203, "grad_norm": 736.1028442382812, "learning_rate": 9.969881337832437e-06, "loss": 29.7768, "step": 65730 }, { "epoch": 0.13279895926340413, "grad_norm": 171.40736389160156, "learning_rate": 9.969843069665375e-06, "loss": 20.6034, "step": 65740 }, { "epoch": 0.13281915989608795, "grad_norm": 645.6675415039062, "learning_rate": 9.9698047772759e-06, "loss": 25.7872, "step": 65750 }, { "epoch": 0.13283936052877177, "grad_norm": 761.6134033203125, "learning_rate": 9.969766460664199e-06, "loss": 23.8913, "step": 65760 }, { "epoch": 0.13285956116145559, "grad_norm": 463.9010925292969, "learning_rate": 9.96972811983046e-06, "loss": 29.2706, "step": 65770 }, { "epoch": 0.1328797617941394, "grad_norm": 744.6932983398438, "learning_rate": 9.969689754774868e-06, "loss": 41.1497, "step": 65780 }, { "epoch": 0.1328999624268232, "grad_norm": 113.17998504638672, "learning_rate": 9.96965136549761e-06, "loss": 
24.2846, "step": 65790 }, { "epoch": 0.13292016305950702, "grad_norm": 502.0255126953125, "learning_rate": 9.969612951998874e-06, "loss": 18.2136, "step": 65800 }, { "epoch": 0.13294036369219084, "grad_norm": 218.131103515625, "learning_rate": 9.96957451427885e-06, "loss": 12.6895, "step": 65810 }, { "epoch": 0.13296056432487466, "grad_norm": 779.2266845703125, "learning_rate": 9.96953605233772e-06, "loss": 29.5333, "step": 65820 }, { "epoch": 0.13298076495755848, "grad_norm": 357.7973937988281, "learning_rate": 9.969497566175675e-06, "loss": 24.1586, "step": 65830 }, { "epoch": 0.1330009655902423, "grad_norm": 298.0308837890625, "learning_rate": 9.969459055792903e-06, "loss": 25.5972, "step": 65840 }, { "epoch": 0.1330211662229261, "grad_norm": 641.5197143554688, "learning_rate": 9.969420521189587e-06, "loss": 49.9216, "step": 65850 }, { "epoch": 0.1330413668556099, "grad_norm": 617.6475219726562, "learning_rate": 9.96938196236592e-06, "loss": 26.327, "step": 65860 }, { "epoch": 0.13306156748829373, "grad_norm": 931.670654296875, "learning_rate": 9.96934337932209e-06, "loss": 33.2053, "step": 65870 }, { "epoch": 0.13308176812097755, "grad_norm": 334.64727783203125, "learning_rate": 9.969304772058279e-06, "loss": 30.9071, "step": 65880 }, { "epoch": 0.13310196875366137, "grad_norm": 1005.65576171875, "learning_rate": 9.969266140574682e-06, "loss": 18.8181, "step": 65890 }, { "epoch": 0.1331221693863452, "grad_norm": 423.1051940917969, "learning_rate": 9.969227484871485e-06, "loss": 38.5354, "step": 65900 }, { "epoch": 0.13314237001902898, "grad_norm": 172.60694885253906, "learning_rate": 9.969188804948872e-06, "loss": 26.1588, "step": 65910 }, { "epoch": 0.1331625706517128, "grad_norm": 11.49144172668457, "learning_rate": 9.969150100807039e-06, "loss": 23.5129, "step": 65920 }, { "epoch": 0.13318277128439662, "grad_norm": 270.6551208496094, "learning_rate": 9.969111372446171e-06, "loss": 26.9372, "step": 65930 }, { "epoch": 0.13320297191708044, "grad_norm": 278.15399169921875, "learning_rate": 9.969072619866455e-06, "loss": 27.4141, "step": 65940 }, { "epoch": 0.13322317254976426, "grad_norm": 480.8581848144531, "learning_rate": 9.969033843068083e-06, "loss": 18.4563, "step": 65950 }, { "epoch": 0.13324337318244808, "grad_norm": 7.052169322967529, "learning_rate": 9.968995042051244e-06, "loss": 26.8215, "step": 65960 }, { "epoch": 0.1332635738151319, "grad_norm": 3.9065651893615723, "learning_rate": 9.968956216816123e-06, "loss": 25.9901, "step": 65970 }, { "epoch": 0.1332837744478157, "grad_norm": 306.3456115722656, "learning_rate": 9.968917367362914e-06, "loss": 20.442, "step": 65980 }, { "epoch": 0.13330397508049951, "grad_norm": 314.388427734375, "learning_rate": 9.968878493691803e-06, "loss": 22.7953, "step": 65990 }, { "epoch": 0.13332417571318333, "grad_norm": 373.8533020019531, "learning_rate": 9.968839595802982e-06, "loss": 40.5255, "step": 66000 }, { "epoch": 0.13334437634586715, "grad_norm": 40.60340881347656, "learning_rate": 9.968800673696638e-06, "loss": 25.608, "step": 66010 }, { "epoch": 0.13336457697855097, "grad_norm": 156.1980438232422, "learning_rate": 9.968761727372965e-06, "loss": 19.8583, "step": 66020 }, { "epoch": 0.1333847776112348, "grad_norm": 496.1346130371094, "learning_rate": 9.968722756832148e-06, "loss": 28.0034, "step": 66030 }, { "epoch": 0.1334049782439186, "grad_norm": 536.6175537109375, "learning_rate": 9.96868376207438e-06, "loss": 25.7999, "step": 66040 }, { "epoch": 0.1334251788766024, "grad_norm": 595.9449462890625, "learning_rate": 
9.968644743099848e-06, "loss": 17.8054, "step": 66050 }, { "epoch": 0.13344537950928623, "grad_norm": 683.5084838867188, "learning_rate": 9.968605699908747e-06, "loss": 17.1145, "step": 66060 }, { "epoch": 0.13346558014197005, "grad_norm": 745.710205078125, "learning_rate": 9.968566632501262e-06, "loss": 36.371, "step": 66070 }, { "epoch": 0.13348578077465387, "grad_norm": 324.8899230957031, "learning_rate": 9.968527540877586e-06, "loss": 27.8732, "step": 66080 }, { "epoch": 0.1335059814073377, "grad_norm": 488.1936950683594, "learning_rate": 9.96848842503791e-06, "loss": 18.805, "step": 66090 }, { "epoch": 0.1335261820400215, "grad_norm": 332.4110412597656, "learning_rate": 9.968449284982424e-06, "loss": 25.6441, "step": 66100 }, { "epoch": 0.1335463826727053, "grad_norm": 494.11724853515625, "learning_rate": 9.968410120711321e-06, "loss": 24.8029, "step": 66110 }, { "epoch": 0.13356658330538912, "grad_norm": 397.534912109375, "learning_rate": 9.968370932224787e-06, "loss": 35.6343, "step": 66120 }, { "epoch": 0.13358678393807294, "grad_norm": 884.9349975585938, "learning_rate": 9.968331719523015e-06, "loss": 18.9939, "step": 66130 }, { "epoch": 0.13360698457075676, "grad_norm": 329.7498779296875, "learning_rate": 9.968292482606199e-06, "loss": 22.9038, "step": 66140 }, { "epoch": 0.13362718520344058, "grad_norm": 852.6181030273438, "learning_rate": 9.968253221474527e-06, "loss": 42.3823, "step": 66150 }, { "epoch": 0.1336473858361244, "grad_norm": 490.7461242675781, "learning_rate": 9.96821393612819e-06, "loss": 19.3947, "step": 66160 }, { "epoch": 0.1336675864688082, "grad_norm": 0.002593101467937231, "learning_rate": 9.968174626567382e-06, "loss": 13.4255, "step": 66170 }, { "epoch": 0.133687787101492, "grad_norm": 390.745361328125, "learning_rate": 9.968135292792294e-06, "loss": 41.6445, "step": 66180 }, { "epoch": 0.13370798773417583, "grad_norm": 767.7255859375, "learning_rate": 9.968095934803116e-06, "loss": 36.6521, "step": 66190 }, { "epoch": 0.13372818836685965, "grad_norm": 296.0707702636719, "learning_rate": 9.968056552600043e-06, "loss": 31.3354, "step": 66200 }, { "epoch": 0.13374838899954347, "grad_norm": 317.9017639160156, "learning_rate": 9.968017146183263e-06, "loss": 29.1129, "step": 66210 }, { "epoch": 0.1337685896322273, "grad_norm": 4.721127986907959, "learning_rate": 9.967977715552972e-06, "loss": 34.9939, "step": 66220 }, { "epoch": 0.13378879026491108, "grad_norm": 666.0792846679688, "learning_rate": 9.967938260709357e-06, "loss": 34.0891, "step": 66230 }, { "epoch": 0.1338089908975949, "grad_norm": 283.35662841796875, "learning_rate": 9.967898781652616e-06, "loss": 16.9283, "step": 66240 }, { "epoch": 0.13382919153027872, "grad_norm": 798.3267822265625, "learning_rate": 9.967859278382939e-06, "loss": 18.6469, "step": 66250 }, { "epoch": 0.13384939216296254, "grad_norm": 0.20100249350070953, "learning_rate": 9.967819750900517e-06, "loss": 33.2351, "step": 66260 }, { "epoch": 0.13386959279564636, "grad_norm": 469.26519775390625, "learning_rate": 9.967780199205544e-06, "loss": 23.6421, "step": 66270 }, { "epoch": 0.13388979342833018, "grad_norm": 317.48565673828125, "learning_rate": 9.967740623298214e-06, "loss": 29.4715, "step": 66280 }, { "epoch": 0.133909994061014, "grad_norm": 375.7717590332031, "learning_rate": 9.967701023178717e-06, "loss": 22.9257, "step": 66290 }, { "epoch": 0.1339301946936978, "grad_norm": 761.9866943359375, "learning_rate": 9.96766139884725e-06, "loss": 24.6845, "step": 66300 }, { "epoch": 0.13395039532638162, "grad_norm": 
102.87804412841797, "learning_rate": 9.967621750304002e-06, "loss": 26.4663, "step": 66310 }, { "epoch": 0.13397059595906544, "grad_norm": 135.01455688476562, "learning_rate": 9.96758207754917e-06, "loss": 22.0731, "step": 66320 }, { "epoch": 0.13399079659174926, "grad_norm": 464.4028015136719, "learning_rate": 9.967542380582944e-06, "loss": 32.7973, "step": 66330 }, { "epoch": 0.13401099722443308, "grad_norm": 1074.724365234375, "learning_rate": 9.96750265940552e-06, "loss": 21.3063, "step": 66340 }, { "epoch": 0.1340311978571169, "grad_norm": 1100.5205078125, "learning_rate": 9.967462914017087e-06, "loss": 26.3388, "step": 66350 }, { "epoch": 0.1340513984898007, "grad_norm": 665.2679443359375, "learning_rate": 9.967423144417847e-06, "loss": 17.4782, "step": 66360 }, { "epoch": 0.1340715991224845, "grad_norm": 401.9553527832031, "learning_rate": 9.967383350607986e-06, "loss": 20.9634, "step": 66370 }, { "epoch": 0.13409179975516833, "grad_norm": 857.6044921875, "learning_rate": 9.967343532587701e-06, "loss": 25.9167, "step": 66380 }, { "epoch": 0.13411200038785215, "grad_norm": 273.08056640625, "learning_rate": 9.967303690357189e-06, "loss": 22.9494, "step": 66390 }, { "epoch": 0.13413220102053597, "grad_norm": 339.6724853515625, "learning_rate": 9.967263823916638e-06, "loss": 22.062, "step": 66400 }, { "epoch": 0.1341524016532198, "grad_norm": 499.204345703125, "learning_rate": 9.967223933266247e-06, "loss": 15.3817, "step": 66410 }, { "epoch": 0.1341726022859036, "grad_norm": 450.8316345214844, "learning_rate": 9.96718401840621e-06, "loss": 19.4555, "step": 66420 }, { "epoch": 0.1341928029185874, "grad_norm": 155.22021484375, "learning_rate": 9.96714407933672e-06, "loss": 22.9686, "step": 66430 }, { "epoch": 0.13421300355127122, "grad_norm": 405.15850830078125, "learning_rate": 9.96710411605797e-06, "loss": 12.4172, "step": 66440 }, { "epoch": 0.13423320418395504, "grad_norm": 691.965087890625, "learning_rate": 9.96706412857016e-06, "loss": 33.4339, "step": 66450 }, { "epoch": 0.13425340481663886, "grad_norm": 133.9887237548828, "learning_rate": 9.967024116873481e-06, "loss": 13.0019, "step": 66460 }, { "epoch": 0.13427360544932268, "grad_norm": 767.7424926757812, "learning_rate": 9.966984080968128e-06, "loss": 23.3655, "step": 66470 }, { "epoch": 0.1342938060820065, "grad_norm": 147.04881286621094, "learning_rate": 9.966944020854297e-06, "loss": 19.7351, "step": 66480 }, { "epoch": 0.1343140067146903, "grad_norm": 57.78780746459961, "learning_rate": 9.966903936532184e-06, "loss": 25.1124, "step": 66490 }, { "epoch": 0.1343342073473741, "grad_norm": 1054.9471435546875, "learning_rate": 9.966863828001982e-06, "loss": 30.6405, "step": 66500 }, { "epoch": 0.13435440798005793, "grad_norm": 239.92677307128906, "learning_rate": 9.96682369526389e-06, "loss": 21.5139, "step": 66510 }, { "epoch": 0.13437460861274175, "grad_norm": 739.2789306640625, "learning_rate": 9.966783538318101e-06, "loss": 20.7183, "step": 66520 }, { "epoch": 0.13439480924542557, "grad_norm": 720.2469482421875, "learning_rate": 9.966743357164812e-06, "loss": 41.5077, "step": 66530 }, { "epoch": 0.1344150098781094, "grad_norm": 863.4821166992188, "learning_rate": 9.966703151804219e-06, "loss": 20.7125, "step": 66540 }, { "epoch": 0.13443521051079318, "grad_norm": 230.131591796875, "learning_rate": 9.966662922236515e-06, "loss": 25.8556, "step": 66550 }, { "epoch": 0.134455411143477, "grad_norm": 183.84738159179688, "learning_rate": 9.966622668461899e-06, "loss": 26.4966, "step": 66560 }, { "epoch": 0.13447561177616082, 
"grad_norm": 575.3276977539062, "learning_rate": 9.966582390480567e-06, "loss": 18.8949, "step": 66570 }, { "epoch": 0.13449581240884464, "grad_norm": 283.83489990234375, "learning_rate": 9.966542088292714e-06, "loss": 32.3473, "step": 66580 }, { "epoch": 0.13451601304152846, "grad_norm": 466.7230529785156, "learning_rate": 9.96650176189854e-06, "loss": 23.7041, "step": 66590 }, { "epoch": 0.13453621367421228, "grad_norm": 392.8020324707031, "learning_rate": 9.966461411298235e-06, "loss": 22.3975, "step": 66600 }, { "epoch": 0.1345564143068961, "grad_norm": 194.2777557373047, "learning_rate": 9.966421036492003e-06, "loss": 10.7699, "step": 66610 }, { "epoch": 0.1345766149395799, "grad_norm": 1023.65185546875, "learning_rate": 9.966380637480034e-06, "loss": 32.0953, "step": 66620 }, { "epoch": 0.13459681557226372, "grad_norm": 158.03370666503906, "learning_rate": 9.96634021426253e-06, "loss": 24.5801, "step": 66630 }, { "epoch": 0.13461701620494754, "grad_norm": 229.6029510498047, "learning_rate": 9.966299766839685e-06, "loss": 42.3127, "step": 66640 }, { "epoch": 0.13463721683763136, "grad_norm": 418.8670654296875, "learning_rate": 9.966259295211698e-06, "loss": 14.8902, "step": 66650 }, { "epoch": 0.13465741747031518, "grad_norm": 314.76324462890625, "learning_rate": 9.966218799378766e-06, "loss": 35.0342, "step": 66660 }, { "epoch": 0.134677618102999, "grad_norm": 417.505615234375, "learning_rate": 9.966178279341084e-06, "loss": 50.4991, "step": 66670 }, { "epoch": 0.1346978187356828, "grad_norm": 613.02197265625, "learning_rate": 9.966137735098853e-06, "loss": 39.8462, "step": 66680 }, { "epoch": 0.1347180193683666, "grad_norm": 2.145066261291504, "learning_rate": 9.966097166652268e-06, "loss": 27.7255, "step": 66690 }, { "epoch": 0.13473822000105043, "grad_norm": 361.9964904785156, "learning_rate": 9.966056574001528e-06, "loss": 23.7623, "step": 66700 }, { "epoch": 0.13475842063373425, "grad_norm": 327.3876037597656, "learning_rate": 9.966015957146832e-06, "loss": 28.5767, "step": 66710 }, { "epoch": 0.13477862126641807, "grad_norm": 237.48236083984375, "learning_rate": 9.965975316088377e-06, "loss": 16.9549, "step": 66720 }, { "epoch": 0.1347988218991019, "grad_norm": 196.621337890625, "learning_rate": 9.96593465082636e-06, "loss": 26.1039, "step": 66730 }, { "epoch": 0.1348190225317857, "grad_norm": 177.80996704101562, "learning_rate": 9.965893961360977e-06, "loss": 18.6695, "step": 66740 }, { "epoch": 0.1348392231644695, "grad_norm": 460.58953857421875, "learning_rate": 9.965853247692433e-06, "loss": 30.0823, "step": 66750 }, { "epoch": 0.13485942379715332, "grad_norm": 277.8708801269531, "learning_rate": 9.965812509820918e-06, "loss": 13.5059, "step": 66760 }, { "epoch": 0.13487962442983714, "grad_norm": 351.21954345703125, "learning_rate": 9.965771747746638e-06, "loss": 29.4535, "step": 66770 }, { "epoch": 0.13489982506252096, "grad_norm": 301.07952880859375, "learning_rate": 9.96573096146979e-06, "loss": 11.7858, "step": 66780 }, { "epoch": 0.13492002569520478, "grad_norm": 314.43585205078125, "learning_rate": 9.96569015099057e-06, "loss": 24.5155, "step": 66790 }, { "epoch": 0.1349402263278886, "grad_norm": 499.5002746582031, "learning_rate": 9.965649316309178e-06, "loss": 13.3254, "step": 66800 }, { "epoch": 0.1349604269605724, "grad_norm": 196.3819580078125, "learning_rate": 9.965608457425813e-06, "loss": 16.0215, "step": 66810 }, { "epoch": 0.1349806275932562, "grad_norm": 305.2103576660156, "learning_rate": 9.965567574340676e-06, "loss": 20.4752, "step": 66820 }, { "epoch": 
0.13500082822594003, "grad_norm": 798.6466064453125, "learning_rate": 9.965526667053964e-06, "loss": 36.875, "step": 66830 }, { "epoch": 0.13502102885862385, "grad_norm": 1179.1925048828125, "learning_rate": 9.965485735565878e-06, "loss": 32.7117, "step": 66840 }, { "epoch": 0.13504122949130767, "grad_norm": 803.5941772460938, "learning_rate": 9.965444779876618e-06, "loss": 26.9367, "step": 66850 }, { "epoch": 0.1350614301239915, "grad_norm": 504.97418212890625, "learning_rate": 9.96540379998638e-06, "loss": 46.2716, "step": 66860 }, { "epoch": 0.13508163075667529, "grad_norm": 356.00054931640625, "learning_rate": 9.965362795895368e-06, "loss": 26.5936, "step": 66870 }, { "epoch": 0.1351018313893591, "grad_norm": 173.8934783935547, "learning_rate": 9.965321767603778e-06, "loss": 24.5727, "step": 66880 }, { "epoch": 0.13512203202204293, "grad_norm": 603.3222045898438, "learning_rate": 9.965280715111814e-06, "loss": 22.2474, "step": 66890 }, { "epoch": 0.13514223265472675, "grad_norm": 259.8135986328125, "learning_rate": 9.965239638419673e-06, "loss": 37.9913, "step": 66900 }, { "epoch": 0.13516243328741057, "grad_norm": 754.8200073242188, "learning_rate": 9.965198537527556e-06, "loss": 24.2608, "step": 66910 }, { "epoch": 0.13518263392009439, "grad_norm": 705.2947387695312, "learning_rate": 9.965157412435663e-06, "loss": 18.6022, "step": 66920 }, { "epoch": 0.1352028345527782, "grad_norm": 501.9771728515625, "learning_rate": 9.965116263144196e-06, "loss": 19.7913, "step": 66930 }, { "epoch": 0.135223035185462, "grad_norm": 1352.078369140625, "learning_rate": 9.965075089653354e-06, "loss": 37.3073, "step": 66940 }, { "epoch": 0.13524323581814582, "grad_norm": 667.0462646484375, "learning_rate": 9.965033891963338e-06, "loss": 16.9303, "step": 66950 }, { "epoch": 0.13526343645082964, "grad_norm": 216.81114196777344, "learning_rate": 9.96499267007435e-06, "loss": 19.2859, "step": 66960 }, { "epoch": 0.13528363708351346, "grad_norm": 225.5086212158203, "learning_rate": 9.964951423986588e-06, "loss": 11.4455, "step": 66970 }, { "epoch": 0.13530383771619728, "grad_norm": 167.20376586914062, "learning_rate": 9.964910153700258e-06, "loss": 36.076, "step": 66980 }, { "epoch": 0.1353240383488811, "grad_norm": 449.9119567871094, "learning_rate": 9.964868859215555e-06, "loss": 14.487, "step": 66990 }, { "epoch": 0.1353442389815649, "grad_norm": 344.0535888671875, "learning_rate": 9.964827540532685e-06, "loss": 21.6166, "step": 67000 }, { "epoch": 0.1353644396142487, "grad_norm": 555.558837890625, "learning_rate": 9.964786197651848e-06, "loss": 40.5872, "step": 67010 }, { "epoch": 0.13538464024693253, "grad_norm": 415.7837219238281, "learning_rate": 9.964744830573245e-06, "loss": 29.0629, "step": 67020 }, { "epoch": 0.13540484087961635, "grad_norm": 190.95452880859375, "learning_rate": 9.964703439297076e-06, "loss": 38.2105, "step": 67030 }, { "epoch": 0.13542504151230017, "grad_norm": 420.1988830566406, "learning_rate": 9.964662023823548e-06, "loss": 39.1838, "step": 67040 }, { "epoch": 0.135445242144984, "grad_norm": 630.7103881835938, "learning_rate": 9.964620584152858e-06, "loss": 21.7209, "step": 67050 }, { "epoch": 0.13546544277766778, "grad_norm": 555.09765625, "learning_rate": 9.964579120285208e-06, "loss": 38.1503, "step": 67060 }, { "epoch": 0.1354856434103516, "grad_norm": 399.41827392578125, "learning_rate": 9.964537632220801e-06, "loss": 16.5488, "step": 67070 }, { "epoch": 0.13550584404303542, "grad_norm": 541.5283203125, "learning_rate": 9.964496119959842e-06, "loss": 24.1675, "step": 
67080 }, { "epoch": 0.13552604467571924, "grad_norm": 724.1858520507812, "learning_rate": 9.96445458350253e-06, "loss": 18.6799, "step": 67090 }, { "epoch": 0.13554624530840306, "grad_norm": 88.37238311767578, "learning_rate": 9.964413022849069e-06, "loss": 15.1643, "step": 67100 }, { "epoch": 0.13556644594108688, "grad_norm": 329.0193176269531, "learning_rate": 9.964371437999661e-06, "loss": 16.1981, "step": 67110 }, { "epoch": 0.1355866465737707, "grad_norm": 1264.38818359375, "learning_rate": 9.96432982895451e-06, "loss": 26.0559, "step": 67120 }, { "epoch": 0.1356068472064545, "grad_norm": 255.5403289794922, "learning_rate": 9.964288195713814e-06, "loss": 18.634, "step": 67130 }, { "epoch": 0.13562704783913831, "grad_norm": 536.2504272460938, "learning_rate": 9.964246538277782e-06, "loss": 21.1002, "step": 67140 }, { "epoch": 0.13564724847182213, "grad_norm": 286.3037109375, "learning_rate": 9.964204856646613e-06, "loss": 33.4804, "step": 67150 }, { "epoch": 0.13566744910450595, "grad_norm": 717.703369140625, "learning_rate": 9.964163150820512e-06, "loss": 27.5898, "step": 67160 }, { "epoch": 0.13568764973718977, "grad_norm": 630.5046997070312, "learning_rate": 9.964121420799682e-06, "loss": 21.9463, "step": 67170 }, { "epoch": 0.1357078503698736, "grad_norm": 434.26190185546875, "learning_rate": 9.964079666584327e-06, "loss": 28.9183, "step": 67180 }, { "epoch": 0.1357280510025574, "grad_norm": 521.5183715820312, "learning_rate": 9.96403788817465e-06, "loss": 27.702, "step": 67190 }, { "epoch": 0.1357482516352412, "grad_norm": 243.92129516601562, "learning_rate": 9.963996085570854e-06, "loss": 20.1031, "step": 67200 }, { "epoch": 0.13576845226792503, "grad_norm": 271.4411926269531, "learning_rate": 9.963954258773143e-06, "loss": 30.0734, "step": 67210 }, { "epoch": 0.13578865290060885, "grad_norm": 767.510009765625, "learning_rate": 9.963912407781721e-06, "loss": 40.2711, "step": 67220 }, { "epoch": 0.13580885353329267, "grad_norm": 553.5968627929688, "learning_rate": 9.963870532596791e-06, "loss": 24.5075, "step": 67230 }, { "epoch": 0.1358290541659765, "grad_norm": 15.026395797729492, "learning_rate": 9.96382863321856e-06, "loss": 34.9113, "step": 67240 }, { "epoch": 0.1358492547986603, "grad_norm": 391.4563293457031, "learning_rate": 9.963786709647228e-06, "loss": 21.575, "step": 67250 }, { "epoch": 0.1358694554313441, "grad_norm": 361.42437744140625, "learning_rate": 9.963744761883003e-06, "loss": 30.4524, "step": 67260 }, { "epoch": 0.13588965606402792, "grad_norm": 395.6061096191406, "learning_rate": 9.963702789926089e-06, "loss": 26.6612, "step": 67270 }, { "epoch": 0.13590985669671174, "grad_norm": 1625.7520751953125, "learning_rate": 9.963660793776689e-06, "loss": 28.8253, "step": 67280 }, { "epoch": 0.13593005732939556, "grad_norm": 562.817626953125, "learning_rate": 9.963618773435006e-06, "loss": 28.3572, "step": 67290 }, { "epoch": 0.13595025796207938, "grad_norm": 445.7143859863281, "learning_rate": 9.96357672890125e-06, "loss": 55.1681, "step": 67300 }, { "epoch": 0.1359704585947632, "grad_norm": 429.44384765625, "learning_rate": 9.963534660175622e-06, "loss": 18.6562, "step": 67310 }, { "epoch": 0.135990659227447, "grad_norm": 275.9927062988281, "learning_rate": 9.963492567258327e-06, "loss": 30.0211, "step": 67320 }, { "epoch": 0.1360108598601308, "grad_norm": 510.2088928222656, "learning_rate": 9.963450450149572e-06, "loss": 19.8133, "step": 67330 }, { "epoch": 0.13603106049281463, "grad_norm": 1253.2021484375, "learning_rate": 9.963408308849563e-06, "loss": 38.2961, 
"step": 67340 }, { "epoch": 0.13605126112549845, "grad_norm": 325.6735534667969, "learning_rate": 9.963366143358502e-06, "loss": 30.1534, "step": 67350 }, { "epoch": 0.13607146175818227, "grad_norm": 878.3908081054688, "learning_rate": 9.963323953676599e-06, "loss": 31.5037, "step": 67360 }, { "epoch": 0.1360916623908661, "grad_norm": 10.928342819213867, "learning_rate": 9.963281739804054e-06, "loss": 39.6315, "step": 67370 }, { "epoch": 0.13611186302354988, "grad_norm": 400.7942810058594, "learning_rate": 9.963239501741076e-06, "loss": 27.5854, "step": 67380 }, { "epoch": 0.1361320636562337, "grad_norm": 572.9030151367188, "learning_rate": 9.963197239487871e-06, "loss": 14.9577, "step": 67390 }, { "epoch": 0.13615226428891752, "grad_norm": 394.2555236816406, "learning_rate": 9.963154953044646e-06, "loss": 21.3841, "step": 67400 }, { "epoch": 0.13617246492160134, "grad_norm": 450.9844055175781, "learning_rate": 9.963112642411606e-06, "loss": 17.7634, "step": 67410 }, { "epoch": 0.13619266555428516, "grad_norm": 335.7803649902344, "learning_rate": 9.963070307588955e-06, "loss": 16.0571, "step": 67420 }, { "epoch": 0.13621286618696898, "grad_norm": 833.5035400390625, "learning_rate": 9.963027948576902e-06, "loss": 41.7651, "step": 67430 }, { "epoch": 0.1362330668196528, "grad_norm": 54.94221115112305, "learning_rate": 9.96298556537565e-06, "loss": 18.6648, "step": 67440 }, { "epoch": 0.1362532674523366, "grad_norm": 461.9586181640625, "learning_rate": 9.962943157985412e-06, "loss": 54.466, "step": 67450 }, { "epoch": 0.13627346808502042, "grad_norm": 416.55413818359375, "learning_rate": 9.96290072640639e-06, "loss": 25.2187, "step": 67460 }, { "epoch": 0.13629366871770424, "grad_norm": 100.7286148071289, "learning_rate": 9.962858270638793e-06, "loss": 27.7973, "step": 67470 }, { "epoch": 0.13631386935038806, "grad_norm": 581.2256469726562, "learning_rate": 9.962815790682825e-06, "loss": 18.5809, "step": 67480 }, { "epoch": 0.13633406998307188, "grad_norm": 106.12641906738281, "learning_rate": 9.962773286538696e-06, "loss": 12.7785, "step": 67490 }, { "epoch": 0.1363542706157557, "grad_norm": 380.1458740234375, "learning_rate": 9.962730758206612e-06, "loss": 23.1059, "step": 67500 }, { "epoch": 0.1363744712484395, "grad_norm": 300.8750305175781, "learning_rate": 9.962688205686778e-06, "loss": 22.584, "step": 67510 }, { "epoch": 0.1363946718811233, "grad_norm": 13.970352172851562, "learning_rate": 9.962645628979406e-06, "loss": 30.345, "step": 67520 }, { "epoch": 0.13641487251380713, "grad_norm": 391.0465393066406, "learning_rate": 9.962603028084699e-06, "loss": 15.2943, "step": 67530 }, { "epoch": 0.13643507314649095, "grad_norm": 475.7255859375, "learning_rate": 9.962560403002868e-06, "loss": 33.4532, "step": 67540 }, { "epoch": 0.13645527377917477, "grad_norm": 274.9440612792969, "learning_rate": 9.96251775373412e-06, "loss": 27.1277, "step": 67550 }, { "epoch": 0.1364754744118586, "grad_norm": 575.31396484375, "learning_rate": 9.962475080278662e-06, "loss": 19.352, "step": 67560 }, { "epoch": 0.1364956750445424, "grad_norm": 339.14263916015625, "learning_rate": 9.9624323826367e-06, "loss": 12.814, "step": 67570 }, { "epoch": 0.1365158756772262, "grad_norm": 429.7268371582031, "learning_rate": 9.962389660808447e-06, "loss": 25.122, "step": 67580 }, { "epoch": 0.13653607630991002, "grad_norm": 527.354736328125, "learning_rate": 9.96234691479411e-06, "loss": 38.8722, "step": 67590 }, { "epoch": 0.13655627694259384, "grad_norm": 259.0694274902344, "learning_rate": 9.962304144593893e-06, 
"loss": 24.0669, "step": 67600 }, { "epoch": 0.13657647757527766, "grad_norm": 2309.948974609375, "learning_rate": 9.962261350208008e-06, "loss": 27.341, "step": 67610 }, { "epoch": 0.13659667820796148, "grad_norm": 381.55816650390625, "learning_rate": 9.962218531636664e-06, "loss": 51.6446, "step": 67620 }, { "epoch": 0.1366168788406453, "grad_norm": 0.0, "learning_rate": 9.962175688880067e-06, "loss": 24.9652, "step": 67630 }, { "epoch": 0.1366370794733291, "grad_norm": 463.18341064453125, "learning_rate": 9.96213282193843e-06, "loss": 11.8149, "step": 67640 }, { "epoch": 0.1366572801060129, "grad_norm": 388.6172180175781, "learning_rate": 9.962089930811959e-06, "loss": 29.2907, "step": 67650 }, { "epoch": 0.13667748073869673, "grad_norm": 566.1581420898438, "learning_rate": 9.962047015500861e-06, "loss": 38.5838, "step": 67660 }, { "epoch": 0.13669768137138055, "grad_norm": 1256.208984375, "learning_rate": 9.96200407600535e-06, "loss": 28.8538, "step": 67670 }, { "epoch": 0.13671788200406437, "grad_norm": 573.28955078125, "learning_rate": 9.961961112325633e-06, "loss": 22.7216, "step": 67680 }, { "epoch": 0.1367380826367482, "grad_norm": 205.13772583007812, "learning_rate": 9.961918124461918e-06, "loss": 26.4684, "step": 67690 }, { "epoch": 0.13675828326943198, "grad_norm": 352.84588623046875, "learning_rate": 9.961875112414417e-06, "loss": 22.3335, "step": 67700 }, { "epoch": 0.1367784839021158, "grad_norm": 215.87574768066406, "learning_rate": 9.961832076183337e-06, "loss": 30.561, "step": 67710 }, { "epoch": 0.13679868453479962, "grad_norm": 438.83349609375, "learning_rate": 9.96178901576889e-06, "loss": 18.9253, "step": 67720 }, { "epoch": 0.13681888516748344, "grad_norm": 502.7414245605469, "learning_rate": 9.961745931171288e-06, "loss": 24.2419, "step": 67730 }, { "epoch": 0.13683908580016726, "grad_norm": 333.0059509277344, "learning_rate": 9.961702822390735e-06, "loss": 11.7589, "step": 67740 }, { "epoch": 0.13685928643285108, "grad_norm": 112.25717163085938, "learning_rate": 9.961659689427444e-06, "loss": 37.3995, "step": 67750 }, { "epoch": 0.1368794870655349, "grad_norm": 359.86376953125, "learning_rate": 9.961616532281626e-06, "loss": 34.1306, "step": 67760 }, { "epoch": 0.1368996876982187, "grad_norm": 108.73788452148438, "learning_rate": 9.961573350953491e-06, "loss": 18.536, "step": 67770 }, { "epoch": 0.13691988833090252, "grad_norm": 442.3098449707031, "learning_rate": 9.96153014544325e-06, "loss": 38.8789, "step": 67780 }, { "epoch": 0.13694008896358634, "grad_norm": 553.655029296875, "learning_rate": 9.961486915751114e-06, "loss": 22.1211, "step": 67790 }, { "epoch": 0.13696028959627016, "grad_norm": 427.0609436035156, "learning_rate": 9.96144366187729e-06, "loss": 22.2904, "step": 67800 }, { "epoch": 0.13698049022895398, "grad_norm": 374.5810852050781, "learning_rate": 9.961400383821992e-06, "loss": 18.3437, "step": 67810 }, { "epoch": 0.1370006908616378, "grad_norm": 619.0362548828125, "learning_rate": 9.96135708158543e-06, "loss": 28.4649, "step": 67820 }, { "epoch": 0.1370208914943216, "grad_norm": 703.8502197265625, "learning_rate": 9.961313755167816e-06, "loss": 21.8278, "step": 67830 }, { "epoch": 0.1370410921270054, "grad_norm": 651.2317504882812, "learning_rate": 9.961270404569358e-06, "loss": 40.7999, "step": 67840 }, { "epoch": 0.13706129275968923, "grad_norm": 545.5133666992188, "learning_rate": 9.961227029790272e-06, "loss": 17.8611, "step": 67850 }, { "epoch": 0.13708149339237305, "grad_norm": 575.947265625, "learning_rate": 9.961183630830768e-06, 
"loss": 22.0308, "step": 67860 }, { "epoch": 0.13710169402505687, "grad_norm": 690.7603759765625, "learning_rate": 9.961140207691055e-06, "loss": 34.1975, "step": 67870 }, { "epoch": 0.1371218946577407, "grad_norm": 673.443115234375, "learning_rate": 9.961096760371349e-06, "loss": 31.6265, "step": 67880 }, { "epoch": 0.1371420952904245, "grad_norm": 657.22900390625, "learning_rate": 9.961053288871855e-06, "loss": 28.9402, "step": 67890 }, { "epoch": 0.1371622959231083, "grad_norm": 200.78326416015625, "learning_rate": 9.961009793192793e-06, "loss": 20.546, "step": 67900 }, { "epoch": 0.13718249655579212, "grad_norm": 388.554931640625, "learning_rate": 9.96096627333437e-06, "loss": 35.5744, "step": 67910 }, { "epoch": 0.13720269718847594, "grad_norm": 165.8963623046875, "learning_rate": 9.960922729296797e-06, "loss": 33.9824, "step": 67920 }, { "epoch": 0.13722289782115976, "grad_norm": 621.302978515625, "learning_rate": 9.96087916108029e-06, "loss": 36.0635, "step": 67930 }, { "epoch": 0.13724309845384358, "grad_norm": 466.5898742675781, "learning_rate": 9.960835568685058e-06, "loss": 20.2203, "step": 67940 }, { "epoch": 0.1372632990865274, "grad_norm": 539.1677856445312, "learning_rate": 9.960791952111318e-06, "loss": 28.2711, "step": 67950 }, { "epoch": 0.1372834997192112, "grad_norm": 535.0186767578125, "learning_rate": 9.960748311359278e-06, "loss": 26.4194, "step": 67960 }, { "epoch": 0.137303700351895, "grad_norm": 375.3293762207031, "learning_rate": 9.96070464642915e-06, "loss": 21.6461, "step": 67970 }, { "epoch": 0.13732390098457883, "grad_norm": 550.9193725585938, "learning_rate": 9.960660957321153e-06, "loss": 41.7613, "step": 67980 }, { "epoch": 0.13734410161726265, "grad_norm": 271.54925537109375, "learning_rate": 9.960617244035495e-06, "loss": 40.3926, "step": 67990 }, { "epoch": 0.13736430224994647, "grad_norm": 734.3870239257812, "learning_rate": 9.960573506572391e-06, "loss": 30.0268, "step": 68000 }, { "epoch": 0.1373845028826303, "grad_norm": 708.679443359375, "learning_rate": 9.960529744932051e-06, "loss": 23.1979, "step": 68010 }, { "epoch": 0.13740470351531409, "grad_norm": 49.49264144897461, "learning_rate": 9.960485959114693e-06, "loss": 11.3027, "step": 68020 }, { "epoch": 0.1374249041479979, "grad_norm": 423.9950256347656, "learning_rate": 9.960442149120527e-06, "loss": 39.6836, "step": 68030 }, { "epoch": 0.13744510478068173, "grad_norm": 415.1904602050781, "learning_rate": 9.960398314949767e-06, "loss": 30.8742, "step": 68040 }, { "epoch": 0.13746530541336555, "grad_norm": 331.5396423339844, "learning_rate": 9.960354456602627e-06, "loss": 18.4558, "step": 68050 }, { "epoch": 0.13748550604604937, "grad_norm": 432.09698486328125, "learning_rate": 9.960310574079324e-06, "loss": 20.5393, "step": 68060 }, { "epoch": 0.13750570667873319, "grad_norm": 296.25335693359375, "learning_rate": 9.960266667380065e-06, "loss": 18.6186, "step": 68070 }, { "epoch": 0.137525907311417, "grad_norm": 695.0523071289062, "learning_rate": 9.96022273650507e-06, "loss": 22.6743, "step": 68080 }, { "epoch": 0.1375461079441008, "grad_norm": 382.1102294921875, "learning_rate": 9.96017878145455e-06, "loss": 35.8567, "step": 68090 }, { "epoch": 0.13756630857678462, "grad_norm": 228.53292846679688, "learning_rate": 9.960134802228722e-06, "loss": 22.0488, "step": 68100 }, { "epoch": 0.13758650920946844, "grad_norm": 258.3619384765625, "learning_rate": 9.960090798827798e-06, "loss": 21.9386, "step": 68110 }, { "epoch": 0.13760670984215226, "grad_norm": 43.63883972167969, "learning_rate": 
9.960046771251991e-06, "loss": 30.7622, "step": 68120 }, { "epoch": 0.13762691047483608, "grad_norm": 0.0, "learning_rate": 9.96000271950152e-06, "loss": 19.6476, "step": 68130 }, { "epoch": 0.1376471111075199, "grad_norm": 451.0689392089844, "learning_rate": 9.959958643576597e-06, "loss": 18.3354, "step": 68140 }, { "epoch": 0.1376673117402037, "grad_norm": 537.6965942382812, "learning_rate": 9.959914543477436e-06, "loss": 17.0985, "step": 68150 }, { "epoch": 0.1376875123728875, "grad_norm": 432.7866516113281, "learning_rate": 9.959870419204253e-06, "loss": 32.9352, "step": 68160 }, { "epoch": 0.13770771300557133, "grad_norm": 923.4307250976562, "learning_rate": 9.959826270757265e-06, "loss": 22.7099, "step": 68170 }, { "epoch": 0.13772791363825515, "grad_norm": 331.7381896972656, "learning_rate": 9.959782098136683e-06, "loss": 32.172, "step": 68180 }, { "epoch": 0.13774811427093897, "grad_norm": 315.8378601074219, "learning_rate": 9.959737901342725e-06, "loss": 20.4848, "step": 68190 }, { "epoch": 0.1377683149036228, "grad_norm": 580.3365478515625, "learning_rate": 9.959693680375608e-06, "loss": 17.3681, "step": 68200 }, { "epoch": 0.1377885155363066, "grad_norm": 297.5736389160156, "learning_rate": 9.959649435235543e-06, "loss": 24.8473, "step": 68210 }, { "epoch": 0.1378087161689904, "grad_norm": 836.2515869140625, "learning_rate": 9.95960516592275e-06, "loss": 30.2962, "step": 68220 }, { "epoch": 0.13782891680167422, "grad_norm": 164.470458984375, "learning_rate": 9.959560872437443e-06, "loss": 23.1598, "step": 68230 }, { "epoch": 0.13784911743435804, "grad_norm": 827.936767578125, "learning_rate": 9.959516554779838e-06, "loss": 28.8193, "step": 68240 }, { "epoch": 0.13786931806704186, "grad_norm": 363.9051513671875, "learning_rate": 9.95947221295015e-06, "loss": 27.7335, "step": 68250 }, { "epoch": 0.13788951869972568, "grad_norm": 777.4404907226562, "learning_rate": 9.959427846948595e-06, "loss": 14.0809, "step": 68260 }, { "epoch": 0.1379097193324095, "grad_norm": 699.2410888671875, "learning_rate": 9.959383456775392e-06, "loss": 44.7876, "step": 68270 }, { "epoch": 0.1379299199650933, "grad_norm": 1100.128662109375, "learning_rate": 9.959339042430753e-06, "loss": 46.4882, "step": 68280 }, { "epoch": 0.13795012059777711, "grad_norm": 350.7457275390625, "learning_rate": 9.9592946039149e-06, "loss": 22.3283, "step": 68290 }, { "epoch": 0.13797032123046093, "grad_norm": 496.0047607421875, "learning_rate": 9.959250141228046e-06, "loss": 14.4258, "step": 68300 }, { "epoch": 0.13799052186314475, "grad_norm": 721.7589721679688, "learning_rate": 9.959205654370406e-06, "loss": 41.2446, "step": 68310 }, { "epoch": 0.13801072249582857, "grad_norm": 502.5924377441406, "learning_rate": 9.959161143342201e-06, "loss": 31.4467, "step": 68320 }, { "epoch": 0.1380309231285124, "grad_norm": 656.1293334960938, "learning_rate": 9.959116608143647e-06, "loss": 31.2924, "step": 68330 }, { "epoch": 0.1380511237611962, "grad_norm": 314.748046875, "learning_rate": 9.959072048774958e-06, "loss": 22.5767, "step": 68340 }, { "epoch": 0.13807132439388, "grad_norm": 367.8751525878906, "learning_rate": 9.959027465236354e-06, "loss": 16.0496, "step": 68350 }, { "epoch": 0.13809152502656383, "grad_norm": 26.35764503479004, "learning_rate": 9.958982857528053e-06, "loss": 30.7503, "step": 68360 }, { "epoch": 0.13811172565924765, "grad_norm": 167.81228637695312, "learning_rate": 9.958938225650268e-06, "loss": 35.3317, "step": 68370 }, { "epoch": 0.13813192629193147, "grad_norm": 604.2232055664062, "learning_rate": 
9.958893569603222e-06, "loss": 39.864, "step": 68380 }, { "epoch": 0.1381521269246153, "grad_norm": 809.530517578125, "learning_rate": 9.958848889387129e-06, "loss": 20.9284, "step": 68390 }, { "epoch": 0.1381723275572991, "grad_norm": 674.8519287109375, "learning_rate": 9.958804185002209e-06, "loss": 21.7976, "step": 68400 }, { "epoch": 0.1381925281899829, "grad_norm": 393.74920654296875, "learning_rate": 9.958759456448677e-06, "loss": 34.8823, "step": 68410 }, { "epoch": 0.13821272882266672, "grad_norm": 949.1607055664062, "learning_rate": 9.958714703726755e-06, "loss": 37.0131, "step": 68420 }, { "epoch": 0.13823292945535054, "grad_norm": 193.75955200195312, "learning_rate": 9.958669926836658e-06, "loss": 15.5887, "step": 68430 }, { "epoch": 0.13825313008803436, "grad_norm": 171.90570068359375, "learning_rate": 9.958625125778606e-06, "loss": 25.8418, "step": 68440 }, { "epoch": 0.13827333072071818, "grad_norm": 347.1898193359375, "learning_rate": 9.958580300552816e-06, "loss": 22.348, "step": 68450 }, { "epoch": 0.138293531353402, "grad_norm": 556.2518920898438, "learning_rate": 9.958535451159506e-06, "loss": 23.4541, "step": 68460 }, { "epoch": 0.1383137319860858, "grad_norm": 35.548519134521484, "learning_rate": 9.958490577598896e-06, "loss": 21.8669, "step": 68470 }, { "epoch": 0.1383339326187696, "grad_norm": 303.63336181640625, "learning_rate": 9.958445679871204e-06, "loss": 20.6764, "step": 68480 }, { "epoch": 0.13835413325145343, "grad_norm": 724.085205078125, "learning_rate": 9.958400757976651e-06, "loss": 23.8251, "step": 68490 }, { "epoch": 0.13837433388413725, "grad_norm": 848.7816772460938, "learning_rate": 9.958355811915452e-06, "loss": 27.7101, "step": 68500 }, { "epoch": 0.13839453451682107, "grad_norm": 732.7776489257812, "learning_rate": 9.95831084168783e-06, "loss": 53.3678, "step": 68510 }, { "epoch": 0.1384147351495049, "grad_norm": 470.7287292480469, "learning_rate": 9.958265847294001e-06, "loss": 25.9891, "step": 68520 }, { "epoch": 0.1384349357821887, "grad_norm": 185.62628173828125, "learning_rate": 9.958220828734187e-06, "loss": 34.1429, "step": 68530 }, { "epoch": 0.1384551364148725, "grad_norm": 449.30059814453125, "learning_rate": 9.958175786008605e-06, "loss": 12.2641, "step": 68540 }, { "epoch": 0.13847533704755632, "grad_norm": 734.6017456054688, "learning_rate": 9.958130719117476e-06, "loss": 29.0336, "step": 68550 }, { "epoch": 0.13849553768024014, "grad_norm": 243.384765625, "learning_rate": 9.958085628061018e-06, "loss": 21.049, "step": 68560 }, { "epoch": 0.13851573831292396, "grad_norm": 220.47532653808594, "learning_rate": 9.958040512839453e-06, "loss": 22.1039, "step": 68570 }, { "epoch": 0.13853593894560778, "grad_norm": 515.5274047851562, "learning_rate": 9.957995373453e-06, "loss": 21.835, "step": 68580 }, { "epoch": 0.1385561395782916, "grad_norm": 538.095947265625, "learning_rate": 9.95795020990188e-06, "loss": 33.4923, "step": 68590 }, { "epoch": 0.1385763402109754, "grad_norm": 402.7262268066406, "learning_rate": 9.957905022186309e-06, "loss": 18.5955, "step": 68600 }, { "epoch": 0.13859654084365922, "grad_norm": 315.2361755371094, "learning_rate": 9.957859810306511e-06, "loss": 31.8788, "step": 68610 }, { "epoch": 0.13861674147634304, "grad_norm": 1102.16552734375, "learning_rate": 9.957814574262707e-06, "loss": 35.5344, "step": 68620 }, { "epoch": 0.13863694210902686, "grad_norm": 865.6183471679688, "learning_rate": 9.957769314055117e-06, "loss": 30.1209, "step": 68630 }, { "epoch": 0.13865714274171068, "grad_norm": 912.3616333007812, 
"learning_rate": 9.957724029683958e-06, "loss": 13.6061, "step": 68640 }, { "epoch": 0.1386773433743945, "grad_norm": 1288.3985595703125, "learning_rate": 9.957678721149454e-06, "loss": 32.3121, "step": 68650 }, { "epoch": 0.1386975440070783, "grad_norm": 875.0906372070312, "learning_rate": 9.957633388451827e-06, "loss": 33.2735, "step": 68660 }, { "epoch": 0.1387177446397621, "grad_norm": 260.54937744140625, "learning_rate": 9.957588031591295e-06, "loss": 23.7336, "step": 68670 }, { "epoch": 0.13873794527244593, "grad_norm": 270.64227294921875, "learning_rate": 9.957542650568079e-06, "loss": 28.7163, "step": 68680 }, { "epoch": 0.13875814590512975, "grad_norm": 886.39306640625, "learning_rate": 9.957497245382403e-06, "loss": 20.3497, "step": 68690 }, { "epoch": 0.13877834653781357, "grad_norm": 486.9722900390625, "learning_rate": 9.957451816034487e-06, "loss": 21.0285, "step": 68700 }, { "epoch": 0.1387985471704974, "grad_norm": 477.1606750488281, "learning_rate": 9.95740636252455e-06, "loss": 36.446, "step": 68710 }, { "epoch": 0.1388187478031812, "grad_norm": 369.5108337402344, "learning_rate": 9.957360884852819e-06, "loss": 18.5451, "step": 68720 }, { "epoch": 0.138838948435865, "grad_norm": 984.824951171875, "learning_rate": 9.95731538301951e-06, "loss": 33.7099, "step": 68730 }, { "epoch": 0.13885914906854882, "grad_norm": 458.1757507324219, "learning_rate": 9.957269857024847e-06, "loss": 24.3889, "step": 68740 }, { "epoch": 0.13887934970123264, "grad_norm": 206.4055633544922, "learning_rate": 9.957224306869053e-06, "loss": 23.9538, "step": 68750 }, { "epoch": 0.13889955033391646, "grad_norm": 405.8059387207031, "learning_rate": 9.957178732552348e-06, "loss": 22.2457, "step": 68760 }, { "epoch": 0.13891975096660028, "grad_norm": 348.5886535644531, "learning_rate": 9.957133134074955e-06, "loss": 16.4888, "step": 68770 }, { "epoch": 0.1389399515992841, "grad_norm": 384.4114685058594, "learning_rate": 9.957087511437099e-06, "loss": 45.6042, "step": 68780 }, { "epoch": 0.1389601522319679, "grad_norm": 197.05648803710938, "learning_rate": 9.957041864638997e-06, "loss": 14.7395, "step": 68790 }, { "epoch": 0.1389803528646517, "grad_norm": 393.7522277832031, "learning_rate": 9.956996193680874e-06, "loss": 24.0284, "step": 68800 }, { "epoch": 0.13900055349733553, "grad_norm": 796.6821899414062, "learning_rate": 9.956950498562954e-06, "loss": 25.859, "step": 68810 }, { "epoch": 0.13902075413001935, "grad_norm": 257.58404541015625, "learning_rate": 9.956904779285457e-06, "loss": 20.2082, "step": 68820 }, { "epoch": 0.13904095476270317, "grad_norm": 1210.4019775390625, "learning_rate": 9.956859035848608e-06, "loss": 34.5147, "step": 68830 }, { "epoch": 0.139061155395387, "grad_norm": 212.5049591064453, "learning_rate": 9.95681326825263e-06, "loss": 24.8414, "step": 68840 }, { "epoch": 0.1390813560280708, "grad_norm": 134.4169158935547, "learning_rate": 9.956767476497745e-06, "loss": 30.2395, "step": 68850 }, { "epoch": 0.1391015566607546, "grad_norm": 624.7702026367188, "learning_rate": 9.956721660584175e-06, "loss": 40.1395, "step": 68860 }, { "epoch": 0.13912175729343843, "grad_norm": 300.9224548339844, "learning_rate": 9.956675820512146e-06, "loss": 27.4624, "step": 68870 }, { "epoch": 0.13914195792612225, "grad_norm": 674.11083984375, "learning_rate": 9.956629956281881e-06, "loss": 44.8466, "step": 68880 }, { "epoch": 0.13916215855880607, "grad_norm": 552.4646606445312, "learning_rate": 9.956584067893602e-06, "loss": 17.5639, "step": 68890 }, { "epoch": 0.13918235919148988, "grad_norm": 
289.3154296875, "learning_rate": 9.956538155347534e-06, "loss": 19.0497, "step": 68900 }, { "epoch": 0.1392025598241737, "grad_norm": 791.2666625976562, "learning_rate": 9.9564922186439e-06, "loss": 23.6453, "step": 68910 }, { "epoch": 0.1392227604568575, "grad_norm": 387.15289306640625, "learning_rate": 9.956446257782923e-06, "loss": 18.8502, "step": 68920 }, { "epoch": 0.13924296108954132, "grad_norm": 713.1915893554688, "learning_rate": 9.95640027276483e-06, "loss": 43.7808, "step": 68930 }, { "epoch": 0.13926316172222514, "grad_norm": 234.76963806152344, "learning_rate": 9.95635426358984e-06, "loss": 27.3004, "step": 68940 }, { "epoch": 0.13928336235490896, "grad_norm": 646.1390380859375, "learning_rate": 9.956308230258182e-06, "loss": 20.7643, "step": 68950 }, { "epoch": 0.13930356298759278, "grad_norm": 410.85760498046875, "learning_rate": 9.956262172770082e-06, "loss": 21.7578, "step": 68960 }, { "epoch": 0.1393237636202766, "grad_norm": 160.68807983398438, "learning_rate": 9.956216091125756e-06, "loss": 25.8317, "step": 68970 }, { "epoch": 0.1393439642529604, "grad_norm": 418.814208984375, "learning_rate": 9.956169985325438e-06, "loss": 22.5675, "step": 68980 }, { "epoch": 0.1393641648856442, "grad_norm": 477.0711975097656, "learning_rate": 9.956123855369346e-06, "loss": 36.7583, "step": 68990 }, { "epoch": 0.13938436551832803, "grad_norm": 489.3850402832031, "learning_rate": 9.95607770125771e-06, "loss": 12.5654, "step": 69000 }, { "epoch": 0.13940456615101185, "grad_norm": 42.41978073120117, "learning_rate": 9.95603152299075e-06, "loss": 35.2543, "step": 69010 }, { "epoch": 0.13942476678369567, "grad_norm": 299.6903381347656, "learning_rate": 9.955985320568696e-06, "loss": 17.0381, "step": 69020 }, { "epoch": 0.1394449674163795, "grad_norm": 323.9222717285156, "learning_rate": 9.955939093991767e-06, "loss": 20.292, "step": 69030 }, { "epoch": 0.1394651680490633, "grad_norm": 148.71099853515625, "learning_rate": 9.955892843260195e-06, "loss": 41.621, "step": 69040 }, { "epoch": 0.1394853686817471, "grad_norm": 461.989013671875, "learning_rate": 9.955846568374201e-06, "loss": 23.9631, "step": 69050 }, { "epoch": 0.13950556931443092, "grad_norm": 942.498291015625, "learning_rate": 9.955800269334013e-06, "loss": 31.5033, "step": 69060 }, { "epoch": 0.13952576994711474, "grad_norm": 210.36659240722656, "learning_rate": 9.955753946139855e-06, "loss": 28.8701, "step": 69070 }, { "epoch": 0.13954597057979856, "grad_norm": 200.81434631347656, "learning_rate": 9.955707598791952e-06, "loss": 15.0929, "step": 69080 }, { "epoch": 0.13956617121248238, "grad_norm": 317.3502197265625, "learning_rate": 9.955661227290531e-06, "loss": 43.9322, "step": 69090 }, { "epoch": 0.1395863718451662, "grad_norm": 581.02978515625, "learning_rate": 9.95561483163582e-06, "loss": 21.4542, "step": 69100 }, { "epoch": 0.13960657247785, "grad_norm": 909.7908325195312, "learning_rate": 9.955568411828043e-06, "loss": 26.0563, "step": 69110 }, { "epoch": 0.13962677311053381, "grad_norm": 326.35675048828125, "learning_rate": 9.955521967867427e-06, "loss": 34.8543, "step": 69120 }, { "epoch": 0.13964697374321763, "grad_norm": 180.7159881591797, "learning_rate": 9.955475499754197e-06, "loss": 10.9506, "step": 69130 }, { "epoch": 0.13966717437590145, "grad_norm": 64.47581481933594, "learning_rate": 9.955429007488582e-06, "loss": 26.2207, "step": 69140 }, { "epoch": 0.13968737500858527, "grad_norm": 541.9278564453125, "learning_rate": 9.955382491070806e-06, "loss": 27.692, "step": 69150 }, { "epoch": 0.1397075756412691, 
"grad_norm": 188.29824829101562, "learning_rate": 9.955335950501097e-06, "loss": 10.1276, "step": 69160 }, { "epoch": 0.13972777627395291, "grad_norm": 1394.490478515625, "learning_rate": 9.955289385779681e-06, "loss": 36.9296, "step": 69170 }, { "epoch": 0.1397479769066367, "grad_norm": 520.42138671875, "learning_rate": 9.955242796906785e-06, "loss": 16.4732, "step": 69180 }, { "epoch": 0.13976817753932053, "grad_norm": 236.06822204589844, "learning_rate": 9.955196183882637e-06, "loss": 33.4686, "step": 69190 }, { "epoch": 0.13978837817200435, "grad_norm": 565.5700073242188, "learning_rate": 9.955149546707465e-06, "loss": 23.4184, "step": 69200 }, { "epoch": 0.13980857880468817, "grad_norm": 150.66734313964844, "learning_rate": 9.955102885381494e-06, "loss": 22.63, "step": 69210 }, { "epoch": 0.13982877943737199, "grad_norm": 470.6201477050781, "learning_rate": 9.955056199904953e-06, "loss": 16.4032, "step": 69220 }, { "epoch": 0.1398489800700558, "grad_norm": 483.5701904296875, "learning_rate": 9.955009490278069e-06, "loss": 23.6988, "step": 69230 }, { "epoch": 0.1398691807027396, "grad_norm": 386.0276184082031, "learning_rate": 9.95496275650107e-06, "loss": 34.1396, "step": 69240 }, { "epoch": 0.13988938133542342, "grad_norm": 470.1758728027344, "learning_rate": 9.954915998574182e-06, "loss": 7.9019, "step": 69250 }, { "epoch": 0.13990958196810724, "grad_norm": 269.1112060546875, "learning_rate": 9.954869216497636e-06, "loss": 18.3955, "step": 69260 }, { "epoch": 0.13992978260079106, "grad_norm": 4.423580169677734, "learning_rate": 9.954822410271657e-06, "loss": 27.5181, "step": 69270 }, { "epoch": 0.13994998323347488, "grad_norm": 875.4022216796875, "learning_rate": 9.954775579896476e-06, "loss": 37.5965, "step": 69280 }, { "epoch": 0.1399701838661587, "grad_norm": 188.47088623046875, "learning_rate": 9.954728725372319e-06, "loss": 15.6562, "step": 69290 }, { "epoch": 0.1399903844988425, "grad_norm": 453.6847839355469, "learning_rate": 9.954681846699414e-06, "loss": 14.6812, "step": 69300 }, { "epoch": 0.1400105851315263, "grad_norm": 286.9229431152344, "learning_rate": 9.954634943877993e-06, "loss": 26.4569, "step": 69310 }, { "epoch": 0.14003078576421013, "grad_norm": 224.12432861328125, "learning_rate": 9.95458801690828e-06, "loss": 37.0338, "step": 69320 }, { "epoch": 0.14005098639689395, "grad_norm": 250.95457458496094, "learning_rate": 9.954541065790509e-06, "loss": 19.3025, "step": 69330 }, { "epoch": 0.14007118702957777, "grad_norm": 204.41136169433594, "learning_rate": 9.954494090524903e-06, "loss": 29.5032, "step": 69340 }, { "epoch": 0.1400913876622616, "grad_norm": 1198.9293212890625, "learning_rate": 9.954447091111695e-06, "loss": 29.9783, "step": 69350 }, { "epoch": 0.1401115882949454, "grad_norm": 605.0090942382812, "learning_rate": 9.95440006755111e-06, "loss": 28.5639, "step": 69360 }, { "epoch": 0.1401317889276292, "grad_norm": 203.74388122558594, "learning_rate": 9.954353019843384e-06, "loss": 29.1512, "step": 69370 }, { "epoch": 0.14015198956031302, "grad_norm": 1235.79541015625, "learning_rate": 9.95430594798874e-06, "loss": 43.7828, "step": 69380 }, { "epoch": 0.14017219019299684, "grad_norm": 1251.275390625, "learning_rate": 9.954258851987411e-06, "loss": 28.2353, "step": 69390 }, { "epoch": 0.14019239082568066, "grad_norm": 473.9927062988281, "learning_rate": 9.954211731839623e-06, "loss": 31.9145, "step": 69400 }, { "epoch": 0.14021259145836448, "grad_norm": 120.4296875, "learning_rate": 9.95416458754561e-06, "loss": 24.6197, "step": 69410 }, { "epoch": 
0.1402327920910483, "grad_norm": 1342.9423828125, "learning_rate": 9.954117419105599e-06, "loss": 31.6074, "step": 69420 }, { "epoch": 0.1402529927237321, "grad_norm": 377.57598876953125, "learning_rate": 9.95407022651982e-06, "loss": 42.855, "step": 69430 }, { "epoch": 0.14027319335641592, "grad_norm": 127.66248321533203, "learning_rate": 9.954023009788505e-06, "loss": 16.1247, "step": 69440 }, { "epoch": 0.14029339398909974, "grad_norm": 824.560791015625, "learning_rate": 9.953975768911881e-06, "loss": 28.6148, "step": 69450 }, { "epoch": 0.14031359462178356, "grad_norm": 1145.84765625, "learning_rate": 9.953928503890181e-06, "loss": 23.5062, "step": 69460 }, { "epoch": 0.14033379525446738, "grad_norm": 68.81543731689453, "learning_rate": 9.953881214723636e-06, "loss": 29.8865, "step": 69470 }, { "epoch": 0.1403539958871512, "grad_norm": 387.13104248046875, "learning_rate": 9.95383390141247e-06, "loss": 23.7702, "step": 69480 }, { "epoch": 0.14037419651983502, "grad_norm": 235.42279052734375, "learning_rate": 9.953786563956923e-06, "loss": 27.2501, "step": 69490 }, { "epoch": 0.1403943971525188, "grad_norm": 868.7579956054688, "learning_rate": 9.953739202357219e-06, "loss": 38.222, "step": 69500 }, { "epoch": 0.14041459778520263, "grad_norm": 819.7193603515625, "learning_rate": 9.953691816613592e-06, "loss": 28.7001, "step": 69510 }, { "epoch": 0.14043479841788645, "grad_norm": 698.81201171875, "learning_rate": 9.95364440672627e-06, "loss": 26.3605, "step": 69520 }, { "epoch": 0.14045499905057027, "grad_norm": 539.9085083007812, "learning_rate": 9.953596972695487e-06, "loss": 20.6521, "step": 69530 }, { "epoch": 0.1404751996832541, "grad_norm": 578.3551635742188, "learning_rate": 9.953549514521474e-06, "loss": 30.846, "step": 69540 }, { "epoch": 0.1404954003159379, "grad_norm": 356.5929260253906, "learning_rate": 9.953502032204461e-06, "loss": 26.0084, "step": 69550 }, { "epoch": 0.1405156009486217, "grad_norm": 0.0, "learning_rate": 9.95345452574468e-06, "loss": 30.3256, "step": 69560 }, { "epoch": 0.14053580158130552, "grad_norm": 183.98497009277344, "learning_rate": 9.95340699514236e-06, "loss": 26.8322, "step": 69570 }, { "epoch": 0.14055600221398934, "grad_norm": 624.8582763671875, "learning_rate": 9.953359440397738e-06, "loss": 25.6515, "step": 69580 }, { "epoch": 0.14057620284667316, "grad_norm": 1048.4591064453125, "learning_rate": 9.953311861511043e-06, "loss": 17.7401, "step": 69590 }, { "epoch": 0.14059640347935698, "grad_norm": 491.6831970214844, "learning_rate": 9.953264258482505e-06, "loss": 23.7325, "step": 69600 }, { "epoch": 0.1406166041120408, "grad_norm": 396.37847900390625, "learning_rate": 9.953216631312358e-06, "loss": 19.6475, "step": 69610 }, { "epoch": 0.1406368047447246, "grad_norm": 236.17860412597656, "learning_rate": 9.953168980000836e-06, "loss": 25.2312, "step": 69620 }, { "epoch": 0.1406570053774084, "grad_norm": 310.13165283203125, "learning_rate": 9.953121304548167e-06, "loss": 19.4689, "step": 69630 }, { "epoch": 0.14067720601009223, "grad_norm": 410.7505187988281, "learning_rate": 9.953073604954586e-06, "loss": 41.4177, "step": 69640 }, { "epoch": 0.14069740664277605, "grad_norm": 237.97850036621094, "learning_rate": 9.953025881220325e-06, "loss": 23.2995, "step": 69650 }, { "epoch": 0.14071760727545987, "grad_norm": 510.98211669921875, "learning_rate": 9.952978133345616e-06, "loss": 26.9472, "step": 69660 }, { "epoch": 0.1407378079081437, "grad_norm": 132.69935607910156, "learning_rate": 9.952930361330694e-06, "loss": 19.6408, "step": 69670 }, { 
"epoch": 0.1407580085408275, "grad_norm": 272.2227783203125, "learning_rate": 9.952882565175788e-06, "loss": 38.1983, "step": 69680 }, { "epoch": 0.1407782091735113, "grad_norm": 784.8576049804688, "learning_rate": 9.952834744881135e-06, "loss": 18.8075, "step": 69690 }, { "epoch": 0.14079840980619512, "grad_norm": 1082.32763671875, "learning_rate": 9.952786900446964e-06, "loss": 32.6177, "step": 69700 }, { "epoch": 0.14081861043887894, "grad_norm": 709.037109375, "learning_rate": 9.952739031873513e-06, "loss": 19.6137, "step": 69710 }, { "epoch": 0.14083881107156276, "grad_norm": 991.0155029296875, "learning_rate": 9.952691139161012e-06, "loss": 27.8896, "step": 69720 }, { "epoch": 0.14085901170424658, "grad_norm": 575.1040649414062, "learning_rate": 9.952643222309694e-06, "loss": 14.214, "step": 69730 }, { "epoch": 0.1408792123369304, "grad_norm": 414.91455078125, "learning_rate": 9.952595281319794e-06, "loss": 25.0494, "step": 69740 }, { "epoch": 0.1408994129696142, "grad_norm": 277.9414367675781, "learning_rate": 9.952547316191545e-06, "loss": 11.4466, "step": 69750 }, { "epoch": 0.14091961360229802, "grad_norm": 530.8134765625, "learning_rate": 9.95249932692518e-06, "loss": 30.588, "step": 69760 }, { "epoch": 0.14093981423498184, "grad_norm": 419.50445556640625, "learning_rate": 9.952451313520937e-06, "loss": 33.3972, "step": 69770 }, { "epoch": 0.14096001486766566, "grad_norm": 332.69232177734375, "learning_rate": 9.952403275979046e-06, "loss": 36.2613, "step": 69780 }, { "epoch": 0.14098021550034948, "grad_norm": 544.991455078125, "learning_rate": 9.95235521429974e-06, "loss": 30.0454, "step": 69790 }, { "epoch": 0.1410004161330333, "grad_norm": 213.3559112548828, "learning_rate": 9.952307128483257e-06, "loss": 12.5632, "step": 69800 }, { "epoch": 0.14102061676571712, "grad_norm": 528.3657836914062, "learning_rate": 9.952259018529829e-06, "loss": 30.6922, "step": 69810 }, { "epoch": 0.1410408173984009, "grad_norm": 367.0792236328125, "learning_rate": 9.952210884439693e-06, "loss": 23.4962, "step": 69820 }, { "epoch": 0.14106101803108473, "grad_norm": 598.0098876953125, "learning_rate": 9.95216272621308e-06, "loss": 16.1818, "step": 69830 }, { "epoch": 0.14108121866376855, "grad_norm": 490.101318359375, "learning_rate": 9.952114543850227e-06, "loss": 23.8804, "step": 69840 }, { "epoch": 0.14110141929645237, "grad_norm": 424.291015625, "learning_rate": 9.952066337351367e-06, "loss": 14.4883, "step": 69850 }, { "epoch": 0.1411216199291362, "grad_norm": 195.29574584960938, "learning_rate": 9.952018106716737e-06, "loss": 21.0854, "step": 69860 }, { "epoch": 0.14114182056182, "grad_norm": 383.5818786621094, "learning_rate": 9.951969851946573e-06, "loss": 32.4189, "step": 69870 }, { "epoch": 0.1411620211945038, "grad_norm": 73.61356353759766, "learning_rate": 9.951921573041107e-06, "loss": 35.9182, "step": 69880 }, { "epoch": 0.14118222182718762, "grad_norm": 420.78076171875, "learning_rate": 9.951873270000576e-06, "loss": 25.4562, "step": 69890 }, { "epoch": 0.14120242245987144, "grad_norm": 511.9713134765625, "learning_rate": 9.951824942825215e-06, "loss": 37.8071, "step": 69900 }, { "epoch": 0.14122262309255526, "grad_norm": 323.9211730957031, "learning_rate": 9.951776591515262e-06, "loss": 32.4247, "step": 69910 }, { "epoch": 0.14124282372523908, "grad_norm": 487.0621032714844, "learning_rate": 9.951728216070949e-06, "loss": 13.8481, "step": 69920 }, { "epoch": 0.1412630243579229, "grad_norm": 356.62457275390625, "learning_rate": 9.951679816492513e-06, "loss": 20.0213, "step": 69930 }, 
{ "epoch": 0.1412832249906067, "grad_norm": 377.02972412109375, "learning_rate": 9.951631392780189e-06, "loss": 18.7376, "step": 69940 }, { "epoch": 0.1413034256232905, "grad_norm": 738.23046875, "learning_rate": 9.951582944934215e-06, "loss": 37.3979, "step": 69950 }, { "epoch": 0.14132362625597433, "grad_norm": 892.313232421875, "learning_rate": 9.951534472954826e-06, "loss": 31.2392, "step": 69960 }, { "epoch": 0.14134382688865815, "grad_norm": 460.5330505371094, "learning_rate": 9.95148597684226e-06, "loss": 35.134, "step": 69970 }, { "epoch": 0.14136402752134197, "grad_norm": 384.5951232910156, "learning_rate": 9.951437456596751e-06, "loss": 14.3294, "step": 69980 }, { "epoch": 0.1413842281540258, "grad_norm": 486.7411804199219, "learning_rate": 9.951388912218536e-06, "loss": 34.7231, "step": 69990 }, { "epoch": 0.1414044287867096, "grad_norm": 677.1619873046875, "learning_rate": 9.951340343707852e-06, "loss": 17.5728, "step": 70000 }, { "epoch": 0.1414246294193934, "grad_norm": 442.0053405761719, "learning_rate": 9.951291751064937e-06, "loss": 22.1191, "step": 70010 }, { "epoch": 0.14144483005207723, "grad_norm": 257.58514404296875, "learning_rate": 9.951243134290025e-06, "loss": 19.5074, "step": 70020 }, { "epoch": 0.14146503068476105, "grad_norm": 800.439697265625, "learning_rate": 9.951194493383355e-06, "loss": 32.0352, "step": 70030 }, { "epoch": 0.14148523131744487, "grad_norm": 539.307861328125, "learning_rate": 9.951145828345163e-06, "loss": 27.1896, "step": 70040 }, { "epoch": 0.14150543195012869, "grad_norm": 671.9888916015625, "learning_rate": 9.951097139175688e-06, "loss": 30.1305, "step": 70050 }, { "epoch": 0.1415256325828125, "grad_norm": 693.0454711914062, "learning_rate": 9.951048425875165e-06, "loss": 30.7239, "step": 70060 }, { "epoch": 0.1415458332154963, "grad_norm": 144.39990234375, "learning_rate": 9.950999688443833e-06, "loss": 10.8088, "step": 70070 }, { "epoch": 0.14156603384818012, "grad_norm": 645.5604858398438, "learning_rate": 9.950950926881928e-06, "loss": 25.8998, "step": 70080 }, { "epoch": 0.14158623448086394, "grad_norm": 876.4896240234375, "learning_rate": 9.950902141189691e-06, "loss": 28.263, "step": 70090 }, { "epoch": 0.14160643511354776, "grad_norm": 988.877197265625, "learning_rate": 9.950853331367356e-06, "loss": 28.0873, "step": 70100 }, { "epoch": 0.14162663574623158, "grad_norm": 166.33299255371094, "learning_rate": 9.95080449741516e-06, "loss": 25.429, "step": 70110 }, { "epoch": 0.1416468363789154, "grad_norm": 656.4375, "learning_rate": 9.950755639333347e-06, "loss": 29.118, "step": 70120 }, { "epoch": 0.1416670370115992, "grad_norm": 502.52630615234375, "learning_rate": 9.95070675712215e-06, "loss": 24.3436, "step": 70130 }, { "epoch": 0.141687237644283, "grad_norm": 465.3695983886719, "learning_rate": 9.950657850781809e-06, "loss": 16.0229, "step": 70140 }, { "epoch": 0.14170743827696683, "grad_norm": 240.92152404785156, "learning_rate": 9.95060892031256e-06, "loss": 34.4966, "step": 70150 }, { "epoch": 0.14172763890965065, "grad_norm": 497.15380859375, "learning_rate": 9.950559965714647e-06, "loss": 51.5669, "step": 70160 }, { "epoch": 0.14174783954233447, "grad_norm": 919.5972900390625, "learning_rate": 9.950510986988304e-06, "loss": 26.8649, "step": 70170 }, { "epoch": 0.1417680401750183, "grad_norm": 525.713134765625, "learning_rate": 9.95046198413377e-06, "loss": 20.6903, "step": 70180 }, { "epoch": 0.1417882408077021, "grad_norm": 516.0868530273438, "learning_rate": 9.950412957151286e-06, "loss": 32.3086, "step": 70190 }, { 
"epoch": 0.1418084414403859, "grad_norm": 0.0, "learning_rate": 9.950363906041089e-06, "loss": 22.1381, "step": 70200 }, { "epoch": 0.14182864207306972, "grad_norm": 357.0196228027344, "learning_rate": 9.950314830803418e-06, "loss": 23.64, "step": 70210 }, { "epoch": 0.14184884270575354, "grad_norm": 1040.35546875, "learning_rate": 9.950265731438513e-06, "loss": 32.8609, "step": 70220 }, { "epoch": 0.14186904333843736, "grad_norm": 1001.8739013671875, "learning_rate": 9.950216607946614e-06, "loss": 35.8748, "step": 70230 }, { "epoch": 0.14188924397112118, "grad_norm": 714.5865478515625, "learning_rate": 9.95016746032796e-06, "loss": 40.7987, "step": 70240 }, { "epoch": 0.141909444603805, "grad_norm": 114.6450424194336, "learning_rate": 9.95011828858279e-06, "loss": 28.8126, "step": 70250 }, { "epoch": 0.1419296452364888, "grad_norm": 127.16027069091797, "learning_rate": 9.950069092711342e-06, "loss": 17.4822, "step": 70260 }, { "epoch": 0.14194984586917261, "grad_norm": 243.0860137939453, "learning_rate": 9.950019872713858e-06, "loss": 35.47, "step": 70270 }, { "epoch": 0.14197004650185643, "grad_norm": 204.92752075195312, "learning_rate": 9.94997062859058e-06, "loss": 14.5843, "step": 70280 }, { "epoch": 0.14199024713454025, "grad_norm": 654.3239135742188, "learning_rate": 9.949921360341743e-06, "loss": 21.8702, "step": 70290 }, { "epoch": 0.14201044776722407, "grad_norm": 19.094406127929688, "learning_rate": 9.94987206796759e-06, "loss": 26.9252, "step": 70300 }, { "epoch": 0.1420306483999079, "grad_norm": 92.65380096435547, "learning_rate": 9.949822751468364e-06, "loss": 23.4914, "step": 70310 }, { "epoch": 0.14205084903259171, "grad_norm": 118.98175811767578, "learning_rate": 9.949773410844299e-06, "loss": 31.5911, "step": 70320 }, { "epoch": 0.1420710496652755, "grad_norm": 509.1815185546875, "learning_rate": 9.94972404609564e-06, "loss": 30.0535, "step": 70330 }, { "epoch": 0.14209125029795933, "grad_norm": 457.3714904785156, "learning_rate": 9.949674657222624e-06, "loss": 32.8641, "step": 70340 }, { "epoch": 0.14211145093064315, "grad_norm": 552.8912353515625, "learning_rate": 9.949625244225496e-06, "loss": 21.6767, "step": 70350 }, { "epoch": 0.14213165156332697, "grad_norm": 760.5970458984375, "learning_rate": 9.949575807104494e-06, "loss": 22.7619, "step": 70360 }, { "epoch": 0.1421518521960108, "grad_norm": 1111.3450927734375, "learning_rate": 9.94952634585986e-06, "loss": 56.025, "step": 70370 }, { "epoch": 0.1421720528286946, "grad_norm": 27.02530860900879, "learning_rate": 9.949476860491836e-06, "loss": 48.3702, "step": 70380 }, { "epoch": 0.1421922534613784, "grad_norm": 394.2063903808594, "learning_rate": 9.949427351000662e-06, "loss": 27.2819, "step": 70390 }, { "epoch": 0.14221245409406222, "grad_norm": 257.6773681640625, "learning_rate": 9.94937781738658e-06, "loss": 19.1424, "step": 70400 }, { "epoch": 0.14223265472674604, "grad_norm": 160.51309204101562, "learning_rate": 9.949328259649828e-06, "loss": 12.8866, "step": 70410 }, { "epoch": 0.14225285535942986, "grad_norm": 356.33544921875, "learning_rate": 9.949278677790653e-06, "loss": 20.0134, "step": 70420 }, { "epoch": 0.14227305599211368, "grad_norm": 252.20248413085938, "learning_rate": 9.949229071809294e-06, "loss": 19.4008, "step": 70430 }, { "epoch": 0.1422932566247975, "grad_norm": 359.1466064453125, "learning_rate": 9.949179441705992e-06, "loss": 37.4052, "step": 70440 }, { "epoch": 0.1423134572574813, "grad_norm": 66.66751861572266, "learning_rate": 9.949129787480988e-06, "loss": 19.6264, "step": 70450 }, { 
"epoch": 0.1423336578901651, "grad_norm": 571.6493530273438, "learning_rate": 9.949080109134528e-06, "loss": 29.6699, "step": 70460 }, { "epoch": 0.14235385852284893, "grad_norm": 299.482666015625, "learning_rate": 9.949030406666852e-06, "loss": 34.4904, "step": 70470 }, { "epoch": 0.14237405915553275, "grad_norm": 201.146728515625, "learning_rate": 9.948980680078199e-06, "loss": 30.5541, "step": 70480 }, { "epoch": 0.14239425978821657, "grad_norm": 637.2625732421875, "learning_rate": 9.948930929368818e-06, "loss": 19.0271, "step": 70490 }, { "epoch": 0.1424144604209004, "grad_norm": 1033.426025390625, "learning_rate": 9.948881154538946e-06, "loss": 32.5386, "step": 70500 }, { "epoch": 0.1424346610535842, "grad_norm": 1215.341796875, "learning_rate": 9.948831355588828e-06, "loss": 30.3841, "step": 70510 }, { "epoch": 0.142454861686268, "grad_norm": 822.8902587890625, "learning_rate": 9.948781532518706e-06, "loss": 24.5997, "step": 70520 }, { "epoch": 0.14247506231895182, "grad_norm": 214.73452758789062, "learning_rate": 9.948731685328823e-06, "loss": 18.9409, "step": 70530 }, { "epoch": 0.14249526295163564, "grad_norm": 299.24285888671875, "learning_rate": 9.948681814019421e-06, "loss": 33.5172, "step": 70540 }, { "epoch": 0.14251546358431946, "grad_norm": 396.3786926269531, "learning_rate": 9.948631918590746e-06, "loss": 27.0587, "step": 70550 }, { "epoch": 0.14253566421700328, "grad_norm": 928.9906616210938, "learning_rate": 9.948581999043038e-06, "loss": 32.9953, "step": 70560 }, { "epoch": 0.1425558648496871, "grad_norm": 1428.0191650390625, "learning_rate": 9.948532055376541e-06, "loss": 37.54, "step": 70570 }, { "epoch": 0.1425760654823709, "grad_norm": 709.8215942382812, "learning_rate": 9.9484820875915e-06, "loss": 23.8771, "step": 70580 }, { "epoch": 0.14259626611505472, "grad_norm": 489.7835693359375, "learning_rate": 9.948432095688157e-06, "loss": 22.3415, "step": 70590 }, { "epoch": 0.14261646674773854, "grad_norm": 374.5968322753906, "learning_rate": 9.948382079666756e-06, "loss": 16.0797, "step": 70600 }, { "epoch": 0.14263666738042236, "grad_norm": 396.25579833984375, "learning_rate": 9.948332039527541e-06, "loss": 36.8543, "step": 70610 }, { "epoch": 0.14265686801310618, "grad_norm": 680.112060546875, "learning_rate": 9.948281975270758e-06, "loss": 25.1519, "step": 70620 }, { "epoch": 0.14267706864579, "grad_norm": 302.9621276855469, "learning_rate": 9.948231886896646e-06, "loss": 13.1773, "step": 70630 }, { "epoch": 0.14269726927847382, "grad_norm": 1454.6026611328125, "learning_rate": 9.948181774405453e-06, "loss": 44.4787, "step": 70640 }, { "epoch": 0.1427174699111576, "grad_norm": 295.6230773925781, "learning_rate": 9.94813163779742e-06, "loss": 21.5774, "step": 70650 }, { "epoch": 0.14273767054384143, "grad_norm": 382.83721923828125, "learning_rate": 9.948081477072797e-06, "loss": 22.5638, "step": 70660 }, { "epoch": 0.14275787117652525, "grad_norm": 109.67560577392578, "learning_rate": 9.948031292231823e-06, "loss": 38.8435, "step": 70670 }, { "epoch": 0.14277807180920907, "grad_norm": 414.8904113769531, "learning_rate": 9.947981083274747e-06, "loss": 20.7894, "step": 70680 }, { "epoch": 0.1427982724418929, "grad_norm": 94.47399139404297, "learning_rate": 9.947930850201808e-06, "loss": 27.4957, "step": 70690 }, { "epoch": 0.1428184730745767, "grad_norm": 349.111328125, "learning_rate": 9.947880593013256e-06, "loss": 17.7162, "step": 70700 }, { "epoch": 0.1428386737072605, "grad_norm": 327.14337158203125, "learning_rate": 9.947830311709333e-06, "loss": 17.6172, 
"step": 70710 }, { "epoch": 0.14285887433994432, "grad_norm": 281.6408386230469, "learning_rate": 9.947780006290287e-06, "loss": 31.3466, "step": 70720 }, { "epoch": 0.14287907497262814, "grad_norm": 466.6632080078125, "learning_rate": 9.947729676756359e-06, "loss": 18.6596, "step": 70730 }, { "epoch": 0.14289927560531196, "grad_norm": 386.74041748046875, "learning_rate": 9.947679323107798e-06, "loss": 29.0378, "step": 70740 }, { "epoch": 0.14291947623799578, "grad_norm": 254.9665069580078, "learning_rate": 9.947628945344849e-06, "loss": 38.1556, "step": 70750 }, { "epoch": 0.1429396768706796, "grad_norm": 189.6139373779297, "learning_rate": 9.947578543467755e-06, "loss": 21.6816, "step": 70760 }, { "epoch": 0.1429598775033634, "grad_norm": 496.5278625488281, "learning_rate": 9.947528117476764e-06, "loss": 31.7688, "step": 70770 }, { "epoch": 0.1429800781360472, "grad_norm": 716.6107788085938, "learning_rate": 9.94747766737212e-06, "loss": 31.3266, "step": 70780 }, { "epoch": 0.14300027876873103, "grad_norm": 0.0, "learning_rate": 9.94742719315407e-06, "loss": 28.7017, "step": 70790 }, { "epoch": 0.14302047940141485, "grad_norm": 351.0498962402344, "learning_rate": 9.947376694822861e-06, "loss": 25.1587, "step": 70800 }, { "epoch": 0.14304068003409867, "grad_norm": 553.6657104492188, "learning_rate": 9.947326172378736e-06, "loss": 24.1454, "step": 70810 }, { "epoch": 0.1430608806667825, "grad_norm": 856.5927124023438, "learning_rate": 9.947275625821947e-06, "loss": 33.1798, "step": 70820 }, { "epoch": 0.1430810812994663, "grad_norm": 38.8865852355957, "learning_rate": 9.947225055152735e-06, "loss": 27.3439, "step": 70830 }, { "epoch": 0.1431012819321501, "grad_norm": 1.1428660154342651, "learning_rate": 9.947174460371347e-06, "loss": 31.6444, "step": 70840 }, { "epoch": 0.14312148256483392, "grad_norm": 246.42755126953125, "learning_rate": 9.947123841478032e-06, "loss": 37.5845, "step": 70850 }, { "epoch": 0.14314168319751774, "grad_norm": 471.3725280761719, "learning_rate": 9.947073198473034e-06, "loss": 29.1606, "step": 70860 }, { "epoch": 0.14316188383020156, "grad_norm": 591.9281616210938, "learning_rate": 9.947022531356602e-06, "loss": 23.9603, "step": 70870 }, { "epoch": 0.14318208446288538, "grad_norm": 134.68898010253906, "learning_rate": 9.946971840128982e-06, "loss": 21.785, "step": 70880 }, { "epoch": 0.1432022850955692, "grad_norm": 1408.50439453125, "learning_rate": 9.94692112479042e-06, "loss": 35.7125, "step": 70890 }, { "epoch": 0.143222485728253, "grad_norm": 923.0642700195312, "learning_rate": 9.946870385341167e-06, "loss": 22.5851, "step": 70900 }, { "epoch": 0.14324268636093682, "grad_norm": 491.4736633300781, "learning_rate": 9.946819621781467e-06, "loss": 25.2908, "step": 70910 }, { "epoch": 0.14326288699362064, "grad_norm": 0.0, "learning_rate": 9.946768834111568e-06, "loss": 21.2509, "step": 70920 }, { "epoch": 0.14328308762630446, "grad_norm": 561.29443359375, "learning_rate": 9.946718022331715e-06, "loss": 24.0213, "step": 70930 }, { "epoch": 0.14330328825898828, "grad_norm": 606.1071166992188, "learning_rate": 9.946667186442162e-06, "loss": 23.4115, "step": 70940 }, { "epoch": 0.1433234888916721, "grad_norm": 1087.3997802734375, "learning_rate": 9.946616326443153e-06, "loss": 30.4881, "step": 70950 }, { "epoch": 0.14334368952435592, "grad_norm": 187.8196563720703, "learning_rate": 9.946565442334935e-06, "loss": 21.7119, "step": 70960 }, { "epoch": 0.1433638901570397, "grad_norm": 473.32635498046875, "learning_rate": 9.946514534117755e-06, "loss": 24.639, "step": 
70970 }, { "epoch": 0.14338409078972353, "grad_norm": 752.272216796875, "learning_rate": 9.946463601791865e-06, "loss": 34.8138, "step": 70980 }, { "epoch": 0.14340429142240735, "grad_norm": 426.7176208496094, "learning_rate": 9.94641264535751e-06, "loss": 26.9315, "step": 70990 }, { "epoch": 0.14342449205509117, "grad_norm": 397.26904296875, "learning_rate": 9.946361664814942e-06, "loss": 20.7427, "step": 71000 }, { "epoch": 0.143444692687775, "grad_norm": 177.0984649658203, "learning_rate": 9.946310660164407e-06, "loss": 18.6685, "step": 71010 }, { "epoch": 0.1434648933204588, "grad_norm": 419.7292175292969, "learning_rate": 9.946259631406153e-06, "loss": 18.3675, "step": 71020 }, { "epoch": 0.1434850939531426, "grad_norm": 580.2398681640625, "learning_rate": 9.946208578540428e-06, "loss": 25.4747, "step": 71030 }, { "epoch": 0.14350529458582642, "grad_norm": 718.4306030273438, "learning_rate": 9.946157501567484e-06, "loss": 23.6021, "step": 71040 }, { "epoch": 0.14352549521851024, "grad_norm": 560.6592407226562, "learning_rate": 9.946106400487568e-06, "loss": 17.248, "step": 71050 }, { "epoch": 0.14354569585119406, "grad_norm": 392.8415222167969, "learning_rate": 9.946055275300929e-06, "loss": 19.7624, "step": 71060 }, { "epoch": 0.14356589648387788, "grad_norm": 362.4658203125, "learning_rate": 9.946004126007817e-06, "loss": 14.2599, "step": 71070 }, { "epoch": 0.1435860971165617, "grad_norm": 1601.98583984375, "learning_rate": 9.94595295260848e-06, "loss": 37.1649, "step": 71080 }, { "epoch": 0.1436062977492455, "grad_norm": 0.0, "learning_rate": 9.945901755103169e-06, "loss": 31.0146, "step": 71090 }, { "epoch": 0.1436264983819293, "grad_norm": 408.6718444824219, "learning_rate": 9.945850533492132e-06, "loss": 23.8855, "step": 71100 }, { "epoch": 0.14364669901461313, "grad_norm": 375.17816162109375, "learning_rate": 9.94579928777562e-06, "loss": 59.7405, "step": 71110 }, { "epoch": 0.14366689964729695, "grad_norm": 420.8493957519531, "learning_rate": 9.94574801795388e-06, "loss": 21.973, "step": 71120 }, { "epoch": 0.14368710027998077, "grad_norm": 337.4616394042969, "learning_rate": 9.945696724027166e-06, "loss": 26.9458, "step": 71130 }, { "epoch": 0.1437073009126646, "grad_norm": 458.1142883300781, "learning_rate": 9.945645405995726e-06, "loss": 16.4347, "step": 71140 }, { "epoch": 0.1437275015453484, "grad_norm": 811.0916748046875, "learning_rate": 9.94559406385981e-06, "loss": 21.0274, "step": 71150 }, { "epoch": 0.1437477021780322, "grad_norm": 215.50169372558594, "learning_rate": 9.945542697619667e-06, "loss": 22.2922, "step": 71160 }, { "epoch": 0.14376790281071603, "grad_norm": 829.2427368164062, "learning_rate": 9.94549130727555e-06, "loss": 26.2956, "step": 71170 }, { "epoch": 0.14378810344339985, "grad_norm": 208.13414001464844, "learning_rate": 9.945439892827709e-06, "loss": 18.2897, "step": 71180 }, { "epoch": 0.14380830407608367, "grad_norm": 497.759521484375, "learning_rate": 9.945388454276392e-06, "loss": 24.3056, "step": 71190 }, { "epoch": 0.14382850470876749, "grad_norm": 440.09063720703125, "learning_rate": 9.945336991621854e-06, "loss": 45.3409, "step": 71200 }, { "epoch": 0.1438487053414513, "grad_norm": 448.5332946777344, "learning_rate": 9.945285504864342e-06, "loss": 25.1452, "step": 71210 }, { "epoch": 0.1438689059741351, "grad_norm": 254.63189697265625, "learning_rate": 9.945233994004107e-06, "loss": 12.5257, "step": 71220 }, { "epoch": 0.14388910660681892, "grad_norm": 229.0892791748047, "learning_rate": 9.945182459041403e-06, "loss": 20.8888, "step": 
71230 }, { "epoch": 0.14390930723950274, "grad_norm": 329.3419189453125, "learning_rate": 9.945130899976477e-06, "loss": 26.5256, "step": 71240 }, { "epoch": 0.14392950787218656, "grad_norm": 468.9582824707031, "learning_rate": 9.945079316809585e-06, "loss": 19.1805, "step": 71250 }, { "epoch": 0.14394970850487038, "grad_norm": 264.82318115234375, "learning_rate": 9.945027709540975e-06, "loss": 26.3839, "step": 71260 }, { "epoch": 0.1439699091375542, "grad_norm": 438.0096130371094, "learning_rate": 9.9449760781709e-06, "loss": 23.1159, "step": 71270 }, { "epoch": 0.14399010977023802, "grad_norm": 433.0907897949219, "learning_rate": 9.944924422699613e-06, "loss": 23.6146, "step": 71280 }, { "epoch": 0.1440103104029218, "grad_norm": 108.1781005859375, "learning_rate": 9.944872743127363e-06, "loss": 18.6154, "step": 71290 }, { "epoch": 0.14403051103560563, "grad_norm": 152.50469970703125, "learning_rate": 9.944821039454403e-06, "loss": 25.8473, "step": 71300 }, { "epoch": 0.14405071166828945, "grad_norm": 565.368896484375, "learning_rate": 9.944769311680984e-06, "loss": 22.8616, "step": 71310 }, { "epoch": 0.14407091230097327, "grad_norm": 292.283447265625, "learning_rate": 9.94471755980736e-06, "loss": 25.7549, "step": 71320 }, { "epoch": 0.1440911129336571, "grad_norm": 400.43670654296875, "learning_rate": 9.944665783833782e-06, "loss": 37.9263, "step": 71330 }, { "epoch": 0.1441113135663409, "grad_norm": 591.67431640625, "learning_rate": 9.944613983760503e-06, "loss": 40.6577, "step": 71340 }, { "epoch": 0.1441315141990247, "grad_norm": 323.1944580078125, "learning_rate": 9.944562159587774e-06, "loss": 20.8895, "step": 71350 }, { "epoch": 0.14415171483170852, "grad_norm": 226.9590606689453, "learning_rate": 9.94451031131585e-06, "loss": 20.5786, "step": 71360 }, { "epoch": 0.14417191546439234, "grad_norm": 200.12771606445312, "learning_rate": 9.944458438944983e-06, "loss": 22.3802, "step": 71370 }, { "epoch": 0.14419211609707616, "grad_norm": 198.01441955566406, "learning_rate": 9.944406542475425e-06, "loss": 23.1966, "step": 71380 }, { "epoch": 0.14421231672975998, "grad_norm": 760.1091918945312, "learning_rate": 9.944354621907428e-06, "loss": 29.3378, "step": 71390 }, { "epoch": 0.1442325173624438, "grad_norm": 270.2275085449219, "learning_rate": 9.944302677241247e-06, "loss": 17.9261, "step": 71400 }, { "epoch": 0.1442527179951276, "grad_norm": 1244.6324462890625, "learning_rate": 9.944250708477135e-06, "loss": 39.5314, "step": 71410 }, { "epoch": 0.14427291862781141, "grad_norm": 1194.732666015625, "learning_rate": 9.944198715615343e-06, "loss": 38.33, "step": 71420 }, { "epoch": 0.14429311926049523, "grad_norm": 353.905517578125, "learning_rate": 9.944146698656127e-06, "loss": 19.8253, "step": 71430 }, { "epoch": 0.14431331989317905, "grad_norm": 589.7752075195312, "learning_rate": 9.94409465759974e-06, "loss": 30.6325, "step": 71440 }, { "epoch": 0.14433352052586287, "grad_norm": 605.8091430664062, "learning_rate": 9.944042592446434e-06, "loss": 12.5729, "step": 71450 }, { "epoch": 0.1443537211585467, "grad_norm": 256.3218078613281, "learning_rate": 9.943990503196466e-06, "loss": 24.6645, "step": 71460 }, { "epoch": 0.14437392179123051, "grad_norm": 366.6457214355469, "learning_rate": 9.943938389850087e-06, "loss": 27.4997, "step": 71470 }, { "epoch": 0.1443941224239143, "grad_norm": 243.17288208007812, "learning_rate": 9.943886252407551e-06, "loss": 28.5592, "step": 71480 }, { "epoch": 0.14441432305659813, "grad_norm": 215.53054809570312, "learning_rate": 9.943834090869116e-06, 
"loss": 31.3464, "step": 71490 }, { "epoch": 0.14443452368928195, "grad_norm": 322.67962646484375, "learning_rate": 9.94378190523503e-06, "loss": 32.2169, "step": 71500 }, { "epoch": 0.14445472432196577, "grad_norm": 447.6349792480469, "learning_rate": 9.943729695505552e-06, "loss": 20.0924, "step": 71510 }, { "epoch": 0.1444749249546496, "grad_norm": 477.0517883300781, "learning_rate": 9.943677461680935e-06, "loss": 47.1084, "step": 71520 }, { "epoch": 0.1444951255873334, "grad_norm": 384.2557067871094, "learning_rate": 9.943625203761434e-06, "loss": 11.8908, "step": 71530 }, { "epoch": 0.1445153262200172, "grad_norm": 230.05831909179688, "learning_rate": 9.943572921747302e-06, "loss": 34.5417, "step": 71540 }, { "epoch": 0.14453552685270102, "grad_norm": 277.23028564453125, "learning_rate": 9.943520615638796e-06, "loss": 40.2084, "step": 71550 }, { "epoch": 0.14455572748538484, "grad_norm": 230.21156311035156, "learning_rate": 9.943468285436171e-06, "loss": 29.1958, "step": 71560 }, { "epoch": 0.14457592811806866, "grad_norm": 643.342529296875, "learning_rate": 9.94341593113968e-06, "loss": 35.6655, "step": 71570 }, { "epoch": 0.14459612875075248, "grad_norm": 253.8309326171875, "learning_rate": 9.943363552749579e-06, "loss": 25.6384, "step": 71580 }, { "epoch": 0.1446163293834363, "grad_norm": 603.95263671875, "learning_rate": 9.943311150266124e-06, "loss": 27.2352, "step": 71590 }, { "epoch": 0.14463653001612012, "grad_norm": 601.6875, "learning_rate": 9.94325872368957e-06, "loss": 34.9108, "step": 71600 }, { "epoch": 0.1446567306488039, "grad_norm": 501.0145263671875, "learning_rate": 9.943206273020174e-06, "loss": 37.0522, "step": 71610 }, { "epoch": 0.14467693128148773, "grad_norm": 282.2042541503906, "learning_rate": 9.943153798258188e-06, "loss": 25.7665, "step": 71620 }, { "epoch": 0.14469713191417155, "grad_norm": 731.9998168945312, "learning_rate": 9.94310129940387e-06, "loss": 35.901, "step": 71630 }, { "epoch": 0.14471733254685537, "grad_norm": 175.04879760742188, "learning_rate": 9.943048776457479e-06, "loss": 19.0858, "step": 71640 }, { "epoch": 0.1447375331795392, "grad_norm": 203.5707244873047, "learning_rate": 9.942996229419264e-06, "loss": 24.6141, "step": 71650 }, { "epoch": 0.144757733812223, "grad_norm": 112.2118911743164, "learning_rate": 9.942943658289487e-06, "loss": 15.6003, "step": 71660 }, { "epoch": 0.1447779344449068, "grad_norm": 635.341552734375, "learning_rate": 9.942891063068401e-06, "loss": 22.3218, "step": 71670 }, { "epoch": 0.14479813507759062, "grad_norm": 1005.9336547851562, "learning_rate": 9.942838443756265e-06, "loss": 30.0047, "step": 71680 }, { "epoch": 0.14481833571027444, "grad_norm": 518.338623046875, "learning_rate": 9.942785800353332e-06, "loss": 21.7113, "step": 71690 }, { "epoch": 0.14483853634295826, "grad_norm": 1058.7803955078125, "learning_rate": 9.942733132859861e-06, "loss": 31.9251, "step": 71700 }, { "epoch": 0.14485873697564208, "grad_norm": 773.3302612304688, "learning_rate": 9.94268044127611e-06, "loss": 23.9225, "step": 71710 }, { "epoch": 0.1448789376083259, "grad_norm": 324.1346435546875, "learning_rate": 9.942627725602332e-06, "loss": 27.4762, "step": 71720 }, { "epoch": 0.1448991382410097, "grad_norm": 994.0797119140625, "learning_rate": 9.942574985838785e-06, "loss": 30.1357, "step": 71730 }, { "epoch": 0.14491933887369352, "grad_norm": 356.2742004394531, "learning_rate": 9.942522221985728e-06, "loss": 12.7751, "step": 71740 }, { "epoch": 0.14493953950637734, "grad_norm": 1333.2322998046875, "learning_rate": 
9.942469434043418e-06, "loss": 56.2553, "step": 71750 }, { "epoch": 0.14495974013906116, "grad_norm": 566.2124633789062, "learning_rate": 9.942416622012113e-06, "loss": 33.4768, "step": 71760 }, { "epoch": 0.14497994077174498, "grad_norm": 307.00054931640625, "learning_rate": 9.942363785892065e-06, "loss": 27.0711, "step": 71770 }, { "epoch": 0.1450001414044288, "grad_norm": 560.7721557617188, "learning_rate": 9.942310925683538e-06, "loss": 30.7037, "step": 71780 }, { "epoch": 0.14502034203711262, "grad_norm": 684.5990600585938, "learning_rate": 9.942258041386785e-06, "loss": 22.6475, "step": 71790 }, { "epoch": 0.1450405426697964, "grad_norm": 727.7120971679688, "learning_rate": 9.942205133002067e-06, "loss": 21.8784, "step": 71800 }, { "epoch": 0.14506074330248023, "grad_norm": 561.783447265625, "learning_rate": 9.94215220052964e-06, "loss": 25.0401, "step": 71810 }, { "epoch": 0.14508094393516405, "grad_norm": 275.6103820800781, "learning_rate": 9.942099243969765e-06, "loss": 18.0501, "step": 71820 }, { "epoch": 0.14510114456784787, "grad_norm": 422.5455322265625, "learning_rate": 9.942046263322694e-06, "loss": 26.8563, "step": 71830 }, { "epoch": 0.1451213452005317, "grad_norm": 210.2841796875, "learning_rate": 9.941993258588691e-06, "loss": 18.4556, "step": 71840 }, { "epoch": 0.1451415458332155, "grad_norm": 1116.8270263671875, "learning_rate": 9.941940229768012e-06, "loss": 24.2325, "step": 71850 }, { "epoch": 0.1451617464658993, "grad_norm": 1234.160400390625, "learning_rate": 9.941887176860916e-06, "loss": 41.3825, "step": 71860 }, { "epoch": 0.14518194709858312, "grad_norm": 401.3832092285156, "learning_rate": 9.94183409986766e-06, "loss": 25.3118, "step": 71870 }, { "epoch": 0.14520214773126694, "grad_norm": 474.1400146484375, "learning_rate": 9.941780998788506e-06, "loss": 20.4632, "step": 71880 }, { "epoch": 0.14522234836395076, "grad_norm": 840.2498168945312, "learning_rate": 9.941727873623709e-06, "loss": 23.0177, "step": 71890 }, { "epoch": 0.14524254899663458, "grad_norm": 611.0426635742188, "learning_rate": 9.94167472437353e-06, "loss": 17.3159, "step": 71900 }, { "epoch": 0.1452627496293184, "grad_norm": 185.4032745361328, "learning_rate": 9.941621551038228e-06, "loss": 22.2357, "step": 71910 }, { "epoch": 0.14528295026200222, "grad_norm": 188.64749145507812, "learning_rate": 9.941568353618064e-06, "loss": 19.7696, "step": 71920 }, { "epoch": 0.145303150894686, "grad_norm": 486.74554443359375, "learning_rate": 9.941515132113291e-06, "loss": 20.4263, "step": 71930 }, { "epoch": 0.14532335152736983, "grad_norm": 1020.122314453125, "learning_rate": 9.941461886524176e-06, "loss": 24.4576, "step": 71940 }, { "epoch": 0.14534355216005365, "grad_norm": 264.3952331542969, "learning_rate": 9.941408616850974e-06, "loss": 24.2844, "step": 71950 }, { "epoch": 0.14536375279273747, "grad_norm": 311.9317932128906, "learning_rate": 9.941355323093944e-06, "loss": 25.734, "step": 71960 }, { "epoch": 0.1453839534254213, "grad_norm": 331.9508056640625, "learning_rate": 9.94130200525335e-06, "loss": 21.6463, "step": 71970 }, { "epoch": 0.1454041540581051, "grad_norm": 280.59490966796875, "learning_rate": 9.941248663329448e-06, "loss": 23.8914, "step": 71980 }, { "epoch": 0.1454243546907889, "grad_norm": 605.7194213867188, "learning_rate": 9.941195297322498e-06, "loss": 43.5715, "step": 71990 }, { "epoch": 0.14544455532347272, "grad_norm": 524.6144409179688, "learning_rate": 9.941141907232766e-06, "loss": 45.0033, "step": 72000 }, { "epoch": 0.14546475595615654, "grad_norm": 
158.6700897216797, "learning_rate": 9.941088493060504e-06, "loss": 9.7627, "step": 72010 }, { "epoch": 0.14548495658884036, "grad_norm": 221.95321655273438, "learning_rate": 9.941035054805977e-06, "loss": 22.0498, "step": 72020 }, { "epoch": 0.14550515722152418, "grad_norm": 384.6241760253906, "learning_rate": 9.940981592469443e-06, "loss": 25.4921, "step": 72030 }, { "epoch": 0.145525357854208, "grad_norm": 591.4114990234375, "learning_rate": 9.940928106051166e-06, "loss": 21.4154, "step": 72040 }, { "epoch": 0.1455455584868918, "grad_norm": 499.0342102050781, "learning_rate": 9.940874595551403e-06, "loss": 23.9025, "step": 72050 }, { "epoch": 0.14556575911957562, "grad_norm": 80.63325500488281, "learning_rate": 9.940821060970418e-06, "loss": 13.7926, "step": 72060 }, { "epoch": 0.14558595975225944, "grad_norm": 888.7040405273438, "learning_rate": 9.940767502308469e-06, "loss": 45.9061, "step": 72070 }, { "epoch": 0.14560616038494326, "grad_norm": 318.607666015625, "learning_rate": 9.940713919565819e-06, "loss": 28.0343, "step": 72080 }, { "epoch": 0.14562636101762708, "grad_norm": 273.0870666503906, "learning_rate": 9.94066031274273e-06, "loss": 22.2069, "step": 72090 }, { "epoch": 0.1456465616503109, "grad_norm": 448.2008361816406, "learning_rate": 9.94060668183946e-06, "loss": 31.4073, "step": 72100 }, { "epoch": 0.14566676228299472, "grad_norm": 479.724365234375, "learning_rate": 9.940553026856273e-06, "loss": 36.0503, "step": 72110 }, { "epoch": 0.1456869629156785, "grad_norm": 589.8539428710938, "learning_rate": 9.940499347793429e-06, "loss": 33.8547, "step": 72120 }, { "epoch": 0.14570716354836233, "grad_norm": 3227.3447265625, "learning_rate": 9.940445644651191e-06, "loss": 34.8153, "step": 72130 }, { "epoch": 0.14572736418104615, "grad_norm": 115.9288101196289, "learning_rate": 9.94039191742982e-06, "loss": 19.407, "step": 72140 }, { "epoch": 0.14574756481372997, "grad_norm": 314.36505126953125, "learning_rate": 9.940338166129578e-06, "loss": 22.6938, "step": 72150 }, { "epoch": 0.1457677654464138, "grad_norm": 200.53411865234375, "learning_rate": 9.940284390750727e-06, "loss": 13.9298, "step": 72160 }, { "epoch": 0.1457879660790976, "grad_norm": 94.67725372314453, "learning_rate": 9.94023059129353e-06, "loss": 29.4556, "step": 72170 }, { "epoch": 0.1458081667117814, "grad_norm": 327.7362365722656, "learning_rate": 9.940176767758247e-06, "loss": 22.0037, "step": 72180 }, { "epoch": 0.14582836734446522, "grad_norm": 446.6937561035156, "learning_rate": 9.940122920145142e-06, "loss": 19.2912, "step": 72190 }, { "epoch": 0.14584856797714904, "grad_norm": 612.349365234375, "learning_rate": 9.940069048454478e-06, "loss": 29.6461, "step": 72200 }, { "epoch": 0.14586876860983286, "grad_norm": 673.0333862304688, "learning_rate": 9.940015152686514e-06, "loss": 17.071, "step": 72210 }, { "epoch": 0.14588896924251668, "grad_norm": 688.1871337890625, "learning_rate": 9.939961232841517e-06, "loss": 21.6557, "step": 72220 }, { "epoch": 0.1459091698752005, "grad_norm": 392.2479553222656, "learning_rate": 9.939907288919749e-06, "loss": 50.7839, "step": 72230 }, { "epoch": 0.14592937050788432, "grad_norm": 220.849609375, "learning_rate": 9.93985332092147e-06, "loss": 36.6879, "step": 72240 }, { "epoch": 0.1459495711405681, "grad_norm": 191.9847412109375, "learning_rate": 9.939799328846947e-06, "loss": 31.7834, "step": 72250 }, { "epoch": 0.14596977177325193, "grad_norm": 1537.3671875, "learning_rate": 9.93974531269644e-06, "loss": 40.3923, "step": 72260 }, { "epoch": 0.14598997240593575, 
"grad_norm": 484.6485900878906, "learning_rate": 9.939691272470214e-06, "loss": 33.6825, "step": 72270 }, { "epoch": 0.14601017303861957, "grad_norm": 132.58514404296875, "learning_rate": 9.939637208168532e-06, "loss": 21.2194, "step": 72280 }, { "epoch": 0.1460303736713034, "grad_norm": 546.3110961914062, "learning_rate": 9.939583119791656e-06, "loss": 22.5165, "step": 72290 }, { "epoch": 0.1460505743039872, "grad_norm": 688.3536376953125, "learning_rate": 9.939529007339852e-06, "loss": 14.9624, "step": 72300 }, { "epoch": 0.146070774936671, "grad_norm": 384.6953430175781, "learning_rate": 9.939474870813383e-06, "loss": 18.2546, "step": 72310 }, { "epoch": 0.14609097556935483, "grad_norm": 402.7142028808594, "learning_rate": 9.939420710212511e-06, "loss": 25.0478, "step": 72320 }, { "epoch": 0.14611117620203865, "grad_norm": 438.6862487792969, "learning_rate": 9.939366525537503e-06, "loss": 22.4429, "step": 72330 }, { "epoch": 0.14613137683472247, "grad_norm": 558.4066162109375, "learning_rate": 9.939312316788622e-06, "loss": 35.3318, "step": 72340 }, { "epoch": 0.14615157746740629, "grad_norm": 403.58538818359375, "learning_rate": 9.93925808396613e-06, "loss": 23.4264, "step": 72350 }, { "epoch": 0.1461717781000901, "grad_norm": 608.3269653320312, "learning_rate": 9.939203827070296e-06, "loss": 24.3604, "step": 72360 }, { "epoch": 0.1461919787327739, "grad_norm": 530.1038208007812, "learning_rate": 9.939149546101379e-06, "loss": 30.2006, "step": 72370 }, { "epoch": 0.14621217936545772, "grad_norm": 136.15496826171875, "learning_rate": 9.939095241059648e-06, "loss": 54.0793, "step": 72380 }, { "epoch": 0.14623237999814154, "grad_norm": 891.3991088867188, "learning_rate": 9.939040911945365e-06, "loss": 22.0462, "step": 72390 }, { "epoch": 0.14625258063082536, "grad_norm": 911.6983642578125, "learning_rate": 9.938986558758795e-06, "loss": 24.5087, "step": 72400 }, { "epoch": 0.14627278126350918, "grad_norm": 462.1217956542969, "learning_rate": 9.938932181500206e-06, "loss": 15.7559, "step": 72410 }, { "epoch": 0.146292981896193, "grad_norm": 341.0543518066406, "learning_rate": 9.938877780169858e-06, "loss": 22.3165, "step": 72420 }, { "epoch": 0.14631318252887682, "grad_norm": 263.682373046875, "learning_rate": 9.938823354768019e-06, "loss": 29.8192, "step": 72430 }, { "epoch": 0.1463333831615606, "grad_norm": 1487.048583984375, "learning_rate": 9.938768905294954e-06, "loss": 27.3454, "step": 72440 }, { "epoch": 0.14635358379424443, "grad_norm": 643.3409423828125, "learning_rate": 9.938714431750928e-06, "loss": 13.5408, "step": 72450 }, { "epoch": 0.14637378442692825, "grad_norm": 90.78771209716797, "learning_rate": 9.938659934136208e-06, "loss": 45.4623, "step": 72460 }, { "epoch": 0.14639398505961207, "grad_norm": 196.9699249267578, "learning_rate": 9.93860541245106e-06, "loss": 21.7251, "step": 72470 }, { "epoch": 0.1464141856922959, "grad_norm": 958.56005859375, "learning_rate": 9.938550866695745e-06, "loss": 30.8501, "step": 72480 }, { "epoch": 0.1464343863249797, "grad_norm": 260.7809753417969, "learning_rate": 9.938496296870532e-06, "loss": 13.5617, "step": 72490 }, { "epoch": 0.1464545869576635, "grad_norm": 315.475830078125, "learning_rate": 9.938441702975689e-06, "loss": 28.4831, "step": 72500 }, { "epoch": 0.14647478759034732, "grad_norm": 295.13983154296875, "learning_rate": 9.93838708501148e-06, "loss": 16.444, "step": 72510 }, { "epoch": 0.14649498822303114, "grad_norm": 232.6532440185547, "learning_rate": 9.93833244297817e-06, "loss": 27.1728, "step": 72520 }, { "epoch": 
0.14651518885571496, "grad_norm": 648.509033203125, "learning_rate": 9.938277776876029e-06, "loss": 49.6133, "step": 72530 }, { "epoch": 0.14653538948839878, "grad_norm": 0.0, "learning_rate": 9.938223086705318e-06, "loss": 35.7085, "step": 72540 }, { "epoch": 0.1465555901210826, "grad_norm": 210.35365295410156, "learning_rate": 9.938168372466308e-06, "loss": 26.5235, "step": 72550 }, { "epoch": 0.14657579075376642, "grad_norm": 806.7994384765625, "learning_rate": 9.938113634159266e-06, "loss": 30.8781, "step": 72560 }, { "epoch": 0.14659599138645021, "grad_norm": 231.79620361328125, "learning_rate": 9.938058871784453e-06, "loss": 17.0388, "step": 72570 }, { "epoch": 0.14661619201913403, "grad_norm": 658.1468505859375, "learning_rate": 9.938004085342144e-06, "loss": 32.5382, "step": 72580 }, { "epoch": 0.14663639265181785, "grad_norm": 463.2548828125, "learning_rate": 9.9379492748326e-06, "loss": 18.3364, "step": 72590 }, { "epoch": 0.14665659328450167, "grad_norm": 674.02099609375, "learning_rate": 9.937894440256091e-06, "loss": 22.3668, "step": 72600 }, { "epoch": 0.1466767939171855, "grad_norm": 305.38836669921875, "learning_rate": 9.937839581612883e-06, "loss": 21.051, "step": 72610 }, { "epoch": 0.14669699454986931, "grad_norm": 644.2742309570312, "learning_rate": 9.937784698903244e-06, "loss": 41.5936, "step": 72620 }, { "epoch": 0.1467171951825531, "grad_norm": 472.70849609375, "learning_rate": 9.937729792127439e-06, "loss": 24.1044, "step": 72630 }, { "epoch": 0.14673739581523693, "grad_norm": 102.48870086669922, "learning_rate": 9.93767486128574e-06, "loss": 25.1896, "step": 72640 }, { "epoch": 0.14675759644792075, "grad_norm": 204.5181121826172, "learning_rate": 9.937619906378413e-06, "loss": 20.637, "step": 72650 }, { "epoch": 0.14677779708060457, "grad_norm": 694.7501220703125, "learning_rate": 9.937564927405724e-06, "loss": 53.5922, "step": 72660 }, { "epoch": 0.1467979977132884, "grad_norm": 728.1456909179688, "learning_rate": 9.937509924367944e-06, "loss": 25.4128, "step": 72670 }, { "epoch": 0.1468181983459722, "grad_norm": 596.4130859375, "learning_rate": 9.937454897265338e-06, "loss": 20.684, "step": 72680 }, { "epoch": 0.146838398978656, "grad_norm": 476.9186706542969, "learning_rate": 9.937399846098177e-06, "loss": 24.8751, "step": 72690 }, { "epoch": 0.14685859961133982, "grad_norm": 375.2703552246094, "learning_rate": 9.937344770866727e-06, "loss": 23.6126, "step": 72700 }, { "epoch": 0.14687880024402364, "grad_norm": 0.0, "learning_rate": 9.937289671571257e-06, "loss": 13.9453, "step": 72710 }, { "epoch": 0.14689900087670746, "grad_norm": 703.7996215820312, "learning_rate": 9.937234548212038e-06, "loss": 35.1447, "step": 72720 }, { "epoch": 0.14691920150939128, "grad_norm": 388.9770202636719, "learning_rate": 9.937179400789336e-06, "loss": 25.3248, "step": 72730 }, { "epoch": 0.1469394021420751, "grad_norm": 896.0739135742188, "learning_rate": 9.937124229303419e-06, "loss": 35.4272, "step": 72740 }, { "epoch": 0.14695960277475892, "grad_norm": 51.91160583496094, "learning_rate": 9.937069033754558e-06, "loss": 16.3871, "step": 72750 }, { "epoch": 0.1469798034074427, "grad_norm": 319.04730224609375, "learning_rate": 9.937013814143021e-06, "loss": 20.4597, "step": 72760 }, { "epoch": 0.14700000404012653, "grad_norm": 857.5437622070312, "learning_rate": 9.936958570469077e-06, "loss": 26.6086, "step": 72770 }, { "epoch": 0.14702020467281035, "grad_norm": 318.8205261230469, "learning_rate": 9.936903302732997e-06, "loss": 32.8497, "step": 72780 }, { "epoch": 
0.14704040530549417, "grad_norm": 105.94303131103516, "learning_rate": 9.936848010935049e-06, "loss": 18.9716, "step": 72790 }, { "epoch": 0.147060605938178, "grad_norm": 305.947265625, "learning_rate": 9.936792695075502e-06, "loss": 20.7573, "step": 72800 }, { "epoch": 0.1470808065708618, "grad_norm": 627.2362670898438, "learning_rate": 9.936737355154627e-06, "loss": 22.1974, "step": 72810 }, { "epoch": 0.1471010072035456, "grad_norm": 54.313289642333984, "learning_rate": 9.936681991172692e-06, "loss": 51.2659, "step": 72820 }, { "epoch": 0.14712120783622942, "grad_norm": 207.916015625, "learning_rate": 9.936626603129968e-06, "loss": 21.5755, "step": 72830 }, { "epoch": 0.14714140846891324, "grad_norm": 37.225521087646484, "learning_rate": 9.936571191026726e-06, "loss": 13.9551, "step": 72840 }, { "epoch": 0.14716160910159706, "grad_norm": 607.05322265625, "learning_rate": 9.936515754863231e-06, "loss": 20.6456, "step": 72850 }, { "epoch": 0.14718180973428088, "grad_norm": 301.5707702636719, "learning_rate": 9.93646029463976e-06, "loss": 40.3485, "step": 72860 }, { "epoch": 0.1472020103669647, "grad_norm": 448.8086853027344, "learning_rate": 9.93640481035658e-06, "loss": 21.5936, "step": 72870 }, { "epoch": 0.14722221099964852, "grad_norm": 683.2775268554688, "learning_rate": 9.936349302013962e-06, "loss": 23.9059, "step": 72880 }, { "epoch": 0.14724241163233232, "grad_norm": 495.9053649902344, "learning_rate": 9.936293769612175e-06, "loss": 23.7701, "step": 72890 }, { "epoch": 0.14726261226501614, "grad_norm": 631.983642578125, "learning_rate": 9.936238213151491e-06, "loss": 25.8129, "step": 72900 }, { "epoch": 0.14728281289769996, "grad_norm": 886.5300903320312, "learning_rate": 9.93618263263218e-06, "loss": 22.7748, "step": 72910 }, { "epoch": 0.14730301353038378, "grad_norm": 615.3167114257812, "learning_rate": 9.936127028054516e-06, "loss": 31.9266, "step": 72920 }, { "epoch": 0.1473232141630676, "grad_norm": 245.50645446777344, "learning_rate": 9.936071399418764e-06, "loss": 29.3315, "step": 72930 }, { "epoch": 0.14734341479575142, "grad_norm": 393.6664123535156, "learning_rate": 9.936015746725202e-06, "loss": 19.3517, "step": 72940 }, { "epoch": 0.1473636154284352, "grad_norm": 333.4764099121094, "learning_rate": 9.935960069974096e-06, "loss": 12.023, "step": 72950 }, { "epoch": 0.14738381606111903, "grad_norm": 338.2967224121094, "learning_rate": 9.93590436916572e-06, "loss": 32.581, "step": 72960 }, { "epoch": 0.14740401669380285, "grad_norm": 234.2685089111328, "learning_rate": 9.935848644300345e-06, "loss": 21.2717, "step": 72970 }, { "epoch": 0.14742421732648667, "grad_norm": 1314.709716796875, "learning_rate": 9.935792895378243e-06, "loss": 38.1685, "step": 72980 }, { "epoch": 0.1474444179591705, "grad_norm": 145.0713653564453, "learning_rate": 9.935737122399683e-06, "loss": 18.8477, "step": 72990 }, { "epoch": 0.1474646185918543, "grad_norm": 231.71591186523438, "learning_rate": 9.93568132536494e-06, "loss": 15.7926, "step": 73000 }, { "epoch": 0.1474848192245381, "grad_norm": 409.4983825683594, "learning_rate": 9.935625504274284e-06, "loss": 22.0732, "step": 73010 }, { "epoch": 0.14750501985722192, "grad_norm": 316.9377746582031, "learning_rate": 9.93556965912799e-06, "loss": 30.9234, "step": 73020 }, { "epoch": 0.14752522048990574, "grad_norm": 176.481201171875, "learning_rate": 9.935513789926327e-06, "loss": 33.495, "step": 73030 }, { "epoch": 0.14754542112258956, "grad_norm": 481.75, "learning_rate": 9.935457896669568e-06, "loss": 26.2654, "step": 73040 }, { "epoch": 
0.14756562175527338, "grad_norm": 501.1618347167969, "learning_rate": 9.935401979357985e-06, "loss": 14.997, "step": 73050 }, { "epoch": 0.1475858223879572, "grad_norm": 503.587646484375, "learning_rate": 9.935346037991854e-06, "loss": 27.9058, "step": 73060 }, { "epoch": 0.14760602302064102, "grad_norm": 726.5780029296875, "learning_rate": 9.935290072571442e-06, "loss": 20.1439, "step": 73070 }, { "epoch": 0.1476262236533248, "grad_norm": 296.281982421875, "learning_rate": 9.935234083097028e-06, "loss": 25.1859, "step": 73080 }, { "epoch": 0.14764642428600863, "grad_norm": 439.1563415527344, "learning_rate": 9.935178069568878e-06, "loss": 41.3575, "step": 73090 }, { "epoch": 0.14766662491869245, "grad_norm": 594.9931640625, "learning_rate": 9.93512203198727e-06, "loss": 19.4409, "step": 73100 }, { "epoch": 0.14768682555137627, "grad_norm": 542.8465576171875, "learning_rate": 9.935065970352477e-06, "loss": 13.9351, "step": 73110 }, { "epoch": 0.1477070261840601, "grad_norm": 549.0725708007812, "learning_rate": 9.93500988466477e-06, "loss": 28.5047, "step": 73120 }, { "epoch": 0.1477272268167439, "grad_norm": 386.43927001953125, "learning_rate": 9.934953774924425e-06, "loss": 35.2264, "step": 73130 }, { "epoch": 0.1477474274494277, "grad_norm": 102.36702728271484, "learning_rate": 9.934897641131712e-06, "loss": 29.2037, "step": 73140 }, { "epoch": 0.14776762808211152, "grad_norm": 502.3369445800781, "learning_rate": 9.934841483286907e-06, "loss": 34.0425, "step": 73150 }, { "epoch": 0.14778782871479534, "grad_norm": 143.2417449951172, "learning_rate": 9.934785301390282e-06, "loss": 35.4966, "step": 73160 }, { "epoch": 0.14780802934747916, "grad_norm": 582.600830078125, "learning_rate": 9.934729095442113e-06, "loss": 26.3404, "step": 73170 }, { "epoch": 0.14782822998016298, "grad_norm": 16.06324005126953, "learning_rate": 9.934672865442673e-06, "loss": 11.684, "step": 73180 }, { "epoch": 0.1478484306128468, "grad_norm": 486.680419921875, "learning_rate": 9.934616611392235e-06, "loss": 22.8405, "step": 73190 }, { "epoch": 0.1478686312455306, "grad_norm": 1008.5476684570312, "learning_rate": 9.934560333291077e-06, "loss": 34.9215, "step": 73200 }, { "epoch": 0.14788883187821442, "grad_norm": 362.5773620605469, "learning_rate": 9.934504031139468e-06, "loss": 35.7853, "step": 73210 }, { "epoch": 0.14790903251089824, "grad_norm": 552.7269897460938, "learning_rate": 9.934447704937684e-06, "loss": 27.2318, "step": 73220 }, { "epoch": 0.14792923314358206, "grad_norm": 1780.918212890625, "learning_rate": 9.934391354686002e-06, "loss": 46.4913, "step": 73230 }, { "epoch": 0.14794943377626588, "grad_norm": 63.38420867919922, "learning_rate": 9.934334980384694e-06, "loss": 18.7989, "step": 73240 }, { "epoch": 0.1479696344089497, "grad_norm": 250.62246704101562, "learning_rate": 9.934278582034037e-06, "loss": 30.9458, "step": 73250 }, { "epoch": 0.14798983504163352, "grad_norm": 820.0814208984375, "learning_rate": 9.934222159634303e-06, "loss": 25.7737, "step": 73260 }, { "epoch": 0.1480100356743173, "grad_norm": 222.3558807373047, "learning_rate": 9.93416571318577e-06, "loss": 19.4418, "step": 73270 }, { "epoch": 0.14803023630700113, "grad_norm": 472.84735107421875, "learning_rate": 9.934109242688712e-06, "loss": 23.9828, "step": 73280 }, { "epoch": 0.14805043693968495, "grad_norm": 714.0300903320312, "learning_rate": 9.934052748143403e-06, "loss": 52.5531, "step": 73290 }, { "epoch": 0.14807063757236877, "grad_norm": 398.2305908203125, "learning_rate": 9.93399622955012e-06, "loss": 20.6784, "step": 
73300 }, { "epoch": 0.1480908382050526, "grad_norm": 104.2729263305664, "learning_rate": 9.933939686909137e-06, "loss": 24.4492, "step": 73310 }, { "epoch": 0.1481110388377364, "grad_norm": 1208.980224609375, "learning_rate": 9.933883120220731e-06, "loss": 34.2655, "step": 73320 }, { "epoch": 0.1481312394704202, "grad_norm": 200.7018280029297, "learning_rate": 9.933826529485178e-06, "loss": 40.8249, "step": 73330 }, { "epoch": 0.14815144010310402, "grad_norm": 139.2305450439453, "learning_rate": 9.933769914702751e-06, "loss": 18.0279, "step": 73340 }, { "epoch": 0.14817164073578784, "grad_norm": 537.5657348632812, "learning_rate": 9.933713275873728e-06, "loss": 30.6076, "step": 73350 }, { "epoch": 0.14819184136847166, "grad_norm": 290.5312805175781, "learning_rate": 9.933656612998387e-06, "loss": 20.4049, "step": 73360 }, { "epoch": 0.14821204200115548, "grad_norm": 461.2494812011719, "learning_rate": 9.933599926077e-06, "loss": 25.0895, "step": 73370 }, { "epoch": 0.1482322426338393, "grad_norm": 345.025634765625, "learning_rate": 9.933543215109846e-06, "loss": 22.5119, "step": 73380 }, { "epoch": 0.14825244326652312, "grad_norm": 417.30206298828125, "learning_rate": 9.933486480097201e-06, "loss": 31.8335, "step": 73390 }, { "epoch": 0.1482726438992069, "grad_norm": 17.376859664916992, "learning_rate": 9.93342972103934e-06, "loss": 18.072, "step": 73400 }, { "epoch": 0.14829284453189073, "grad_norm": 380.94287109375, "learning_rate": 9.933372937936542e-06, "loss": 39.921, "step": 73410 }, { "epoch": 0.14831304516457455, "grad_norm": 909.1221313476562, "learning_rate": 9.933316130789084e-06, "loss": 38.6447, "step": 73420 }, { "epoch": 0.14833324579725837, "grad_norm": 382.06170654296875, "learning_rate": 9.93325929959724e-06, "loss": 34.3546, "step": 73430 }, { "epoch": 0.1483534464299422, "grad_norm": 166.60946655273438, "learning_rate": 9.933202444361288e-06, "loss": 18.317, "step": 73440 }, { "epoch": 0.148373647062626, "grad_norm": 561.4780883789062, "learning_rate": 9.933145565081506e-06, "loss": 21.7824, "step": 73450 }, { "epoch": 0.1483938476953098, "grad_norm": 776.0906982421875, "learning_rate": 9.933088661758172e-06, "loss": 23.8071, "step": 73460 }, { "epoch": 0.14841404832799363, "grad_norm": 535.42431640625, "learning_rate": 9.933031734391561e-06, "loss": 24.7226, "step": 73470 }, { "epoch": 0.14843424896067745, "grad_norm": 219.21336364746094, "learning_rate": 9.93297478298195e-06, "loss": 16.1967, "step": 73480 }, { "epoch": 0.14845444959336127, "grad_norm": 315.6730041503906, "learning_rate": 9.93291780752962e-06, "loss": 10.5076, "step": 73490 }, { "epoch": 0.14847465022604509, "grad_norm": 326.9447937011719, "learning_rate": 9.932860808034847e-06, "loss": 22.2159, "step": 73500 }, { "epoch": 0.1484948508587289, "grad_norm": 153.1551055908203, "learning_rate": 9.93280378449791e-06, "loss": 45.4165, "step": 73510 }, { "epoch": 0.1485150514914127, "grad_norm": 309.3044128417969, "learning_rate": 9.932746736919084e-06, "loss": 36.2212, "step": 73520 }, { "epoch": 0.14853525212409652, "grad_norm": 96.15229034423828, "learning_rate": 9.93268966529865e-06, "loss": 27.7137, "step": 73530 }, { "epoch": 0.14855545275678034, "grad_norm": 145.47523498535156, "learning_rate": 9.932632569636882e-06, "loss": 18.7185, "step": 73540 }, { "epoch": 0.14857565338946416, "grad_norm": 754.7882690429688, "learning_rate": 9.932575449934063e-06, "loss": 17.8452, "step": 73550 }, { "epoch": 0.14859585402214798, "grad_norm": 1249.251953125, "learning_rate": 9.93251830619047e-06, "loss": 34.2317, 
"step": 73560 }, { "epoch": 0.1486160546548318, "grad_norm": 577.043701171875, "learning_rate": 9.93246113840638e-06, "loss": 31.0139, "step": 73570 }, { "epoch": 0.14863625528751562, "grad_norm": 778.6085205078125, "learning_rate": 9.932403946582071e-06, "loss": 30.8692, "step": 73580 }, { "epoch": 0.1486564559201994, "grad_norm": 703.0812377929688, "learning_rate": 9.932346730717828e-06, "loss": 18.4887, "step": 73590 }, { "epoch": 0.14867665655288323, "grad_norm": 294.0816345214844, "learning_rate": 9.932289490813922e-06, "loss": 26.0219, "step": 73600 }, { "epoch": 0.14869685718556705, "grad_norm": 444.4838562011719, "learning_rate": 9.932232226870635e-06, "loss": 17.5626, "step": 73610 }, { "epoch": 0.14871705781825087, "grad_norm": 185.8114013671875, "learning_rate": 9.932174938888248e-06, "loss": 20.0357, "step": 73620 }, { "epoch": 0.1487372584509347, "grad_norm": 641.9573974609375, "learning_rate": 9.932117626867037e-06, "loss": 20.9796, "step": 73630 }, { "epoch": 0.1487574590836185, "grad_norm": 871.9342041015625, "learning_rate": 9.932060290807283e-06, "loss": 23.5767, "step": 73640 }, { "epoch": 0.1487776597163023, "grad_norm": 996.84326171875, "learning_rate": 9.932002930709268e-06, "loss": 25.4256, "step": 73650 }, { "epoch": 0.14879786034898612, "grad_norm": 1495.176025390625, "learning_rate": 9.931945546573266e-06, "loss": 35.1073, "step": 73660 }, { "epoch": 0.14881806098166994, "grad_norm": 597.5657348632812, "learning_rate": 9.931888138399562e-06, "loss": 22.791, "step": 73670 }, { "epoch": 0.14883826161435376, "grad_norm": 597.6026611328125, "learning_rate": 9.93183070618843e-06, "loss": 40.5281, "step": 73680 }, { "epoch": 0.14885846224703758, "grad_norm": 557.2459716796875, "learning_rate": 9.931773249940156e-06, "loss": 14.6514, "step": 73690 }, { "epoch": 0.1488786628797214, "grad_norm": 438.7613525390625, "learning_rate": 9.931715769655017e-06, "loss": 26.2284, "step": 73700 }, { "epoch": 0.14889886351240522, "grad_norm": 747.023193359375, "learning_rate": 9.931658265333293e-06, "loss": 32.2668, "step": 73710 }, { "epoch": 0.14891906414508901, "grad_norm": 264.25408935546875, "learning_rate": 9.931600736975264e-06, "loss": 23.192, "step": 73720 }, { "epoch": 0.14893926477777283, "grad_norm": 806.9119262695312, "learning_rate": 9.93154318458121e-06, "loss": 31.4644, "step": 73730 }, { "epoch": 0.14895946541045665, "grad_norm": 129.6310272216797, "learning_rate": 9.931485608151416e-06, "loss": 30.5778, "step": 73740 }, { "epoch": 0.14897966604314047, "grad_norm": 556.3810424804688, "learning_rate": 9.931428007686158e-06, "loss": 33.2141, "step": 73750 }, { "epoch": 0.1489998666758243, "grad_norm": 291.308349609375, "learning_rate": 9.931370383185717e-06, "loss": 22.1474, "step": 73760 }, { "epoch": 0.14902006730850811, "grad_norm": 460.9413146972656, "learning_rate": 9.931312734650376e-06, "loss": 33.3643, "step": 73770 }, { "epoch": 0.1490402679411919, "grad_norm": 124.01321411132812, "learning_rate": 9.931255062080415e-06, "loss": 18.3147, "step": 73780 }, { "epoch": 0.14906046857387573, "grad_norm": 175.38063049316406, "learning_rate": 9.931197365476113e-06, "loss": 14.1293, "step": 73790 }, { "epoch": 0.14908066920655955, "grad_norm": 290.3349914550781, "learning_rate": 9.931139644837755e-06, "loss": 14.0846, "step": 73800 }, { "epoch": 0.14910086983924337, "grad_norm": 833.2294311523438, "learning_rate": 9.93108190016562e-06, "loss": 30.1313, "step": 73810 }, { "epoch": 0.1491210704719272, "grad_norm": 368.4813537597656, "learning_rate": 9.93102413145999e-06, 
"loss": 18.284, "step": 73820 }, { "epoch": 0.149141271104611, "grad_norm": 315.0820007324219, "learning_rate": 9.930966338721146e-06, "loss": 32.4391, "step": 73830 }, { "epoch": 0.1491614717372948, "grad_norm": 446.75750732421875, "learning_rate": 9.930908521949371e-06, "loss": 18.0605, "step": 73840 }, { "epoch": 0.14918167236997862, "grad_norm": 471.6275634765625, "learning_rate": 9.930850681144946e-06, "loss": 22.9185, "step": 73850 }, { "epoch": 0.14920187300266244, "grad_norm": 325.270263671875, "learning_rate": 9.930792816308151e-06, "loss": 29.7445, "step": 73860 }, { "epoch": 0.14922207363534626, "grad_norm": 479.4029235839844, "learning_rate": 9.930734927439272e-06, "loss": 24.8208, "step": 73870 }, { "epoch": 0.14924227426803008, "grad_norm": 184.0373992919922, "learning_rate": 9.930677014538587e-06, "loss": 31.5938, "step": 73880 }, { "epoch": 0.1492624749007139, "grad_norm": 830.44921875, "learning_rate": 9.93061907760638e-06, "loss": 22.3806, "step": 73890 }, { "epoch": 0.14928267553339772, "grad_norm": 287.38494873046875, "learning_rate": 9.930561116642936e-06, "loss": 33.4672, "step": 73900 }, { "epoch": 0.1493028761660815, "grad_norm": 918.8916015625, "learning_rate": 9.930503131648535e-06, "loss": 46.5241, "step": 73910 }, { "epoch": 0.14932307679876533, "grad_norm": 1375.9710693359375, "learning_rate": 9.930445122623458e-06, "loss": 19.428, "step": 73920 }, { "epoch": 0.14934327743144915, "grad_norm": 59.05356979370117, "learning_rate": 9.93038708956799e-06, "loss": 31.6225, "step": 73930 }, { "epoch": 0.14936347806413297, "grad_norm": 588.1260986328125, "learning_rate": 9.930329032482412e-06, "loss": 33.2808, "step": 73940 }, { "epoch": 0.1493836786968168, "grad_norm": 764.7052001953125, "learning_rate": 9.930270951367012e-06, "loss": 36.3644, "step": 73950 }, { "epoch": 0.1494038793295006, "grad_norm": 249.54364013671875, "learning_rate": 9.930212846222065e-06, "loss": 19.8232, "step": 73960 }, { "epoch": 0.1494240799621844, "grad_norm": 973.8547973632812, "learning_rate": 9.930154717047862e-06, "loss": 22.1961, "step": 73970 }, { "epoch": 0.14944428059486822, "grad_norm": 854.751220703125, "learning_rate": 9.930096563844682e-06, "loss": 57.2152, "step": 73980 }, { "epoch": 0.14946448122755204, "grad_norm": 548.9900512695312, "learning_rate": 9.930038386612809e-06, "loss": 26.7725, "step": 73990 }, { "epoch": 0.14948468186023586, "grad_norm": 520.6438598632812, "learning_rate": 9.929980185352525e-06, "loss": 21.2265, "step": 74000 }, { "epoch": 0.14950488249291968, "grad_norm": 46.74055480957031, "learning_rate": 9.929921960064117e-06, "loss": 15.4381, "step": 74010 }, { "epoch": 0.1495250831256035, "grad_norm": 67.48046875, "learning_rate": 9.929863710747869e-06, "loss": 34.7771, "step": 74020 }, { "epoch": 0.14954528375828732, "grad_norm": 496.70318603515625, "learning_rate": 9.929805437404061e-06, "loss": 22.7784, "step": 74030 }, { "epoch": 0.14956548439097112, "grad_norm": 363.4870910644531, "learning_rate": 9.929747140032979e-06, "loss": 30.567, "step": 74040 }, { "epoch": 0.14958568502365494, "grad_norm": 459.33447265625, "learning_rate": 9.929688818634909e-06, "loss": 27.3487, "step": 74050 }, { "epoch": 0.14960588565633876, "grad_norm": 586.962646484375, "learning_rate": 9.929630473210132e-06, "loss": 31.9464, "step": 74060 }, { "epoch": 0.14962608628902258, "grad_norm": 106.21631622314453, "learning_rate": 9.929572103758935e-06, "loss": 14.6552, "step": 74070 }, { "epoch": 0.1496462869217064, "grad_norm": 579.7979125976562, "learning_rate": 
9.929513710281602e-06, "loss": 25.5261, "step": 74080 }, { "epoch": 0.14966648755439022, "grad_norm": 310.973876953125, "learning_rate": 9.929455292778416e-06, "loss": 16.3713, "step": 74090 }, { "epoch": 0.149686688187074, "grad_norm": 586.092529296875, "learning_rate": 9.929396851249661e-06, "loss": 32.1962, "step": 74100 }, { "epoch": 0.14970688881975783, "grad_norm": 509.2660217285156, "learning_rate": 9.929338385695626e-06, "loss": 38.1233, "step": 74110 }, { "epoch": 0.14972708945244165, "grad_norm": 702.5281372070312, "learning_rate": 9.929279896116595e-06, "loss": 22.2846, "step": 74120 }, { "epoch": 0.14974729008512547, "grad_norm": 1263.78515625, "learning_rate": 9.92922138251285e-06, "loss": 27.9118, "step": 74130 }, { "epoch": 0.1497674907178093, "grad_norm": 309.19757080078125, "learning_rate": 9.929162844884676e-06, "loss": 23.4226, "step": 74140 }, { "epoch": 0.1497876913504931, "grad_norm": 427.94464111328125, "learning_rate": 9.929104283232363e-06, "loss": 19.4286, "step": 74150 }, { "epoch": 0.1498078919831769, "grad_norm": 336.3860778808594, "learning_rate": 9.929045697556192e-06, "loss": 17.9172, "step": 74160 }, { "epoch": 0.14982809261586072, "grad_norm": 173.4564666748047, "learning_rate": 9.92898708785645e-06, "loss": 14.8733, "step": 74170 }, { "epoch": 0.14984829324854454, "grad_norm": 299.27276611328125, "learning_rate": 9.928928454133424e-06, "loss": 11.8068, "step": 74180 }, { "epoch": 0.14986849388122836, "grad_norm": 350.9851989746094, "learning_rate": 9.928869796387396e-06, "loss": 20.769, "step": 74190 }, { "epoch": 0.14988869451391218, "grad_norm": 254.02316284179688, "learning_rate": 9.928811114618658e-06, "loss": 44.699, "step": 74200 }, { "epoch": 0.149908895146596, "grad_norm": 1146.9010009765625, "learning_rate": 9.92875240882749e-06, "loss": 18.2647, "step": 74210 }, { "epoch": 0.14992909577927982, "grad_norm": 595.7329711914062, "learning_rate": 9.92869367901418e-06, "loss": 28.4935, "step": 74220 }, { "epoch": 0.1499492964119636, "grad_norm": 61.0308837890625, "learning_rate": 9.928634925179018e-06, "loss": 26.1481, "step": 74230 }, { "epoch": 0.14996949704464743, "grad_norm": 545.187255859375, "learning_rate": 9.928576147322283e-06, "loss": 26.7562, "step": 74240 }, { "epoch": 0.14998969767733125, "grad_norm": 535.875, "learning_rate": 9.92851734544427e-06, "loss": 44.9036, "step": 74250 }, { "epoch": 0.15000989831001507, "grad_norm": 366.1502380371094, "learning_rate": 9.928458519545258e-06, "loss": 26.5014, "step": 74260 }, { "epoch": 0.1500300989426989, "grad_norm": 377.4646301269531, "learning_rate": 9.928399669625537e-06, "loss": 19.756, "step": 74270 }, { "epoch": 0.1500502995753827, "grad_norm": 847.5916748046875, "learning_rate": 9.928340795685396e-06, "loss": 45.6313, "step": 74280 }, { "epoch": 0.1500705002080665, "grad_norm": 234.44790649414062, "learning_rate": 9.928281897725117e-06, "loss": 29.406, "step": 74290 }, { "epoch": 0.15009070084075032, "grad_norm": 174.0032958984375, "learning_rate": 9.928222975744992e-06, "loss": 24.4992, "step": 74300 }, { "epoch": 0.15011090147343414, "grad_norm": 289.6244812011719, "learning_rate": 9.928164029745304e-06, "loss": 21.2528, "step": 74310 }, { "epoch": 0.15013110210611796, "grad_norm": 644.0867309570312, "learning_rate": 9.928105059726342e-06, "loss": 33.9758, "step": 74320 }, { "epoch": 0.15015130273880178, "grad_norm": 418.8067932128906, "learning_rate": 9.928046065688396e-06, "loss": 25.9401, "step": 74330 }, { "epoch": 0.1501715033714856, "grad_norm": 609.118896484375, "learning_rate": 
9.927987047631749e-06, "loss": 20.8019, "step": 74340 }, { "epoch": 0.15019170400416942, "grad_norm": 737.2074584960938, "learning_rate": 9.927928005556692e-06, "loss": 25.2103, "step": 74350 }, { "epoch": 0.15021190463685322, "grad_norm": 372.0144348144531, "learning_rate": 9.927868939463511e-06, "loss": 12.7893, "step": 74360 }, { "epoch": 0.15023210526953704, "grad_norm": 196.4942626953125, "learning_rate": 9.927809849352496e-06, "loss": 27.7522, "step": 74370 }, { "epoch": 0.15025230590222086, "grad_norm": 516.6409301757812, "learning_rate": 9.927750735223932e-06, "loss": 31.0842, "step": 74380 }, { "epoch": 0.15027250653490468, "grad_norm": 370.11346435546875, "learning_rate": 9.927691597078109e-06, "loss": 29.7255, "step": 74390 }, { "epoch": 0.1502927071675885, "grad_norm": 707.3624267578125, "learning_rate": 9.927632434915315e-06, "loss": 38.3532, "step": 74400 }, { "epoch": 0.15031290780027232, "grad_norm": 684.619384765625, "learning_rate": 9.927573248735839e-06, "loss": 21.7652, "step": 74410 }, { "epoch": 0.1503331084329561, "grad_norm": 706.516845703125, "learning_rate": 9.927514038539966e-06, "loss": 42.4786, "step": 74420 }, { "epoch": 0.15035330906563993, "grad_norm": 441.7760009765625, "learning_rate": 9.927454804327989e-06, "loss": 34.0491, "step": 74430 }, { "epoch": 0.15037350969832375, "grad_norm": 835.3612060546875, "learning_rate": 9.927395546100195e-06, "loss": 28.9184, "step": 74440 }, { "epoch": 0.15039371033100757, "grad_norm": 351.7720947265625, "learning_rate": 9.927336263856873e-06, "loss": 19.5311, "step": 74450 }, { "epoch": 0.1504139109636914, "grad_norm": 395.46429443359375, "learning_rate": 9.92727695759831e-06, "loss": 18.5734, "step": 74460 }, { "epoch": 0.1504341115963752, "grad_norm": 179.93319702148438, "learning_rate": 9.927217627324798e-06, "loss": 19.2748, "step": 74470 }, { "epoch": 0.150454312229059, "grad_norm": 140.97605895996094, "learning_rate": 9.927158273036624e-06, "loss": 12.4965, "step": 74480 }, { "epoch": 0.15047451286174282, "grad_norm": 530.7149658203125, "learning_rate": 9.92709889473408e-06, "loss": 36.4391, "step": 74490 }, { "epoch": 0.15049471349442664, "grad_norm": 322.33831787109375, "learning_rate": 9.927039492417452e-06, "loss": 27.7319, "step": 74500 }, { "epoch": 0.15051491412711046, "grad_norm": 355.8292236328125, "learning_rate": 9.92698006608703e-06, "loss": 22.9936, "step": 74510 }, { "epoch": 0.15053511475979428, "grad_norm": 345.5832214355469, "learning_rate": 9.926920615743108e-06, "loss": 29.3384, "step": 74520 }, { "epoch": 0.1505553153924781, "grad_norm": 1088.1964111328125, "learning_rate": 9.92686114138597e-06, "loss": 41.6574, "step": 74530 }, { "epoch": 0.15057551602516192, "grad_norm": 448.7097473144531, "learning_rate": 9.926801643015908e-06, "loss": 30.9739, "step": 74540 }, { "epoch": 0.1505957166578457, "grad_norm": 222.37721252441406, "learning_rate": 9.926742120633215e-06, "loss": 20.0155, "step": 74550 }, { "epoch": 0.15061591729052953, "grad_norm": 391.6602783203125, "learning_rate": 9.926682574238175e-06, "loss": 35.0277, "step": 74560 }, { "epoch": 0.15063611792321335, "grad_norm": 212.23800659179688, "learning_rate": 9.926623003831085e-06, "loss": 24.9151, "step": 74570 }, { "epoch": 0.15065631855589717, "grad_norm": 699.8584594726562, "learning_rate": 9.92656340941223e-06, "loss": 46.0956, "step": 74580 }, { "epoch": 0.150676519188581, "grad_norm": 381.85382080078125, "learning_rate": 9.926503790981903e-06, "loss": 16.9223, "step": 74590 }, { "epoch": 0.1506967198212648, "grad_norm": 
648.5833129882812, "learning_rate": 9.926444148540394e-06, "loss": 18.0826, "step": 74600 }, { "epoch": 0.1507169204539486, "grad_norm": 681.515869140625, "learning_rate": 9.926384482087994e-06, "loss": 19.9235, "step": 74610 }, { "epoch": 0.15073712108663243, "grad_norm": 418.3777160644531, "learning_rate": 9.926324791624993e-06, "loss": 18.0033, "step": 74620 }, { "epoch": 0.15075732171931625, "grad_norm": 308.60699462890625, "learning_rate": 9.926265077151682e-06, "loss": 10.9382, "step": 74630 }, { "epoch": 0.15077752235200007, "grad_norm": 316.04754638671875, "learning_rate": 9.926205338668353e-06, "loss": 18.1846, "step": 74640 }, { "epoch": 0.15079772298468389, "grad_norm": 419.26593017578125, "learning_rate": 9.926145576175297e-06, "loss": 39.5158, "step": 74650 }, { "epoch": 0.1508179236173677, "grad_norm": 415.845947265625, "learning_rate": 9.926085789672806e-06, "loss": 31.3318, "step": 74660 }, { "epoch": 0.15083812425005153, "grad_norm": 338.5599365234375, "learning_rate": 9.926025979161169e-06, "loss": 15.7716, "step": 74670 }, { "epoch": 0.15085832488273532, "grad_norm": 396.2326965332031, "learning_rate": 9.925966144640677e-06, "loss": 17.2184, "step": 74680 }, { "epoch": 0.15087852551541914, "grad_norm": 300.29656982421875, "learning_rate": 9.925906286111627e-06, "loss": 13.1868, "step": 74690 }, { "epoch": 0.15089872614810296, "grad_norm": 389.8894348144531, "learning_rate": 9.925846403574306e-06, "loss": 29.3188, "step": 74700 }, { "epoch": 0.15091892678078678, "grad_norm": 262.18408203125, "learning_rate": 9.925786497029007e-06, "loss": 11.778, "step": 74710 }, { "epoch": 0.1509391274134706, "grad_norm": 382.22821044921875, "learning_rate": 9.925726566476021e-06, "loss": 34.0867, "step": 74720 }, { "epoch": 0.15095932804615442, "grad_norm": 244.11781311035156, "learning_rate": 9.925666611915642e-06, "loss": 38.6311, "step": 74730 }, { "epoch": 0.1509795286788382, "grad_norm": 415.68048095703125, "learning_rate": 9.925606633348161e-06, "loss": 15.0911, "step": 74740 }, { "epoch": 0.15099972931152203, "grad_norm": 376.7303771972656, "learning_rate": 9.92554663077387e-06, "loss": 21.868, "step": 74750 }, { "epoch": 0.15101992994420585, "grad_norm": 1667.0445556640625, "learning_rate": 9.925486604193064e-06, "loss": 34.2437, "step": 74760 }, { "epoch": 0.15104013057688967, "grad_norm": 507.23333740234375, "learning_rate": 9.925426553606033e-06, "loss": 22.3991, "step": 74770 }, { "epoch": 0.1510603312095735, "grad_norm": 404.0858154296875, "learning_rate": 9.92536647901307e-06, "loss": 20.0867, "step": 74780 }, { "epoch": 0.1510805318422573, "grad_norm": 406.8843994140625, "learning_rate": 9.925306380414468e-06, "loss": 27.0763, "step": 74790 }, { "epoch": 0.1511007324749411, "grad_norm": 584.16796875, "learning_rate": 9.925246257810519e-06, "loss": 24.6141, "step": 74800 }, { "epoch": 0.15112093310762492, "grad_norm": 369.7292175292969, "learning_rate": 9.925186111201519e-06, "loss": 28.5047, "step": 74810 }, { "epoch": 0.15114113374030874, "grad_norm": 138.77566528320312, "learning_rate": 9.92512594058776e-06, "loss": 18.0816, "step": 74820 }, { "epoch": 0.15116133437299256, "grad_norm": 340.2200012207031, "learning_rate": 9.925065745969531e-06, "loss": 39.3574, "step": 74830 }, { "epoch": 0.15118153500567638, "grad_norm": 531.7938842773438, "learning_rate": 9.925005527347132e-06, "loss": 21.366, "step": 74840 }, { "epoch": 0.1512017356383602, "grad_norm": 245.169921875, "learning_rate": 9.924945284720852e-06, "loss": 13.2756, "step": 74850 }, { "epoch": 
0.15122193627104402, "grad_norm": 468.15594482421875, "learning_rate": 9.924885018090987e-06, "loss": 24.177, "step": 74860 }, { "epoch": 0.15124213690372781, "grad_norm": 570.9451293945312, "learning_rate": 9.924824727457829e-06, "loss": 15.2152, "step": 74870 }, { "epoch": 0.15126233753641163, "grad_norm": 418.68328857421875, "learning_rate": 9.924764412821673e-06, "loss": 27.3847, "step": 74880 }, { "epoch": 0.15128253816909545, "grad_norm": 413.4746398925781, "learning_rate": 9.924704074182811e-06, "loss": 33.3847, "step": 74890 }, { "epoch": 0.15130273880177927, "grad_norm": 563.0431518554688, "learning_rate": 9.92464371154154e-06, "loss": 9.7395, "step": 74900 }, { "epoch": 0.1513229394344631, "grad_norm": 581.7094116210938, "learning_rate": 9.924583324898152e-06, "loss": 26.6438, "step": 74910 }, { "epoch": 0.15134314006714691, "grad_norm": 756.6255493164062, "learning_rate": 9.924522914252943e-06, "loss": 39.0042, "step": 74920 }, { "epoch": 0.1513633406998307, "grad_norm": 233.01658630371094, "learning_rate": 9.924462479606207e-06, "loss": 20.5177, "step": 74930 }, { "epoch": 0.15138354133251453, "grad_norm": 268.5835876464844, "learning_rate": 9.924402020958238e-06, "loss": 15.2923, "step": 74940 }, { "epoch": 0.15140374196519835, "grad_norm": 182.87258911132812, "learning_rate": 9.92434153830933e-06, "loss": 11.0032, "step": 74950 }, { "epoch": 0.15142394259788217, "grad_norm": 632.199951171875, "learning_rate": 9.92428103165978e-06, "loss": 23.4439, "step": 74960 }, { "epoch": 0.151444143230566, "grad_norm": 280.0271911621094, "learning_rate": 9.92422050100988e-06, "loss": 16.7607, "step": 74970 }, { "epoch": 0.1514643438632498, "grad_norm": 266.1084899902344, "learning_rate": 9.924159946359927e-06, "loss": 22.0468, "step": 74980 }, { "epoch": 0.15148454449593363, "grad_norm": 336.4967956542969, "learning_rate": 9.924099367710215e-06, "loss": 28.806, "step": 74990 }, { "epoch": 0.15150474512861742, "grad_norm": 474.88555908203125, "learning_rate": 9.924038765061042e-06, "loss": 39.5052, "step": 75000 }, { "epoch": 0.15152494576130124, "grad_norm": 489.7330627441406, "learning_rate": 9.923978138412698e-06, "loss": 16.7885, "step": 75010 }, { "epoch": 0.15154514639398506, "grad_norm": 597.09130859375, "learning_rate": 9.923917487765484e-06, "loss": 19.9639, "step": 75020 }, { "epoch": 0.15156534702666888, "grad_norm": 377.1841125488281, "learning_rate": 9.923856813119694e-06, "loss": 26.6894, "step": 75030 }, { "epoch": 0.1515855476593527, "grad_norm": 311.2860107421875, "learning_rate": 9.92379611447562e-06, "loss": 26.1113, "step": 75040 }, { "epoch": 0.15160574829203652, "grad_norm": 455.06207275390625, "learning_rate": 9.923735391833564e-06, "loss": 28.5179, "step": 75050 }, { "epoch": 0.1516259489247203, "grad_norm": 607.7901611328125, "learning_rate": 9.923674645193819e-06, "loss": 22.8817, "step": 75060 }, { "epoch": 0.15164614955740413, "grad_norm": 356.8912353515625, "learning_rate": 9.92361387455668e-06, "loss": 29.9606, "step": 75070 }, { "epoch": 0.15166635019008795, "grad_norm": 312.0567321777344, "learning_rate": 9.923553079922443e-06, "loss": 16.3842, "step": 75080 }, { "epoch": 0.15168655082277177, "grad_norm": 487.7376708984375, "learning_rate": 9.923492261291406e-06, "loss": 15.1316, "step": 75090 }, { "epoch": 0.1517067514554556, "grad_norm": 1110.7755126953125, "learning_rate": 9.923431418663866e-06, "loss": 31.4992, "step": 75100 }, { "epoch": 0.1517269520881394, "grad_norm": 140.07437133789062, "learning_rate": 9.923370552040117e-06, "loss": 18.2737, "step": 
75110 }, { "epoch": 0.1517471527208232, "grad_norm": 594.5069580078125, "learning_rate": 9.923309661420458e-06, "loss": 30.8502, "step": 75120 }, { "epoch": 0.15176735335350702, "grad_norm": 526.073486328125, "learning_rate": 9.923248746805185e-06, "loss": 17.2189, "step": 75130 }, { "epoch": 0.15178755398619084, "grad_norm": 359.8941955566406, "learning_rate": 9.923187808194594e-06, "loss": 32.6762, "step": 75140 }, { "epoch": 0.15180775461887466, "grad_norm": 660.1754760742188, "learning_rate": 9.923126845588982e-06, "loss": 25.885, "step": 75150 }, { "epoch": 0.15182795525155848, "grad_norm": 89.28858947753906, "learning_rate": 9.92306585898865e-06, "loss": 16.3939, "step": 75160 }, { "epoch": 0.1518481558842423, "grad_norm": 153.34735107421875, "learning_rate": 9.92300484839389e-06, "loss": 18.2618, "step": 75170 }, { "epoch": 0.15186835651692612, "grad_norm": 660.1924438476562, "learning_rate": 9.922943813805e-06, "loss": 18.8986, "step": 75180 }, { "epoch": 0.15188855714960992, "grad_norm": 723.8023681640625, "learning_rate": 9.92288275522228e-06, "loss": 20.5952, "step": 75190 }, { "epoch": 0.15190875778229374, "grad_norm": 915.8523559570312, "learning_rate": 9.922821672646028e-06, "loss": 43.1564, "step": 75200 }, { "epoch": 0.15192895841497756, "grad_norm": 453.2025146484375, "learning_rate": 9.922760566076538e-06, "loss": 13.1289, "step": 75210 }, { "epoch": 0.15194915904766138, "grad_norm": 555.0697021484375, "learning_rate": 9.922699435514112e-06, "loss": 19.823, "step": 75220 }, { "epoch": 0.1519693596803452, "grad_norm": 1103.8321533203125, "learning_rate": 9.922638280959044e-06, "loss": 31.5298, "step": 75230 }, { "epoch": 0.15198956031302902, "grad_norm": 458.439208984375, "learning_rate": 9.922577102411638e-06, "loss": 13.7083, "step": 75240 }, { "epoch": 0.1520097609457128, "grad_norm": 1403.2596435546875, "learning_rate": 9.922515899872184e-06, "loss": 31.9748, "step": 75250 }, { "epoch": 0.15202996157839663, "grad_norm": 774.9822387695312, "learning_rate": 9.922454673340987e-06, "loss": 38.4883, "step": 75260 }, { "epoch": 0.15205016221108045, "grad_norm": 397.8044128417969, "learning_rate": 9.922393422818342e-06, "loss": 29.8237, "step": 75270 }, { "epoch": 0.15207036284376427, "grad_norm": 441.2273864746094, "learning_rate": 9.922332148304548e-06, "loss": 36.652, "step": 75280 }, { "epoch": 0.1520905634764481, "grad_norm": 610.16064453125, "learning_rate": 9.922270849799903e-06, "loss": 30.548, "step": 75290 }, { "epoch": 0.1521107641091319, "grad_norm": 347.32177734375, "learning_rate": 9.922209527304709e-06, "loss": 27.3734, "step": 75300 }, { "epoch": 0.15213096474181573, "grad_norm": 383.2518005371094, "learning_rate": 9.922148180819261e-06, "loss": 10.6325, "step": 75310 }, { "epoch": 0.15215116537449952, "grad_norm": 634.7061767578125, "learning_rate": 9.922086810343862e-06, "loss": 30.0669, "step": 75320 }, { "epoch": 0.15217136600718334, "grad_norm": 825.0751342773438, "learning_rate": 9.922025415878809e-06, "loss": 19.7906, "step": 75330 }, { "epoch": 0.15219156663986716, "grad_norm": 240.3961944580078, "learning_rate": 9.9219639974244e-06, "loss": 19.2792, "step": 75340 }, { "epoch": 0.15221176727255098, "grad_norm": 708.1145629882812, "learning_rate": 9.921902554980935e-06, "loss": 31.2332, "step": 75350 }, { "epoch": 0.1522319679052348, "grad_norm": 205.85398864746094, "learning_rate": 9.921841088548713e-06, "loss": 28.1157, "step": 75360 }, { "epoch": 0.15225216853791862, "grad_norm": 699.2012939453125, "learning_rate": 9.921779598128036e-06, "loss": 
16.0434, "step": 75370 }, { "epoch": 0.1522723691706024, "grad_norm": 511.19268798828125, "learning_rate": 9.921718083719203e-06, "loss": 15.3861, "step": 75380 }, { "epoch": 0.15229256980328623, "grad_norm": 991.4080200195312, "learning_rate": 9.921656545322512e-06, "loss": 29.8642, "step": 75390 }, { "epoch": 0.15231277043597005, "grad_norm": 498.2525634765625, "learning_rate": 9.921594982938262e-06, "loss": 36.8228, "step": 75400 }, { "epoch": 0.15233297106865387, "grad_norm": 452.8070068359375, "learning_rate": 9.921533396566758e-06, "loss": 24.1474, "step": 75410 }, { "epoch": 0.1523531717013377, "grad_norm": 188.76637268066406, "learning_rate": 9.921471786208296e-06, "loss": 22.8066, "step": 75420 }, { "epoch": 0.1523733723340215, "grad_norm": 500.168212890625, "learning_rate": 9.921410151863177e-06, "loss": 26.2489, "step": 75430 }, { "epoch": 0.1523935729667053, "grad_norm": 485.8915710449219, "learning_rate": 9.921348493531701e-06, "loss": 24.2653, "step": 75440 }, { "epoch": 0.15241377359938912, "grad_norm": 66.25651550292969, "learning_rate": 9.921286811214173e-06, "loss": 11.0651, "step": 75450 }, { "epoch": 0.15243397423207294, "grad_norm": 338.6200256347656, "learning_rate": 9.921225104910886e-06, "loss": 13.1423, "step": 75460 }, { "epoch": 0.15245417486475676, "grad_norm": 302.8039245605469, "learning_rate": 9.921163374622147e-06, "loss": 11.354, "step": 75470 }, { "epoch": 0.15247437549744058, "grad_norm": 645.382080078125, "learning_rate": 9.921101620348252e-06, "loss": 29.3993, "step": 75480 }, { "epoch": 0.1524945761301244, "grad_norm": 650.5449829101562, "learning_rate": 9.921039842089508e-06, "loss": 29.5309, "step": 75490 }, { "epoch": 0.15251477676280822, "grad_norm": 434.36199951171875, "learning_rate": 9.92097803984621e-06, "loss": 8.5805, "step": 75500 }, { "epoch": 0.15253497739549202, "grad_norm": 535.7971801757812, "learning_rate": 9.920916213618664e-06, "loss": 27.8635, "step": 75510 }, { "epoch": 0.15255517802817584, "grad_norm": 709.4111328125, "learning_rate": 9.920854363407168e-06, "loss": 36.1847, "step": 75520 }, { "epoch": 0.15257537866085966, "grad_norm": 454.9409484863281, "learning_rate": 9.920792489212023e-06, "loss": 35.0518, "step": 75530 }, { "epoch": 0.15259557929354348, "grad_norm": 222.44839477539062, "learning_rate": 9.920730591033534e-06, "loss": 29.2773, "step": 75540 }, { "epoch": 0.1526157799262273, "grad_norm": 240.63441467285156, "learning_rate": 9.920668668872002e-06, "loss": 21.3169, "step": 75550 }, { "epoch": 0.15263598055891112, "grad_norm": 293.8035583496094, "learning_rate": 9.920606722727726e-06, "loss": 20.815, "step": 75560 }, { "epoch": 0.1526561811915949, "grad_norm": 701.8116455078125, "learning_rate": 9.920544752601011e-06, "loss": 25.9134, "step": 75570 }, { "epoch": 0.15267638182427873, "grad_norm": 493.2641906738281, "learning_rate": 9.920482758492156e-06, "loss": 19.1189, "step": 75580 }, { "epoch": 0.15269658245696255, "grad_norm": 305.3404235839844, "learning_rate": 9.920420740401466e-06, "loss": 19.7217, "step": 75590 }, { "epoch": 0.15271678308964637, "grad_norm": 363.524658203125, "learning_rate": 9.920358698329242e-06, "loss": 16.3366, "step": 75600 }, { "epoch": 0.1527369837223302, "grad_norm": 82.39474487304688, "learning_rate": 9.920296632275785e-06, "loss": 18.9301, "step": 75610 }, { "epoch": 0.152757184355014, "grad_norm": 452.2556457519531, "learning_rate": 9.9202345422414e-06, "loss": 26.0476, "step": 75620 }, { "epoch": 0.15277738498769783, "grad_norm": 23.63207244873047, "learning_rate": 
9.92017242822639e-06, "loss": 12.7449, "step": 75630 }, { "epoch": 0.15279758562038162, "grad_norm": 771.0640869140625, "learning_rate": 9.920110290231056e-06, "loss": 43.8966, "step": 75640 }, { "epoch": 0.15281778625306544, "grad_norm": 504.03155517578125, "learning_rate": 9.920048128255699e-06, "loss": 29.4398, "step": 75650 }, { "epoch": 0.15283798688574926, "grad_norm": 937.1925048828125, "learning_rate": 9.919985942300625e-06, "loss": 27.1426, "step": 75660 }, { "epoch": 0.15285818751843308, "grad_norm": 121.6279067993164, "learning_rate": 9.919923732366137e-06, "loss": 42.1627, "step": 75670 }, { "epoch": 0.1528783881511169, "grad_norm": 428.188232421875, "learning_rate": 9.919861498452538e-06, "loss": 32.3342, "step": 75680 }, { "epoch": 0.15289858878380072, "grad_norm": 225.9127197265625, "learning_rate": 9.91979924056013e-06, "loss": 24.5659, "step": 75690 }, { "epoch": 0.1529187894164845, "grad_norm": 1178.7598876953125, "learning_rate": 9.919736958689216e-06, "loss": 31.9912, "step": 75700 }, { "epoch": 0.15293899004916833, "grad_norm": 442.5712585449219, "learning_rate": 9.919674652840103e-06, "loss": 21.1682, "step": 75710 }, { "epoch": 0.15295919068185215, "grad_norm": 509.2165832519531, "learning_rate": 9.91961232301309e-06, "loss": 15.8996, "step": 75720 }, { "epoch": 0.15297939131453597, "grad_norm": 304.9468994140625, "learning_rate": 9.919549969208486e-06, "loss": 23.0404, "step": 75730 }, { "epoch": 0.1529995919472198, "grad_norm": 488.9942626953125, "learning_rate": 9.919487591426591e-06, "loss": 15.6335, "step": 75740 }, { "epoch": 0.1530197925799036, "grad_norm": 564.2071533203125, "learning_rate": 9.91942518966771e-06, "loss": 35.4118, "step": 75750 }, { "epoch": 0.1530399932125874, "grad_norm": 462.4146728515625, "learning_rate": 9.919362763932145e-06, "loss": 27.5123, "step": 75760 }, { "epoch": 0.15306019384527123, "grad_norm": 478.4244384765625, "learning_rate": 9.919300314220206e-06, "loss": 18.8904, "step": 75770 }, { "epoch": 0.15308039447795505, "grad_norm": 644.9933471679688, "learning_rate": 9.919237840532192e-06, "loss": 17.6179, "step": 75780 }, { "epoch": 0.15310059511063887, "grad_norm": 502.4374084472656, "learning_rate": 9.91917534286841e-06, "loss": 17.1634, "step": 75790 }, { "epoch": 0.15312079574332269, "grad_norm": 602.4114990234375, "learning_rate": 9.919112821229165e-06, "loss": 16.3323, "step": 75800 }, { "epoch": 0.1531409963760065, "grad_norm": 299.54547119140625, "learning_rate": 9.91905027561476e-06, "loss": 401.1151, "step": 75810 }, { "epoch": 0.15316119700869033, "grad_norm": 134.6005859375, "learning_rate": 9.918987706025498e-06, "loss": 17.0332, "step": 75820 }, { "epoch": 0.15318139764137412, "grad_norm": 676.94970703125, "learning_rate": 9.918925112461688e-06, "loss": 138.785, "step": 75830 }, { "epoch": 0.15320159827405794, "grad_norm": 81.3394775390625, "learning_rate": 9.918862494923635e-06, "loss": 17.716, "step": 75840 }, { "epoch": 0.15322179890674176, "grad_norm": 814.876953125, "learning_rate": 9.918799853411642e-06, "loss": 137.1458, "step": 75850 }, { "epoch": 0.15324199953942558, "grad_norm": 340.6568908691406, "learning_rate": 9.918737187926014e-06, "loss": 325.2914, "step": 75860 }, { "epoch": 0.1532622001721094, "grad_norm": 693.6303100585938, "learning_rate": 9.91867449846706e-06, "loss": 172.8194, "step": 75870 }, { "epoch": 0.15328240080479322, "grad_norm": 308.6283874511719, "learning_rate": 9.91861178503508e-06, "loss": 35.7975, "step": 75880 }, { "epoch": 0.153302601437477, "grad_norm": 1901.94873046875, 
"learning_rate": 9.918549047630386e-06, "loss": 26.413, "step": 75890 }, { "epoch": 0.15332280207016083, "grad_norm": 418.302978515625, "learning_rate": 9.918486286253279e-06, "loss": 19.2579, "step": 75900 }, { "epoch": 0.15334300270284465, "grad_norm": 296.1881103515625, "learning_rate": 9.918423500904066e-06, "loss": 34.349, "step": 75910 }, { "epoch": 0.15336320333552847, "grad_norm": 217.81080627441406, "learning_rate": 9.918360691583056e-06, "loss": 17.4354, "step": 75920 }, { "epoch": 0.1533834039682123, "grad_norm": 393.1789855957031, "learning_rate": 9.918297858290548e-06, "loss": 20.4026, "step": 75930 }, { "epoch": 0.1534036046008961, "grad_norm": 217.10507202148438, "learning_rate": 9.918235001026856e-06, "loss": 39.3795, "step": 75940 }, { "epoch": 0.1534238052335799, "grad_norm": 469.1217956542969, "learning_rate": 9.918172119792283e-06, "loss": 27.6138, "step": 75950 }, { "epoch": 0.15344400586626372, "grad_norm": 576.5799560546875, "learning_rate": 9.918109214587134e-06, "loss": 18.6139, "step": 75960 }, { "epoch": 0.15346420649894754, "grad_norm": 478.5797424316406, "learning_rate": 9.918046285411717e-06, "loss": 24.1646, "step": 75970 }, { "epoch": 0.15348440713163136, "grad_norm": 94.12849426269531, "learning_rate": 9.917983332266342e-06, "loss": 28.6062, "step": 75980 }, { "epoch": 0.15350460776431518, "grad_norm": 63.8079719543457, "learning_rate": 9.91792035515131e-06, "loss": 13.2191, "step": 75990 }, { "epoch": 0.153524808396999, "grad_norm": 1298.6837158203125, "learning_rate": 9.91785735406693e-06, "loss": 19.2591, "step": 76000 }, { "epoch": 0.15354500902968282, "grad_norm": 159.49261474609375, "learning_rate": 9.917794329013511e-06, "loss": 24.4185, "step": 76010 }, { "epoch": 0.15356520966236661, "grad_norm": 543.8831176757812, "learning_rate": 9.917731279991358e-06, "loss": 25.5307, "step": 76020 }, { "epoch": 0.15358541029505043, "grad_norm": 165.6639404296875, "learning_rate": 9.91766820700078e-06, "loss": 33.5344, "step": 76030 }, { "epoch": 0.15360561092773425, "grad_norm": 179.58657836914062, "learning_rate": 9.917605110042084e-06, "loss": 19.2109, "step": 76040 }, { "epoch": 0.15362581156041807, "grad_norm": 240.4688262939453, "learning_rate": 9.917541989115579e-06, "loss": 24.1171, "step": 76050 }, { "epoch": 0.1536460121931019, "grad_norm": 550.3084716796875, "learning_rate": 9.917478844221566e-06, "loss": 17.9788, "step": 76060 }, { "epoch": 0.15366621282578571, "grad_norm": 469.7898254394531, "learning_rate": 9.91741567536036e-06, "loss": 10.9789, "step": 76070 }, { "epoch": 0.1536864134584695, "grad_norm": 687.9810791015625, "learning_rate": 9.917352482532267e-06, "loss": 29.3961, "step": 76080 }, { "epoch": 0.15370661409115333, "grad_norm": 198.01539611816406, "learning_rate": 9.917289265737594e-06, "loss": 21.9126, "step": 76090 }, { "epoch": 0.15372681472383715, "grad_norm": 140.68165588378906, "learning_rate": 9.91722602497665e-06, "loss": 16.0029, "step": 76100 }, { "epoch": 0.15374701535652097, "grad_norm": 246.17007446289062, "learning_rate": 9.917162760249741e-06, "loss": 14.9653, "step": 76110 }, { "epoch": 0.1537672159892048, "grad_norm": 525.4667358398438, "learning_rate": 9.91709947155718e-06, "loss": 42.5954, "step": 76120 }, { "epoch": 0.1537874166218886, "grad_norm": 499.7342224121094, "learning_rate": 9.91703615889927e-06, "loss": 18.207, "step": 76130 }, { "epoch": 0.15380761725457243, "grad_norm": 0.0, "learning_rate": 9.916972822276322e-06, "loss": 16.8293, "step": 76140 }, { "epoch": 0.15382781788725622, "grad_norm": 
606.3645629882812, "learning_rate": 9.916909461688646e-06, "loss": 32.2728, "step": 76150 }, { "epoch": 0.15384801851994004, "grad_norm": 433.4953918457031, "learning_rate": 9.916846077136548e-06, "loss": 25.9946, "step": 76160 }, { "epoch": 0.15386821915262386, "grad_norm": 642.1217651367188, "learning_rate": 9.916782668620341e-06, "loss": 29.4844, "step": 76170 }, { "epoch": 0.15388841978530768, "grad_norm": 12.11685562133789, "learning_rate": 9.91671923614033e-06, "loss": 20.6094, "step": 76180 }, { "epoch": 0.1539086204179915, "grad_norm": 974.4423217773438, "learning_rate": 9.916655779696826e-06, "loss": 31.4484, "step": 76190 }, { "epoch": 0.15392882105067532, "grad_norm": 656.9561767578125, "learning_rate": 9.91659229929014e-06, "loss": 28.2891, "step": 76200 }, { "epoch": 0.1539490216833591, "grad_norm": 680.5543823242188, "learning_rate": 9.916528794920577e-06, "loss": 22.4223, "step": 76210 }, { "epoch": 0.15396922231604293, "grad_norm": 487.59283447265625, "learning_rate": 9.916465266588448e-06, "loss": 26.6795, "step": 76220 }, { "epoch": 0.15398942294872675, "grad_norm": 547.8593139648438, "learning_rate": 9.916401714294067e-06, "loss": 32.8007, "step": 76230 }, { "epoch": 0.15400962358141057, "grad_norm": 372.6311950683594, "learning_rate": 9.916338138037738e-06, "loss": 30.9446, "step": 76240 }, { "epoch": 0.1540298242140944, "grad_norm": 315.0910949707031, "learning_rate": 9.916274537819774e-06, "loss": 27.6902, "step": 76250 }, { "epoch": 0.1540500248467782, "grad_norm": 1071.88720703125, "learning_rate": 9.916210913640483e-06, "loss": 27.2452, "step": 76260 }, { "epoch": 0.154070225479462, "grad_norm": 520.8048095703125, "learning_rate": 9.916147265500179e-06, "loss": 37.0092, "step": 76270 }, { "epoch": 0.15409042611214582, "grad_norm": 297.08050537109375, "learning_rate": 9.916083593399167e-06, "loss": 19.7901, "step": 76280 }, { "epoch": 0.15411062674482964, "grad_norm": 264.24761962890625, "learning_rate": 9.916019897337761e-06, "loss": 36.8133, "step": 76290 }, { "epoch": 0.15413082737751346, "grad_norm": 252.61801147460938, "learning_rate": 9.915956177316269e-06, "loss": 11.0623, "step": 76300 }, { "epoch": 0.15415102801019728, "grad_norm": 211.3620147705078, "learning_rate": 9.915892433335004e-06, "loss": 21.1611, "step": 76310 }, { "epoch": 0.1541712286428811, "grad_norm": 372.51190185546875, "learning_rate": 9.915828665394274e-06, "loss": 26.7633, "step": 76320 }, { "epoch": 0.15419142927556492, "grad_norm": 1148.5885009765625, "learning_rate": 9.915764873494393e-06, "loss": 35.1224, "step": 76330 }, { "epoch": 0.15421162990824872, "grad_norm": 257.689697265625, "learning_rate": 9.915701057635669e-06, "loss": 20.7373, "step": 76340 }, { "epoch": 0.15423183054093254, "grad_norm": 776.6898803710938, "learning_rate": 9.915637217818415e-06, "loss": 15.584, "step": 76350 }, { "epoch": 0.15425203117361636, "grad_norm": 365.4017333984375, "learning_rate": 9.915573354042943e-06, "loss": 24.7629, "step": 76360 }, { "epoch": 0.15427223180630018, "grad_norm": 526.2894287109375, "learning_rate": 9.91550946630956e-06, "loss": 20.6592, "step": 76370 }, { "epoch": 0.154292432438984, "grad_norm": 499.64520263671875, "learning_rate": 9.915445554618581e-06, "loss": 23.5198, "step": 76380 }, { "epoch": 0.15431263307166782, "grad_norm": 267.19219970703125, "learning_rate": 9.915381618970317e-06, "loss": 17.8118, "step": 76390 }, { "epoch": 0.1543328337043516, "grad_norm": 146.3233184814453, "learning_rate": 9.915317659365078e-06, "loss": 16.3571, "step": 76400 }, { "epoch": 
0.15435303433703543, "grad_norm": 4946.3212890625, "learning_rate": 9.915253675803178e-06, "loss": 25.2522, "step": 76410 }, { "epoch": 0.15437323496971925, "grad_norm": 288.2351989746094, "learning_rate": 9.915189668284927e-06, "loss": 23.2986, "step": 76420 }, { "epoch": 0.15439343560240307, "grad_norm": 218.5893096923828, "learning_rate": 9.915125636810638e-06, "loss": 27.1564, "step": 76430 }, { "epoch": 0.1544136362350869, "grad_norm": 391.52838134765625, "learning_rate": 9.915061581380622e-06, "loss": 25.8432, "step": 76440 }, { "epoch": 0.1544338368677707, "grad_norm": 594.8270874023438, "learning_rate": 9.914997501995193e-06, "loss": 27.4184, "step": 76450 }, { "epoch": 0.15445403750045453, "grad_norm": 498.0327453613281, "learning_rate": 9.914933398654663e-06, "loss": 32.746, "step": 76460 }, { "epoch": 0.15447423813313832, "grad_norm": 1847.38525390625, "learning_rate": 9.914869271359342e-06, "loss": 55.4857, "step": 76470 }, { "epoch": 0.15449443876582214, "grad_norm": 1521.9986572265625, "learning_rate": 9.914805120109545e-06, "loss": 49.2495, "step": 76480 }, { "epoch": 0.15451463939850596, "grad_norm": 898.3662109375, "learning_rate": 9.914740944905585e-06, "loss": 34.2419, "step": 76490 }, { "epoch": 0.15453484003118978, "grad_norm": 201742.3125, "learning_rate": 9.914676745747772e-06, "loss": 55.974, "step": 76500 }, { "epoch": 0.1545550406638736, "grad_norm": 1071.6346435546875, "learning_rate": 9.914612522636423e-06, "loss": 25.4427, "step": 76510 }, { "epoch": 0.15457524129655742, "grad_norm": 874.8290405273438, "learning_rate": 9.914548275571845e-06, "loss": 29.754, "step": 76520 }, { "epoch": 0.1545954419292412, "grad_norm": 1176.8680419921875, "learning_rate": 9.914484004554356e-06, "loss": 31.7233, "step": 76530 }, { "epoch": 0.15461564256192503, "grad_norm": 422.1765441894531, "learning_rate": 9.91441970958427e-06, "loss": 31.054, "step": 76540 }, { "epoch": 0.15463584319460885, "grad_norm": 934.604248046875, "learning_rate": 9.914355390661897e-06, "loss": 23.3269, "step": 76550 }, { "epoch": 0.15465604382729267, "grad_norm": 196.0285186767578, "learning_rate": 9.914291047787552e-06, "loss": 34.0607, "step": 76560 }, { "epoch": 0.1546762444599765, "grad_norm": 294.592041015625, "learning_rate": 9.914226680961549e-06, "loss": 32.4687, "step": 76570 }, { "epoch": 0.1546964450926603, "grad_norm": 706.431640625, "learning_rate": 9.9141622901842e-06, "loss": 22.9893, "step": 76580 }, { "epoch": 0.1547166457253441, "grad_norm": 534.7452392578125, "learning_rate": 9.914097875455821e-06, "loss": 29.5437, "step": 76590 }, { "epoch": 0.15473684635802792, "grad_norm": 2935.35498046875, "learning_rate": 9.914033436776724e-06, "loss": 18.9412, "step": 76600 }, { "epoch": 0.15475704699071174, "grad_norm": 543.9201049804688, "learning_rate": 9.913968974147225e-06, "loss": 18.4595, "step": 76610 }, { "epoch": 0.15477724762339556, "grad_norm": 767.2598266601562, "learning_rate": 9.913904487567636e-06, "loss": 35.2977, "step": 76620 }, { "epoch": 0.15479744825607938, "grad_norm": 1048.299560546875, "learning_rate": 9.913839977038274e-06, "loss": 37.5103, "step": 76630 }, { "epoch": 0.1548176488887632, "grad_norm": 85.23568725585938, "learning_rate": 9.913775442559451e-06, "loss": 42.2107, "step": 76640 }, { "epoch": 0.15483784952144702, "grad_norm": 466.147216796875, "learning_rate": 9.913710884131483e-06, "loss": 24.6358, "step": 76650 }, { "epoch": 0.15485805015413082, "grad_norm": 731.3047485351562, "learning_rate": 9.913646301754685e-06, "loss": 18.8588, "step": 76660 }, { 
"epoch": 0.15487825078681464, "grad_norm": 63.4068603515625, "learning_rate": 9.913581695429368e-06, "loss": 26.315, "step": 76670 }, { "epoch": 0.15489845141949846, "grad_norm": 304.8154602050781, "learning_rate": 9.913517065155852e-06, "loss": 19.8241, "step": 76680 }, { "epoch": 0.15491865205218228, "grad_norm": 497.0758056640625, "learning_rate": 9.91345241093445e-06, "loss": 17.4634, "step": 76690 }, { "epoch": 0.1549388526848661, "grad_norm": 625.6817626953125, "learning_rate": 9.913387732765475e-06, "loss": 18.1652, "step": 76700 }, { "epoch": 0.15495905331754992, "grad_norm": 552.4562377929688, "learning_rate": 9.913323030649247e-06, "loss": 25.8852, "step": 76710 }, { "epoch": 0.1549792539502337, "grad_norm": 316.5075378417969, "learning_rate": 9.913258304586076e-06, "loss": 18.0775, "step": 76720 }, { "epoch": 0.15499945458291753, "grad_norm": 488.21954345703125, "learning_rate": 9.91319355457628e-06, "loss": 20.0942, "step": 76730 }, { "epoch": 0.15501965521560135, "grad_norm": 557.0736083984375, "learning_rate": 9.913128780620175e-06, "loss": 19.5888, "step": 76740 }, { "epoch": 0.15503985584828517, "grad_norm": 99.08460235595703, "learning_rate": 9.913063982718076e-06, "loss": 34.2454, "step": 76750 }, { "epoch": 0.155060056480969, "grad_norm": 432.3857727050781, "learning_rate": 9.9129991608703e-06, "loss": 24.6678, "step": 76760 }, { "epoch": 0.1550802571136528, "grad_norm": 114.72826385498047, "learning_rate": 9.912934315077162e-06, "loss": 24.7001, "step": 76770 }, { "epoch": 0.15510045774633663, "grad_norm": 290.5341796875, "learning_rate": 9.912869445338978e-06, "loss": 25.8757, "step": 76780 }, { "epoch": 0.15512065837902042, "grad_norm": 732.5404663085938, "learning_rate": 9.912804551656064e-06, "loss": 24.0103, "step": 76790 }, { "epoch": 0.15514085901170424, "grad_norm": 145.4403533935547, "learning_rate": 9.912739634028734e-06, "loss": 33.1736, "step": 76800 }, { "epoch": 0.15516105964438806, "grad_norm": 846.7334594726562, "learning_rate": 9.91267469245731e-06, "loss": 41.3353, "step": 76810 }, { "epoch": 0.15518126027707188, "grad_norm": 392.328857421875, "learning_rate": 9.912609726942104e-06, "loss": 73.3939, "step": 76820 }, { "epoch": 0.1552014609097557, "grad_norm": 452.7477111816406, "learning_rate": 9.912544737483434e-06, "loss": 17.1838, "step": 76830 }, { "epoch": 0.15522166154243952, "grad_norm": 541.6780395507812, "learning_rate": 9.912479724081617e-06, "loss": 23.2453, "step": 76840 }, { "epoch": 0.1552418621751233, "grad_norm": 394.6813659667969, "learning_rate": 9.912414686736971e-06, "loss": 24.0498, "step": 76850 }, { "epoch": 0.15526206280780713, "grad_norm": 977.437744140625, "learning_rate": 9.912349625449808e-06, "loss": 27.9861, "step": 76860 }, { "epoch": 0.15528226344049095, "grad_norm": 970.5541381835938, "learning_rate": 9.912284540220452e-06, "loss": 41.6399, "step": 76870 }, { "epoch": 0.15530246407317477, "grad_norm": 234.07644653320312, "learning_rate": 9.912219431049217e-06, "loss": 18.4895, "step": 76880 }, { "epoch": 0.1553226647058586, "grad_norm": 215.9114532470703, "learning_rate": 9.912154297936418e-06, "loss": 17.8177, "step": 76890 }, { "epoch": 0.1553428653385424, "grad_norm": 476.1218566894531, "learning_rate": 9.912089140882377e-06, "loss": 45.933, "step": 76900 }, { "epoch": 0.1553630659712262, "grad_norm": 321.398193359375, "learning_rate": 9.912023959887408e-06, "loss": 18.5507, "step": 76910 }, { "epoch": 0.15538326660391003, "grad_norm": 664.8775024414062, "learning_rate": 9.91195875495183e-06, "loss": 24.0288, "step": 
76920 }, { "epoch": 0.15540346723659385, "grad_norm": 143.9698944091797, "learning_rate": 9.911893526075961e-06, "loss": 34.0271, "step": 76930 }, { "epoch": 0.15542366786927767, "grad_norm": 165.63645935058594, "learning_rate": 9.911828273260119e-06, "loss": 18.4847, "step": 76940 }, { "epoch": 0.15544386850196149, "grad_norm": 630.33984375, "learning_rate": 9.911762996504621e-06, "loss": 37.2778, "step": 76950 }, { "epoch": 0.1554640691346453, "grad_norm": 330.86572265625, "learning_rate": 9.911697695809787e-06, "loss": 30.9341, "step": 76960 }, { "epoch": 0.15548426976732913, "grad_norm": 773.8388061523438, "learning_rate": 9.911632371175934e-06, "loss": 34.7768, "step": 76970 }, { "epoch": 0.15550447040001292, "grad_norm": 658.99853515625, "learning_rate": 9.911567022603379e-06, "loss": 19.9509, "step": 76980 }, { "epoch": 0.15552467103269674, "grad_norm": 29.142122268676758, "learning_rate": 9.911501650092443e-06, "loss": 33.9231, "step": 76990 }, { "epoch": 0.15554487166538056, "grad_norm": 688.2086791992188, "learning_rate": 9.911436253643445e-06, "loss": 41.0092, "step": 77000 }, { "epoch": 0.15556507229806438, "grad_norm": 366.0467224121094, "learning_rate": 9.911370833256701e-06, "loss": 29.3616, "step": 77010 }, { "epoch": 0.1555852729307482, "grad_norm": 1323.949462890625, "learning_rate": 9.91130538893253e-06, "loss": 34.4818, "step": 77020 }, { "epoch": 0.15560547356343202, "grad_norm": 302.8736572265625, "learning_rate": 9.911239920671253e-06, "loss": 20.0606, "step": 77030 }, { "epoch": 0.1556256741961158, "grad_norm": 256.28668212890625, "learning_rate": 9.91117442847319e-06, "loss": 41.563, "step": 77040 }, { "epoch": 0.15564587482879963, "grad_norm": 155.36709594726562, "learning_rate": 9.911108912338656e-06, "loss": 21.108, "step": 77050 }, { "epoch": 0.15566607546148345, "grad_norm": 400.1911926269531, "learning_rate": 9.911043372267975e-06, "loss": 23.0878, "step": 77060 }, { "epoch": 0.15568627609416727, "grad_norm": 868.4791259765625, "learning_rate": 9.910977808261463e-06, "loss": 32.3664, "step": 77070 }, { "epoch": 0.1557064767268511, "grad_norm": 157.62985229492188, "learning_rate": 9.910912220319443e-06, "loss": 30.454, "step": 77080 }, { "epoch": 0.1557266773595349, "grad_norm": 215.0827178955078, "learning_rate": 9.910846608442229e-06, "loss": 16.5634, "step": 77090 }, { "epoch": 0.15574687799221873, "grad_norm": 794.7200927734375, "learning_rate": 9.910780972630146e-06, "loss": 25.2269, "step": 77100 }, { "epoch": 0.15576707862490252, "grad_norm": 849.30419921875, "learning_rate": 9.910715312883512e-06, "loss": 26.416, "step": 77110 }, { "epoch": 0.15578727925758634, "grad_norm": 707.9942016601562, "learning_rate": 9.910649629202648e-06, "loss": 23.5251, "step": 77120 }, { "epoch": 0.15580747989027016, "grad_norm": 254.89849853515625, "learning_rate": 9.910583921587872e-06, "loss": 18.0644, "step": 77130 }, { "epoch": 0.15582768052295398, "grad_norm": 4.354783535003662, "learning_rate": 9.910518190039506e-06, "loss": 41.0595, "step": 77140 }, { "epoch": 0.1558478811556378, "grad_norm": 247.84661865234375, "learning_rate": 9.91045243455787e-06, "loss": 17.9057, "step": 77150 }, { "epoch": 0.15586808178832162, "grad_norm": 324.16680908203125, "learning_rate": 9.910386655143285e-06, "loss": 22.0415, "step": 77160 }, { "epoch": 0.15588828242100541, "grad_norm": 178.23960876464844, "learning_rate": 9.91032085179607e-06, "loss": 18.2775, "step": 77170 }, { "epoch": 0.15590848305368923, "grad_norm": 926.4631958007812, "learning_rate": 9.910255024516546e-06, "loss": 
26.7787, "step": 77180 }, { "epoch": 0.15592868368637305, "grad_norm": 1258.066650390625, "learning_rate": 9.910189173305035e-06, "loss": 28.3574, "step": 77190 }, { "epoch": 0.15594888431905687, "grad_norm": 583.8325805664062, "learning_rate": 9.91012329816186e-06, "loss": 42.5556, "step": 77200 }, { "epoch": 0.1559690849517407, "grad_norm": 1802.2276611328125, "learning_rate": 9.910057399087338e-06, "loss": 54.5202, "step": 77210 }, { "epoch": 0.15598928558442451, "grad_norm": 827.5545043945312, "learning_rate": 9.90999147608179e-06, "loss": 12.4443, "step": 77220 }, { "epoch": 0.1560094862171083, "grad_norm": 375.8981628417969, "learning_rate": 9.909925529145541e-06, "loss": 16.1195, "step": 77230 }, { "epoch": 0.15602968684979213, "grad_norm": 313.28118896484375, "learning_rate": 9.90985955827891e-06, "loss": 29.6728, "step": 77240 }, { "epoch": 0.15604988748247595, "grad_norm": 215.35768127441406, "learning_rate": 9.90979356348222e-06, "loss": 50.7093, "step": 77250 }, { "epoch": 0.15607008811515977, "grad_norm": 261.1755065917969, "learning_rate": 9.909727544755789e-06, "loss": 17.7893, "step": 77260 }, { "epoch": 0.1560902887478436, "grad_norm": 696.5534057617188, "learning_rate": 9.909661502099943e-06, "loss": 26.2884, "step": 77270 }, { "epoch": 0.1561104893805274, "grad_norm": 255.1345977783203, "learning_rate": 9.909595435515002e-06, "loss": 18.1933, "step": 77280 }, { "epoch": 0.15613069001321123, "grad_norm": 17.61561393737793, "learning_rate": 9.90952934500129e-06, "loss": 31.0577, "step": 77290 }, { "epoch": 0.15615089064589502, "grad_norm": 395.40673828125, "learning_rate": 9.909463230559127e-06, "loss": 19.4631, "step": 77300 }, { "epoch": 0.15617109127857884, "grad_norm": 1012.7659301757812, "learning_rate": 9.909397092188834e-06, "loss": 21.1493, "step": 77310 }, { "epoch": 0.15619129191126266, "grad_norm": 131.47860717773438, "learning_rate": 9.909330929890734e-06, "loss": 13.5247, "step": 77320 }, { "epoch": 0.15621149254394648, "grad_norm": 621.2780151367188, "learning_rate": 9.909264743665153e-06, "loss": 31.8489, "step": 77330 }, { "epoch": 0.1562316931766303, "grad_norm": 0.0, "learning_rate": 9.90919853351241e-06, "loss": 34.2143, "step": 77340 }, { "epoch": 0.15625189380931412, "grad_norm": 1121.5819091796875, "learning_rate": 9.90913229943283e-06, "loss": 43.6555, "step": 77350 }, { "epoch": 0.1562720944419979, "grad_norm": 268.9812316894531, "learning_rate": 9.909066041426733e-06, "loss": 25.7186, "step": 77360 }, { "epoch": 0.15629229507468173, "grad_norm": 447.2286376953125, "learning_rate": 9.908999759494444e-06, "loss": 29.3143, "step": 77370 }, { "epoch": 0.15631249570736555, "grad_norm": 381.0235595703125, "learning_rate": 9.908933453636287e-06, "loss": 25.825, "step": 77380 }, { "epoch": 0.15633269634004937, "grad_norm": 327.3936462402344, "learning_rate": 9.90886712385258e-06, "loss": 19.0007, "step": 77390 }, { "epoch": 0.1563528969727332, "grad_norm": 329.6006774902344, "learning_rate": 9.908800770143654e-06, "loss": 35.5917, "step": 77400 }, { "epoch": 0.156373097605417, "grad_norm": 445.0928039550781, "learning_rate": 9.908734392509827e-06, "loss": 25.8085, "step": 77410 }, { "epoch": 0.15639329823810083, "grad_norm": 307.52618408203125, "learning_rate": 9.908667990951424e-06, "loss": 12.7383, "step": 77420 }, { "epoch": 0.15641349887078462, "grad_norm": 266.4269104003906, "learning_rate": 9.908601565468768e-06, "loss": 10.2761, "step": 77430 }, { "epoch": 0.15643369950346844, "grad_norm": 706.94921875, "learning_rate": 9.908535116062185e-06, 
"loss": 22.4348, "step": 77440 }, { "epoch": 0.15645390013615226, "grad_norm": 533.7908935546875, "learning_rate": 9.908468642731996e-06, "loss": 25.5123, "step": 77450 }, { "epoch": 0.15647410076883608, "grad_norm": 943.0638427734375, "learning_rate": 9.908402145478526e-06, "loss": 30.1085, "step": 77460 }, { "epoch": 0.1564943014015199, "grad_norm": 375.0356750488281, "learning_rate": 9.908335624302099e-06, "loss": 18.2726, "step": 77470 }, { "epoch": 0.15651450203420372, "grad_norm": 1029.725341796875, "learning_rate": 9.908269079203039e-06, "loss": 34.551, "step": 77480 }, { "epoch": 0.15653470266688752, "grad_norm": 944.670166015625, "learning_rate": 9.908202510181673e-06, "loss": 36.7944, "step": 77490 }, { "epoch": 0.15655490329957134, "grad_norm": 336.8741760253906, "learning_rate": 9.908135917238321e-06, "loss": 20.5199, "step": 77500 }, { "epoch": 0.15657510393225516, "grad_norm": 336.6961364746094, "learning_rate": 9.90806930037331e-06, "loss": 21.9893, "step": 77510 }, { "epoch": 0.15659530456493898, "grad_norm": 1216.4962158203125, "learning_rate": 9.908002659586966e-06, "loss": 20.7643, "step": 77520 }, { "epoch": 0.1566155051976228, "grad_norm": 611.2420043945312, "learning_rate": 9.907935994879612e-06, "loss": 25.8953, "step": 77530 }, { "epoch": 0.15663570583030662, "grad_norm": 165.66566467285156, "learning_rate": 9.907869306251571e-06, "loss": 32.8533, "step": 77540 }, { "epoch": 0.1566559064629904, "grad_norm": 209.29922485351562, "learning_rate": 9.907802593703173e-06, "loss": 15.847, "step": 77550 }, { "epoch": 0.15667610709567423, "grad_norm": 831.6998291015625, "learning_rate": 9.90773585723474e-06, "loss": 27.0138, "step": 77560 }, { "epoch": 0.15669630772835805, "grad_norm": 424.3438720703125, "learning_rate": 9.907669096846596e-06, "loss": 29.0207, "step": 77570 }, { "epoch": 0.15671650836104187, "grad_norm": 574.2207641601562, "learning_rate": 9.90760231253907e-06, "loss": 21.9935, "step": 77580 }, { "epoch": 0.1567367089937257, "grad_norm": 306.4346923828125, "learning_rate": 9.907535504312484e-06, "loss": 10.5125, "step": 77590 }, { "epoch": 0.1567569096264095, "grad_norm": 223.48089599609375, "learning_rate": 9.907468672167165e-06, "loss": 25.2818, "step": 77600 }, { "epoch": 0.15677711025909333, "grad_norm": 1125.790771484375, "learning_rate": 9.90740181610344e-06, "loss": 31.856, "step": 77610 }, { "epoch": 0.15679731089177712, "grad_norm": 347.9830017089844, "learning_rate": 9.907334936121634e-06, "loss": 21.6079, "step": 77620 }, { "epoch": 0.15681751152446094, "grad_norm": 1014.19140625, "learning_rate": 9.907268032222072e-06, "loss": 34.0749, "step": 77630 }, { "epoch": 0.15683771215714476, "grad_norm": 373.791748046875, "learning_rate": 9.90720110440508e-06, "loss": 29.7491, "step": 77640 }, { "epoch": 0.15685791278982858, "grad_norm": 263.6556396484375, "learning_rate": 9.907134152670987e-06, "loss": 21.2426, "step": 77650 }, { "epoch": 0.1568781134225124, "grad_norm": 543.62890625, "learning_rate": 9.907067177020115e-06, "loss": 33.8, "step": 77660 }, { "epoch": 0.15689831405519622, "grad_norm": 467.43670654296875, "learning_rate": 9.907000177452794e-06, "loss": 13.0608, "step": 77670 }, { "epoch": 0.15691851468788, "grad_norm": 398.6751708984375, "learning_rate": 9.90693315396935e-06, "loss": 21.4861, "step": 77680 }, { "epoch": 0.15693871532056383, "grad_norm": 741.5259399414062, "learning_rate": 9.906866106570108e-06, "loss": 27.6681, "step": 77690 }, { "epoch": 0.15695891595324765, "grad_norm": 310.8424377441406, "learning_rate": 
9.906799035255395e-06, "loss": 22.0539, "step": 77700 }, { "epoch": 0.15697911658593147, "grad_norm": 357.97467041015625, "learning_rate": 9.90673194002554e-06, "loss": 15.8134, "step": 77710 }, { "epoch": 0.1569993172186153, "grad_norm": 987.2659912109375, "learning_rate": 9.906664820880869e-06, "loss": 27.9395, "step": 77720 }, { "epoch": 0.1570195178512991, "grad_norm": 219.6345672607422, "learning_rate": 9.906597677821708e-06, "loss": 25.4969, "step": 77730 }, { "epoch": 0.15703971848398293, "grad_norm": 298.4275817871094, "learning_rate": 9.906530510848384e-06, "loss": 25.9595, "step": 77740 }, { "epoch": 0.15705991911666672, "grad_norm": 527.123291015625, "learning_rate": 9.906463319961225e-06, "loss": 33.0071, "step": 77750 }, { "epoch": 0.15708011974935054, "grad_norm": 114.7584457397461, "learning_rate": 9.906396105160561e-06, "loss": 16.7282, "step": 77760 }, { "epoch": 0.15710032038203436, "grad_norm": 606.5748901367188, "learning_rate": 9.906328866446717e-06, "loss": 19.7913, "step": 77770 }, { "epoch": 0.15712052101471818, "grad_norm": 411.9849853515625, "learning_rate": 9.906261603820022e-06, "loss": 16.8656, "step": 77780 }, { "epoch": 0.157140721647402, "grad_norm": 129.91851806640625, "learning_rate": 9.906194317280802e-06, "loss": 24.2333, "step": 77790 }, { "epoch": 0.15716092228008582, "grad_norm": 140.0809326171875, "learning_rate": 9.906127006829385e-06, "loss": 11.4515, "step": 77800 }, { "epoch": 0.15718112291276962, "grad_norm": 596.6380004882812, "learning_rate": 9.9060596724661e-06, "loss": 19.8775, "step": 77810 }, { "epoch": 0.15720132354545344, "grad_norm": 184.2483367919922, "learning_rate": 9.905992314191277e-06, "loss": 25.3137, "step": 77820 }, { "epoch": 0.15722152417813726, "grad_norm": 447.89453125, "learning_rate": 9.905924932005241e-06, "loss": 42.2161, "step": 77830 }, { "epoch": 0.15724172481082108, "grad_norm": 356.61834716796875, "learning_rate": 9.905857525908322e-06, "loss": 17.7926, "step": 77840 }, { "epoch": 0.1572619254435049, "grad_norm": 294.5441589355469, "learning_rate": 9.905790095900849e-06, "loss": 23.3443, "step": 77850 }, { "epoch": 0.15728212607618872, "grad_norm": 754.86279296875, "learning_rate": 9.905722641983151e-06, "loss": 19.1399, "step": 77860 }, { "epoch": 0.1573023267088725, "grad_norm": 366.54058837890625, "learning_rate": 9.905655164155554e-06, "loss": 35.2363, "step": 77870 }, { "epoch": 0.15732252734155633, "grad_norm": 971.9962158203125, "learning_rate": 9.90558766241839e-06, "loss": 29.3012, "step": 77880 }, { "epoch": 0.15734272797424015, "grad_norm": 175.7159881591797, "learning_rate": 9.905520136771985e-06, "loss": 25.0469, "step": 77890 }, { "epoch": 0.15736292860692397, "grad_norm": 540.7525024414062, "learning_rate": 9.90545258721667e-06, "loss": 24.6414, "step": 77900 }, { "epoch": 0.1573831292396078, "grad_norm": 341.110107421875, "learning_rate": 9.905385013752777e-06, "loss": 16.2405, "step": 77910 }, { "epoch": 0.1574033298722916, "grad_norm": 381.3741455078125, "learning_rate": 9.905317416380629e-06, "loss": 40.9647, "step": 77920 }, { "epoch": 0.15742353050497543, "grad_norm": 1155.7586669921875, "learning_rate": 9.905249795100561e-06, "loss": 30.6699, "step": 77930 }, { "epoch": 0.15744373113765922, "grad_norm": 485.991943359375, "learning_rate": 9.905182149912899e-06, "loss": 30.4669, "step": 77940 }, { "epoch": 0.15746393177034304, "grad_norm": 553.8409423828125, "learning_rate": 9.905114480817976e-06, "loss": 35.3405, "step": 77950 }, { "epoch": 0.15748413240302686, "grad_norm": 347.72711181640625, 
"learning_rate": 9.905046787816118e-06, "loss": 17.575, "step": 77960 }, { "epoch": 0.15750433303571068, "grad_norm": 490.9544982910156, "learning_rate": 9.904979070907657e-06, "loss": 21.9032, "step": 77970 }, { "epoch": 0.1575245336683945, "grad_norm": 809.31494140625, "learning_rate": 9.904911330092923e-06, "loss": 28.7668, "step": 77980 }, { "epoch": 0.15754473430107832, "grad_norm": 347.6751708984375, "learning_rate": 9.904843565372249e-06, "loss": 43.1143, "step": 77990 }, { "epoch": 0.1575649349337621, "grad_norm": 299.9636535644531, "learning_rate": 9.904775776745959e-06, "loss": 24.741, "step": 78000 }, { "epoch": 0.15758513556644593, "grad_norm": 677.6216430664062, "learning_rate": 9.904707964214386e-06, "loss": 17.0624, "step": 78010 }, { "epoch": 0.15760533619912975, "grad_norm": 403.5384826660156, "learning_rate": 9.904640127777865e-06, "loss": 36.389, "step": 78020 }, { "epoch": 0.15762553683181357, "grad_norm": 339.7036437988281, "learning_rate": 9.904572267436721e-06, "loss": 14.2183, "step": 78030 }, { "epoch": 0.1576457374644974, "grad_norm": 675.6530151367188, "learning_rate": 9.904504383191286e-06, "loss": 13.3383, "step": 78040 }, { "epoch": 0.1576659380971812, "grad_norm": 551.7474365234375, "learning_rate": 9.904436475041892e-06, "loss": 26.4035, "step": 78050 }, { "epoch": 0.15768613872986503, "grad_norm": 467.4062805175781, "learning_rate": 9.904368542988869e-06, "loss": 32.9935, "step": 78060 }, { "epoch": 0.15770633936254883, "grad_norm": 215.01026916503906, "learning_rate": 9.90430058703255e-06, "loss": 25.5886, "step": 78070 }, { "epoch": 0.15772653999523265, "grad_norm": 233.78195190429688, "learning_rate": 9.904232607173262e-06, "loss": 24.0233, "step": 78080 }, { "epoch": 0.15774674062791647, "grad_norm": 7.0149664878845215, "learning_rate": 9.90416460341134e-06, "loss": 22.3333, "step": 78090 }, { "epoch": 0.15776694126060029, "grad_norm": 793.04150390625, "learning_rate": 9.904096575747117e-06, "loss": 33.6149, "step": 78100 }, { "epoch": 0.1577871418932841, "grad_norm": 748.5648803710938, "learning_rate": 9.90402852418092e-06, "loss": 31.4978, "step": 78110 }, { "epoch": 0.15780734252596793, "grad_norm": 306.300537109375, "learning_rate": 9.903960448713084e-06, "loss": 20.0952, "step": 78120 }, { "epoch": 0.15782754315865172, "grad_norm": 211.0547332763672, "learning_rate": 9.903892349343938e-06, "loss": 45.8324, "step": 78130 }, { "epoch": 0.15784774379133554, "grad_norm": 253.32325744628906, "learning_rate": 9.903824226073816e-06, "loss": 24.4214, "step": 78140 }, { "epoch": 0.15786794442401936, "grad_norm": 349.3773193359375, "learning_rate": 9.90375607890305e-06, "loss": 18.1157, "step": 78150 }, { "epoch": 0.15788814505670318, "grad_norm": 328.2897033691406, "learning_rate": 9.903687907831972e-06, "loss": 16.9796, "step": 78160 }, { "epoch": 0.157908345689387, "grad_norm": 980.4244384765625, "learning_rate": 9.903619712860912e-06, "loss": 35.0202, "step": 78170 }, { "epoch": 0.15792854632207082, "grad_norm": 1009.1373291015625, "learning_rate": 9.903551493990205e-06, "loss": 41.671, "step": 78180 }, { "epoch": 0.1579487469547546, "grad_norm": 41.043155670166016, "learning_rate": 9.903483251220183e-06, "loss": 8.9685, "step": 78190 }, { "epoch": 0.15796894758743843, "grad_norm": 442.794189453125, "learning_rate": 9.903414984551178e-06, "loss": 24.7748, "step": 78200 }, { "epoch": 0.15798914822012225, "grad_norm": 415.76959228515625, "learning_rate": 9.903346693983524e-06, "loss": 24.3408, "step": 78210 }, { "epoch": 0.15800934885280607, "grad_norm": 
367.5481262207031, "learning_rate": 9.903278379517554e-06, "loss": 23.2312, "step": 78220 }, { "epoch": 0.1580295494854899, "grad_norm": 825.7837524414062, "learning_rate": 9.903210041153597e-06, "loss": 34.7608, "step": 78230 }, { "epoch": 0.1580497501181737, "grad_norm": 370.3226318359375, "learning_rate": 9.90314167889199e-06, "loss": 18.5017, "step": 78240 }, { "epoch": 0.15806995075085753, "grad_norm": 147.1715850830078, "learning_rate": 9.903073292733065e-06, "loss": 16.7317, "step": 78250 }, { "epoch": 0.15809015138354132, "grad_norm": 1859.4921875, "learning_rate": 9.903004882677157e-06, "loss": 26.7355, "step": 78260 }, { "epoch": 0.15811035201622514, "grad_norm": 334.5677185058594, "learning_rate": 9.902936448724596e-06, "loss": 32.5608, "step": 78270 }, { "epoch": 0.15813055264890896, "grad_norm": 192.8003692626953, "learning_rate": 9.90286799087572e-06, "loss": 17.3901, "step": 78280 }, { "epoch": 0.15815075328159278, "grad_norm": 165.76022338867188, "learning_rate": 9.902799509130857e-06, "loss": 18.9962, "step": 78290 }, { "epoch": 0.1581709539142766, "grad_norm": 381.4460144042969, "learning_rate": 9.902731003490344e-06, "loss": 19.4837, "step": 78300 }, { "epoch": 0.15819115454696042, "grad_norm": 466.5162048339844, "learning_rate": 9.902662473954516e-06, "loss": 24.59, "step": 78310 }, { "epoch": 0.15821135517964421, "grad_norm": 919.7655029296875, "learning_rate": 9.902593920523706e-06, "loss": 38.7198, "step": 78320 }, { "epoch": 0.15823155581232803, "grad_norm": 211.0843963623047, "learning_rate": 9.902525343198249e-06, "loss": 17.7991, "step": 78330 }, { "epoch": 0.15825175644501185, "grad_norm": 894.8384399414062, "learning_rate": 9.902456741978475e-06, "loss": 23.4159, "step": 78340 }, { "epoch": 0.15827195707769567, "grad_norm": 917.4749755859375, "learning_rate": 9.902388116864723e-06, "loss": 28.902, "step": 78350 }, { "epoch": 0.1582921577103795, "grad_norm": 173.82229614257812, "learning_rate": 9.902319467857326e-06, "loss": 34.2183, "step": 78360 }, { "epoch": 0.15831235834306331, "grad_norm": 616.9852905273438, "learning_rate": 9.902250794956618e-06, "loss": 19.6543, "step": 78370 }, { "epoch": 0.15833255897574713, "grad_norm": 374.2037658691406, "learning_rate": 9.902182098162933e-06, "loss": 10.8244, "step": 78380 }, { "epoch": 0.15835275960843093, "grad_norm": 563.2325439453125, "learning_rate": 9.90211337747661e-06, "loss": 21.4827, "step": 78390 }, { "epoch": 0.15837296024111475, "grad_norm": 328.37738037109375, "learning_rate": 9.90204463289798e-06, "loss": 20.2847, "step": 78400 }, { "epoch": 0.15839316087379857, "grad_norm": 261.7314453125, "learning_rate": 9.901975864427378e-06, "loss": 17.9665, "step": 78410 }, { "epoch": 0.1584133615064824, "grad_norm": 357.9438781738281, "learning_rate": 9.90190707206514e-06, "loss": 33.0531, "step": 78420 }, { "epoch": 0.1584335621391662, "grad_norm": 505.8860168457031, "learning_rate": 9.901838255811602e-06, "loss": 21.0291, "step": 78430 }, { "epoch": 0.15845376277185003, "grad_norm": 987.0386962890625, "learning_rate": 9.9017694156671e-06, "loss": 27.4556, "step": 78440 }, { "epoch": 0.15847396340453382, "grad_norm": 564.0770263671875, "learning_rate": 9.901700551631966e-06, "loss": 22.7123, "step": 78450 }, { "epoch": 0.15849416403721764, "grad_norm": 160.28030395507812, "learning_rate": 9.901631663706539e-06, "loss": 21.0695, "step": 78460 }, { "epoch": 0.15851436466990146, "grad_norm": 0.0, "learning_rate": 9.901562751891155e-06, "loss": 17.3257, "step": 78470 }, { "epoch": 0.15853456530258528, "grad_norm": 
255.56430053710938, "learning_rate": 9.901493816186148e-06, "loss": 28.4382, "step": 78480 }, { "epoch": 0.1585547659352691, "grad_norm": 261.3754577636719, "learning_rate": 9.901424856591855e-06, "loss": 15.6467, "step": 78490 }, { "epoch": 0.15857496656795292, "grad_norm": 113.56884002685547, "learning_rate": 9.901355873108611e-06, "loss": 15.9694, "step": 78500 }, { "epoch": 0.1585951672006367, "grad_norm": 428.04351806640625, "learning_rate": 9.901286865736752e-06, "loss": 25.3237, "step": 78510 }, { "epoch": 0.15861536783332053, "grad_norm": 272.8048400878906, "learning_rate": 9.901217834476616e-06, "loss": 23.434, "step": 78520 }, { "epoch": 0.15863556846600435, "grad_norm": 240.38743591308594, "learning_rate": 9.90114877932854e-06, "loss": 25.2228, "step": 78530 }, { "epoch": 0.15865576909868817, "grad_norm": 931.5643920898438, "learning_rate": 9.901079700292858e-06, "loss": 25.6073, "step": 78540 }, { "epoch": 0.158675969731372, "grad_norm": 1009.3199462890625, "learning_rate": 9.901010597369908e-06, "loss": 25.051, "step": 78550 }, { "epoch": 0.1586961703640558, "grad_norm": 98.7724838256836, "learning_rate": 9.900941470560025e-06, "loss": 19.5606, "step": 78560 }, { "epoch": 0.15871637099673963, "grad_norm": 404.62738037109375, "learning_rate": 9.900872319863551e-06, "loss": 12.841, "step": 78570 }, { "epoch": 0.15873657162942342, "grad_norm": 814.7490234375, "learning_rate": 9.90080314528082e-06, "loss": 21.3623, "step": 78580 }, { "epoch": 0.15875677226210724, "grad_norm": 500.2481384277344, "learning_rate": 9.900733946812167e-06, "loss": 25.9193, "step": 78590 }, { "epoch": 0.15877697289479106, "grad_norm": 202.12466430664062, "learning_rate": 9.900664724457932e-06, "loss": 32.4658, "step": 78600 }, { "epoch": 0.15879717352747488, "grad_norm": 304.1930236816406, "learning_rate": 9.900595478218449e-06, "loss": 16.5442, "step": 78610 }, { "epoch": 0.1588173741601587, "grad_norm": 462.53460693359375, "learning_rate": 9.900526208094061e-06, "loss": 18.6229, "step": 78620 }, { "epoch": 0.15883757479284252, "grad_norm": 747.231201171875, "learning_rate": 9.900456914085101e-06, "loss": 25.9071, "step": 78630 }, { "epoch": 0.15885777542552632, "grad_norm": 685.5018310546875, "learning_rate": 9.90038759619191e-06, "loss": 17.1709, "step": 78640 }, { "epoch": 0.15887797605821014, "grad_norm": 176.51881408691406, "learning_rate": 9.900318254414823e-06, "loss": 13.024, "step": 78650 }, { "epoch": 0.15889817669089396, "grad_norm": 309.40802001953125, "learning_rate": 9.900248888754179e-06, "loss": 39.5304, "step": 78660 }, { "epoch": 0.15891837732357778, "grad_norm": 299.6368713378906, "learning_rate": 9.900179499210316e-06, "loss": 25.6999, "step": 78670 }, { "epoch": 0.1589385779562616, "grad_norm": 795.1394653320312, "learning_rate": 9.900110085783573e-06, "loss": 18.9494, "step": 78680 }, { "epoch": 0.15895877858894542, "grad_norm": 306.2209777832031, "learning_rate": 9.900040648474287e-06, "loss": 37.5444, "step": 78690 }, { "epoch": 0.15897897922162924, "grad_norm": 913.7659301757812, "learning_rate": 9.899971187282799e-06, "loss": 41.746, "step": 78700 }, { "epoch": 0.15899917985431303, "grad_norm": 345.07769775390625, "learning_rate": 9.899901702209445e-06, "loss": 20.4043, "step": 78710 }, { "epoch": 0.15901938048699685, "grad_norm": 782.451416015625, "learning_rate": 9.899832193254564e-06, "loss": 31.8815, "step": 78720 }, { "epoch": 0.15903958111968067, "grad_norm": 756.5360107421875, "learning_rate": 9.899762660418495e-06, "loss": 39.7848, "step": 78730 }, { "epoch": 
0.1590597817523645, "grad_norm": 521.2705078125, "learning_rate": 9.899693103701577e-06, "loss": 27.9996, "step": 78740 }, { "epoch": 0.1590799823850483, "grad_norm": 410.418701171875, "learning_rate": 9.899623523104149e-06, "loss": 17.4492, "step": 78750 }, { "epoch": 0.15910018301773213, "grad_norm": 274.18798828125, "learning_rate": 9.89955391862655e-06, "loss": 22.3157, "step": 78760 }, { "epoch": 0.15912038365041592, "grad_norm": 649.7140502929688, "learning_rate": 9.89948429026912e-06, "loss": 29.439, "step": 78770 }, { "epoch": 0.15914058428309974, "grad_norm": 510.0090637207031, "learning_rate": 9.8994146380322e-06, "loss": 23.7447, "step": 78780 }, { "epoch": 0.15916078491578356, "grad_norm": 239.9370880126953, "learning_rate": 9.899344961916123e-06, "loss": 23.484, "step": 78790 }, { "epoch": 0.15918098554846738, "grad_norm": 328.5083923339844, "learning_rate": 9.899275261921236e-06, "loss": 15.7193, "step": 78800 }, { "epoch": 0.1592011861811512, "grad_norm": 220.7934112548828, "learning_rate": 9.899205538047873e-06, "loss": 26.5226, "step": 78810 }, { "epoch": 0.15922138681383502, "grad_norm": 187.09764099121094, "learning_rate": 9.899135790296379e-06, "loss": 26.8904, "step": 78820 }, { "epoch": 0.1592415874465188, "grad_norm": 310.1947021484375, "learning_rate": 9.89906601866709e-06, "loss": 19.6561, "step": 78830 }, { "epoch": 0.15926178807920263, "grad_norm": 543.0774536132812, "learning_rate": 9.898996223160348e-06, "loss": 26.4144, "step": 78840 }, { "epoch": 0.15928198871188645, "grad_norm": 652.3689575195312, "learning_rate": 9.898926403776492e-06, "loss": 36.7921, "step": 78850 }, { "epoch": 0.15930218934457027, "grad_norm": 535.2853393554688, "learning_rate": 9.898856560515864e-06, "loss": 37.836, "step": 78860 }, { "epoch": 0.1593223899772541, "grad_norm": 407.8519592285156, "learning_rate": 9.898786693378801e-06, "loss": 18.7181, "step": 78870 }, { "epoch": 0.1593425906099379, "grad_norm": 126.90837097167969, "learning_rate": 9.898716802365648e-06, "loss": 26.3485, "step": 78880 }, { "epoch": 0.15936279124262173, "grad_norm": 841.9174194335938, "learning_rate": 9.898646887476742e-06, "loss": 41.1984, "step": 78890 }, { "epoch": 0.15938299187530552, "grad_norm": 364.14984130859375, "learning_rate": 9.898576948712427e-06, "loss": 13.7308, "step": 78900 }, { "epoch": 0.15940319250798934, "grad_norm": 1718.7718505859375, "learning_rate": 9.89850698607304e-06, "loss": 63.4012, "step": 78910 }, { "epoch": 0.15942339314067316, "grad_norm": 320.6148986816406, "learning_rate": 9.898436999558924e-06, "loss": 30.7214, "step": 78920 }, { "epoch": 0.15944359377335698, "grad_norm": 392.3424377441406, "learning_rate": 9.898366989170423e-06, "loss": 26.3176, "step": 78930 }, { "epoch": 0.1594637944060408, "grad_norm": 726.4696044921875, "learning_rate": 9.898296954907874e-06, "loss": 33.6014, "step": 78940 }, { "epoch": 0.15948399503872462, "grad_norm": 577.5730590820312, "learning_rate": 9.898226896771619e-06, "loss": 31.6721, "step": 78950 }, { "epoch": 0.15950419567140842, "grad_norm": 1348.5142822265625, "learning_rate": 9.898156814762e-06, "loss": 24.0305, "step": 78960 }, { "epoch": 0.15952439630409224, "grad_norm": 515.8429565429688, "learning_rate": 9.898086708879359e-06, "loss": 20.5706, "step": 78970 }, { "epoch": 0.15954459693677606, "grad_norm": 455.5331115722656, "learning_rate": 9.898016579124039e-06, "loss": 26.9815, "step": 78980 }, { "epoch": 0.15956479756945988, "grad_norm": 233.70321655273438, "learning_rate": 9.897946425496379e-06, "loss": 20.0229, "step": 78990 
}, { "epoch": 0.1595849982021437, "grad_norm": 485.7436828613281, "learning_rate": 9.89787624799672e-06, "loss": 17.0227, "step": 79000 }, { "epoch": 0.15960519883482752, "grad_norm": 206.46237182617188, "learning_rate": 9.897806046625408e-06, "loss": 24.8934, "step": 79010 }, { "epoch": 0.1596253994675113, "grad_norm": 575.5479125976562, "learning_rate": 9.897735821382786e-06, "loss": 25.1718, "step": 79020 }, { "epoch": 0.15964560010019513, "grad_norm": 186.63671875, "learning_rate": 9.89766557226919e-06, "loss": 20.1794, "step": 79030 }, { "epoch": 0.15966580073287895, "grad_norm": 283.55096435546875, "learning_rate": 9.897595299284968e-06, "loss": 25.0277, "step": 79040 }, { "epoch": 0.15968600136556277, "grad_norm": 308.6726989746094, "learning_rate": 9.897525002430459e-06, "loss": 36.4611, "step": 79050 }, { "epoch": 0.1597062019982466, "grad_norm": 326.8775329589844, "learning_rate": 9.89745468170601e-06, "loss": 21.8554, "step": 79060 }, { "epoch": 0.1597264026309304, "grad_norm": 462.3061218261719, "learning_rate": 9.897384337111956e-06, "loss": 27.4429, "step": 79070 }, { "epoch": 0.15974660326361423, "grad_norm": 472.64935302734375, "learning_rate": 9.89731396864865e-06, "loss": 22.6224, "step": 79080 }, { "epoch": 0.15976680389629802, "grad_norm": 454.6688537597656, "learning_rate": 9.897243576316426e-06, "loss": 32.4297, "step": 79090 }, { "epoch": 0.15978700452898184, "grad_norm": 139.9843292236328, "learning_rate": 9.897173160115633e-06, "loss": 19.9118, "step": 79100 }, { "epoch": 0.15980720516166566, "grad_norm": 404.95819091796875, "learning_rate": 9.89710272004661e-06, "loss": 27.7085, "step": 79110 }, { "epoch": 0.15982740579434948, "grad_norm": 372.12335205078125, "learning_rate": 9.897032256109705e-06, "loss": 19.7024, "step": 79120 }, { "epoch": 0.1598476064270333, "grad_norm": 99.77437591552734, "learning_rate": 9.896961768305255e-06, "loss": 37.5838, "step": 79130 }, { "epoch": 0.15986780705971712, "grad_norm": 16.08628273010254, "learning_rate": 9.89689125663361e-06, "loss": 15.0803, "step": 79140 }, { "epoch": 0.1598880076924009, "grad_norm": 142.9248809814453, "learning_rate": 9.89682072109511e-06, "loss": 38.2178, "step": 79150 }, { "epoch": 0.15990820832508473, "grad_norm": 1037.6297607421875, "learning_rate": 9.8967501616901e-06, "loss": 36.9221, "step": 79160 }, { "epoch": 0.15992840895776855, "grad_norm": 433.88153076171875, "learning_rate": 9.896679578418924e-06, "loss": 46.2312, "step": 79170 }, { "epoch": 0.15994860959045237, "grad_norm": 305.0263671875, "learning_rate": 9.896608971281926e-06, "loss": 14.7309, "step": 79180 }, { "epoch": 0.1599688102231362, "grad_norm": 538.8518676757812, "learning_rate": 9.896538340279449e-06, "loss": 36.7984, "step": 79190 }, { "epoch": 0.15998901085582, "grad_norm": 184.4010009765625, "learning_rate": 9.896467685411838e-06, "loss": 16.8615, "step": 79200 }, { "epoch": 0.16000921148850383, "grad_norm": 405.7578125, "learning_rate": 9.896397006679437e-06, "loss": 26.13, "step": 79210 }, { "epoch": 0.16002941212118763, "grad_norm": 516.3880004882812, "learning_rate": 9.89632630408259e-06, "loss": 22.1688, "step": 79220 }, { "epoch": 0.16004961275387145, "grad_norm": 382.6693115234375, "learning_rate": 9.896255577621646e-06, "loss": 39.9827, "step": 79230 }, { "epoch": 0.16006981338655527, "grad_norm": 166.48707580566406, "learning_rate": 9.896184827296942e-06, "loss": 20.4572, "step": 79240 }, { "epoch": 0.16009001401923909, "grad_norm": 1007.0042724609375, "learning_rate": 9.89611405310883e-06, "loss": 37.7073, "step": 
79250 }, { "epoch": 0.1601102146519229, "grad_norm": 357.4544677734375, "learning_rate": 9.89604325505765e-06, "loss": 26.8339, "step": 79260 }, { "epoch": 0.16013041528460673, "grad_norm": 221.9705810546875, "learning_rate": 9.89597243314375e-06, "loss": 23.2171, "step": 79270 }, { "epoch": 0.16015061591729052, "grad_norm": 1382.4564208984375, "learning_rate": 9.895901587367473e-06, "loss": 34.2562, "step": 79280 }, { "epoch": 0.16017081654997434, "grad_norm": 156.50645446777344, "learning_rate": 9.895830717729166e-06, "loss": 24.7322, "step": 79290 }, { "epoch": 0.16019101718265816, "grad_norm": 394.5559997558594, "learning_rate": 9.895759824229176e-06, "loss": 25.0025, "step": 79300 }, { "epoch": 0.16021121781534198, "grad_norm": 498.4718322753906, "learning_rate": 9.895688906867844e-06, "loss": 21.523, "step": 79310 }, { "epoch": 0.1602314184480258, "grad_norm": 883.3118896484375, "learning_rate": 9.89561796564552e-06, "loss": 38.3339, "step": 79320 }, { "epoch": 0.16025161908070962, "grad_norm": 458.0317687988281, "learning_rate": 9.895547000562546e-06, "loss": 20.0624, "step": 79330 }, { "epoch": 0.1602718197133934, "grad_norm": 438.5228576660156, "learning_rate": 9.895476011619269e-06, "loss": 35.2709, "step": 79340 }, { "epoch": 0.16029202034607723, "grad_norm": 300.7026062011719, "learning_rate": 9.895404998816038e-06, "loss": 21.5623, "step": 79350 }, { "epoch": 0.16031222097876105, "grad_norm": 396.0194091796875, "learning_rate": 9.895333962153195e-06, "loss": 18.2463, "step": 79360 }, { "epoch": 0.16033242161144487, "grad_norm": 136.9529266357422, "learning_rate": 9.895262901631088e-06, "loss": 26.6099, "step": 79370 }, { "epoch": 0.1603526222441287, "grad_norm": 322.3371276855469, "learning_rate": 9.895191817250064e-06, "loss": 25.9525, "step": 79380 }, { "epoch": 0.1603728228768125, "grad_norm": 151.4694366455078, "learning_rate": 9.89512070901047e-06, "loss": 15.6631, "step": 79390 }, { "epoch": 0.16039302350949633, "grad_norm": 291.7964172363281, "learning_rate": 9.89504957691265e-06, "loss": 16.5786, "step": 79400 }, { "epoch": 0.16041322414218012, "grad_norm": 261.4203186035156, "learning_rate": 9.894978420956953e-06, "loss": 14.5822, "step": 79410 }, { "epoch": 0.16043342477486394, "grad_norm": 631.08642578125, "learning_rate": 9.894907241143722e-06, "loss": 21.6464, "step": 79420 }, { "epoch": 0.16045362540754776, "grad_norm": 390.1617736816406, "learning_rate": 9.89483603747331e-06, "loss": 34.0947, "step": 79430 }, { "epoch": 0.16047382604023158, "grad_norm": 240.8678741455078, "learning_rate": 9.89476480994606e-06, "loss": 47.7836, "step": 79440 }, { "epoch": 0.1604940266729154, "grad_norm": 767.7398071289062, "learning_rate": 9.894693558562319e-06, "loss": 19.2485, "step": 79450 }, { "epoch": 0.16051422730559922, "grad_norm": 880.9559326171875, "learning_rate": 9.894622283322436e-06, "loss": 28.4394, "step": 79460 }, { "epoch": 0.16053442793828301, "grad_norm": 347.03765869140625, "learning_rate": 9.894550984226759e-06, "loss": 22.7121, "step": 79470 }, { "epoch": 0.16055462857096683, "grad_norm": 497.994384765625, "learning_rate": 9.894479661275631e-06, "loss": 22.3754, "step": 79480 }, { "epoch": 0.16057482920365065, "grad_norm": 418.9917297363281, "learning_rate": 9.894408314469404e-06, "loss": 29.0265, "step": 79490 }, { "epoch": 0.16059502983633447, "grad_norm": 813.7963256835938, "learning_rate": 9.894336943808426e-06, "loss": 31.7485, "step": 79500 }, { "epoch": 0.1606152304690183, "grad_norm": 968.6171875, "learning_rate": 9.894265549293043e-06, "loss": 
19.6763, "step": 79510 }, { "epoch": 0.16063543110170211, "grad_norm": 200.5768280029297, "learning_rate": 9.894194130923602e-06, "loss": 20.3647, "step": 79520 }, { "epoch": 0.16065563173438593, "grad_norm": 1075.686767578125, "learning_rate": 9.894122688700452e-06, "loss": 49.7737, "step": 79530 }, { "epoch": 0.16067583236706973, "grad_norm": 535.426513671875, "learning_rate": 9.894051222623943e-06, "loss": 16.3721, "step": 79540 }, { "epoch": 0.16069603299975355, "grad_norm": 645.5509033203125, "learning_rate": 9.893979732694422e-06, "loss": 35.6814, "step": 79550 }, { "epoch": 0.16071623363243737, "grad_norm": 494.3453063964844, "learning_rate": 9.893908218912237e-06, "loss": 30.173, "step": 79560 }, { "epoch": 0.1607364342651212, "grad_norm": 543.2445068359375, "learning_rate": 9.893836681277736e-06, "loss": 16.8465, "step": 79570 }, { "epoch": 0.160756634897805, "grad_norm": 445.7519226074219, "learning_rate": 9.89376511979127e-06, "loss": 17.2944, "step": 79580 }, { "epoch": 0.16077683553048883, "grad_norm": 349.07275390625, "learning_rate": 9.893693534453186e-06, "loss": 23.8222, "step": 79590 }, { "epoch": 0.16079703616317262, "grad_norm": 555.302001953125, "learning_rate": 9.893621925263832e-06, "loss": 24.3078, "step": 79600 }, { "epoch": 0.16081723679585644, "grad_norm": 283.0042419433594, "learning_rate": 9.89355029222356e-06, "loss": 17.526, "step": 79610 }, { "epoch": 0.16083743742854026, "grad_norm": 327.09649658203125, "learning_rate": 9.893478635332716e-06, "loss": 20.9916, "step": 79620 }, { "epoch": 0.16085763806122408, "grad_norm": 710.9197998046875, "learning_rate": 9.893406954591651e-06, "loss": 27.72, "step": 79630 }, { "epoch": 0.1608778386939079, "grad_norm": 314.8814392089844, "learning_rate": 9.893335250000715e-06, "loss": 15.2672, "step": 79640 }, { "epoch": 0.16089803932659172, "grad_norm": 287.74749755859375, "learning_rate": 9.893263521560255e-06, "loss": 35.9944, "step": 79650 }, { "epoch": 0.1609182399592755, "grad_norm": 526.1190185546875, "learning_rate": 9.893191769270624e-06, "loss": 17.8103, "step": 79660 }, { "epoch": 0.16093844059195933, "grad_norm": 195.8773956298828, "learning_rate": 9.893119993132167e-06, "loss": 31.6414, "step": 79670 }, { "epoch": 0.16095864122464315, "grad_norm": 404.8184814453125, "learning_rate": 9.89304819314524e-06, "loss": 29.388, "step": 79680 }, { "epoch": 0.16097884185732697, "grad_norm": 266.7315979003906, "learning_rate": 9.892976369310188e-06, "loss": 18.0616, "step": 79690 }, { "epoch": 0.1609990424900108, "grad_norm": 986.9926147460938, "learning_rate": 9.89290452162736e-06, "loss": 19.9799, "step": 79700 }, { "epoch": 0.1610192431226946, "grad_norm": 558.1566772460938, "learning_rate": 9.892832650097113e-06, "loss": 12.9845, "step": 79710 }, { "epoch": 0.16103944375537843, "grad_norm": 499.2138366699219, "learning_rate": 9.89276075471979e-06, "loss": 20.4387, "step": 79720 }, { "epoch": 0.16105964438806222, "grad_norm": 318.83428955078125, "learning_rate": 9.892688835495747e-06, "loss": 37.4188, "step": 79730 }, { "epoch": 0.16107984502074604, "grad_norm": 922.7367553710938, "learning_rate": 9.89261689242533e-06, "loss": 24.9707, "step": 79740 }, { "epoch": 0.16110004565342986, "grad_norm": 407.1906433105469, "learning_rate": 9.892544925508894e-06, "loss": 17.5345, "step": 79750 }, { "epoch": 0.16112024628611368, "grad_norm": 346.2884826660156, "learning_rate": 9.892472934746784e-06, "loss": 27.4917, "step": 79760 }, { "epoch": 0.1611404469187975, "grad_norm": 190.8533172607422, "learning_rate": 
9.892400920139357e-06, "loss": 36.435, "step": 79770 }, { "epoch": 0.16116064755148132, "grad_norm": 59.34263229370117, "learning_rate": 9.892328881686961e-06, "loss": 14.9362, "step": 79780 }, { "epoch": 0.16118084818416512, "grad_norm": 558.2163696289062, "learning_rate": 9.892256819389947e-06, "loss": 30.6207, "step": 79790 }, { "epoch": 0.16120104881684894, "grad_norm": 229.86813354492188, "learning_rate": 9.892184733248666e-06, "loss": 19.285, "step": 79800 }, { "epoch": 0.16122124944953276, "grad_norm": 801.639404296875, "learning_rate": 9.89211262326347e-06, "loss": 29.1693, "step": 79810 }, { "epoch": 0.16124145008221658, "grad_norm": 560.8685302734375, "learning_rate": 9.892040489434711e-06, "loss": 34.4534, "step": 79820 }, { "epoch": 0.1612616507149004, "grad_norm": 351.6724853515625, "learning_rate": 9.89196833176274e-06, "loss": 35.787, "step": 79830 }, { "epoch": 0.16128185134758422, "grad_norm": 287.583984375, "learning_rate": 9.891896150247909e-06, "loss": 11.5023, "step": 79840 }, { "epoch": 0.16130205198026804, "grad_norm": 695.3501586914062, "learning_rate": 9.891823944890569e-06, "loss": 31.0794, "step": 79850 }, { "epoch": 0.16132225261295183, "grad_norm": 909.6913452148438, "learning_rate": 9.891751715691071e-06, "loss": 40.7071, "step": 79860 }, { "epoch": 0.16134245324563565, "grad_norm": 70.47769927978516, "learning_rate": 9.89167946264977e-06, "loss": 15.011, "step": 79870 }, { "epoch": 0.16136265387831947, "grad_norm": 341.2173767089844, "learning_rate": 9.891607185767018e-06, "loss": 29.7133, "step": 79880 }, { "epoch": 0.1613828545110033, "grad_norm": 1443.01708984375, "learning_rate": 9.891534885043164e-06, "loss": 40.5355, "step": 79890 }, { "epoch": 0.1614030551436871, "grad_norm": 685.8870239257812, "learning_rate": 9.891462560478562e-06, "loss": 23.2146, "step": 79900 }, { "epoch": 0.16142325577637093, "grad_norm": 180.77066040039062, "learning_rate": 9.891390212073566e-06, "loss": 17.5765, "step": 79910 }, { "epoch": 0.16144345640905472, "grad_norm": 525.450927734375, "learning_rate": 9.891317839828527e-06, "loss": 44.7903, "step": 79920 }, { "epoch": 0.16146365704173854, "grad_norm": 477.0588073730469, "learning_rate": 9.891245443743797e-06, "loss": 46.1729, "step": 79930 }, { "epoch": 0.16148385767442236, "grad_norm": 689.93505859375, "learning_rate": 9.891173023819731e-06, "loss": 36.2425, "step": 79940 }, { "epoch": 0.16150405830710618, "grad_norm": 440.6839904785156, "learning_rate": 9.891100580056681e-06, "loss": 32.2712, "step": 79950 }, { "epoch": 0.16152425893979, "grad_norm": 283.39691162109375, "learning_rate": 9.891028112454998e-06, "loss": 30.0116, "step": 79960 }, { "epoch": 0.16154445957247382, "grad_norm": 135.4347381591797, "learning_rate": 9.890955621015039e-06, "loss": 36.9487, "step": 79970 }, { "epoch": 0.1615646602051576, "grad_norm": 114.75359344482422, "learning_rate": 9.890883105737156e-06, "loss": 29.5158, "step": 79980 }, { "epoch": 0.16158486083784143, "grad_norm": 703.3095703125, "learning_rate": 9.890810566621702e-06, "loss": 32.883, "step": 79990 }, { "epoch": 0.16160506147052525, "grad_norm": 276.4881286621094, "learning_rate": 9.890738003669029e-06, "loss": 13.6305, "step": 80000 }, { "epoch": 0.16162526210320907, "grad_norm": 756.2501831054688, "learning_rate": 9.890665416879492e-06, "loss": 30.3908, "step": 80010 }, { "epoch": 0.1616454627358929, "grad_norm": 63.37554168701172, "learning_rate": 9.890592806253447e-06, "loss": 10.6785, "step": 80020 }, { "epoch": 0.1616656633685767, "grad_norm": 669.55859375, 
"learning_rate": 9.890520171791244e-06, "loss": 27.5863, "step": 80030 }, { "epoch": 0.16168586400126053, "grad_norm": 300.28253173828125, "learning_rate": 9.89044751349324e-06, "loss": 18.5517, "step": 80040 }, { "epoch": 0.16170606463394432, "grad_norm": 853.7218627929688, "learning_rate": 9.890374831359787e-06, "loss": 25.1595, "step": 80050 }, { "epoch": 0.16172626526662814, "grad_norm": 80.81767272949219, "learning_rate": 9.89030212539124e-06, "loss": 20.0168, "step": 80060 }, { "epoch": 0.16174646589931196, "grad_norm": 334.1025085449219, "learning_rate": 9.890229395587954e-06, "loss": 21.4223, "step": 80070 }, { "epoch": 0.16176666653199578, "grad_norm": 420.7502136230469, "learning_rate": 9.890156641950284e-06, "loss": 19.9016, "step": 80080 }, { "epoch": 0.1617868671646796, "grad_norm": 312.5161437988281, "learning_rate": 9.890083864478584e-06, "loss": 33.7502, "step": 80090 }, { "epoch": 0.16180706779736342, "grad_norm": 563.5485229492188, "learning_rate": 9.890011063173207e-06, "loss": 35.707, "step": 80100 }, { "epoch": 0.16182726843004722, "grad_norm": 328.59368896484375, "learning_rate": 9.889938238034509e-06, "loss": 23.2849, "step": 80110 }, { "epoch": 0.16184746906273104, "grad_norm": 311.5693359375, "learning_rate": 9.889865389062845e-06, "loss": 22.8267, "step": 80120 }, { "epoch": 0.16186766969541486, "grad_norm": 353.86895751953125, "learning_rate": 9.889792516258571e-06, "loss": 21.1397, "step": 80130 }, { "epoch": 0.16188787032809868, "grad_norm": 381.8929138183594, "learning_rate": 9.88971961962204e-06, "loss": 16.9112, "step": 80140 }, { "epoch": 0.1619080709607825, "grad_norm": 780.3839721679688, "learning_rate": 9.88964669915361e-06, "loss": 19.9509, "step": 80150 }, { "epoch": 0.16192827159346632, "grad_norm": 838.907470703125, "learning_rate": 9.889573754853633e-06, "loss": 28.3124, "step": 80160 }, { "epoch": 0.16194847222615014, "grad_norm": 416.4563903808594, "learning_rate": 9.889500786722471e-06, "loss": 17.7988, "step": 80170 }, { "epoch": 0.16196867285883393, "grad_norm": 491.28765869140625, "learning_rate": 9.889427794760472e-06, "loss": 17.411, "step": 80180 }, { "epoch": 0.16198887349151775, "grad_norm": 208.65689086914062, "learning_rate": 9.889354778967995e-06, "loss": 18.2061, "step": 80190 }, { "epoch": 0.16200907412420157, "grad_norm": 373.3161926269531, "learning_rate": 9.889281739345395e-06, "loss": 23.4229, "step": 80200 }, { "epoch": 0.1620292747568854, "grad_norm": 343.974365234375, "learning_rate": 9.88920867589303e-06, "loss": 11.5185, "step": 80210 }, { "epoch": 0.1620494753895692, "grad_norm": 240.10968017578125, "learning_rate": 9.889135588611254e-06, "loss": 29.3327, "step": 80220 }, { "epoch": 0.16206967602225303, "grad_norm": 447.5523681640625, "learning_rate": 9.889062477500425e-06, "loss": 33.0841, "step": 80230 }, { "epoch": 0.16208987665493682, "grad_norm": 107.09142303466797, "learning_rate": 9.8889893425609e-06, "loss": 20.7825, "step": 80240 }, { "epoch": 0.16211007728762064, "grad_norm": 541.6611328125, "learning_rate": 9.88891618379303e-06, "loss": 35.7423, "step": 80250 }, { "epoch": 0.16213027792030446, "grad_norm": 749.7138671875, "learning_rate": 9.88884300119718e-06, "loss": 29.4236, "step": 80260 }, { "epoch": 0.16215047855298828, "grad_norm": 336.53924560546875, "learning_rate": 9.888769794773699e-06, "loss": 51.9439, "step": 80270 }, { "epoch": 0.1621706791856721, "grad_norm": 143.0902557373047, "learning_rate": 9.888696564522948e-06, "loss": 29.0381, "step": 80280 }, { "epoch": 0.16219087981835592, "grad_norm": 
1135.908203125, "learning_rate": 9.888623310445282e-06, "loss": 44.4468, "step": 80290 }, { "epoch": 0.1622110804510397, "grad_norm": 355.0585632324219, "learning_rate": 9.88855003254106e-06, "loss": 12.7337, "step": 80300 }, { "epoch": 0.16223128108372353, "grad_norm": 822.2958984375, "learning_rate": 9.88847673081064e-06, "loss": 29.0946, "step": 80310 }, { "epoch": 0.16225148171640735, "grad_norm": 459.4270935058594, "learning_rate": 9.888403405254374e-06, "loss": 31.1366, "step": 80320 }, { "epoch": 0.16227168234909117, "grad_norm": 378.10626220703125, "learning_rate": 9.888330055872623e-06, "loss": 28.2985, "step": 80330 }, { "epoch": 0.162291882981775, "grad_norm": 463.220458984375, "learning_rate": 9.888256682665744e-06, "loss": 38.7636, "step": 80340 }, { "epoch": 0.1623120836144588, "grad_norm": 752.4970703125, "learning_rate": 9.888183285634097e-06, "loss": 23.502, "step": 80350 }, { "epoch": 0.16233228424714263, "grad_norm": 398.5994567871094, "learning_rate": 9.888109864778036e-06, "loss": 19.4908, "step": 80360 }, { "epoch": 0.16235248487982643, "grad_norm": 648.8282470703125, "learning_rate": 9.88803642009792e-06, "loss": 22.1598, "step": 80370 }, { "epoch": 0.16237268551251025, "grad_norm": 574.5921020507812, "learning_rate": 9.887962951594108e-06, "loss": 27.2408, "step": 80380 }, { "epoch": 0.16239288614519407, "grad_norm": 213.2351837158203, "learning_rate": 9.887889459266957e-06, "loss": 30.9682, "step": 80390 }, { "epoch": 0.16241308677787789, "grad_norm": 579.56298828125, "learning_rate": 9.887815943116827e-06, "loss": 35.2533, "step": 80400 }, { "epoch": 0.1624332874105617, "grad_norm": 317.9184265136719, "learning_rate": 9.887742403144074e-06, "loss": 21.8071, "step": 80410 }, { "epoch": 0.16245348804324553, "grad_norm": 1311.876708984375, "learning_rate": 9.887668839349057e-06, "loss": 28.1026, "step": 80420 }, { "epoch": 0.16247368867592932, "grad_norm": 919.910888671875, "learning_rate": 9.887595251732135e-06, "loss": 31.4523, "step": 80430 }, { "epoch": 0.16249388930861314, "grad_norm": 147.07989501953125, "learning_rate": 9.887521640293668e-06, "loss": 15.9144, "step": 80440 }, { "epoch": 0.16251408994129696, "grad_norm": 426.0009765625, "learning_rate": 9.887448005034011e-06, "loss": 20.6647, "step": 80450 }, { "epoch": 0.16253429057398078, "grad_norm": 85.75631713867188, "learning_rate": 9.887374345953526e-06, "loss": 15.8915, "step": 80460 }, { "epoch": 0.1625544912066646, "grad_norm": 407.2970275878906, "learning_rate": 9.88730066305257e-06, "loss": 22.5537, "step": 80470 }, { "epoch": 0.16257469183934842, "grad_norm": 560.683837890625, "learning_rate": 9.887226956331506e-06, "loss": 26.6558, "step": 80480 }, { "epoch": 0.16259489247203224, "grad_norm": 315.2942199707031, "learning_rate": 9.887153225790688e-06, "loss": 27.6426, "step": 80490 }, { "epoch": 0.16261509310471603, "grad_norm": 193.90476989746094, "learning_rate": 9.887079471430481e-06, "loss": 11.5378, "step": 80500 }, { "epoch": 0.16263529373739985, "grad_norm": 291.6756896972656, "learning_rate": 9.88700569325124e-06, "loss": 28.5151, "step": 80510 }, { "epoch": 0.16265549437008367, "grad_norm": 414.41900634765625, "learning_rate": 9.886931891253324e-06, "loss": 12.7147, "step": 80520 }, { "epoch": 0.1626756950027675, "grad_norm": 742.55078125, "learning_rate": 9.886858065437097e-06, "loss": 55.4787, "step": 80530 }, { "epoch": 0.1626958956354513, "grad_norm": 539.345947265625, "learning_rate": 9.886784215802915e-06, "loss": 43.4888, "step": 80540 }, { "epoch": 0.16271609626813513, "grad_norm": 
87.77115631103516, "learning_rate": 9.88671034235114e-06, "loss": 23.1452, "step": 80550 }, { "epoch": 0.16273629690081892, "grad_norm": 287.9833984375, "learning_rate": 9.886636445082132e-06, "loss": 29.6764, "step": 80560 }, { "epoch": 0.16275649753350274, "grad_norm": 16.543699264526367, "learning_rate": 9.88656252399625e-06, "loss": 19.3727, "step": 80570 }, { "epoch": 0.16277669816618656, "grad_norm": 58.35026550292969, "learning_rate": 9.886488579093856e-06, "loss": 23.0495, "step": 80580 }, { "epoch": 0.16279689879887038, "grad_norm": 266.4010314941406, "learning_rate": 9.886414610375309e-06, "loss": 22.8869, "step": 80590 }, { "epoch": 0.1628170994315542, "grad_norm": 634.8953857421875, "learning_rate": 9.886340617840968e-06, "loss": 22.1367, "step": 80600 }, { "epoch": 0.16283730006423802, "grad_norm": 189.2071075439453, "learning_rate": 9.886266601491197e-06, "loss": 32.8526, "step": 80610 }, { "epoch": 0.16285750069692181, "grad_norm": 0.0, "learning_rate": 9.886192561326356e-06, "loss": 25.8318, "step": 80620 }, { "epoch": 0.16287770132960563, "grad_norm": 89.25527954101562, "learning_rate": 9.886118497346804e-06, "loss": 26.2434, "step": 80630 }, { "epoch": 0.16289790196228945, "grad_norm": 630.5790405273438, "learning_rate": 9.886044409552902e-06, "loss": 22.8639, "step": 80640 }, { "epoch": 0.16291810259497327, "grad_norm": 0.0, "learning_rate": 9.885970297945013e-06, "loss": 28.5499, "step": 80650 }, { "epoch": 0.1629383032276571, "grad_norm": 559.54736328125, "learning_rate": 9.885896162523498e-06, "loss": 32.4067, "step": 80660 }, { "epoch": 0.16295850386034091, "grad_norm": 158.8244171142578, "learning_rate": 9.885822003288717e-06, "loss": 29.8507, "step": 80670 }, { "epoch": 0.16297870449302473, "grad_norm": 467.65557861328125, "learning_rate": 9.885747820241032e-06, "loss": 31.5979, "step": 80680 }, { "epoch": 0.16299890512570853, "grad_norm": 497.6941223144531, "learning_rate": 9.885673613380806e-06, "loss": 15.9469, "step": 80690 }, { "epoch": 0.16301910575839235, "grad_norm": 263.04931640625, "learning_rate": 9.8855993827084e-06, "loss": 15.349, "step": 80700 }, { "epoch": 0.16303930639107617, "grad_norm": 274.8655090332031, "learning_rate": 9.885525128224173e-06, "loss": 20.3094, "step": 80710 }, { "epoch": 0.16305950702376, "grad_norm": 378.819091796875, "learning_rate": 9.885450849928489e-06, "loss": 35.3756, "step": 80720 }, { "epoch": 0.1630797076564438, "grad_norm": 501.4461364746094, "learning_rate": 9.885376547821711e-06, "loss": 22.8929, "step": 80730 }, { "epoch": 0.16309990828912763, "grad_norm": 536.461669921875, "learning_rate": 9.885302221904201e-06, "loss": 15.1673, "step": 80740 }, { "epoch": 0.16312010892181142, "grad_norm": 455.5689697265625, "learning_rate": 9.88522787217632e-06, "loss": 34.1152, "step": 80750 }, { "epoch": 0.16314030955449524, "grad_norm": 263.53814697265625, "learning_rate": 9.88515349863843e-06, "loss": 35.1738, "step": 80760 }, { "epoch": 0.16316051018717906, "grad_norm": 345.0772705078125, "learning_rate": 9.885079101290894e-06, "loss": 20.5357, "step": 80770 }, { "epoch": 0.16318071081986288, "grad_norm": 328.6314697265625, "learning_rate": 9.885004680134075e-06, "loss": 18.8966, "step": 80780 }, { "epoch": 0.1632009114525467, "grad_norm": 366.3641662597656, "learning_rate": 9.884930235168338e-06, "loss": 48.2096, "step": 80790 }, { "epoch": 0.16322111208523052, "grad_norm": 729.0635375976562, "learning_rate": 9.884855766394041e-06, "loss": 37.265, "step": 80800 }, { "epoch": 0.16324131271791434, "grad_norm": 574.7216796875, 
"learning_rate": 9.88478127381155e-06, "loss": 20.4683, "step": 80810 }, { "epoch": 0.16326151335059813, "grad_norm": 457.9243469238281, "learning_rate": 9.884706757421229e-06, "loss": 17.3267, "step": 80820 }, { "epoch": 0.16328171398328195, "grad_norm": 305.1440734863281, "learning_rate": 9.884632217223438e-06, "loss": 28.1274, "step": 80830 }, { "epoch": 0.16330191461596577, "grad_norm": 612.5533447265625, "learning_rate": 9.884557653218544e-06, "loss": 32.4094, "step": 80840 }, { "epoch": 0.1633221152486496, "grad_norm": 1437.5220947265625, "learning_rate": 9.884483065406905e-06, "loss": 26.99, "step": 80850 }, { "epoch": 0.1633423158813334, "grad_norm": 757.4486083984375, "learning_rate": 9.88440845378889e-06, "loss": 33.4293, "step": 80860 }, { "epoch": 0.16336251651401723, "grad_norm": 713.0037841796875, "learning_rate": 9.884333818364861e-06, "loss": 32.334, "step": 80870 }, { "epoch": 0.16338271714670102, "grad_norm": 450.5823669433594, "learning_rate": 9.88425915913518e-06, "loss": 29.5672, "step": 80880 }, { "epoch": 0.16340291777938484, "grad_norm": 496.3887023925781, "learning_rate": 9.884184476100215e-06, "loss": 20.1971, "step": 80890 }, { "epoch": 0.16342311841206866, "grad_norm": 98.01220703125, "learning_rate": 9.884109769260326e-06, "loss": 23.402, "step": 80900 }, { "epoch": 0.16344331904475248, "grad_norm": 253.8908233642578, "learning_rate": 9.884035038615876e-06, "loss": 15.4084, "step": 80910 }, { "epoch": 0.1634635196774363, "grad_norm": 272.7874450683594, "learning_rate": 9.883960284167234e-06, "loss": 25.7597, "step": 80920 }, { "epoch": 0.16348372031012012, "grad_norm": 736.2428588867188, "learning_rate": 9.88388550591476e-06, "loss": 21.9517, "step": 80930 }, { "epoch": 0.16350392094280392, "grad_norm": 453.32965087890625, "learning_rate": 9.883810703858823e-06, "loss": 21.7236, "step": 80940 }, { "epoch": 0.16352412157548774, "grad_norm": 694.5930786132812, "learning_rate": 9.883735877999785e-06, "loss": 32.9681, "step": 80950 }, { "epoch": 0.16354432220817156, "grad_norm": 423.9303894042969, "learning_rate": 9.883661028338009e-06, "loss": 21.3099, "step": 80960 }, { "epoch": 0.16356452284085538, "grad_norm": 305.148681640625, "learning_rate": 9.88358615487386e-06, "loss": 20.2351, "step": 80970 }, { "epoch": 0.1635847234735392, "grad_norm": 310.5696105957031, "learning_rate": 9.883511257607708e-06, "loss": 25.3677, "step": 80980 }, { "epoch": 0.16360492410622302, "grad_norm": 546.5814208984375, "learning_rate": 9.883436336539913e-06, "loss": 33.1325, "step": 80990 }, { "epoch": 0.16362512473890684, "grad_norm": 1494.5396728515625, "learning_rate": 9.883361391670841e-06, "loss": 46.9954, "step": 81000 }, { "epoch": 0.16364532537159063, "grad_norm": 241.7465362548828, "learning_rate": 9.883286423000857e-06, "loss": 10.2596, "step": 81010 }, { "epoch": 0.16366552600427445, "grad_norm": 234.1220245361328, "learning_rate": 9.883211430530329e-06, "loss": 14.028, "step": 81020 }, { "epoch": 0.16368572663695827, "grad_norm": 305.42034912109375, "learning_rate": 9.88313641425962e-06, "loss": 30.106, "step": 81030 }, { "epoch": 0.1637059272696421, "grad_norm": 110.0243148803711, "learning_rate": 9.883061374189095e-06, "loss": 31.9587, "step": 81040 }, { "epoch": 0.1637261279023259, "grad_norm": 0.0, "learning_rate": 9.882986310319124e-06, "loss": 32.0352, "step": 81050 }, { "epoch": 0.16374632853500973, "grad_norm": 217.22923278808594, "learning_rate": 9.882911222650069e-06, "loss": 18.9343, "step": 81060 }, { "epoch": 0.16376652916769352, "grad_norm": 
297.120361328125, "learning_rate": 9.882836111182295e-06, "loss": 26.9097, "step": 81070 }, { "epoch": 0.16378672980037734, "grad_norm": 95.64879608154297, "learning_rate": 9.882760975916173e-06, "loss": 16.0606, "step": 81080 }, { "epoch": 0.16380693043306116, "grad_norm": 724.5462036132812, "learning_rate": 9.882685816852064e-06, "loss": 32.3865, "step": 81090 }, { "epoch": 0.16382713106574498, "grad_norm": 648.523681640625, "learning_rate": 9.882610633990337e-06, "loss": 27.8499, "step": 81100 }, { "epoch": 0.1638473316984288, "grad_norm": 352.72210693359375, "learning_rate": 9.882535427331357e-06, "loss": 17.0921, "step": 81110 }, { "epoch": 0.16386753233111262, "grad_norm": 106.14461517333984, "learning_rate": 9.882460196875495e-06, "loss": 37.6977, "step": 81120 }, { "epoch": 0.16388773296379644, "grad_norm": 161.5916748046875, "learning_rate": 9.88238494262311e-06, "loss": 30.4489, "step": 81130 }, { "epoch": 0.16390793359648023, "grad_norm": 852.1343383789062, "learning_rate": 9.882309664574576e-06, "loss": 18.6942, "step": 81140 }, { "epoch": 0.16392813422916405, "grad_norm": 269.9232177734375, "learning_rate": 9.882234362730255e-06, "loss": 16.7037, "step": 81150 }, { "epoch": 0.16394833486184787, "grad_norm": 572.7348022460938, "learning_rate": 9.882159037090517e-06, "loss": 22.5891, "step": 81160 }, { "epoch": 0.1639685354945317, "grad_norm": 443.86895751953125, "learning_rate": 9.882083687655728e-06, "loss": 27.8898, "step": 81170 }, { "epoch": 0.1639887361272155, "grad_norm": 812.183837890625, "learning_rate": 9.882008314426253e-06, "loss": 39.9703, "step": 81180 }, { "epoch": 0.16400893675989933, "grad_norm": 632.869140625, "learning_rate": 9.881932917402464e-06, "loss": 19.4012, "step": 81190 }, { "epoch": 0.16402913739258312, "grad_norm": 374.23455810546875, "learning_rate": 9.881857496584726e-06, "loss": 24.5433, "step": 81200 }, { "epoch": 0.16404933802526694, "grad_norm": 323.980712890625, "learning_rate": 9.881782051973405e-06, "loss": 17.1854, "step": 81210 }, { "epoch": 0.16406953865795076, "grad_norm": 148.46449279785156, "learning_rate": 9.88170658356887e-06, "loss": 35.3014, "step": 81220 }, { "epoch": 0.16408973929063458, "grad_norm": 500.7608947753906, "learning_rate": 9.881631091371492e-06, "loss": 29.5488, "step": 81230 }, { "epoch": 0.1641099399233184, "grad_norm": 1187.115966796875, "learning_rate": 9.881555575381635e-06, "loss": 27.2644, "step": 81240 }, { "epoch": 0.16413014055600222, "grad_norm": 544.7838134765625, "learning_rate": 9.881480035599667e-06, "loss": 43.3424, "step": 81250 }, { "epoch": 0.16415034118868602, "grad_norm": 867.3125610351562, "learning_rate": 9.88140447202596e-06, "loss": 34.9676, "step": 81260 }, { "epoch": 0.16417054182136984, "grad_norm": 1001.7696533203125, "learning_rate": 9.881328884660876e-06, "loss": 22.9481, "step": 81270 }, { "epoch": 0.16419074245405366, "grad_norm": 254.9727020263672, "learning_rate": 9.88125327350479e-06, "loss": 25.8817, "step": 81280 }, { "epoch": 0.16421094308673748, "grad_norm": 839.4126586914062, "learning_rate": 9.881177638558066e-06, "loss": 29.3466, "step": 81290 }, { "epoch": 0.1642311437194213, "grad_norm": 311.6737060546875, "learning_rate": 9.881101979821075e-06, "loss": 42.4605, "step": 81300 }, { "epoch": 0.16425134435210512, "grad_norm": 202.302001953125, "learning_rate": 9.881026297294185e-06, "loss": 38.1463, "step": 81310 }, { "epoch": 0.16427154498478894, "grad_norm": 401.0799560546875, "learning_rate": 9.880950590977764e-06, "loss": 19.8945, "step": 81320 }, { "epoch": 
0.16429174561747273, "grad_norm": 120.13053131103516, "learning_rate": 9.880874860872183e-06, "loss": 9.1085, "step": 81330 }, { "epoch": 0.16431194625015655, "grad_norm": 621.6092529296875, "learning_rate": 9.88079910697781e-06, "loss": 21.6557, "step": 81340 }, { "epoch": 0.16433214688284037, "grad_norm": 434.39892578125, "learning_rate": 9.880723329295012e-06, "loss": 20.388, "step": 81350 }, { "epoch": 0.1643523475155242, "grad_norm": 413.6324768066406, "learning_rate": 9.880647527824161e-06, "loss": 19.689, "step": 81360 }, { "epoch": 0.164372548148208, "grad_norm": 903.112548828125, "learning_rate": 9.880571702565627e-06, "loss": 37.867, "step": 81370 }, { "epoch": 0.16439274878089183, "grad_norm": 723.8510131835938, "learning_rate": 9.880495853519777e-06, "loss": 24.6308, "step": 81380 }, { "epoch": 0.16441294941357562, "grad_norm": 371.6813659667969, "learning_rate": 9.880419980686986e-06, "loss": 24.4824, "step": 81390 }, { "epoch": 0.16443315004625944, "grad_norm": 267.4392395019531, "learning_rate": 9.880344084067616e-06, "loss": 37.8947, "step": 81400 }, { "epoch": 0.16445335067894326, "grad_norm": 477.8439636230469, "learning_rate": 9.880268163662043e-06, "loss": 24.8996, "step": 81410 }, { "epoch": 0.16447355131162708, "grad_norm": 916.473388671875, "learning_rate": 9.880192219470633e-06, "loss": 31.2848, "step": 81420 }, { "epoch": 0.1644937519443109, "grad_norm": 298.04296875, "learning_rate": 9.88011625149376e-06, "loss": 17.4741, "step": 81430 }, { "epoch": 0.16451395257699472, "grad_norm": 724.1027221679688, "learning_rate": 9.88004025973179e-06, "loss": 25.3127, "step": 81440 }, { "epoch": 0.16453415320967854, "grad_norm": 766.1636352539062, "learning_rate": 9.879964244185098e-06, "loss": 26.2444, "step": 81450 }, { "epoch": 0.16455435384236233, "grad_norm": 117.17510223388672, "learning_rate": 9.87988820485405e-06, "loss": 35.3773, "step": 81460 }, { "epoch": 0.16457455447504615, "grad_norm": 576.5032958984375, "learning_rate": 9.87981214173902e-06, "loss": 18.0257, "step": 81470 }, { "epoch": 0.16459475510772997, "grad_norm": 90.32857513427734, "learning_rate": 9.879736054840377e-06, "loss": 53.1879, "step": 81480 }, { "epoch": 0.1646149557404138, "grad_norm": 663.2149658203125, "learning_rate": 9.879659944158493e-06, "loss": 25.8617, "step": 81490 }, { "epoch": 0.1646351563730976, "grad_norm": 1498.1995849609375, "learning_rate": 9.879583809693737e-06, "loss": 32.2337, "step": 81500 }, { "epoch": 0.16465535700578143, "grad_norm": 509.119384765625, "learning_rate": 9.879507651446482e-06, "loss": 30.5044, "step": 81510 }, { "epoch": 0.16467555763846523, "grad_norm": 205.88491821289062, "learning_rate": 9.8794314694171e-06, "loss": 22.2007, "step": 81520 }, { "epoch": 0.16469575827114905, "grad_norm": 599.6121215820312, "learning_rate": 9.879355263605958e-06, "loss": 19.7081, "step": 81530 }, { "epoch": 0.16471595890383287, "grad_norm": 666.6627807617188, "learning_rate": 9.879279034013434e-06, "loss": 18.6181, "step": 81540 }, { "epoch": 0.16473615953651669, "grad_norm": 0.0, "learning_rate": 9.879202780639893e-06, "loss": 14.5669, "step": 81550 }, { "epoch": 0.1647563601692005, "grad_norm": 116.70372009277344, "learning_rate": 9.879126503485709e-06, "loss": 31.6664, "step": 81560 }, { "epoch": 0.16477656080188433, "grad_norm": 255.89442443847656, "learning_rate": 9.879050202551256e-06, "loss": 29.5966, "step": 81570 }, { "epoch": 0.16479676143456812, "grad_norm": 287.06884765625, "learning_rate": 9.878973877836902e-06, "loss": 27.2852, "step": 81580 }, { "epoch": 
0.16481696206725194, "grad_norm": 426.3962707519531, "learning_rate": 9.878897529343023e-06, "loss": 11.7752, "step": 81590 }, { "epoch": 0.16483716269993576, "grad_norm": 426.3056335449219, "learning_rate": 9.878821157069988e-06, "loss": 21.1229, "step": 81600 }, { "epoch": 0.16485736333261958, "grad_norm": 374.24920654296875, "learning_rate": 9.87874476101817e-06, "loss": 34.8238, "step": 81610 }, { "epoch": 0.1648775639653034, "grad_norm": 382.10540771484375, "learning_rate": 9.878668341187944e-06, "loss": 32.4983, "step": 81620 }, { "epoch": 0.16489776459798722, "grad_norm": 496.6434020996094, "learning_rate": 9.878591897579678e-06, "loss": 28.0837, "step": 81630 }, { "epoch": 0.16491796523067104, "grad_norm": 538.9864501953125, "learning_rate": 9.87851543019375e-06, "loss": 40.5745, "step": 81640 }, { "epoch": 0.16493816586335483, "grad_norm": 407.2173156738281, "learning_rate": 9.878438939030526e-06, "loss": 18.182, "step": 81650 }, { "epoch": 0.16495836649603865, "grad_norm": 507.8926086425781, "learning_rate": 9.878362424090384e-06, "loss": 36.1084, "step": 81660 }, { "epoch": 0.16497856712872247, "grad_norm": 423.6439514160156, "learning_rate": 9.878285885373693e-06, "loss": 26.5332, "step": 81670 }, { "epoch": 0.1649987677614063, "grad_norm": 518.3607788085938, "learning_rate": 9.87820932288083e-06, "loss": 21.0788, "step": 81680 }, { "epoch": 0.1650189683940901, "grad_norm": 309.38507080078125, "learning_rate": 9.878132736612167e-06, "loss": 19.2194, "step": 81690 }, { "epoch": 0.16503916902677393, "grad_norm": 469.4150390625, "learning_rate": 9.878056126568077e-06, "loss": 14.9641, "step": 81700 }, { "epoch": 0.16505936965945772, "grad_norm": 728.9360961914062, "learning_rate": 9.87797949274893e-06, "loss": 27.921, "step": 81710 }, { "epoch": 0.16507957029214154, "grad_norm": 347.7802734375, "learning_rate": 9.877902835155105e-06, "loss": 23.0371, "step": 81720 }, { "epoch": 0.16509977092482536, "grad_norm": 257.8112487792969, "learning_rate": 9.877826153786973e-06, "loss": 19.6124, "step": 81730 }, { "epoch": 0.16511997155750918, "grad_norm": 758.5030517578125, "learning_rate": 9.877749448644908e-06, "loss": 15.3159, "step": 81740 }, { "epoch": 0.165140172190193, "grad_norm": 244.9395294189453, "learning_rate": 9.877672719729283e-06, "loss": 25.8874, "step": 81750 }, { "epoch": 0.16516037282287682, "grad_norm": 468.4497985839844, "learning_rate": 9.877595967040475e-06, "loss": 24.9234, "step": 81760 }, { "epoch": 0.16518057345556064, "grad_norm": 390.4100341796875, "learning_rate": 9.877519190578852e-06, "loss": 22.5945, "step": 81770 }, { "epoch": 0.16520077408824443, "grad_norm": 483.197265625, "learning_rate": 9.877442390344796e-06, "loss": 39.905, "step": 81780 }, { "epoch": 0.16522097472092825, "grad_norm": 181.7025146484375, "learning_rate": 9.877365566338675e-06, "loss": 18.6734, "step": 81790 }, { "epoch": 0.16524117535361207, "grad_norm": 202.7747039794922, "learning_rate": 9.877288718560866e-06, "loss": 12.8252, "step": 81800 }, { "epoch": 0.1652613759862959, "grad_norm": 253.33004760742188, "learning_rate": 9.877211847011744e-06, "loss": 23.8899, "step": 81810 }, { "epoch": 0.16528157661897971, "grad_norm": 387.10028076171875, "learning_rate": 9.877134951691683e-06, "loss": 34.4809, "step": 81820 }, { "epoch": 0.16530177725166353, "grad_norm": 625.840576171875, "learning_rate": 9.877058032601057e-06, "loss": 44.1464, "step": 81830 }, { "epoch": 0.16532197788434733, "grad_norm": 832.5033569335938, "learning_rate": 9.876981089740242e-06, "loss": 29.491, "step": 81840 
}, { "epoch": 0.16534217851703115, "grad_norm": 116.0150375366211, "learning_rate": 9.876904123109613e-06, "loss": 36.1661, "step": 81850 }, { "epoch": 0.16536237914971497, "grad_norm": 0.0, "learning_rate": 9.876827132709545e-06, "loss": 16.6707, "step": 81860 }, { "epoch": 0.1653825797823988, "grad_norm": 1153.5323486328125, "learning_rate": 9.876750118540413e-06, "loss": 36.7919, "step": 81870 }, { "epoch": 0.1654027804150826, "grad_norm": 457.7942810058594, "learning_rate": 9.87667308060259e-06, "loss": 26.9531, "step": 81880 }, { "epoch": 0.16542298104776643, "grad_norm": 236.09178161621094, "learning_rate": 9.876596018896457e-06, "loss": 17.4196, "step": 81890 }, { "epoch": 0.16544318168045022, "grad_norm": 166.97422790527344, "learning_rate": 9.876518933422385e-06, "loss": 18.2988, "step": 81900 }, { "epoch": 0.16546338231313404, "grad_norm": 558.3728637695312, "learning_rate": 9.876441824180752e-06, "loss": 24.043, "step": 81910 }, { "epoch": 0.16548358294581786, "grad_norm": 605.8748779296875, "learning_rate": 9.876364691171933e-06, "loss": 23.0312, "step": 81920 }, { "epoch": 0.16550378357850168, "grad_norm": 450.554931640625, "learning_rate": 9.876287534396304e-06, "loss": 22.7319, "step": 81930 }, { "epoch": 0.1655239842111855, "grad_norm": 1077.175537109375, "learning_rate": 9.876210353854239e-06, "loss": 18.6651, "step": 81940 }, { "epoch": 0.16554418484386932, "grad_norm": 187.02159118652344, "learning_rate": 9.876133149546117e-06, "loss": 11.5557, "step": 81950 }, { "epoch": 0.16556438547655314, "grad_norm": 768.2868041992188, "learning_rate": 9.876055921472316e-06, "loss": 33.242, "step": 81960 }, { "epoch": 0.16558458610923693, "grad_norm": 138.95401000976562, "learning_rate": 9.875978669633206e-06, "loss": 13.3355, "step": 81970 }, { "epoch": 0.16560478674192075, "grad_norm": 640.6097412109375, "learning_rate": 9.87590139402917e-06, "loss": 25.534, "step": 81980 }, { "epoch": 0.16562498737460457, "grad_norm": 97.48204803466797, "learning_rate": 9.87582409466058e-06, "loss": 16.6026, "step": 81990 }, { "epoch": 0.1656451880072884, "grad_norm": 296.3551330566406, "learning_rate": 9.875746771527817e-06, "loss": 26.2501, "step": 82000 }, { "epoch": 0.1656653886399722, "grad_norm": 633.5289306640625, "learning_rate": 9.875669424631255e-06, "loss": 17.8387, "step": 82010 }, { "epoch": 0.16568558927265603, "grad_norm": 295.5682373046875, "learning_rate": 9.87559205397127e-06, "loss": 23.7444, "step": 82020 }, { "epoch": 0.16570578990533982, "grad_norm": 200.01937866210938, "learning_rate": 9.875514659548243e-06, "loss": 27.2707, "step": 82030 }, { "epoch": 0.16572599053802364, "grad_norm": 190.04364013671875, "learning_rate": 9.875437241362546e-06, "loss": 16.2927, "step": 82040 }, { "epoch": 0.16574619117070746, "grad_norm": 53.213069915771484, "learning_rate": 9.87535979941456e-06, "loss": 5.1306, "step": 82050 }, { "epoch": 0.16576639180339128, "grad_norm": 455.74365234375, "learning_rate": 9.875282333704665e-06, "loss": 38.2995, "step": 82060 }, { "epoch": 0.1657865924360751, "grad_norm": 678.8593139648438, "learning_rate": 9.875204844233231e-06, "loss": 34.142, "step": 82070 }, { "epoch": 0.16580679306875892, "grad_norm": 179.88424682617188, "learning_rate": 9.875127331000642e-06, "loss": 20.9054, "step": 82080 }, { "epoch": 0.16582699370144272, "grad_norm": 832.3750610351562, "learning_rate": 9.875049794007274e-06, "loss": 48.829, "step": 82090 }, { "epoch": 0.16584719433412654, "grad_norm": 434.0812072753906, "learning_rate": 9.874972233253503e-06, "loss": 27.954, 
"step": 82100 }, { "epoch": 0.16586739496681036, "grad_norm": 407.52557373046875, "learning_rate": 9.87489464873971e-06, "loss": 7.0023, "step": 82110 }, { "epoch": 0.16588759559949418, "grad_norm": 364.2360534667969, "learning_rate": 9.874817040466271e-06, "loss": 31.6499, "step": 82120 }, { "epoch": 0.165907796232178, "grad_norm": 694.1449584960938, "learning_rate": 9.874739408433565e-06, "loss": 23.6916, "step": 82130 }, { "epoch": 0.16592799686486182, "grad_norm": 261.3028869628906, "learning_rate": 9.87466175264197e-06, "loss": 34.8582, "step": 82140 }, { "epoch": 0.16594819749754564, "grad_norm": 430.9696044921875, "learning_rate": 9.874584073091867e-06, "loss": 30.5337, "step": 82150 }, { "epoch": 0.16596839813022943, "grad_norm": 384.80712890625, "learning_rate": 9.874506369783629e-06, "loss": 17.2107, "step": 82160 }, { "epoch": 0.16598859876291325, "grad_norm": 303.382568359375, "learning_rate": 9.874428642717641e-06, "loss": 26.3623, "step": 82170 }, { "epoch": 0.16600879939559707, "grad_norm": 392.7908630371094, "learning_rate": 9.874350891894278e-06, "loss": 28.234, "step": 82180 }, { "epoch": 0.1660290000282809, "grad_norm": 172.56654357910156, "learning_rate": 9.87427311731392e-06, "loss": 23.7759, "step": 82190 }, { "epoch": 0.1660492006609647, "grad_norm": 345.6696472167969, "learning_rate": 9.874195318976945e-06, "loss": 19.3664, "step": 82200 }, { "epoch": 0.16606940129364853, "grad_norm": 517.3274536132812, "learning_rate": 9.874117496883734e-06, "loss": 21.3027, "step": 82210 }, { "epoch": 0.16608960192633232, "grad_norm": 373.7959289550781, "learning_rate": 9.874039651034665e-06, "loss": 8.4066, "step": 82220 }, { "epoch": 0.16610980255901614, "grad_norm": 1242.808349609375, "learning_rate": 9.873961781430119e-06, "loss": 41.4418, "step": 82230 }, { "epoch": 0.16613000319169996, "grad_norm": 491.39044189453125, "learning_rate": 9.873883888070474e-06, "loss": 38.7835, "step": 82240 }, { "epoch": 0.16615020382438378, "grad_norm": 216.55055236816406, "learning_rate": 9.87380597095611e-06, "loss": 10.1805, "step": 82250 }, { "epoch": 0.1661704044570676, "grad_norm": 617.9553833007812, "learning_rate": 9.873728030087406e-06, "loss": 18.4007, "step": 82260 }, { "epoch": 0.16619060508975142, "grad_norm": 355.6474304199219, "learning_rate": 9.873650065464744e-06, "loss": 15.7755, "step": 82270 }, { "epoch": 0.16621080572243524, "grad_norm": 379.7162170410156, "learning_rate": 9.873572077088502e-06, "loss": 22.0875, "step": 82280 }, { "epoch": 0.16623100635511903, "grad_norm": 303.56890869140625, "learning_rate": 9.87349406495906e-06, "loss": 28.2835, "step": 82290 }, { "epoch": 0.16625120698780285, "grad_norm": 4.648748397827148, "learning_rate": 9.873416029076801e-06, "loss": 28.6749, "step": 82300 }, { "epoch": 0.16627140762048667, "grad_norm": 34.346946716308594, "learning_rate": 9.873337969442102e-06, "loss": 28.4306, "step": 82310 }, { "epoch": 0.1662916082531705, "grad_norm": 302.66461181640625, "learning_rate": 9.873259886055344e-06, "loss": 31.9182, "step": 82320 }, { "epoch": 0.1663118088858543, "grad_norm": 184.1270294189453, "learning_rate": 9.873181778916911e-06, "loss": 23.1773, "step": 82330 }, { "epoch": 0.16633200951853813, "grad_norm": 417.1488037109375, "learning_rate": 9.873103648027178e-06, "loss": 16.6926, "step": 82340 }, { "epoch": 0.16635221015122192, "grad_norm": 770.2700805664062, "learning_rate": 9.873025493386531e-06, "loss": 28.0675, "step": 82350 }, { "epoch": 0.16637241078390574, "grad_norm": 227.6401824951172, "learning_rate": 
9.872947314995348e-06, "loss": 39.1034, "step": 82360 }, { "epoch": 0.16639261141658956, "grad_norm": 410.4468078613281, "learning_rate": 9.872869112854011e-06, "loss": 18.9919, "step": 82370 }, { "epoch": 0.16641281204927338, "grad_norm": 435.20819091796875, "learning_rate": 9.872790886962901e-06, "loss": 28.2852, "step": 82380 }, { "epoch": 0.1664330126819572, "grad_norm": 421.23089599609375, "learning_rate": 9.8727126373224e-06, "loss": 32.7884, "step": 82390 }, { "epoch": 0.16645321331464102, "grad_norm": 1861.697265625, "learning_rate": 9.872634363932887e-06, "loss": 36.1657, "step": 82400 }, { "epoch": 0.16647341394732482, "grad_norm": 545.474609375, "learning_rate": 9.872556066794745e-06, "loss": 38.8971, "step": 82410 }, { "epoch": 0.16649361458000864, "grad_norm": 429.2677001953125, "learning_rate": 9.872477745908356e-06, "loss": 25.0905, "step": 82420 }, { "epoch": 0.16651381521269246, "grad_norm": 464.4039306640625, "learning_rate": 9.872399401274103e-06, "loss": 19.8081, "step": 82430 }, { "epoch": 0.16653401584537628, "grad_norm": 215.3997344970703, "learning_rate": 9.872321032892364e-06, "loss": 27.133, "step": 82440 }, { "epoch": 0.1665542164780601, "grad_norm": 328.7926330566406, "learning_rate": 9.872242640763525e-06, "loss": 18.479, "step": 82450 }, { "epoch": 0.16657441711074392, "grad_norm": 259.94964599609375, "learning_rate": 9.872164224887966e-06, "loss": 19.8865, "step": 82460 }, { "epoch": 0.16659461774342774, "grad_norm": 429.4085388183594, "learning_rate": 9.872085785266069e-06, "loss": 28.6425, "step": 82470 }, { "epoch": 0.16661481837611153, "grad_norm": 427.0818786621094, "learning_rate": 9.872007321898218e-06, "loss": 21.0934, "step": 82480 }, { "epoch": 0.16663501900879535, "grad_norm": 203.4080810546875, "learning_rate": 9.871928834784793e-06, "loss": 26.3094, "step": 82490 }, { "epoch": 0.16665521964147917, "grad_norm": 421.71295166015625, "learning_rate": 9.871850323926178e-06, "loss": 20.1551, "step": 82500 }, { "epoch": 0.166675420274163, "grad_norm": 173.3473663330078, "learning_rate": 9.871771789322754e-06, "loss": 27.9404, "step": 82510 }, { "epoch": 0.1666956209068468, "grad_norm": 32.80507278442383, "learning_rate": 9.871693230974907e-06, "loss": 19.7537, "step": 82520 }, { "epoch": 0.16671582153953063, "grad_norm": 810.5501708984375, "learning_rate": 9.871614648883017e-06, "loss": 14.9206, "step": 82530 }, { "epoch": 0.16673602217221442, "grad_norm": 326.1086120605469, "learning_rate": 9.87153604304747e-06, "loss": 28.2606, "step": 82540 }, { "epoch": 0.16675622280489824, "grad_norm": 569.2630004882812, "learning_rate": 9.871457413468645e-06, "loss": 81.8506, "step": 82550 }, { "epoch": 0.16677642343758206, "grad_norm": 306.8781433105469, "learning_rate": 9.871378760146928e-06, "loss": 36.3098, "step": 82560 }, { "epoch": 0.16679662407026588, "grad_norm": 834.5847778320312, "learning_rate": 9.871300083082702e-06, "loss": 31.8916, "step": 82570 }, { "epoch": 0.1668168247029497, "grad_norm": 424.6417541503906, "learning_rate": 9.87122138227635e-06, "loss": 24.0622, "step": 82580 }, { "epoch": 0.16683702533563352, "grad_norm": 218.0953826904297, "learning_rate": 9.871142657728257e-06, "loss": 34.0845, "step": 82590 }, { "epoch": 0.16685722596831734, "grad_norm": 509.2384338378906, "learning_rate": 9.871063909438803e-06, "loss": 32.4494, "step": 82600 }, { "epoch": 0.16687742660100113, "grad_norm": 303.0262756347656, "learning_rate": 9.870985137408375e-06, "loss": 20.673, "step": 82610 }, { "epoch": 0.16689762723368495, "grad_norm": 
487.12994384765625, "learning_rate": 9.870906341637358e-06, "loss": 23.9176, "step": 82620 }, { "epoch": 0.16691782786636877, "grad_norm": 731.3406982421875, "learning_rate": 9.870827522126134e-06, "loss": 29.0172, "step": 82630 }, { "epoch": 0.1669380284990526, "grad_norm": 364.0166320800781, "learning_rate": 9.870748678875086e-06, "loss": 18.3425, "step": 82640 }, { "epoch": 0.1669582291317364, "grad_norm": 671.7162475585938, "learning_rate": 9.8706698118846e-06, "loss": 28.2177, "step": 82650 }, { "epoch": 0.16697842976442023, "grad_norm": 827.7516479492188, "learning_rate": 9.870590921155062e-06, "loss": 26.0711, "step": 82660 }, { "epoch": 0.16699863039710403, "grad_norm": 1016.3685913085938, "learning_rate": 9.870512006686852e-06, "loss": 32.4713, "step": 82670 }, { "epoch": 0.16701883102978785, "grad_norm": 374.1891784667969, "learning_rate": 9.870433068480359e-06, "loss": 26.4627, "step": 82680 }, { "epoch": 0.16703903166247167, "grad_norm": 416.0177917480469, "learning_rate": 9.870354106535964e-06, "loss": 26.2241, "step": 82690 }, { "epoch": 0.16705923229515549, "grad_norm": 863.1328735351562, "learning_rate": 9.870275120854055e-06, "loss": 25.0358, "step": 82700 }, { "epoch": 0.1670794329278393, "grad_norm": 165.13653564453125, "learning_rate": 9.870196111435016e-06, "loss": 23.1991, "step": 82710 }, { "epoch": 0.16709963356052313, "grad_norm": 409.42364501953125, "learning_rate": 9.870117078279231e-06, "loss": 48.5756, "step": 82720 }, { "epoch": 0.16711983419320692, "grad_norm": 535.4275512695312, "learning_rate": 9.870038021387087e-06, "loss": 24.4176, "step": 82730 }, { "epoch": 0.16714003482589074, "grad_norm": 231.93536376953125, "learning_rate": 9.869958940758968e-06, "loss": 20.2856, "step": 82740 }, { "epoch": 0.16716023545857456, "grad_norm": 324.0728759765625, "learning_rate": 9.86987983639526e-06, "loss": 24.347, "step": 82750 }, { "epoch": 0.16718043609125838, "grad_norm": 577.8886108398438, "learning_rate": 9.869800708296347e-06, "loss": 24.2615, "step": 82760 }, { "epoch": 0.1672006367239422, "grad_norm": 141.15060424804688, "learning_rate": 9.869721556462617e-06, "loss": 37.6809, "step": 82770 }, { "epoch": 0.16722083735662602, "grad_norm": 227.20361328125, "learning_rate": 9.869642380894454e-06, "loss": 30.4997, "step": 82780 }, { "epoch": 0.16724103798930984, "grad_norm": 1045.2889404296875, "learning_rate": 9.869563181592246e-06, "loss": 29.1216, "step": 82790 }, { "epoch": 0.16726123862199363, "grad_norm": 494.2144775390625, "learning_rate": 9.869483958556376e-06, "loss": 24.3889, "step": 82800 }, { "epoch": 0.16728143925467745, "grad_norm": 361.6865539550781, "learning_rate": 9.869404711787234e-06, "loss": 20.2709, "step": 82810 }, { "epoch": 0.16730163988736127, "grad_norm": 480.38104248046875, "learning_rate": 9.869325441285203e-06, "loss": 24.9362, "step": 82820 }, { "epoch": 0.1673218405200451, "grad_norm": 289.5055236816406, "learning_rate": 9.869246147050669e-06, "loss": 28.7482, "step": 82830 }, { "epoch": 0.1673420411527289, "grad_norm": 259.9599914550781, "learning_rate": 9.869166829084023e-06, "loss": 23.9636, "step": 82840 }, { "epoch": 0.16736224178541273, "grad_norm": 172.7774200439453, "learning_rate": 9.869087487385644e-06, "loss": 26.7328, "step": 82850 }, { "epoch": 0.16738244241809652, "grad_norm": 15.006548881530762, "learning_rate": 9.869008121955928e-06, "loss": 18.947, "step": 82860 }, { "epoch": 0.16740264305078034, "grad_norm": 1037.4061279296875, "learning_rate": 9.868928732795253e-06, "loss": 22.4189, "step": 82870 }, { "epoch": 
0.16742284368346416, "grad_norm": 732.235595703125, "learning_rate": 9.868849319904012e-06, "loss": 31.7228, "step": 82880 }, { "epoch": 0.16744304431614798, "grad_norm": 758.1985473632812, "learning_rate": 9.86876988328259e-06, "loss": 18.0874, "step": 82890 }, { "epoch": 0.1674632449488318, "grad_norm": 448.0339660644531, "learning_rate": 9.868690422931372e-06, "loss": 40.1267, "step": 82900 }, { "epoch": 0.16748344558151562, "grad_norm": 440.24273681640625, "learning_rate": 9.86861093885075e-06, "loss": 24.2678, "step": 82910 }, { "epoch": 0.16750364621419944, "grad_norm": 140.989990234375, "learning_rate": 9.868531431041108e-06, "loss": 19.4625, "step": 82920 }, { "epoch": 0.16752384684688323, "grad_norm": 434.0629577636719, "learning_rate": 9.868451899502833e-06, "loss": 21.7049, "step": 82930 }, { "epoch": 0.16754404747956705, "grad_norm": 254.76785278320312, "learning_rate": 9.868372344236314e-06, "loss": 58.1585, "step": 82940 }, { "epoch": 0.16756424811225087, "grad_norm": 459.30084228515625, "learning_rate": 9.86829276524194e-06, "loss": 30.7155, "step": 82950 }, { "epoch": 0.1675844487449347, "grad_norm": 901.2047729492188, "learning_rate": 9.868213162520097e-06, "loss": 29.0213, "step": 82960 }, { "epoch": 0.16760464937761851, "grad_norm": 219.8902587890625, "learning_rate": 9.868133536071174e-06, "loss": 19.2086, "step": 82970 }, { "epoch": 0.16762485001030233, "grad_norm": 344.43682861328125, "learning_rate": 9.868053885895559e-06, "loss": 22.8623, "step": 82980 }, { "epoch": 0.16764505064298613, "grad_norm": 97.78935241699219, "learning_rate": 9.867974211993639e-06, "loss": 24.2089, "step": 82990 }, { "epoch": 0.16766525127566995, "grad_norm": 214.8300323486328, "learning_rate": 9.867894514365802e-06, "loss": 20.0944, "step": 83000 }, { "epoch": 0.16768545190835377, "grad_norm": 73.76973724365234, "learning_rate": 9.867814793012437e-06, "loss": 15.5302, "step": 83010 }, { "epoch": 0.1677056525410376, "grad_norm": 976.6002197265625, "learning_rate": 9.867735047933936e-06, "loss": 24.5506, "step": 83020 }, { "epoch": 0.1677258531737214, "grad_norm": 73.62681579589844, "learning_rate": 9.867655279130684e-06, "loss": 23.3989, "step": 83030 }, { "epoch": 0.16774605380640523, "grad_norm": 818.6986083984375, "learning_rate": 9.86757548660307e-06, "loss": 18.849, "step": 83040 }, { "epoch": 0.16776625443908902, "grad_norm": 716.3596801757812, "learning_rate": 9.867495670351483e-06, "loss": 35.8794, "step": 83050 }, { "epoch": 0.16778645507177284, "grad_norm": 481.1494140625, "learning_rate": 9.867415830376313e-06, "loss": 22.0284, "step": 83060 }, { "epoch": 0.16780665570445666, "grad_norm": 236.226806640625, "learning_rate": 9.867335966677949e-06, "loss": 28.4822, "step": 83070 }, { "epoch": 0.16782685633714048, "grad_norm": 537.2672729492188, "learning_rate": 9.867256079256779e-06, "loss": 22.5255, "step": 83080 }, { "epoch": 0.1678470569698243, "grad_norm": 465.3406066894531, "learning_rate": 9.867176168113193e-06, "loss": 31.2277, "step": 83090 }, { "epoch": 0.16786725760250812, "grad_norm": 162.4087371826172, "learning_rate": 9.867096233247581e-06, "loss": 27.0187, "step": 83100 }, { "epoch": 0.16788745823519194, "grad_norm": 204.7646026611328, "learning_rate": 9.867016274660333e-06, "loss": 26.6123, "step": 83110 }, { "epoch": 0.16790765886787573, "grad_norm": 476.49517822265625, "learning_rate": 9.866936292351837e-06, "loss": 33.0041, "step": 83120 }, { "epoch": 0.16792785950055955, "grad_norm": 310.9793701171875, "learning_rate": 9.866856286322484e-06, "loss": 13.6093, 
"step": 83130 }, { "epoch": 0.16794806013324337, "grad_norm": 1082.086181640625, "learning_rate": 9.866776256572662e-06, "loss": 47.0691, "step": 83140 }, { "epoch": 0.1679682607659272, "grad_norm": 771.9020385742188, "learning_rate": 9.866696203102765e-06, "loss": 32.4807, "step": 83150 }, { "epoch": 0.167988461398611, "grad_norm": 454.5968017578125, "learning_rate": 9.866616125913182e-06, "loss": 25.2858, "step": 83160 }, { "epoch": 0.16800866203129483, "grad_norm": 537.4826049804688, "learning_rate": 9.8665360250043e-06, "loss": 21.0879, "step": 83170 }, { "epoch": 0.16802886266397862, "grad_norm": 755.97802734375, "learning_rate": 9.866455900376514e-06, "loss": 30.0066, "step": 83180 }, { "epoch": 0.16804906329666244, "grad_norm": 447.1153869628906, "learning_rate": 9.86637575203021e-06, "loss": 21.2045, "step": 83190 }, { "epoch": 0.16806926392934626, "grad_norm": 423.9645080566406, "learning_rate": 9.866295579965782e-06, "loss": 26.5368, "step": 83200 }, { "epoch": 0.16808946456203008, "grad_norm": 630.846923828125, "learning_rate": 9.86621538418362e-06, "loss": 37.2785, "step": 83210 }, { "epoch": 0.1681096651947139, "grad_norm": 443.00054931640625, "learning_rate": 9.866135164684112e-06, "loss": 32.5187, "step": 83220 }, { "epoch": 0.16812986582739772, "grad_norm": 267.3347473144531, "learning_rate": 9.866054921467654e-06, "loss": 27.728, "step": 83230 }, { "epoch": 0.16815006646008154, "grad_norm": 499.41632080078125, "learning_rate": 9.865974654534634e-06, "loss": 22.1323, "step": 83240 }, { "epoch": 0.16817026709276534, "grad_norm": 564.2894897460938, "learning_rate": 9.865894363885442e-06, "loss": 40.6049, "step": 83250 }, { "epoch": 0.16819046772544916, "grad_norm": 87.25096893310547, "learning_rate": 9.865814049520473e-06, "loss": 14.6281, "step": 83260 }, { "epoch": 0.16821066835813298, "grad_norm": 268.3941345214844, "learning_rate": 9.865733711440116e-06, "loss": 16.5669, "step": 83270 }, { "epoch": 0.1682308689908168, "grad_norm": 258.07379150390625, "learning_rate": 9.865653349644761e-06, "loss": 17.1536, "step": 83280 }, { "epoch": 0.16825106962350062, "grad_norm": 564.84912109375, "learning_rate": 9.865572964134804e-06, "loss": 38.7723, "step": 83290 }, { "epoch": 0.16827127025618444, "grad_norm": 198.5872802734375, "learning_rate": 9.865492554910634e-06, "loss": 11.8511, "step": 83300 }, { "epoch": 0.16829147088886823, "grad_norm": 267.9917297363281, "learning_rate": 9.865412121972643e-06, "loss": 18.4249, "step": 83310 }, { "epoch": 0.16831167152155205, "grad_norm": 197.87791442871094, "learning_rate": 9.865331665321222e-06, "loss": 25.7429, "step": 83320 }, { "epoch": 0.16833187215423587, "grad_norm": 877.0123901367188, "learning_rate": 9.865251184956767e-06, "loss": 31.5932, "step": 83330 }, { "epoch": 0.1683520727869197, "grad_norm": 295.51324462890625, "learning_rate": 9.865170680879667e-06, "loss": 26.722, "step": 83340 }, { "epoch": 0.1683722734196035, "grad_norm": 13.86534309387207, "learning_rate": 9.865090153090315e-06, "loss": 30.3432, "step": 83350 }, { "epoch": 0.16839247405228733, "grad_norm": 317.31341552734375, "learning_rate": 9.865009601589105e-06, "loss": 27.8171, "step": 83360 }, { "epoch": 0.16841267468497112, "grad_norm": 425.7890930175781, "learning_rate": 9.864929026376427e-06, "loss": 25.6833, "step": 83370 }, { "epoch": 0.16843287531765494, "grad_norm": 769.4445190429688, "learning_rate": 9.864848427452675e-06, "loss": 28.4851, "step": 83380 }, { "epoch": 0.16845307595033876, "grad_norm": 336.91571044921875, "learning_rate": 
9.864767804818242e-06, "loss": 16.6119, "step": 83390 }, { "epoch": 0.16847327658302258, "grad_norm": 524.8754272460938, "learning_rate": 9.86468715847352e-06, "loss": 22.7018, "step": 83400 }, { "epoch": 0.1684934772157064, "grad_norm": 401.87353515625, "learning_rate": 9.864606488418905e-06, "loss": 39.302, "step": 83410 }, { "epoch": 0.16851367784839022, "grad_norm": 365.1914367675781, "learning_rate": 9.864525794654786e-06, "loss": 19.2479, "step": 83420 }, { "epoch": 0.16853387848107404, "grad_norm": 745.577880859375, "learning_rate": 9.864445077181559e-06, "loss": 38.7108, "step": 83430 }, { "epoch": 0.16855407911375783, "grad_norm": 112.34915924072266, "learning_rate": 9.864364335999615e-06, "loss": 23.4639, "step": 83440 }, { "epoch": 0.16857427974644165, "grad_norm": 464.5140075683594, "learning_rate": 9.864283571109352e-06, "loss": 35.8791, "step": 83450 }, { "epoch": 0.16859448037912547, "grad_norm": 132.9011688232422, "learning_rate": 9.864202782511158e-06, "loss": 40.9821, "step": 83460 }, { "epoch": 0.1686146810118093, "grad_norm": 201.52503967285156, "learning_rate": 9.864121970205431e-06, "loss": 25.3062, "step": 83470 }, { "epoch": 0.1686348816444931, "grad_norm": 233.94447326660156, "learning_rate": 9.864041134192563e-06, "loss": 13.6422, "step": 83480 }, { "epoch": 0.16865508227717693, "grad_norm": 336.04986572265625, "learning_rate": 9.86396027447295e-06, "loss": 31.3297, "step": 83490 }, { "epoch": 0.16867528290986072, "grad_norm": 519.1437377929688, "learning_rate": 9.863879391046985e-06, "loss": 18.7927, "step": 83500 }, { "epoch": 0.16869548354254454, "grad_norm": 271.48291015625, "learning_rate": 9.863798483915059e-06, "loss": 24.2579, "step": 83510 }, { "epoch": 0.16871568417522836, "grad_norm": 389.02093505859375, "learning_rate": 9.86371755307757e-06, "loss": 29.0493, "step": 83520 }, { "epoch": 0.16873588480791218, "grad_norm": 696.2223510742188, "learning_rate": 9.863636598534912e-06, "loss": 24.1301, "step": 83530 }, { "epoch": 0.168756085440596, "grad_norm": 234.34986877441406, "learning_rate": 9.863555620287479e-06, "loss": 14.363, "step": 83540 }, { "epoch": 0.16877628607327982, "grad_norm": 279.8359680175781, "learning_rate": 9.863474618335666e-06, "loss": 21.0213, "step": 83550 }, { "epoch": 0.16879648670596364, "grad_norm": 299.2847595214844, "learning_rate": 9.863393592679867e-06, "loss": 30.3096, "step": 83560 }, { "epoch": 0.16881668733864744, "grad_norm": 665.0737915039062, "learning_rate": 9.863312543320479e-06, "loss": 19.6344, "step": 83570 }, { "epoch": 0.16883688797133126, "grad_norm": 608.3574829101562, "learning_rate": 9.863231470257893e-06, "loss": 17.4846, "step": 83580 }, { "epoch": 0.16885708860401508, "grad_norm": 632.60400390625, "learning_rate": 9.863150373492509e-06, "loss": 18.2631, "step": 83590 }, { "epoch": 0.1688772892366989, "grad_norm": 88.76485443115234, "learning_rate": 9.863069253024719e-06, "loss": 26.1825, "step": 83600 }, { "epoch": 0.16889748986938272, "grad_norm": 438.93951416015625, "learning_rate": 9.862988108854919e-06, "loss": 17.7443, "step": 83610 }, { "epoch": 0.16891769050206654, "grad_norm": 496.48602294921875, "learning_rate": 9.862906940983505e-06, "loss": 33.4956, "step": 83620 }, { "epoch": 0.16893789113475033, "grad_norm": 391.3303527832031, "learning_rate": 9.862825749410872e-06, "loss": 21.2826, "step": 83630 }, { "epoch": 0.16895809176743415, "grad_norm": 329.953369140625, "learning_rate": 9.862744534137416e-06, "loss": 14.2922, "step": 83640 }, { "epoch": 0.16897829240011797, "grad_norm": 
408.1158752441406, "learning_rate": 9.862663295163533e-06, "loss": 35.4676, "step": 83650 }, { "epoch": 0.1689984930328018, "grad_norm": 244.37925720214844, "learning_rate": 9.862582032489621e-06, "loss": 15.8363, "step": 83660 }, { "epoch": 0.1690186936654856, "grad_norm": 428.01556396484375, "learning_rate": 9.86250074611607e-06, "loss": 15.2981, "step": 83670 }, { "epoch": 0.16903889429816943, "grad_norm": 282.2440185546875, "learning_rate": 9.862419436043284e-06, "loss": 40.3479, "step": 83680 }, { "epoch": 0.16905909493085322, "grad_norm": 425.94366455078125, "learning_rate": 9.862338102271654e-06, "loss": 18.5903, "step": 83690 }, { "epoch": 0.16907929556353704, "grad_norm": 337.7959899902344, "learning_rate": 9.862256744801576e-06, "loss": 19.8282, "step": 83700 }, { "epoch": 0.16909949619622086, "grad_norm": 844.557373046875, "learning_rate": 9.86217536363345e-06, "loss": 29.688, "step": 83710 }, { "epoch": 0.16911969682890468, "grad_norm": 194.49513244628906, "learning_rate": 9.862093958767671e-06, "loss": 36.4832, "step": 83720 }, { "epoch": 0.1691398974615885, "grad_norm": 537.4203491210938, "learning_rate": 9.862012530204636e-06, "loss": 26.5721, "step": 83730 }, { "epoch": 0.16916009809427232, "grad_norm": 731.2578125, "learning_rate": 9.86193107794474e-06, "loss": 17.8927, "step": 83740 }, { "epoch": 0.16918029872695614, "grad_norm": 87.39258575439453, "learning_rate": 9.861849601988384e-06, "loss": 14.7109, "step": 83750 }, { "epoch": 0.16920049935963993, "grad_norm": 188.87950134277344, "learning_rate": 9.861768102335961e-06, "loss": 34.3287, "step": 83760 }, { "epoch": 0.16922069999232375, "grad_norm": 165.73243713378906, "learning_rate": 9.861686578987871e-06, "loss": 33.7244, "step": 83770 }, { "epoch": 0.16924090062500757, "grad_norm": 314.24139404296875, "learning_rate": 9.86160503194451e-06, "loss": 13.688, "step": 83780 }, { "epoch": 0.1692611012576914, "grad_norm": 441.1898193359375, "learning_rate": 9.861523461206275e-06, "loss": 21.0881, "step": 83790 }, { "epoch": 0.1692813018903752, "grad_norm": 375.0849914550781, "learning_rate": 9.861441866773564e-06, "loss": 29.8344, "step": 83800 }, { "epoch": 0.16930150252305903, "grad_norm": 280.3564147949219, "learning_rate": 9.861360248646777e-06, "loss": 30.6994, "step": 83810 }, { "epoch": 0.16932170315574283, "grad_norm": 456.1604919433594, "learning_rate": 9.861278606826307e-06, "loss": 44.3701, "step": 83820 }, { "epoch": 0.16934190378842665, "grad_norm": 213.81643676757812, "learning_rate": 9.861196941312556e-06, "loss": 16.4747, "step": 83830 }, { "epoch": 0.16936210442111047, "grad_norm": 500.5413818359375, "learning_rate": 9.861115252105922e-06, "loss": 25.5642, "step": 83840 }, { "epoch": 0.16938230505379429, "grad_norm": 475.6985778808594, "learning_rate": 9.8610335392068e-06, "loss": 19.0535, "step": 83850 }, { "epoch": 0.1694025056864781, "grad_norm": 351.562744140625, "learning_rate": 9.86095180261559e-06, "loss": 24.703, "step": 83860 }, { "epoch": 0.16942270631916193, "grad_norm": 373.62286376953125, "learning_rate": 9.860870042332693e-06, "loss": 17.4584, "step": 83870 }, { "epoch": 0.16944290695184575, "grad_norm": 230.38662719726562, "learning_rate": 9.860788258358503e-06, "loss": 23.5072, "step": 83880 }, { "epoch": 0.16946310758452954, "grad_norm": 180.81906127929688, "learning_rate": 9.86070645069342e-06, "loss": 17.0741, "step": 83890 }, { "epoch": 0.16948330821721336, "grad_norm": 378.0179748535156, "learning_rate": 9.860624619337844e-06, "loss": 56.2685, "step": 83900 }, { "epoch": 
0.16950350884989718, "grad_norm": 320.8547668457031, "learning_rate": 9.860542764292173e-06, "loss": 19.7221, "step": 83910 }, { "epoch": 0.169523709482581, "grad_norm": 245.2682647705078, "learning_rate": 9.860460885556806e-06, "loss": 22.3827, "step": 83920 }, { "epoch": 0.16954391011526482, "grad_norm": 551.7291870117188, "learning_rate": 9.860378983132144e-06, "loss": 19.6086, "step": 83930 }, { "epoch": 0.16956411074794864, "grad_norm": 2057.81494140625, "learning_rate": 9.860297057018581e-06, "loss": 44.4755, "step": 83940 }, { "epoch": 0.16958431138063243, "grad_norm": 314.1662902832031, "learning_rate": 9.860215107216523e-06, "loss": 33.9747, "step": 83950 }, { "epoch": 0.16960451201331625, "grad_norm": 595.001953125, "learning_rate": 9.860133133726364e-06, "loss": 18.0649, "step": 83960 }, { "epoch": 0.16962471264600007, "grad_norm": 651.8950805664062, "learning_rate": 9.860051136548506e-06, "loss": 25.674, "step": 83970 }, { "epoch": 0.1696449132786839, "grad_norm": 7985.6484375, "learning_rate": 9.859969115683348e-06, "loss": 54.6429, "step": 83980 }, { "epoch": 0.1696651139113677, "grad_norm": 529.5379638671875, "learning_rate": 9.85988707113129e-06, "loss": 20.52, "step": 83990 }, { "epoch": 0.16968531454405153, "grad_norm": 249.68833923339844, "learning_rate": 9.859805002892733e-06, "loss": 16.4941, "step": 84000 }, { "epoch": 0.16970551517673532, "grad_norm": 438.7927551269531, "learning_rate": 9.859722910968073e-06, "loss": 22.7703, "step": 84010 }, { "epoch": 0.16972571580941914, "grad_norm": 330.3010559082031, "learning_rate": 9.859640795357716e-06, "loss": 17.3347, "step": 84020 }, { "epoch": 0.16974591644210296, "grad_norm": 682.3013305664062, "learning_rate": 9.859558656062057e-06, "loss": 19.3317, "step": 84030 }, { "epoch": 0.16976611707478678, "grad_norm": 76.0897445678711, "learning_rate": 9.8594764930815e-06, "loss": 11.554, "step": 84040 }, { "epoch": 0.1697863177074706, "grad_norm": 654.0238647460938, "learning_rate": 9.859394306416443e-06, "loss": 15.4549, "step": 84050 }, { "epoch": 0.16980651834015442, "grad_norm": 983.010009765625, "learning_rate": 9.859312096067289e-06, "loss": 25.9158, "step": 84060 }, { "epoch": 0.16982671897283824, "grad_norm": 451.46307373046875, "learning_rate": 9.859229862034436e-06, "loss": 22.6454, "step": 84070 }, { "epoch": 0.16984691960552203, "grad_norm": 1029.73291015625, "learning_rate": 9.859147604318286e-06, "loss": 21.3787, "step": 84080 }, { "epoch": 0.16986712023820585, "grad_norm": 1070.5137939453125, "learning_rate": 9.859065322919239e-06, "loss": 20.8518, "step": 84090 }, { "epoch": 0.16988732087088967, "grad_norm": 217.96324157714844, "learning_rate": 9.8589830178377e-06, "loss": 25.6919, "step": 84100 }, { "epoch": 0.1699075215035735, "grad_norm": 310.8514404296875, "learning_rate": 9.858900689074065e-06, "loss": 18.1398, "step": 84110 }, { "epoch": 0.16992772213625731, "grad_norm": 168.74844360351562, "learning_rate": 9.858818336628737e-06, "loss": 15.2742, "step": 84120 }, { "epoch": 0.16994792276894113, "grad_norm": 485.7269592285156, "learning_rate": 9.858735960502118e-06, "loss": 27.0941, "step": 84130 }, { "epoch": 0.16996812340162493, "grad_norm": 488.62548828125, "learning_rate": 9.858653560694609e-06, "loss": 18.2037, "step": 84140 }, { "epoch": 0.16998832403430875, "grad_norm": 385.607421875, "learning_rate": 9.858571137206611e-06, "loss": 23.4072, "step": 84150 }, { "epoch": 0.17000852466699257, "grad_norm": 844.4804077148438, "learning_rate": 9.858488690038529e-06, "loss": 25.7212, "step": 84160 }, { 
"epoch": 0.1700287252996764, "grad_norm": 77.7420425415039, "learning_rate": 9.858406219190761e-06, "loss": 21.6422, "step": 84170 }, { "epoch": 0.1700489259323602, "grad_norm": 814.97509765625, "learning_rate": 9.858323724663712e-06, "loss": 35.8841, "step": 84180 }, { "epoch": 0.17006912656504403, "grad_norm": 508.8529357910156, "learning_rate": 9.85824120645778e-06, "loss": 21.9199, "step": 84190 }, { "epoch": 0.17008932719772785, "grad_norm": 136.87261962890625, "learning_rate": 9.85815866457337e-06, "loss": 18.0118, "step": 84200 }, { "epoch": 0.17010952783041164, "grad_norm": 156.2366180419922, "learning_rate": 9.858076099010885e-06, "loss": 18.8808, "step": 84210 }, { "epoch": 0.17012972846309546, "grad_norm": 429.79193115234375, "learning_rate": 9.857993509770725e-06, "loss": 12.0462, "step": 84220 }, { "epoch": 0.17014992909577928, "grad_norm": 380.55255126953125, "learning_rate": 9.857910896853296e-06, "loss": 26.2462, "step": 84230 }, { "epoch": 0.1701701297284631, "grad_norm": 642.4622802734375, "learning_rate": 9.857828260258997e-06, "loss": 26.4779, "step": 84240 }, { "epoch": 0.17019033036114692, "grad_norm": 443.7367858886719, "learning_rate": 9.857745599988231e-06, "loss": 23.3827, "step": 84250 }, { "epoch": 0.17021053099383074, "grad_norm": 489.1470947265625, "learning_rate": 9.857662916041404e-06, "loss": 25.824, "step": 84260 }, { "epoch": 0.17023073162651453, "grad_norm": 832.2505493164062, "learning_rate": 9.857580208418917e-06, "loss": 21.9109, "step": 84270 }, { "epoch": 0.17025093225919835, "grad_norm": 132.86337280273438, "learning_rate": 9.857497477121172e-06, "loss": 21.6228, "step": 84280 }, { "epoch": 0.17027113289188217, "grad_norm": 149.47938537597656, "learning_rate": 9.857414722148574e-06, "loss": 28.1589, "step": 84290 }, { "epoch": 0.170291333524566, "grad_norm": 294.6853942871094, "learning_rate": 9.857331943501527e-06, "loss": 20.025, "step": 84300 }, { "epoch": 0.1703115341572498, "grad_norm": 504.4954833984375, "learning_rate": 9.857249141180431e-06, "loss": 22.9865, "step": 84310 }, { "epoch": 0.17033173478993363, "grad_norm": 406.8207092285156, "learning_rate": 9.857166315185693e-06, "loss": 14.5563, "step": 84320 }, { "epoch": 0.17035193542261742, "grad_norm": 973.2086791992188, "learning_rate": 9.857083465517716e-06, "loss": 32.8805, "step": 84330 }, { "epoch": 0.17037213605530124, "grad_norm": 667.1934814453125, "learning_rate": 9.857000592176902e-06, "loss": 18.7556, "step": 84340 }, { "epoch": 0.17039233668798506, "grad_norm": 49.66745376586914, "learning_rate": 9.856917695163659e-06, "loss": 21.303, "step": 84350 }, { "epoch": 0.17041253732066888, "grad_norm": 467.7310791015625, "learning_rate": 9.856834774478385e-06, "loss": 31.2778, "step": 84360 }, { "epoch": 0.1704327379533527, "grad_norm": 1260.0576171875, "learning_rate": 9.85675183012149e-06, "loss": 35.1867, "step": 84370 }, { "epoch": 0.17045293858603652, "grad_norm": 439.5983581542969, "learning_rate": 9.856668862093372e-06, "loss": 32.415, "step": 84380 }, { "epoch": 0.17047313921872034, "grad_norm": 0.0, "learning_rate": 9.856585870394442e-06, "loss": 26.6695, "step": 84390 }, { "epoch": 0.17049333985140414, "grad_norm": 512.7556762695312, "learning_rate": 9.856502855025101e-06, "loss": 26.631, "step": 84400 }, { "epoch": 0.17051354048408796, "grad_norm": 551.2488403320312, "learning_rate": 9.856419815985754e-06, "loss": 29.4239, "step": 84410 }, { "epoch": 0.17053374111677178, "grad_norm": 490.7176818847656, "learning_rate": 9.856336753276804e-06, "loss": 32.1564, "step": 84420 
}, { "epoch": 0.1705539417494556, "grad_norm": 487.49810791015625, "learning_rate": 9.85625366689866e-06, "loss": 31.1072, "step": 84430 }, { "epoch": 0.17057414238213942, "grad_norm": 600.7901000976562, "learning_rate": 9.856170556851725e-06, "loss": 28.5669, "step": 84440 }, { "epoch": 0.17059434301482324, "grad_norm": 98.74726104736328, "learning_rate": 9.856087423136403e-06, "loss": 15.7651, "step": 84450 }, { "epoch": 0.17061454364750703, "grad_norm": 922.817138671875, "learning_rate": 9.856004265753099e-06, "loss": 24.1547, "step": 84460 }, { "epoch": 0.17063474428019085, "grad_norm": 493.76019287109375, "learning_rate": 9.85592108470222e-06, "loss": 23.3009, "step": 84470 }, { "epoch": 0.17065494491287467, "grad_norm": 383.5968017578125, "learning_rate": 9.85583787998417e-06, "loss": 30.4717, "step": 84480 }, { "epoch": 0.1706751455455585, "grad_norm": 350.8125305175781, "learning_rate": 9.855754651599355e-06, "loss": 18.2192, "step": 84490 }, { "epoch": 0.1706953461782423, "grad_norm": 498.9764709472656, "learning_rate": 9.85567139954818e-06, "loss": 30.5408, "step": 84500 }, { "epoch": 0.17071554681092613, "grad_norm": 915.3928833007812, "learning_rate": 9.855588123831053e-06, "loss": 26.5895, "step": 84510 }, { "epoch": 0.17073574744360995, "grad_norm": 261.22625732421875, "learning_rate": 9.855504824448379e-06, "loss": 21.6851, "step": 84520 }, { "epoch": 0.17075594807629374, "grad_norm": 1028.3553466796875, "learning_rate": 9.855421501400562e-06, "loss": 32.9727, "step": 84530 }, { "epoch": 0.17077614870897756, "grad_norm": 632.3440551757812, "learning_rate": 9.85533815468801e-06, "loss": 42.8606, "step": 84540 }, { "epoch": 0.17079634934166138, "grad_norm": 410.7352294921875, "learning_rate": 9.85525478431113e-06, "loss": 35.3848, "step": 84550 }, { "epoch": 0.1708165499743452, "grad_norm": 219.40965270996094, "learning_rate": 9.855171390270325e-06, "loss": 16.5595, "step": 84560 }, { "epoch": 0.17083675060702902, "grad_norm": 847.7810668945312, "learning_rate": 9.855087972566004e-06, "loss": 24.7424, "step": 84570 }, { "epoch": 0.17085695123971284, "grad_norm": 189.41087341308594, "learning_rate": 9.855004531198573e-06, "loss": 19.8264, "step": 84580 }, { "epoch": 0.17087715187239663, "grad_norm": 429.26263427734375, "learning_rate": 9.854921066168439e-06, "loss": 32.3948, "step": 84590 }, { "epoch": 0.17089735250508045, "grad_norm": 938.6051025390625, "learning_rate": 9.854837577476008e-06, "loss": 48.2536, "step": 84600 }, { "epoch": 0.17091755313776427, "grad_norm": 858.9446411132812, "learning_rate": 9.854754065121689e-06, "loss": 27.1367, "step": 84610 }, { "epoch": 0.1709377537704481, "grad_norm": 395.283935546875, "learning_rate": 9.854670529105887e-06, "loss": 23.5078, "step": 84620 }, { "epoch": 0.1709579544031319, "grad_norm": 509.2507629394531, "learning_rate": 9.854586969429009e-06, "loss": 21.7217, "step": 84630 }, { "epoch": 0.17097815503581573, "grad_norm": 197.88796997070312, "learning_rate": 9.854503386091463e-06, "loss": 24.1366, "step": 84640 }, { "epoch": 0.17099835566849952, "grad_norm": 116.27680206298828, "learning_rate": 9.854419779093656e-06, "loss": 22.4091, "step": 84650 }, { "epoch": 0.17101855630118334, "grad_norm": 419.3030700683594, "learning_rate": 9.854336148435997e-06, "loss": 21.6782, "step": 84660 }, { "epoch": 0.17103875693386716, "grad_norm": 330.1683654785156, "learning_rate": 9.85425249411889e-06, "loss": 18.3274, "step": 84670 }, { "epoch": 0.17105895756655098, "grad_norm": 273.65057373046875, "learning_rate": 9.854168816142747e-06, 
"loss": 31.5305, "step": 84680 }, { "epoch": 0.1710791581992348, "grad_norm": 835.4115600585938, "learning_rate": 9.854085114507974e-06, "loss": 18.9734, "step": 84690 }, { "epoch": 0.17109935883191862, "grad_norm": 1398.6064453125, "learning_rate": 9.854001389214979e-06, "loss": 35.758, "step": 84700 }, { "epoch": 0.17111955946460244, "grad_norm": 1547.03466796875, "learning_rate": 9.853917640264169e-06, "loss": 37.8532, "step": 84710 }, { "epoch": 0.17113976009728624, "grad_norm": 406.7312316894531, "learning_rate": 9.853833867655954e-06, "loss": 21.9945, "step": 84720 }, { "epoch": 0.17115996072997006, "grad_norm": 250.5803680419922, "learning_rate": 9.853750071390739e-06, "loss": 31.857, "step": 84730 }, { "epoch": 0.17118016136265388, "grad_norm": 1100.2838134765625, "learning_rate": 9.853666251468938e-06, "loss": 35.0792, "step": 84740 }, { "epoch": 0.1712003619953377, "grad_norm": 960.6127319335938, "learning_rate": 9.853582407890954e-06, "loss": 38.3057, "step": 84750 }, { "epoch": 0.17122056262802152, "grad_norm": 284.38946533203125, "learning_rate": 9.853498540657201e-06, "loss": 29.5066, "step": 84760 }, { "epoch": 0.17124076326070534, "grad_norm": 342.55877685546875, "learning_rate": 9.853414649768082e-06, "loss": 27.3454, "step": 84770 }, { "epoch": 0.17126096389338913, "grad_norm": 345.8613586425781, "learning_rate": 9.85333073522401e-06, "loss": 21.7276, "step": 84780 }, { "epoch": 0.17128116452607295, "grad_norm": 391.4869689941406, "learning_rate": 9.853246797025391e-06, "loss": 25.0733, "step": 84790 }, { "epoch": 0.17130136515875677, "grad_norm": 260.3005676269531, "learning_rate": 9.853162835172638e-06, "loss": 27.0298, "step": 84800 }, { "epoch": 0.1713215657914406, "grad_norm": 1010.074951171875, "learning_rate": 9.853078849666156e-06, "loss": 43.123, "step": 84810 }, { "epoch": 0.1713417664241244, "grad_norm": 565.1976318359375, "learning_rate": 9.852994840506357e-06, "loss": 20.6687, "step": 84820 }, { "epoch": 0.17136196705680823, "grad_norm": 293.1611022949219, "learning_rate": 9.85291080769365e-06, "loss": 25.6356, "step": 84830 }, { "epoch": 0.17138216768949205, "grad_norm": 373.0630798339844, "learning_rate": 9.852826751228445e-06, "loss": 20.0546, "step": 84840 }, { "epoch": 0.17140236832217584, "grad_norm": 450.2943115234375, "learning_rate": 9.852742671111151e-06, "loss": 24.5589, "step": 84850 }, { "epoch": 0.17142256895485966, "grad_norm": 859.8757934570312, "learning_rate": 9.852658567342177e-06, "loss": 23.4677, "step": 84860 }, { "epoch": 0.17144276958754348, "grad_norm": 847.817138671875, "learning_rate": 9.852574439921933e-06, "loss": 38.1542, "step": 84870 }, { "epoch": 0.1714629702202273, "grad_norm": 113.4184341430664, "learning_rate": 9.85249028885083e-06, "loss": 14.9155, "step": 84880 }, { "epoch": 0.17148317085291112, "grad_norm": 198.20477294921875, "learning_rate": 9.852406114129277e-06, "loss": 11.3782, "step": 84890 }, { "epoch": 0.17150337148559494, "grad_norm": 649.3355712890625, "learning_rate": 9.852321915757688e-06, "loss": 40.6609, "step": 84900 }, { "epoch": 0.17152357211827873, "grad_norm": 476.6934814453125, "learning_rate": 9.852237693736469e-06, "loss": 37.8541, "step": 84910 }, { "epoch": 0.17154377275096255, "grad_norm": 325.90057373046875, "learning_rate": 9.852153448066031e-06, "loss": 30.1682, "step": 84920 }, { "epoch": 0.17156397338364637, "grad_norm": 440.6416931152344, "learning_rate": 9.852069178746786e-06, "loss": 27.6828, "step": 84930 }, { "epoch": 0.1715841740163302, "grad_norm": 1373.3228759765625, "learning_rate": 
9.851984885779147e-06, "loss": 24.4666, "step": 84940 }, { "epoch": 0.171604374649014, "grad_norm": 297.64324951171875, "learning_rate": 9.85190056916352e-06, "loss": 31.457, "step": 84950 }, { "epoch": 0.17162457528169783, "grad_norm": 559.4208374023438, "learning_rate": 9.851816228900317e-06, "loss": 16.3533, "step": 84960 }, { "epoch": 0.17164477591438163, "grad_norm": 535.56201171875, "learning_rate": 9.85173186498995e-06, "loss": 9.1985, "step": 84970 }, { "epoch": 0.17166497654706545, "grad_norm": 35.89240264892578, "learning_rate": 9.851647477432834e-06, "loss": 15.0027, "step": 84980 }, { "epoch": 0.17168517717974927, "grad_norm": 407.8486022949219, "learning_rate": 9.851563066229373e-06, "loss": 41.9, "step": 84990 }, { "epoch": 0.17170537781243309, "grad_norm": 476.017333984375, "learning_rate": 9.851478631379982e-06, "loss": 30.2493, "step": 85000 }, { "epoch": 0.1717255784451169, "grad_norm": 438.6534118652344, "learning_rate": 9.851394172885075e-06, "loss": 32.9419, "step": 85010 }, { "epoch": 0.17174577907780073, "grad_norm": 263.57464599609375, "learning_rate": 9.85130969074506e-06, "loss": 27.4015, "step": 85020 }, { "epoch": 0.17176597971048455, "grad_norm": 369.37799072265625, "learning_rate": 9.851225184960349e-06, "loss": 34.6385, "step": 85030 }, { "epoch": 0.17178618034316834, "grad_norm": 590.7777099609375, "learning_rate": 9.851140655531357e-06, "loss": 21.8534, "step": 85040 }, { "epoch": 0.17180638097585216, "grad_norm": 953.6223754882812, "learning_rate": 9.851056102458492e-06, "loss": 23.6818, "step": 85050 }, { "epoch": 0.17182658160853598, "grad_norm": 306.7108459472656, "learning_rate": 9.85097152574217e-06, "loss": 17.0797, "step": 85060 }, { "epoch": 0.1718467822412198, "grad_norm": 671.138671875, "learning_rate": 9.8508869253828e-06, "loss": 30.8856, "step": 85070 }, { "epoch": 0.17186698287390362, "grad_norm": 736.0115356445312, "learning_rate": 9.850802301380793e-06, "loss": 18.4232, "step": 85080 }, { "epoch": 0.17188718350658744, "grad_norm": 24.5797061920166, "learning_rate": 9.850717653736566e-06, "loss": 20.5339, "step": 85090 }, { "epoch": 0.17190738413927123, "grad_norm": 314.4891662597656, "learning_rate": 9.85063298245053e-06, "loss": 29.7254, "step": 85100 }, { "epoch": 0.17192758477195505, "grad_norm": 233.59913635253906, "learning_rate": 9.850548287523096e-06, "loss": 33.0526, "step": 85110 }, { "epoch": 0.17194778540463887, "grad_norm": 505.6223449707031, "learning_rate": 9.850463568954679e-06, "loss": 11.4925, "step": 85120 }, { "epoch": 0.1719679860373227, "grad_norm": 657.2791137695312, "learning_rate": 9.85037882674569e-06, "loss": 20.6603, "step": 85130 }, { "epoch": 0.1719881866700065, "grad_norm": 478.3403015136719, "learning_rate": 9.850294060896544e-06, "loss": 25.793, "step": 85140 }, { "epoch": 0.17200838730269033, "grad_norm": 396.8893127441406, "learning_rate": 9.850209271407653e-06, "loss": 18.696, "step": 85150 }, { "epoch": 0.17202858793537412, "grad_norm": 3137.254150390625, "learning_rate": 9.850124458279429e-06, "loss": 47.7481, "step": 85160 }, { "epoch": 0.17204878856805794, "grad_norm": 836.9934692382812, "learning_rate": 9.850039621512287e-06, "loss": 23.6223, "step": 85170 }, { "epoch": 0.17206898920074176, "grad_norm": 274.5120849609375, "learning_rate": 9.849954761106642e-06, "loss": 17.3965, "step": 85180 }, { "epoch": 0.17208918983342558, "grad_norm": 487.4518737792969, "learning_rate": 9.849869877062903e-06, "loss": 29.7293, "step": 85190 }, { "epoch": 0.1721093904661094, "grad_norm": 590.492431640625, 
"learning_rate": 9.849784969381488e-06, "loss": 27.5779, "step": 85200 }, { "epoch": 0.17212959109879322, "grad_norm": 1912.063720703125, "learning_rate": 9.849700038062808e-06, "loss": 36.5601, "step": 85210 }, { "epoch": 0.17214979173147704, "grad_norm": 219.05401611328125, "learning_rate": 9.849615083107279e-06, "loss": 26.0575, "step": 85220 }, { "epoch": 0.17216999236416083, "grad_norm": 635.5755615234375, "learning_rate": 9.849530104515314e-06, "loss": 18.6673, "step": 85230 }, { "epoch": 0.17219019299684465, "grad_norm": 503.1160583496094, "learning_rate": 9.849445102287328e-06, "loss": 12.5884, "step": 85240 }, { "epoch": 0.17221039362952847, "grad_norm": 421.8708801269531, "learning_rate": 9.849360076423736e-06, "loss": 31.2692, "step": 85250 }, { "epoch": 0.1722305942622123, "grad_norm": 826.088134765625, "learning_rate": 9.849275026924949e-06, "loss": 31.0778, "step": 85260 }, { "epoch": 0.17225079489489611, "grad_norm": 1230.977783203125, "learning_rate": 9.849189953791385e-06, "loss": 39.1126, "step": 85270 }, { "epoch": 0.17227099552757993, "grad_norm": 291.8015441894531, "learning_rate": 9.849104857023455e-06, "loss": 18.5041, "step": 85280 }, { "epoch": 0.17229119616026373, "grad_norm": 243.10191345214844, "learning_rate": 9.849019736621578e-06, "loss": 34.2812, "step": 85290 }, { "epoch": 0.17231139679294755, "grad_norm": 4.916224002838135, "learning_rate": 9.848934592586165e-06, "loss": 23.2434, "step": 85300 }, { "epoch": 0.17233159742563137, "grad_norm": 353.7478942871094, "learning_rate": 9.848849424917636e-06, "loss": 25.1351, "step": 85310 }, { "epoch": 0.1723517980583152, "grad_norm": 230.8633270263672, "learning_rate": 9.848764233616401e-06, "loss": 33.0791, "step": 85320 }, { "epoch": 0.172371998690999, "grad_norm": 1136.6898193359375, "learning_rate": 9.848679018682879e-06, "loss": 25.9801, "step": 85330 }, { "epoch": 0.17239219932368283, "grad_norm": 288.8282775878906, "learning_rate": 9.848593780117482e-06, "loss": 27.5867, "step": 85340 }, { "epoch": 0.17241239995636665, "grad_norm": 211.8262176513672, "learning_rate": 9.848508517920626e-06, "loss": 26.1852, "step": 85350 }, { "epoch": 0.17243260058905044, "grad_norm": 443.3017272949219, "learning_rate": 9.84842323209273e-06, "loss": 20.2442, "step": 85360 }, { "epoch": 0.17245280122173426, "grad_norm": 618.8023071289062, "learning_rate": 9.848337922634205e-06, "loss": 22.1683, "step": 85370 }, { "epoch": 0.17247300185441808, "grad_norm": 587.0542602539062, "learning_rate": 9.84825258954547e-06, "loss": 25.7737, "step": 85380 }, { "epoch": 0.1724932024871019, "grad_norm": 109.8345947265625, "learning_rate": 9.84816723282694e-06, "loss": 24.3801, "step": 85390 }, { "epoch": 0.17251340311978572, "grad_norm": 1000.1812133789062, "learning_rate": 9.84808185247903e-06, "loss": 37.4684, "step": 85400 }, { "epoch": 0.17253360375246954, "grad_norm": 652.8297729492188, "learning_rate": 9.847996448502159e-06, "loss": 36.792, "step": 85410 }, { "epoch": 0.17255380438515333, "grad_norm": 1045.991455078125, "learning_rate": 9.84791102089674e-06, "loss": 38.8997, "step": 85420 }, { "epoch": 0.17257400501783715, "grad_norm": 268.00225830078125, "learning_rate": 9.84782556966319e-06, "loss": 26.6358, "step": 85430 }, { "epoch": 0.17259420565052097, "grad_norm": 119.07523345947266, "learning_rate": 9.847740094801928e-06, "loss": 19.3829, "step": 85440 }, { "epoch": 0.1726144062832048, "grad_norm": 970.8414916992188, "learning_rate": 9.847654596313368e-06, "loss": 34.3435, "step": 85450 }, { "epoch": 0.1726346069158886, 
"grad_norm": 625.1595458984375, "learning_rate": 9.847569074197927e-06, "loss": 16.7644, "step": 85460 }, { "epoch": 0.17265480754857243, "grad_norm": 464.2474060058594, "learning_rate": 9.847483528456021e-06, "loss": 29.0098, "step": 85470 }, { "epoch": 0.17267500818125622, "grad_norm": 1429.7886962890625, "learning_rate": 9.84739795908807e-06, "loss": 23.6252, "step": 85480 }, { "epoch": 0.17269520881394004, "grad_norm": 201.17111206054688, "learning_rate": 9.84731236609449e-06, "loss": 28.2237, "step": 85490 }, { "epoch": 0.17271540944662386, "grad_norm": 727.03466796875, "learning_rate": 9.847226749475696e-06, "loss": 17.4406, "step": 85500 }, { "epoch": 0.17273561007930768, "grad_norm": 18.44334602355957, "learning_rate": 9.847141109232105e-06, "loss": 50.6119, "step": 85510 }, { "epoch": 0.1727558107119915, "grad_norm": 290.5135498046875, "learning_rate": 9.84705544536414e-06, "loss": 23.2972, "step": 85520 }, { "epoch": 0.17277601134467532, "grad_norm": 268.5848083496094, "learning_rate": 9.846969757872212e-06, "loss": 32.5315, "step": 85530 }, { "epoch": 0.17279621197735914, "grad_norm": 365.1296081542969, "learning_rate": 9.846884046756742e-06, "loss": 28.6295, "step": 85540 }, { "epoch": 0.17281641261004294, "grad_norm": 184.7109375, "learning_rate": 9.846798312018147e-06, "loss": 21.2583, "step": 85550 }, { "epoch": 0.17283661324272676, "grad_norm": 487.54742431640625, "learning_rate": 9.846712553656845e-06, "loss": 20.1115, "step": 85560 }, { "epoch": 0.17285681387541058, "grad_norm": 369.90167236328125, "learning_rate": 9.846626771673254e-06, "loss": 30.3869, "step": 85570 }, { "epoch": 0.1728770145080944, "grad_norm": 373.9340515136719, "learning_rate": 9.846540966067793e-06, "loss": 23.7879, "step": 85580 }, { "epoch": 0.17289721514077822, "grad_norm": 461.4269104003906, "learning_rate": 9.846455136840876e-06, "loss": 32.9735, "step": 85590 }, { "epoch": 0.17291741577346204, "grad_norm": 240.84036254882812, "learning_rate": 9.846369283992927e-06, "loss": 12.2622, "step": 85600 }, { "epoch": 0.17293761640614583, "grad_norm": 375.2400207519531, "learning_rate": 9.846283407524362e-06, "loss": 18.1758, "step": 85610 }, { "epoch": 0.17295781703882965, "grad_norm": 439.8492736816406, "learning_rate": 9.846197507435598e-06, "loss": 28.5939, "step": 85620 }, { "epoch": 0.17297801767151347, "grad_norm": 313.9316101074219, "learning_rate": 9.846111583727056e-06, "loss": 12.9351, "step": 85630 }, { "epoch": 0.1729982183041973, "grad_norm": 320.2373352050781, "learning_rate": 9.846025636399152e-06, "loss": 34.522, "step": 85640 }, { "epoch": 0.1730184189368811, "grad_norm": 392.8552551269531, "learning_rate": 9.845939665452309e-06, "loss": 20.1833, "step": 85650 }, { "epoch": 0.17303861956956493, "grad_norm": 406.9341735839844, "learning_rate": 9.845853670886945e-06, "loss": 30.3154, "step": 85660 }, { "epoch": 0.17305882020224875, "grad_norm": 1090.675048828125, "learning_rate": 9.845767652703475e-06, "loss": 44.2072, "step": 85670 }, { "epoch": 0.17307902083493254, "grad_norm": 434.3168029785156, "learning_rate": 9.845681610902323e-06, "loss": 40.6966, "step": 85680 }, { "epoch": 0.17309922146761636, "grad_norm": 528.4317626953125, "learning_rate": 9.845595545483906e-06, "loss": 13.6988, "step": 85690 }, { "epoch": 0.17311942210030018, "grad_norm": 299.3660583496094, "learning_rate": 9.845509456448642e-06, "loss": 21.5124, "step": 85700 }, { "epoch": 0.173139622732984, "grad_norm": 355.94366455078125, "learning_rate": 9.845423343796957e-06, "loss": 35.3705, "step": 85710 }, { "epoch": 
0.17315982336566782, "grad_norm": 252.6102752685547, "learning_rate": 9.845337207529264e-06, "loss": 14.7852, "step": 85720 }, { "epoch": 0.17318002399835164, "grad_norm": 317.8782958984375, "learning_rate": 9.845251047645984e-06, "loss": 15.0958, "step": 85730 }, { "epoch": 0.17320022463103543, "grad_norm": 671.6204223632812, "learning_rate": 9.84516486414754e-06, "loss": 39.5295, "step": 85740 }, { "epoch": 0.17322042526371925, "grad_norm": 388.7713623046875, "learning_rate": 9.845078657034348e-06, "loss": 30.8397, "step": 85750 }, { "epoch": 0.17324062589640307, "grad_norm": 567.0277099609375, "learning_rate": 9.844992426306832e-06, "loss": 18.9498, "step": 85760 }, { "epoch": 0.1732608265290869, "grad_norm": 291.2276611328125, "learning_rate": 9.84490617196541e-06, "loss": 14.8765, "step": 85770 }, { "epoch": 0.1732810271617707, "grad_norm": 170.3280792236328, "learning_rate": 9.844819894010502e-06, "loss": 11.7575, "step": 85780 }, { "epoch": 0.17330122779445453, "grad_norm": 353.33306884765625, "learning_rate": 9.84473359244253e-06, "loss": 9.9312, "step": 85790 }, { "epoch": 0.17332142842713832, "grad_norm": 567.3309326171875, "learning_rate": 9.844647267261915e-06, "loss": 16.3679, "step": 85800 }, { "epoch": 0.17334162905982214, "grad_norm": 172.38259887695312, "learning_rate": 9.844560918469076e-06, "loss": 35.616, "step": 85810 }, { "epoch": 0.17336182969250596, "grad_norm": 507.5289611816406, "learning_rate": 9.844474546064436e-06, "loss": 39.5978, "step": 85820 }, { "epoch": 0.17338203032518978, "grad_norm": 293.0918884277344, "learning_rate": 9.844388150048413e-06, "loss": 27.7895, "step": 85830 }, { "epoch": 0.1734022309578736, "grad_norm": 375.8603515625, "learning_rate": 9.844301730421431e-06, "loss": 17.5738, "step": 85840 }, { "epoch": 0.17342243159055742, "grad_norm": 121.36817932128906, "learning_rate": 9.84421528718391e-06, "loss": 25.1292, "step": 85850 }, { "epoch": 0.17344263222324124, "grad_norm": 51.05403137207031, "learning_rate": 9.844128820336269e-06, "loss": 27.508, "step": 85860 }, { "epoch": 0.17346283285592504, "grad_norm": 210.55831909179688, "learning_rate": 9.844042329878934e-06, "loss": 27.2181, "step": 85870 }, { "epoch": 0.17348303348860886, "grad_norm": 695.690673828125, "learning_rate": 9.843955815812322e-06, "loss": 27.5437, "step": 85880 }, { "epoch": 0.17350323412129268, "grad_norm": 936.8657836914062, "learning_rate": 9.843869278136857e-06, "loss": 35.3513, "step": 85890 }, { "epoch": 0.1735234347539765, "grad_norm": 485.0730895996094, "learning_rate": 9.843782716852963e-06, "loss": 32.5886, "step": 85900 }, { "epoch": 0.17354363538666032, "grad_norm": 853.9366455078125, "learning_rate": 9.843696131961058e-06, "loss": 33.1918, "step": 85910 }, { "epoch": 0.17356383601934414, "grad_norm": 295.1344909667969, "learning_rate": 9.843609523461565e-06, "loss": 20.3857, "step": 85920 }, { "epoch": 0.17358403665202793, "grad_norm": 115.91895294189453, "learning_rate": 9.843522891354908e-06, "loss": 14.5028, "step": 85930 }, { "epoch": 0.17360423728471175, "grad_norm": 5.01973295211792, "learning_rate": 9.843436235641506e-06, "loss": 16.2039, "step": 85940 }, { "epoch": 0.17362443791739557, "grad_norm": 747.1133422851562, "learning_rate": 9.843349556321787e-06, "loss": 23.1308, "step": 85950 }, { "epoch": 0.1736446385500794, "grad_norm": 187.75865173339844, "learning_rate": 9.843262853396164e-06, "loss": 24.8493, "step": 85960 }, { "epoch": 0.1736648391827632, "grad_norm": 302.3079528808594, "learning_rate": 9.84317612686507e-06, "loss": 21.0599, "step": 
85970 }, { "epoch": 0.17368503981544703, "grad_norm": 604.0231323242188, "learning_rate": 9.843089376728922e-06, "loss": 36.2197, "step": 85980 }, { "epoch": 0.17370524044813085, "grad_norm": 125.68768310546875, "learning_rate": 9.843002602988143e-06, "loss": 24.3327, "step": 85990 }, { "epoch": 0.17372544108081464, "grad_norm": 15.420726776123047, "learning_rate": 9.842915805643156e-06, "loss": 23.0155, "step": 86000 }, { "epoch": 0.17374564171349846, "grad_norm": 621.4620361328125, "learning_rate": 9.842828984694385e-06, "loss": 15.173, "step": 86010 }, { "epoch": 0.17376584234618228, "grad_norm": 630.4441528320312, "learning_rate": 9.842742140142255e-06, "loss": 31.3845, "step": 86020 }, { "epoch": 0.1737860429788661, "grad_norm": 446.2193603515625, "learning_rate": 9.842655271987185e-06, "loss": 19.4188, "step": 86030 }, { "epoch": 0.17380624361154992, "grad_norm": 628.043212890625, "learning_rate": 9.8425683802296e-06, "loss": 32.6731, "step": 86040 }, { "epoch": 0.17382644424423374, "grad_norm": 191.02517700195312, "learning_rate": 9.842481464869926e-06, "loss": 32.1473, "step": 86050 }, { "epoch": 0.17384664487691753, "grad_norm": 886.7763671875, "learning_rate": 9.842394525908585e-06, "loss": 46.2831, "step": 86060 }, { "epoch": 0.17386684550960135, "grad_norm": 503.0904235839844, "learning_rate": 9.842307563345999e-06, "loss": 18.0938, "step": 86070 }, { "epoch": 0.17388704614228517, "grad_norm": 584.3623657226562, "learning_rate": 9.842220577182592e-06, "loss": 23.6258, "step": 86080 }, { "epoch": 0.173907246774969, "grad_norm": 102.97643280029297, "learning_rate": 9.842133567418793e-06, "loss": 28.5246, "step": 86090 }, { "epoch": 0.1739274474076528, "grad_norm": 96.3404312133789, "learning_rate": 9.84204653405502e-06, "loss": 38.0371, "step": 86100 }, { "epoch": 0.17394764804033663, "grad_norm": 542.3681030273438, "learning_rate": 9.841959477091698e-06, "loss": 12.9829, "step": 86110 }, { "epoch": 0.17396784867302043, "grad_norm": 432.1968994140625, "learning_rate": 9.841872396529255e-06, "loss": 42.8921, "step": 86120 }, { "epoch": 0.17398804930570425, "grad_norm": 363.61370849609375, "learning_rate": 9.841785292368113e-06, "loss": 31.3374, "step": 86130 }, { "epoch": 0.17400824993838807, "grad_norm": 1183.536865234375, "learning_rate": 9.841698164608696e-06, "loss": 31.7024, "step": 86140 }, { "epoch": 0.17402845057107189, "grad_norm": 323.79974365234375, "learning_rate": 9.841611013251428e-06, "loss": 25.19, "step": 86150 }, { "epoch": 0.1740486512037557, "grad_norm": 659.388427734375, "learning_rate": 9.841523838296738e-06, "loss": 27.4956, "step": 86160 }, { "epoch": 0.17406885183643953, "grad_norm": 655.215576171875, "learning_rate": 9.841436639745046e-06, "loss": 21.8667, "step": 86170 }, { "epoch": 0.17408905246912335, "grad_norm": 12.534440994262695, "learning_rate": 9.84134941759678e-06, "loss": 23.089, "step": 86180 }, { "epoch": 0.17410925310180714, "grad_norm": 508.0062561035156, "learning_rate": 9.841262171852364e-06, "loss": 32.2547, "step": 86190 }, { "epoch": 0.17412945373449096, "grad_norm": 829.3807373046875, "learning_rate": 9.841174902512223e-06, "loss": 18.1947, "step": 86200 }, { "epoch": 0.17414965436717478, "grad_norm": 103.1199951171875, "learning_rate": 9.841087609576782e-06, "loss": 22.1132, "step": 86210 }, { "epoch": 0.1741698549998586, "grad_norm": 655.1348266601562, "learning_rate": 9.841000293046469e-06, "loss": 17.729, "step": 86220 }, { "epoch": 0.17419005563254242, "grad_norm": 319.682861328125, "learning_rate": 9.840912952921707e-06, "loss": 
24.888, "step": 86230 }, { "epoch": 0.17421025626522624, "grad_norm": 664.3706665039062, "learning_rate": 9.840825589202922e-06, "loss": 35.6744, "step": 86240 }, { "epoch": 0.17423045689791003, "grad_norm": 440.5725402832031, "learning_rate": 9.84073820189054e-06, "loss": 27.2891, "step": 86250 }, { "epoch": 0.17425065753059385, "grad_norm": 304.0477600097656, "learning_rate": 9.840650790984988e-06, "loss": 31.555, "step": 86260 }, { "epoch": 0.17427085816327767, "grad_norm": 708.5603637695312, "learning_rate": 9.84056335648669e-06, "loss": 23.6433, "step": 86270 }, { "epoch": 0.1742910587959615, "grad_norm": 193.69650268554688, "learning_rate": 9.840475898396073e-06, "loss": 44.6928, "step": 86280 }, { "epoch": 0.1743112594286453, "grad_norm": 157.66673278808594, "learning_rate": 9.840388416713564e-06, "loss": 13.4153, "step": 86290 }, { "epoch": 0.17433146006132913, "grad_norm": 291.8675537109375, "learning_rate": 9.84030091143959e-06, "loss": 22.2489, "step": 86300 }, { "epoch": 0.17435166069401295, "grad_norm": 516.598388671875, "learning_rate": 9.840213382574575e-06, "loss": 29.6473, "step": 86310 }, { "epoch": 0.17437186132669674, "grad_norm": 529.6154174804688, "learning_rate": 9.840125830118949e-06, "loss": 29.525, "step": 86320 }, { "epoch": 0.17439206195938056, "grad_norm": 835.8650512695312, "learning_rate": 9.840038254073136e-06, "loss": 27.0934, "step": 86330 }, { "epoch": 0.17441226259206438, "grad_norm": 285.62811279296875, "learning_rate": 9.839950654437563e-06, "loss": 28.5629, "step": 86340 }, { "epoch": 0.1744324632247482, "grad_norm": 452.54388427734375, "learning_rate": 9.839863031212657e-06, "loss": 21.8137, "step": 86350 }, { "epoch": 0.17445266385743202, "grad_norm": 604.9288330078125, "learning_rate": 9.839775384398846e-06, "loss": 19.8672, "step": 86360 }, { "epoch": 0.17447286449011584, "grad_norm": 336.3262023925781, "learning_rate": 9.839687713996558e-06, "loss": 17.3938, "step": 86370 }, { "epoch": 0.17449306512279963, "grad_norm": 377.794189453125, "learning_rate": 9.839600020006217e-06, "loss": 37.7098, "step": 86380 }, { "epoch": 0.17451326575548345, "grad_norm": 1111.11083984375, "learning_rate": 9.839512302428254e-06, "loss": 27.9636, "step": 86390 }, { "epoch": 0.17453346638816727, "grad_norm": 551.1155395507812, "learning_rate": 9.839424561263094e-06, "loss": 44.3898, "step": 86400 }, { "epoch": 0.1745536670208511, "grad_norm": 305.22747802734375, "learning_rate": 9.839336796511167e-06, "loss": 25.7225, "step": 86410 }, { "epoch": 0.17457386765353491, "grad_norm": 426.5642395019531, "learning_rate": 9.839249008172897e-06, "loss": 25.0607, "step": 86420 }, { "epoch": 0.17459406828621873, "grad_norm": 484.6824645996094, "learning_rate": 9.839161196248717e-06, "loss": 30.5871, "step": 86430 }, { "epoch": 0.17461426891890253, "grad_norm": 225.3271942138672, "learning_rate": 9.839073360739052e-06, "loss": 22.7973, "step": 86440 }, { "epoch": 0.17463446955158635, "grad_norm": 238.68386840820312, "learning_rate": 9.838985501644329e-06, "loss": 8.8989, "step": 86450 }, { "epoch": 0.17465467018427017, "grad_norm": 643.7196655273438, "learning_rate": 9.838897618964978e-06, "loss": 34.3993, "step": 86460 }, { "epoch": 0.174674870816954, "grad_norm": 505.0343933105469, "learning_rate": 9.838809712701426e-06, "loss": 34.1341, "step": 86470 }, { "epoch": 0.1746950714496378, "grad_norm": 531.5932006835938, "learning_rate": 9.838721782854103e-06, "loss": 22.0213, "step": 86480 }, { "epoch": 0.17471527208232163, "grad_norm": 166.92123413085938, "learning_rate": 
9.838633829423437e-06, "loss": 20.5236, "step": 86490 }, { "epoch": 0.17473547271500545, "grad_norm": 543.1427612304688, "learning_rate": 9.838545852409857e-06, "loss": 35.5636, "step": 86500 }, { "epoch": 0.17475567334768924, "grad_norm": 595.4486083984375, "learning_rate": 9.83845785181379e-06, "loss": 27.4291, "step": 86510 }, { "epoch": 0.17477587398037306, "grad_norm": 0.0, "learning_rate": 9.838369827635668e-06, "loss": 13.164, "step": 86520 }, { "epoch": 0.17479607461305688, "grad_norm": 188.305419921875, "learning_rate": 9.838281779875918e-06, "loss": 24.5203, "step": 86530 }, { "epoch": 0.1748162752457407, "grad_norm": 354.55157470703125, "learning_rate": 9.838193708534969e-06, "loss": 28.7658, "step": 86540 }, { "epoch": 0.17483647587842452, "grad_norm": 219.94261169433594, "learning_rate": 9.83810561361325e-06, "loss": 30.0866, "step": 86550 }, { "epoch": 0.17485667651110834, "grad_norm": 526.7067260742188, "learning_rate": 9.838017495111191e-06, "loss": 33.0553, "step": 86560 }, { "epoch": 0.17487687714379213, "grad_norm": 427.62835693359375, "learning_rate": 9.837929353029223e-06, "loss": 16.9799, "step": 86570 }, { "epoch": 0.17489707777647595, "grad_norm": 706.796630859375, "learning_rate": 9.837841187367774e-06, "loss": 26.0387, "step": 86580 }, { "epoch": 0.17491727840915977, "grad_norm": 691.537109375, "learning_rate": 9.837752998127272e-06, "loss": 13.7285, "step": 86590 }, { "epoch": 0.1749374790418436, "grad_norm": 211.5023651123047, "learning_rate": 9.83766478530815e-06, "loss": 22.2743, "step": 86600 }, { "epoch": 0.1749576796745274, "grad_norm": 992.1719360351562, "learning_rate": 9.837576548910836e-06, "loss": 31.9646, "step": 86610 }, { "epoch": 0.17497788030721123, "grad_norm": 596.32666015625, "learning_rate": 9.837488288935761e-06, "loss": 31.6708, "step": 86620 }, { "epoch": 0.17499808093989505, "grad_norm": 527.4917602539062, "learning_rate": 9.837400005383355e-06, "loss": 23.7666, "step": 86630 }, { "epoch": 0.17501828157257884, "grad_norm": 297.6175842285156, "learning_rate": 9.837311698254048e-06, "loss": 22.8128, "step": 86640 }, { "epoch": 0.17503848220526266, "grad_norm": 765.5863037109375, "learning_rate": 9.837223367548271e-06, "loss": 52.1946, "step": 86650 }, { "epoch": 0.17505868283794648, "grad_norm": 0.0, "learning_rate": 9.837135013266452e-06, "loss": 12.7448, "step": 86660 }, { "epoch": 0.1750788834706303, "grad_norm": 52.100852966308594, "learning_rate": 9.837046635409026e-06, "loss": 22.3195, "step": 86670 }, { "epoch": 0.17509908410331412, "grad_norm": 118.32286071777344, "learning_rate": 9.83695823397642e-06, "loss": 20.3009, "step": 86680 }, { "epoch": 0.17511928473599794, "grad_norm": 68.52686309814453, "learning_rate": 9.836869808969068e-06, "loss": 18.1231, "step": 86690 }, { "epoch": 0.17513948536868174, "grad_norm": 98.37853240966797, "learning_rate": 9.836781360387396e-06, "loss": 22.4627, "step": 86700 }, { "epoch": 0.17515968600136556, "grad_norm": 633.7237548828125, "learning_rate": 9.83669288823184e-06, "loss": 23.0583, "step": 86710 }, { "epoch": 0.17517988663404938, "grad_norm": 349.5272216796875, "learning_rate": 9.836604392502829e-06, "loss": 47.9853, "step": 86720 }, { "epoch": 0.1752000872667332, "grad_norm": 461.8108825683594, "learning_rate": 9.836515873200796e-06, "loss": 29.9733, "step": 86730 }, { "epoch": 0.17522028789941702, "grad_norm": 2400.55908203125, "learning_rate": 9.83642733032617e-06, "loss": 46.5731, "step": 86740 }, { "epoch": 0.17524048853210084, "grad_norm": 109.9241714477539, "learning_rate": 
9.836338763879386e-06, "loss": 15.8482, "step": 86750 }, { "epoch": 0.17526068916478463, "grad_norm": 322.31573486328125, "learning_rate": 9.83625017386087e-06, "loss": 27.7555, "step": 86760 }, { "epoch": 0.17528088979746845, "grad_norm": 241.99822998046875, "learning_rate": 9.836161560271058e-06, "loss": 24.6157, "step": 86770 }, { "epoch": 0.17530109043015227, "grad_norm": 371.5823669433594, "learning_rate": 9.836072923110384e-06, "loss": 27.911, "step": 86780 }, { "epoch": 0.1753212910628361, "grad_norm": 419.4341125488281, "learning_rate": 9.835984262379275e-06, "loss": 35.3021, "step": 86790 }, { "epoch": 0.1753414916955199, "grad_norm": 201.52525329589844, "learning_rate": 9.835895578078165e-06, "loss": 30.3006, "step": 86800 }, { "epoch": 0.17536169232820373, "grad_norm": 648.3505249023438, "learning_rate": 9.835806870207487e-06, "loss": 13.3086, "step": 86810 }, { "epoch": 0.17538189296088755, "grad_norm": 518.4767456054688, "learning_rate": 9.835718138767672e-06, "loss": 32.3735, "step": 86820 }, { "epoch": 0.17540209359357134, "grad_norm": 358.6706848144531, "learning_rate": 9.835629383759155e-06, "loss": 12.7865, "step": 86830 }, { "epoch": 0.17542229422625516, "grad_norm": 423.8340148925781, "learning_rate": 9.835540605182366e-06, "loss": 24.5132, "step": 86840 }, { "epoch": 0.17544249485893898, "grad_norm": 1237.3050537109375, "learning_rate": 9.835451803037738e-06, "loss": 34.7186, "step": 86850 }, { "epoch": 0.1754626954916228, "grad_norm": 684.8671875, "learning_rate": 9.835362977325703e-06, "loss": 21.6767, "step": 86860 }, { "epoch": 0.17548289612430662, "grad_norm": 247.02529907226562, "learning_rate": 9.835274128046698e-06, "loss": 20.9051, "step": 86870 }, { "epoch": 0.17550309675699044, "grad_norm": 740.8329467773438, "learning_rate": 9.835185255201153e-06, "loss": 31.4438, "step": 86880 }, { "epoch": 0.17552329738967423, "grad_norm": 230.60369873046875, "learning_rate": 9.835096358789501e-06, "loss": 28.1067, "step": 86890 }, { "epoch": 0.17554349802235805, "grad_norm": 291.94354248046875, "learning_rate": 9.835007438812177e-06, "loss": 26.0546, "step": 86900 }, { "epoch": 0.17556369865504187, "grad_norm": 477.5041198730469, "learning_rate": 9.834918495269611e-06, "loss": 20.2462, "step": 86910 }, { "epoch": 0.1755838992877257, "grad_norm": 305.2869873046875, "learning_rate": 9.83482952816224e-06, "loss": 19.1239, "step": 86920 }, { "epoch": 0.1756040999204095, "grad_norm": 289.0022888183594, "learning_rate": 9.834740537490495e-06, "loss": 45.3907, "step": 86930 }, { "epoch": 0.17562430055309333, "grad_norm": 197.3190155029297, "learning_rate": 9.834651523254812e-06, "loss": 23.1044, "step": 86940 }, { "epoch": 0.17564450118577715, "grad_norm": 97.71805572509766, "learning_rate": 9.834562485455622e-06, "loss": 10.2724, "step": 86950 }, { "epoch": 0.17566470181846094, "grad_norm": 300.4330139160156, "learning_rate": 9.834473424093364e-06, "loss": 18.5511, "step": 86960 }, { "epoch": 0.17568490245114476, "grad_norm": 543.9212646484375, "learning_rate": 9.834384339168468e-06, "loss": 20.8691, "step": 86970 }, { "epoch": 0.17570510308382858, "grad_norm": 623.2275390625, "learning_rate": 9.834295230681368e-06, "loss": 22.9184, "step": 86980 }, { "epoch": 0.1757253037165124, "grad_norm": 573.5552368164062, "learning_rate": 9.834206098632499e-06, "loss": 19.0464, "step": 86990 }, { "epoch": 0.17574550434919622, "grad_norm": 536.0875854492188, "learning_rate": 9.834116943022299e-06, "loss": 28.9614, "step": 87000 }, { "epoch": 0.17576570498188004, "grad_norm": 
142.19651794433594, "learning_rate": 9.834027763851196e-06, "loss": 21.2632, "step": 87010 }, { "epoch": 0.17578590561456384, "grad_norm": 276.2485656738281, "learning_rate": 9.833938561119629e-06, "loss": 35.1163, "step": 87020 }, { "epoch": 0.17580610624724766, "grad_norm": 258.3413391113281, "learning_rate": 9.833849334828033e-06, "loss": 22.3229, "step": 87030 }, { "epoch": 0.17582630687993148, "grad_norm": 228.56153869628906, "learning_rate": 9.833760084976838e-06, "loss": 11.6619, "step": 87040 }, { "epoch": 0.1758465075126153, "grad_norm": 462.5628662109375, "learning_rate": 9.833670811566485e-06, "loss": 18.0792, "step": 87050 }, { "epoch": 0.17586670814529912, "grad_norm": 403.3442687988281, "learning_rate": 9.833581514597408e-06, "loss": 26.6091, "step": 87060 }, { "epoch": 0.17588690877798294, "grad_norm": 405.4480895996094, "learning_rate": 9.833492194070039e-06, "loss": 14.3622, "step": 87070 }, { "epoch": 0.17590710941066673, "grad_norm": 356.7472839355469, "learning_rate": 9.833402849984815e-06, "loss": 33.6278, "step": 87080 }, { "epoch": 0.17592731004335055, "grad_norm": 224.11256408691406, "learning_rate": 9.833313482342173e-06, "loss": 6.896, "step": 87090 }, { "epoch": 0.17594751067603437, "grad_norm": 257.6687927246094, "learning_rate": 9.833224091142548e-06, "loss": 22.3012, "step": 87100 }, { "epoch": 0.1759677113087182, "grad_norm": 246.01637268066406, "learning_rate": 9.833134676386373e-06, "loss": 31.0415, "step": 87110 }, { "epoch": 0.175987911941402, "grad_norm": 257.9366455078125, "learning_rate": 9.833045238074085e-06, "loss": 34.6887, "step": 87120 }, { "epoch": 0.17600811257408583, "grad_norm": 670.2232666015625, "learning_rate": 9.832955776206123e-06, "loss": 26.4223, "step": 87130 }, { "epoch": 0.17602831320676965, "grad_norm": 462.1443786621094, "learning_rate": 9.832866290782922e-06, "loss": 17.0104, "step": 87140 }, { "epoch": 0.17604851383945344, "grad_norm": 312.8056640625, "learning_rate": 9.832776781804913e-06, "loss": 25.8889, "step": 87150 }, { "epoch": 0.17606871447213726, "grad_norm": 519.9537963867188, "learning_rate": 9.83268724927254e-06, "loss": 53.274, "step": 87160 }, { "epoch": 0.17608891510482108, "grad_norm": 492.2060852050781, "learning_rate": 9.832597693186233e-06, "loss": 25.1681, "step": 87170 }, { "epoch": 0.1761091157375049, "grad_norm": 442.8360290527344, "learning_rate": 9.83250811354643e-06, "loss": 18.9079, "step": 87180 }, { "epoch": 0.17612931637018872, "grad_norm": 935.154052734375, "learning_rate": 9.832418510353572e-06, "loss": 30.2418, "step": 87190 }, { "epoch": 0.17614951700287254, "grad_norm": 156.57376098632812, "learning_rate": 9.832328883608088e-06, "loss": 24.3359, "step": 87200 }, { "epoch": 0.17616971763555633, "grad_norm": 440.4377746582031, "learning_rate": 9.832239233310421e-06, "loss": 23.9274, "step": 87210 }, { "epoch": 0.17618991826824015, "grad_norm": 296.2905578613281, "learning_rate": 9.832149559461009e-06, "loss": 21.4329, "step": 87220 }, { "epoch": 0.17621011890092397, "grad_norm": 57.15469741821289, "learning_rate": 9.832059862060282e-06, "loss": 35.4871, "step": 87230 }, { "epoch": 0.1762303195336078, "grad_norm": 461.4469909667969, "learning_rate": 9.831970141108684e-06, "loss": 35.2258, "step": 87240 }, { "epoch": 0.1762505201662916, "grad_norm": 459.0410461425781, "learning_rate": 9.831880396606649e-06, "loss": 15.5546, "step": 87250 }, { "epoch": 0.17627072079897543, "grad_norm": 633.9259643554688, "learning_rate": 9.831790628554613e-06, "loss": 28.6745, "step": 87260 }, { "epoch": 
0.17629092143165925, "grad_norm": 545.2411499023438, "learning_rate": 9.831700836953017e-06, "loss": 25.3021, "step": 87270 }, { "epoch": 0.17631112206434305, "grad_norm": 389.36090087890625, "learning_rate": 9.831611021802297e-06, "loss": 33.1733, "step": 87280 }, { "epoch": 0.17633132269702687, "grad_norm": 383.3346252441406, "learning_rate": 9.83152118310289e-06, "loss": 26.8975, "step": 87290 }, { "epoch": 0.17635152332971069, "grad_norm": 550.85791015625, "learning_rate": 9.831431320855235e-06, "loss": 28.9455, "step": 87300 }, { "epoch": 0.1763717239623945, "grad_norm": 721.927978515625, "learning_rate": 9.831341435059772e-06, "loss": 17.3499, "step": 87310 }, { "epoch": 0.17639192459507833, "grad_norm": 742.4193725585938, "learning_rate": 9.831251525716934e-06, "loss": 23.8338, "step": 87320 }, { "epoch": 0.17641212522776215, "grad_norm": 216.81134033203125, "learning_rate": 9.831161592827164e-06, "loss": 30.7533, "step": 87330 }, { "epoch": 0.17643232586044594, "grad_norm": 440.5499267578125, "learning_rate": 9.831071636390899e-06, "loss": 19.2126, "step": 87340 }, { "epoch": 0.17645252649312976, "grad_norm": 222.04574584960938, "learning_rate": 9.830981656408575e-06, "loss": 28.4134, "step": 87350 }, { "epoch": 0.17647272712581358, "grad_norm": 326.693603515625, "learning_rate": 9.830891652880632e-06, "loss": 23.4, "step": 87360 }, { "epoch": 0.1764929277584974, "grad_norm": 380.5382385253906, "learning_rate": 9.83080162580751e-06, "loss": 21.9362, "step": 87370 }, { "epoch": 0.17651312839118122, "grad_norm": 392.95794677734375, "learning_rate": 9.830711575189646e-06, "loss": 22.7161, "step": 87380 }, { "epoch": 0.17653332902386504, "grad_norm": 91.82284545898438, "learning_rate": 9.83062150102748e-06, "loss": 15.9523, "step": 87390 }, { "epoch": 0.17655352965654883, "grad_norm": 1855.065185546875, "learning_rate": 9.830531403321451e-06, "loss": 21.3149, "step": 87400 }, { "epoch": 0.17657373028923265, "grad_norm": 487.7505798339844, "learning_rate": 9.830441282071999e-06, "loss": 30.5719, "step": 87410 }, { "epoch": 0.17659393092191647, "grad_norm": 454.8311462402344, "learning_rate": 9.830351137279559e-06, "loss": 17.6413, "step": 87420 }, { "epoch": 0.1766141315546003, "grad_norm": 1592.8018798828125, "learning_rate": 9.830260968944577e-06, "loss": 26.3831, "step": 87430 }, { "epoch": 0.1766343321872841, "grad_norm": 863.373046875, "learning_rate": 9.830170777067486e-06, "loss": 34.066, "step": 87440 }, { "epoch": 0.17665453281996793, "grad_norm": 207.10821533203125, "learning_rate": 9.83008056164873e-06, "loss": 14.8623, "step": 87450 }, { "epoch": 0.17667473345265175, "grad_norm": 600.4519653320312, "learning_rate": 9.829990322688746e-06, "loss": 28.3211, "step": 87460 }, { "epoch": 0.17669493408533554, "grad_norm": 930.9231567382812, "learning_rate": 9.829900060187976e-06, "loss": 25.3523, "step": 87470 }, { "epoch": 0.17671513471801936, "grad_norm": 200.3104248046875, "learning_rate": 9.82980977414686e-06, "loss": 17.3831, "step": 87480 }, { "epoch": 0.17673533535070318, "grad_norm": 138.60247802734375, "learning_rate": 9.829719464565834e-06, "loss": 34.9849, "step": 87490 }, { "epoch": 0.176755535983387, "grad_norm": 104.7635726928711, "learning_rate": 9.829629131445342e-06, "loss": 29.9168, "step": 87500 }, { "epoch": 0.17677573661607082, "grad_norm": 356.3173828125, "learning_rate": 9.829538774785825e-06, "loss": 36.6058, "step": 87510 }, { "epoch": 0.17679593724875464, "grad_norm": 366.2814636230469, "learning_rate": 9.82944839458772e-06, "loss": 23.335, "step": 87520 
}, { "epoch": 0.17681613788143843, "grad_norm": 445.0623474121094, "learning_rate": 9.82935799085147e-06, "loss": 20.7442, "step": 87530 }, { "epoch": 0.17683633851412225, "grad_norm": 332.32598876953125, "learning_rate": 9.829267563577514e-06, "loss": 26.1064, "step": 87540 }, { "epoch": 0.17685653914680607, "grad_norm": 294.8311767578125, "learning_rate": 9.829177112766295e-06, "loss": 19.3694, "step": 87550 }, { "epoch": 0.1768767397794899, "grad_norm": 957.253662109375, "learning_rate": 9.829086638418252e-06, "loss": 19.1206, "step": 87560 }, { "epoch": 0.17689694041217371, "grad_norm": 521.346923828125, "learning_rate": 9.828996140533826e-06, "loss": 28.4056, "step": 87570 }, { "epoch": 0.17691714104485753, "grad_norm": 913.6758422851562, "learning_rate": 9.82890561911346e-06, "loss": 33.756, "step": 87580 }, { "epoch": 0.17693734167754135, "grad_norm": 236.279052734375, "learning_rate": 9.828815074157591e-06, "loss": 17.5507, "step": 87590 }, { "epoch": 0.17695754231022515, "grad_norm": 142.71974182128906, "learning_rate": 9.828724505666664e-06, "loss": 29.2634, "step": 87600 }, { "epoch": 0.17697774294290897, "grad_norm": 328.37646484375, "learning_rate": 9.82863391364112e-06, "loss": 39.9557, "step": 87610 }, { "epoch": 0.1769979435755928, "grad_norm": 189.7563018798828, "learning_rate": 9.828543298081401e-06, "loss": 37.644, "step": 87620 }, { "epoch": 0.1770181442082766, "grad_norm": 751.918701171875, "learning_rate": 9.828452658987946e-06, "loss": 29.8027, "step": 87630 }, { "epoch": 0.17703834484096043, "grad_norm": 619.622802734375, "learning_rate": 9.828361996361199e-06, "loss": 16.2425, "step": 87640 }, { "epoch": 0.17705854547364425, "grad_norm": 768.6655883789062, "learning_rate": 9.828271310201601e-06, "loss": 22.6406, "step": 87650 }, { "epoch": 0.17707874610632804, "grad_norm": 528.748291015625, "learning_rate": 9.828180600509595e-06, "loss": 15.7845, "step": 87660 }, { "epoch": 0.17709894673901186, "grad_norm": 166.1478729248047, "learning_rate": 9.828089867285622e-06, "loss": 12.502, "step": 87670 }, { "epoch": 0.17711914737169568, "grad_norm": 480.45355224609375, "learning_rate": 9.827999110530124e-06, "loss": 34.4591, "step": 87680 }, { "epoch": 0.1771393480043795, "grad_norm": 447.4638671875, "learning_rate": 9.827908330243545e-06, "loss": 32.2473, "step": 87690 }, { "epoch": 0.17715954863706332, "grad_norm": 569.7048950195312, "learning_rate": 9.827817526426324e-06, "loss": 25.3609, "step": 87700 }, { "epoch": 0.17717974926974714, "grad_norm": 0.0, "learning_rate": 9.827726699078907e-06, "loss": 21.6572, "step": 87710 }, { "epoch": 0.17719994990243093, "grad_norm": 465.3253479003906, "learning_rate": 9.827635848201737e-06, "loss": 21.0944, "step": 87720 }, { "epoch": 0.17722015053511475, "grad_norm": 316.3619689941406, "learning_rate": 9.827544973795254e-06, "loss": 18.0882, "step": 87730 }, { "epoch": 0.17724035116779857, "grad_norm": 660.8316040039062, "learning_rate": 9.827454075859904e-06, "loss": 45.2601, "step": 87740 }, { "epoch": 0.1772605518004824, "grad_norm": 550.6033325195312, "learning_rate": 9.827363154396126e-06, "loss": 37.1864, "step": 87750 }, { "epoch": 0.1772807524331662, "grad_norm": 219.22695922851562, "learning_rate": 9.827272209404366e-06, "loss": 25.944, "step": 87760 }, { "epoch": 0.17730095306585003, "grad_norm": 482.16094970703125, "learning_rate": 9.827181240885068e-06, "loss": 25.122, "step": 87770 }, { "epoch": 0.17732115369853385, "grad_norm": 550.360107421875, "learning_rate": 9.827090248838673e-06, "loss": 25.6232, "step": 87780 
}, { "epoch": 0.17734135433121764, "grad_norm": 585.909912109375, "learning_rate": 9.826999233265626e-06, "loss": 20.155, "step": 87790 }, { "epoch": 0.17736155496390146, "grad_norm": 283.1487121582031, "learning_rate": 9.82690819416637e-06, "loss": 17.7344, "step": 87800 }, { "epoch": 0.17738175559658528, "grad_norm": 601.101806640625, "learning_rate": 9.826817131541349e-06, "loss": 15.1215, "step": 87810 }, { "epoch": 0.1774019562292691, "grad_norm": 805.8764038085938, "learning_rate": 9.826726045391006e-06, "loss": 15.1918, "step": 87820 }, { "epoch": 0.17742215686195292, "grad_norm": 193.71456909179688, "learning_rate": 9.826634935715787e-06, "loss": 19.3605, "step": 87830 }, { "epoch": 0.17744235749463674, "grad_norm": 60.50779724121094, "learning_rate": 9.826543802516135e-06, "loss": 19.2635, "step": 87840 }, { "epoch": 0.17746255812732054, "grad_norm": 445.96173095703125, "learning_rate": 9.826452645792493e-06, "loss": 26.2762, "step": 87850 }, { "epoch": 0.17748275876000436, "grad_norm": 608.7047119140625, "learning_rate": 9.826361465545306e-06, "loss": 22.6092, "step": 87860 }, { "epoch": 0.17750295939268818, "grad_norm": 211.99295043945312, "learning_rate": 9.826270261775018e-06, "loss": 27.7095, "step": 87870 }, { "epoch": 0.177523160025372, "grad_norm": 932.4351806640625, "learning_rate": 9.826179034482074e-06, "loss": 31.0207, "step": 87880 }, { "epoch": 0.17754336065805582, "grad_norm": 1075.3310546875, "learning_rate": 9.82608778366692e-06, "loss": 18.1641, "step": 87890 }, { "epoch": 0.17756356129073964, "grad_norm": 385.71209716796875, "learning_rate": 9.825996509330001e-06, "loss": 21.5776, "step": 87900 }, { "epoch": 0.17758376192342346, "grad_norm": 556.7341918945312, "learning_rate": 9.825905211471757e-06, "loss": 40.6338, "step": 87910 }, { "epoch": 0.17760396255610725, "grad_norm": 19.428258895874023, "learning_rate": 9.825813890092639e-06, "loss": 20.1369, "step": 87920 }, { "epoch": 0.17762416318879107, "grad_norm": 601.0648193359375, "learning_rate": 9.825722545193087e-06, "loss": 18.9756, "step": 87930 }, { "epoch": 0.1776443638214749, "grad_norm": 681.321044921875, "learning_rate": 9.82563117677355e-06, "loss": 23.8888, "step": 87940 }, { "epoch": 0.1776645644541587, "grad_norm": 507.18701171875, "learning_rate": 9.825539784834472e-06, "loss": 15.3194, "step": 87950 }, { "epoch": 0.17768476508684253, "grad_norm": 523.6488037109375, "learning_rate": 9.825448369376298e-06, "loss": 27.7807, "step": 87960 }, { "epoch": 0.17770496571952635, "grad_norm": 644.4873657226562, "learning_rate": 9.825356930399474e-06, "loss": 17.1149, "step": 87970 }, { "epoch": 0.17772516635221014, "grad_norm": 595.9818115234375, "learning_rate": 9.825265467904446e-06, "loss": 29.5429, "step": 87980 }, { "epoch": 0.17774536698489396, "grad_norm": 385.09405517578125, "learning_rate": 9.825173981891658e-06, "loss": 24.3628, "step": 87990 }, { "epoch": 0.17776556761757778, "grad_norm": 382.9449768066406, "learning_rate": 9.825082472361558e-06, "loss": 30.5227, "step": 88000 }, { "epoch": 0.1777857682502616, "grad_norm": 999.642578125, "learning_rate": 9.82499093931459e-06, "loss": 19.8723, "step": 88010 }, { "epoch": 0.17780596888294542, "grad_norm": 609.13232421875, "learning_rate": 9.824899382751204e-06, "loss": 17.872, "step": 88020 }, { "epoch": 0.17782616951562924, "grad_norm": 479.62408447265625, "learning_rate": 9.824807802671843e-06, "loss": 37.9173, "step": 88030 }, { "epoch": 0.17784637014831303, "grad_norm": 755.3995971679688, "learning_rate": 9.824716199076952e-06, "loss": 
28.8684, "step": 88040 }, { "epoch": 0.17786657078099685, "grad_norm": 777.1272583007812, "learning_rate": 9.824624571966982e-06, "loss": 31.0329, "step": 88050 }, { "epoch": 0.17788677141368067, "grad_norm": 42.348876953125, "learning_rate": 9.824532921342375e-06, "loss": 15.062, "step": 88060 }, { "epoch": 0.1779069720463645, "grad_norm": 1899.2667236328125, "learning_rate": 9.82444124720358e-06, "loss": 47.0505, "step": 88070 }, { "epoch": 0.1779271726790483, "grad_norm": 200.7368621826172, "learning_rate": 9.824349549551045e-06, "loss": 37.6947, "step": 88080 }, { "epoch": 0.17794737331173213, "grad_norm": 446.684814453125, "learning_rate": 9.824257828385213e-06, "loss": 11.5831, "step": 88090 }, { "epoch": 0.17796757394441595, "grad_norm": 406.3067626953125, "learning_rate": 9.824166083706534e-06, "loss": 12.2547, "step": 88100 }, { "epoch": 0.17798777457709974, "grad_norm": 197.05783081054688, "learning_rate": 9.824074315515457e-06, "loss": 21.8061, "step": 88110 }, { "epoch": 0.17800797520978356, "grad_norm": 577.2730102539062, "learning_rate": 9.823982523812424e-06, "loss": 41.0802, "step": 88120 }, { "epoch": 0.17802817584246738, "grad_norm": 256.77215576171875, "learning_rate": 9.823890708597887e-06, "loss": 19.7997, "step": 88130 }, { "epoch": 0.1780483764751512, "grad_norm": 455.495361328125, "learning_rate": 9.823798869872291e-06, "loss": 20.599, "step": 88140 }, { "epoch": 0.17806857710783502, "grad_norm": 376.7768859863281, "learning_rate": 9.823707007636085e-06, "loss": 25.4198, "step": 88150 }, { "epoch": 0.17808877774051884, "grad_norm": 84.9360580444336, "learning_rate": 9.823615121889716e-06, "loss": 34.0157, "step": 88160 }, { "epoch": 0.17810897837320264, "grad_norm": 159.77825927734375, "learning_rate": 9.82352321263363e-06, "loss": 18.454, "step": 88170 }, { "epoch": 0.17812917900588646, "grad_norm": 162.8426055908203, "learning_rate": 9.823431279868278e-06, "loss": 16.558, "step": 88180 }, { "epoch": 0.17814937963857028, "grad_norm": 417.8797302246094, "learning_rate": 9.823339323594107e-06, "loss": 103.8009, "step": 88190 }, { "epoch": 0.1781695802712541, "grad_norm": 139.08123779296875, "learning_rate": 9.823247343811567e-06, "loss": 31.3308, "step": 88200 }, { "epoch": 0.17818978090393792, "grad_norm": 346.410888671875, "learning_rate": 9.823155340521104e-06, "loss": 22.9895, "step": 88210 }, { "epoch": 0.17820998153662174, "grad_norm": 328.610595703125, "learning_rate": 9.823063313723165e-06, "loss": 21.5902, "step": 88220 }, { "epoch": 0.17823018216930553, "grad_norm": 645.4741821289062, "learning_rate": 9.822971263418202e-06, "loss": 20.7911, "step": 88230 }, { "epoch": 0.17825038280198935, "grad_norm": 455.9669494628906, "learning_rate": 9.82287918960666e-06, "loss": 19.1658, "step": 88240 }, { "epoch": 0.17827058343467317, "grad_norm": 309.32366943359375, "learning_rate": 9.822787092288991e-06, "loss": 41.181, "step": 88250 }, { "epoch": 0.178290784067357, "grad_norm": 429.23919677734375, "learning_rate": 9.822694971465643e-06, "loss": 24.2197, "step": 88260 }, { "epoch": 0.1783109847000408, "grad_norm": 884.8764038085938, "learning_rate": 9.822602827137065e-06, "loss": 29.4798, "step": 88270 }, { "epoch": 0.17833118533272463, "grad_norm": 259.2513732910156, "learning_rate": 9.822510659303704e-06, "loss": 17.1712, "step": 88280 }, { "epoch": 0.17835138596540845, "grad_norm": 692.7817993164062, "learning_rate": 9.822418467966013e-06, "loss": 17.3893, "step": 88290 }, { "epoch": 0.17837158659809224, "grad_norm": 1016.9531860351562, "learning_rate": 
9.822326253124436e-06, "loss": 36.6533, "step": 88300 }, { "epoch": 0.17839178723077606, "grad_norm": 80.7690200805664, "learning_rate": 9.82223401477943e-06, "loss": 21.8245, "step": 88310 }, { "epoch": 0.17841198786345988, "grad_norm": 291.8055114746094, "learning_rate": 9.822141752931438e-06, "loss": 20.9248, "step": 88320 }, { "epoch": 0.1784321884961437, "grad_norm": 444.70440673828125, "learning_rate": 9.822049467580912e-06, "loss": 17.7563, "step": 88330 }, { "epoch": 0.17845238912882752, "grad_norm": 520.4324951171875, "learning_rate": 9.821957158728302e-06, "loss": 31.1346, "step": 88340 }, { "epoch": 0.17847258976151134, "grad_norm": 109.189208984375, "learning_rate": 9.821864826374057e-06, "loss": 21.6773, "step": 88350 }, { "epoch": 0.17849279039419513, "grad_norm": 528.0753173828125, "learning_rate": 9.82177247051863e-06, "loss": 14.649, "step": 88360 }, { "epoch": 0.17851299102687895, "grad_norm": 852.3883666992188, "learning_rate": 9.821680091162466e-06, "loss": 38.5516, "step": 88370 }, { "epoch": 0.17853319165956277, "grad_norm": 429.7975769042969, "learning_rate": 9.821587688306017e-06, "loss": 15.0936, "step": 88380 }, { "epoch": 0.1785533922922466, "grad_norm": 220.61557006835938, "learning_rate": 9.821495261949739e-06, "loss": 20.1851, "step": 88390 }, { "epoch": 0.1785735929249304, "grad_norm": 531.4949951171875, "learning_rate": 9.821402812094074e-06, "loss": 13.8314, "step": 88400 }, { "epoch": 0.17859379355761423, "grad_norm": 411.6770324707031, "learning_rate": 9.821310338739478e-06, "loss": 21.5172, "step": 88410 }, { "epoch": 0.17861399419029805, "grad_norm": 462.91461181640625, "learning_rate": 9.821217841886399e-06, "loss": 35.1949, "step": 88420 }, { "epoch": 0.17863419482298185, "grad_norm": 1127.7225341796875, "learning_rate": 9.82112532153529e-06, "loss": 50.0871, "step": 88430 }, { "epoch": 0.17865439545566567, "grad_norm": 321.0269775390625, "learning_rate": 9.821032777686601e-06, "loss": 25.3275, "step": 88440 }, { "epoch": 0.17867459608834949, "grad_norm": 183.1136016845703, "learning_rate": 9.820940210340784e-06, "loss": 21.9095, "step": 88450 }, { "epoch": 0.1786947967210333, "grad_norm": 133.33180236816406, "learning_rate": 9.820847619498288e-06, "loss": 16.5137, "step": 88460 }, { "epoch": 0.17871499735371713, "grad_norm": 566.6614379882812, "learning_rate": 9.820755005159565e-06, "loss": 26.651, "step": 88470 }, { "epoch": 0.17873519798640095, "grad_norm": 843.701904296875, "learning_rate": 9.820662367325067e-06, "loss": 18.4092, "step": 88480 }, { "epoch": 0.17875539861908474, "grad_norm": 132.54522705078125, "learning_rate": 9.820569705995244e-06, "loss": 19.0154, "step": 88490 }, { "epoch": 0.17877559925176856, "grad_norm": 1032.7620849609375, "learning_rate": 9.82047702117055e-06, "loss": 21.4827, "step": 88500 }, { "epoch": 0.17879579988445238, "grad_norm": 151.09715270996094, "learning_rate": 9.820384312851437e-06, "loss": 16.5228, "step": 88510 }, { "epoch": 0.1788160005171362, "grad_norm": 358.14984130859375, "learning_rate": 9.820291581038354e-06, "loss": 36.5084, "step": 88520 }, { "epoch": 0.17883620114982002, "grad_norm": 475.3742370605469, "learning_rate": 9.820198825731757e-06, "loss": 40.8406, "step": 88530 }, { "epoch": 0.17885640178250384, "grad_norm": 721.9351806640625, "learning_rate": 9.820106046932092e-06, "loss": 60.1918, "step": 88540 }, { "epoch": 0.17887660241518763, "grad_norm": 575.8406982421875, "learning_rate": 9.820013244639817e-06, "loss": 20.3473, "step": 88550 }, { "epoch": 0.17889680304787145, "grad_norm": 
486.1315612792969, "learning_rate": 9.81992041885538e-06, "loss": 27.2605, "step": 88560 }, { "epoch": 0.17891700368055527, "grad_norm": 229.42684936523438, "learning_rate": 9.819827569579237e-06, "loss": 21.3187, "step": 88570 }, { "epoch": 0.1789372043132391, "grad_norm": 234.98072814941406, "learning_rate": 9.819734696811839e-06, "loss": 12.1782, "step": 88580 }, { "epoch": 0.1789574049459229, "grad_norm": 808.8175048828125, "learning_rate": 9.81964180055364e-06, "loss": 23.4937, "step": 88590 }, { "epoch": 0.17897760557860673, "grad_norm": 585.3851318359375, "learning_rate": 9.819548880805087e-06, "loss": 36.2704, "step": 88600 }, { "epoch": 0.17899780621129055, "grad_norm": 439.37286376953125, "learning_rate": 9.819455937566642e-06, "loss": 22.6001, "step": 88610 }, { "epoch": 0.17901800684397434, "grad_norm": 212.2778778076172, "learning_rate": 9.819362970838751e-06, "loss": 22.1894, "step": 88620 }, { "epoch": 0.17903820747665816, "grad_norm": 1028.92236328125, "learning_rate": 9.819269980621869e-06, "loss": 17.4052, "step": 88630 }, { "epoch": 0.17905840810934198, "grad_norm": 368.2625732421875, "learning_rate": 9.819176966916451e-06, "loss": 23.653, "step": 88640 }, { "epoch": 0.1790786087420258, "grad_norm": 1294.4925537109375, "learning_rate": 9.819083929722947e-06, "loss": 33.3029, "step": 88650 }, { "epoch": 0.17909880937470962, "grad_norm": 354.2082214355469, "learning_rate": 9.818990869041816e-06, "loss": 18.8248, "step": 88660 }, { "epoch": 0.17911901000739344, "grad_norm": 1367.2764892578125, "learning_rate": 9.818897784873504e-06, "loss": 28.3457, "step": 88670 }, { "epoch": 0.17913921064007723, "grad_norm": 373.48748779296875, "learning_rate": 9.818804677218472e-06, "loss": 27.0261, "step": 88680 }, { "epoch": 0.17915941127276105, "grad_norm": 182.73428344726562, "learning_rate": 9.818711546077169e-06, "loss": 15.2134, "step": 88690 }, { "epoch": 0.17917961190544487, "grad_norm": 119.61083221435547, "learning_rate": 9.81861839145005e-06, "loss": 58.4868, "step": 88700 }, { "epoch": 0.1791998125381287, "grad_norm": 682.3272705078125, "learning_rate": 9.818525213337568e-06, "loss": 38.5422, "step": 88710 }, { "epoch": 0.17922001317081251, "grad_norm": 180.2148895263672, "learning_rate": 9.818432011740181e-06, "loss": 10.1237, "step": 88720 }, { "epoch": 0.17924021380349633, "grad_norm": 307.799072265625, "learning_rate": 9.81833878665834e-06, "loss": 18.1421, "step": 88730 }, { "epoch": 0.17926041443618015, "grad_norm": 378.252197265625, "learning_rate": 9.8182455380925e-06, "loss": 18.8232, "step": 88740 }, { "epoch": 0.17928061506886395, "grad_norm": 1313.838623046875, "learning_rate": 9.818152266043115e-06, "loss": 35.5768, "step": 88750 }, { "epoch": 0.17930081570154777, "grad_norm": 365.5284729003906, "learning_rate": 9.818058970510642e-06, "loss": 17.2489, "step": 88760 }, { "epoch": 0.1793210163342316, "grad_norm": 168.6874237060547, "learning_rate": 9.817965651495533e-06, "loss": 26.2463, "step": 88770 }, { "epoch": 0.1793412169669154, "grad_norm": 953.0068969726562, "learning_rate": 9.817872308998242e-06, "loss": 15.8511, "step": 88780 }, { "epoch": 0.17936141759959923, "grad_norm": 717.1253662109375, "learning_rate": 9.817778943019228e-06, "loss": 12.767, "step": 88790 }, { "epoch": 0.17938161823228305, "grad_norm": 737.7371826171875, "learning_rate": 9.817685553558945e-06, "loss": 23.686, "step": 88800 }, { "epoch": 0.17940181886496684, "grad_norm": 143.72450256347656, "learning_rate": 9.817592140617844e-06, "loss": 33.5139, "step": 88810 }, { "epoch": 
0.17942201949765066, "grad_norm": 466.5359191894531, "learning_rate": 9.817498704196384e-06, "loss": 23.8166, "step": 88820 }, { "epoch": 0.17944222013033448, "grad_norm": 761.9892578125, "learning_rate": 9.81740524429502e-06, "loss": 37.5548, "step": 88830 }, { "epoch": 0.1794624207630183, "grad_norm": 608.7931518554688, "learning_rate": 9.817311760914206e-06, "loss": 28.2609, "step": 88840 }, { "epoch": 0.17948262139570212, "grad_norm": 334.7822265625, "learning_rate": 9.8172182540544e-06, "loss": 27.8392, "step": 88850 }, { "epoch": 0.17950282202838594, "grad_norm": 542.93896484375, "learning_rate": 9.817124723716057e-06, "loss": 17.527, "step": 88860 }, { "epoch": 0.17952302266106973, "grad_norm": 809.5488891601562, "learning_rate": 9.817031169899631e-06, "loss": 37.7099, "step": 88870 }, { "epoch": 0.17954322329375355, "grad_norm": 817.4562377929688, "learning_rate": 9.81693759260558e-06, "loss": 30.6387, "step": 88880 }, { "epoch": 0.17956342392643737, "grad_norm": 724.6182861328125, "learning_rate": 9.81684399183436e-06, "loss": 22.2492, "step": 88890 }, { "epoch": 0.1795836245591212, "grad_norm": 162.0093536376953, "learning_rate": 9.816750367586424e-06, "loss": 27.2942, "step": 88900 }, { "epoch": 0.179603825191805, "grad_norm": 408.91680908203125, "learning_rate": 9.816656719862234e-06, "loss": 20.7502, "step": 88910 }, { "epoch": 0.17962402582448883, "grad_norm": 360.9241943359375, "learning_rate": 9.816563048662242e-06, "loss": 15.3958, "step": 88920 }, { "epoch": 0.17964422645717265, "grad_norm": 928.7029418945312, "learning_rate": 9.816469353986905e-06, "loss": 20.9076, "step": 88930 }, { "epoch": 0.17966442708985644, "grad_norm": 352.09735107421875, "learning_rate": 9.816375635836683e-06, "loss": 27.9983, "step": 88940 }, { "epoch": 0.17968462772254026, "grad_norm": 349.9088439941406, "learning_rate": 9.816281894212028e-06, "loss": 22.8719, "step": 88950 }, { "epoch": 0.17970482835522408, "grad_norm": 554.7149047851562, "learning_rate": 9.8161881291134e-06, "loss": 17.0414, "step": 88960 }, { "epoch": 0.1797250289879079, "grad_norm": 437.630126953125, "learning_rate": 9.816094340541256e-06, "loss": 27.4759, "step": 88970 }, { "epoch": 0.17974522962059172, "grad_norm": 253.888916015625, "learning_rate": 9.81600052849605e-06, "loss": 27.4915, "step": 88980 }, { "epoch": 0.17976543025327554, "grad_norm": 387.9095153808594, "learning_rate": 9.815906692978244e-06, "loss": 17.6506, "step": 88990 }, { "epoch": 0.17978563088595934, "grad_norm": 330.9479675292969, "learning_rate": 9.815812833988292e-06, "loss": 21.3976, "step": 89000 }, { "epoch": 0.17980583151864316, "grad_norm": 349.994140625, "learning_rate": 9.815718951526651e-06, "loss": 15.974, "step": 89010 }, { "epoch": 0.17982603215132698, "grad_norm": 630.7244262695312, "learning_rate": 9.815625045593783e-06, "loss": 49.8131, "step": 89020 }, { "epoch": 0.1798462327840108, "grad_norm": 129.8878631591797, "learning_rate": 9.81553111619014e-06, "loss": 35.3881, "step": 89030 }, { "epoch": 0.17986643341669462, "grad_norm": 807.0062866210938, "learning_rate": 9.815437163316182e-06, "loss": 24.98, "step": 89040 }, { "epoch": 0.17988663404937844, "grad_norm": 23.817771911621094, "learning_rate": 9.815343186972369e-06, "loss": 17.7969, "step": 89050 }, { "epoch": 0.17990683468206226, "grad_norm": 278.3724060058594, "learning_rate": 9.815249187159158e-06, "loss": 18.6364, "step": 89060 }, { "epoch": 0.17992703531474605, "grad_norm": 723.9588012695312, "learning_rate": 9.815155163877003e-06, "loss": 37.0394, "step": 89070 }, { 
"epoch": 0.17994723594742987, "grad_norm": 426.5888977050781, "learning_rate": 9.81506111712637e-06, "loss": 16.4161, "step": 89080 }, { "epoch": 0.1799674365801137, "grad_norm": 156.51239013671875, "learning_rate": 9.81496704690771e-06, "loss": 11.2356, "step": 89090 }, { "epoch": 0.1799876372127975, "grad_norm": 427.3960266113281, "learning_rate": 9.814872953221487e-06, "loss": 12.5643, "step": 89100 }, { "epoch": 0.18000783784548133, "grad_norm": 302.4227294921875, "learning_rate": 9.814778836068154e-06, "loss": 19.3252, "step": 89110 }, { "epoch": 0.18002803847816515, "grad_norm": 263.3395690917969, "learning_rate": 9.814684695448176e-06, "loss": 21.6315, "step": 89120 }, { "epoch": 0.18004823911084894, "grad_norm": 1128.4725341796875, "learning_rate": 9.814590531362006e-06, "loss": 29.6173, "step": 89130 }, { "epoch": 0.18006843974353276, "grad_norm": 845.773681640625, "learning_rate": 9.814496343810109e-06, "loss": 28.2154, "step": 89140 }, { "epoch": 0.18008864037621658, "grad_norm": 837.7940673828125, "learning_rate": 9.814402132792939e-06, "loss": 22.2783, "step": 89150 }, { "epoch": 0.1801088410089004, "grad_norm": 536.5330200195312, "learning_rate": 9.814307898310957e-06, "loss": 49.601, "step": 89160 }, { "epoch": 0.18012904164158422, "grad_norm": 122.6075668334961, "learning_rate": 9.814213640364623e-06, "loss": 24.3462, "step": 89170 }, { "epoch": 0.18014924227426804, "grad_norm": 360.3377380371094, "learning_rate": 9.814119358954394e-06, "loss": 20.5534, "step": 89180 }, { "epoch": 0.18016944290695183, "grad_norm": 262.70587158203125, "learning_rate": 9.81402505408073e-06, "loss": 26.6822, "step": 89190 }, { "epoch": 0.18018964353963565, "grad_norm": 150.52392578125, "learning_rate": 9.813930725744095e-06, "loss": 16.4466, "step": 89200 }, { "epoch": 0.18020984417231947, "grad_norm": 428.7914733886719, "learning_rate": 9.813836373944945e-06, "loss": 23.3214, "step": 89210 }, { "epoch": 0.1802300448050033, "grad_norm": 731.9844360351562, "learning_rate": 9.813741998683738e-06, "loss": 33.7192, "step": 89220 }, { "epoch": 0.1802502454376871, "grad_norm": 50.36422348022461, "learning_rate": 9.813647599960938e-06, "loss": 42.3862, "step": 89230 }, { "epoch": 0.18027044607037093, "grad_norm": 710.2027587890625, "learning_rate": 9.813553177777005e-06, "loss": 28.604, "step": 89240 }, { "epoch": 0.18029064670305475, "grad_norm": 1012.0244750976562, "learning_rate": 9.813458732132395e-06, "loss": 32.8651, "step": 89250 }, { "epoch": 0.18031084733573854, "grad_norm": 124.10336303710938, "learning_rate": 9.813364263027572e-06, "loss": 34.6443, "step": 89260 }, { "epoch": 0.18033104796842236, "grad_norm": 935.6665649414062, "learning_rate": 9.813269770462995e-06, "loss": 32.9984, "step": 89270 }, { "epoch": 0.18035124860110618, "grad_norm": 374.593017578125, "learning_rate": 9.813175254439125e-06, "loss": 17.0858, "step": 89280 }, { "epoch": 0.18037144923379, "grad_norm": 374.7408752441406, "learning_rate": 9.813080714956422e-06, "loss": 15.3942, "step": 89290 }, { "epoch": 0.18039164986647382, "grad_norm": 636.0196533203125, "learning_rate": 9.812986152015349e-06, "loss": 22.3878, "step": 89300 }, { "epoch": 0.18041185049915764, "grad_norm": 249.57730102539062, "learning_rate": 9.812891565616363e-06, "loss": 34.4672, "step": 89310 }, { "epoch": 0.18043205113184144, "grad_norm": 470.1400146484375, "learning_rate": 9.812796955759929e-06, "loss": 39.6037, "step": 89320 }, { "epoch": 0.18045225176452526, "grad_norm": 389.9975891113281, "learning_rate": 9.812702322446506e-06, "loss": 
16.4977, "step": 89330 }, { "epoch": 0.18047245239720908, "grad_norm": 350.7633972167969, "learning_rate": 9.812607665676555e-06, "loss": 10.3507, "step": 89340 }, { "epoch": 0.1804926530298929, "grad_norm": 139.79766845703125, "learning_rate": 9.812512985450539e-06, "loss": 15.6512, "step": 89350 }, { "epoch": 0.18051285366257672, "grad_norm": 127.40026092529297, "learning_rate": 9.812418281768919e-06, "loss": 26.3492, "step": 89360 }, { "epoch": 0.18053305429526054, "grad_norm": 243.89901733398438, "learning_rate": 9.812323554632153e-06, "loss": 18.9652, "step": 89370 }, { "epoch": 0.18055325492794436, "grad_norm": 1298.3121337890625, "learning_rate": 9.812228804040708e-06, "loss": 34.2148, "step": 89380 }, { "epoch": 0.18057345556062815, "grad_norm": 759.9421997070312, "learning_rate": 9.812134029995043e-06, "loss": 18.2996, "step": 89390 }, { "epoch": 0.18059365619331197, "grad_norm": 328.4620666503906, "learning_rate": 9.81203923249562e-06, "loss": 30.062, "step": 89400 }, { "epoch": 0.1806138568259958, "grad_norm": 777.0197143554688, "learning_rate": 9.811944411542903e-06, "loss": 24.0067, "step": 89410 }, { "epoch": 0.1806340574586796, "grad_norm": 524.9449462890625, "learning_rate": 9.811849567137351e-06, "loss": 22.9762, "step": 89420 }, { "epoch": 0.18065425809136343, "grad_norm": 234.361572265625, "learning_rate": 9.811754699279428e-06, "loss": 35.5237, "step": 89430 }, { "epoch": 0.18067445872404725, "grad_norm": 306.3564453125, "learning_rate": 9.811659807969596e-06, "loss": 29.2511, "step": 89440 }, { "epoch": 0.18069465935673104, "grad_norm": 303.1343994140625, "learning_rate": 9.811564893208317e-06, "loss": 29.7451, "step": 89450 }, { "epoch": 0.18071485998941486, "grad_norm": 231.79466247558594, "learning_rate": 9.811469954996056e-06, "loss": 13.9908, "step": 89460 }, { "epoch": 0.18073506062209868, "grad_norm": 29.039527893066406, "learning_rate": 9.811374993333274e-06, "loss": 19.4468, "step": 89470 }, { "epoch": 0.1807552612547825, "grad_norm": 379.2695007324219, "learning_rate": 9.811280008220432e-06, "loss": 30.9259, "step": 89480 }, { "epoch": 0.18077546188746632, "grad_norm": 372.0519104003906, "learning_rate": 9.811184999657996e-06, "loss": 14.7546, "step": 89490 }, { "epoch": 0.18079566252015014, "grad_norm": 386.9299621582031, "learning_rate": 9.811089967646427e-06, "loss": 23.1313, "step": 89500 }, { "epoch": 0.18081586315283393, "grad_norm": 1153.033935546875, "learning_rate": 9.81099491218619e-06, "loss": 19.5238, "step": 89510 }, { "epoch": 0.18083606378551775, "grad_norm": 239.7135772705078, "learning_rate": 9.810899833277747e-06, "loss": 28.7153, "step": 89520 }, { "epoch": 0.18085626441820157, "grad_norm": 295.32598876953125, "learning_rate": 9.810804730921561e-06, "loss": 37.923, "step": 89530 }, { "epoch": 0.1808764650508854, "grad_norm": 395.2212219238281, "learning_rate": 9.810709605118098e-06, "loss": 27.4408, "step": 89540 }, { "epoch": 0.1808966656835692, "grad_norm": 380.6080017089844, "learning_rate": 9.810614455867818e-06, "loss": 12.1637, "step": 89550 }, { "epoch": 0.18091686631625303, "grad_norm": 397.90911865234375, "learning_rate": 9.810519283171189e-06, "loss": 12.555, "step": 89560 }, { "epoch": 0.18093706694893685, "grad_norm": 233.5554656982422, "learning_rate": 9.810424087028669e-06, "loss": 22.0574, "step": 89570 }, { "epoch": 0.18095726758162065, "grad_norm": 426.50872802734375, "learning_rate": 9.810328867440729e-06, "loss": 26.7537, "step": 89580 }, { "epoch": 0.18097746821430447, "grad_norm": 319.06915283203125, "learning_rate": 
9.810233624407827e-06, "loss": 35.952, "step": 89590 }, { "epoch": 0.18099766884698829, "grad_norm": 293.732666015625, "learning_rate": 9.81013835793043e-06, "loss": 25.265, "step": 89600 }, { "epoch": 0.1810178694796721, "grad_norm": 351.9346618652344, "learning_rate": 9.810043068009002e-06, "loss": 23.6208, "step": 89610 }, { "epoch": 0.18103807011235593, "grad_norm": 253.8270263671875, "learning_rate": 9.809947754644009e-06, "loss": 25.0661, "step": 89620 }, { "epoch": 0.18105827074503975, "grad_norm": 482.05511474609375, "learning_rate": 9.809852417835913e-06, "loss": 18.5693, "step": 89630 }, { "epoch": 0.18107847137772354, "grad_norm": 305.4869079589844, "learning_rate": 9.80975705758518e-06, "loss": 13.1552, "step": 89640 }, { "epoch": 0.18109867201040736, "grad_norm": 352.21221923828125, "learning_rate": 9.809661673892274e-06, "loss": 37.0356, "step": 89650 }, { "epoch": 0.18111887264309118, "grad_norm": 162.22671508789062, "learning_rate": 9.80956626675766e-06, "loss": 12.715, "step": 89660 }, { "epoch": 0.181139073275775, "grad_norm": 542.3357543945312, "learning_rate": 9.809470836181804e-06, "loss": 16.3945, "step": 89670 }, { "epoch": 0.18115927390845882, "grad_norm": 395.6851501464844, "learning_rate": 9.80937538216517e-06, "loss": 19.242, "step": 89680 }, { "epoch": 0.18117947454114264, "grad_norm": 454.8094482421875, "learning_rate": 9.809279904708224e-06, "loss": 31.3309, "step": 89690 }, { "epoch": 0.18119967517382646, "grad_norm": 687.8006591796875, "learning_rate": 9.809184403811432e-06, "loss": 24.0704, "step": 89700 }, { "epoch": 0.18121987580651025, "grad_norm": 309.10076904296875, "learning_rate": 9.809088879475257e-06, "loss": 29.3016, "step": 89710 }, { "epoch": 0.18124007643919407, "grad_norm": 542.1755981445312, "learning_rate": 9.808993331700167e-06, "loss": 18.7698, "step": 89720 }, { "epoch": 0.1812602770718779, "grad_norm": 402.2753601074219, "learning_rate": 9.808897760486626e-06, "loss": 33.7752, "step": 89730 }, { "epoch": 0.1812804777045617, "grad_norm": 206.4251708984375, "learning_rate": 9.808802165835101e-06, "loss": 21.0911, "step": 89740 }, { "epoch": 0.18130067833724553, "grad_norm": 654.4439697265625, "learning_rate": 9.808706547746057e-06, "loss": 28.3269, "step": 89750 }, { "epoch": 0.18132087896992935, "grad_norm": 571.3541870117188, "learning_rate": 9.808610906219963e-06, "loss": 19.0851, "step": 89760 }, { "epoch": 0.18134107960261314, "grad_norm": 350.6704406738281, "learning_rate": 9.80851524125728e-06, "loss": 31.8791, "step": 89770 }, { "epoch": 0.18136128023529696, "grad_norm": 519.6353149414062, "learning_rate": 9.808419552858477e-06, "loss": 19.224, "step": 89780 }, { "epoch": 0.18138148086798078, "grad_norm": 337.0629577636719, "learning_rate": 9.808323841024021e-06, "loss": 34.6968, "step": 89790 }, { "epoch": 0.1814016815006646, "grad_norm": 672.872314453125, "learning_rate": 9.808228105754378e-06, "loss": 16.8094, "step": 89800 }, { "epoch": 0.18142188213334842, "grad_norm": 459.8106994628906, "learning_rate": 9.808132347050013e-06, "loss": 10.5876, "step": 89810 }, { "epoch": 0.18144208276603224, "grad_norm": 525.3077392578125, "learning_rate": 9.808036564911396e-06, "loss": 16.7304, "step": 89820 }, { "epoch": 0.18146228339871603, "grad_norm": 145.01712036132812, "learning_rate": 9.80794075933899e-06, "loss": 16.1975, "step": 89830 }, { "epoch": 0.18148248403139985, "grad_norm": 625.13720703125, "learning_rate": 9.807844930333266e-06, "loss": 30.531, "step": 89840 }, { "epoch": 0.18150268466408367, "grad_norm": 589.8753662109375, 
"learning_rate": 9.807749077894686e-06, "loss": 19.9581, "step": 89850 }, { "epoch": 0.1815228852967675, "grad_norm": 324.2210998535156, "learning_rate": 9.807653202023723e-06, "loss": 22.9263, "step": 89860 }, { "epoch": 0.18154308592945131, "grad_norm": 408.2608642578125, "learning_rate": 9.80755730272084e-06, "loss": 29.5001, "step": 89870 }, { "epoch": 0.18156328656213513, "grad_norm": 316.8686828613281, "learning_rate": 9.807461379986506e-06, "loss": 17.0876, "step": 89880 }, { "epoch": 0.18158348719481895, "grad_norm": 310.5201110839844, "learning_rate": 9.807365433821188e-06, "loss": 10.0434, "step": 89890 }, { "epoch": 0.18160368782750275, "grad_norm": 289.92010498046875, "learning_rate": 9.807269464225355e-06, "loss": 12.7067, "step": 89900 }, { "epoch": 0.18162388846018657, "grad_norm": 1168.183837890625, "learning_rate": 9.807173471199474e-06, "loss": 30.2959, "step": 89910 }, { "epoch": 0.1816440890928704, "grad_norm": 705.6151123046875, "learning_rate": 9.80707745474401e-06, "loss": 18.0212, "step": 89920 }, { "epoch": 0.1816642897255542, "grad_norm": 508.00830078125, "learning_rate": 9.806981414859435e-06, "loss": 17.4614, "step": 89930 }, { "epoch": 0.18168449035823803, "grad_norm": 589.3351440429688, "learning_rate": 9.806885351546215e-06, "loss": 25.1383, "step": 89940 }, { "epoch": 0.18170469099092185, "grad_norm": 211.0186004638672, "learning_rate": 9.806789264804821e-06, "loss": 17.8697, "step": 89950 }, { "epoch": 0.18172489162360564, "grad_norm": 612.8939819335938, "learning_rate": 9.806693154635719e-06, "loss": 32.0525, "step": 89960 }, { "epoch": 0.18174509225628946, "grad_norm": 294.01751708984375, "learning_rate": 9.806597021039374e-06, "loss": 17.3785, "step": 89970 }, { "epoch": 0.18176529288897328, "grad_norm": 539.4071655273438, "learning_rate": 9.806500864016261e-06, "loss": 19.5875, "step": 89980 }, { "epoch": 0.1817854935216571, "grad_norm": 535.8582153320312, "learning_rate": 9.806404683566845e-06, "loss": 22.0834, "step": 89990 }, { "epoch": 0.18180569415434092, "grad_norm": 371.3377990722656, "learning_rate": 9.806308479691595e-06, "loss": 30.617, "step": 90000 }, { "epoch": 0.18182589478702474, "grad_norm": 166.52806091308594, "learning_rate": 9.80621225239098e-06, "loss": 23.92, "step": 90010 }, { "epoch": 0.18184609541970856, "grad_norm": 512.3137817382812, "learning_rate": 9.806116001665471e-06, "loss": 31.545, "step": 90020 }, { "epoch": 0.18186629605239235, "grad_norm": 316.3475036621094, "learning_rate": 9.806019727515534e-06, "loss": 31.4052, "step": 90030 }, { "epoch": 0.18188649668507617, "grad_norm": 916.9949340820312, "learning_rate": 9.805923429941642e-06, "loss": 33.257, "step": 90040 }, { "epoch": 0.18190669731776, "grad_norm": 545.9802856445312, "learning_rate": 9.80582710894426e-06, "loss": 36.0621, "step": 90050 }, { "epoch": 0.1819268979504438, "grad_norm": 194.6088409423828, "learning_rate": 9.805730764523861e-06, "loss": 19.7219, "step": 90060 }, { "epoch": 0.18194709858312763, "grad_norm": 461.10498046875, "learning_rate": 9.805634396680912e-06, "loss": 17.4369, "step": 90070 }, { "epoch": 0.18196729921581145, "grad_norm": 412.11981201171875, "learning_rate": 9.805538005415885e-06, "loss": 20.6825, "step": 90080 }, { "epoch": 0.18198749984849524, "grad_norm": 813.4285278320312, "learning_rate": 9.805441590729246e-06, "loss": 28.1719, "step": 90090 }, { "epoch": 0.18200770048117906, "grad_norm": 612.4114990234375, "learning_rate": 9.80534515262147e-06, "loss": 26.6351, "step": 90100 }, { "epoch": 0.18202790111386288, "grad_norm": 
145.53575134277344, "learning_rate": 9.805248691093023e-06, "loss": 17.3572, "step": 90110 }, { "epoch": 0.1820481017465467, "grad_norm": 852.3433837890625, "learning_rate": 9.805152206144378e-06, "loss": 40.7627, "step": 90120 }, { "epoch": 0.18206830237923052, "grad_norm": 646.241943359375, "learning_rate": 9.805055697776003e-06, "loss": 32.0512, "step": 90130 }, { "epoch": 0.18208850301191434, "grad_norm": 575.9727172851562, "learning_rate": 9.80495916598837e-06, "loss": 31.3801, "step": 90140 }, { "epoch": 0.18210870364459814, "grad_norm": 536.2017211914062, "learning_rate": 9.804862610781949e-06, "loss": 19.398, "step": 90150 }, { "epoch": 0.18212890427728196, "grad_norm": 325.2167663574219, "learning_rate": 9.80476603215721e-06, "loss": 27.9279, "step": 90160 }, { "epoch": 0.18214910490996578, "grad_norm": 813.2481079101562, "learning_rate": 9.804669430114625e-06, "loss": 29.9856, "step": 90170 }, { "epoch": 0.1821693055426496, "grad_norm": 301.8158874511719, "learning_rate": 9.804572804654662e-06, "loss": 16.7911, "step": 90180 }, { "epoch": 0.18218950617533342, "grad_norm": 705.236572265625, "learning_rate": 9.804476155777796e-06, "loss": 33.4667, "step": 90190 }, { "epoch": 0.18220970680801724, "grad_norm": 207.18588256835938, "learning_rate": 9.804379483484493e-06, "loss": 18.2618, "step": 90200 }, { "epoch": 0.18222990744070106, "grad_norm": 989.70849609375, "learning_rate": 9.80428278777523e-06, "loss": 25.4147, "step": 90210 }, { "epoch": 0.18225010807338485, "grad_norm": 1044.6300048828125, "learning_rate": 9.804186068650474e-06, "loss": 23.8518, "step": 90220 }, { "epoch": 0.18227030870606867, "grad_norm": 677.1602783203125, "learning_rate": 9.804089326110697e-06, "loss": 40.6791, "step": 90230 }, { "epoch": 0.1822905093387525, "grad_norm": 532.63623046875, "learning_rate": 9.803992560156372e-06, "loss": 30.1999, "step": 90240 }, { "epoch": 0.1823107099714363, "grad_norm": 689.9764404296875, "learning_rate": 9.803895770787972e-06, "loss": 24.7726, "step": 90250 }, { "epoch": 0.18233091060412013, "grad_norm": 449.429443359375, "learning_rate": 9.803798958005965e-06, "loss": 13.6979, "step": 90260 }, { "epoch": 0.18235111123680395, "grad_norm": 417.9967956542969, "learning_rate": 9.803702121810823e-06, "loss": 27.424, "step": 90270 }, { "epoch": 0.18237131186948774, "grad_norm": 115.34632110595703, "learning_rate": 9.803605262203022e-06, "loss": 11.9526, "step": 90280 }, { "epoch": 0.18239151250217156, "grad_norm": 300.5381774902344, "learning_rate": 9.80350837918303e-06, "loss": 24.2963, "step": 90290 }, { "epoch": 0.18241171313485538, "grad_norm": 199.52664184570312, "learning_rate": 9.803411472751321e-06, "loss": 23.4315, "step": 90300 }, { "epoch": 0.1824319137675392, "grad_norm": 760.7015991210938, "learning_rate": 9.803314542908368e-06, "loss": 19.409, "step": 90310 }, { "epoch": 0.18245211440022302, "grad_norm": 476.14093017578125, "learning_rate": 9.803217589654642e-06, "loss": 39.1428, "step": 90320 }, { "epoch": 0.18247231503290684, "grad_norm": 352.9468078613281, "learning_rate": 9.803120612990616e-06, "loss": 16.5901, "step": 90330 }, { "epoch": 0.18249251566559066, "grad_norm": 260.7500915527344, "learning_rate": 9.803023612916763e-06, "loss": 15.6792, "step": 90340 }, { "epoch": 0.18251271629827445, "grad_norm": 676.7280883789062, "learning_rate": 9.802926589433553e-06, "loss": 22.615, "step": 90350 }, { "epoch": 0.18253291693095827, "grad_norm": 551.415771484375, "learning_rate": 9.802829542541463e-06, "loss": 20.0372, "step": 90360 }, { "epoch": 
0.1825531175636421, "grad_norm": 414.0419006347656, "learning_rate": 9.802732472240966e-06, "loss": 24.8075, "step": 90370 }, { "epoch": 0.1825733181963259, "grad_norm": 239.91038513183594, "learning_rate": 9.802635378532531e-06, "loss": 22.1132, "step": 90380 }, { "epoch": 0.18259351882900973, "grad_norm": 322.6324462890625, "learning_rate": 9.802538261416635e-06, "loss": 46.8077, "step": 90390 }, { "epoch": 0.18261371946169355, "grad_norm": 348.8457946777344, "learning_rate": 9.80244112089375e-06, "loss": 21.1164, "step": 90400 }, { "epoch": 0.18263392009437734, "grad_norm": 693.45703125, "learning_rate": 9.802343956964348e-06, "loss": 23.1028, "step": 90410 }, { "epoch": 0.18265412072706116, "grad_norm": 544.8148193359375, "learning_rate": 9.802246769628906e-06, "loss": 22.4803, "step": 90420 }, { "epoch": 0.18267432135974498, "grad_norm": 928.405029296875, "learning_rate": 9.802149558887895e-06, "loss": 11.8932, "step": 90430 }, { "epoch": 0.1826945219924288, "grad_norm": 257.213134765625, "learning_rate": 9.802052324741789e-06, "loss": 14.1029, "step": 90440 }, { "epoch": 0.18271472262511262, "grad_norm": 680.4622802734375, "learning_rate": 9.801955067191062e-06, "loss": 27.1829, "step": 90450 }, { "epoch": 0.18273492325779644, "grad_norm": 267.8209228515625, "learning_rate": 9.80185778623619e-06, "loss": 19.5032, "step": 90460 }, { "epoch": 0.18275512389048024, "grad_norm": 437.3539123535156, "learning_rate": 9.801760481877644e-06, "loss": 24.0697, "step": 90470 }, { "epoch": 0.18277532452316406, "grad_norm": 499.927978515625, "learning_rate": 9.8016631541159e-06, "loss": 17.041, "step": 90480 }, { "epoch": 0.18279552515584788, "grad_norm": 323.8553771972656, "learning_rate": 9.801565802951432e-06, "loss": 15.5036, "step": 90490 }, { "epoch": 0.1828157257885317, "grad_norm": 264.6095886230469, "learning_rate": 9.801468428384716e-06, "loss": 18.6817, "step": 90500 }, { "epoch": 0.18283592642121552, "grad_norm": 80.06328582763672, "learning_rate": 9.801371030416224e-06, "loss": 14.7219, "step": 90510 }, { "epoch": 0.18285612705389934, "grad_norm": 997.8451538085938, "learning_rate": 9.801273609046433e-06, "loss": 24.1481, "step": 90520 }, { "epoch": 0.18287632768658316, "grad_norm": 342.7030334472656, "learning_rate": 9.801176164275816e-06, "loss": 32.648, "step": 90530 }, { "epoch": 0.18289652831926695, "grad_norm": 1081.7420654296875, "learning_rate": 9.801078696104849e-06, "loss": 46.3561, "step": 90540 }, { "epoch": 0.18291672895195077, "grad_norm": 320.661865234375, "learning_rate": 9.800981204534006e-06, "loss": 34.9884, "step": 90550 }, { "epoch": 0.1829369295846346, "grad_norm": 914.1555786132812, "learning_rate": 9.800883689563764e-06, "loss": 21.4398, "step": 90560 }, { "epoch": 0.1829571302173184, "grad_norm": 350.35943603515625, "learning_rate": 9.800786151194596e-06, "loss": 19.3436, "step": 90570 }, { "epoch": 0.18297733085000223, "grad_norm": 423.799560546875, "learning_rate": 9.800688589426978e-06, "loss": 20.0453, "step": 90580 }, { "epoch": 0.18299753148268605, "grad_norm": 466.1945495605469, "learning_rate": 9.800591004261388e-06, "loss": 13.6574, "step": 90590 }, { "epoch": 0.18301773211536984, "grad_norm": 284.81573486328125, "learning_rate": 9.8004933956983e-06, "loss": 32.2185, "step": 90600 }, { "epoch": 0.18303793274805366, "grad_norm": 342.5842590332031, "learning_rate": 9.800395763738189e-06, "loss": 11.7302, "step": 90610 }, { "epoch": 0.18305813338073748, "grad_norm": 291.6769104003906, "learning_rate": 9.80029810838153e-06, "loss": 24.1659, "step": 90620 
}, { "epoch": 0.1830783340134213, "grad_norm": 502.8605651855469, "learning_rate": 9.8002004296288e-06, "loss": 16.5568, "step": 90630 }, { "epoch": 0.18309853464610512, "grad_norm": 381.07281494140625, "learning_rate": 9.800102727480476e-06, "loss": 25.4679, "step": 90640 }, { "epoch": 0.18311873527878894, "grad_norm": 352.68731689453125, "learning_rate": 9.800005001937034e-06, "loss": 16.953, "step": 90650 }, { "epoch": 0.18313893591147276, "grad_norm": 1023.3972778320312, "learning_rate": 9.79990725299895e-06, "loss": 34.6754, "step": 90660 }, { "epoch": 0.18315913654415655, "grad_norm": 210.64784240722656, "learning_rate": 9.7998094806667e-06, "loss": 21.666, "step": 90670 }, { "epoch": 0.18317933717684037, "grad_norm": 272.3280029296875, "learning_rate": 9.79971168494076e-06, "loss": 25.4315, "step": 90680 }, { "epoch": 0.1831995378095242, "grad_norm": 656.0177001953125, "learning_rate": 9.799613865821608e-06, "loss": 30.1317, "step": 90690 }, { "epoch": 0.183219738442208, "grad_norm": 432.247802734375, "learning_rate": 9.799516023309719e-06, "loss": 25.4692, "step": 90700 }, { "epoch": 0.18323993907489183, "grad_norm": 402.7593994140625, "learning_rate": 9.799418157405571e-06, "loss": 33.9424, "step": 90710 }, { "epoch": 0.18326013970757565, "grad_norm": 1191.7305908203125, "learning_rate": 9.799320268109644e-06, "loss": 46.6512, "step": 90720 }, { "epoch": 0.18328034034025945, "grad_norm": 299.83856201171875, "learning_rate": 9.799222355422409e-06, "loss": 15.2029, "step": 90730 }, { "epoch": 0.18330054097294327, "grad_norm": 253.31137084960938, "learning_rate": 9.799124419344348e-06, "loss": 21.3899, "step": 90740 }, { "epoch": 0.18332074160562709, "grad_norm": 243.34938049316406, "learning_rate": 9.799026459875935e-06, "loss": 16.0306, "step": 90750 }, { "epoch": 0.1833409422383109, "grad_norm": 365.17510986328125, "learning_rate": 9.798928477017651e-06, "loss": 25.4496, "step": 90760 }, { "epoch": 0.18336114287099473, "grad_norm": 239.15045166015625, "learning_rate": 9.79883047076997e-06, "loss": 25.564, "step": 90770 }, { "epoch": 0.18338134350367855, "grad_norm": 797.671630859375, "learning_rate": 9.798732441133372e-06, "loss": 40.2145, "step": 90780 }, { "epoch": 0.18340154413636234, "grad_norm": 653.4474487304688, "learning_rate": 9.798634388108334e-06, "loss": 32.0455, "step": 90790 }, { "epoch": 0.18342174476904616, "grad_norm": 1001.5005493164062, "learning_rate": 9.798536311695334e-06, "loss": 32.0787, "step": 90800 }, { "epoch": 0.18344194540172998, "grad_norm": 325.3088073730469, "learning_rate": 9.79843821189485e-06, "loss": 26.1869, "step": 90810 }, { "epoch": 0.1834621460344138, "grad_norm": 540.4581298828125, "learning_rate": 9.79834008870736e-06, "loss": 18.94, "step": 90820 }, { "epoch": 0.18348234666709762, "grad_norm": 318.7337341308594, "learning_rate": 9.798241942133344e-06, "loss": 32.0472, "step": 90830 }, { "epoch": 0.18350254729978144, "grad_norm": 279.1095886230469, "learning_rate": 9.798143772173276e-06, "loss": 25.8265, "step": 90840 }, { "epoch": 0.18352274793246526, "grad_norm": 270.6580505371094, "learning_rate": 9.79804557882764e-06, "loss": 18.2445, "step": 90850 }, { "epoch": 0.18354294856514905, "grad_norm": 370.2752685546875, "learning_rate": 9.797947362096909e-06, "loss": 25.2333, "step": 90860 }, { "epoch": 0.18356314919783287, "grad_norm": 352.0126037597656, "learning_rate": 9.797849121981566e-06, "loss": 16.6814, "step": 90870 }, { "epoch": 0.1835833498305167, "grad_norm": 201.3636932373047, "learning_rate": 9.797750858482088e-06, "loss": 
17.3969, "step": 90880 }, { "epoch": 0.1836035504632005, "grad_norm": 564.9591064453125, "learning_rate": 9.797652571598954e-06, "loss": 25.868, "step": 90890 }, { "epoch": 0.18362375109588433, "grad_norm": 35.203826904296875, "learning_rate": 9.797554261332644e-06, "loss": 25.6218, "step": 90900 }, { "epoch": 0.18364395172856815, "grad_norm": 226.03759765625, "learning_rate": 9.797455927683637e-06, "loss": 28.4551, "step": 90910 }, { "epoch": 0.18366415236125194, "grad_norm": 231.2720184326172, "learning_rate": 9.79735757065241e-06, "loss": 33.2498, "step": 90920 }, { "epoch": 0.18368435299393576, "grad_norm": 515.2210693359375, "learning_rate": 9.797259190239444e-06, "loss": 27.7162, "step": 90930 }, { "epoch": 0.18370455362661958, "grad_norm": 429.1297607421875, "learning_rate": 9.797160786445218e-06, "loss": 25.1077, "step": 90940 }, { "epoch": 0.1837247542593034, "grad_norm": 789.713134765625, "learning_rate": 9.797062359270215e-06, "loss": 30.9357, "step": 90950 }, { "epoch": 0.18374495489198722, "grad_norm": 1423.0126953125, "learning_rate": 9.79696390871491e-06, "loss": 32.7171, "step": 90960 }, { "epoch": 0.18376515552467104, "grad_norm": 95.33780670166016, "learning_rate": 9.796865434779786e-06, "loss": 26.9618, "step": 90970 }, { "epoch": 0.18378535615735486, "grad_norm": 562.197021484375, "learning_rate": 9.79676693746532e-06, "loss": 26.0074, "step": 90980 }, { "epoch": 0.18380555679003865, "grad_norm": 549.0386962890625, "learning_rate": 9.796668416771996e-06, "loss": 47.5324, "step": 90990 }, { "epoch": 0.18382575742272247, "grad_norm": 720.4099731445312, "learning_rate": 9.796569872700287e-06, "loss": 20.1496, "step": 91000 }, { "epoch": 0.1838459580554063, "grad_norm": 769.2049560546875, "learning_rate": 9.796471305250683e-06, "loss": 24.3029, "step": 91010 }, { "epoch": 0.18386615868809011, "grad_norm": 400.9169616699219, "learning_rate": 9.79637271442366e-06, "loss": 19.8763, "step": 91020 }, { "epoch": 0.18388635932077393, "grad_norm": 239.54544067382812, "learning_rate": 9.796274100219693e-06, "loss": 19.1498, "step": 91030 }, { "epoch": 0.18390655995345775, "grad_norm": 306.354248046875, "learning_rate": 9.796175462639273e-06, "loss": 19.2447, "step": 91040 }, { "epoch": 0.18392676058614155, "grad_norm": 604.0792846679688, "learning_rate": 9.796076801682873e-06, "loss": 33.2674, "step": 91050 }, { "epoch": 0.18394696121882537, "grad_norm": 831.4409790039062, "learning_rate": 9.795978117350976e-06, "loss": 28.5236, "step": 91060 }, { "epoch": 0.1839671618515092, "grad_norm": 374.80401611328125, "learning_rate": 9.795879409644064e-06, "loss": 25.0916, "step": 91070 }, { "epoch": 0.183987362484193, "grad_norm": 621.6986083984375, "learning_rate": 9.795780678562618e-06, "loss": 24.8459, "step": 91080 }, { "epoch": 0.18400756311687683, "grad_norm": 171.66006469726562, "learning_rate": 9.79568192410712e-06, "loss": 15.5722, "step": 91090 }, { "epoch": 0.18402776374956065, "grad_norm": 483.9534606933594, "learning_rate": 9.795583146278047e-06, "loss": 34.7736, "step": 91100 }, { "epoch": 0.18404796438224444, "grad_norm": 390.1487731933594, "learning_rate": 9.795484345075882e-06, "loss": 48.1763, "step": 91110 }, { "epoch": 0.18406816501492826, "grad_norm": 468.73345947265625, "learning_rate": 9.795385520501113e-06, "loss": 28.4409, "step": 91120 }, { "epoch": 0.18408836564761208, "grad_norm": 434.3601379394531, "learning_rate": 9.795286672554214e-06, "loss": 20.1435, "step": 91130 }, { "epoch": 0.1841085662802959, "grad_norm": 322.37896728515625, "learning_rate": 
9.795187801235668e-06, "loss": 28.5537, "step": 91140 }, { "epoch": 0.18412876691297972, "grad_norm": 804.7830200195312, "learning_rate": 9.795088906545959e-06, "loss": 37.5352, "step": 91150 }, { "epoch": 0.18414896754566354, "grad_norm": 487.69134521484375, "learning_rate": 9.794989988485571e-06, "loss": 26.9165, "step": 91160 }, { "epoch": 0.18416916817834736, "grad_norm": 270.79864501953125, "learning_rate": 9.79489104705498e-06, "loss": 23.71, "step": 91170 }, { "epoch": 0.18418936881103115, "grad_norm": 1238.611328125, "learning_rate": 9.794792082254673e-06, "loss": 23.848, "step": 91180 }, { "epoch": 0.18420956944371497, "grad_norm": 356.26678466796875, "learning_rate": 9.79469309408513e-06, "loss": 29.2174, "step": 91190 }, { "epoch": 0.1842297700763988, "grad_norm": 645.976318359375, "learning_rate": 9.794594082546835e-06, "loss": 21.5411, "step": 91200 }, { "epoch": 0.1842499707090826, "grad_norm": 25.679527282714844, "learning_rate": 9.794495047640271e-06, "loss": 21.4862, "step": 91210 }, { "epoch": 0.18427017134176643, "grad_norm": 502.16778564453125, "learning_rate": 9.79439598936592e-06, "loss": 19.6671, "step": 91220 }, { "epoch": 0.18429037197445025, "grad_norm": 235.72203063964844, "learning_rate": 9.794296907724262e-06, "loss": 27.9619, "step": 91230 }, { "epoch": 0.18431057260713404, "grad_norm": 429.992431640625, "learning_rate": 9.794197802715784e-06, "loss": 73.0211, "step": 91240 }, { "epoch": 0.18433077323981786, "grad_norm": 625.5643920898438, "learning_rate": 9.794098674340966e-06, "loss": 33.1416, "step": 91250 }, { "epoch": 0.18435097387250168, "grad_norm": 484.22747802734375, "learning_rate": 9.793999522600293e-06, "loss": 45.421, "step": 91260 }, { "epoch": 0.1843711745051855, "grad_norm": 443.10504150390625, "learning_rate": 9.793900347494248e-06, "loss": 29.7148, "step": 91270 }, { "epoch": 0.18439137513786932, "grad_norm": 676.413818359375, "learning_rate": 9.793801149023315e-06, "loss": 30.5974, "step": 91280 }, { "epoch": 0.18441157577055314, "grad_norm": 449.8853454589844, "learning_rate": 9.793701927187975e-06, "loss": 24.3086, "step": 91290 }, { "epoch": 0.18443177640323694, "grad_norm": 237.00990295410156, "learning_rate": 9.793602681988714e-06, "loss": 23.9471, "step": 91300 }, { "epoch": 0.18445197703592076, "grad_norm": 2.009406805038452, "learning_rate": 9.793503413426016e-06, "loss": 25.3265, "step": 91310 }, { "epoch": 0.18447217766860458, "grad_norm": 442.2948913574219, "learning_rate": 9.793404121500362e-06, "loss": 30.3534, "step": 91320 }, { "epoch": 0.1844923783012884, "grad_norm": 292.3894958496094, "learning_rate": 9.79330480621224e-06, "loss": 22.5941, "step": 91330 }, { "epoch": 0.18451257893397222, "grad_norm": 198.1782989501953, "learning_rate": 9.793205467562131e-06, "loss": 27.5463, "step": 91340 }, { "epoch": 0.18453277956665604, "grad_norm": 285.3832092285156, "learning_rate": 9.793106105550518e-06, "loss": 11.2666, "step": 91350 }, { "epoch": 0.18455298019933986, "grad_norm": 914.3174438476562, "learning_rate": 9.793006720177887e-06, "loss": 23.5066, "step": 91360 }, { "epoch": 0.18457318083202365, "grad_norm": 302.8216857910156, "learning_rate": 9.792907311444724e-06, "loss": 26.5094, "step": 91370 }, { "epoch": 0.18459338146470747, "grad_norm": 455.9059143066406, "learning_rate": 9.792807879351513e-06, "loss": 33.6376, "step": 91380 }, { "epoch": 0.1846135820973913, "grad_norm": 337.9946594238281, "learning_rate": 9.792708423898735e-06, "loss": 12.2292, "step": 91390 }, { "epoch": 0.1846337827300751, "grad_norm": 
432.6836242675781, "learning_rate": 9.79260894508688e-06, "loss": 21.2961, "step": 91400 }, { "epoch": 0.18465398336275893, "grad_norm": 822.6380615234375, "learning_rate": 9.79250944291643e-06, "loss": 20.3763, "step": 91410 }, { "epoch": 0.18467418399544275, "grad_norm": 714.7791137695312, "learning_rate": 9.792409917387869e-06, "loss": 18.4543, "step": 91420 }, { "epoch": 0.18469438462812654, "grad_norm": 433.1051330566406, "learning_rate": 9.792310368501684e-06, "loss": 13.5443, "step": 91430 }, { "epoch": 0.18471458526081036, "grad_norm": 684.8148803710938, "learning_rate": 9.792210796258358e-06, "loss": 21.7222, "step": 91440 }, { "epoch": 0.18473478589349418, "grad_norm": 287.2908630371094, "learning_rate": 9.79211120065838e-06, "loss": 13.8875, "step": 91450 }, { "epoch": 0.184754986526178, "grad_norm": 425.7033386230469, "learning_rate": 9.792011581702234e-06, "loss": 24.8602, "step": 91460 }, { "epoch": 0.18477518715886182, "grad_norm": 231.6212158203125, "learning_rate": 9.791911939390401e-06, "loss": 11.3502, "step": 91470 }, { "epoch": 0.18479538779154564, "grad_norm": 249.83753967285156, "learning_rate": 9.791812273723374e-06, "loss": 23.6826, "step": 91480 }, { "epoch": 0.18481558842422946, "grad_norm": 499.1154479980469, "learning_rate": 9.791712584701634e-06, "loss": 30.1655, "step": 91490 }, { "epoch": 0.18483578905691325, "grad_norm": 38.84739685058594, "learning_rate": 9.791612872325667e-06, "loss": 30.1333, "step": 91500 }, { "epoch": 0.18485598968959707, "grad_norm": 517.4888305664062, "learning_rate": 9.79151313659596e-06, "loss": 29.1216, "step": 91510 }, { "epoch": 0.1848761903222809, "grad_norm": 113.28861236572266, "learning_rate": 9.791413377513001e-06, "loss": 34.6165, "step": 91520 }, { "epoch": 0.1848963909549647, "grad_norm": 360.3495788574219, "learning_rate": 9.791313595077272e-06, "loss": 19.8101, "step": 91530 }, { "epoch": 0.18491659158764853, "grad_norm": 255.13893127441406, "learning_rate": 9.791213789289264e-06, "loss": 16.1137, "step": 91540 }, { "epoch": 0.18493679222033235, "grad_norm": 652.9347534179688, "learning_rate": 9.791113960149458e-06, "loss": 15.4775, "step": 91550 }, { "epoch": 0.18495699285301614, "grad_norm": 849.7731323242188, "learning_rate": 9.791014107658348e-06, "loss": 37.3864, "step": 91560 }, { "epoch": 0.18497719348569996, "grad_norm": 361.5867614746094, "learning_rate": 9.790914231816414e-06, "loss": 13.7115, "step": 91570 }, { "epoch": 0.18499739411838378, "grad_norm": 182.37893676757812, "learning_rate": 9.790814332624144e-06, "loss": 18.676, "step": 91580 }, { "epoch": 0.1850175947510676, "grad_norm": 256.94537353515625, "learning_rate": 9.790714410082027e-06, "loss": 37.3471, "step": 91590 }, { "epoch": 0.18503779538375142, "grad_norm": 177.5472412109375, "learning_rate": 9.79061446419055e-06, "loss": 11.4554, "step": 91600 }, { "epoch": 0.18505799601643524, "grad_norm": 836.4344482421875, "learning_rate": 9.790514494950196e-06, "loss": 41.3177, "step": 91610 }, { "epoch": 0.18507819664911904, "grad_norm": 246.44635009765625, "learning_rate": 9.790414502361458e-06, "loss": 36.0105, "step": 91620 }, { "epoch": 0.18509839728180286, "grad_norm": 1101.7769775390625, "learning_rate": 9.790314486424821e-06, "loss": 25.5152, "step": 91630 }, { "epoch": 0.18511859791448668, "grad_norm": 561.969970703125, "learning_rate": 9.790214447140771e-06, "loss": 35.5113, "step": 91640 }, { "epoch": 0.1851387985471705, "grad_norm": 431.95269775390625, "learning_rate": 9.790114384509796e-06, "loss": 22.5607, "step": 91650 }, { "epoch": 
0.18515899917985432, "grad_norm": 326.224609375, "learning_rate": 9.790014298532386e-06, "loss": 10.0795, "step": 91660 }, { "epoch": 0.18517919981253814, "grad_norm": 463.1079406738281, "learning_rate": 9.789914189209028e-06, "loss": 16.5434, "step": 91670 }, { "epoch": 0.18519940044522196, "grad_norm": 89.60653686523438, "learning_rate": 9.789814056540207e-06, "loss": 35.3536, "step": 91680 }, { "epoch": 0.18521960107790575, "grad_norm": 919.5565185546875, "learning_rate": 9.789713900526415e-06, "loss": 28.1427, "step": 91690 }, { "epoch": 0.18523980171058957, "grad_norm": 409.40673828125, "learning_rate": 9.789613721168138e-06, "loss": 22.8103, "step": 91700 }, { "epoch": 0.1852600023432734, "grad_norm": 670.8854370117188, "learning_rate": 9.789513518465866e-06, "loss": 30.5297, "step": 91710 }, { "epoch": 0.1852802029759572, "grad_norm": 773.198486328125, "learning_rate": 9.789413292420082e-06, "loss": 23.2327, "step": 91720 }, { "epoch": 0.18530040360864103, "grad_norm": 641.0538330078125, "learning_rate": 9.789313043031281e-06, "loss": 27.2788, "step": 91730 }, { "epoch": 0.18532060424132485, "grad_norm": 253.89642333984375, "learning_rate": 9.78921277029995e-06, "loss": 14.6494, "step": 91740 }, { "epoch": 0.18534080487400864, "grad_norm": 114.03929901123047, "learning_rate": 9.789112474226575e-06, "loss": 16.255, "step": 91750 }, { "epoch": 0.18536100550669246, "grad_norm": 450.802001953125, "learning_rate": 9.789012154811648e-06, "loss": 24.0678, "step": 91760 }, { "epoch": 0.18538120613937628, "grad_norm": 444.7924499511719, "learning_rate": 9.788911812055656e-06, "loss": 14.7521, "step": 91770 }, { "epoch": 0.1854014067720601, "grad_norm": 41.541664123535156, "learning_rate": 9.788811445959088e-06, "loss": 21.4242, "step": 91780 }, { "epoch": 0.18542160740474392, "grad_norm": 677.9859008789062, "learning_rate": 9.788711056522436e-06, "loss": 25.3031, "step": 91790 }, { "epoch": 0.18544180803742774, "grad_norm": 320.2901916503906, "learning_rate": 9.788610643746184e-06, "loss": 19.8867, "step": 91800 }, { "epoch": 0.18546200867011156, "grad_norm": 442.32427978515625, "learning_rate": 9.788510207630825e-06, "loss": 19.3791, "step": 91810 }, { "epoch": 0.18548220930279535, "grad_norm": 373.7599792480469, "learning_rate": 9.78840974817685e-06, "loss": 27.9548, "step": 91820 }, { "epoch": 0.18550240993547917, "grad_norm": 512.506591796875, "learning_rate": 9.788309265384745e-06, "loss": 25.047, "step": 91830 }, { "epoch": 0.185522610568163, "grad_norm": 629.9691772460938, "learning_rate": 9.788208759255003e-06, "loss": 23.8249, "step": 91840 }, { "epoch": 0.1855428112008468, "grad_norm": 461.40625, "learning_rate": 9.788108229788111e-06, "loss": 19.3849, "step": 91850 }, { "epoch": 0.18556301183353063, "grad_norm": 686.3657836914062, "learning_rate": 9.788007676984562e-06, "loss": 29.3324, "step": 91860 }, { "epoch": 0.18558321246621445, "grad_norm": 797.767578125, "learning_rate": 9.787907100844842e-06, "loss": 36.3265, "step": 91870 }, { "epoch": 0.18560341309889825, "grad_norm": 852.8424072265625, "learning_rate": 9.787806501369446e-06, "loss": 24.1865, "step": 91880 }, { "epoch": 0.18562361373158207, "grad_norm": 404.8200988769531, "learning_rate": 9.78770587855886e-06, "loss": 25.7485, "step": 91890 }, { "epoch": 0.18564381436426589, "grad_norm": 539.3692626953125, "learning_rate": 9.787605232413575e-06, "loss": 30.4667, "step": 91900 }, { "epoch": 0.1856640149969497, "grad_norm": 1060.843017578125, "learning_rate": 9.787504562934085e-06, "loss": 32.7073, "step": 91910 }, { 
"epoch": 0.18568421562963353, "grad_norm": 341.1324768066406, "learning_rate": 9.787403870120877e-06, "loss": 21.3345, "step": 91920 }, { "epoch": 0.18570441626231735, "grad_norm": 225.0143280029297, "learning_rate": 9.787303153974444e-06, "loss": 25.7457, "step": 91930 }, { "epoch": 0.18572461689500114, "grad_norm": 486.2966613769531, "learning_rate": 9.787202414495275e-06, "loss": 20.8555, "step": 91940 }, { "epoch": 0.18574481752768496, "grad_norm": 128.24383544921875, "learning_rate": 9.787101651683864e-06, "loss": 15.369, "step": 91950 }, { "epoch": 0.18576501816036878, "grad_norm": 1446.906005859375, "learning_rate": 9.787000865540698e-06, "loss": 27.5928, "step": 91960 }, { "epoch": 0.1857852187930526, "grad_norm": 408.8646545410156, "learning_rate": 9.786900056066272e-06, "loss": 15.0458, "step": 91970 }, { "epoch": 0.18580541942573642, "grad_norm": 128.8407745361328, "learning_rate": 9.786799223261076e-06, "loss": 30.458, "step": 91980 }, { "epoch": 0.18582562005842024, "grad_norm": 135.77200317382812, "learning_rate": 9.7866983671256e-06, "loss": 22.3848, "step": 91990 }, { "epoch": 0.18584582069110406, "grad_norm": 841.3544921875, "learning_rate": 9.786597487660336e-06, "loss": 20.4153, "step": 92000 }, { "epoch": 0.18586602132378785, "grad_norm": 507.96136474609375, "learning_rate": 9.786496584865778e-06, "loss": 13.7371, "step": 92010 }, { "epoch": 0.18588622195647167, "grad_norm": 196.4841766357422, "learning_rate": 9.786395658742415e-06, "loss": 17.8674, "step": 92020 }, { "epoch": 0.1859064225891555, "grad_norm": 211.14559936523438, "learning_rate": 9.786294709290741e-06, "loss": 11.653, "step": 92030 }, { "epoch": 0.1859266232218393, "grad_norm": 312.5054931640625, "learning_rate": 9.786193736511247e-06, "loss": 18.0755, "step": 92040 }, { "epoch": 0.18594682385452313, "grad_norm": 338.7178955078125, "learning_rate": 9.786092740404424e-06, "loss": 18.7011, "step": 92050 }, { "epoch": 0.18596702448720695, "grad_norm": 194.91444396972656, "learning_rate": 9.78599172097077e-06, "loss": 12.2531, "step": 92060 }, { "epoch": 0.18598722511989074, "grad_norm": 577.6099853515625, "learning_rate": 9.785890678210768e-06, "loss": 27.4933, "step": 92070 }, { "epoch": 0.18600742575257456, "grad_norm": 270.8672790527344, "learning_rate": 9.785789612124916e-06, "loss": 31.445, "step": 92080 }, { "epoch": 0.18602762638525838, "grad_norm": 643.9032592773438, "learning_rate": 9.785688522713707e-06, "loss": 20.8608, "step": 92090 }, { "epoch": 0.1860478270179422, "grad_norm": 533.7869262695312, "learning_rate": 9.785587409977632e-06, "loss": 47.6427, "step": 92100 }, { "epoch": 0.18606802765062602, "grad_norm": 524.4956665039062, "learning_rate": 9.785486273917184e-06, "loss": 23.4507, "step": 92110 }, { "epoch": 0.18608822828330984, "grad_norm": 647.6858520507812, "learning_rate": 9.785385114532858e-06, "loss": 25.3778, "step": 92120 }, { "epoch": 0.18610842891599366, "grad_norm": 497.45330810546875, "learning_rate": 9.785283931825143e-06, "loss": 22.2115, "step": 92130 }, { "epoch": 0.18612862954867745, "grad_norm": 124.12263488769531, "learning_rate": 9.785182725794535e-06, "loss": 51.5499, "step": 92140 }, { "epoch": 0.18614883018136127, "grad_norm": 167.64231872558594, "learning_rate": 9.785081496441528e-06, "loss": 31.4775, "step": 92150 }, { "epoch": 0.1861690308140451, "grad_norm": 205.71829223632812, "learning_rate": 9.784980243766613e-06, "loss": 15.2482, "step": 92160 }, { "epoch": 0.18618923144672891, "grad_norm": 320.18206787109375, "learning_rate": 9.784878967770286e-06, "loss": 
23.7468, "step": 92170 }, { "epoch": 0.18620943207941273, "grad_norm": 1295.7110595703125, "learning_rate": 9.784777668453039e-06, "loss": 35.8464, "step": 92180 }, { "epoch": 0.18622963271209655, "grad_norm": 475.7550964355469, "learning_rate": 9.784676345815364e-06, "loss": 29.358, "step": 92190 }, { "epoch": 0.18624983334478035, "grad_norm": 651.8758544921875, "learning_rate": 9.784574999857757e-06, "loss": 18.2877, "step": 92200 }, { "epoch": 0.18627003397746417, "grad_norm": 489.3287353515625, "learning_rate": 9.784473630580713e-06, "loss": 38.393, "step": 92210 }, { "epoch": 0.186290234610148, "grad_norm": 632.679931640625, "learning_rate": 9.784372237984726e-06, "loss": 29.1181, "step": 92220 }, { "epoch": 0.1863104352428318, "grad_norm": 826.5474243164062, "learning_rate": 9.784270822070288e-06, "loss": 27.4359, "step": 92230 }, { "epoch": 0.18633063587551563, "grad_norm": 282.6408386230469, "learning_rate": 9.784169382837893e-06, "loss": 34.9363, "step": 92240 }, { "epoch": 0.18635083650819945, "grad_norm": 435.7392883300781, "learning_rate": 9.78406792028804e-06, "loss": 15.9484, "step": 92250 }, { "epoch": 0.18637103714088324, "grad_norm": 780.05419921875, "learning_rate": 9.783966434421215e-06, "loss": 19.4334, "step": 92260 }, { "epoch": 0.18639123777356706, "grad_norm": 383.31512451171875, "learning_rate": 9.783864925237922e-06, "loss": 21.3674, "step": 92270 }, { "epoch": 0.18641143840625088, "grad_norm": 221.4168701171875, "learning_rate": 9.78376339273865e-06, "loss": 23.3482, "step": 92280 }, { "epoch": 0.1864316390389347, "grad_norm": 748.03125, "learning_rate": 9.783661836923894e-06, "loss": 29.0127, "step": 92290 }, { "epoch": 0.18645183967161852, "grad_norm": 366.36187744140625, "learning_rate": 9.783560257794153e-06, "loss": 16.3667, "step": 92300 }, { "epoch": 0.18647204030430234, "grad_norm": 933.26318359375, "learning_rate": 9.783458655349919e-06, "loss": 17.0001, "step": 92310 }, { "epoch": 0.18649224093698616, "grad_norm": 792.078369140625, "learning_rate": 9.783357029591686e-06, "loss": 39.9801, "step": 92320 }, { "epoch": 0.18651244156966995, "grad_norm": 556.2901611328125, "learning_rate": 9.783255380519953e-06, "loss": 39.2964, "step": 92330 }, { "epoch": 0.18653264220235377, "grad_norm": 847.1810913085938, "learning_rate": 9.783153708135214e-06, "loss": 37.5978, "step": 92340 }, { "epoch": 0.1865528428350376, "grad_norm": 396.2494812011719, "learning_rate": 9.783052012437962e-06, "loss": 22.5996, "step": 92350 }, { "epoch": 0.1865730434677214, "grad_norm": 538.60400390625, "learning_rate": 9.782950293428695e-06, "loss": 18.3907, "step": 92360 }, { "epoch": 0.18659324410040523, "grad_norm": 394.4925842285156, "learning_rate": 9.782848551107908e-06, "loss": 16.942, "step": 92370 }, { "epoch": 0.18661344473308905, "grad_norm": 597.7157592773438, "learning_rate": 9.782746785476098e-06, "loss": 20.4589, "step": 92380 }, { "epoch": 0.18663364536577284, "grad_norm": 847.0343017578125, "learning_rate": 9.78264499653376e-06, "loss": 38.7538, "step": 92390 }, { "epoch": 0.18665384599845666, "grad_norm": 930.4030151367188, "learning_rate": 9.78254318428139e-06, "loss": 24.3005, "step": 92400 }, { "epoch": 0.18667404663114048, "grad_norm": 1564.1207275390625, "learning_rate": 9.782441348719485e-06, "loss": 54.1396, "step": 92410 }, { "epoch": 0.1866942472638243, "grad_norm": 529.907470703125, "learning_rate": 9.782339489848541e-06, "loss": 40.0633, "step": 92420 }, { "epoch": 0.18671444789650812, "grad_norm": 451.3714294433594, "learning_rate": 9.782237607669053e-06, 
"loss": 22.0001, "step": 92430 }, { "epoch": 0.18673464852919194, "grad_norm": 470.5815124511719, "learning_rate": 9.782135702181521e-06, "loss": 14.8972, "step": 92440 }, { "epoch": 0.18675484916187576, "grad_norm": 280.4551086425781, "learning_rate": 9.782033773386439e-06, "loss": 22.6161, "step": 92450 }, { "epoch": 0.18677504979455956, "grad_norm": 862.764404296875, "learning_rate": 9.781931821284305e-06, "loss": 26.2604, "step": 92460 }, { "epoch": 0.18679525042724338, "grad_norm": 690.3392944335938, "learning_rate": 9.781829845875613e-06, "loss": 44.8535, "step": 92470 }, { "epoch": 0.1868154510599272, "grad_norm": 308.93902587890625, "learning_rate": 9.781727847160865e-06, "loss": 19.226, "step": 92480 }, { "epoch": 0.18683565169261102, "grad_norm": 1005.1414794921875, "learning_rate": 9.781625825140552e-06, "loss": 34.8473, "step": 92490 }, { "epoch": 0.18685585232529484, "grad_norm": 709.4344482421875, "learning_rate": 9.781523779815178e-06, "loss": 18.6231, "step": 92500 }, { "epoch": 0.18687605295797866, "grad_norm": 551.372802734375, "learning_rate": 9.781421711185236e-06, "loss": 17.6513, "step": 92510 }, { "epoch": 0.18689625359066245, "grad_norm": 771.9329223632812, "learning_rate": 9.781319619251223e-06, "loss": 49.4025, "step": 92520 }, { "epoch": 0.18691645422334627, "grad_norm": 474.9455871582031, "learning_rate": 9.78121750401364e-06, "loss": 20.0667, "step": 92530 }, { "epoch": 0.1869366548560301, "grad_norm": 354.9248962402344, "learning_rate": 9.781115365472983e-06, "loss": 29.108, "step": 92540 }, { "epoch": 0.1869568554887139, "grad_norm": 910.9464111328125, "learning_rate": 9.781013203629748e-06, "loss": 17.285, "step": 92550 }, { "epoch": 0.18697705612139773, "grad_norm": 612.3464965820312, "learning_rate": 9.780911018484436e-06, "loss": 23.8168, "step": 92560 }, { "epoch": 0.18699725675408155, "grad_norm": 194.9729461669922, "learning_rate": 9.780808810037543e-06, "loss": 19.419, "step": 92570 }, { "epoch": 0.18701745738676534, "grad_norm": 665.943359375, "learning_rate": 9.780706578289567e-06, "loss": 29.7412, "step": 92580 }, { "epoch": 0.18703765801944916, "grad_norm": 522.5048217773438, "learning_rate": 9.780604323241007e-06, "loss": 24.8991, "step": 92590 }, { "epoch": 0.18705785865213298, "grad_norm": 104.87069702148438, "learning_rate": 9.780502044892363e-06, "loss": 12.9761, "step": 92600 }, { "epoch": 0.1870780592848168, "grad_norm": 1118.92822265625, "learning_rate": 9.78039974324413e-06, "loss": 30.0879, "step": 92610 }, { "epoch": 0.18709825991750062, "grad_norm": 348.06439208984375, "learning_rate": 9.78029741829681e-06, "loss": 26.8473, "step": 92620 }, { "epoch": 0.18711846055018444, "grad_norm": 249.37655639648438, "learning_rate": 9.780195070050898e-06, "loss": 14.2755, "step": 92630 }, { "epoch": 0.18713866118286826, "grad_norm": 492.69427490234375, "learning_rate": 9.780092698506897e-06, "loss": 26.6387, "step": 92640 }, { "epoch": 0.18715886181555205, "grad_norm": 268.4568176269531, "learning_rate": 9.779990303665303e-06, "loss": 13.0258, "step": 92650 }, { "epoch": 0.18717906244823587, "grad_norm": 641.7935791015625, "learning_rate": 9.779887885526616e-06, "loss": 17.125, "step": 92660 }, { "epoch": 0.1871992630809197, "grad_norm": 342.4461975097656, "learning_rate": 9.779785444091336e-06, "loss": 12.7438, "step": 92670 }, { "epoch": 0.1872194637136035, "grad_norm": 17.85383415222168, "learning_rate": 9.779682979359961e-06, "loss": 21.3807, "step": 92680 }, { "epoch": 0.18723966434628733, "grad_norm": 701.517578125, "learning_rate": 
9.77958049133299e-06, "loss": 26.6243, "step": 92690 }, { "epoch": 0.18725986497897115, "grad_norm": 257.793212890625, "learning_rate": 9.779477980010924e-06, "loss": 37.1666, "step": 92700 }, { "epoch": 0.18728006561165494, "grad_norm": 485.3106689453125, "learning_rate": 9.779375445394262e-06, "loss": 20.7584, "step": 92710 }, { "epoch": 0.18730026624433876, "grad_norm": 204.590087890625, "learning_rate": 9.779272887483503e-06, "loss": 28.2374, "step": 92720 }, { "epoch": 0.18732046687702258, "grad_norm": 116.32440185546875, "learning_rate": 9.77917030627915e-06, "loss": 29.7355, "step": 92730 }, { "epoch": 0.1873406675097064, "grad_norm": 208.10128784179688, "learning_rate": 9.779067701781698e-06, "loss": 26.3415, "step": 92740 }, { "epoch": 0.18736086814239022, "grad_norm": 862.18505859375, "learning_rate": 9.778965073991652e-06, "loss": 39.0226, "step": 92750 }, { "epoch": 0.18738106877507404, "grad_norm": 312.6378479003906, "learning_rate": 9.778862422909507e-06, "loss": 19.8167, "step": 92760 }, { "epoch": 0.18740126940775786, "grad_norm": 520.6155395507812, "learning_rate": 9.778759748535768e-06, "loss": 27.1971, "step": 92770 }, { "epoch": 0.18742147004044166, "grad_norm": 31.449466705322266, "learning_rate": 9.778657050870934e-06, "loss": 22.8948, "step": 92780 }, { "epoch": 0.18744167067312548, "grad_norm": 405.7028503417969, "learning_rate": 9.778554329915503e-06, "loss": 32.0095, "step": 92790 }, { "epoch": 0.1874618713058093, "grad_norm": 389.78369140625, "learning_rate": 9.778451585669982e-06, "loss": 12.5171, "step": 92800 }, { "epoch": 0.18748207193849312, "grad_norm": 260.9162902832031, "learning_rate": 9.778348818134864e-06, "loss": 32.6285, "step": 92810 }, { "epoch": 0.18750227257117694, "grad_norm": 938.2512817382812, "learning_rate": 9.778246027310654e-06, "loss": 46.6952, "step": 92820 }, { "epoch": 0.18752247320386076, "grad_norm": 9.68444538116455, "learning_rate": 9.778143213197852e-06, "loss": 25.1359, "step": 92830 }, { "epoch": 0.18754267383654455, "grad_norm": 164.32061767578125, "learning_rate": 9.77804037579696e-06, "loss": 22.7962, "step": 92840 }, { "epoch": 0.18756287446922837, "grad_norm": 472.6469421386719, "learning_rate": 9.777937515108478e-06, "loss": 17.5599, "step": 92850 }, { "epoch": 0.1875830751019122, "grad_norm": 123.95637512207031, "learning_rate": 9.77783463113291e-06, "loss": 26.1313, "step": 92860 }, { "epoch": 0.187603275734596, "grad_norm": 231.21205139160156, "learning_rate": 9.777731723870753e-06, "loss": 15.3762, "step": 92870 }, { "epoch": 0.18762347636727983, "grad_norm": 223.0507354736328, "learning_rate": 9.777628793322513e-06, "loss": 23.4335, "step": 92880 }, { "epoch": 0.18764367699996365, "grad_norm": 430.06610107421875, "learning_rate": 9.777525839488688e-06, "loss": 28.6041, "step": 92890 }, { "epoch": 0.18766387763264744, "grad_norm": 294.2397766113281, "learning_rate": 9.777422862369782e-06, "loss": 27.0556, "step": 92900 }, { "epoch": 0.18768407826533126, "grad_norm": 972.4275512695312, "learning_rate": 9.777319861966298e-06, "loss": 26.982, "step": 92910 }, { "epoch": 0.18770427889801508, "grad_norm": 198.08999633789062, "learning_rate": 9.777216838278735e-06, "loss": 13.3893, "step": 92920 }, { "epoch": 0.1877244795306989, "grad_norm": 604.3071899414062, "learning_rate": 9.777113791307597e-06, "loss": 32.195, "step": 92930 }, { "epoch": 0.18774468016338272, "grad_norm": 148.30014038085938, "learning_rate": 9.777010721053387e-06, "loss": 26.3873, "step": 92940 }, { "epoch": 0.18776488079606654, "grad_norm": 
605.6109619140625, "learning_rate": 9.776907627516604e-06, "loss": 36.4842, "step": 92950 }, { "epoch": 0.18778508142875036, "grad_norm": 257.2639465332031, "learning_rate": 9.776804510697753e-06, "loss": 23.2307, "step": 92960 }, { "epoch": 0.18780528206143415, "grad_norm": 351.9004821777344, "learning_rate": 9.776701370597337e-06, "loss": 18.8501, "step": 92970 }, { "epoch": 0.18782548269411797, "grad_norm": 55.3215446472168, "learning_rate": 9.776598207215857e-06, "loss": 30.9671, "step": 92980 }, { "epoch": 0.1878456833268018, "grad_norm": 394.4371337890625, "learning_rate": 9.776495020553817e-06, "loss": 17.9072, "step": 92990 }, { "epoch": 0.1878658839594856, "grad_norm": 586.047119140625, "learning_rate": 9.776391810611719e-06, "loss": 12.5296, "step": 93000 }, { "epoch": 0.18788608459216943, "grad_norm": 338.12921142578125, "learning_rate": 9.776288577390067e-06, "loss": 13.8105, "step": 93010 }, { "epoch": 0.18790628522485325, "grad_norm": 266.59466552734375, "learning_rate": 9.776185320889364e-06, "loss": 15.234, "step": 93020 }, { "epoch": 0.18792648585753705, "grad_norm": 553.8460693359375, "learning_rate": 9.776082041110112e-06, "loss": 9.0351, "step": 93030 }, { "epoch": 0.18794668649022087, "grad_norm": 422.61676025390625, "learning_rate": 9.775978738052818e-06, "loss": 24.472, "step": 93040 }, { "epoch": 0.18796688712290469, "grad_norm": 215.39242553710938, "learning_rate": 9.775875411717981e-06, "loss": 22.547, "step": 93050 }, { "epoch": 0.1879870877555885, "grad_norm": 240.72828674316406, "learning_rate": 9.775772062106106e-06, "loss": 28.4686, "step": 93060 }, { "epoch": 0.18800728838827233, "grad_norm": 452.4482727050781, "learning_rate": 9.775668689217698e-06, "loss": 15.161, "step": 93070 }, { "epoch": 0.18802748902095615, "grad_norm": 371.5116882324219, "learning_rate": 9.775565293053262e-06, "loss": 24.6782, "step": 93080 }, { "epoch": 0.18804768965363997, "grad_norm": 82.33761596679688, "learning_rate": 9.775461873613297e-06, "loss": 20.9432, "step": 93090 }, { "epoch": 0.18806789028632376, "grad_norm": 297.91217041015625, "learning_rate": 9.775358430898311e-06, "loss": 26.2282, "step": 93100 }, { "epoch": 0.18808809091900758, "grad_norm": 194.85494995117188, "learning_rate": 9.775254964908807e-06, "loss": 19.049, "step": 93110 }, { "epoch": 0.1881082915516914, "grad_norm": 302.1685791015625, "learning_rate": 9.77515147564529e-06, "loss": 17.2638, "step": 93120 }, { "epoch": 0.18812849218437522, "grad_norm": 424.76251220703125, "learning_rate": 9.775047963108264e-06, "loss": 14.7522, "step": 93130 }, { "epoch": 0.18814869281705904, "grad_norm": 386.8235168457031, "learning_rate": 9.774944427298232e-06, "loss": 22.5074, "step": 93140 }, { "epoch": 0.18816889344974286, "grad_norm": 451.16705322265625, "learning_rate": 9.7748408682157e-06, "loss": 40.7772, "step": 93150 }, { "epoch": 0.18818909408242665, "grad_norm": 280.4852600097656, "learning_rate": 9.774737285861176e-06, "loss": 32.5479, "step": 93160 }, { "epoch": 0.18820929471511047, "grad_norm": 634.6343994140625, "learning_rate": 9.774633680235158e-06, "loss": 22.3752, "step": 93170 }, { "epoch": 0.1882294953477943, "grad_norm": 345.7625732421875, "learning_rate": 9.774530051338155e-06, "loss": 15.5305, "step": 93180 }, { "epoch": 0.1882496959804781, "grad_norm": 328.84136962890625, "learning_rate": 9.774426399170673e-06, "loss": 22.2794, "step": 93190 }, { "epoch": 0.18826989661316193, "grad_norm": 306.8594055175781, "learning_rate": 9.774322723733216e-06, "loss": 18.519, "step": 93200 }, { "epoch": 
0.18829009724584575, "grad_norm": 433.818603515625, "learning_rate": 9.774219025026289e-06, "loss": 24.7969, "step": 93210 }, { "epoch": 0.18831029787852954, "grad_norm": 208.9978485107422, "learning_rate": 9.774115303050395e-06, "loss": 34.9355, "step": 93220 }, { "epoch": 0.18833049851121336, "grad_norm": 384.2287292480469, "learning_rate": 9.774011557806044e-06, "loss": 13.7821, "step": 93230 }, { "epoch": 0.18835069914389718, "grad_norm": 235.39218139648438, "learning_rate": 9.773907789293739e-06, "loss": 14.9591, "step": 93240 }, { "epoch": 0.188370899776581, "grad_norm": 140.73678588867188, "learning_rate": 9.77380399751399e-06, "loss": 16.6937, "step": 93250 }, { "epoch": 0.18839110040926482, "grad_norm": 756.7713623046875, "learning_rate": 9.773700182467295e-06, "loss": 28.5085, "step": 93260 }, { "epoch": 0.18841130104194864, "grad_norm": 630.3973999023438, "learning_rate": 9.773596344154165e-06, "loss": 14.0977, "step": 93270 }, { "epoch": 0.18843150167463246, "grad_norm": 329.23321533203125, "learning_rate": 9.773492482575106e-06, "loss": 36.9815, "step": 93280 }, { "epoch": 0.18845170230731625, "grad_norm": 262.5638732910156, "learning_rate": 9.773388597730623e-06, "loss": 21.7334, "step": 93290 }, { "epoch": 0.18847190294000007, "grad_norm": 658.1475219726562, "learning_rate": 9.773284689621223e-06, "loss": 31.7606, "step": 93300 }, { "epoch": 0.1884921035726839, "grad_norm": 292.66546630859375, "learning_rate": 9.773180758247413e-06, "loss": 37.4666, "step": 93310 }, { "epoch": 0.18851230420536771, "grad_norm": 109.0897445678711, "learning_rate": 9.773076803609699e-06, "loss": 10.9353, "step": 93320 }, { "epoch": 0.18853250483805153, "grad_norm": 136.06051635742188, "learning_rate": 9.772972825708587e-06, "loss": 9.4173, "step": 93330 }, { "epoch": 0.18855270547073535, "grad_norm": 190.45758056640625, "learning_rate": 9.772868824544585e-06, "loss": 28.1177, "step": 93340 }, { "epoch": 0.18857290610341915, "grad_norm": 313.33502197265625, "learning_rate": 9.7727648001182e-06, "loss": 31.6668, "step": 93350 }, { "epoch": 0.18859310673610297, "grad_norm": 445.6346740722656, "learning_rate": 9.772660752429937e-06, "loss": 21.3606, "step": 93360 }, { "epoch": 0.1886133073687868, "grad_norm": 88.0118408203125, "learning_rate": 9.772556681480303e-06, "loss": 10.1299, "step": 93370 }, { "epoch": 0.1886335080014706, "grad_norm": 262.5257263183594, "learning_rate": 9.772452587269808e-06, "loss": 22.9904, "step": 93380 }, { "epoch": 0.18865370863415443, "grad_norm": 371.00152587890625, "learning_rate": 9.772348469798958e-06, "loss": 27.2311, "step": 93390 }, { "epoch": 0.18867390926683825, "grad_norm": 501.8653564453125, "learning_rate": 9.772244329068261e-06, "loss": 23.2194, "step": 93400 }, { "epoch": 0.18869410989952207, "grad_norm": 358.61175537109375, "learning_rate": 9.772140165078223e-06, "loss": 30.7846, "step": 93410 }, { "epoch": 0.18871431053220586, "grad_norm": 191.6844482421875, "learning_rate": 9.772035977829352e-06, "loss": 52.7518, "step": 93420 }, { "epoch": 0.18873451116488968, "grad_norm": 513.1759643554688, "learning_rate": 9.771931767322158e-06, "loss": 37.1406, "step": 93430 }, { "epoch": 0.1887547117975735, "grad_norm": 528.90087890625, "learning_rate": 9.771827533557147e-06, "loss": 21.9348, "step": 93440 }, { "epoch": 0.18877491243025732, "grad_norm": 364.3177185058594, "learning_rate": 9.771723276534825e-06, "loss": 19.5551, "step": 93450 }, { "epoch": 0.18879511306294114, "grad_norm": 203.65122985839844, "learning_rate": 9.771618996255704e-06, "loss": 
26.3584, "step": 93460 }, { "epoch": 0.18881531369562496, "grad_norm": 411.6865539550781, "learning_rate": 9.771514692720293e-06, "loss": 13.1048, "step": 93470 }, { "epoch": 0.18883551432830875, "grad_norm": 280.50433349609375, "learning_rate": 9.771410365929097e-06, "loss": 19.4224, "step": 93480 }, { "epoch": 0.18885571496099257, "grad_norm": 96.84584045410156, "learning_rate": 9.771306015882624e-06, "loss": 20.947, "step": 93490 }, { "epoch": 0.1888759155936764, "grad_norm": 635.2235107421875, "learning_rate": 9.771201642581384e-06, "loss": 30.9145, "step": 93500 }, { "epoch": 0.1888961162263602, "grad_norm": 374.6190185546875, "learning_rate": 9.771097246025889e-06, "loss": 21.1268, "step": 93510 }, { "epoch": 0.18891631685904403, "grad_norm": 412.28717041015625, "learning_rate": 9.770992826216642e-06, "loss": 19.0786, "step": 93520 }, { "epoch": 0.18893651749172785, "grad_norm": 507.44390869140625, "learning_rate": 9.770888383154156e-06, "loss": 26.3149, "step": 93530 }, { "epoch": 0.18895671812441164, "grad_norm": 651.7052001953125, "learning_rate": 9.770783916838938e-06, "loss": 39.9074, "step": 93540 }, { "epoch": 0.18897691875709546, "grad_norm": 290.569091796875, "learning_rate": 9.770679427271496e-06, "loss": 24.4273, "step": 93550 }, { "epoch": 0.18899711938977928, "grad_norm": 2062.24560546875, "learning_rate": 9.770574914452343e-06, "loss": 38.4955, "step": 93560 }, { "epoch": 0.1890173200224631, "grad_norm": 887.5333251953125, "learning_rate": 9.770470378381986e-06, "loss": 25.2377, "step": 93570 }, { "epoch": 0.18903752065514692, "grad_norm": 755.4471435546875, "learning_rate": 9.770365819060936e-06, "loss": 40.5245, "step": 93580 }, { "epoch": 0.18905772128783074, "grad_norm": 604.2175903320312, "learning_rate": 9.7702612364897e-06, "loss": 20.7595, "step": 93590 }, { "epoch": 0.18907792192051456, "grad_norm": 23.88644027709961, "learning_rate": 9.77015663066879e-06, "loss": 17.105, "step": 93600 }, { "epoch": 0.18909812255319836, "grad_norm": 13.520434379577637, "learning_rate": 9.770052001598716e-06, "loss": 17.5936, "step": 93610 }, { "epoch": 0.18911832318588218, "grad_norm": 192.54385375976562, "learning_rate": 9.769947349279987e-06, "loss": 19.7351, "step": 93620 }, { "epoch": 0.189138523818566, "grad_norm": 564.4246215820312, "learning_rate": 9.769842673713112e-06, "loss": 30.1727, "step": 93630 }, { "epoch": 0.18915872445124982, "grad_norm": 751.2989501953125, "learning_rate": 9.769737974898602e-06, "loss": 20.6267, "step": 93640 }, { "epoch": 0.18917892508393364, "grad_norm": 257.06866455078125, "learning_rate": 9.769633252836969e-06, "loss": 31.7081, "step": 93650 }, { "epoch": 0.18919912571661746, "grad_norm": 168.4287567138672, "learning_rate": 9.76952850752872e-06, "loss": 9.743, "step": 93660 }, { "epoch": 0.18921932634930125, "grad_norm": 414.6647644042969, "learning_rate": 9.76942373897437e-06, "loss": 23.1347, "step": 93670 }, { "epoch": 0.18923952698198507, "grad_norm": 330.8674621582031, "learning_rate": 9.769318947174426e-06, "loss": 30.5585, "step": 93680 }, { "epoch": 0.1892597276146689, "grad_norm": 100.80751037597656, "learning_rate": 9.769214132129399e-06, "loss": 29.8406, "step": 93690 }, { "epoch": 0.1892799282473527, "grad_norm": 278.685302734375, "learning_rate": 9.769109293839803e-06, "loss": 21.5444, "step": 93700 }, { "epoch": 0.18930012888003653, "grad_norm": 263.3114318847656, "learning_rate": 9.769004432306145e-06, "loss": 20.2435, "step": 93710 }, { "epoch": 0.18932032951272035, "grad_norm": 523.59326171875, "learning_rate": 
9.768899547528939e-06, "loss": 23.3171, "step": 93720 }, { "epoch": 0.18934053014540417, "grad_norm": 618.2498168945312, "learning_rate": 9.768794639508693e-06, "loss": 26.2153, "step": 93730 }, { "epoch": 0.18936073077808796, "grad_norm": 273.69769287109375, "learning_rate": 9.768689708245921e-06, "loss": 24.2667, "step": 93740 }, { "epoch": 0.18938093141077178, "grad_norm": 696.7081298828125, "learning_rate": 9.768584753741134e-06, "loss": 21.3982, "step": 93750 }, { "epoch": 0.1894011320434556, "grad_norm": 820.3089599609375, "learning_rate": 9.768479775994846e-06, "loss": 20.8917, "step": 93760 }, { "epoch": 0.18942133267613942, "grad_norm": 160.27163696289062, "learning_rate": 9.768374775007562e-06, "loss": 17.6037, "step": 93770 }, { "epoch": 0.18944153330882324, "grad_norm": 167.3863067626953, "learning_rate": 9.7682697507798e-06, "loss": 36.3596, "step": 93780 }, { "epoch": 0.18946173394150706, "grad_norm": 382.2495422363281, "learning_rate": 9.768164703312068e-06, "loss": 33.2881, "step": 93790 }, { "epoch": 0.18948193457419085, "grad_norm": 129.846435546875, "learning_rate": 9.768059632604881e-06, "loss": 22.1885, "step": 93800 }, { "epoch": 0.18950213520687467, "grad_norm": 328.9864501953125, "learning_rate": 9.767954538658749e-06, "loss": 33.0496, "step": 93810 }, { "epoch": 0.1895223358395585, "grad_norm": 207.57334899902344, "learning_rate": 9.767849421474185e-06, "loss": 17.3691, "step": 93820 }, { "epoch": 0.1895425364722423, "grad_norm": 0.0, "learning_rate": 9.767744281051702e-06, "loss": 11.1974, "step": 93830 }, { "epoch": 0.18956273710492613, "grad_norm": 599.1231689453125, "learning_rate": 9.76763911739181e-06, "loss": 31.3294, "step": 93840 }, { "epoch": 0.18958293773760995, "grad_norm": 425.5417175292969, "learning_rate": 9.767533930495023e-06, "loss": 14.5931, "step": 93850 }, { "epoch": 0.18960313837029374, "grad_norm": 454.7153625488281, "learning_rate": 9.767428720361854e-06, "loss": 21.8906, "step": 93860 }, { "epoch": 0.18962333900297756, "grad_norm": 1088.3084716796875, "learning_rate": 9.767323486992816e-06, "loss": 34.9469, "step": 93870 }, { "epoch": 0.18964353963566138, "grad_norm": 281.15972900390625, "learning_rate": 9.767218230388423e-06, "loss": 21.4637, "step": 93880 }, { "epoch": 0.1896637402683452, "grad_norm": 267.9508361816406, "learning_rate": 9.767112950549184e-06, "loss": 20.8022, "step": 93890 }, { "epoch": 0.18968394090102902, "grad_norm": 424.75604248046875, "learning_rate": 9.767007647475618e-06, "loss": 30.6686, "step": 93900 }, { "epoch": 0.18970414153371284, "grad_norm": 584.2125854492188, "learning_rate": 9.766902321168232e-06, "loss": 22.9802, "step": 93910 }, { "epoch": 0.18972434216639666, "grad_norm": 344.3031311035156, "learning_rate": 9.766796971627543e-06, "loss": 19.7187, "step": 93920 }, { "epoch": 0.18974454279908046, "grad_norm": 334.32958984375, "learning_rate": 9.766691598854064e-06, "loss": 15.5833, "step": 93930 }, { "epoch": 0.18976474343176428, "grad_norm": 393.0152587890625, "learning_rate": 9.766586202848306e-06, "loss": 22.1252, "step": 93940 }, { "epoch": 0.1897849440644481, "grad_norm": 976.2451782226562, "learning_rate": 9.766480783610789e-06, "loss": 16.7059, "step": 93950 }, { "epoch": 0.18980514469713192, "grad_norm": 655.0115356445312, "learning_rate": 9.76637534114202e-06, "loss": 21.6289, "step": 93960 }, { "epoch": 0.18982534532981574, "grad_norm": 672.9705810546875, "learning_rate": 9.766269875442517e-06, "loss": 26.8585, "step": 93970 }, { "epoch": 0.18984554596249956, "grad_norm": 481.5218200683594, 
"learning_rate": 9.766164386512794e-06, "loss": 19.6168, "step": 93980 }, { "epoch": 0.18986574659518335, "grad_norm": 416.34222412109375, "learning_rate": 9.766058874353361e-06, "loss": 15.9359, "step": 93990 }, { "epoch": 0.18988594722786717, "grad_norm": 1040.742919921875, "learning_rate": 9.765953338964736e-06, "loss": 39.0241, "step": 94000 }, { "epoch": 0.189906147860551, "grad_norm": 472.5757141113281, "learning_rate": 9.765847780347433e-06, "loss": 26.1622, "step": 94010 }, { "epoch": 0.1899263484932348, "grad_norm": 55.61189270019531, "learning_rate": 9.765742198501965e-06, "loss": 32.746, "step": 94020 }, { "epoch": 0.18994654912591863, "grad_norm": 818.5496826171875, "learning_rate": 9.765636593428849e-06, "loss": 21.8989, "step": 94030 }, { "epoch": 0.18996674975860245, "grad_norm": 689.7357788085938, "learning_rate": 9.765530965128597e-06, "loss": 31.6282, "step": 94040 }, { "epoch": 0.18998695039128627, "grad_norm": 745.34814453125, "learning_rate": 9.765425313601726e-06, "loss": 22.1681, "step": 94050 }, { "epoch": 0.19000715102397006, "grad_norm": 268.80755615234375, "learning_rate": 9.765319638848749e-06, "loss": 13.777, "step": 94060 }, { "epoch": 0.19002735165665388, "grad_norm": 488.5350341796875, "learning_rate": 9.765213940870183e-06, "loss": 31.8917, "step": 94070 }, { "epoch": 0.1900475522893377, "grad_norm": 175.02919006347656, "learning_rate": 9.765108219666542e-06, "loss": 21.4682, "step": 94080 }, { "epoch": 0.19006775292202152, "grad_norm": 109.80484008789062, "learning_rate": 9.76500247523834e-06, "loss": 15.6488, "step": 94090 }, { "epoch": 0.19008795355470534, "grad_norm": 369.6401672363281, "learning_rate": 9.764896707586095e-06, "loss": 21.3393, "step": 94100 }, { "epoch": 0.19010815418738916, "grad_norm": 887.25634765625, "learning_rate": 9.76479091671032e-06, "loss": 15.4807, "step": 94110 }, { "epoch": 0.19012835482007295, "grad_norm": 321.5193176269531, "learning_rate": 9.764685102611535e-06, "loss": 30.7634, "step": 94120 }, { "epoch": 0.19014855545275677, "grad_norm": 418.4538879394531, "learning_rate": 9.76457926529025e-06, "loss": 11.3235, "step": 94130 }, { "epoch": 0.1901687560854406, "grad_norm": 872.686279296875, "learning_rate": 9.764473404746986e-06, "loss": 30.4777, "step": 94140 }, { "epoch": 0.1901889567181244, "grad_norm": 894.0331420898438, "learning_rate": 9.764367520982255e-06, "loss": 28.57, "step": 94150 }, { "epoch": 0.19020915735080823, "grad_norm": 164.6750030517578, "learning_rate": 9.764261613996574e-06, "loss": 16.8684, "step": 94160 }, { "epoch": 0.19022935798349205, "grad_norm": 317.54217529296875, "learning_rate": 9.764155683790461e-06, "loss": 12.447, "step": 94170 }, { "epoch": 0.19024955861617585, "grad_norm": 338.030517578125, "learning_rate": 9.76404973036443e-06, "loss": 28.7921, "step": 94180 }, { "epoch": 0.19026975924885967, "grad_norm": 258.0565185546875, "learning_rate": 9.763943753719e-06, "loss": 35.6858, "step": 94190 }, { "epoch": 0.19028995988154349, "grad_norm": 303.80841064453125, "learning_rate": 9.763837753854684e-06, "loss": 18.3098, "step": 94200 }, { "epoch": 0.1903101605142273, "grad_norm": 817.4520874023438, "learning_rate": 9.763731730772001e-06, "loss": 22.2052, "step": 94210 }, { "epoch": 0.19033036114691113, "grad_norm": 1681.8250732421875, "learning_rate": 9.763625684471467e-06, "loss": 10.2072, "step": 94220 }, { "epoch": 0.19035056177959495, "grad_norm": 275.3839416503906, "learning_rate": 9.7635196149536e-06, "loss": 28.9352, "step": 94230 }, { "epoch": 0.19037076241227877, "grad_norm": 
642.3046875, "learning_rate": 9.763413522218917e-06, "loss": 17.2307, "step": 94240 }, { "epoch": 0.19039096304496256, "grad_norm": 534.1638793945312, "learning_rate": 9.763307406267933e-06, "loss": 44.2703, "step": 94250 }, { "epoch": 0.19041116367764638, "grad_norm": 419.8201599121094, "learning_rate": 9.763201267101165e-06, "loss": 13.8423, "step": 94260 }, { "epoch": 0.1904313643103302, "grad_norm": 297.29913330078125, "learning_rate": 9.763095104719133e-06, "loss": 18.2041, "step": 94270 }, { "epoch": 0.19045156494301402, "grad_norm": 283.5640869140625, "learning_rate": 9.762988919122354e-06, "loss": 23.7125, "step": 94280 }, { "epoch": 0.19047176557569784, "grad_norm": 276.22174072265625, "learning_rate": 9.762882710311345e-06, "loss": 20.0406, "step": 94290 }, { "epoch": 0.19049196620838166, "grad_norm": 158.86599731445312, "learning_rate": 9.762776478286622e-06, "loss": 33.5239, "step": 94300 }, { "epoch": 0.19051216684106545, "grad_norm": 921.1744995117188, "learning_rate": 9.762670223048705e-06, "loss": 35.5041, "step": 94310 }, { "epoch": 0.19053236747374927, "grad_norm": 422.2298278808594, "learning_rate": 9.76256394459811e-06, "loss": 16.1197, "step": 94320 }, { "epoch": 0.1905525681064331, "grad_norm": 611.1220092773438, "learning_rate": 9.762457642935357e-06, "loss": 13.1988, "step": 94330 }, { "epoch": 0.1905727687391169, "grad_norm": 116.6915054321289, "learning_rate": 9.762351318060962e-06, "loss": 19.1604, "step": 94340 }, { "epoch": 0.19059296937180073, "grad_norm": 87.05732727050781, "learning_rate": 9.762244969975446e-06, "loss": 18.2294, "step": 94350 }, { "epoch": 0.19061317000448455, "grad_norm": 305.0800476074219, "learning_rate": 9.762138598679324e-06, "loss": 30.1884, "step": 94360 }, { "epoch": 0.19063337063716834, "grad_norm": 272.8003845214844, "learning_rate": 9.762032204173116e-06, "loss": 25.5305, "step": 94370 }, { "epoch": 0.19065357126985216, "grad_norm": 310.4867248535156, "learning_rate": 9.761925786457343e-06, "loss": 21.7396, "step": 94380 }, { "epoch": 0.19067377190253598, "grad_norm": 22.4447021484375, "learning_rate": 9.761819345532519e-06, "loss": 15.6707, "step": 94390 }, { "epoch": 0.1906939725352198, "grad_norm": 212.0104522705078, "learning_rate": 9.761712881399164e-06, "loss": 18.4564, "step": 94400 }, { "epoch": 0.19071417316790362, "grad_norm": 913.8582153320312, "learning_rate": 9.7616063940578e-06, "loss": 53.2364, "step": 94410 }, { "epoch": 0.19073437380058744, "grad_norm": 381.44964599609375, "learning_rate": 9.761499883508942e-06, "loss": 24.3057, "step": 94420 }, { "epoch": 0.19075457443327126, "grad_norm": 361.863037109375, "learning_rate": 9.761393349753115e-06, "loss": 23.973, "step": 94430 }, { "epoch": 0.19077477506595505, "grad_norm": 201.03997802734375, "learning_rate": 9.76128679279083e-06, "loss": 27.5303, "step": 94440 }, { "epoch": 0.19079497569863887, "grad_norm": 598.7647094726562, "learning_rate": 9.761180212622613e-06, "loss": 36.8382, "step": 94450 }, { "epoch": 0.1908151763313227, "grad_norm": 612.81201171875, "learning_rate": 9.761073609248981e-06, "loss": 31.6944, "step": 94460 }, { "epoch": 0.19083537696400651, "grad_norm": 308.01251220703125, "learning_rate": 9.760966982670453e-06, "loss": 38.2548, "step": 94470 }, { "epoch": 0.19085557759669033, "grad_norm": 214.9204864501953, "learning_rate": 9.760860332887549e-06, "loss": 27.0995, "step": 94480 }, { "epoch": 0.19087577822937415, "grad_norm": 2.1772916316986084, "learning_rate": 9.76075365990079e-06, "loss": 19.2659, "step": 94490 }, { "epoch": 
0.19089597886205795, "grad_norm": 801.9741821289062, "learning_rate": 9.760646963710694e-06, "loss": 26.7987, "step": 94500 }, { "epoch": 0.19091617949474177, "grad_norm": 192.08648681640625, "learning_rate": 9.760540244317784e-06, "loss": 23.6551, "step": 94510 }, { "epoch": 0.1909363801274256, "grad_norm": 372.5327453613281, "learning_rate": 9.760433501722576e-06, "loss": 12.4313, "step": 94520 }, { "epoch": 0.1909565807601094, "grad_norm": 80.73799133300781, "learning_rate": 9.760326735925594e-06, "loss": 33.106, "step": 94530 }, { "epoch": 0.19097678139279323, "grad_norm": 569.3877563476562, "learning_rate": 9.760219946927357e-06, "loss": 29.0983, "step": 94540 }, { "epoch": 0.19099698202547705, "grad_norm": 188.4160919189453, "learning_rate": 9.760113134728383e-06, "loss": 26.0278, "step": 94550 }, { "epoch": 0.19101718265816087, "grad_norm": 145.1660614013672, "learning_rate": 9.760006299329198e-06, "loss": 19.5478, "step": 94560 }, { "epoch": 0.19103738329084466, "grad_norm": 516.3740234375, "learning_rate": 9.759899440730318e-06, "loss": 36.2998, "step": 94570 }, { "epoch": 0.19105758392352848, "grad_norm": 302.2230529785156, "learning_rate": 9.759792558932267e-06, "loss": 20.6328, "step": 94580 }, { "epoch": 0.1910777845562123, "grad_norm": 321.30389404296875, "learning_rate": 9.759685653935563e-06, "loss": 20.3567, "step": 94590 }, { "epoch": 0.19109798518889612, "grad_norm": 408.89434814453125, "learning_rate": 9.759578725740726e-06, "loss": 24.1724, "step": 94600 }, { "epoch": 0.19111818582157994, "grad_norm": 154.68870544433594, "learning_rate": 9.759471774348284e-06, "loss": 31.3046, "step": 94610 }, { "epoch": 0.19113838645426376, "grad_norm": 371.37109375, "learning_rate": 9.759364799758751e-06, "loss": 16.1947, "step": 94620 }, { "epoch": 0.19115858708694755, "grad_norm": 300.44744873046875, "learning_rate": 9.759257801972652e-06, "loss": 21.571, "step": 94630 }, { "epoch": 0.19117878771963137, "grad_norm": 366.66973876953125, "learning_rate": 9.759150780990508e-06, "loss": 21.162, "step": 94640 }, { "epoch": 0.1911989883523152, "grad_norm": 301.9317321777344, "learning_rate": 9.75904373681284e-06, "loss": 14.5093, "step": 94650 }, { "epoch": 0.191219188984999, "grad_norm": 397.9039611816406, "learning_rate": 9.75893666944017e-06, "loss": 22.7078, "step": 94660 }, { "epoch": 0.19123938961768283, "grad_norm": 398.2569580078125, "learning_rate": 9.758829578873019e-06, "loss": 19.4191, "step": 94670 }, { "epoch": 0.19125959025036665, "grad_norm": 628.5740966796875, "learning_rate": 9.758722465111912e-06, "loss": 35.5664, "step": 94680 }, { "epoch": 0.19127979088305044, "grad_norm": 290.260986328125, "learning_rate": 9.758615328157367e-06, "loss": 27.4729, "step": 94690 }, { "epoch": 0.19129999151573426, "grad_norm": 401.98455810546875, "learning_rate": 9.758508168009908e-06, "loss": 24.0101, "step": 94700 }, { "epoch": 0.19132019214841808, "grad_norm": 363.4964599609375, "learning_rate": 9.75840098467006e-06, "loss": 23.6348, "step": 94710 }, { "epoch": 0.1913403927811019, "grad_norm": 368.26922607421875, "learning_rate": 9.758293778138339e-06, "loss": 32.6702, "step": 94720 }, { "epoch": 0.19136059341378572, "grad_norm": 542.6871337890625, "learning_rate": 9.758186548415274e-06, "loss": 22.5824, "step": 94730 }, { "epoch": 0.19138079404646954, "grad_norm": 34.795433044433594, "learning_rate": 9.758079295501384e-06, "loss": 21.9126, "step": 94740 }, { "epoch": 0.19140099467915336, "grad_norm": 397.8806457519531, "learning_rate": 9.757972019397192e-06, "loss": 26.8283, "step": 
94750 }, { "epoch": 0.19142119531183716, "grad_norm": 424.9023132324219, "learning_rate": 9.757864720103222e-06, "loss": 14.3381, "step": 94760 }, { "epoch": 0.19144139594452098, "grad_norm": 753.5485229492188, "learning_rate": 9.757757397619995e-06, "loss": 18.7168, "step": 94770 }, { "epoch": 0.1914615965772048, "grad_norm": 286.5726623535156, "learning_rate": 9.757650051948037e-06, "loss": 21.9167, "step": 94780 }, { "epoch": 0.19148179720988862, "grad_norm": 217.6724853515625, "learning_rate": 9.757542683087871e-06, "loss": 27.2246, "step": 94790 }, { "epoch": 0.19150199784257244, "grad_norm": 490.37493896484375, "learning_rate": 9.757435291040016e-06, "loss": 19.4853, "step": 94800 }, { "epoch": 0.19152219847525626, "grad_norm": 557.5157470703125, "learning_rate": 9.757327875805e-06, "loss": 30.4812, "step": 94810 }, { "epoch": 0.19154239910794005, "grad_norm": 337.1351318359375, "learning_rate": 9.757220437383345e-06, "loss": 13.7743, "step": 94820 }, { "epoch": 0.19156259974062387, "grad_norm": 311.22943115234375, "learning_rate": 9.757112975775575e-06, "loss": 18.5559, "step": 94830 }, { "epoch": 0.1915828003733077, "grad_norm": 293.1070861816406, "learning_rate": 9.757005490982213e-06, "loss": 12.0658, "step": 94840 }, { "epoch": 0.1916030010059915, "grad_norm": 217.46958923339844, "learning_rate": 9.756897983003782e-06, "loss": 18.6618, "step": 94850 }, { "epoch": 0.19162320163867533, "grad_norm": 1383.739990234375, "learning_rate": 9.756790451840807e-06, "loss": 24.5521, "step": 94860 }, { "epoch": 0.19164340227135915, "grad_norm": 584.2903442382812, "learning_rate": 9.756682897493814e-06, "loss": 25.5006, "step": 94870 }, { "epoch": 0.19166360290404297, "grad_norm": 286.12554931640625, "learning_rate": 9.756575319963325e-06, "loss": 24.2221, "step": 94880 }, { "epoch": 0.19168380353672676, "grad_norm": 365.3943786621094, "learning_rate": 9.756467719249865e-06, "loss": 44.9083, "step": 94890 }, { "epoch": 0.19170400416941058, "grad_norm": 476.6283264160156, "learning_rate": 9.756360095353957e-06, "loss": 28.985, "step": 94900 }, { "epoch": 0.1917242048020944, "grad_norm": 223.5682373046875, "learning_rate": 9.756252448276128e-06, "loss": 16.9192, "step": 94910 }, { "epoch": 0.19174440543477822, "grad_norm": 282.4645690917969, "learning_rate": 9.756144778016901e-06, "loss": 16.1518, "step": 94920 }, { "epoch": 0.19176460606746204, "grad_norm": 993.4703979492188, "learning_rate": 9.756037084576801e-06, "loss": 44.8314, "step": 94930 }, { "epoch": 0.19178480670014586, "grad_norm": 162.4573974609375, "learning_rate": 9.755929367956354e-06, "loss": 19.7513, "step": 94940 }, { "epoch": 0.19180500733282965, "grad_norm": 597.595703125, "learning_rate": 9.755821628156083e-06, "loss": 31.1724, "step": 94950 }, { "epoch": 0.19182520796551347, "grad_norm": 607.3621826171875, "learning_rate": 9.755713865176514e-06, "loss": 32.0112, "step": 94960 }, { "epoch": 0.1918454085981973, "grad_norm": 535.0570068359375, "learning_rate": 9.755606079018174e-06, "loss": 29.5479, "step": 94970 }, { "epoch": 0.1918656092308811, "grad_norm": 1022.1478881835938, "learning_rate": 9.755498269681585e-06, "loss": 35.6381, "step": 94980 }, { "epoch": 0.19188580986356493, "grad_norm": 348.110595703125, "learning_rate": 9.755390437167274e-06, "loss": 23.0737, "step": 94990 }, { "epoch": 0.19190601049624875, "grad_norm": 402.2424011230469, "learning_rate": 9.755282581475769e-06, "loss": 14.4856, "step": 95000 }, { "epoch": 0.19192621112893254, "grad_norm": 306.0251159667969, "learning_rate": 9.755174702607592e-06, 
"loss": 37.8608, "step": 95010 }, { "epoch": 0.19194641176161636, "grad_norm": 547.3987426757812, "learning_rate": 9.75506680056327e-06, "loss": 28.8134, "step": 95020 }, { "epoch": 0.19196661239430018, "grad_norm": 538.8976440429688, "learning_rate": 9.75495887534333e-06, "loss": 25.2399, "step": 95030 }, { "epoch": 0.191986813026984, "grad_norm": 484.52056884765625, "learning_rate": 9.754850926948295e-06, "loss": 18.9739, "step": 95040 }, { "epoch": 0.19200701365966782, "grad_norm": 610.3802490234375, "learning_rate": 9.754742955378697e-06, "loss": 38.8773, "step": 95050 }, { "epoch": 0.19202721429235164, "grad_norm": 204.48780822753906, "learning_rate": 9.754634960635057e-06, "loss": 21.8397, "step": 95060 }, { "epoch": 0.19204741492503546, "grad_norm": 552.1886596679688, "learning_rate": 9.754526942717901e-06, "loss": 24.1843, "step": 95070 }, { "epoch": 0.19206761555771926, "grad_norm": 544.3379516601562, "learning_rate": 9.75441890162776e-06, "loss": 31.6727, "step": 95080 }, { "epoch": 0.19208781619040308, "grad_norm": 191.04501342773438, "learning_rate": 9.754310837365155e-06, "loss": 18.3352, "step": 95090 }, { "epoch": 0.1921080168230869, "grad_norm": 423.8003845214844, "learning_rate": 9.754202749930618e-06, "loss": 19.5345, "step": 95100 }, { "epoch": 0.19212821745577072, "grad_norm": 854.3909912109375, "learning_rate": 9.754094639324672e-06, "loss": 30.5475, "step": 95110 }, { "epoch": 0.19214841808845454, "grad_norm": 485.14215087890625, "learning_rate": 9.753986505547845e-06, "loss": 25.7641, "step": 95120 }, { "epoch": 0.19216861872113836, "grad_norm": 108.00077819824219, "learning_rate": 9.753878348600666e-06, "loss": 20.8481, "step": 95130 }, { "epoch": 0.19218881935382215, "grad_norm": 814.5859985351562, "learning_rate": 9.75377016848366e-06, "loss": 26.7563, "step": 95140 }, { "epoch": 0.19220901998650597, "grad_norm": 388.3916320800781, "learning_rate": 9.753661965197355e-06, "loss": 12.6434, "step": 95150 }, { "epoch": 0.1922292206191898, "grad_norm": 669.4876708984375, "learning_rate": 9.753553738742278e-06, "loss": 44.7474, "step": 95160 }, { "epoch": 0.1922494212518736, "grad_norm": 152.13226318359375, "learning_rate": 9.753445489118955e-06, "loss": 20.3763, "step": 95170 }, { "epoch": 0.19226962188455743, "grad_norm": 1334.5699462890625, "learning_rate": 9.753337216327917e-06, "loss": 37.9783, "step": 95180 }, { "epoch": 0.19228982251724125, "grad_norm": 2598.234375, "learning_rate": 9.75322892036969e-06, "loss": 35.7785, "step": 95190 }, { "epoch": 0.19231002314992507, "grad_norm": 279.7922668457031, "learning_rate": 9.7531206012448e-06, "loss": 19.9184, "step": 95200 }, { "epoch": 0.19233022378260886, "grad_norm": 500.36456298828125, "learning_rate": 9.753012258953778e-06, "loss": 40.2961, "step": 95210 }, { "epoch": 0.19235042441529268, "grad_norm": 418.6767578125, "learning_rate": 9.752903893497152e-06, "loss": 28.6601, "step": 95220 }, { "epoch": 0.1923706250479765, "grad_norm": 190.2279815673828, "learning_rate": 9.752795504875447e-06, "loss": 21.5785, "step": 95230 }, { "epoch": 0.19239082568066032, "grad_norm": 433.3700866699219, "learning_rate": 9.752687093089192e-06, "loss": 34.0436, "step": 95240 }, { "epoch": 0.19241102631334414, "grad_norm": 500.3661804199219, "learning_rate": 9.75257865813892e-06, "loss": 37.865, "step": 95250 }, { "epoch": 0.19243122694602796, "grad_norm": 405.3109130859375, "learning_rate": 9.752470200025153e-06, "loss": 19.5941, "step": 95260 }, { "epoch": 0.19245142757871175, "grad_norm": 543.3590698242188, "learning_rate": 
9.752361718748425e-06, "loss": 11.6947, "step": 95270 }, { "epoch": 0.19247162821139557, "grad_norm": 229.73873901367188, "learning_rate": 9.75225321430926e-06, "loss": 13.0533, "step": 95280 }, { "epoch": 0.1924918288440794, "grad_norm": 268.1575012207031, "learning_rate": 9.752144686708192e-06, "loss": 38.0653, "step": 95290 }, { "epoch": 0.1925120294767632, "grad_norm": 301.10516357421875, "learning_rate": 9.752036135945743e-06, "loss": 27.4795, "step": 95300 }, { "epoch": 0.19253223010944703, "grad_norm": 665.0114135742188, "learning_rate": 9.75192756202245e-06, "loss": 29.2695, "step": 95310 }, { "epoch": 0.19255243074213085, "grad_norm": 1296.9481201171875, "learning_rate": 9.751818964938837e-06, "loss": 23.5781, "step": 95320 }, { "epoch": 0.19257263137481465, "grad_norm": 25.395132064819336, "learning_rate": 9.751710344695436e-06, "loss": 18.2023, "step": 95330 }, { "epoch": 0.19259283200749847, "grad_norm": 367.8702697753906, "learning_rate": 9.751601701292773e-06, "loss": 21.1432, "step": 95340 }, { "epoch": 0.19261303264018229, "grad_norm": 228.0545654296875, "learning_rate": 9.75149303473138e-06, "loss": 31.7488, "step": 95350 }, { "epoch": 0.1926332332728661, "grad_norm": 15.69963264465332, "learning_rate": 9.751384345011787e-06, "loss": 15.3159, "step": 95360 }, { "epoch": 0.19265343390554993, "grad_norm": 105.42630004882812, "learning_rate": 9.751275632134523e-06, "loss": 27.5233, "step": 95370 }, { "epoch": 0.19267363453823375, "grad_norm": 21.262840270996094, "learning_rate": 9.751166896100119e-06, "loss": 20.9626, "step": 95380 }, { "epoch": 0.19269383517091757, "grad_norm": 1015.583984375, "learning_rate": 9.751058136909102e-06, "loss": 40.2617, "step": 95390 }, { "epoch": 0.19271403580360136, "grad_norm": 753.014892578125, "learning_rate": 9.750949354562006e-06, "loss": 25.4126, "step": 95400 }, { "epoch": 0.19273423643628518, "grad_norm": 545.4683227539062, "learning_rate": 9.750840549059354e-06, "loss": 14.7867, "step": 95410 }, { "epoch": 0.192754437068969, "grad_norm": 358.6993408203125, "learning_rate": 9.750731720401685e-06, "loss": 22.2825, "step": 95420 }, { "epoch": 0.19277463770165282, "grad_norm": 828.7424926757812, "learning_rate": 9.750622868589527e-06, "loss": 13.9393, "step": 95430 }, { "epoch": 0.19279483833433664, "grad_norm": 485.7572937011719, "learning_rate": 9.750513993623406e-06, "loss": 24.9899, "step": 95440 }, { "epoch": 0.19281503896702046, "grad_norm": 842.6419677734375, "learning_rate": 9.750405095503859e-06, "loss": 59.4863, "step": 95450 }, { "epoch": 0.19283523959970425, "grad_norm": 436.1926574707031, "learning_rate": 9.750296174231412e-06, "loss": 17.983, "step": 95460 }, { "epoch": 0.19285544023238807, "grad_norm": 438.8775939941406, "learning_rate": 9.7501872298066e-06, "loss": 18.8818, "step": 95470 }, { "epoch": 0.1928756408650719, "grad_norm": 370.5317077636719, "learning_rate": 9.75007826222995e-06, "loss": 16.6989, "step": 95480 }, { "epoch": 0.1928958414977557, "grad_norm": 649.5635375976562, "learning_rate": 9.749969271501993e-06, "loss": 18.2181, "step": 95490 }, { "epoch": 0.19291604213043953, "grad_norm": 126.58338165283203, "learning_rate": 9.749860257623262e-06, "loss": 33.4575, "step": 95500 }, { "epoch": 0.19293624276312335, "grad_norm": 20.53302001953125, "learning_rate": 9.74975122059429e-06, "loss": 23.0558, "step": 95510 }, { "epoch": 0.19295644339580717, "grad_norm": 582.2803955078125, "learning_rate": 9.749642160415606e-06, "loss": 18.6158, "step": 95520 }, { "epoch": 0.19297664402849096, "grad_norm": 
469.9850769042969, "learning_rate": 9.749533077087742e-06, "loss": 21.8119, "step": 95530 }, { "epoch": 0.19299684466117478, "grad_norm": 116.4792251586914, "learning_rate": 9.749423970611232e-06, "loss": 27.1043, "step": 95540 }, { "epoch": 0.1930170452938586, "grad_norm": 452.0263977050781, "learning_rate": 9.749314840986604e-06, "loss": 15.7223, "step": 95550 }, { "epoch": 0.19303724592654242, "grad_norm": 567.0089111328125, "learning_rate": 9.74920568821439e-06, "loss": 17.3021, "step": 95560 }, { "epoch": 0.19305744655922624, "grad_norm": 454.8739929199219, "learning_rate": 9.749096512295124e-06, "loss": 18.5041, "step": 95570 }, { "epoch": 0.19307764719191006, "grad_norm": 406.47967529296875, "learning_rate": 9.748987313229339e-06, "loss": 26.3245, "step": 95580 }, { "epoch": 0.19309784782459385, "grad_norm": 250.55763244628906, "learning_rate": 9.748878091017565e-06, "loss": 17.7021, "step": 95590 }, { "epoch": 0.19311804845727767, "grad_norm": 342.03485107421875, "learning_rate": 9.748768845660335e-06, "loss": 16.6775, "step": 95600 }, { "epoch": 0.1931382490899615, "grad_norm": 299.2231140136719, "learning_rate": 9.748659577158182e-06, "loss": 37.2432, "step": 95610 }, { "epoch": 0.19315844972264531, "grad_norm": 442.600341796875, "learning_rate": 9.748550285511637e-06, "loss": 21.9335, "step": 95620 }, { "epoch": 0.19317865035532913, "grad_norm": 274.6854553222656, "learning_rate": 9.748440970721236e-06, "loss": 9.4645, "step": 95630 }, { "epoch": 0.19319885098801295, "grad_norm": 571.7615356445312, "learning_rate": 9.74833163278751e-06, "loss": 14.4993, "step": 95640 }, { "epoch": 0.19321905162069675, "grad_norm": 540.5484619140625, "learning_rate": 9.748222271710988e-06, "loss": 26.3743, "step": 95650 }, { "epoch": 0.19323925225338057, "grad_norm": 285.39373779296875, "learning_rate": 9.74811288749221e-06, "loss": 9.7227, "step": 95660 }, { "epoch": 0.1932594528860644, "grad_norm": 42.79730987548828, "learning_rate": 9.748003480131702e-06, "loss": 32.0558, "step": 95670 }, { "epoch": 0.1932796535187482, "grad_norm": 460.7587890625, "learning_rate": 9.747894049630004e-06, "loss": 52.6809, "step": 95680 }, { "epoch": 0.19329985415143203, "grad_norm": 56.83159255981445, "learning_rate": 9.747784595987645e-06, "loss": 41.3574, "step": 95690 }, { "epoch": 0.19332005478411585, "grad_norm": 405.26861572265625, "learning_rate": 9.74767511920516e-06, "loss": 23.6188, "step": 95700 }, { "epoch": 0.19334025541679967, "grad_norm": 215.51358032226562, "learning_rate": 9.747565619283083e-06, "loss": 16.9533, "step": 95710 }, { "epoch": 0.19336045604948346, "grad_norm": 393.44281005859375, "learning_rate": 9.747456096221946e-06, "loss": 43.6118, "step": 95720 }, { "epoch": 0.19338065668216728, "grad_norm": 40.6806526184082, "learning_rate": 9.747346550022282e-06, "loss": 18.7496, "step": 95730 }, { "epoch": 0.1934008573148511, "grad_norm": 110.1174545288086, "learning_rate": 9.747236980684632e-06, "loss": 28.4388, "step": 95740 }, { "epoch": 0.19342105794753492, "grad_norm": 525.581298828125, "learning_rate": 9.74712738820952e-06, "loss": 21.0295, "step": 95750 }, { "epoch": 0.19344125858021874, "grad_norm": 476.88739013671875, "learning_rate": 9.747017772597487e-06, "loss": 20.7958, "step": 95760 }, { "epoch": 0.19346145921290256, "grad_norm": 373.5235900878906, "learning_rate": 9.746908133849065e-06, "loss": 23.7376, "step": 95770 }, { "epoch": 0.19348165984558635, "grad_norm": 460.3899230957031, "learning_rate": 9.746798471964787e-06, "loss": 24.5617, "step": 95780 }, { "epoch": 
0.19350186047827017, "grad_norm": 30.116121292114258, "learning_rate": 9.74668878694519e-06, "loss": 19.1157, "step": 95790 }, { "epoch": 0.193522061110954, "grad_norm": 968.26513671875, "learning_rate": 9.746579078790808e-06, "loss": 34.6997, "step": 95800 }, { "epoch": 0.1935422617436378, "grad_norm": 499.2196960449219, "learning_rate": 9.746469347502174e-06, "loss": 40.1381, "step": 95810 }, { "epoch": 0.19356246237632163, "grad_norm": 335.9527893066406, "learning_rate": 9.746359593079825e-06, "loss": 23.6027, "step": 95820 }, { "epoch": 0.19358266300900545, "grad_norm": 130.8729705810547, "learning_rate": 9.746249815524295e-06, "loss": 20.722, "step": 95830 }, { "epoch": 0.19360286364168927, "grad_norm": 212.45811462402344, "learning_rate": 9.746140014836118e-06, "loss": 31.2415, "step": 95840 }, { "epoch": 0.19362306427437306, "grad_norm": 1760.1175537109375, "learning_rate": 9.746030191015831e-06, "loss": 53.4861, "step": 95850 }, { "epoch": 0.19364326490705688, "grad_norm": 475.439697265625, "learning_rate": 9.745920344063969e-06, "loss": 22.7386, "step": 95860 }, { "epoch": 0.1936634655397407, "grad_norm": 463.5982360839844, "learning_rate": 9.745810473981067e-06, "loss": 24.8727, "step": 95870 }, { "epoch": 0.19368366617242452, "grad_norm": 113.54488372802734, "learning_rate": 9.74570058076766e-06, "loss": 30.2602, "step": 95880 }, { "epoch": 0.19370386680510834, "grad_norm": 267.89654541015625, "learning_rate": 9.745590664424283e-06, "loss": 17.2591, "step": 95890 }, { "epoch": 0.19372406743779216, "grad_norm": 1579.3568115234375, "learning_rate": 9.745480724951473e-06, "loss": 42.5363, "step": 95900 }, { "epoch": 0.19374426807047596, "grad_norm": 379.80615234375, "learning_rate": 9.745370762349766e-06, "loss": 28.0911, "step": 95910 }, { "epoch": 0.19376446870315978, "grad_norm": 407.65985107421875, "learning_rate": 9.745260776619698e-06, "loss": 23.3073, "step": 95920 }, { "epoch": 0.1937846693358436, "grad_norm": 409.89239501953125, "learning_rate": 9.745150767761805e-06, "loss": 18.4817, "step": 95930 }, { "epoch": 0.19380486996852742, "grad_norm": 40.69590377807617, "learning_rate": 9.745040735776622e-06, "loss": 19.7151, "step": 95940 }, { "epoch": 0.19382507060121124, "grad_norm": 309.7251281738281, "learning_rate": 9.744930680664685e-06, "loss": 30.5486, "step": 95950 }, { "epoch": 0.19384527123389506, "grad_norm": 777.8043823242188, "learning_rate": 9.74482060242653e-06, "loss": 19.8186, "step": 95960 }, { "epoch": 0.19386547186657885, "grad_norm": 407.1781921386719, "learning_rate": 9.744710501062698e-06, "loss": 32.2383, "step": 95970 }, { "epoch": 0.19388567249926267, "grad_norm": 952.85693359375, "learning_rate": 9.744600376573721e-06, "loss": 26.6892, "step": 95980 }, { "epoch": 0.1939058731319465, "grad_norm": 416.1173095703125, "learning_rate": 9.744490228960137e-06, "loss": 27.1903, "step": 95990 }, { "epoch": 0.1939260737646303, "grad_norm": 254.97972106933594, "learning_rate": 9.744380058222483e-06, "loss": 27.8831, "step": 96000 }, { "epoch": 0.19394627439731413, "grad_norm": 429.0074768066406, "learning_rate": 9.744269864361298e-06, "loss": 14.0351, "step": 96010 }, { "epoch": 0.19396647502999795, "grad_norm": 221.5646209716797, "learning_rate": 9.744159647377114e-06, "loss": 17.8883, "step": 96020 }, { "epoch": 0.19398667566268177, "grad_norm": 503.8222351074219, "learning_rate": 9.744049407270472e-06, "loss": 19.1898, "step": 96030 }, { "epoch": 0.19400687629536556, "grad_norm": 324.0685729980469, "learning_rate": 9.74393914404191e-06, "loss": 23.3817, 
"step": 96040 }, { "epoch": 0.19402707692804938, "grad_norm": 36.85819625854492, "learning_rate": 9.743828857691964e-06, "loss": 30.638, "step": 96050 }, { "epoch": 0.1940472775607332, "grad_norm": 365.65386962890625, "learning_rate": 9.74371854822117e-06, "loss": 40.5744, "step": 96060 }, { "epoch": 0.19406747819341702, "grad_norm": 223.74566650390625, "learning_rate": 9.74360821563007e-06, "loss": 20.3652, "step": 96070 }, { "epoch": 0.19408767882610084, "grad_norm": 382.7676086425781, "learning_rate": 9.743497859919196e-06, "loss": 29.6577, "step": 96080 }, { "epoch": 0.19410787945878466, "grad_norm": 482.62103271484375, "learning_rate": 9.743387481089091e-06, "loss": 24.2073, "step": 96090 }, { "epoch": 0.19412808009146845, "grad_norm": 332.37127685546875, "learning_rate": 9.743277079140288e-06, "loss": 12.8352, "step": 96100 }, { "epoch": 0.19414828072415227, "grad_norm": 868.508056640625, "learning_rate": 9.74316665407333e-06, "loss": 31.6455, "step": 96110 }, { "epoch": 0.1941684813568361, "grad_norm": 450.9184265136719, "learning_rate": 9.743056205888752e-06, "loss": 24.8632, "step": 96120 }, { "epoch": 0.1941886819895199, "grad_norm": 588.66650390625, "learning_rate": 9.742945734587093e-06, "loss": 25.9008, "step": 96130 }, { "epoch": 0.19420888262220373, "grad_norm": 377.9995422363281, "learning_rate": 9.742835240168893e-06, "loss": 23.8725, "step": 96140 }, { "epoch": 0.19422908325488755, "grad_norm": 280.8848571777344, "learning_rate": 9.742724722634688e-06, "loss": 38.8778, "step": 96150 }, { "epoch": 0.19424928388757137, "grad_norm": 392.6344909667969, "learning_rate": 9.742614181985019e-06, "loss": 22.6172, "step": 96160 }, { "epoch": 0.19426948452025516, "grad_norm": 363.85845947265625, "learning_rate": 9.742503618220422e-06, "loss": 30.3061, "step": 96170 }, { "epoch": 0.19428968515293898, "grad_norm": 328.5239562988281, "learning_rate": 9.74239303134144e-06, "loss": 18.4416, "step": 96180 }, { "epoch": 0.1943098857856228, "grad_norm": 277.4551696777344, "learning_rate": 9.742282421348607e-06, "loss": 20.9246, "step": 96190 }, { "epoch": 0.19433008641830662, "grad_norm": 283.2068176269531, "learning_rate": 9.742171788242468e-06, "loss": 40.5074, "step": 96200 }, { "epoch": 0.19435028705099044, "grad_norm": 925.0282592773438, "learning_rate": 9.742061132023555e-06, "loss": 17.0826, "step": 96210 }, { "epoch": 0.19437048768367426, "grad_norm": 1141.78173828125, "learning_rate": 9.741950452692414e-06, "loss": 20.104, "step": 96220 }, { "epoch": 0.19439068831635806, "grad_norm": 378.7456359863281, "learning_rate": 9.741839750249579e-06, "loss": 15.3081, "step": 96230 }, { "epoch": 0.19441088894904188, "grad_norm": 860.4197387695312, "learning_rate": 9.741729024695594e-06, "loss": 26.3532, "step": 96240 }, { "epoch": 0.1944310895817257, "grad_norm": 415.7444152832031, "learning_rate": 9.741618276030998e-06, "loss": 22.4038, "step": 96250 }, { "epoch": 0.19445129021440952, "grad_norm": 434.64569091796875, "learning_rate": 9.741507504256327e-06, "loss": 18.588, "step": 96260 }, { "epoch": 0.19447149084709334, "grad_norm": 231.0457305908203, "learning_rate": 9.741396709372126e-06, "loss": 29.817, "step": 96270 }, { "epoch": 0.19449169147977716, "grad_norm": 269.8516540527344, "learning_rate": 9.74128589137893e-06, "loss": 24.8359, "step": 96280 }, { "epoch": 0.19451189211246095, "grad_norm": 492.55133056640625, "learning_rate": 9.741175050277283e-06, "loss": 15.1692, "step": 96290 }, { "epoch": 0.19453209274514477, "grad_norm": 410.5207824707031, "learning_rate": 
9.741064186067723e-06, "loss": 19.2975, "step": 96300 }, { "epoch": 0.1945522933778286, "grad_norm": 1291.794921875, "learning_rate": 9.740953298750792e-06, "loss": 44.1921, "step": 96310 }, { "epoch": 0.1945724940105124, "grad_norm": 336.7740173339844, "learning_rate": 9.74084238832703e-06, "loss": 35.6793, "step": 96320 }, { "epoch": 0.19459269464319623, "grad_norm": 725.1658325195312, "learning_rate": 9.740731454796976e-06, "loss": 42.7733, "step": 96330 }, { "epoch": 0.19461289527588005, "grad_norm": 732.6233520507812, "learning_rate": 9.740620498161173e-06, "loss": 25.4401, "step": 96340 }, { "epoch": 0.19463309590856387, "grad_norm": 445.3772277832031, "learning_rate": 9.74050951842016e-06, "loss": 16.6677, "step": 96350 }, { "epoch": 0.19465329654124766, "grad_norm": 463.70745849609375, "learning_rate": 9.74039851557448e-06, "loss": 19.5565, "step": 96360 }, { "epoch": 0.19467349717393148, "grad_norm": 484.64068603515625, "learning_rate": 9.740287489624671e-06, "loss": 28.0441, "step": 96370 }, { "epoch": 0.1946936978066153, "grad_norm": 24.487995147705078, "learning_rate": 9.740176440571277e-06, "loss": 18.9507, "step": 96380 }, { "epoch": 0.19471389843929912, "grad_norm": 11.86369800567627, "learning_rate": 9.740065368414837e-06, "loss": 15.8156, "step": 96390 }, { "epoch": 0.19473409907198294, "grad_norm": 582.7681274414062, "learning_rate": 9.739954273155892e-06, "loss": 22.7781, "step": 96400 }, { "epoch": 0.19475429970466676, "grad_norm": 271.8302917480469, "learning_rate": 9.739843154794985e-06, "loss": 17.154, "step": 96410 }, { "epoch": 0.19477450033735055, "grad_norm": 1138.83544921875, "learning_rate": 9.73973201333266e-06, "loss": 32.6356, "step": 96420 }, { "epoch": 0.19479470097003437, "grad_norm": 168.5606689453125, "learning_rate": 9.739620848769455e-06, "loss": 22.8429, "step": 96430 }, { "epoch": 0.1948149016027182, "grad_norm": 0.0, "learning_rate": 9.739509661105912e-06, "loss": 24.7045, "step": 96440 }, { "epoch": 0.194835102235402, "grad_norm": 365.2082824707031, "learning_rate": 9.739398450342573e-06, "loss": 41.4875, "step": 96450 }, { "epoch": 0.19485530286808583, "grad_norm": 128.98590087890625, "learning_rate": 9.739287216479983e-06, "loss": 13.2018, "step": 96460 }, { "epoch": 0.19487550350076965, "grad_norm": 512.8473510742188, "learning_rate": 9.73917595951868e-06, "loss": 31.5895, "step": 96470 }, { "epoch": 0.19489570413345347, "grad_norm": 239.5601043701172, "learning_rate": 9.73906467945921e-06, "loss": 13.3083, "step": 96480 }, { "epoch": 0.19491590476613727, "grad_norm": 397.5332336425781, "learning_rate": 9.738953376302111e-06, "loss": 29.3499, "step": 96490 }, { "epoch": 0.19493610539882109, "grad_norm": 246.18441772460938, "learning_rate": 9.73884205004793e-06, "loss": 25.9016, "step": 96500 }, { "epoch": 0.1949563060315049, "grad_norm": 417.99932861328125, "learning_rate": 9.738730700697207e-06, "loss": 9.6361, "step": 96510 }, { "epoch": 0.19497650666418873, "grad_norm": 947.2726440429688, "learning_rate": 9.738619328250485e-06, "loss": 25.0256, "step": 96520 }, { "epoch": 0.19499670729687255, "grad_norm": 952.1075439453125, "learning_rate": 9.738507932708308e-06, "loss": 12.2096, "step": 96530 }, { "epoch": 0.19501690792955637, "grad_norm": 175.35069274902344, "learning_rate": 9.738396514071216e-06, "loss": 13.2193, "step": 96540 }, { "epoch": 0.19503710856224016, "grad_norm": 633.6536865234375, "learning_rate": 9.738285072339756e-06, "loss": 15.0969, "step": 96550 }, { "epoch": 0.19505730919492398, "grad_norm": 682.6355590820312, 
"learning_rate": 9.73817360751447e-06, "loss": 27.0863, "step": 96560 }, { "epoch": 0.1950775098276078, "grad_norm": 637.1114501953125, "learning_rate": 9.738062119595897e-06, "loss": 29.1412, "step": 96570 }, { "epoch": 0.19509771046029162, "grad_norm": 172.63792419433594, "learning_rate": 9.737950608584588e-06, "loss": 19.8485, "step": 96580 }, { "epoch": 0.19511791109297544, "grad_norm": 390.24969482421875, "learning_rate": 9.73783907448108e-06, "loss": 20.4416, "step": 96590 }, { "epoch": 0.19513811172565926, "grad_norm": 390.1633605957031, "learning_rate": 9.73772751728592e-06, "loss": 26.4586, "step": 96600 }, { "epoch": 0.19515831235834305, "grad_norm": 128.56723022460938, "learning_rate": 9.73761593699965e-06, "loss": 18.1316, "step": 96610 }, { "epoch": 0.19517851299102687, "grad_norm": 603.45166015625, "learning_rate": 9.737504333622814e-06, "loss": 35.423, "step": 96620 }, { "epoch": 0.1951987136237107, "grad_norm": 2.215932607650757, "learning_rate": 9.737392707155957e-06, "loss": 28.0613, "step": 96630 }, { "epoch": 0.1952189142563945, "grad_norm": 342.9813537597656, "learning_rate": 9.737281057599623e-06, "loss": 37.2923, "step": 96640 }, { "epoch": 0.19523911488907833, "grad_norm": 309.3840637207031, "learning_rate": 9.737169384954356e-06, "loss": 19.6012, "step": 96650 }, { "epoch": 0.19525931552176215, "grad_norm": 247.97494506835938, "learning_rate": 9.7370576892207e-06, "loss": 18.5051, "step": 96660 }, { "epoch": 0.19527951615444597, "grad_norm": 644.9800415039062, "learning_rate": 9.736945970399198e-06, "loss": 16.9285, "step": 96670 }, { "epoch": 0.19529971678712976, "grad_norm": 696.183349609375, "learning_rate": 9.736834228490398e-06, "loss": 21.4221, "step": 96680 }, { "epoch": 0.19531991741981358, "grad_norm": 339.5610046386719, "learning_rate": 9.73672246349484e-06, "loss": 28.4233, "step": 96690 }, { "epoch": 0.1953401180524974, "grad_norm": 488.5916748046875, "learning_rate": 9.736610675413073e-06, "loss": 44.2945, "step": 96700 }, { "epoch": 0.19536031868518122, "grad_norm": 341.2404479980469, "learning_rate": 9.736498864245638e-06, "loss": 29.4299, "step": 96710 }, { "epoch": 0.19538051931786504, "grad_norm": 580.4157104492188, "learning_rate": 9.736387029993084e-06, "loss": 12.2115, "step": 96720 }, { "epoch": 0.19540071995054886, "grad_norm": 527.8916625976562, "learning_rate": 9.736275172655954e-06, "loss": 29.7147, "step": 96730 }, { "epoch": 0.19542092058323265, "grad_norm": 625.5405883789062, "learning_rate": 9.736163292234792e-06, "loss": 17.8191, "step": 96740 }, { "epoch": 0.19544112121591647, "grad_norm": 1239.732421875, "learning_rate": 9.736051388730146e-06, "loss": 37.3495, "step": 96750 }, { "epoch": 0.1954613218486003, "grad_norm": 236.486572265625, "learning_rate": 9.735939462142558e-06, "loss": 22.7968, "step": 96760 }, { "epoch": 0.19548152248128411, "grad_norm": 587.5750732421875, "learning_rate": 9.735827512472576e-06, "loss": 32.5167, "step": 96770 }, { "epoch": 0.19550172311396793, "grad_norm": 643.1697998046875, "learning_rate": 9.735715539720747e-06, "loss": 24.2916, "step": 96780 }, { "epoch": 0.19552192374665175, "grad_norm": 213.8444366455078, "learning_rate": 9.735603543887613e-06, "loss": 16.9851, "step": 96790 }, { "epoch": 0.19554212437933557, "grad_norm": 202.58883666992188, "learning_rate": 9.735491524973723e-06, "loss": 26.4842, "step": 96800 }, { "epoch": 0.19556232501201937, "grad_norm": 362.2386474609375, "learning_rate": 9.73537948297962e-06, "loss": 20.7448, "step": 96810 }, { "epoch": 0.1955825256447032, "grad_norm": 
477.868408203125, "learning_rate": 9.735267417905852e-06, "loss": 28.062, "step": 96820 }, { "epoch": 0.195602726277387, "grad_norm": 176.94195556640625, "learning_rate": 9.735155329752965e-06, "loss": 22.0477, "step": 96830 }, { "epoch": 0.19562292691007083, "grad_norm": 415.7734069824219, "learning_rate": 9.735043218521507e-06, "loss": 21.3873, "step": 96840 }, { "epoch": 0.19564312754275465, "grad_norm": 303.6124572753906, "learning_rate": 9.734931084212021e-06, "loss": 22.8141, "step": 96850 }, { "epoch": 0.19566332817543847, "grad_norm": 612.8478393554688, "learning_rate": 9.734818926825056e-06, "loss": 15.9648, "step": 96860 }, { "epoch": 0.19568352880812226, "grad_norm": 190.95999145507812, "learning_rate": 9.734706746361157e-06, "loss": 23.0034, "step": 96870 }, { "epoch": 0.19570372944080608, "grad_norm": 344.77801513671875, "learning_rate": 9.734594542820871e-06, "loss": 17.1716, "step": 96880 }, { "epoch": 0.1957239300734899, "grad_norm": 399.13232421875, "learning_rate": 9.734482316204747e-06, "loss": 21.2854, "step": 96890 }, { "epoch": 0.19574413070617372, "grad_norm": 147.22430419921875, "learning_rate": 9.73437006651333e-06, "loss": 19.4687, "step": 96900 }, { "epoch": 0.19576433133885754, "grad_norm": 35.441829681396484, "learning_rate": 9.734257793747168e-06, "loss": 27.392, "step": 96910 }, { "epoch": 0.19578453197154136, "grad_norm": 1310.0384521484375, "learning_rate": 9.734145497906807e-06, "loss": 34.6572, "step": 96920 }, { "epoch": 0.19580473260422515, "grad_norm": 201.7588348388672, "learning_rate": 9.734033178992794e-06, "loss": 38.5917, "step": 96930 }, { "epoch": 0.19582493323690897, "grad_norm": 1403.2457275390625, "learning_rate": 9.73392083700568e-06, "loss": 28.0674, "step": 96940 }, { "epoch": 0.1958451338695928, "grad_norm": 224.3494873046875, "learning_rate": 9.73380847194601e-06, "loss": 35.1673, "step": 96950 }, { "epoch": 0.1958653345022766, "grad_norm": 416.7572021484375, "learning_rate": 9.733696083814327e-06, "loss": 21.4552, "step": 96960 }, { "epoch": 0.19588553513496043, "grad_norm": 394.698486328125, "learning_rate": 9.733583672611189e-06, "loss": 25.4069, "step": 96970 }, { "epoch": 0.19590573576764425, "grad_norm": 1013.8325805664062, "learning_rate": 9.733471238337136e-06, "loss": 27.7805, "step": 96980 }, { "epoch": 0.19592593640032807, "grad_norm": 440.7705993652344, "learning_rate": 9.733358780992717e-06, "loss": 33.2662, "step": 96990 }, { "epoch": 0.19594613703301186, "grad_norm": 307.0263977050781, "learning_rate": 9.733246300578482e-06, "loss": 24.355, "step": 97000 }, { "epoch": 0.19596633766569568, "grad_norm": 583.3680419921875, "learning_rate": 9.73313379709498e-06, "loss": 32.2298, "step": 97010 }, { "epoch": 0.1959865382983795, "grad_norm": 1420.5535888671875, "learning_rate": 9.733021270542758e-06, "loss": 28.7911, "step": 97020 }, { "epoch": 0.19600673893106332, "grad_norm": 624.2348022460938, "learning_rate": 9.732908720922362e-06, "loss": 30.9689, "step": 97030 }, { "epoch": 0.19602693956374714, "grad_norm": 129.958740234375, "learning_rate": 9.732796148234345e-06, "loss": 12.3031, "step": 97040 }, { "epoch": 0.19604714019643096, "grad_norm": 145.7042694091797, "learning_rate": 9.732683552479253e-06, "loss": 17.4793, "step": 97050 }, { "epoch": 0.19606734082911476, "grad_norm": 711.9461669921875, "learning_rate": 9.732570933657635e-06, "loss": 16.8873, "step": 97060 }, { "epoch": 0.19608754146179858, "grad_norm": 85.0526123046875, "learning_rate": 9.73245829177004e-06, "loss": 16.1001, "step": 97070 }, { "epoch": 
0.1961077420944824, "grad_norm": 536.3442993164062, "learning_rate": 9.732345626817018e-06, "loss": 32.5822, "step": 97080 }, { "epoch": 0.19612794272716622, "grad_norm": 243.9501190185547, "learning_rate": 9.732232938799118e-06, "loss": 25.1747, "step": 97090 }, { "epoch": 0.19614814335985004, "grad_norm": 345.52655029296875, "learning_rate": 9.732120227716887e-06, "loss": 32.5119, "step": 97100 }, { "epoch": 0.19616834399253386, "grad_norm": 300.8923645019531, "learning_rate": 9.732007493570877e-06, "loss": 29.92, "step": 97110 }, { "epoch": 0.19618854462521768, "grad_norm": 419.56195068359375, "learning_rate": 9.731894736361636e-06, "loss": 20.8166, "step": 97120 }, { "epoch": 0.19620874525790147, "grad_norm": 330.9029235839844, "learning_rate": 9.731781956089713e-06, "loss": 19.3806, "step": 97130 }, { "epoch": 0.1962289458905853, "grad_norm": 264.5019836425781, "learning_rate": 9.731669152755662e-06, "loss": 10.3985, "step": 97140 }, { "epoch": 0.1962491465232691, "grad_norm": 609.105712890625, "learning_rate": 9.731556326360027e-06, "loss": 33.6437, "step": 97150 }, { "epoch": 0.19626934715595293, "grad_norm": 287.99896240234375, "learning_rate": 9.73144347690336e-06, "loss": 35.8379, "step": 97160 }, { "epoch": 0.19628954778863675, "grad_norm": 981.0174560546875, "learning_rate": 9.731330604386214e-06, "loss": 23.1507, "step": 97170 }, { "epoch": 0.19630974842132057, "grad_norm": 1168.6593017578125, "learning_rate": 9.731217708809133e-06, "loss": 56.7481, "step": 97180 }, { "epoch": 0.19632994905400436, "grad_norm": 288.0087890625, "learning_rate": 9.731104790172672e-06, "loss": 24.3511, "step": 97190 }, { "epoch": 0.19635014968668818, "grad_norm": 321.56048583984375, "learning_rate": 9.73099184847738e-06, "loss": 21.5144, "step": 97200 }, { "epoch": 0.196370350319372, "grad_norm": 570.0701904296875, "learning_rate": 9.73087888372381e-06, "loss": 35.149, "step": 97210 }, { "epoch": 0.19639055095205582, "grad_norm": 635.6535034179688, "learning_rate": 9.730765895912506e-06, "loss": 24.0599, "step": 97220 }, { "epoch": 0.19641075158473964, "grad_norm": 212.3668670654297, "learning_rate": 9.730652885044025e-06, "loss": 10.3039, "step": 97230 }, { "epoch": 0.19643095221742346, "grad_norm": 515.6719970703125, "learning_rate": 9.730539851118913e-06, "loss": 33.4415, "step": 97240 }, { "epoch": 0.19645115285010725, "grad_norm": 306.0253601074219, "learning_rate": 9.730426794137727e-06, "loss": 37.0226, "step": 97250 }, { "epoch": 0.19647135348279107, "grad_norm": 149.7908172607422, "learning_rate": 9.730313714101014e-06, "loss": 22.73, "step": 97260 }, { "epoch": 0.1964915541154749, "grad_norm": 458.1486511230469, "learning_rate": 9.730200611009324e-06, "loss": 26.1247, "step": 97270 }, { "epoch": 0.1965117547481587, "grad_norm": 208.75621032714844, "learning_rate": 9.73008748486321e-06, "loss": 24.2153, "step": 97280 }, { "epoch": 0.19653195538084253, "grad_norm": 245.51007080078125, "learning_rate": 9.729974335663227e-06, "loss": 18.0914, "step": 97290 }, { "epoch": 0.19655215601352635, "grad_norm": 275.8787841796875, "learning_rate": 9.72986116340992e-06, "loss": 27.147, "step": 97300 }, { "epoch": 0.19657235664621017, "grad_norm": 427.34661865234375, "learning_rate": 9.729747968103842e-06, "loss": 36.523, "step": 97310 }, { "epoch": 0.19659255727889396, "grad_norm": 842.1649169921875, "learning_rate": 9.729634749745547e-06, "loss": 28.4887, "step": 97320 }, { "epoch": 0.19661275791157778, "grad_norm": 261.8681640625, "learning_rate": 9.729521508335586e-06, "loss": 10.9931, "step": 
97330 }, { "epoch": 0.1966329585442616, "grad_norm": 317.2580871582031, "learning_rate": 9.729408243874511e-06, "loss": 18.809, "step": 97340 }, { "epoch": 0.19665315917694542, "grad_norm": 347.6241760253906, "learning_rate": 9.729294956362873e-06, "loss": 13.2427, "step": 97350 }, { "epoch": 0.19667335980962924, "grad_norm": 109.27238464355469, "learning_rate": 9.729181645801227e-06, "loss": 15.1256, "step": 97360 }, { "epoch": 0.19669356044231306, "grad_norm": 319.5441589355469, "learning_rate": 9.729068312190122e-06, "loss": 10.9112, "step": 97370 }, { "epoch": 0.19671376107499686, "grad_norm": 296.64691162109375, "learning_rate": 9.728954955530113e-06, "loss": 17.7924, "step": 97380 }, { "epoch": 0.19673396170768068, "grad_norm": 476.6488037109375, "learning_rate": 9.728841575821749e-06, "loss": 17.7629, "step": 97390 }, { "epoch": 0.1967541623403645, "grad_norm": 240.47398376464844, "learning_rate": 9.728728173065584e-06, "loss": 14.8832, "step": 97400 }, { "epoch": 0.19677436297304832, "grad_norm": 1217.341552734375, "learning_rate": 9.728614747262173e-06, "loss": 43.1087, "step": 97410 }, { "epoch": 0.19679456360573214, "grad_norm": 737.4136962890625, "learning_rate": 9.728501298412067e-06, "loss": 25.4208, "step": 97420 }, { "epoch": 0.19681476423841596, "grad_norm": 375.95751953125, "learning_rate": 9.72838782651582e-06, "loss": 21.8538, "step": 97430 }, { "epoch": 0.19683496487109975, "grad_norm": 702.6217041015625, "learning_rate": 9.728274331573983e-06, "loss": 23.9543, "step": 97440 }, { "epoch": 0.19685516550378357, "grad_norm": 699.86572265625, "learning_rate": 9.728160813587111e-06, "loss": 24.3078, "step": 97450 }, { "epoch": 0.1968753661364674, "grad_norm": 314.34027099609375, "learning_rate": 9.728047272555756e-06, "loss": 11.6271, "step": 97460 }, { "epoch": 0.1968955667691512, "grad_norm": 1008.5680541992188, "learning_rate": 9.727933708480474e-06, "loss": 39.3888, "step": 97470 }, { "epoch": 0.19691576740183503, "grad_norm": 335.6155090332031, "learning_rate": 9.727820121361815e-06, "loss": 21.5971, "step": 97480 }, { "epoch": 0.19693596803451885, "grad_norm": 174.36721801757812, "learning_rate": 9.727706511200335e-06, "loss": 18.7552, "step": 97490 }, { "epoch": 0.19695616866720267, "grad_norm": 650.0670166015625, "learning_rate": 9.727592877996585e-06, "loss": 37.8017, "step": 97500 }, { "epoch": 0.19697636929988646, "grad_norm": 397.67364501953125, "learning_rate": 9.727479221751122e-06, "loss": 17.679, "step": 97510 }, { "epoch": 0.19699656993257028, "grad_norm": 348.5740661621094, "learning_rate": 9.727365542464498e-06, "loss": 52.2058, "step": 97520 }, { "epoch": 0.1970167705652541, "grad_norm": 512.7111206054688, "learning_rate": 9.727251840137269e-06, "loss": 39.8835, "step": 97530 }, { "epoch": 0.19703697119793792, "grad_norm": 163.2144012451172, "learning_rate": 9.727138114769986e-06, "loss": 16.9144, "step": 97540 }, { "epoch": 0.19705717183062174, "grad_norm": 581.782470703125, "learning_rate": 9.727024366363208e-06, "loss": 27.117, "step": 97550 }, { "epoch": 0.19707737246330556, "grad_norm": 631.5810546875, "learning_rate": 9.726910594917482e-06, "loss": 19.6229, "step": 97560 }, { "epoch": 0.19709757309598935, "grad_norm": 462.9996643066406, "learning_rate": 9.726796800433371e-06, "loss": 24.7656, "step": 97570 }, { "epoch": 0.19711777372867317, "grad_norm": 326.8007507324219, "learning_rate": 9.726682982911423e-06, "loss": 43.7528, "step": 97580 }, { "epoch": 0.197137974361357, "grad_norm": 467.9544982910156, "learning_rate": 9.726569142352197e-06, 
"loss": 23.2517, "step": 97590 }, { "epoch": 0.1971581749940408, "grad_norm": 318.44317626953125, "learning_rate": 9.726455278756249e-06, "loss": 48.4266, "step": 97600 }, { "epoch": 0.19717837562672463, "grad_norm": 432.2263488769531, "learning_rate": 9.726341392124127e-06, "loss": 13.2294, "step": 97610 }, { "epoch": 0.19719857625940845, "grad_norm": 417.3358459472656, "learning_rate": 9.726227482456391e-06, "loss": 19.9933, "step": 97620 }, { "epoch": 0.19721877689209227, "grad_norm": 764.5451049804688, "learning_rate": 9.726113549753597e-06, "loss": 32.1456, "step": 97630 }, { "epoch": 0.19723897752477607, "grad_norm": 241.2354736328125, "learning_rate": 9.725999594016298e-06, "loss": 23.6431, "step": 97640 }, { "epoch": 0.19725917815745989, "grad_norm": 2175.685302734375, "learning_rate": 9.72588561524505e-06, "loss": 42.8542, "step": 97650 }, { "epoch": 0.1972793787901437, "grad_norm": 268.6827697753906, "learning_rate": 9.725771613440408e-06, "loss": 28.908, "step": 97660 }, { "epoch": 0.19729957942282753, "grad_norm": 387.532470703125, "learning_rate": 9.72565758860293e-06, "loss": 16.1734, "step": 97670 }, { "epoch": 0.19731978005551135, "grad_norm": 1250.755126953125, "learning_rate": 9.725543540733168e-06, "loss": 40.3066, "step": 97680 }, { "epoch": 0.19733998068819517, "grad_norm": 258.7558288574219, "learning_rate": 9.725429469831682e-06, "loss": 18.6515, "step": 97690 }, { "epoch": 0.19736018132087896, "grad_norm": 299.046142578125, "learning_rate": 9.725315375899025e-06, "loss": 17.033, "step": 97700 }, { "epoch": 0.19738038195356278, "grad_norm": 594.5548095703125, "learning_rate": 9.725201258935755e-06, "loss": 12.3045, "step": 97710 }, { "epoch": 0.1974005825862466, "grad_norm": 630.0167236328125, "learning_rate": 9.725087118942425e-06, "loss": 16.1351, "step": 97720 }, { "epoch": 0.19742078321893042, "grad_norm": 6.42670202255249, "learning_rate": 9.724972955919594e-06, "loss": 16.2627, "step": 97730 }, { "epoch": 0.19744098385161424, "grad_norm": 352.1437683105469, "learning_rate": 9.72485876986782e-06, "loss": 17.0874, "step": 97740 }, { "epoch": 0.19746118448429806, "grad_norm": 789.1223754882812, "learning_rate": 9.724744560787655e-06, "loss": 28.869, "step": 97750 }, { "epoch": 0.19748138511698185, "grad_norm": 324.92718505859375, "learning_rate": 9.724630328679658e-06, "loss": 23.7328, "step": 97760 }, { "epoch": 0.19750158574966567, "grad_norm": 484.04547119140625, "learning_rate": 9.724516073544388e-06, "loss": 22.9338, "step": 97770 }, { "epoch": 0.1975217863823495, "grad_norm": 312.01007080078125, "learning_rate": 9.724401795382398e-06, "loss": 13.2022, "step": 97780 }, { "epoch": 0.1975419870150333, "grad_norm": 243.39907836914062, "learning_rate": 9.724287494194247e-06, "loss": 17.0577, "step": 97790 }, { "epoch": 0.19756218764771713, "grad_norm": 160.3676300048828, "learning_rate": 9.724173169980492e-06, "loss": 29.0538, "step": 97800 }, { "epoch": 0.19758238828040095, "grad_norm": 116.18144226074219, "learning_rate": 9.72405882274169e-06, "loss": 16.0328, "step": 97810 }, { "epoch": 0.19760258891308477, "grad_norm": 112.05309295654297, "learning_rate": 9.723944452478398e-06, "loss": 19.876, "step": 97820 }, { "epoch": 0.19762278954576856, "grad_norm": 861.3955688476562, "learning_rate": 9.723830059191173e-06, "loss": 19.4862, "step": 97830 }, { "epoch": 0.19764299017845238, "grad_norm": 411.5660705566406, "learning_rate": 9.723715642880574e-06, "loss": 22.4642, "step": 97840 }, { "epoch": 0.1976631908111362, "grad_norm": 153.12356567382812, 
"learning_rate": 9.723601203547158e-06, "loss": 18.7759, "step": 97850 }, { "epoch": 0.19768339144382002, "grad_norm": 228.43202209472656, "learning_rate": 9.723486741191482e-06, "loss": 23.4711, "step": 97860 }, { "epoch": 0.19770359207650384, "grad_norm": 827.0603637695312, "learning_rate": 9.723372255814105e-06, "loss": 31.2898, "step": 97870 }, { "epoch": 0.19772379270918766, "grad_norm": 681.8280029296875, "learning_rate": 9.723257747415584e-06, "loss": 26.4491, "step": 97880 }, { "epoch": 0.19774399334187145, "grad_norm": 408.0483703613281, "learning_rate": 9.723143215996479e-06, "loss": 33.7071, "step": 97890 }, { "epoch": 0.19776419397455527, "grad_norm": 165.12281799316406, "learning_rate": 9.723028661557345e-06, "loss": 16.8155, "step": 97900 }, { "epoch": 0.1977843946072391, "grad_norm": 450.7340087890625, "learning_rate": 9.722914084098745e-06, "loss": 40.1197, "step": 97910 }, { "epoch": 0.19780459523992291, "grad_norm": 111.25831604003906, "learning_rate": 9.722799483621232e-06, "loss": 15.7333, "step": 97920 }, { "epoch": 0.19782479587260673, "grad_norm": 479.22613525390625, "learning_rate": 9.722684860125367e-06, "loss": 13.8798, "step": 97930 }, { "epoch": 0.19784499650529055, "grad_norm": 267.47003173828125, "learning_rate": 9.72257021361171e-06, "loss": 20.3385, "step": 97940 }, { "epoch": 0.19786519713797437, "grad_norm": 33.6751823425293, "learning_rate": 9.722455544080818e-06, "loss": 19.3102, "step": 97950 }, { "epoch": 0.19788539777065817, "grad_norm": 6.024238586425781, "learning_rate": 9.72234085153325e-06, "loss": 23.8579, "step": 97960 }, { "epoch": 0.197905598403342, "grad_norm": 360.9849853515625, "learning_rate": 9.722226135969565e-06, "loss": 34.6989, "step": 97970 }, { "epoch": 0.1979257990360258, "grad_norm": 739.1018676757812, "learning_rate": 9.722111397390325e-06, "loss": 24.8314, "step": 97980 }, { "epoch": 0.19794599966870963, "grad_norm": 1417.043212890625, "learning_rate": 9.721996635796085e-06, "loss": 28.6388, "step": 97990 }, { "epoch": 0.19796620030139345, "grad_norm": 329.4474792480469, "learning_rate": 9.721881851187406e-06, "loss": 38.0278, "step": 98000 }, { "epoch": 0.19798640093407727, "grad_norm": 484.0167236328125, "learning_rate": 9.721767043564848e-06, "loss": 29.8133, "step": 98010 }, { "epoch": 0.19800660156676106, "grad_norm": 521.02880859375, "learning_rate": 9.72165221292897e-06, "loss": 26.0634, "step": 98020 }, { "epoch": 0.19802680219944488, "grad_norm": 632.4464721679688, "learning_rate": 9.721537359280332e-06, "loss": 21.9816, "step": 98030 }, { "epoch": 0.1980470028321287, "grad_norm": 591.2565307617188, "learning_rate": 9.721422482619493e-06, "loss": 37.4917, "step": 98040 }, { "epoch": 0.19806720346481252, "grad_norm": 677.9862670898438, "learning_rate": 9.721307582947014e-06, "loss": 23.1645, "step": 98050 }, { "epoch": 0.19808740409749634, "grad_norm": 252.03517150878906, "learning_rate": 9.721192660263454e-06, "loss": 24.8406, "step": 98060 }, { "epoch": 0.19810760473018016, "grad_norm": 333.40264892578125, "learning_rate": 9.721077714569374e-06, "loss": 13.3066, "step": 98070 }, { "epoch": 0.19812780536286395, "grad_norm": 600.9188842773438, "learning_rate": 9.720962745865334e-06, "loss": 27.6109, "step": 98080 }, { "epoch": 0.19814800599554777, "grad_norm": 392.2532958984375, "learning_rate": 9.720847754151894e-06, "loss": 16.8962, "step": 98090 }, { "epoch": 0.1981682066282316, "grad_norm": 157.43276977539062, "learning_rate": 9.720732739429614e-06, "loss": 41.7694, "step": 98100 }, { "epoch": 0.1981884072609154, 
"grad_norm": 521.4472045898438, "learning_rate": 9.720617701699056e-06, "loss": 31.5027, "step": 98110 }, { "epoch": 0.19820860789359923, "grad_norm": 312.29608154296875, "learning_rate": 9.72050264096078e-06, "loss": 27.9765, "step": 98120 }, { "epoch": 0.19822880852628305, "grad_norm": 198.32687377929688, "learning_rate": 9.720387557215344e-06, "loss": 38.0568, "step": 98130 }, { "epoch": 0.19824900915896687, "grad_norm": 119.37037658691406, "learning_rate": 9.720272450463315e-06, "loss": 16.5318, "step": 98140 }, { "epoch": 0.19826920979165066, "grad_norm": 274.2486572265625, "learning_rate": 9.72015732070525e-06, "loss": 13.2182, "step": 98150 }, { "epoch": 0.19828941042433448, "grad_norm": 392.5074462890625, "learning_rate": 9.72004216794171e-06, "loss": 36.3018, "step": 98160 }, { "epoch": 0.1983096110570183, "grad_norm": 458.8179016113281, "learning_rate": 9.719926992173257e-06, "loss": 23.7772, "step": 98170 }, { "epoch": 0.19832981168970212, "grad_norm": 103.11629486083984, "learning_rate": 9.71981179340045e-06, "loss": 25.5153, "step": 98180 }, { "epoch": 0.19835001232238594, "grad_norm": 970.4147338867188, "learning_rate": 9.719696571623857e-06, "loss": 29.5578, "step": 98190 }, { "epoch": 0.19837021295506976, "grad_norm": 337.5794677734375, "learning_rate": 9.719581326844033e-06, "loss": 12.8689, "step": 98200 }, { "epoch": 0.19839041358775356, "grad_norm": 122.60509490966797, "learning_rate": 9.719466059061542e-06, "loss": 14.564, "step": 98210 }, { "epoch": 0.19841061422043738, "grad_norm": 18.19044303894043, "learning_rate": 9.719350768276947e-06, "loss": 16.1531, "step": 98220 }, { "epoch": 0.1984308148531212, "grad_norm": 448.45513916015625, "learning_rate": 9.719235454490807e-06, "loss": 25.5586, "step": 98230 }, { "epoch": 0.19845101548580502, "grad_norm": 534.3065185546875, "learning_rate": 9.719120117703688e-06, "loss": 12.0728, "step": 98240 }, { "epoch": 0.19847121611848884, "grad_norm": 436.68438720703125, "learning_rate": 9.719004757916149e-06, "loss": 24.4262, "step": 98250 }, { "epoch": 0.19849141675117266, "grad_norm": 595.8948364257812, "learning_rate": 9.718889375128752e-06, "loss": 20.5172, "step": 98260 }, { "epoch": 0.19851161738385648, "grad_norm": 229.6000213623047, "learning_rate": 9.71877396934206e-06, "loss": 13.0411, "step": 98270 }, { "epoch": 0.19853181801654027, "grad_norm": 1049.194580078125, "learning_rate": 9.718658540556638e-06, "loss": 31.0035, "step": 98280 }, { "epoch": 0.1985520186492241, "grad_norm": 431.2273254394531, "learning_rate": 9.718543088773047e-06, "loss": 22.3374, "step": 98290 }, { "epoch": 0.1985722192819079, "grad_norm": 641.8602905273438, "learning_rate": 9.718427613991848e-06, "loss": 38.678, "step": 98300 }, { "epoch": 0.19859241991459173, "grad_norm": 642.9496459960938, "learning_rate": 9.718312116213604e-06, "loss": 27.4916, "step": 98310 }, { "epoch": 0.19861262054727555, "grad_norm": 349.74932861328125, "learning_rate": 9.71819659543888e-06, "loss": 30.3614, "step": 98320 }, { "epoch": 0.19863282117995937, "grad_norm": 258.68194580078125, "learning_rate": 9.71808105166824e-06, "loss": 31.4953, "step": 98330 }, { "epoch": 0.19865302181264316, "grad_norm": 456.8066101074219, "learning_rate": 9.717965484902244e-06, "loss": 24.9158, "step": 98340 }, { "epoch": 0.19867322244532698, "grad_norm": 534.912353515625, "learning_rate": 9.717849895141455e-06, "loss": 24.7208, "step": 98350 }, { "epoch": 0.1986934230780108, "grad_norm": 2246.787841796875, "learning_rate": 9.717734282386439e-06, "loss": 28.4148, "step": 98360 }, { 
"epoch": 0.19871362371069462, "grad_norm": 269.0535583496094, "learning_rate": 9.717618646637758e-06, "loss": 9.2332, "step": 98370 }, { "epoch": 0.19873382434337844, "grad_norm": 445.5294494628906, "learning_rate": 9.717502987895975e-06, "loss": 19.4302, "step": 98380 }, { "epoch": 0.19875402497606226, "grad_norm": 763.8333129882812, "learning_rate": 9.717387306161657e-06, "loss": 39.5839, "step": 98390 }, { "epoch": 0.19877422560874605, "grad_norm": 832.6790161132812, "learning_rate": 9.717271601435363e-06, "loss": 27.792, "step": 98400 }, { "epoch": 0.19879442624142987, "grad_norm": 334.370849609375, "learning_rate": 9.71715587371766e-06, "loss": 23.0572, "step": 98410 }, { "epoch": 0.1988146268741137, "grad_norm": 202.83169555664062, "learning_rate": 9.717040123009111e-06, "loss": 17.9318, "step": 98420 }, { "epoch": 0.1988348275067975, "grad_norm": 636.1160278320312, "learning_rate": 9.716924349310281e-06, "loss": 27.7571, "step": 98430 }, { "epoch": 0.19885502813948133, "grad_norm": 256.3330078125, "learning_rate": 9.716808552621735e-06, "loss": 10.47, "step": 98440 }, { "epoch": 0.19887522877216515, "grad_norm": 734.7747802734375, "learning_rate": 9.716692732944036e-06, "loss": 35.1714, "step": 98450 }, { "epoch": 0.19889542940484897, "grad_norm": 385.6474609375, "learning_rate": 9.716576890277747e-06, "loss": 34.7533, "step": 98460 }, { "epoch": 0.19891563003753276, "grad_norm": 1101.65478515625, "learning_rate": 9.716461024623437e-06, "loss": 43.279, "step": 98470 }, { "epoch": 0.19893583067021658, "grad_norm": 751.535400390625, "learning_rate": 9.716345135981663e-06, "loss": 47.3784, "step": 98480 }, { "epoch": 0.1989560313029004, "grad_norm": 713.7888793945312, "learning_rate": 9.716229224353e-06, "loss": 20.0859, "step": 98490 }, { "epoch": 0.19897623193558422, "grad_norm": 149.56085205078125, "learning_rate": 9.716113289738005e-06, "loss": 22.1564, "step": 98500 }, { "epoch": 0.19899643256826804, "grad_norm": 529.3665161132812, "learning_rate": 9.715997332137248e-06, "loss": 20.9888, "step": 98510 }, { "epoch": 0.19901663320095186, "grad_norm": 286.5312194824219, "learning_rate": 9.71588135155129e-06, "loss": 18.9513, "step": 98520 }, { "epoch": 0.19903683383363566, "grad_norm": 541.6201782226562, "learning_rate": 9.7157653479807e-06, "loss": 14.7998, "step": 98530 }, { "epoch": 0.19905703446631948, "grad_norm": 286.26678466796875, "learning_rate": 9.71564932142604e-06, "loss": 16.4403, "step": 98540 }, { "epoch": 0.1990772350990033, "grad_norm": 446.2434997558594, "learning_rate": 9.715533271887877e-06, "loss": 24.1389, "step": 98550 }, { "epoch": 0.19909743573168712, "grad_norm": 625.5509033203125, "learning_rate": 9.715417199366778e-06, "loss": 23.0274, "step": 98560 }, { "epoch": 0.19911763636437094, "grad_norm": 141.72207641601562, "learning_rate": 9.715301103863306e-06, "loss": 15.1728, "step": 98570 }, { "epoch": 0.19913783699705476, "grad_norm": 324.7051696777344, "learning_rate": 9.71518498537803e-06, "loss": 21.8118, "step": 98580 }, { "epoch": 0.19915803762973858, "grad_norm": 571.4437866210938, "learning_rate": 9.715068843911513e-06, "loss": 18.9153, "step": 98590 }, { "epoch": 0.19917823826242237, "grad_norm": 219.9671173095703, "learning_rate": 9.714952679464324e-06, "loss": 16.8028, "step": 98600 }, { "epoch": 0.1991984388951062, "grad_norm": 1232.3668212890625, "learning_rate": 9.714836492037025e-06, "loss": 26.0865, "step": 98610 }, { "epoch": 0.19921863952779, "grad_norm": 366.5797119140625, "learning_rate": 9.714720281630186e-06, "loss": 16.8544, "step": 
98620 }, { "epoch": 0.19923884016047383, "grad_norm": 872.5311889648438, "learning_rate": 9.714604048244372e-06, "loss": 22.2088, "step": 98630 }, { "epoch": 0.19925904079315765, "grad_norm": 67.38678741455078, "learning_rate": 9.714487791880151e-06, "loss": 12.5812, "step": 98640 }, { "epoch": 0.19927924142584147, "grad_norm": 625.0254516601562, "learning_rate": 9.714371512538088e-06, "loss": 30.3685, "step": 98650 }, { "epoch": 0.19929944205852526, "grad_norm": 464.5794677734375, "learning_rate": 9.714255210218747e-06, "loss": 15.5791, "step": 98660 }, { "epoch": 0.19931964269120908, "grad_norm": 428.2454833984375, "learning_rate": 9.7141388849227e-06, "loss": 31.7191, "step": 98670 }, { "epoch": 0.1993398433238929, "grad_norm": 324.87799072265625, "learning_rate": 9.714022536650513e-06, "loss": 29.7944, "step": 98680 }, { "epoch": 0.19936004395657672, "grad_norm": 193.9474639892578, "learning_rate": 9.713906165402751e-06, "loss": 38.0959, "step": 98690 }, { "epoch": 0.19938024458926054, "grad_norm": 253.7535858154297, "learning_rate": 9.713789771179983e-06, "loss": 21.0864, "step": 98700 }, { "epoch": 0.19940044522194436, "grad_norm": 604.3446044921875, "learning_rate": 9.713673353982773e-06, "loss": 21.4508, "step": 98710 }, { "epoch": 0.19942064585462815, "grad_norm": 290.3115234375, "learning_rate": 9.713556913811693e-06, "loss": 21.0728, "step": 98720 }, { "epoch": 0.19944084648731197, "grad_norm": 224.96560668945312, "learning_rate": 9.713440450667307e-06, "loss": 14.6751, "step": 98730 }, { "epoch": 0.1994610471199958, "grad_norm": 182.87301635742188, "learning_rate": 9.713323964550185e-06, "loss": 16.7221, "step": 98740 }, { "epoch": 0.1994812477526796, "grad_norm": 535.5203247070312, "learning_rate": 9.713207455460893e-06, "loss": 24.1277, "step": 98750 }, { "epoch": 0.19950144838536343, "grad_norm": 439.2107238769531, "learning_rate": 9.713090923399999e-06, "loss": 16.6916, "step": 98760 }, { "epoch": 0.19952164901804725, "grad_norm": 792.3186645507812, "learning_rate": 9.712974368368072e-06, "loss": 34.1692, "step": 98770 }, { "epoch": 0.19954184965073107, "grad_norm": 1346.2783203125, "learning_rate": 9.71285779036568e-06, "loss": 47.4253, "step": 98780 }, { "epoch": 0.19956205028341487, "grad_norm": 530.707763671875, "learning_rate": 9.71274118939339e-06, "loss": 18.1747, "step": 98790 }, { "epoch": 0.19958225091609869, "grad_norm": 330.8648986816406, "learning_rate": 9.712624565451772e-06, "loss": 29.0284, "step": 98800 }, { "epoch": 0.1996024515487825, "grad_norm": 570.5589599609375, "learning_rate": 9.712507918541391e-06, "loss": 21.0312, "step": 98810 }, { "epoch": 0.19962265218146633, "grad_norm": 453.2312316894531, "learning_rate": 9.712391248662821e-06, "loss": 15.446, "step": 98820 }, { "epoch": 0.19964285281415015, "grad_norm": 307.1821594238281, "learning_rate": 9.712274555816626e-06, "loss": 23.4843, "step": 98830 }, { "epoch": 0.19966305344683397, "grad_norm": 216.52488708496094, "learning_rate": 9.712157840003377e-06, "loss": 14.731, "step": 98840 }, { "epoch": 0.19968325407951776, "grad_norm": 635.91552734375, "learning_rate": 9.71204110122364e-06, "loss": 58.3714, "step": 98850 }, { "epoch": 0.19970345471220158, "grad_norm": 182.4932403564453, "learning_rate": 9.71192433947799e-06, "loss": 18.5096, "step": 98860 }, { "epoch": 0.1997236553448854, "grad_norm": 1478.6046142578125, "learning_rate": 9.71180755476699e-06, "loss": 33.0487, "step": 98870 }, { "epoch": 0.19974385597756922, "grad_norm": 123.76673126220703, "learning_rate": 9.711690747091211e-06, "loss": 
14.9674, "step": 98880 }, { "epoch": 0.19976405661025304, "grad_norm": 254.47056579589844, "learning_rate": 9.711573916451224e-06, "loss": 13.8541, "step": 98890 }, { "epoch": 0.19978425724293686, "grad_norm": 104.44090270996094, "learning_rate": 9.711457062847596e-06, "loss": 22.3526, "step": 98900 }, { "epoch": 0.19980445787562068, "grad_norm": 213.63504028320312, "learning_rate": 9.7113401862809e-06, "loss": 25.3043, "step": 98910 }, { "epoch": 0.19982465850830447, "grad_norm": 228.15830993652344, "learning_rate": 9.7112232867517e-06, "loss": 26.1682, "step": 98920 }, { "epoch": 0.1998448591409883, "grad_norm": 359.323974609375, "learning_rate": 9.711106364260572e-06, "loss": 17.7793, "step": 98930 }, { "epoch": 0.1998650597736721, "grad_norm": 767.1118774414062, "learning_rate": 9.71098941880808e-06, "loss": 13.5829, "step": 98940 }, { "epoch": 0.19988526040635593, "grad_norm": 600.9185180664062, "learning_rate": 9.7108724503948e-06, "loss": 16.578, "step": 98950 }, { "epoch": 0.19990546103903975, "grad_norm": 394.9085998535156, "learning_rate": 9.710755459021297e-06, "loss": 20.4923, "step": 98960 }, { "epoch": 0.19992566167172357, "grad_norm": 724.1331176757812, "learning_rate": 9.710638444688146e-06, "loss": 16.3844, "step": 98970 }, { "epoch": 0.19994586230440736, "grad_norm": 536.1177368164062, "learning_rate": 9.71052140739591e-06, "loss": 10.3771, "step": 98980 }, { "epoch": 0.19996606293709118, "grad_norm": 622.7116088867188, "learning_rate": 9.710404347145168e-06, "loss": 18.9236, "step": 98990 }, { "epoch": 0.199986263569775, "grad_norm": 475.79736328125, "learning_rate": 9.710287263936485e-06, "loss": 16.2164, "step": 99000 }, { "epoch": 0.20000646420245882, "grad_norm": 354.1842041015625, "learning_rate": 9.710170157770434e-06, "loss": 25.4532, "step": 99010 }, { "epoch": 0.20002666483514264, "grad_norm": 844.4014282226562, "learning_rate": 9.710053028647583e-06, "loss": 46.8019, "step": 99020 }, { "epoch": 0.20004686546782646, "grad_norm": 470.49542236328125, "learning_rate": 9.709935876568506e-06, "loss": 24.6423, "step": 99030 }, { "epoch": 0.20006706610051025, "grad_norm": 480.843017578125, "learning_rate": 9.709818701533774e-06, "loss": 23.2238, "step": 99040 }, { "epoch": 0.20008726673319407, "grad_norm": 650.1700439453125, "learning_rate": 9.709701503543954e-06, "loss": 13.175, "step": 99050 }, { "epoch": 0.2001074673658779, "grad_norm": 546.8789672851562, "learning_rate": 9.709584282599623e-06, "loss": 17.9362, "step": 99060 }, { "epoch": 0.20012766799856171, "grad_norm": 526.370849609375, "learning_rate": 9.709467038701348e-06, "loss": 29.2563, "step": 99070 }, { "epoch": 0.20014786863124553, "grad_norm": 271.8720703125, "learning_rate": 9.709349771849701e-06, "loss": 14.2697, "step": 99080 }, { "epoch": 0.20016806926392935, "grad_norm": 366.5167236328125, "learning_rate": 9.709232482045254e-06, "loss": 23.3278, "step": 99090 }, { "epoch": 0.20018826989661317, "grad_norm": 118.59703063964844, "learning_rate": 9.709115169288582e-06, "loss": 29.0655, "step": 99100 }, { "epoch": 0.20020847052929697, "grad_norm": 1124.3302001953125, "learning_rate": 9.708997833580251e-06, "loss": 41.9663, "step": 99110 }, { "epoch": 0.2002286711619808, "grad_norm": 450.16778564453125, "learning_rate": 9.708880474920836e-06, "loss": 31.6968, "step": 99120 }, { "epoch": 0.2002488717946646, "grad_norm": 1278.3619384765625, "learning_rate": 9.708763093310911e-06, "loss": 25.3642, "step": 99130 }, { "epoch": 0.20026907242734843, "grad_norm": 964.25048828125, "learning_rate": 
9.708645688751043e-06, "loss": 36.2234, "step": 99140 }, { "epoch": 0.20028927306003225, "grad_norm": 585.2734375, "learning_rate": 9.70852826124181e-06, "loss": 26.9971, "step": 99150 }, { "epoch": 0.20030947369271607, "grad_norm": 511.50244140625, "learning_rate": 9.70841081078378e-06, "loss": 21.3564, "step": 99160 }, { "epoch": 0.20032967432539986, "grad_norm": 108.579833984375, "learning_rate": 9.708293337377525e-06, "loss": 17.6321, "step": 99170 }, { "epoch": 0.20034987495808368, "grad_norm": 605.6941528320312, "learning_rate": 9.70817584102362e-06, "loss": 17.3211, "step": 99180 }, { "epoch": 0.2003700755907675, "grad_norm": 528.8156127929688, "learning_rate": 9.70805832172264e-06, "loss": 27.5643, "step": 99190 }, { "epoch": 0.20039027622345132, "grad_norm": 906.8600463867188, "learning_rate": 9.707940779475151e-06, "loss": 33.9335, "step": 99200 }, { "epoch": 0.20041047685613514, "grad_norm": 714.3448486328125, "learning_rate": 9.707823214281733e-06, "loss": 33.7857, "step": 99210 }, { "epoch": 0.20043067748881896, "grad_norm": 255.84066772460938, "learning_rate": 9.707705626142952e-06, "loss": 18.3717, "step": 99220 }, { "epoch": 0.20045087812150278, "grad_norm": 512.1227416992188, "learning_rate": 9.707588015059387e-06, "loss": 20.0914, "step": 99230 }, { "epoch": 0.20047107875418657, "grad_norm": 481.708251953125, "learning_rate": 9.707470381031608e-06, "loss": 44.6957, "step": 99240 }, { "epoch": 0.2004912793868704, "grad_norm": 492.8053283691406, "learning_rate": 9.70735272406019e-06, "loss": 38.131, "step": 99250 }, { "epoch": 0.2005114800195542, "grad_norm": 952.0032958984375, "learning_rate": 9.707235044145707e-06, "loss": 12.7383, "step": 99260 }, { "epoch": 0.20053168065223803, "grad_norm": 206.84864807128906, "learning_rate": 9.707117341288728e-06, "loss": 17.0316, "step": 99270 }, { "epoch": 0.20055188128492185, "grad_norm": 471.7038269042969, "learning_rate": 9.706999615489833e-06, "loss": 22.6234, "step": 99280 }, { "epoch": 0.20057208191760567, "grad_norm": 219.2476806640625, "learning_rate": 9.70688186674959e-06, "loss": 25.6948, "step": 99290 }, { "epoch": 0.20059228255028946, "grad_norm": 506.0567321777344, "learning_rate": 9.706764095068579e-06, "loss": 19.0954, "step": 99300 }, { "epoch": 0.20061248318297328, "grad_norm": 414.366455078125, "learning_rate": 9.706646300447369e-06, "loss": 20.1565, "step": 99310 }, { "epoch": 0.2006326838156571, "grad_norm": 3225.341796875, "learning_rate": 9.706528482886535e-06, "loss": 35.9637, "step": 99320 }, { "epoch": 0.20065288444834092, "grad_norm": 493.09442138671875, "learning_rate": 9.706410642386653e-06, "loss": 25.6267, "step": 99330 }, { "epoch": 0.20067308508102474, "grad_norm": 505.97161865234375, "learning_rate": 9.706292778948297e-06, "loss": 27.1495, "step": 99340 }, { "epoch": 0.20069328571370856, "grad_norm": 230.9978485107422, "learning_rate": 9.706174892572038e-06, "loss": 19.038, "step": 99350 }, { "epoch": 0.20071348634639236, "grad_norm": 768.5806884765625, "learning_rate": 9.706056983258456e-06, "loss": 21.1466, "step": 99360 }, { "epoch": 0.20073368697907618, "grad_norm": 572.5496826171875, "learning_rate": 9.705939051008124e-06, "loss": 17.4252, "step": 99370 }, { "epoch": 0.20075388761176, "grad_norm": 402.9242858886719, "learning_rate": 9.705821095821612e-06, "loss": 15.5765, "step": 99380 }, { "epoch": 0.20077408824444382, "grad_norm": 325.3233947753906, "learning_rate": 9.705703117699501e-06, "loss": 26.812, "step": 99390 }, { "epoch": 0.20079428887712764, "grad_norm": 592.6520385742188, 
"learning_rate": 9.705585116642364e-06, "loss": 24.3809, "step": 99400 }, { "epoch": 0.20081448950981146, "grad_norm": 264.4601135253906, "learning_rate": 9.705467092650775e-06, "loss": 28.0144, "step": 99410 }, { "epoch": 0.20083469014249528, "grad_norm": 221.36004638671875, "learning_rate": 9.705349045725313e-06, "loss": 20.65, "step": 99420 }, { "epoch": 0.20085489077517907, "grad_norm": 306.7693786621094, "learning_rate": 9.705230975866547e-06, "loss": 23.898, "step": 99430 }, { "epoch": 0.2008750914078629, "grad_norm": 323.7845153808594, "learning_rate": 9.705112883075055e-06, "loss": 21.4143, "step": 99440 }, { "epoch": 0.2008952920405467, "grad_norm": 193.15858459472656, "learning_rate": 9.704994767351417e-06, "loss": 16.4971, "step": 99450 }, { "epoch": 0.20091549267323053, "grad_norm": 1528.001220703125, "learning_rate": 9.704876628696202e-06, "loss": 43.906, "step": 99460 }, { "epoch": 0.20093569330591435, "grad_norm": 119.73884582519531, "learning_rate": 9.70475846710999e-06, "loss": 37.649, "step": 99470 }, { "epoch": 0.20095589393859817, "grad_norm": 131.5335693359375, "learning_rate": 9.704640282593359e-06, "loss": 21.3057, "step": 99480 }, { "epoch": 0.20097609457128196, "grad_norm": 433.1357421875, "learning_rate": 9.704522075146878e-06, "loss": 30.4396, "step": 99490 }, { "epoch": 0.20099629520396578, "grad_norm": 187.129638671875, "learning_rate": 9.704403844771128e-06, "loss": 24.8694, "step": 99500 }, { "epoch": 0.2010164958366496, "grad_norm": 192.597900390625, "learning_rate": 9.704285591466685e-06, "loss": 20.0425, "step": 99510 }, { "epoch": 0.20103669646933342, "grad_norm": 327.7987060546875, "learning_rate": 9.704167315234124e-06, "loss": 48.091, "step": 99520 }, { "epoch": 0.20105689710201724, "grad_norm": 192.59085083007812, "learning_rate": 9.704049016074022e-06, "loss": 38.102, "step": 99530 }, { "epoch": 0.20107709773470106, "grad_norm": 448.1267395019531, "learning_rate": 9.703930693986956e-06, "loss": 23.2346, "step": 99540 }, { "epoch": 0.20109729836738488, "grad_norm": 184.4168701171875, "learning_rate": 9.703812348973501e-06, "loss": 33.5095, "step": 99550 }, { "epoch": 0.20111749900006867, "grad_norm": 636.1046142578125, "learning_rate": 9.703693981034236e-06, "loss": 19.6154, "step": 99560 }, { "epoch": 0.2011376996327525, "grad_norm": 508.3456726074219, "learning_rate": 9.703575590169738e-06, "loss": 25.7803, "step": 99570 }, { "epoch": 0.2011579002654363, "grad_norm": 108.27706909179688, "learning_rate": 9.703457176380581e-06, "loss": 34.7553, "step": 99580 }, { "epoch": 0.20117810089812013, "grad_norm": 348.802734375, "learning_rate": 9.703338739667347e-06, "loss": 23.5485, "step": 99590 }, { "epoch": 0.20119830153080395, "grad_norm": 340.8241271972656, "learning_rate": 9.703220280030607e-06, "loss": 31.4257, "step": 99600 }, { "epoch": 0.20121850216348777, "grad_norm": 675.7904052734375, "learning_rate": 9.703101797470944e-06, "loss": 29.3283, "step": 99610 }, { "epoch": 0.20123870279617156, "grad_norm": 134.0491180419922, "learning_rate": 9.702983291988934e-06, "loss": 48.8887, "step": 99620 }, { "epoch": 0.20125890342885538, "grad_norm": 39.543617248535156, "learning_rate": 9.702864763585152e-06, "loss": 32.0166, "step": 99630 }, { "epoch": 0.2012791040615392, "grad_norm": 569.5358276367188, "learning_rate": 9.702746212260179e-06, "loss": 27.353, "step": 99640 }, { "epoch": 0.20129930469422302, "grad_norm": 528.3767700195312, "learning_rate": 9.70262763801459e-06, "loss": 19.063, "step": 99650 }, { "epoch": 0.20131950532690684, "grad_norm": 
269.0294189453125, "learning_rate": 9.702509040848964e-06, "loss": 11.7937, "step": 99660 }, { "epoch": 0.20133970595959066, "grad_norm": 395.1287841796875, "learning_rate": 9.70239042076388e-06, "loss": 17.4976, "step": 99670 }, { "epoch": 0.20135990659227446, "grad_norm": 430.4999694824219, "learning_rate": 9.702271777759915e-06, "loss": 16.4138, "step": 99680 }, { "epoch": 0.20138010722495828, "grad_norm": 642.3116455078125, "learning_rate": 9.70215311183765e-06, "loss": 33.394, "step": 99690 }, { "epoch": 0.2014003078576421, "grad_norm": 56.426029205322266, "learning_rate": 9.702034422997658e-06, "loss": 34.9607, "step": 99700 }, { "epoch": 0.20142050849032592, "grad_norm": 195.6506805419922, "learning_rate": 9.701915711240522e-06, "loss": 25.5187, "step": 99710 }, { "epoch": 0.20144070912300974, "grad_norm": 295.8341369628906, "learning_rate": 9.70179697656682e-06, "loss": 21.67, "step": 99720 }, { "epoch": 0.20146090975569356, "grad_norm": 313.3680419921875, "learning_rate": 9.701678218977128e-06, "loss": 14.2844, "step": 99730 }, { "epoch": 0.20148111038837738, "grad_norm": 195.22238159179688, "learning_rate": 9.701559438472026e-06, "loss": 8.5286, "step": 99740 }, { "epoch": 0.20150131102106117, "grad_norm": 242.9795379638672, "learning_rate": 9.701440635052094e-06, "loss": 17.6431, "step": 99750 }, { "epoch": 0.201521511653745, "grad_norm": 630.5758056640625, "learning_rate": 9.701321808717912e-06, "loss": 17.8251, "step": 99760 }, { "epoch": 0.2015417122864288, "grad_norm": 228.25241088867188, "learning_rate": 9.701202959470057e-06, "loss": 21.8293, "step": 99770 }, { "epoch": 0.20156191291911263, "grad_norm": 413.8714599609375, "learning_rate": 9.70108408730911e-06, "loss": 21.084, "step": 99780 }, { "epoch": 0.20158211355179645, "grad_norm": 391.77960205078125, "learning_rate": 9.700965192235647e-06, "loss": 20.4074, "step": 99790 }, { "epoch": 0.20160231418448027, "grad_norm": 48.752891540527344, "learning_rate": 9.700846274250252e-06, "loss": 9.2901, "step": 99800 }, { "epoch": 0.20162251481716406, "grad_norm": 676.621826171875, "learning_rate": 9.700727333353502e-06, "loss": 18.9555, "step": 99810 }, { "epoch": 0.20164271544984788, "grad_norm": 235.96255493164062, "learning_rate": 9.700608369545976e-06, "loss": 24.6128, "step": 99820 }, { "epoch": 0.2016629160825317, "grad_norm": 1.0006201267242432, "learning_rate": 9.700489382828255e-06, "loss": 23.243, "step": 99830 }, { "epoch": 0.20168311671521552, "grad_norm": 202.7542724609375, "learning_rate": 9.70037037320092e-06, "loss": 66.303, "step": 99840 }, { "epoch": 0.20170331734789934, "grad_norm": 396.8049011230469, "learning_rate": 9.70025134066455e-06, "loss": 20.0084, "step": 99850 }, { "epoch": 0.20172351798058316, "grad_norm": 101.11209869384766, "learning_rate": 9.700132285219724e-06, "loss": 20.172, "step": 99860 }, { "epoch": 0.20174371861326698, "grad_norm": 130.86679077148438, "learning_rate": 9.700013206867022e-06, "loss": 23.5345, "step": 99870 }, { "epoch": 0.20176391924595077, "grad_norm": 274.3761901855469, "learning_rate": 9.699894105607028e-06, "loss": 22.9851, "step": 99880 }, { "epoch": 0.2017841198786346, "grad_norm": 12.90697956085205, "learning_rate": 9.69977498144032e-06, "loss": 43.7818, "step": 99890 }, { "epoch": 0.2018043205113184, "grad_norm": 152.14195251464844, "learning_rate": 9.699655834367479e-06, "loss": 20.2141, "step": 99900 }, { "epoch": 0.20182452114400223, "grad_norm": 338.20098876953125, "learning_rate": 9.699536664389084e-06, "loss": 17.3666, "step": 99910 }, { "epoch": 
0.20184472177668605, "grad_norm": 704.154052734375, "learning_rate": 9.699417471505717e-06, "loss": 39.9503, "step": 99920 }, { "epoch": 0.20186492240936987, "grad_norm": 210.18307495117188, "learning_rate": 9.699298255717961e-06, "loss": 21.9548, "step": 99930 }, { "epoch": 0.20188512304205367, "grad_norm": 363.80230712890625, "learning_rate": 9.699179017026395e-06, "loss": 22.7939, "step": 99940 }, { "epoch": 0.20190532367473749, "grad_norm": 642.005126953125, "learning_rate": 9.699059755431599e-06, "loss": 29.3214, "step": 99950 }, { "epoch": 0.2019255243074213, "grad_norm": 624.2991943359375, "learning_rate": 9.698940470934158e-06, "loss": 28.8318, "step": 99960 }, { "epoch": 0.20194572494010513, "grad_norm": 628.0044555664062, "learning_rate": 9.698821163534649e-06, "loss": 29.6279, "step": 99970 }, { "epoch": 0.20196592557278895, "grad_norm": 647.2896728515625, "learning_rate": 9.698701833233654e-06, "loss": 55.589, "step": 99980 }, { "epoch": 0.20198612620547277, "grad_norm": 250.62294006347656, "learning_rate": 9.69858248003176e-06, "loss": 18.9179, "step": 99990 }, { "epoch": 0.20200632683815656, "grad_norm": 320.8274230957031, "learning_rate": 9.698463103929542e-06, "loss": 23.6085, "step": 100000 }, { "epoch": 0.20202652747084038, "grad_norm": 446.7718505859375, "learning_rate": 9.698343704927586e-06, "loss": 34.3639, "step": 100010 }, { "epoch": 0.2020467281035242, "grad_norm": 735.5264892578125, "learning_rate": 9.698224283026473e-06, "loss": 26.1571, "step": 100020 }, { "epoch": 0.20206692873620802, "grad_norm": 922.5233154296875, "learning_rate": 9.698104838226783e-06, "loss": 33.7224, "step": 100030 }, { "epoch": 0.20208712936889184, "grad_norm": 366.1944580078125, "learning_rate": 9.697985370529101e-06, "loss": 16.7596, "step": 100040 }, { "epoch": 0.20210733000157566, "grad_norm": 334.633544921875, "learning_rate": 9.69786587993401e-06, "loss": 13.3622, "step": 100050 }, { "epoch": 0.20212753063425948, "grad_norm": 456.33074951171875, "learning_rate": 9.697746366442087e-06, "loss": 32.405, "step": 100060 }, { "epoch": 0.20214773126694327, "grad_norm": 1194.3699951171875, "learning_rate": 9.69762683005392e-06, "loss": 47.251, "step": 100070 }, { "epoch": 0.2021679318996271, "grad_norm": 388.7727966308594, "learning_rate": 9.69750727077009e-06, "loss": 28.373, "step": 100080 }, { "epoch": 0.2021881325323109, "grad_norm": 24.773212432861328, "learning_rate": 9.697387688591178e-06, "loss": 22.8887, "step": 100090 }, { "epoch": 0.20220833316499473, "grad_norm": 359.620361328125, "learning_rate": 9.697268083517767e-06, "loss": 31.703, "step": 100100 }, { "epoch": 0.20222853379767855, "grad_norm": 653.7416381835938, "learning_rate": 9.697148455550444e-06, "loss": 20.0855, "step": 100110 }, { "epoch": 0.20224873443036237, "grad_norm": 284.58673095703125, "learning_rate": 9.697028804689788e-06, "loss": 15.7838, "step": 100120 }, { "epoch": 0.20226893506304616, "grad_norm": 984.1806030273438, "learning_rate": 9.696909130936382e-06, "loss": 14.0852, "step": 100130 }, { "epoch": 0.20228913569572998, "grad_norm": 153.69190979003906, "learning_rate": 9.696789434290812e-06, "loss": 14.096, "step": 100140 }, { "epoch": 0.2023093363284138, "grad_norm": 258.2850646972656, "learning_rate": 9.696669714753658e-06, "loss": 22.492, "step": 100150 }, { "epoch": 0.20232953696109762, "grad_norm": 748.961669921875, "learning_rate": 9.696549972325509e-06, "loss": 19.6018, "step": 100160 }, { "epoch": 0.20234973759378144, "grad_norm": 287.2615051269531, "learning_rate": 9.696430207006942e-06, "loss": 
21.6377, "step": 100170 }, { "epoch": 0.20236993822646526, "grad_norm": 339.321044921875, "learning_rate": 9.696310418798544e-06, "loss": 24.7975, "step": 100180 }, { "epoch": 0.20239013885914908, "grad_norm": 459.3150939941406, "learning_rate": 9.696190607700901e-06, "loss": 16.3885, "step": 100190 }, { "epoch": 0.20241033949183287, "grad_norm": 278.3927917480469, "learning_rate": 9.696070773714592e-06, "loss": 33.2706, "step": 100200 }, { "epoch": 0.2024305401245167, "grad_norm": 347.731689453125, "learning_rate": 9.695950916840204e-06, "loss": 21.2646, "step": 100210 }, { "epoch": 0.20245074075720051, "grad_norm": 593.0798950195312, "learning_rate": 9.695831037078323e-06, "loss": 30.1376, "step": 100220 }, { "epoch": 0.20247094138988433, "grad_norm": 543.983154296875, "learning_rate": 9.695711134429529e-06, "loss": 42.1279, "step": 100230 }, { "epoch": 0.20249114202256815, "grad_norm": 389.13482666015625, "learning_rate": 9.695591208894408e-06, "loss": 23.1487, "step": 100240 }, { "epoch": 0.20251134265525197, "grad_norm": 370.58160400390625, "learning_rate": 9.695471260473546e-06, "loss": 60.422, "step": 100250 }, { "epoch": 0.20253154328793577, "grad_norm": 518.9379272460938, "learning_rate": 9.695351289167527e-06, "loss": 24.1456, "step": 100260 }, { "epoch": 0.2025517439206196, "grad_norm": 840.3296508789062, "learning_rate": 9.695231294976935e-06, "loss": 24.2454, "step": 100270 }, { "epoch": 0.2025719445533034, "grad_norm": 737.5658569335938, "learning_rate": 9.695111277902353e-06, "loss": 29.3572, "step": 100280 }, { "epoch": 0.20259214518598723, "grad_norm": 433.4263000488281, "learning_rate": 9.69499123794437e-06, "loss": 34.5794, "step": 100290 }, { "epoch": 0.20261234581867105, "grad_norm": 123.38919067382812, "learning_rate": 9.69487117510357e-06, "loss": 22.7723, "step": 100300 }, { "epoch": 0.20263254645135487, "grad_norm": 452.074462890625, "learning_rate": 9.694751089380536e-06, "loss": 30.7951, "step": 100310 }, { "epoch": 0.20265274708403866, "grad_norm": 0.0, "learning_rate": 9.694630980775856e-06, "loss": 20.6811, "step": 100320 }, { "epoch": 0.20267294771672248, "grad_norm": 229.5910186767578, "learning_rate": 9.694510849290113e-06, "loss": 29.4656, "step": 100330 }, { "epoch": 0.2026931483494063, "grad_norm": 228.48316955566406, "learning_rate": 9.694390694923893e-06, "loss": 17.5072, "step": 100340 }, { "epoch": 0.20271334898209012, "grad_norm": 553.59521484375, "learning_rate": 9.694270517677782e-06, "loss": 20.3764, "step": 100350 }, { "epoch": 0.20273354961477394, "grad_norm": 463.97491455078125, "learning_rate": 9.694150317552367e-06, "loss": 18.7534, "step": 100360 }, { "epoch": 0.20275375024745776, "grad_norm": 281.0998840332031, "learning_rate": 9.694030094548233e-06, "loss": 31.498, "step": 100370 }, { "epoch": 0.20277395088014158, "grad_norm": 256.85919189453125, "learning_rate": 9.693909848665962e-06, "loss": 13.6459, "step": 100380 }, { "epoch": 0.20279415151282537, "grad_norm": 281.70880126953125, "learning_rate": 9.693789579906147e-06, "loss": 22.8798, "step": 100390 }, { "epoch": 0.2028143521455092, "grad_norm": 316.8731994628906, "learning_rate": 9.693669288269371e-06, "loss": 15.9722, "step": 100400 }, { "epoch": 0.202834552778193, "grad_norm": 503.97418212890625, "learning_rate": 9.69354897375622e-06, "loss": 25.7422, "step": 100410 }, { "epoch": 0.20285475341087683, "grad_norm": 351.5521240234375, "learning_rate": 9.693428636367279e-06, "loss": 22.8897, "step": 100420 }, { "epoch": 0.20287495404356065, "grad_norm": 259.8196716308594, 
"learning_rate": 9.693308276103136e-06, "loss": 16.2709, "step": 100430 }, { "epoch": 0.20289515467624447, "grad_norm": 556.3468017578125, "learning_rate": 9.693187892964381e-06, "loss": 32.2715, "step": 100440 }, { "epoch": 0.20291535530892826, "grad_norm": 577.9406127929688, "learning_rate": 9.693067486951595e-06, "loss": 44.8964, "step": 100450 }, { "epoch": 0.20293555594161208, "grad_norm": 76.00013732910156, "learning_rate": 9.692947058065367e-06, "loss": 31.2101, "step": 100460 }, { "epoch": 0.2029557565742959, "grad_norm": 668.4077758789062, "learning_rate": 9.692826606306284e-06, "loss": 33.4007, "step": 100470 }, { "epoch": 0.20297595720697972, "grad_norm": 514.4503173828125, "learning_rate": 9.692706131674935e-06, "loss": 31.504, "step": 100480 }, { "epoch": 0.20299615783966354, "grad_norm": 886.607666015625, "learning_rate": 9.692585634171906e-06, "loss": 20.9908, "step": 100490 }, { "epoch": 0.20301635847234736, "grad_norm": 360.0110778808594, "learning_rate": 9.69246511379778e-06, "loss": 19.564, "step": 100500 }, { "epoch": 0.20303655910503116, "grad_norm": 254.813232421875, "learning_rate": 9.692344570553152e-06, "loss": 27.0873, "step": 100510 }, { "epoch": 0.20305675973771498, "grad_norm": 321.7149963378906, "learning_rate": 9.692224004438603e-06, "loss": 15.984, "step": 100520 }, { "epoch": 0.2030769603703988, "grad_norm": 233.30462646484375, "learning_rate": 9.692103415454724e-06, "loss": 29.4682, "step": 100530 }, { "epoch": 0.20309716100308262, "grad_norm": 619.5001220703125, "learning_rate": 9.691982803602102e-06, "loss": 24.5903, "step": 100540 }, { "epoch": 0.20311736163576644, "grad_norm": 566.3729858398438, "learning_rate": 9.691862168881325e-06, "loss": 26.2647, "step": 100550 }, { "epoch": 0.20313756226845026, "grad_norm": 412.0101318359375, "learning_rate": 9.691741511292983e-06, "loss": 16.6728, "step": 100560 }, { "epoch": 0.20315776290113408, "grad_norm": 261.8351135253906, "learning_rate": 9.691620830837659e-06, "loss": 22.1806, "step": 100570 }, { "epoch": 0.20317796353381787, "grad_norm": 200.07791137695312, "learning_rate": 9.691500127515945e-06, "loss": 11.2046, "step": 100580 }, { "epoch": 0.2031981641665017, "grad_norm": 426.7507629394531, "learning_rate": 9.69137940132843e-06, "loss": 16.9677, "step": 100590 }, { "epoch": 0.2032183647991855, "grad_norm": 332.6185302734375, "learning_rate": 9.691258652275698e-06, "loss": 19.8274, "step": 100600 }, { "epoch": 0.20323856543186933, "grad_norm": 490.4311218261719, "learning_rate": 9.691137880358341e-06, "loss": 28.4095, "step": 100610 }, { "epoch": 0.20325876606455315, "grad_norm": 701.2317504882812, "learning_rate": 9.691017085576947e-06, "loss": 26.9267, "step": 100620 }, { "epoch": 0.20327896669723697, "grad_norm": 328.32666015625, "learning_rate": 9.690896267932106e-06, "loss": 16.1199, "step": 100630 }, { "epoch": 0.20329916732992076, "grad_norm": 273.16168212890625, "learning_rate": 9.690775427424406e-06, "loss": 36.8842, "step": 100640 }, { "epoch": 0.20331936796260458, "grad_norm": 854.030517578125, "learning_rate": 9.690654564054433e-06, "loss": 30.4505, "step": 100650 }, { "epoch": 0.2033395685952884, "grad_norm": 509.2734069824219, "learning_rate": 9.69053367782278e-06, "loss": 32.0603, "step": 100660 }, { "epoch": 0.20335976922797222, "grad_norm": 215.01947021484375, "learning_rate": 9.690412768730036e-06, "loss": 11.925, "step": 100670 }, { "epoch": 0.20337996986065604, "grad_norm": 149.21881103515625, "learning_rate": 9.690291836776786e-06, "loss": 15.3797, "step": 100680 }, { "epoch": 
0.20340017049333986, "grad_norm": 383.1336669921875, "learning_rate": 9.690170881963624e-06, "loss": 37.7917, "step": 100690 }, { "epoch": 0.20342037112602368, "grad_norm": 0.5593068599700928, "learning_rate": 9.690049904291139e-06, "loss": 22.0224, "step": 100700 }, { "epoch": 0.20344057175870747, "grad_norm": 202.04061889648438, "learning_rate": 9.689928903759918e-06, "loss": 11.615, "step": 100710 }, { "epoch": 0.2034607723913913, "grad_norm": 342.6385498046875, "learning_rate": 9.689807880370554e-06, "loss": 19.7127, "step": 100720 }, { "epoch": 0.2034809730240751, "grad_norm": 284.9398193359375, "learning_rate": 9.689686834123633e-06, "loss": 19.4428, "step": 100730 }, { "epoch": 0.20350117365675893, "grad_norm": 267.9001159667969, "learning_rate": 9.689565765019748e-06, "loss": 21.0667, "step": 100740 }, { "epoch": 0.20352137428944275, "grad_norm": 150.65304565429688, "learning_rate": 9.68944467305949e-06, "loss": 10.7642, "step": 100750 }, { "epoch": 0.20354157492212657, "grad_norm": 760.1095581054688, "learning_rate": 9.689323558243446e-06, "loss": 25.7059, "step": 100760 }, { "epoch": 0.20356177555481036, "grad_norm": 219.99559020996094, "learning_rate": 9.689202420572207e-06, "loss": 25.8721, "step": 100770 }, { "epoch": 0.20358197618749418, "grad_norm": 428.0672912597656, "learning_rate": 9.689081260046365e-06, "loss": 16.5707, "step": 100780 }, { "epoch": 0.203602176820178, "grad_norm": 288.5916442871094, "learning_rate": 9.68896007666651e-06, "loss": 9.0285, "step": 100790 }, { "epoch": 0.20362237745286182, "grad_norm": 189.73159790039062, "learning_rate": 9.68883887043323e-06, "loss": 31.2634, "step": 100800 }, { "epoch": 0.20364257808554564, "grad_norm": 354.1461181640625, "learning_rate": 9.688717641347121e-06, "loss": 30.3951, "step": 100810 }, { "epoch": 0.20366277871822946, "grad_norm": 472.4429626464844, "learning_rate": 9.688596389408769e-06, "loss": 23.1525, "step": 100820 }, { "epoch": 0.20368297935091326, "grad_norm": 579.1621704101562, "learning_rate": 9.688475114618768e-06, "loss": 11.8348, "step": 100830 }, { "epoch": 0.20370317998359708, "grad_norm": 408.7290954589844, "learning_rate": 9.688353816977708e-06, "loss": 26.5684, "step": 100840 }, { "epoch": 0.2037233806162809, "grad_norm": 438.94097900390625, "learning_rate": 9.688232496486179e-06, "loss": 21.479, "step": 100850 }, { "epoch": 0.20374358124896472, "grad_norm": 315.06683349609375, "learning_rate": 9.688111153144775e-06, "loss": 18.5876, "step": 100860 }, { "epoch": 0.20376378188164854, "grad_norm": 368.37176513671875, "learning_rate": 9.687989786954084e-06, "loss": 25.1675, "step": 100870 }, { "epoch": 0.20378398251433236, "grad_norm": 408.96441650390625, "learning_rate": 9.687868397914701e-06, "loss": 28.6644, "step": 100880 }, { "epoch": 0.20380418314701618, "grad_norm": 83.42842864990234, "learning_rate": 9.687746986027215e-06, "loss": 15.1172, "step": 100890 }, { "epoch": 0.20382438377969997, "grad_norm": 778.487548828125, "learning_rate": 9.687625551292219e-06, "loss": 25.876, "step": 100900 }, { "epoch": 0.2038445844123838, "grad_norm": 555.4335327148438, "learning_rate": 9.687504093710304e-06, "loss": 23.7801, "step": 100910 }, { "epoch": 0.2038647850450676, "grad_norm": 340.75274658203125, "learning_rate": 9.687382613282063e-06, "loss": 18.6781, "step": 100920 }, { "epoch": 0.20388498567775143, "grad_norm": 315.389404296875, "learning_rate": 9.687261110008088e-06, "loss": 15.0097, "step": 100930 }, { "epoch": 0.20390518631043525, "grad_norm": 344.5794982910156, "learning_rate": 
9.687139583888971e-06, "loss": 41.7478, "step": 100940 }, { "epoch": 0.20392538694311907, "grad_norm": 367.5317687988281, "learning_rate": 9.687018034925304e-06, "loss": 36.2405, "step": 100950 }, { "epoch": 0.20394558757580286, "grad_norm": 930.07080078125, "learning_rate": 9.686896463117679e-06, "loss": 24.3968, "step": 100960 }, { "epoch": 0.20396578820848668, "grad_norm": 676.9625854492188, "learning_rate": 9.68677486846669e-06, "loss": 34.5976, "step": 100970 }, { "epoch": 0.2039859888411705, "grad_norm": 263.13287353515625, "learning_rate": 9.686653250972928e-06, "loss": 13.8814, "step": 100980 }, { "epoch": 0.20400618947385432, "grad_norm": 263.9481201171875, "learning_rate": 9.686531610636986e-06, "loss": 14.7206, "step": 100990 }, { "epoch": 0.20402639010653814, "grad_norm": 411.3809509277344, "learning_rate": 9.68640994745946e-06, "loss": 24.1162, "step": 101000 }, { "epoch": 0.20404659073922196, "grad_norm": 208.5432891845703, "learning_rate": 9.686288261440937e-06, "loss": 20.4968, "step": 101010 }, { "epoch": 0.20406679137190578, "grad_norm": 348.7425231933594, "learning_rate": 9.686166552582015e-06, "loss": 18.1232, "step": 101020 }, { "epoch": 0.20408699200458957, "grad_norm": 688.8912353515625, "learning_rate": 9.686044820883284e-06, "loss": 17.6353, "step": 101030 }, { "epoch": 0.2041071926372734, "grad_norm": 166.44342041015625, "learning_rate": 9.68592306634534e-06, "loss": 10.5638, "step": 101040 }, { "epoch": 0.2041273932699572, "grad_norm": 343.3615417480469, "learning_rate": 9.685801288968777e-06, "loss": 31.3551, "step": 101050 }, { "epoch": 0.20414759390264103, "grad_norm": 363.5527038574219, "learning_rate": 9.685679488754184e-06, "loss": 16.4418, "step": 101060 }, { "epoch": 0.20416779453532485, "grad_norm": 626.4178466796875, "learning_rate": 9.685557665702158e-06, "loss": 25.6591, "step": 101070 }, { "epoch": 0.20418799516800867, "grad_norm": 433.0784912109375, "learning_rate": 9.685435819813294e-06, "loss": 25.233, "step": 101080 }, { "epoch": 0.20420819580069247, "grad_norm": 107.19353485107422, "learning_rate": 9.685313951088184e-06, "loss": 15.3305, "step": 101090 }, { "epoch": 0.20422839643337629, "grad_norm": 315.6756286621094, "learning_rate": 9.68519205952742e-06, "loss": 37.633, "step": 101100 }, { "epoch": 0.2042485970660601, "grad_norm": 483.7098083496094, "learning_rate": 9.6850701451316e-06, "loss": 26.4755, "step": 101110 }, { "epoch": 0.20426879769874393, "grad_norm": 349.6911926269531, "learning_rate": 9.684948207901315e-06, "loss": 20.3785, "step": 101120 }, { "epoch": 0.20428899833142775, "grad_norm": 40.62013244628906, "learning_rate": 9.684826247837162e-06, "loss": 16.121, "step": 101130 }, { "epoch": 0.20430919896411157, "grad_norm": 1106.7093505859375, "learning_rate": 9.684704264939734e-06, "loss": 29.3836, "step": 101140 }, { "epoch": 0.20432939959679536, "grad_norm": 663.3167114257812, "learning_rate": 9.684582259209625e-06, "loss": 14.8486, "step": 101150 }, { "epoch": 0.20434960022947918, "grad_norm": 268.1558532714844, "learning_rate": 9.68446023064743e-06, "loss": 16.3439, "step": 101160 }, { "epoch": 0.204369800862163, "grad_norm": 866.4666137695312, "learning_rate": 9.684338179253744e-06, "loss": 16.8831, "step": 101170 }, { "epoch": 0.20439000149484682, "grad_norm": 120.70707702636719, "learning_rate": 9.684216105029163e-06, "loss": 21.0449, "step": 101180 }, { "epoch": 0.20441020212753064, "grad_norm": 683.2108764648438, "learning_rate": 9.684094007974278e-06, "loss": 18.4854, "step": 101190 }, { "epoch": 0.20443040276021446, 
"grad_norm": 653.0943603515625, "learning_rate": 9.68397188808969e-06, "loss": 26.5146, "step": 101200 }, { "epoch": 0.20445060339289828, "grad_norm": 258.76812744140625, "learning_rate": 9.683849745375991e-06, "loss": 28.7401, "step": 101210 }, { "epoch": 0.20447080402558207, "grad_norm": 331.1708984375, "learning_rate": 9.683727579833776e-06, "loss": 23.6947, "step": 101220 }, { "epoch": 0.2044910046582659, "grad_norm": 329.5214538574219, "learning_rate": 9.68360539146364e-06, "loss": 22.2485, "step": 101230 }, { "epoch": 0.2045112052909497, "grad_norm": 253.5193328857422, "learning_rate": 9.683483180266179e-06, "loss": 27.205, "step": 101240 }, { "epoch": 0.20453140592363353, "grad_norm": 1097.482177734375, "learning_rate": 9.683360946241988e-06, "loss": 31.314, "step": 101250 }, { "epoch": 0.20455160655631735, "grad_norm": 208.3839111328125, "learning_rate": 9.683238689391667e-06, "loss": 17.2174, "step": 101260 }, { "epoch": 0.20457180718900117, "grad_norm": 184.15036010742188, "learning_rate": 9.683116409715807e-06, "loss": 35.4615, "step": 101270 }, { "epoch": 0.20459200782168496, "grad_norm": 486.5890197753906, "learning_rate": 9.682994107215005e-06, "loss": 22.9579, "step": 101280 }, { "epoch": 0.20461220845436878, "grad_norm": 431.9108581542969, "learning_rate": 9.682871781889858e-06, "loss": 21.2137, "step": 101290 }, { "epoch": 0.2046324090870526, "grad_norm": 182.5166015625, "learning_rate": 9.682749433740963e-06, "loss": 37.8259, "step": 101300 }, { "epoch": 0.20465260971973642, "grad_norm": 438.4036865234375, "learning_rate": 9.682627062768914e-06, "loss": 17.6295, "step": 101310 }, { "epoch": 0.20467281035242024, "grad_norm": 309.7554931640625, "learning_rate": 9.682504668974308e-06, "loss": 16.9148, "step": 101320 }, { "epoch": 0.20469301098510406, "grad_norm": 393.6847229003906, "learning_rate": 9.682382252357745e-06, "loss": 18.2264, "step": 101330 }, { "epoch": 0.20471321161778788, "grad_norm": 294.20904541015625, "learning_rate": 9.682259812919817e-06, "loss": 25.2018, "step": 101340 }, { "epoch": 0.20473341225047167, "grad_norm": 300.732421875, "learning_rate": 9.682137350661123e-06, "loss": 17.3774, "step": 101350 }, { "epoch": 0.2047536128831555, "grad_norm": 423.7529602050781, "learning_rate": 9.682014865582259e-06, "loss": 17.0439, "step": 101360 }, { "epoch": 0.20477381351583931, "grad_norm": 279.9488220214844, "learning_rate": 9.681892357683822e-06, "loss": 30.9309, "step": 101370 }, { "epoch": 0.20479401414852313, "grad_norm": 1027.737060546875, "learning_rate": 9.68176982696641e-06, "loss": 30.3881, "step": 101380 }, { "epoch": 0.20481421478120695, "grad_norm": 660.5496215820312, "learning_rate": 9.681647273430618e-06, "loss": 41.3444, "step": 101390 }, { "epoch": 0.20483441541389077, "grad_norm": 818.9313354492188, "learning_rate": 9.681524697077047e-06, "loss": 19.0959, "step": 101400 }, { "epoch": 0.20485461604657457, "grad_norm": 647.1447143554688, "learning_rate": 9.681402097906293e-06, "loss": 19.1615, "step": 101410 }, { "epoch": 0.2048748166792584, "grad_norm": 243.00503540039062, "learning_rate": 9.681279475918952e-06, "loss": 24.1584, "step": 101420 }, { "epoch": 0.2048950173119422, "grad_norm": 49.23984146118164, "learning_rate": 9.681156831115622e-06, "loss": 29.7425, "step": 101430 }, { "epoch": 0.20491521794462603, "grad_norm": 252.69985961914062, "learning_rate": 9.681034163496902e-06, "loss": 26.8188, "step": 101440 }, { "epoch": 0.20493541857730985, "grad_norm": 206.2428741455078, "learning_rate": 9.68091147306339e-06, "loss": 29.8481, "step": 
101450 }, { "epoch": 0.20495561920999367, "grad_norm": 156.89279174804688, "learning_rate": 9.680788759815682e-06, "loss": 8.749, "step": 101460 }, { "epoch": 0.20497581984267746, "grad_norm": 670.9077758789062, "learning_rate": 9.680666023754377e-06, "loss": 21.0357, "step": 101470 }, { "epoch": 0.20499602047536128, "grad_norm": 494.2296447753906, "learning_rate": 9.680543264880075e-06, "loss": 14.5555, "step": 101480 }, { "epoch": 0.2050162211080451, "grad_norm": 604.6879272460938, "learning_rate": 9.680420483193371e-06, "loss": 25.0197, "step": 101490 }, { "epoch": 0.20503642174072892, "grad_norm": 15.526825904846191, "learning_rate": 9.680297678694867e-06, "loss": 21.2223, "step": 101500 }, { "epoch": 0.20505662237341274, "grad_norm": 267.8724060058594, "learning_rate": 9.680174851385158e-06, "loss": 16.4051, "step": 101510 }, { "epoch": 0.20507682300609656, "grad_norm": 316.9556579589844, "learning_rate": 9.680052001264847e-06, "loss": 24.9228, "step": 101520 }, { "epoch": 0.20509702363878038, "grad_norm": 198.80479431152344, "learning_rate": 9.679929128334529e-06, "loss": 41.2172, "step": 101530 }, { "epoch": 0.20511722427146417, "grad_norm": 244.0272674560547, "learning_rate": 9.679806232594803e-06, "loss": 10.5038, "step": 101540 }, { "epoch": 0.205137424904148, "grad_norm": 1116.754150390625, "learning_rate": 9.67968331404627e-06, "loss": 19.753, "step": 101550 }, { "epoch": 0.2051576255368318, "grad_norm": 508.11761474609375, "learning_rate": 9.679560372689527e-06, "loss": 40.9566, "step": 101560 }, { "epoch": 0.20517782616951563, "grad_norm": 520.5548095703125, "learning_rate": 9.679437408525175e-06, "loss": 21.2994, "step": 101570 }, { "epoch": 0.20519802680219945, "grad_norm": 441.4461975097656, "learning_rate": 9.679314421553814e-06, "loss": 22.9699, "step": 101580 }, { "epoch": 0.20521822743488327, "grad_norm": 219.525146484375, "learning_rate": 9.67919141177604e-06, "loss": 22.78, "step": 101590 }, { "epoch": 0.20523842806756706, "grad_norm": 224.53749084472656, "learning_rate": 9.679068379192455e-06, "loss": 20.2164, "step": 101600 }, { "epoch": 0.20525862870025088, "grad_norm": 228.8566131591797, "learning_rate": 9.67894532380366e-06, "loss": 32.5503, "step": 101610 }, { "epoch": 0.2052788293329347, "grad_norm": 283.5226745605469, "learning_rate": 9.67882224561025e-06, "loss": 22.461, "step": 101620 }, { "epoch": 0.20529902996561852, "grad_norm": 385.9081115722656, "learning_rate": 9.678699144612829e-06, "loss": 20.0961, "step": 101630 }, { "epoch": 0.20531923059830234, "grad_norm": 375.8223876953125, "learning_rate": 9.678576020811996e-06, "loss": 13.825, "step": 101640 }, { "epoch": 0.20533943123098616, "grad_norm": 580.4359130859375, "learning_rate": 9.678452874208352e-06, "loss": 17.9582, "step": 101650 }, { "epoch": 0.20535963186366998, "grad_norm": 652.5196533203125, "learning_rate": 9.678329704802495e-06, "loss": 18.9625, "step": 101660 }, { "epoch": 0.20537983249635378, "grad_norm": 501.9729309082031, "learning_rate": 9.678206512595027e-06, "loss": 32.7783, "step": 101670 }, { "epoch": 0.2054000331290376, "grad_norm": 892.1295776367188, "learning_rate": 9.678083297586547e-06, "loss": 28.2075, "step": 101680 }, { "epoch": 0.20542023376172142, "grad_norm": 307.83026123046875, "learning_rate": 9.677960059777656e-06, "loss": 28.0779, "step": 101690 }, { "epoch": 0.20544043439440524, "grad_norm": 390.3979187011719, "learning_rate": 9.677836799168958e-06, "loss": 36.6936, "step": 101700 }, { "epoch": 0.20546063502708906, "grad_norm": 764.1943969726562, "learning_rate": 
9.677713515761046e-06, "loss": 36.1618, "step": 101710 }, { "epoch": 0.20548083565977288, "grad_norm": 767.7238159179688, "learning_rate": 9.677590209554531e-06, "loss": 29.0246, "step": 101720 }, { "epoch": 0.20550103629245667, "grad_norm": 894.7352905273438, "learning_rate": 9.677466880550004e-06, "loss": 29.4374, "step": 101730 }, { "epoch": 0.2055212369251405, "grad_norm": 228.73587036132812, "learning_rate": 9.677343528748073e-06, "loss": 23.9688, "step": 101740 }, { "epoch": 0.2055414375578243, "grad_norm": 550.2158203125, "learning_rate": 9.677220154149338e-06, "loss": 35.1127, "step": 101750 }, { "epoch": 0.20556163819050813, "grad_norm": 912.5822143554688, "learning_rate": 9.677096756754397e-06, "loss": 26.836, "step": 101760 }, { "epoch": 0.20558183882319195, "grad_norm": 1875.5404052734375, "learning_rate": 9.676973336563856e-06, "loss": 32.0896, "step": 101770 }, { "epoch": 0.20560203945587577, "grad_norm": 644.2416381835938, "learning_rate": 9.676849893578312e-06, "loss": 23.7709, "step": 101780 }, { "epoch": 0.20562224008855956, "grad_norm": 529.8448486328125, "learning_rate": 9.67672642779837e-06, "loss": 20.3778, "step": 101790 }, { "epoch": 0.20564244072124338, "grad_norm": 184.81747436523438, "learning_rate": 9.67660293922463e-06, "loss": 19.5761, "step": 101800 }, { "epoch": 0.2056626413539272, "grad_norm": 372.7962341308594, "learning_rate": 9.676479427857694e-06, "loss": 34.0516, "step": 101810 }, { "epoch": 0.20568284198661102, "grad_norm": 780.0181274414062, "learning_rate": 9.676355893698165e-06, "loss": 27.3943, "step": 101820 }, { "epoch": 0.20570304261929484, "grad_norm": 439.5355224609375, "learning_rate": 9.676232336746645e-06, "loss": 23.2605, "step": 101830 }, { "epoch": 0.20572324325197866, "grad_norm": 213.00759887695312, "learning_rate": 9.676108757003735e-06, "loss": 20.3967, "step": 101840 }, { "epoch": 0.20574344388466248, "grad_norm": 1251.4813232421875, "learning_rate": 9.67598515447004e-06, "loss": 47.155, "step": 101850 }, { "epoch": 0.20576364451734627, "grad_norm": 398.27423095703125, "learning_rate": 9.67586152914616e-06, "loss": 36.8861, "step": 101860 }, { "epoch": 0.2057838451500301, "grad_norm": 123.7984848022461, "learning_rate": 9.675737881032696e-06, "loss": 14.2424, "step": 101870 }, { "epoch": 0.2058040457827139, "grad_norm": 408.1427001953125, "learning_rate": 9.675614210130252e-06, "loss": 46.8987, "step": 101880 }, { "epoch": 0.20582424641539773, "grad_norm": 766.0629272460938, "learning_rate": 9.675490516439434e-06, "loss": 31.8348, "step": 101890 }, { "epoch": 0.20584444704808155, "grad_norm": 284.36663818359375, "learning_rate": 9.675366799960842e-06, "loss": 23.4975, "step": 101900 }, { "epoch": 0.20586464768076537, "grad_norm": 284.55548095703125, "learning_rate": 9.675243060695079e-06, "loss": 24.3463, "step": 101910 }, { "epoch": 0.20588484831344916, "grad_norm": 422.52703857421875, "learning_rate": 9.675119298642748e-06, "loss": 38.6667, "step": 101920 }, { "epoch": 0.20590504894613298, "grad_norm": 430.506103515625, "learning_rate": 9.674995513804452e-06, "loss": 23.379, "step": 101930 }, { "epoch": 0.2059252495788168, "grad_norm": 910.5701904296875, "learning_rate": 9.674871706180796e-06, "loss": 22.0203, "step": 101940 }, { "epoch": 0.20594545021150062, "grad_norm": 1329.03125, "learning_rate": 9.674747875772381e-06, "loss": 19.5187, "step": 101950 }, { "epoch": 0.20596565084418444, "grad_norm": 2294.890625, "learning_rate": 9.674624022579814e-06, "loss": 26.2692, "step": 101960 }, { "epoch": 0.20598585147686826, 
"grad_norm": 377.37701416015625, "learning_rate": 9.674500146603695e-06, "loss": 17.2762, "step": 101970 }, { "epoch": 0.20600605210955208, "grad_norm": 270.7948303222656, "learning_rate": 9.674376247844628e-06, "loss": 16.6206, "step": 101980 }, { "epoch": 0.20602625274223588, "grad_norm": 708.5576782226562, "learning_rate": 9.67425232630322e-06, "loss": 27.1679, "step": 101990 }, { "epoch": 0.2060464533749197, "grad_norm": 372.044677734375, "learning_rate": 9.674128381980073e-06, "loss": 29.2218, "step": 102000 }, { "epoch": 0.20606665400760352, "grad_norm": 515.59228515625, "learning_rate": 9.67400441487579e-06, "loss": 18.0636, "step": 102010 }, { "epoch": 0.20608685464028734, "grad_norm": 312.6241760253906, "learning_rate": 9.673880424990978e-06, "loss": 33.0065, "step": 102020 }, { "epoch": 0.20610705527297116, "grad_norm": 420.17803955078125, "learning_rate": 9.673756412326238e-06, "loss": 17.6985, "step": 102030 }, { "epoch": 0.20612725590565498, "grad_norm": 106.03390502929688, "learning_rate": 9.673632376882178e-06, "loss": 20.5482, "step": 102040 }, { "epoch": 0.20614745653833877, "grad_norm": 212.96438598632812, "learning_rate": 9.673508318659399e-06, "loss": 15.4148, "step": 102050 }, { "epoch": 0.2061676571710226, "grad_norm": 926.5879516601562, "learning_rate": 9.673384237658508e-06, "loss": 27.9721, "step": 102060 }, { "epoch": 0.2061878578037064, "grad_norm": 431.341064453125, "learning_rate": 9.67326013388011e-06, "loss": 13.3387, "step": 102070 }, { "epoch": 0.20620805843639023, "grad_norm": 318.4190979003906, "learning_rate": 9.673136007324806e-06, "loss": 25.5216, "step": 102080 }, { "epoch": 0.20622825906907405, "grad_norm": 886.5811157226562, "learning_rate": 9.673011857993207e-06, "loss": 25.061, "step": 102090 }, { "epoch": 0.20624845970175787, "grad_norm": 642.0043334960938, "learning_rate": 9.672887685885913e-06, "loss": 24.7124, "step": 102100 }, { "epoch": 0.20626866033444166, "grad_norm": 0.0, "learning_rate": 9.672763491003531e-06, "loss": 26.6625, "step": 102110 }, { "epoch": 0.20628886096712548, "grad_norm": 323.4170227050781, "learning_rate": 9.672639273346668e-06, "loss": 19.3366, "step": 102120 }, { "epoch": 0.2063090615998093, "grad_norm": 511.27362060546875, "learning_rate": 9.672515032915926e-06, "loss": 29.8732, "step": 102130 }, { "epoch": 0.20632926223249312, "grad_norm": 131.7635498046875, "learning_rate": 9.672390769711914e-06, "loss": 16.1435, "step": 102140 }, { "epoch": 0.20634946286517694, "grad_norm": 455.59521484375, "learning_rate": 9.672266483735235e-06, "loss": 26.799, "step": 102150 }, { "epoch": 0.20636966349786076, "grad_norm": 781.8112182617188, "learning_rate": 9.672142174986497e-06, "loss": 32.1946, "step": 102160 }, { "epoch": 0.20638986413054458, "grad_norm": 287.041748046875, "learning_rate": 9.672017843466305e-06, "loss": 49.6158, "step": 102170 }, { "epoch": 0.20641006476322837, "grad_norm": 883.7597045898438, "learning_rate": 9.671893489175263e-06, "loss": 38.0351, "step": 102180 }, { "epoch": 0.2064302653959122, "grad_norm": 295.4107666015625, "learning_rate": 9.67176911211398e-06, "loss": 42.8857, "step": 102190 }, { "epoch": 0.206450466028596, "grad_norm": 913.2815551757812, "learning_rate": 9.671644712283061e-06, "loss": 26.6924, "step": 102200 }, { "epoch": 0.20647066666127983, "grad_norm": 803.0249633789062, "learning_rate": 9.671520289683112e-06, "loss": 13.8448, "step": 102210 }, { "epoch": 0.20649086729396365, "grad_norm": 93.17762756347656, "learning_rate": 9.671395844314739e-06, "loss": 18.475, "step": 102220 }, { 
"epoch": 0.20651106792664747, "grad_norm": 508.1399230957031, "learning_rate": 9.67127137617855e-06, "loss": 26.1959, "step": 102230 }, { "epoch": 0.20653126855933127, "grad_norm": 118.68540954589844, "learning_rate": 9.67114688527515e-06, "loss": 18.8665, "step": 102240 }, { "epoch": 0.20655146919201509, "grad_norm": 76.24480438232422, "learning_rate": 9.671022371605148e-06, "loss": 26.302, "step": 102250 }, { "epoch": 0.2065716698246989, "grad_norm": 771.3875122070312, "learning_rate": 9.670897835169149e-06, "loss": 26.3433, "step": 102260 }, { "epoch": 0.20659187045738273, "grad_norm": 698.0466918945312, "learning_rate": 9.67077327596776e-06, "loss": 21.2148, "step": 102270 }, { "epoch": 0.20661207109006655, "grad_norm": 216.55682373046875, "learning_rate": 9.67064869400159e-06, "loss": 30.4819, "step": 102280 }, { "epoch": 0.20663227172275037, "grad_norm": 1432.4974365234375, "learning_rate": 9.670524089271242e-06, "loss": 48.6451, "step": 102290 }, { "epoch": 0.20665247235543419, "grad_norm": 580.0415649414062, "learning_rate": 9.670399461777328e-06, "loss": 17.0973, "step": 102300 }, { "epoch": 0.20667267298811798, "grad_norm": 731.4429321289062, "learning_rate": 9.670274811520454e-06, "loss": 18.9645, "step": 102310 }, { "epoch": 0.2066928736208018, "grad_norm": 345.7098388671875, "learning_rate": 9.670150138501226e-06, "loss": 23.997, "step": 102320 }, { "epoch": 0.20671307425348562, "grad_norm": 614.6451416015625, "learning_rate": 9.670025442720253e-06, "loss": 22.9857, "step": 102330 }, { "epoch": 0.20673327488616944, "grad_norm": 97.4102783203125, "learning_rate": 9.669900724178142e-06, "loss": 16.508, "step": 102340 }, { "epoch": 0.20675347551885326, "grad_norm": 477.67340087890625, "learning_rate": 9.6697759828755e-06, "loss": 39.7115, "step": 102350 }, { "epoch": 0.20677367615153708, "grad_norm": 302.7767639160156, "learning_rate": 9.669651218812938e-06, "loss": 29.2609, "step": 102360 }, { "epoch": 0.20679387678422087, "grad_norm": 336.0804138183594, "learning_rate": 9.669526431991063e-06, "loss": 19.4055, "step": 102370 }, { "epoch": 0.2068140774169047, "grad_norm": 560.2570190429688, "learning_rate": 9.669401622410481e-06, "loss": 23.5082, "step": 102380 }, { "epoch": 0.2068342780495885, "grad_norm": 868.6182861328125, "learning_rate": 9.669276790071803e-06, "loss": 40.4677, "step": 102390 }, { "epoch": 0.20685447868227233, "grad_norm": 604.2676391601562, "learning_rate": 9.669151934975635e-06, "loss": 20.6695, "step": 102400 }, { "epoch": 0.20687467931495615, "grad_norm": 791.1727905273438, "learning_rate": 9.669027057122586e-06, "loss": 29.302, "step": 102410 }, { "epoch": 0.20689487994763997, "grad_norm": 75.31989288330078, "learning_rate": 9.668902156513268e-06, "loss": 12.0986, "step": 102420 }, { "epoch": 0.20691508058032376, "grad_norm": 500.6473693847656, "learning_rate": 9.668777233148285e-06, "loss": 19.4095, "step": 102430 }, { "epoch": 0.20693528121300758, "grad_norm": 233.5106201171875, "learning_rate": 9.668652287028249e-06, "loss": 31.9816, "step": 102440 }, { "epoch": 0.2069554818456914, "grad_norm": 2282.65478515625, "learning_rate": 9.668527318153769e-06, "loss": 35.4047, "step": 102450 }, { "epoch": 0.20697568247837522, "grad_norm": 301.068359375, "learning_rate": 9.66840232652545e-06, "loss": 37.4903, "step": 102460 }, { "epoch": 0.20699588311105904, "grad_norm": 380.9482421875, "learning_rate": 9.668277312143908e-06, "loss": 18.6953, "step": 102470 }, { "epoch": 0.20701608374374286, "grad_norm": 339.640380859375, "learning_rate": 9.668152275009747e-06, 
"loss": 26.4746, "step": 102480 }, { "epoch": 0.20703628437642668, "grad_norm": 325.6827087402344, "learning_rate": 9.668027215123577e-06, "loss": 26.1166, "step": 102490 }, { "epoch": 0.20705648500911047, "grad_norm": 452.2302551269531, "learning_rate": 9.667902132486009e-06, "loss": 27.4258, "step": 102500 }, { "epoch": 0.2070766856417943, "grad_norm": 462.70391845703125, "learning_rate": 9.667777027097654e-06, "loss": 20.7429, "step": 102510 }, { "epoch": 0.20709688627447811, "grad_norm": 277.04974365234375, "learning_rate": 9.667651898959118e-06, "loss": 22.5922, "step": 102520 }, { "epoch": 0.20711708690716193, "grad_norm": 276.8566589355469, "learning_rate": 9.667526748071013e-06, "loss": 17.4676, "step": 102530 }, { "epoch": 0.20713728753984575, "grad_norm": 312.2072448730469, "learning_rate": 9.667401574433948e-06, "loss": 24.4108, "step": 102540 }, { "epoch": 0.20715748817252957, "grad_norm": 303.51275634765625, "learning_rate": 9.667276378048535e-06, "loss": 17.7054, "step": 102550 }, { "epoch": 0.20717768880521337, "grad_norm": 516.8126220703125, "learning_rate": 9.667151158915382e-06, "loss": 21.9469, "step": 102560 }, { "epoch": 0.2071978894378972, "grad_norm": 469.1775817871094, "learning_rate": 9.667025917035102e-06, "loss": 16.2955, "step": 102570 }, { "epoch": 0.207218090070581, "grad_norm": 346.0306701660156, "learning_rate": 9.666900652408302e-06, "loss": 17.62, "step": 102580 }, { "epoch": 0.20723829070326483, "grad_norm": 314.55615234375, "learning_rate": 9.666775365035596e-06, "loss": 25.255, "step": 102590 }, { "epoch": 0.20725849133594865, "grad_norm": 311.3092956542969, "learning_rate": 9.666650054917591e-06, "loss": 27.1351, "step": 102600 }, { "epoch": 0.20727869196863247, "grad_norm": 420.1705322265625, "learning_rate": 9.666524722054902e-06, "loss": 22.8531, "step": 102610 }, { "epoch": 0.2072988926013163, "grad_norm": 86.36851501464844, "learning_rate": 9.666399366448135e-06, "loss": 12.1727, "step": 102620 }, { "epoch": 0.20731909323400008, "grad_norm": 659.5982055664062, "learning_rate": 9.666273988097904e-06, "loss": 30.465, "step": 102630 }, { "epoch": 0.2073392938666839, "grad_norm": 325.5073547363281, "learning_rate": 9.66614858700482e-06, "loss": 22.9632, "step": 102640 }, { "epoch": 0.20735949449936772, "grad_norm": 755.0143432617188, "learning_rate": 9.666023163169493e-06, "loss": 30.1206, "step": 102650 }, { "epoch": 0.20737969513205154, "grad_norm": 72.33990478515625, "learning_rate": 9.665897716592537e-06, "loss": 13.5545, "step": 102660 }, { "epoch": 0.20739989576473536, "grad_norm": 255.3713836669922, "learning_rate": 9.66577224727456e-06, "loss": 35.5251, "step": 102670 }, { "epoch": 0.20742009639741918, "grad_norm": 615.615478515625, "learning_rate": 9.665646755216175e-06, "loss": 23.9089, "step": 102680 }, { "epoch": 0.20744029703010297, "grad_norm": 419.4078369140625, "learning_rate": 9.665521240417993e-06, "loss": 23.9347, "step": 102690 }, { "epoch": 0.2074604976627868, "grad_norm": 367.4272766113281, "learning_rate": 9.665395702880627e-06, "loss": 24.4317, "step": 102700 }, { "epoch": 0.2074806982954706, "grad_norm": 671.7747192382812, "learning_rate": 9.665270142604688e-06, "loss": 13.4263, "step": 102710 }, { "epoch": 0.20750089892815443, "grad_norm": 589.3701171875, "learning_rate": 9.665144559590789e-06, "loss": 18.6563, "step": 102720 }, { "epoch": 0.20752109956083825, "grad_norm": 583.2495727539062, "learning_rate": 9.66501895383954e-06, "loss": 22.839, "step": 102730 }, { "epoch": 0.20754130019352207, "grad_norm": 
1090.4007568359375, "learning_rate": 9.664893325351556e-06, "loss": 30.8803, "step": 102740 }, { "epoch": 0.20756150082620586, "grad_norm": 237.1215057373047, "learning_rate": 9.664767674127447e-06, "loss": 31.7639, "step": 102750 }, { "epoch": 0.20758170145888968, "grad_norm": 187.4674072265625, "learning_rate": 9.664642000167825e-06, "loss": 19.2407, "step": 102760 }, { "epoch": 0.2076019020915735, "grad_norm": 650.6720581054688, "learning_rate": 9.664516303473305e-06, "loss": 27.2862, "step": 102770 }, { "epoch": 0.20762210272425732, "grad_norm": 462.8194885253906, "learning_rate": 9.664390584044497e-06, "loss": 31.5226, "step": 102780 }, { "epoch": 0.20764230335694114, "grad_norm": 520.2129516601562, "learning_rate": 9.664264841882016e-06, "loss": 20.4408, "step": 102790 }, { "epoch": 0.20766250398962496, "grad_norm": 231.10311889648438, "learning_rate": 9.664139076986473e-06, "loss": 16.1323, "step": 102800 }, { "epoch": 0.20768270462230878, "grad_norm": 754.71533203125, "learning_rate": 9.664013289358483e-06, "loss": 30.6725, "step": 102810 }, { "epoch": 0.20770290525499258, "grad_norm": 328.50787353515625, "learning_rate": 9.663887478998657e-06, "loss": 23.7939, "step": 102820 }, { "epoch": 0.2077231058876764, "grad_norm": 608.3097534179688, "learning_rate": 9.66376164590761e-06, "loss": 26.8249, "step": 102830 }, { "epoch": 0.20774330652036022, "grad_norm": 566.4948120117188, "learning_rate": 9.663635790085954e-06, "loss": 34.519, "step": 102840 }, { "epoch": 0.20776350715304404, "grad_norm": 39.48759841918945, "learning_rate": 9.663509911534302e-06, "loss": 21.2754, "step": 102850 }, { "epoch": 0.20778370778572786, "grad_norm": 789.2013549804688, "learning_rate": 9.663384010253269e-06, "loss": 25.6627, "step": 102860 }, { "epoch": 0.20780390841841168, "grad_norm": 213.62619018554688, "learning_rate": 9.663258086243468e-06, "loss": 14.5341, "step": 102870 }, { "epoch": 0.20782410905109547, "grad_norm": 358.7830505371094, "learning_rate": 9.663132139505513e-06, "loss": 14.7057, "step": 102880 }, { "epoch": 0.2078443096837793, "grad_norm": 541.4920043945312, "learning_rate": 9.663006170040016e-06, "loss": 17.3757, "step": 102890 }, { "epoch": 0.2078645103164631, "grad_norm": 871.7129516601562, "learning_rate": 9.662880177847595e-06, "loss": 22.9835, "step": 102900 }, { "epoch": 0.20788471094914693, "grad_norm": 420.2844543457031, "learning_rate": 9.66275416292886e-06, "loss": 17.0057, "step": 102910 }, { "epoch": 0.20790491158183075, "grad_norm": 362.6865234375, "learning_rate": 9.662628125284426e-06, "loss": 27.016, "step": 102920 }, { "epoch": 0.20792511221451457, "grad_norm": 25.914304733276367, "learning_rate": 9.66250206491491e-06, "loss": 29.3879, "step": 102930 }, { "epoch": 0.2079453128471984, "grad_norm": 1035.3394775390625, "learning_rate": 9.662375981820921e-06, "loss": 46.7387, "step": 102940 }, { "epoch": 0.20796551347988218, "grad_norm": 455.292236328125, "learning_rate": 9.66224987600308e-06, "loss": 14.5877, "step": 102950 }, { "epoch": 0.207985714112566, "grad_norm": 517.5321655273438, "learning_rate": 9.662123747461998e-06, "loss": 24.5114, "step": 102960 }, { "epoch": 0.20800591474524982, "grad_norm": 438.7643737792969, "learning_rate": 9.66199759619829e-06, "loss": 39.6918, "step": 102970 }, { "epoch": 0.20802611537793364, "grad_norm": 243.06369018554688, "learning_rate": 9.661871422212572e-06, "loss": 22.8764, "step": 102980 }, { "epoch": 0.20804631601061746, "grad_norm": 571.2363891601562, "learning_rate": 9.661745225505457e-06, "loss": 20.3634, "step": 102990 
}, { "epoch": 0.20806651664330128, "grad_norm": 16.093774795532227, "learning_rate": 9.661619006077562e-06, "loss": 13.9841, "step": 103000 }, { "epoch": 0.20808671727598507, "grad_norm": 394.1217041015625, "learning_rate": 9.661492763929502e-06, "loss": 29.1695, "step": 103010 }, { "epoch": 0.2081069179086689, "grad_norm": 463.3786926269531, "learning_rate": 9.66136649906189e-06, "loss": 17.631, "step": 103020 }, { "epoch": 0.2081271185413527, "grad_norm": 218.5435333251953, "learning_rate": 9.661240211475342e-06, "loss": 24.5859, "step": 103030 }, { "epoch": 0.20814731917403653, "grad_norm": 322.4661560058594, "learning_rate": 9.661113901170477e-06, "loss": 28.7746, "step": 103040 }, { "epoch": 0.20816751980672035, "grad_norm": 739.1019287109375, "learning_rate": 9.660987568147907e-06, "loss": 32.986, "step": 103050 }, { "epoch": 0.20818772043940417, "grad_norm": 206.95220947265625, "learning_rate": 9.66086121240825e-06, "loss": 36.8508, "step": 103060 }, { "epoch": 0.20820792107208796, "grad_norm": 174.70111083984375, "learning_rate": 9.66073483395212e-06, "loss": 15.9502, "step": 103070 }, { "epoch": 0.20822812170477178, "grad_norm": 437.3995666503906, "learning_rate": 9.660608432780133e-06, "loss": 12.2834, "step": 103080 }, { "epoch": 0.2082483223374556, "grad_norm": 288.0544128417969, "learning_rate": 9.660482008892907e-06, "loss": 13.4444, "step": 103090 }, { "epoch": 0.20826852297013942, "grad_norm": 543.4057006835938, "learning_rate": 9.660355562291055e-06, "loss": 32.2566, "step": 103100 }, { "epoch": 0.20828872360282324, "grad_norm": 509.3530578613281, "learning_rate": 9.660229092975197e-06, "loss": 15.5038, "step": 103110 }, { "epoch": 0.20830892423550706, "grad_norm": 367.4169921875, "learning_rate": 9.660102600945947e-06, "loss": 16.8844, "step": 103120 }, { "epoch": 0.20832912486819088, "grad_norm": 319.7675476074219, "learning_rate": 9.659976086203922e-06, "loss": 14.6273, "step": 103130 }, { "epoch": 0.20834932550087468, "grad_norm": 290.2574157714844, "learning_rate": 9.65984954874974e-06, "loss": 29.0676, "step": 103140 }, { "epoch": 0.2083695261335585, "grad_norm": 267.9770812988281, "learning_rate": 9.659722988584015e-06, "loss": 34.6491, "step": 103150 }, { "epoch": 0.20838972676624232, "grad_norm": 745.9423217773438, "learning_rate": 9.659596405707366e-06, "loss": 32.7889, "step": 103160 }, { "epoch": 0.20840992739892614, "grad_norm": 438.6147766113281, "learning_rate": 9.659469800120408e-06, "loss": 24.7799, "step": 103170 }, { "epoch": 0.20843012803160996, "grad_norm": 74.18256378173828, "learning_rate": 9.65934317182376e-06, "loss": 12.9358, "step": 103180 }, { "epoch": 0.20845032866429378, "grad_norm": 146.2864227294922, "learning_rate": 9.65921652081804e-06, "loss": 19.3974, "step": 103190 }, { "epoch": 0.20847052929697757, "grad_norm": 320.2649230957031, "learning_rate": 9.659089847103863e-06, "loss": 18.9058, "step": 103200 }, { "epoch": 0.2084907299296614, "grad_norm": 916.2047119140625, "learning_rate": 9.658963150681848e-06, "loss": 35.5412, "step": 103210 }, { "epoch": 0.2085109305623452, "grad_norm": 359.853515625, "learning_rate": 9.658836431552609e-06, "loss": 24.8434, "step": 103220 }, { "epoch": 0.20853113119502903, "grad_norm": 867.36376953125, "learning_rate": 9.658709689716768e-06, "loss": 20.0789, "step": 103230 }, { "epoch": 0.20855133182771285, "grad_norm": 513.3993530273438, "learning_rate": 9.65858292517494e-06, "loss": 16.9374, "step": 103240 }, { "epoch": 0.20857153246039667, "grad_norm": 599.906982421875, "learning_rate": 
9.658456137927745e-06, "loss": 31.0198, "step": 103250 }, { "epoch": 0.20859173309308046, "grad_norm": 230.84130859375, "learning_rate": 9.6583293279758e-06, "loss": 18.9309, "step": 103260 }, { "epoch": 0.20861193372576428, "grad_norm": 403.97515869140625, "learning_rate": 9.658202495319721e-06, "loss": 13.8265, "step": 103270 }, { "epoch": 0.2086321343584481, "grad_norm": 620.8816528320312, "learning_rate": 9.65807563996013e-06, "loss": 13.429, "step": 103280 }, { "epoch": 0.20865233499113192, "grad_norm": 287.4898986816406, "learning_rate": 9.657948761897643e-06, "loss": 22.3302, "step": 103290 }, { "epoch": 0.20867253562381574, "grad_norm": 122.81134033203125, "learning_rate": 9.65782186113288e-06, "loss": 15.098, "step": 103300 }, { "epoch": 0.20869273625649956, "grad_norm": 0.0, "learning_rate": 9.657694937666454e-06, "loss": 13.8586, "step": 103310 }, { "epoch": 0.20871293688918338, "grad_norm": 328.1044616699219, "learning_rate": 9.65756799149899e-06, "loss": 25.5796, "step": 103320 }, { "epoch": 0.20873313752186717, "grad_norm": 190.67489624023438, "learning_rate": 9.657441022631105e-06, "loss": 32.4643, "step": 103330 }, { "epoch": 0.208753338154551, "grad_norm": 411.0813903808594, "learning_rate": 9.657314031063419e-06, "loss": 17.2426, "step": 103340 }, { "epoch": 0.2087735387872348, "grad_norm": 391.1584777832031, "learning_rate": 9.657187016796546e-06, "loss": 12.9593, "step": 103350 }, { "epoch": 0.20879373941991863, "grad_norm": 313.9464416503906, "learning_rate": 9.657059979831109e-06, "loss": 18.2481, "step": 103360 }, { "epoch": 0.20881394005260245, "grad_norm": 771.1447143554688, "learning_rate": 9.656932920167727e-06, "loss": 34.8314, "step": 103370 }, { "epoch": 0.20883414068528627, "grad_norm": 607.5734252929688, "learning_rate": 9.65680583780702e-06, "loss": 21.0373, "step": 103380 }, { "epoch": 0.20885434131797007, "grad_norm": 123.56460571289062, "learning_rate": 9.656678732749605e-06, "loss": 12.7128, "step": 103390 }, { "epoch": 0.20887454195065389, "grad_norm": 103.83605194091797, "learning_rate": 9.656551604996102e-06, "loss": 29.9846, "step": 103400 }, { "epoch": 0.2088947425833377, "grad_norm": 339.4877014160156, "learning_rate": 9.656424454547131e-06, "loss": 21.3867, "step": 103410 }, { "epoch": 0.20891494321602153, "grad_norm": 695.9169921875, "learning_rate": 9.656297281403315e-06, "loss": 16.8246, "step": 103420 }, { "epoch": 0.20893514384870535, "grad_norm": 800.305419921875, "learning_rate": 9.656170085565268e-06, "loss": 22.556, "step": 103430 }, { "epoch": 0.20895534448138917, "grad_norm": 80.22218322753906, "learning_rate": 9.656042867033613e-06, "loss": 16.0496, "step": 103440 }, { "epoch": 0.20897554511407299, "grad_norm": 492.98284912109375, "learning_rate": 9.655915625808971e-06, "loss": 26.8057, "step": 103450 }, { "epoch": 0.20899574574675678, "grad_norm": 252.5966796875, "learning_rate": 9.65578836189196e-06, "loss": 26.845, "step": 103460 }, { "epoch": 0.2090159463794406, "grad_norm": 609.3297729492188, "learning_rate": 9.6556610752832e-06, "loss": 25.3696, "step": 103470 }, { "epoch": 0.20903614701212442, "grad_norm": 268.52362060546875, "learning_rate": 9.655533765983315e-06, "loss": 18.3948, "step": 103480 }, { "epoch": 0.20905634764480824, "grad_norm": 547.3050537109375, "learning_rate": 9.655406433992922e-06, "loss": 40.6968, "step": 103490 }, { "epoch": 0.20907654827749206, "grad_norm": 429.0570983886719, "learning_rate": 9.655279079312643e-06, "loss": 25.0425, "step": 103500 }, { "epoch": 0.20909674891017588, "grad_norm": 
451.627197265625, "learning_rate": 9.655151701943098e-06, "loss": 30.1036, "step": 103510 }, { "epoch": 0.20911694954285967, "grad_norm": 289.7569885253906, "learning_rate": 9.655024301884908e-06, "loss": 22.6742, "step": 103520 }, { "epoch": 0.2091371501755435, "grad_norm": 350.6484375, "learning_rate": 9.654896879138693e-06, "loss": 13.1503, "step": 103530 }, { "epoch": 0.2091573508082273, "grad_norm": 286.00164794921875, "learning_rate": 9.654769433705079e-06, "loss": 18.3662, "step": 103540 }, { "epoch": 0.20917755144091113, "grad_norm": 205.3463592529297, "learning_rate": 9.65464196558468e-06, "loss": 90.5848, "step": 103550 }, { "epoch": 0.20919775207359495, "grad_norm": 372.85943603515625, "learning_rate": 9.65451447477812e-06, "loss": 25.0431, "step": 103560 }, { "epoch": 0.20921795270627877, "grad_norm": 272.0143127441406, "learning_rate": 9.654386961286023e-06, "loss": 15.6093, "step": 103570 }, { "epoch": 0.20923815333896256, "grad_norm": 347.99725341796875, "learning_rate": 9.654259425109009e-06, "loss": 20.3958, "step": 103580 }, { "epoch": 0.20925835397164638, "grad_norm": 339.36376953125, "learning_rate": 9.654131866247698e-06, "loss": 23.5363, "step": 103590 }, { "epoch": 0.2092785546043302, "grad_norm": 473.11767578125, "learning_rate": 9.654004284702712e-06, "loss": 11.7759, "step": 103600 }, { "epoch": 0.20929875523701402, "grad_norm": 551.1157836914062, "learning_rate": 9.653876680474674e-06, "loss": 28.9522, "step": 103610 }, { "epoch": 0.20931895586969784, "grad_norm": 370.058349609375, "learning_rate": 9.653749053564206e-06, "loss": 25.6404, "step": 103620 }, { "epoch": 0.20933915650238166, "grad_norm": 590.947265625, "learning_rate": 9.65362140397193e-06, "loss": 18.4211, "step": 103630 }, { "epoch": 0.20935935713506548, "grad_norm": 429.77056884765625, "learning_rate": 9.653493731698467e-06, "loss": 22.7104, "step": 103640 }, { "epoch": 0.20937955776774927, "grad_norm": 330.9519348144531, "learning_rate": 9.65336603674444e-06, "loss": 22.992, "step": 103650 }, { "epoch": 0.2093997584004331, "grad_norm": 993.672119140625, "learning_rate": 9.653238319110473e-06, "loss": 20.435, "step": 103660 }, { "epoch": 0.20941995903311691, "grad_norm": 217.466552734375, "learning_rate": 9.653110578797183e-06, "loss": 35.0793, "step": 103670 }, { "epoch": 0.20944015966580073, "grad_norm": 529.8526000976562, "learning_rate": 9.652982815805199e-06, "loss": 18.7878, "step": 103680 }, { "epoch": 0.20946036029848455, "grad_norm": 683.9608764648438, "learning_rate": 9.652855030135139e-06, "loss": 23.9079, "step": 103690 }, { "epoch": 0.20948056093116837, "grad_norm": 495.77899169921875, "learning_rate": 9.65272722178763e-06, "loss": 32.7164, "step": 103700 }, { "epoch": 0.20950076156385217, "grad_norm": 44.03288269042969, "learning_rate": 9.652599390763294e-06, "loss": 52.573, "step": 103710 }, { "epoch": 0.209520962196536, "grad_norm": 154.39260864257812, "learning_rate": 9.652471537062751e-06, "loss": 13.7941, "step": 103720 }, { "epoch": 0.2095411628292198, "grad_norm": 417.5998840332031, "learning_rate": 9.652343660686626e-06, "loss": 14.2793, "step": 103730 }, { "epoch": 0.20956136346190363, "grad_norm": 491.297607421875, "learning_rate": 9.652215761635541e-06, "loss": 23.3906, "step": 103740 }, { "epoch": 0.20958156409458745, "grad_norm": 415.0970153808594, "learning_rate": 9.652087839910123e-06, "loss": 32.32, "step": 103750 }, { "epoch": 0.20960176472727127, "grad_norm": 91.13452911376953, "learning_rate": 9.651959895510992e-06, "loss": 29.1928, "step": 103760 }, { "epoch": 
0.2096219653599551, "grad_norm": 332.85137939453125, "learning_rate": 9.651831928438773e-06, "loss": 23.4254, "step": 103770 }, { "epoch": 0.20964216599263888, "grad_norm": 323.9944152832031, "learning_rate": 9.65170393869409e-06, "loss": 12.4928, "step": 103780 }, { "epoch": 0.2096623666253227, "grad_norm": 322.8095397949219, "learning_rate": 9.651575926277566e-06, "loss": 16.172, "step": 103790 }, { "epoch": 0.20968256725800652, "grad_norm": 219.4324493408203, "learning_rate": 9.651447891189824e-06, "loss": 19.0577, "step": 103800 }, { "epoch": 0.20970276789069034, "grad_norm": 46.137203216552734, "learning_rate": 9.65131983343149e-06, "loss": 30.1033, "step": 103810 }, { "epoch": 0.20972296852337416, "grad_norm": 839.6621704101562, "learning_rate": 9.651191753003187e-06, "loss": 26.9795, "step": 103820 }, { "epoch": 0.20974316915605798, "grad_norm": 665.8576049804688, "learning_rate": 9.65106364990554e-06, "loss": 35.2283, "step": 103830 }, { "epoch": 0.20976336978874177, "grad_norm": 543.6165161132812, "learning_rate": 9.650935524139172e-06, "loss": 32.0244, "step": 103840 }, { "epoch": 0.2097835704214256, "grad_norm": 555.6519165039062, "learning_rate": 9.650807375704708e-06, "loss": 30.0249, "step": 103850 }, { "epoch": 0.2098037710541094, "grad_norm": 261.15020751953125, "learning_rate": 9.650679204602773e-06, "loss": 18.333, "step": 103860 }, { "epoch": 0.20982397168679323, "grad_norm": 203.61585998535156, "learning_rate": 9.650551010833993e-06, "loss": 17.2493, "step": 103870 }, { "epoch": 0.20984417231947705, "grad_norm": 465.2434997558594, "learning_rate": 9.650422794398991e-06, "loss": 26.312, "step": 103880 }, { "epoch": 0.20986437295216087, "grad_norm": 907.5621337890625, "learning_rate": 9.650294555298392e-06, "loss": 35.4733, "step": 103890 }, { "epoch": 0.20988457358484466, "grad_norm": 1396.000244140625, "learning_rate": 9.650166293532822e-06, "loss": 21.6182, "step": 103900 }, { "epoch": 0.20990477421752848, "grad_norm": 572.03369140625, "learning_rate": 9.650038009102905e-06, "loss": 16.3848, "step": 103910 }, { "epoch": 0.2099249748502123, "grad_norm": 485.6903991699219, "learning_rate": 9.649909702009265e-06, "loss": 19.6476, "step": 103920 }, { "epoch": 0.20994517548289612, "grad_norm": 650.719482421875, "learning_rate": 9.649781372252532e-06, "loss": 13.2238, "step": 103930 }, { "epoch": 0.20996537611557994, "grad_norm": 160.12863159179688, "learning_rate": 9.649653019833327e-06, "loss": 20.9152, "step": 103940 }, { "epoch": 0.20998557674826376, "grad_norm": 271.49151611328125, "learning_rate": 9.649524644752278e-06, "loss": 24.1985, "step": 103950 }, { "epoch": 0.21000577738094758, "grad_norm": 562.4899291992188, "learning_rate": 9.649396247010008e-06, "loss": 27.713, "step": 103960 }, { "epoch": 0.21002597801363138, "grad_norm": 695.3604125976562, "learning_rate": 9.649267826607145e-06, "loss": 25.0402, "step": 103970 }, { "epoch": 0.2100461786463152, "grad_norm": 761.8713989257812, "learning_rate": 9.649139383544315e-06, "loss": 23.7212, "step": 103980 }, { "epoch": 0.21006637927899902, "grad_norm": 214.13221740722656, "learning_rate": 9.649010917822145e-06, "loss": 23.0139, "step": 103990 }, { "epoch": 0.21008657991168284, "grad_norm": 342.4206237792969, "learning_rate": 9.648882429441258e-06, "loss": 30.4913, "step": 104000 }, { "epoch": 0.21010678054436666, "grad_norm": 317.7519226074219, "learning_rate": 9.648753918402283e-06, "loss": 18.0373, "step": 104010 }, { "epoch": 0.21012698117705048, "grad_norm": 742.5284423828125, "learning_rate": 
9.648625384705844e-06, "loss": 27.8134, "step": 104020 }, { "epoch": 0.21014718180973427, "grad_norm": 424.1588439941406, "learning_rate": 9.648496828352569e-06, "loss": 20.2973, "step": 104030 }, { "epoch": 0.2101673824424181, "grad_norm": 455.66943359375, "learning_rate": 9.648368249343084e-06, "loss": 23.7883, "step": 104040 }, { "epoch": 0.2101875830751019, "grad_norm": 163.74085998535156, "learning_rate": 9.648239647678017e-06, "loss": 11.1417, "step": 104050 }, { "epoch": 0.21020778370778573, "grad_norm": 190.27513122558594, "learning_rate": 9.648111023357994e-06, "loss": 29.0521, "step": 104060 }, { "epoch": 0.21022798434046955, "grad_norm": 625.2974243164062, "learning_rate": 9.64798237638364e-06, "loss": 17.1522, "step": 104070 }, { "epoch": 0.21024818497315337, "grad_norm": 596.1205444335938, "learning_rate": 9.647853706755583e-06, "loss": 31.4544, "step": 104080 }, { "epoch": 0.2102683856058372, "grad_norm": 511.16522216796875, "learning_rate": 9.647725014474452e-06, "loss": 23.0785, "step": 104090 }, { "epoch": 0.21028858623852098, "grad_norm": 153.3165740966797, "learning_rate": 9.647596299540874e-06, "loss": 17.5788, "step": 104100 }, { "epoch": 0.2103087868712048, "grad_norm": 352.26904296875, "learning_rate": 9.647467561955474e-06, "loss": 15.7225, "step": 104110 }, { "epoch": 0.21032898750388862, "grad_norm": 783.6790771484375, "learning_rate": 9.647338801718882e-06, "loss": 29.8876, "step": 104120 }, { "epoch": 0.21034918813657244, "grad_norm": 620.0906372070312, "learning_rate": 9.647210018831723e-06, "loss": 27.5526, "step": 104130 }, { "epoch": 0.21036938876925626, "grad_norm": 190.4215087890625, "learning_rate": 9.647081213294627e-06, "loss": 21.3827, "step": 104140 }, { "epoch": 0.21038958940194008, "grad_norm": 20.406620025634766, "learning_rate": 9.64695238510822e-06, "loss": 17.9227, "step": 104150 }, { "epoch": 0.21040979003462387, "grad_norm": 314.3428649902344, "learning_rate": 9.646823534273131e-06, "loss": 24.0574, "step": 104160 }, { "epoch": 0.2104299906673077, "grad_norm": 370.3890686035156, "learning_rate": 9.646694660789986e-06, "loss": 19.9015, "step": 104170 }, { "epoch": 0.2104501912999915, "grad_norm": 474.7753601074219, "learning_rate": 9.646565764659418e-06, "loss": 29.9416, "step": 104180 }, { "epoch": 0.21047039193267533, "grad_norm": 284.2029724121094, "learning_rate": 9.64643684588205e-06, "loss": 26.8573, "step": 104190 }, { "epoch": 0.21049059256535915, "grad_norm": 695.9445190429688, "learning_rate": 9.646307904458513e-06, "loss": 29.3759, "step": 104200 }, { "epoch": 0.21051079319804297, "grad_norm": 174.19967651367188, "learning_rate": 9.646178940389435e-06, "loss": 12.2988, "step": 104210 }, { "epoch": 0.21053099383072676, "grad_norm": 449.9962158203125, "learning_rate": 9.646049953675443e-06, "loss": 20.4931, "step": 104220 }, { "epoch": 0.21055119446341058, "grad_norm": 227.7549285888672, "learning_rate": 9.645920944317168e-06, "loss": 17.4696, "step": 104230 }, { "epoch": 0.2105713950960944, "grad_norm": 317.04510498046875, "learning_rate": 9.645791912315239e-06, "loss": 30.6065, "step": 104240 }, { "epoch": 0.21059159572877822, "grad_norm": 964.8438110351562, "learning_rate": 9.645662857670281e-06, "loss": 33.8488, "step": 104250 }, { "epoch": 0.21061179636146204, "grad_norm": 360.98028564453125, "learning_rate": 9.645533780382928e-06, "loss": 43.4764, "step": 104260 }, { "epoch": 0.21063199699414586, "grad_norm": 304.18011474609375, "learning_rate": 9.645404680453805e-06, "loss": 18.507, "step": 104270 }, { "epoch": 
0.21065219762682968, "grad_norm": 519.6947631835938, "learning_rate": 9.645275557883545e-06, "loss": 10.1371, "step": 104280 }, { "epoch": 0.21067239825951348, "grad_norm": 346.36541748046875, "learning_rate": 9.645146412672774e-06, "loss": 18.4591, "step": 104290 }, { "epoch": 0.2106925988921973, "grad_norm": 30.78389549255371, "learning_rate": 9.645017244822124e-06, "loss": 38.215, "step": 104300 }, { "epoch": 0.21071279952488112, "grad_norm": 523.0682983398438, "learning_rate": 9.644888054332222e-06, "loss": 29.042, "step": 104310 }, { "epoch": 0.21073300015756494, "grad_norm": 576.9373779296875, "learning_rate": 9.644758841203698e-06, "loss": 30.983, "step": 104320 }, { "epoch": 0.21075320079024876, "grad_norm": 955.591796875, "learning_rate": 9.644629605437186e-06, "loss": 32.7122, "step": 104330 }, { "epoch": 0.21077340142293258, "grad_norm": 510.2848205566406, "learning_rate": 9.64450034703331e-06, "loss": 31.5209, "step": 104340 }, { "epoch": 0.21079360205561637, "grad_norm": 509.8435363769531, "learning_rate": 9.644371065992704e-06, "loss": 23.3649, "step": 104350 }, { "epoch": 0.2108138026883002, "grad_norm": 662.75341796875, "learning_rate": 9.644241762315995e-06, "loss": 21.6943, "step": 104360 }, { "epoch": 0.210834003320984, "grad_norm": 392.33282470703125, "learning_rate": 9.644112436003817e-06, "loss": 18.6042, "step": 104370 }, { "epoch": 0.21085420395366783, "grad_norm": 277.52398681640625, "learning_rate": 9.643983087056796e-06, "loss": 48.5873, "step": 104380 }, { "epoch": 0.21087440458635165, "grad_norm": 558.379638671875, "learning_rate": 9.643853715475567e-06, "loss": 8.8655, "step": 104390 }, { "epoch": 0.21089460521903547, "grad_norm": 912.6490478515625, "learning_rate": 9.643724321260757e-06, "loss": 39.9867, "step": 104400 }, { "epoch": 0.2109148058517193, "grad_norm": 196.27220153808594, "learning_rate": 9.643594904412998e-06, "loss": 36.5609, "step": 104410 }, { "epoch": 0.21093500648440308, "grad_norm": 399.4701843261719, "learning_rate": 9.64346546493292e-06, "loss": 29.8103, "step": 104420 }, { "epoch": 0.2109552071170869, "grad_norm": 390.3523254394531, "learning_rate": 9.643336002821155e-06, "loss": 21.7872, "step": 104430 }, { "epoch": 0.21097540774977072, "grad_norm": 307.52020263671875, "learning_rate": 9.643206518078335e-06, "loss": 24.2028, "step": 104440 }, { "epoch": 0.21099560838245454, "grad_norm": 150.6370849609375, "learning_rate": 9.643077010705088e-06, "loss": 15.9814, "step": 104450 }, { "epoch": 0.21101580901513836, "grad_norm": 29.605432510375977, "learning_rate": 9.642947480702047e-06, "loss": 17.2623, "step": 104460 }, { "epoch": 0.21103600964782218, "grad_norm": 521.399658203125, "learning_rate": 9.642817928069843e-06, "loss": 29.4208, "step": 104470 }, { "epoch": 0.21105621028050597, "grad_norm": 389.67401123046875, "learning_rate": 9.642688352809108e-06, "loss": 12.6442, "step": 104480 }, { "epoch": 0.2110764109131898, "grad_norm": 1051.3519287109375, "learning_rate": 9.642558754920472e-06, "loss": 27.8617, "step": 104490 }, { "epoch": 0.2110966115458736, "grad_norm": 242.0317840576172, "learning_rate": 9.642429134404568e-06, "loss": 19.388, "step": 104500 }, { "epoch": 0.21111681217855743, "grad_norm": 183.55413818359375, "learning_rate": 9.642299491262028e-06, "loss": 66.1441, "step": 104510 }, { "epoch": 0.21113701281124125, "grad_norm": 182.1591796875, "learning_rate": 9.642169825493483e-06, "loss": 15.527, "step": 104520 }, { "epoch": 0.21115721344392507, "grad_norm": 760.7169189453125, "learning_rate": 9.642040137099567e-06, 
"loss": 23.7755, "step": 104530 }, { "epoch": 0.21117741407660887, "grad_norm": 69.65254974365234, "learning_rate": 9.641910426080909e-06, "loss": 24.1172, "step": 104540 }, { "epoch": 0.21119761470929269, "grad_norm": 109.78113555908203, "learning_rate": 9.641780692438142e-06, "loss": 20.1373, "step": 104550 }, { "epoch": 0.2112178153419765, "grad_norm": 258.04345703125, "learning_rate": 9.641650936171899e-06, "loss": 21.6432, "step": 104560 }, { "epoch": 0.21123801597466033, "grad_norm": 295.4312438964844, "learning_rate": 9.641521157282812e-06, "loss": 40.5421, "step": 104570 }, { "epoch": 0.21125821660734415, "grad_norm": 88.19575500488281, "learning_rate": 9.641391355771515e-06, "loss": 41.9684, "step": 104580 }, { "epoch": 0.21127841724002797, "grad_norm": 446.5942687988281, "learning_rate": 9.641261531638639e-06, "loss": 24.5742, "step": 104590 }, { "epoch": 0.21129861787271179, "grad_norm": 541.0067138671875, "learning_rate": 9.641131684884817e-06, "loss": 24.1824, "step": 104600 }, { "epoch": 0.21131881850539558, "grad_norm": 721.6463012695312, "learning_rate": 9.641001815510683e-06, "loss": 21.8882, "step": 104610 }, { "epoch": 0.2113390191380794, "grad_norm": 232.47772216796875, "learning_rate": 9.640871923516868e-06, "loss": 13.5951, "step": 104620 }, { "epoch": 0.21135921977076322, "grad_norm": 381.5535583496094, "learning_rate": 9.640742008904006e-06, "loss": 19.7345, "step": 104630 }, { "epoch": 0.21137942040344704, "grad_norm": 406.0082092285156, "learning_rate": 9.64061207167273e-06, "loss": 33.6257, "step": 104640 }, { "epoch": 0.21139962103613086, "grad_norm": 450.4580383300781, "learning_rate": 9.640482111823675e-06, "loss": 15.2805, "step": 104650 }, { "epoch": 0.21141982166881468, "grad_norm": 348.883544921875, "learning_rate": 9.640352129357473e-06, "loss": 17.2821, "step": 104660 }, { "epoch": 0.21144002230149847, "grad_norm": 259.6910095214844, "learning_rate": 9.640222124274756e-06, "loss": 33.3449, "step": 104670 }, { "epoch": 0.2114602229341823, "grad_norm": 760.593994140625, "learning_rate": 9.64009209657616e-06, "loss": 25.1494, "step": 104680 }, { "epoch": 0.2114804235668661, "grad_norm": 487.2242126464844, "learning_rate": 9.639962046262319e-06, "loss": 20.9965, "step": 104690 }, { "epoch": 0.21150062419954993, "grad_norm": 369.1243591308594, "learning_rate": 9.639831973333864e-06, "loss": 18.845, "step": 104700 }, { "epoch": 0.21152082483223375, "grad_norm": 826.623779296875, "learning_rate": 9.63970187779143e-06, "loss": 29.863, "step": 104710 }, { "epoch": 0.21154102546491757, "grad_norm": 679.8445434570312, "learning_rate": 9.639571759635655e-06, "loss": 29.9109, "step": 104720 }, { "epoch": 0.2115612260976014, "grad_norm": 602.4418334960938, "learning_rate": 9.639441618867167e-06, "loss": 35.8549, "step": 104730 }, { "epoch": 0.21158142673028518, "grad_norm": 916.6395263671875, "learning_rate": 9.639311455486603e-06, "loss": 21.7738, "step": 104740 }, { "epoch": 0.211601627362969, "grad_norm": 480.5182189941406, "learning_rate": 9.6391812694946e-06, "loss": 69.9435, "step": 104750 }, { "epoch": 0.21162182799565282, "grad_norm": 460.5019836425781, "learning_rate": 9.639051060891789e-06, "loss": 29.9818, "step": 104760 }, { "epoch": 0.21164202862833664, "grad_norm": 467.3058166503906, "learning_rate": 9.638920829678806e-06, "loss": 16.7311, "step": 104770 }, { "epoch": 0.21166222926102046, "grad_norm": 313.87542724609375, "learning_rate": 9.638790575856285e-06, "loss": 25.4375, "step": 104780 }, { "epoch": 0.21168242989370428, "grad_norm": 
298.5509338378906, "learning_rate": 9.638660299424863e-06, "loss": 13.7837, "step": 104790 }, { "epoch": 0.21170263052638807, "grad_norm": 480.658935546875, "learning_rate": 9.638530000385171e-06, "loss": 27.33, "step": 104800 }, { "epoch": 0.2117228311590719, "grad_norm": 1329.874755859375, "learning_rate": 9.63839967873785e-06, "loss": 20.4955, "step": 104810 }, { "epoch": 0.21174303179175571, "grad_norm": 118.67254638671875, "learning_rate": 9.638269334483528e-06, "loss": 37.3196, "step": 104820 }, { "epoch": 0.21176323242443953, "grad_norm": 560.5855102539062, "learning_rate": 9.638138967622845e-06, "loss": 15.966, "step": 104830 }, { "epoch": 0.21178343305712335, "grad_norm": 391.7400817871094, "learning_rate": 9.638008578156435e-06, "loss": 30.7358, "step": 104840 }, { "epoch": 0.21180363368980717, "grad_norm": 334.184326171875, "learning_rate": 9.637878166084932e-06, "loss": 21.3462, "step": 104850 }, { "epoch": 0.21182383432249097, "grad_norm": 481.4829406738281, "learning_rate": 9.637747731408975e-06, "loss": 39.2185, "step": 104860 }, { "epoch": 0.2118440349551748, "grad_norm": 2150.810546875, "learning_rate": 9.637617274129198e-06, "loss": 40.7387, "step": 104870 }, { "epoch": 0.2118642355878586, "grad_norm": 714.3262939453125, "learning_rate": 9.637486794246237e-06, "loss": 26.9667, "step": 104880 }, { "epoch": 0.21188443622054243, "grad_norm": 362.8081359863281, "learning_rate": 9.637356291760729e-06, "loss": 10.2341, "step": 104890 }, { "epoch": 0.21190463685322625, "grad_norm": 989.9890747070312, "learning_rate": 9.637225766673309e-06, "loss": 19.2879, "step": 104900 }, { "epoch": 0.21192483748591007, "grad_norm": 469.71044921875, "learning_rate": 9.63709521898461e-06, "loss": 22.5119, "step": 104910 }, { "epoch": 0.2119450381185939, "grad_norm": 477.49322509765625, "learning_rate": 9.636964648695272e-06, "loss": 27.2541, "step": 104920 }, { "epoch": 0.21196523875127768, "grad_norm": 767.3585205078125, "learning_rate": 9.636834055805933e-06, "loss": 16.5624, "step": 104930 }, { "epoch": 0.2119854393839615, "grad_norm": 575.4326171875, "learning_rate": 9.636703440317225e-06, "loss": 19.2976, "step": 104940 }, { "epoch": 0.21200564001664532, "grad_norm": 352.4774169921875, "learning_rate": 9.636572802229789e-06, "loss": 19.7583, "step": 104950 }, { "epoch": 0.21202584064932914, "grad_norm": 234.3727569580078, "learning_rate": 9.636442141544259e-06, "loss": 22.481, "step": 104960 }, { "epoch": 0.21204604128201296, "grad_norm": 460.0948486328125, "learning_rate": 9.63631145826127e-06, "loss": 19.744, "step": 104970 }, { "epoch": 0.21206624191469678, "grad_norm": 248.75135803222656, "learning_rate": 9.636180752381464e-06, "loss": 27.15, "step": 104980 }, { "epoch": 0.21208644254738057, "grad_norm": 683.0396118164062, "learning_rate": 9.636050023905473e-06, "loss": 21.9629, "step": 104990 }, { "epoch": 0.2121066431800644, "grad_norm": 541.5690307617188, "learning_rate": 9.635919272833938e-06, "loss": 15.2957, "step": 105000 }, { "epoch": 0.2121268438127482, "grad_norm": 623.2774047851562, "learning_rate": 9.635788499167494e-06, "loss": 33.0525, "step": 105010 }, { "epoch": 0.21214704444543203, "grad_norm": 456.1404113769531, "learning_rate": 9.63565770290678e-06, "loss": 28.6321, "step": 105020 }, { "epoch": 0.21216724507811585, "grad_norm": 51.168670654296875, "learning_rate": 9.635526884052431e-06, "loss": 27.4311, "step": 105030 }, { "epoch": 0.21218744571079967, "grad_norm": 110.73509979248047, "learning_rate": 9.635396042605088e-06, "loss": 21.2991, "step": 105040 }, { 
"epoch": 0.2122076463434835, "grad_norm": 334.5740051269531, "learning_rate": 9.635265178565386e-06, "loss": 12.511, "step": 105050 }, { "epoch": 0.21222784697616728, "grad_norm": 317.1121520996094, "learning_rate": 9.635134291933964e-06, "loss": 13.7092, "step": 105060 }, { "epoch": 0.2122480476088511, "grad_norm": 395.42852783203125, "learning_rate": 9.63500338271146e-06, "loss": 16.8476, "step": 105070 }, { "epoch": 0.21226824824153492, "grad_norm": 185.52284240722656, "learning_rate": 9.634872450898511e-06, "loss": 19.4594, "step": 105080 }, { "epoch": 0.21228844887421874, "grad_norm": 442.63720703125, "learning_rate": 9.634741496495755e-06, "loss": 18.2281, "step": 105090 }, { "epoch": 0.21230864950690256, "grad_norm": 454.5132751464844, "learning_rate": 9.634610519503833e-06, "loss": 19.671, "step": 105100 }, { "epoch": 0.21232885013958638, "grad_norm": 175.4168243408203, "learning_rate": 9.63447951992338e-06, "loss": 14.6504, "step": 105110 }, { "epoch": 0.21234905077227018, "grad_norm": 506.148681640625, "learning_rate": 9.634348497755035e-06, "loss": 32.2731, "step": 105120 }, { "epoch": 0.212369251404954, "grad_norm": 191.395751953125, "learning_rate": 9.63421745299944e-06, "loss": 25.8174, "step": 105130 }, { "epoch": 0.21238945203763782, "grad_norm": 549.5165405273438, "learning_rate": 9.634086385657231e-06, "loss": 18.8579, "step": 105140 }, { "epoch": 0.21240965267032164, "grad_norm": 558.5545654296875, "learning_rate": 9.633955295729044e-06, "loss": 28.8999, "step": 105150 }, { "epoch": 0.21242985330300546, "grad_norm": 726.7172241210938, "learning_rate": 9.633824183215525e-06, "loss": 34.1918, "step": 105160 }, { "epoch": 0.21245005393568928, "grad_norm": 396.98297119140625, "learning_rate": 9.633693048117306e-06, "loss": 14.727, "step": 105170 }, { "epoch": 0.21247025456837307, "grad_norm": 205.2148895263672, "learning_rate": 9.633561890435031e-06, "loss": 35.7451, "step": 105180 }, { "epoch": 0.2124904552010569, "grad_norm": 32.1508674621582, "learning_rate": 9.633430710169335e-06, "loss": 30.3772, "step": 105190 }, { "epoch": 0.2125106558337407, "grad_norm": 188.542724609375, "learning_rate": 9.633299507320862e-06, "loss": 23.9483, "step": 105200 }, { "epoch": 0.21253085646642453, "grad_norm": 673.6744384765625, "learning_rate": 9.633168281890248e-06, "loss": 26.4373, "step": 105210 }, { "epoch": 0.21255105709910835, "grad_norm": 564.9224853515625, "learning_rate": 9.633037033878135e-06, "loss": 26.3376, "step": 105220 }, { "epoch": 0.21257125773179217, "grad_norm": 307.643798828125, "learning_rate": 9.632905763285159e-06, "loss": 16.5857, "step": 105230 }, { "epoch": 0.212591458364476, "grad_norm": 520.0028076171875, "learning_rate": 9.632774470111964e-06, "loss": 19.2623, "step": 105240 }, { "epoch": 0.21261165899715978, "grad_norm": 1217.56494140625, "learning_rate": 9.632643154359187e-06, "loss": 27.696, "step": 105250 }, { "epoch": 0.2126318596298436, "grad_norm": 1328.8448486328125, "learning_rate": 9.63251181602747e-06, "loss": 42.2831, "step": 105260 }, { "epoch": 0.21265206026252742, "grad_norm": 274.86114501953125, "learning_rate": 9.632380455117452e-06, "loss": 34.2978, "step": 105270 }, { "epoch": 0.21267226089521124, "grad_norm": 877.2543334960938, "learning_rate": 9.632249071629773e-06, "loss": 29.2062, "step": 105280 }, { "epoch": 0.21269246152789506, "grad_norm": 161.43724060058594, "learning_rate": 9.632117665565075e-06, "loss": 17.8729, "step": 105290 }, { "epoch": 0.21271266216057888, "grad_norm": 9.050535202026367, "learning_rate": 
9.631986236923998e-06, "loss": 32.4674, "step": 105300 }, { "epoch": 0.21273286279326267, "grad_norm": 378.3053894042969, "learning_rate": 9.63185478570718e-06, "loss": 30.5205, "step": 105310 }, { "epoch": 0.2127530634259465, "grad_norm": 639.4319458007812, "learning_rate": 9.631723311915264e-06, "loss": 19.0381, "step": 105320 }, { "epoch": 0.2127732640586303, "grad_norm": 526.9933471679688, "learning_rate": 9.63159181554889e-06, "loss": 47.6003, "step": 105330 }, { "epoch": 0.21279346469131413, "grad_norm": 470.7127990722656, "learning_rate": 9.6314602966087e-06, "loss": 17.5264, "step": 105340 }, { "epoch": 0.21281366532399795, "grad_norm": 807.1245727539062, "learning_rate": 9.631328755095334e-06, "loss": 43.1902, "step": 105350 }, { "epoch": 0.21283386595668177, "grad_norm": 495.2669677734375, "learning_rate": 9.631197191009433e-06, "loss": 28.6787, "step": 105360 }, { "epoch": 0.2128540665893656, "grad_norm": 254.3119354248047, "learning_rate": 9.631065604351639e-06, "loss": 16.0459, "step": 105370 }, { "epoch": 0.21287426722204938, "grad_norm": 184.9499969482422, "learning_rate": 9.630933995122594e-06, "loss": 15.4068, "step": 105380 }, { "epoch": 0.2128944678547332, "grad_norm": 687.2736206054688, "learning_rate": 9.630802363322936e-06, "loss": 21.8519, "step": 105390 }, { "epoch": 0.21291466848741702, "grad_norm": 505.1662292480469, "learning_rate": 9.630670708953311e-06, "loss": 24.3643, "step": 105400 }, { "epoch": 0.21293486912010084, "grad_norm": 440.841064453125, "learning_rate": 9.630539032014358e-06, "loss": 23.5284, "step": 105410 }, { "epoch": 0.21295506975278466, "grad_norm": 113.52627563476562, "learning_rate": 9.630407332506718e-06, "loss": 13.9981, "step": 105420 }, { "epoch": 0.21297527038546848, "grad_norm": 316.8888244628906, "learning_rate": 9.630275610431036e-06, "loss": 16.4941, "step": 105430 }, { "epoch": 0.21299547101815228, "grad_norm": 431.79595947265625, "learning_rate": 9.630143865787951e-06, "loss": 16.3349, "step": 105440 }, { "epoch": 0.2130156716508361, "grad_norm": 161.92481994628906, "learning_rate": 9.630012098578108e-06, "loss": 21.3761, "step": 105450 }, { "epoch": 0.21303587228351992, "grad_norm": 124.17230224609375, "learning_rate": 9.629880308802147e-06, "loss": 40.3034, "step": 105460 }, { "epoch": 0.21305607291620374, "grad_norm": 501.4466247558594, "learning_rate": 9.62974849646071e-06, "loss": 22.316, "step": 105470 }, { "epoch": 0.21307627354888756, "grad_norm": 614.591796875, "learning_rate": 9.62961666155444e-06, "loss": 16.7649, "step": 105480 }, { "epoch": 0.21309647418157138, "grad_norm": 315.0533142089844, "learning_rate": 9.629484804083982e-06, "loss": 37.166, "step": 105490 }, { "epoch": 0.21311667481425517, "grad_norm": 338.9149169921875, "learning_rate": 9.629352924049975e-06, "loss": 30.9103, "step": 105500 }, { "epoch": 0.213136875446939, "grad_norm": 229.65621948242188, "learning_rate": 9.629221021453063e-06, "loss": 20.4643, "step": 105510 }, { "epoch": 0.2131570760796228, "grad_norm": 604.4810180664062, "learning_rate": 9.62908909629389e-06, "loss": 18.4634, "step": 105520 }, { "epoch": 0.21317727671230663, "grad_norm": 126.59526824951172, "learning_rate": 9.628957148573099e-06, "loss": 23.5431, "step": 105530 }, { "epoch": 0.21319747734499045, "grad_norm": 320.6859436035156, "learning_rate": 9.62882517829133e-06, "loss": 28.3505, "step": 105540 }, { "epoch": 0.21321767797767427, "grad_norm": 29.92398452758789, "learning_rate": 9.62869318544923e-06, "loss": 24.5034, "step": 105550 }, { "epoch": 0.2132378786103581, 
"grad_norm": 320.8935546875, "learning_rate": 9.62856117004744e-06, "loss": 24.5576, "step": 105560 }, { "epoch": 0.21325807924304188, "grad_norm": 777.5241088867188, "learning_rate": 9.628429132086606e-06, "loss": 22.4324, "step": 105570 }, { "epoch": 0.2132782798757257, "grad_norm": 405.69134521484375, "learning_rate": 9.628297071567368e-06, "loss": 49.801, "step": 105580 }, { "epoch": 0.21329848050840952, "grad_norm": 335.9660339355469, "learning_rate": 9.628164988490372e-06, "loss": 19.5176, "step": 105590 }, { "epoch": 0.21331868114109334, "grad_norm": 292.7089538574219, "learning_rate": 9.628032882856262e-06, "loss": 17.4636, "step": 105600 }, { "epoch": 0.21333888177377716, "grad_norm": 362.6543273925781, "learning_rate": 9.62790075466568e-06, "loss": 18.5619, "step": 105610 }, { "epoch": 0.21335908240646098, "grad_norm": 341.5017395019531, "learning_rate": 9.62776860391927e-06, "loss": 11.2724, "step": 105620 }, { "epoch": 0.21337928303914477, "grad_norm": 832.9976806640625, "learning_rate": 9.62763643061768e-06, "loss": 22.233, "step": 105630 }, { "epoch": 0.2133994836718286, "grad_norm": 602.6585083007812, "learning_rate": 9.62750423476155e-06, "loss": 28.1702, "step": 105640 }, { "epoch": 0.2134196843045124, "grad_norm": 556.5542602539062, "learning_rate": 9.627372016351524e-06, "loss": 31.1442, "step": 105650 }, { "epoch": 0.21343988493719623, "grad_norm": 845.3563842773438, "learning_rate": 9.62723977538825e-06, "loss": 25.2998, "step": 105660 }, { "epoch": 0.21346008556988005, "grad_norm": 510.3428649902344, "learning_rate": 9.62710751187237e-06, "loss": 21.3298, "step": 105670 }, { "epoch": 0.21348028620256387, "grad_norm": 346.706787109375, "learning_rate": 9.62697522580453e-06, "loss": 25.0908, "step": 105680 }, { "epoch": 0.2135004868352477, "grad_norm": 319.75732421875, "learning_rate": 9.62684291718537e-06, "loss": 23.8758, "step": 105690 }, { "epoch": 0.21352068746793149, "grad_norm": 439.9209899902344, "learning_rate": 9.626710586015543e-06, "loss": 24.0023, "step": 105700 }, { "epoch": 0.2135408881006153, "grad_norm": 706.7144775390625, "learning_rate": 9.626578232295689e-06, "loss": 18.1785, "step": 105710 }, { "epoch": 0.21356108873329913, "grad_norm": 255.9178009033203, "learning_rate": 9.626445856026453e-06, "loss": 13.3758, "step": 105720 }, { "epoch": 0.21358128936598295, "grad_norm": 352.71368408203125, "learning_rate": 9.626313457208482e-06, "loss": 11.3927, "step": 105730 }, { "epoch": 0.21360148999866677, "grad_norm": 337.35638427734375, "learning_rate": 9.626181035842418e-06, "loss": 18.072, "step": 105740 }, { "epoch": 0.21362169063135059, "grad_norm": 141.04266357421875, "learning_rate": 9.626048591928911e-06, "loss": 22.8306, "step": 105750 }, { "epoch": 0.21364189126403438, "grad_norm": 573.4781494140625, "learning_rate": 9.625916125468604e-06, "loss": 33.1273, "step": 105760 }, { "epoch": 0.2136620918967182, "grad_norm": 515.8070068359375, "learning_rate": 9.625783636462143e-06, "loss": 18.2064, "step": 105770 }, { "epoch": 0.21368229252940202, "grad_norm": 241.1742401123047, "learning_rate": 9.625651124910173e-06, "loss": 36.4317, "step": 105780 }, { "epoch": 0.21370249316208584, "grad_norm": 453.4202880859375, "learning_rate": 9.625518590813342e-06, "loss": 25.3025, "step": 105790 }, { "epoch": 0.21372269379476966, "grad_norm": 113.76762390136719, "learning_rate": 9.62538603417229e-06, "loss": 30.8774, "step": 105800 }, { "epoch": 0.21374289442745348, "grad_norm": 673.5302124023438, "learning_rate": 9.625253454987672e-06, "loss": 11.2741, "step": 
105810 }, { "epoch": 0.21376309506013727, "grad_norm": 469.35845947265625, "learning_rate": 9.625120853260129e-06, "loss": 27.6385, "step": 105820 }, { "epoch": 0.2137832956928211, "grad_norm": 146.98387145996094, "learning_rate": 9.624988228990305e-06, "loss": 42.1381, "step": 105830 }, { "epoch": 0.2138034963255049, "grad_norm": 372.7845153808594, "learning_rate": 9.624855582178852e-06, "loss": 30.9867, "step": 105840 }, { "epoch": 0.21382369695818873, "grad_norm": 253.32186889648438, "learning_rate": 9.624722912826412e-06, "loss": 10.2385, "step": 105850 }, { "epoch": 0.21384389759087255, "grad_norm": 419.5494384765625, "learning_rate": 9.624590220933635e-06, "loss": 29.3135, "step": 105860 }, { "epoch": 0.21386409822355637, "grad_norm": 1056.16748046875, "learning_rate": 9.624457506501165e-06, "loss": 32.2349, "step": 105870 }, { "epoch": 0.2138842988562402, "grad_norm": 172.6009979248047, "learning_rate": 9.624324769529652e-06, "loss": 20.3206, "step": 105880 }, { "epoch": 0.21390449948892398, "grad_norm": 40.16970443725586, "learning_rate": 9.624192010019739e-06, "loss": 18.4412, "step": 105890 }, { "epoch": 0.2139247001216078, "grad_norm": 598.6088256835938, "learning_rate": 9.624059227972077e-06, "loss": 26.2211, "step": 105900 }, { "epoch": 0.21394490075429162, "grad_norm": 228.47137451171875, "learning_rate": 9.623926423387308e-06, "loss": 39.7224, "step": 105910 }, { "epoch": 0.21396510138697544, "grad_norm": 910.9898071289062, "learning_rate": 9.623793596266084e-06, "loss": 29.0457, "step": 105920 }, { "epoch": 0.21398530201965926, "grad_norm": 181.25929260253906, "learning_rate": 9.623660746609051e-06, "loss": 28.0292, "step": 105930 }, { "epoch": 0.21400550265234308, "grad_norm": 11.92809772491455, "learning_rate": 9.623527874416857e-06, "loss": 26.4717, "step": 105940 }, { "epoch": 0.21402570328502687, "grad_norm": 529.3529052734375, "learning_rate": 9.623394979690149e-06, "loss": 17.4593, "step": 105950 }, { "epoch": 0.2140459039177107, "grad_norm": 454.60321044921875, "learning_rate": 9.623262062429573e-06, "loss": 22.5844, "step": 105960 }, { "epoch": 0.21406610455039451, "grad_norm": 906.16748046875, "learning_rate": 9.623129122635778e-06, "loss": 50.2058, "step": 105970 }, { "epoch": 0.21408630518307833, "grad_norm": 182.6000213623047, "learning_rate": 9.622996160309415e-06, "loss": 16.7407, "step": 105980 }, { "epoch": 0.21410650581576215, "grad_norm": 469.909423828125, "learning_rate": 9.622863175451128e-06, "loss": 14.0903, "step": 105990 }, { "epoch": 0.21412670644844597, "grad_norm": 177.44290161132812, "learning_rate": 9.622730168061568e-06, "loss": 24.2696, "step": 106000 }, { "epoch": 0.2141469070811298, "grad_norm": 913.941162109375, "learning_rate": 9.622597138141379e-06, "loss": 25.4878, "step": 106010 }, { "epoch": 0.2141671077138136, "grad_norm": 214.55035400390625, "learning_rate": 9.622464085691214e-06, "loss": 32.1625, "step": 106020 }, { "epoch": 0.2141873083464974, "grad_norm": 640.6804809570312, "learning_rate": 9.622331010711718e-06, "loss": 13.401, "step": 106030 }, { "epoch": 0.21420750897918123, "grad_norm": 70.52410125732422, "learning_rate": 9.622197913203543e-06, "loss": 18.1005, "step": 106040 }, { "epoch": 0.21422770961186505, "grad_norm": 778.2262573242188, "learning_rate": 9.622064793167336e-06, "loss": 28.411, "step": 106050 }, { "epoch": 0.21424791024454887, "grad_norm": 225.76895141601562, "learning_rate": 9.621931650603747e-06, "loss": 21.2988, "step": 106060 }, { "epoch": 0.2142681108772327, "grad_norm": 330.450927734375, 
"learning_rate": 9.62179848551342e-06, "loss": 21.3016, "step": 106070 }, { "epoch": 0.21428831150991648, "grad_norm": 333.67413330078125, "learning_rate": 9.62166529789701e-06, "loss": 19.7966, "step": 106080 }, { "epoch": 0.2143085121426003, "grad_norm": 299.3463439941406, "learning_rate": 9.621532087755163e-06, "loss": 18.3854, "step": 106090 }, { "epoch": 0.21432871277528412, "grad_norm": 495.6922302246094, "learning_rate": 9.62139885508853e-06, "loss": 28.7322, "step": 106100 }, { "epoch": 0.21434891340796794, "grad_norm": 150.03817749023438, "learning_rate": 9.621265599897759e-06, "loss": 23.8521, "step": 106110 }, { "epoch": 0.21436911404065176, "grad_norm": 287.9223327636719, "learning_rate": 9.621132322183502e-06, "loss": 32.3816, "step": 106120 }, { "epoch": 0.21438931467333558, "grad_norm": 168.50302124023438, "learning_rate": 9.620999021946404e-06, "loss": 20.1759, "step": 106130 }, { "epoch": 0.21440951530601937, "grad_norm": 804.5635375976562, "learning_rate": 9.620865699187118e-06, "loss": 37.1679, "step": 106140 }, { "epoch": 0.2144297159387032, "grad_norm": 235.29754638671875, "learning_rate": 9.620732353906293e-06, "loss": 23.3458, "step": 106150 }, { "epoch": 0.214449916571387, "grad_norm": 150.1128692626953, "learning_rate": 9.620598986104578e-06, "loss": 30.2106, "step": 106160 }, { "epoch": 0.21447011720407083, "grad_norm": 453.7975769042969, "learning_rate": 9.620465595782626e-06, "loss": 17.6138, "step": 106170 }, { "epoch": 0.21449031783675465, "grad_norm": 352.6557922363281, "learning_rate": 9.620332182941084e-06, "loss": 21.9908, "step": 106180 }, { "epoch": 0.21451051846943847, "grad_norm": 643.4624633789062, "learning_rate": 9.620198747580604e-06, "loss": 14.7946, "step": 106190 }, { "epoch": 0.2145307191021223, "grad_norm": 365.9803161621094, "learning_rate": 9.620065289701835e-06, "loss": 14.0779, "step": 106200 }, { "epoch": 0.21455091973480608, "grad_norm": 322.1335754394531, "learning_rate": 9.619931809305428e-06, "loss": 22.869, "step": 106210 }, { "epoch": 0.2145711203674899, "grad_norm": 450.4367370605469, "learning_rate": 9.619798306392034e-06, "loss": 50.7252, "step": 106220 }, { "epoch": 0.21459132100017372, "grad_norm": 520.1192626953125, "learning_rate": 9.619664780962304e-06, "loss": 20.1309, "step": 106230 }, { "epoch": 0.21461152163285754, "grad_norm": 496.409423828125, "learning_rate": 9.619531233016885e-06, "loss": 23.2359, "step": 106240 }, { "epoch": 0.21463172226554136, "grad_norm": 200.6538543701172, "learning_rate": 9.619397662556434e-06, "loss": 22.8069, "step": 106250 }, { "epoch": 0.21465192289822518, "grad_norm": 465.4859313964844, "learning_rate": 9.6192640695816e-06, "loss": 19.7823, "step": 106260 }, { "epoch": 0.21467212353090898, "grad_norm": 682.9042358398438, "learning_rate": 9.619130454093031e-06, "loss": 18.6066, "step": 106270 }, { "epoch": 0.2146923241635928, "grad_norm": 605.0374145507812, "learning_rate": 9.618996816091382e-06, "loss": 23.5346, "step": 106280 }, { "epoch": 0.21471252479627662, "grad_norm": 757.7998657226562, "learning_rate": 9.6188631555773e-06, "loss": 37.1718, "step": 106290 }, { "epoch": 0.21473272542896044, "grad_norm": 583.7636108398438, "learning_rate": 9.61872947255144e-06, "loss": 42.9624, "step": 106300 }, { "epoch": 0.21475292606164426, "grad_norm": 506.0607604980469, "learning_rate": 9.618595767014456e-06, "loss": 29.5447, "step": 106310 }, { "epoch": 0.21477312669432808, "grad_norm": 388.08837890625, "learning_rate": 9.618462038966994e-06, "loss": 18.6144, "step": 106320 }, { "epoch": 
0.21479332732701187, "grad_norm": 468.95562744140625, "learning_rate": 9.618328288409708e-06, "loss": 23.4604, "step": 106330 }, { "epoch": 0.2148135279596957, "grad_norm": 284.74102783203125, "learning_rate": 9.61819451534325e-06, "loss": 18.2775, "step": 106340 }, { "epoch": 0.2148337285923795, "grad_norm": 279.8360595703125, "learning_rate": 9.618060719768272e-06, "loss": 21.2849, "step": 106350 }, { "epoch": 0.21485392922506333, "grad_norm": 351.17144775390625, "learning_rate": 9.617926901685427e-06, "loss": 18.6386, "step": 106360 }, { "epoch": 0.21487412985774715, "grad_norm": 30.238037109375, "learning_rate": 9.617793061095366e-06, "loss": 35.902, "step": 106370 }, { "epoch": 0.21489433049043097, "grad_norm": 218.7053985595703, "learning_rate": 9.617659197998741e-06, "loss": 14.3084, "step": 106380 }, { "epoch": 0.2149145311231148, "grad_norm": 165.39132690429688, "learning_rate": 9.617525312396206e-06, "loss": 25.3413, "step": 106390 }, { "epoch": 0.21493473175579858, "grad_norm": 350.9925231933594, "learning_rate": 9.617391404288412e-06, "loss": 41.2889, "step": 106400 }, { "epoch": 0.2149549323884824, "grad_norm": 345.8832702636719, "learning_rate": 9.617257473676014e-06, "loss": 12.9189, "step": 106410 }, { "epoch": 0.21497513302116622, "grad_norm": 357.4729919433594, "learning_rate": 9.617123520559662e-06, "loss": 24.3934, "step": 106420 }, { "epoch": 0.21499533365385004, "grad_norm": 186.6416778564453, "learning_rate": 9.616989544940008e-06, "loss": 26.8619, "step": 106430 }, { "epoch": 0.21501553428653386, "grad_norm": 170.92840576171875, "learning_rate": 9.61685554681771e-06, "loss": 20.7272, "step": 106440 }, { "epoch": 0.21503573491921768, "grad_norm": 466.51190185546875, "learning_rate": 9.616721526193416e-06, "loss": 25.6284, "step": 106450 }, { "epoch": 0.21505593555190147, "grad_norm": 732.600830078125, "learning_rate": 9.616587483067782e-06, "loss": 34.207, "step": 106460 }, { "epoch": 0.2150761361845853, "grad_norm": 262.58404541015625, "learning_rate": 9.61645341744146e-06, "loss": 16.7664, "step": 106470 }, { "epoch": 0.2150963368172691, "grad_norm": 200.99440002441406, "learning_rate": 9.616319329315105e-06, "loss": 19.5444, "step": 106480 }, { "epoch": 0.21511653744995293, "grad_norm": 504.94146728515625, "learning_rate": 9.616185218689369e-06, "loss": 17.5292, "step": 106490 }, { "epoch": 0.21513673808263675, "grad_norm": 345.2034606933594, "learning_rate": 9.616051085564905e-06, "loss": 24.942, "step": 106500 }, { "epoch": 0.21515693871532057, "grad_norm": 491.8196105957031, "learning_rate": 9.61591692994237e-06, "loss": 24.6979, "step": 106510 }, { "epoch": 0.2151771393480044, "grad_norm": 0.0, "learning_rate": 9.615782751822413e-06, "loss": 18.6308, "step": 106520 }, { "epoch": 0.21519733998068818, "grad_norm": 261.9867858886719, "learning_rate": 9.615648551205692e-06, "loss": 12.4077, "step": 106530 }, { "epoch": 0.215217540613372, "grad_norm": 394.6444396972656, "learning_rate": 9.61551432809286e-06, "loss": 18.7726, "step": 106540 }, { "epoch": 0.21523774124605582, "grad_norm": 223.36647033691406, "learning_rate": 9.615380082484573e-06, "loss": 18.6529, "step": 106550 }, { "epoch": 0.21525794187873964, "grad_norm": 515.4940795898438, "learning_rate": 9.61524581438148e-06, "loss": 22.5348, "step": 106560 }, { "epoch": 0.21527814251142346, "grad_norm": 606.5992431640625, "learning_rate": 9.61511152378424e-06, "loss": 25.2943, "step": 106570 }, { "epoch": 0.21529834314410728, "grad_norm": 405.7440185546875, "learning_rate": 9.614977210693505e-06, "loss": 
30.6357, "step": 106580 }, { "epoch": 0.21531854377679108, "grad_norm": 481.6194152832031, "learning_rate": 9.614842875109933e-06, "loss": 26.367, "step": 106590 }, { "epoch": 0.2153387444094749, "grad_norm": 341.5598449707031, "learning_rate": 9.614708517034176e-06, "loss": 15.5823, "step": 106600 }, { "epoch": 0.21535894504215872, "grad_norm": 346.8001708984375, "learning_rate": 9.61457413646689e-06, "loss": 33.0959, "step": 106610 }, { "epoch": 0.21537914567484254, "grad_norm": 271.7821960449219, "learning_rate": 9.614439733408726e-06, "loss": 57.9604, "step": 106620 }, { "epoch": 0.21539934630752636, "grad_norm": 79.5567398071289, "learning_rate": 9.614305307860346e-06, "loss": 22.1122, "step": 106630 }, { "epoch": 0.21541954694021018, "grad_norm": 5.893133163452148, "learning_rate": 9.6141708598224e-06, "loss": 23.4139, "step": 106640 }, { "epoch": 0.21543974757289397, "grad_norm": 450.4199523925781, "learning_rate": 9.614036389295545e-06, "loss": 35.7561, "step": 106650 }, { "epoch": 0.2154599482055778, "grad_norm": 395.6968078613281, "learning_rate": 9.613901896280436e-06, "loss": 31.719, "step": 106660 }, { "epoch": 0.2154801488382616, "grad_norm": 14.778434753417969, "learning_rate": 9.61376738077773e-06, "loss": 19.4138, "step": 106670 }, { "epoch": 0.21550034947094543, "grad_norm": 81.33409118652344, "learning_rate": 9.613632842788079e-06, "loss": 19.4228, "step": 106680 }, { "epoch": 0.21552055010362925, "grad_norm": 655.814697265625, "learning_rate": 9.613498282312143e-06, "loss": 37.7355, "step": 106690 }, { "epoch": 0.21554075073631307, "grad_norm": 945.9786376953125, "learning_rate": 9.613363699350575e-06, "loss": 33.2514, "step": 106700 }, { "epoch": 0.2155609513689969, "grad_norm": 613.876708984375, "learning_rate": 9.613229093904033e-06, "loss": 20.2544, "step": 106710 }, { "epoch": 0.21558115200168068, "grad_norm": 347.0071716308594, "learning_rate": 9.613094465973171e-06, "loss": 19.2405, "step": 106720 }, { "epoch": 0.2156013526343645, "grad_norm": 391.2256774902344, "learning_rate": 9.612959815558645e-06, "loss": 21.5669, "step": 106730 }, { "epoch": 0.21562155326704832, "grad_norm": 262.7676696777344, "learning_rate": 9.612825142661115e-06, "loss": 20.3953, "step": 106740 }, { "epoch": 0.21564175389973214, "grad_norm": 215.56687927246094, "learning_rate": 9.612690447281233e-06, "loss": 18.0082, "step": 106750 }, { "epoch": 0.21566195453241596, "grad_norm": 419.5194396972656, "learning_rate": 9.612555729419656e-06, "loss": 32.976, "step": 106760 }, { "epoch": 0.21568215516509978, "grad_norm": 352.706298828125, "learning_rate": 9.612420989077045e-06, "loss": 15.573, "step": 106770 }, { "epoch": 0.21570235579778357, "grad_norm": 788.6929321289062, "learning_rate": 9.61228622625405e-06, "loss": 40.8067, "step": 106780 }, { "epoch": 0.2157225564304674, "grad_norm": 252.27297973632812, "learning_rate": 9.612151440951334e-06, "loss": 28.4294, "step": 106790 }, { "epoch": 0.2157427570631512, "grad_norm": 272.7535400390625, "learning_rate": 9.612016633169552e-06, "loss": 23.2759, "step": 106800 }, { "epoch": 0.21576295769583503, "grad_norm": 226.66220092773438, "learning_rate": 9.611881802909358e-06, "loss": 46.624, "step": 106810 }, { "epoch": 0.21578315832851885, "grad_norm": 302.8719177246094, "learning_rate": 9.611746950171412e-06, "loss": 29.6137, "step": 106820 }, { "epoch": 0.21580335896120267, "grad_norm": 571.4275512695312, "learning_rate": 9.61161207495637e-06, "loss": 23.2898, "step": 106830 }, { "epoch": 0.2158235595938865, "grad_norm": 164.63987731933594, 
"learning_rate": 9.611477177264892e-06, "loss": 27.7975, "step": 106840 }, { "epoch": 0.21584376022657029, "grad_norm": 535.705810546875, "learning_rate": 9.611342257097632e-06, "loss": 41.7002, "step": 106850 }, { "epoch": 0.2158639608592541, "grad_norm": 590.9491577148438, "learning_rate": 9.61120731445525e-06, "loss": 17.6121, "step": 106860 }, { "epoch": 0.21588416149193793, "grad_norm": 6.254391670227051, "learning_rate": 9.611072349338402e-06, "loss": 22.8545, "step": 106870 }, { "epoch": 0.21590436212462175, "grad_norm": 286.39208984375, "learning_rate": 9.610937361747747e-06, "loss": 14.3659, "step": 106880 }, { "epoch": 0.21592456275730557, "grad_norm": 967.7152099609375, "learning_rate": 9.610802351683943e-06, "loss": 34.7506, "step": 106890 }, { "epoch": 0.21594476338998939, "grad_norm": 17.1362247467041, "learning_rate": 9.610667319147648e-06, "loss": 15.6267, "step": 106900 }, { "epoch": 0.21596496402267318, "grad_norm": 359.0757751464844, "learning_rate": 9.610532264139518e-06, "loss": 21.3726, "step": 106910 }, { "epoch": 0.215985164655357, "grad_norm": 388.51324462890625, "learning_rate": 9.610397186660214e-06, "loss": 16.1625, "step": 106920 }, { "epoch": 0.21600536528804082, "grad_norm": 455.5050048828125, "learning_rate": 9.610262086710393e-06, "loss": 15.9479, "step": 106930 }, { "epoch": 0.21602556592072464, "grad_norm": 376.7058410644531, "learning_rate": 9.610126964290714e-06, "loss": 16.8989, "step": 106940 }, { "epoch": 0.21604576655340846, "grad_norm": 345.0117492675781, "learning_rate": 9.609991819401836e-06, "loss": 31.7803, "step": 106950 }, { "epoch": 0.21606596718609228, "grad_norm": 252.3141632080078, "learning_rate": 9.609856652044417e-06, "loss": 19.7048, "step": 106960 }, { "epoch": 0.21608616781877607, "grad_norm": 678.9373779296875, "learning_rate": 9.609721462219115e-06, "loss": 26.4886, "step": 106970 }, { "epoch": 0.2161063684514599, "grad_norm": 409.2751159667969, "learning_rate": 9.609586249926589e-06, "loss": 30.0331, "step": 106980 }, { "epoch": 0.2161265690841437, "grad_norm": 473.8980407714844, "learning_rate": 9.609451015167498e-06, "loss": 16.8544, "step": 106990 }, { "epoch": 0.21614676971682753, "grad_norm": 390.9248046875, "learning_rate": 9.609315757942504e-06, "loss": 27.1546, "step": 107000 }, { "epoch": 0.21616697034951135, "grad_norm": 29.839492797851562, "learning_rate": 9.609180478252262e-06, "loss": 25.4001, "step": 107010 }, { "epoch": 0.21618717098219517, "grad_norm": 298.6227111816406, "learning_rate": 9.609045176097435e-06, "loss": 25.3501, "step": 107020 }, { "epoch": 0.216207371614879, "grad_norm": 784.2269287109375, "learning_rate": 9.608909851478681e-06, "loss": 17.4117, "step": 107030 }, { "epoch": 0.21622757224756278, "grad_norm": 347.4877014160156, "learning_rate": 9.608774504396659e-06, "loss": 17.0232, "step": 107040 }, { "epoch": 0.2162477728802466, "grad_norm": 592.7592163085938, "learning_rate": 9.608639134852028e-06, "loss": 35.3994, "step": 107050 }, { "epoch": 0.21626797351293042, "grad_norm": 275.7370300292969, "learning_rate": 9.60850374284545e-06, "loss": 29.4426, "step": 107060 }, { "epoch": 0.21628817414561424, "grad_norm": 101.7520751953125, "learning_rate": 9.608368328377582e-06, "loss": 24.4883, "step": 107070 }, { "epoch": 0.21630837477829806, "grad_norm": 369.6875, "learning_rate": 9.608232891449088e-06, "loss": 22.2167, "step": 107080 }, { "epoch": 0.21632857541098188, "grad_norm": 453.6797180175781, "learning_rate": 9.608097432060626e-06, "loss": 28.343, "step": 107090 }, { "epoch": 
0.21634877604366567, "grad_norm": 128.98529052734375, "learning_rate": 9.607961950212855e-06, "loss": 31.0087, "step": 107100 }, { "epoch": 0.2163689766763495, "grad_norm": 189.78399658203125, "learning_rate": 9.607826445906434e-06, "loss": 28.4285, "step": 107110 }, { "epoch": 0.21638917730903331, "grad_norm": 274.7174072265625, "learning_rate": 9.607690919142028e-06, "loss": 24.8506, "step": 107120 }, { "epoch": 0.21640937794171713, "grad_norm": 725.8607788085938, "learning_rate": 9.607555369920296e-06, "loss": 18.3541, "step": 107130 }, { "epoch": 0.21642957857440095, "grad_norm": 507.2761535644531, "learning_rate": 9.607419798241897e-06, "loss": 22.0484, "step": 107140 }, { "epoch": 0.21644977920708477, "grad_norm": 570.9094848632812, "learning_rate": 9.607284204107493e-06, "loss": 20.056, "step": 107150 }, { "epoch": 0.2164699798397686, "grad_norm": 208.45387268066406, "learning_rate": 9.607148587517746e-06, "loss": 18.9086, "step": 107160 }, { "epoch": 0.2164901804724524, "grad_norm": 378.5279846191406, "learning_rate": 9.607012948473314e-06, "loss": 28.591, "step": 107170 }, { "epoch": 0.2165103811051362, "grad_norm": 308.3681945800781, "learning_rate": 9.606877286974859e-06, "loss": 28.9336, "step": 107180 }, { "epoch": 0.21653058173782003, "grad_norm": 543.411865234375, "learning_rate": 9.606741603023043e-06, "loss": 25.0842, "step": 107190 }, { "epoch": 0.21655078237050385, "grad_norm": 321.71380615234375, "learning_rate": 9.606605896618528e-06, "loss": 19.3353, "step": 107200 }, { "epoch": 0.21657098300318767, "grad_norm": 296.8135070800781, "learning_rate": 9.606470167761975e-06, "loss": 29.9971, "step": 107210 }, { "epoch": 0.2165911836358715, "grad_norm": 807.8027954101562, "learning_rate": 9.606334416454045e-06, "loss": 21.1007, "step": 107220 }, { "epoch": 0.21661138426855528, "grad_norm": 192.43112182617188, "learning_rate": 9.606198642695398e-06, "loss": 17.6655, "step": 107230 }, { "epoch": 0.2166315849012391, "grad_norm": 502.1242370605469, "learning_rate": 9.606062846486698e-06, "loss": 26.5737, "step": 107240 }, { "epoch": 0.21665178553392292, "grad_norm": 261.8995666503906, "learning_rate": 9.605927027828608e-06, "loss": 29.288, "step": 107250 }, { "epoch": 0.21667198616660674, "grad_norm": 581.6419067382812, "learning_rate": 9.605791186721787e-06, "loss": 55.1319, "step": 107260 }, { "epoch": 0.21669218679929056, "grad_norm": 714.2874145507812, "learning_rate": 9.605655323166899e-06, "loss": 22.0499, "step": 107270 }, { "epoch": 0.21671238743197438, "grad_norm": 754.3344116210938, "learning_rate": 9.605519437164604e-06, "loss": 18.609, "step": 107280 }, { "epoch": 0.21673258806465817, "grad_norm": 246.62757873535156, "learning_rate": 9.605383528715567e-06, "loss": 25.0839, "step": 107290 }, { "epoch": 0.216752788697342, "grad_norm": 207.9958953857422, "learning_rate": 9.605247597820448e-06, "loss": 16.158, "step": 107300 }, { "epoch": 0.2167729893300258, "grad_norm": 906.9510498046875, "learning_rate": 9.605111644479913e-06, "loss": 24.2108, "step": 107310 }, { "epoch": 0.21679318996270963, "grad_norm": 886.7114868164062, "learning_rate": 9.604975668694621e-06, "loss": 20.522, "step": 107320 }, { "epoch": 0.21681339059539345, "grad_norm": 625.944091796875, "learning_rate": 9.604839670465236e-06, "loss": 27.3642, "step": 107330 }, { "epoch": 0.21683359122807727, "grad_norm": 304.8418884277344, "learning_rate": 9.604703649792421e-06, "loss": 23.6104, "step": 107340 }, { "epoch": 0.2168537918607611, "grad_norm": 721.0968017578125, "learning_rate": 9.60456760667684e-06, 
"loss": 19.9008, "step": 107350 }, { "epoch": 0.21687399249344488, "grad_norm": 625.4487915039062, "learning_rate": 9.604431541119155e-06, "loss": 29.7023, "step": 107360 }, { "epoch": 0.2168941931261287, "grad_norm": 378.6073913574219, "learning_rate": 9.604295453120028e-06, "loss": 18.9232, "step": 107370 }, { "epoch": 0.21691439375881252, "grad_norm": 556.75439453125, "learning_rate": 9.604159342680125e-06, "loss": 18.6585, "step": 107380 }, { "epoch": 0.21693459439149634, "grad_norm": 1016.4850463867188, "learning_rate": 9.604023209800106e-06, "loss": 50.3389, "step": 107390 }, { "epoch": 0.21695479502418016, "grad_norm": 438.6138916015625, "learning_rate": 9.603887054480636e-06, "loss": 14.1566, "step": 107400 }, { "epoch": 0.21697499565686398, "grad_norm": 349.15924072265625, "learning_rate": 9.60375087672238e-06, "loss": 37.0312, "step": 107410 }, { "epoch": 0.21699519628954778, "grad_norm": 451.6742248535156, "learning_rate": 9.603614676526e-06, "loss": 17.6007, "step": 107420 }, { "epoch": 0.2170153969222316, "grad_norm": 675.8563842773438, "learning_rate": 9.603478453892161e-06, "loss": 51.1016, "step": 107430 }, { "epoch": 0.21703559755491542, "grad_norm": 371.1186828613281, "learning_rate": 9.603342208821526e-06, "loss": 21.7236, "step": 107440 }, { "epoch": 0.21705579818759924, "grad_norm": 523.7366943359375, "learning_rate": 9.60320594131476e-06, "loss": 32.1254, "step": 107450 }, { "epoch": 0.21707599882028306, "grad_norm": 411.5473327636719, "learning_rate": 9.603069651372526e-06, "loss": 25.7108, "step": 107460 }, { "epoch": 0.21709619945296688, "grad_norm": 419.90386962890625, "learning_rate": 9.602933338995489e-06, "loss": 31.5363, "step": 107470 }, { "epoch": 0.2171164000856507, "grad_norm": 211.2162628173828, "learning_rate": 9.602797004184311e-06, "loss": 17.8802, "step": 107480 }, { "epoch": 0.2171366007183345, "grad_norm": 178.4124755859375, "learning_rate": 9.602660646939663e-06, "loss": 20.6594, "step": 107490 }, { "epoch": 0.2171568013510183, "grad_norm": 436.7868347167969, "learning_rate": 9.602524267262202e-06, "loss": 17.0319, "step": 107500 }, { "epoch": 0.21717700198370213, "grad_norm": 613.3228149414062, "learning_rate": 9.602387865152597e-06, "loss": 19.3349, "step": 107510 }, { "epoch": 0.21719720261638595, "grad_norm": 271.5887145996094, "learning_rate": 9.602251440611512e-06, "loss": 14.5195, "step": 107520 }, { "epoch": 0.21721740324906977, "grad_norm": 226.7190399169922, "learning_rate": 9.602114993639611e-06, "loss": 30.9531, "step": 107530 }, { "epoch": 0.2172376038817536, "grad_norm": 1274.6104736328125, "learning_rate": 9.60197852423756e-06, "loss": 24.8917, "step": 107540 }, { "epoch": 0.21725780451443738, "grad_norm": 257.9808349609375, "learning_rate": 9.601842032406024e-06, "loss": 14.5826, "step": 107550 }, { "epoch": 0.2172780051471212, "grad_norm": 138.10923767089844, "learning_rate": 9.601705518145668e-06, "loss": 18.0225, "step": 107560 }, { "epoch": 0.21729820577980502, "grad_norm": 652.5715942382812, "learning_rate": 9.601568981457156e-06, "loss": 20.4635, "step": 107570 }, { "epoch": 0.21731840641248884, "grad_norm": 677.6094970703125, "learning_rate": 9.601432422341156e-06, "loss": 38.3076, "step": 107580 }, { "epoch": 0.21733860704517266, "grad_norm": 172.53482055664062, "learning_rate": 9.601295840798333e-06, "loss": 27.0276, "step": 107590 }, { "epoch": 0.21735880767785648, "grad_norm": 790.575439453125, "learning_rate": 9.601159236829353e-06, "loss": 35.0712, "step": 107600 }, { "epoch": 0.21737900831054027, "grad_norm": 
952.8884887695312, "learning_rate": 9.60102261043488e-06, "loss": 32.1461, "step": 107610 }, { "epoch": 0.2173992089432241, "grad_norm": 219.7374725341797, "learning_rate": 9.60088596161558e-06, "loss": 29.0019, "step": 107620 }, { "epoch": 0.2174194095759079, "grad_norm": 458.9838562011719, "learning_rate": 9.60074929037212e-06, "loss": 23.6285, "step": 107630 }, { "epoch": 0.21743961020859173, "grad_norm": 432.98138427734375, "learning_rate": 9.600612596705167e-06, "loss": 21.9855, "step": 107640 }, { "epoch": 0.21745981084127555, "grad_norm": 760.4342651367188, "learning_rate": 9.600475880615385e-06, "loss": 34.3365, "step": 107650 }, { "epoch": 0.21748001147395937, "grad_norm": 322.08953857421875, "learning_rate": 9.600339142103441e-06, "loss": 23.2254, "step": 107660 }, { "epoch": 0.2175002121066432, "grad_norm": 737.721923828125, "learning_rate": 9.600202381170004e-06, "loss": 53.575, "step": 107670 }, { "epoch": 0.21752041273932698, "grad_norm": 923.2410888671875, "learning_rate": 9.600065597815738e-06, "loss": 50.5392, "step": 107680 }, { "epoch": 0.2175406133720108, "grad_norm": 271.1661376953125, "learning_rate": 9.59992879204131e-06, "loss": 37.8647, "step": 107690 }, { "epoch": 0.21756081400469462, "grad_norm": 338.381591796875, "learning_rate": 9.599791963847388e-06, "loss": 27.4579, "step": 107700 }, { "epoch": 0.21758101463737844, "grad_norm": 256.3115234375, "learning_rate": 9.599655113234635e-06, "loss": 13.811, "step": 107710 }, { "epoch": 0.21760121527006226, "grad_norm": 391.17803955078125, "learning_rate": 9.599518240203722e-06, "loss": 15.3174, "step": 107720 }, { "epoch": 0.21762141590274608, "grad_norm": 588.9943237304688, "learning_rate": 9.599381344755315e-06, "loss": 13.929, "step": 107730 }, { "epoch": 0.21764161653542988, "grad_norm": 257.79913330078125, "learning_rate": 9.599244426890082e-06, "loss": 9.7967, "step": 107740 }, { "epoch": 0.2176618171681137, "grad_norm": 558.75244140625, "learning_rate": 9.59910748660869e-06, "loss": 18.7975, "step": 107750 }, { "epoch": 0.21768201780079752, "grad_norm": 470.57305908203125, "learning_rate": 9.598970523911803e-06, "loss": 40.6686, "step": 107760 }, { "epoch": 0.21770221843348134, "grad_norm": 563.8414916992188, "learning_rate": 9.598833538800094e-06, "loss": 27.8138, "step": 107770 }, { "epoch": 0.21772241906616516, "grad_norm": 31.067607879638672, "learning_rate": 9.598696531274227e-06, "loss": 22.4565, "step": 107780 }, { "epoch": 0.21774261969884898, "grad_norm": 517.5081787109375, "learning_rate": 9.598559501334872e-06, "loss": 24.7159, "step": 107790 }, { "epoch": 0.2177628203315328, "grad_norm": 511.7974548339844, "learning_rate": 9.598422448982697e-06, "loss": 25.7823, "step": 107800 }, { "epoch": 0.2177830209642166, "grad_norm": 656.6521606445312, "learning_rate": 9.598285374218367e-06, "loss": 17.8043, "step": 107810 }, { "epoch": 0.2178032215969004, "grad_norm": 465.1502380371094, "learning_rate": 9.59814827704255e-06, "loss": 12.7316, "step": 107820 }, { "epoch": 0.21782342222958423, "grad_norm": 231.67138671875, "learning_rate": 9.598011157455917e-06, "loss": 15.0623, "step": 107830 }, { "epoch": 0.21784362286226805, "grad_norm": 346.3719787597656, "learning_rate": 9.597874015459136e-06, "loss": 25.4132, "step": 107840 }, { "epoch": 0.21786382349495187, "grad_norm": 277.23211669921875, "learning_rate": 9.597736851052874e-06, "loss": 24.0558, "step": 107850 }, { "epoch": 0.2178840241276357, "grad_norm": 830.926513671875, "learning_rate": 9.597599664237801e-06, "loss": 16.0927, "step": 107860 }, { 
"epoch": 0.21790422476031948, "grad_norm": 175.6227264404297, "learning_rate": 9.597462455014585e-06, "loss": 35.8062, "step": 107870 }, { "epoch": 0.2179244253930033, "grad_norm": 2479.54052734375, "learning_rate": 9.597325223383895e-06, "loss": 31.0808, "step": 107880 }, { "epoch": 0.21794462602568712, "grad_norm": 187.91407775878906, "learning_rate": 9.597187969346398e-06, "loss": 13.4602, "step": 107890 }, { "epoch": 0.21796482665837094, "grad_norm": 132.35665893554688, "learning_rate": 9.597050692902765e-06, "loss": 21.4644, "step": 107900 }, { "epoch": 0.21798502729105476, "grad_norm": 367.8532409667969, "learning_rate": 9.596913394053664e-06, "loss": 24.8335, "step": 107910 }, { "epoch": 0.21800522792373858, "grad_norm": 546.89306640625, "learning_rate": 9.596776072799767e-06, "loss": 19.5862, "step": 107920 }, { "epoch": 0.21802542855642237, "grad_norm": 348.8690185546875, "learning_rate": 9.596638729141739e-06, "loss": 10.1862, "step": 107930 }, { "epoch": 0.2180456291891062, "grad_norm": 241.12767028808594, "learning_rate": 9.59650136308025e-06, "loss": 17.195, "step": 107940 }, { "epoch": 0.21806582982179, "grad_norm": 635.4271240234375, "learning_rate": 9.596363974615973e-06, "loss": 25.7954, "step": 107950 }, { "epoch": 0.21808603045447383, "grad_norm": 255.62783813476562, "learning_rate": 9.596226563749575e-06, "loss": 28.5505, "step": 107960 }, { "epoch": 0.21810623108715765, "grad_norm": 165.38198852539062, "learning_rate": 9.596089130481727e-06, "loss": 13.5765, "step": 107970 }, { "epoch": 0.21812643171984147, "grad_norm": 506.2337951660156, "learning_rate": 9.595951674813096e-06, "loss": 24.8279, "step": 107980 }, { "epoch": 0.2181466323525253, "grad_norm": 225.37368774414062, "learning_rate": 9.595814196744355e-06, "loss": 12.9455, "step": 107990 }, { "epoch": 0.21816683298520909, "grad_norm": 201.5785675048828, "learning_rate": 9.595676696276173e-06, "loss": 21.0967, "step": 108000 }, { "epoch": 0.2181870336178929, "grad_norm": 1118.9906005859375, "learning_rate": 9.595539173409222e-06, "loss": 26.1802, "step": 108010 }, { "epoch": 0.21820723425057673, "grad_norm": 380.7827453613281, "learning_rate": 9.595401628144166e-06, "loss": 24.0537, "step": 108020 }, { "epoch": 0.21822743488326055, "grad_norm": 0.0, "learning_rate": 9.595264060481684e-06, "loss": 19.8214, "step": 108030 }, { "epoch": 0.21824763551594437, "grad_norm": 196.51121520996094, "learning_rate": 9.59512647042244e-06, "loss": 19.1049, "step": 108040 }, { "epoch": 0.21826783614862819, "grad_norm": 146.11227416992188, "learning_rate": 9.594988857967107e-06, "loss": 23.5608, "step": 108050 }, { "epoch": 0.21828803678131198, "grad_norm": 262.90264892578125, "learning_rate": 9.594851223116357e-06, "loss": 13.5468, "step": 108060 }, { "epoch": 0.2183082374139958, "grad_norm": 575.9906616210938, "learning_rate": 9.594713565870859e-06, "loss": 15.4239, "step": 108070 }, { "epoch": 0.21832843804667962, "grad_norm": 368.9980163574219, "learning_rate": 9.594575886231284e-06, "loss": 18.8937, "step": 108080 }, { "epoch": 0.21834863867936344, "grad_norm": 323.3814392089844, "learning_rate": 9.594438184198302e-06, "loss": 50.8597, "step": 108090 }, { "epoch": 0.21836883931204726, "grad_norm": 762.1126098632812, "learning_rate": 9.594300459772588e-06, "loss": 16.905, "step": 108100 }, { "epoch": 0.21838903994473108, "grad_norm": 782.7981567382812, "learning_rate": 9.59416271295481e-06, "loss": 22.6836, "step": 108110 }, { "epoch": 0.2184092405774149, "grad_norm": 413.7884521484375, "learning_rate": 
9.594024943745638e-06, "loss": 34.0252, "step": 108120 }, { "epoch": 0.2184294412100987, "grad_norm": 502.3115539550781, "learning_rate": 9.59388715214575e-06, "loss": 27.0354, "step": 108130 }, { "epoch": 0.2184496418427825, "grad_norm": 6.615095138549805, "learning_rate": 9.593749338155809e-06, "loss": 12.11, "step": 108140 }, { "epoch": 0.21846984247546633, "grad_norm": 203.57705688476562, "learning_rate": 9.593611501776493e-06, "loss": 25.7498, "step": 108150 }, { "epoch": 0.21849004310815015, "grad_norm": 627.1359252929688, "learning_rate": 9.59347364300847e-06, "loss": 23.113, "step": 108160 }, { "epoch": 0.21851024374083397, "grad_norm": 1197.2982177734375, "learning_rate": 9.593335761852416e-06, "loss": 27.1871, "step": 108170 }, { "epoch": 0.2185304443735178, "grad_norm": 403.3395080566406, "learning_rate": 9.593197858309e-06, "loss": 30.3373, "step": 108180 }, { "epoch": 0.21855064500620158, "grad_norm": 379.2122802734375, "learning_rate": 9.593059932378894e-06, "loss": 27.3166, "step": 108190 }, { "epoch": 0.2185708456388854, "grad_norm": 180.17523193359375, "learning_rate": 9.592921984062771e-06, "loss": 20.1945, "step": 108200 }, { "epoch": 0.21859104627156922, "grad_norm": 225.83389282226562, "learning_rate": 9.592784013361303e-06, "loss": 33.6536, "step": 108210 }, { "epoch": 0.21861124690425304, "grad_norm": 355.9768371582031, "learning_rate": 9.592646020275165e-06, "loss": 29.3099, "step": 108220 }, { "epoch": 0.21863144753693686, "grad_norm": 580.0260009765625, "learning_rate": 9.592508004805023e-06, "loss": 28.8732, "step": 108230 }, { "epoch": 0.21865164816962068, "grad_norm": 102.36605834960938, "learning_rate": 9.592369966951557e-06, "loss": 61.1071, "step": 108240 }, { "epoch": 0.21867184880230447, "grad_norm": 735.1100463867188, "learning_rate": 9.592231906715437e-06, "loss": 31.9127, "step": 108250 }, { "epoch": 0.2186920494349883, "grad_norm": 1059.560546875, "learning_rate": 9.592093824097335e-06, "loss": 37.0552, "step": 108260 }, { "epoch": 0.21871225006767211, "grad_norm": 342.75823974609375, "learning_rate": 9.591955719097924e-06, "loss": 29.1568, "step": 108270 }, { "epoch": 0.21873245070035593, "grad_norm": 442.01934814453125, "learning_rate": 9.591817591717878e-06, "loss": 20.9597, "step": 108280 }, { "epoch": 0.21875265133303975, "grad_norm": 566.12548828125, "learning_rate": 9.591679441957871e-06, "loss": 25.3353, "step": 108290 }, { "epoch": 0.21877285196572357, "grad_norm": 371.56494140625, "learning_rate": 9.591541269818574e-06, "loss": 22.3805, "step": 108300 }, { "epoch": 0.2187930525984074, "grad_norm": 259.03778076171875, "learning_rate": 9.591403075300662e-06, "loss": 20.911, "step": 108310 }, { "epoch": 0.2188132532310912, "grad_norm": 588.49462890625, "learning_rate": 9.591264858404809e-06, "loss": 26.2738, "step": 108320 }, { "epoch": 0.218833453863775, "grad_norm": 344.5282897949219, "learning_rate": 9.591126619131687e-06, "loss": 21.055, "step": 108330 }, { "epoch": 0.21885365449645883, "grad_norm": 162.2899627685547, "learning_rate": 9.590988357481972e-06, "loss": 8.9625, "step": 108340 }, { "epoch": 0.21887385512914265, "grad_norm": 599.2951049804688, "learning_rate": 9.590850073456335e-06, "loss": 29.8846, "step": 108350 }, { "epoch": 0.21889405576182647, "grad_norm": 317.268798828125, "learning_rate": 9.590711767055454e-06, "loss": 17.3586, "step": 108360 }, { "epoch": 0.2189142563945103, "grad_norm": 329.30364990234375, "learning_rate": 9.590573438279997e-06, "loss": 19.8385, "step": 108370 }, { "epoch": 0.21893445702719408, 
"grad_norm": 664.9000854492188, "learning_rate": 9.590435087130645e-06, "loss": 36.2991, "step": 108380 }, { "epoch": 0.2189546576598779, "grad_norm": 495.962890625, "learning_rate": 9.590296713608068e-06, "loss": 16.7955, "step": 108390 }, { "epoch": 0.21897485829256172, "grad_norm": 474.8290100097656, "learning_rate": 9.590158317712941e-06, "loss": 20.0439, "step": 108400 }, { "epoch": 0.21899505892524554, "grad_norm": 149.83889770507812, "learning_rate": 9.59001989944594e-06, "loss": 22.0663, "step": 108410 }, { "epoch": 0.21901525955792936, "grad_norm": 467.04229736328125, "learning_rate": 9.589881458807738e-06, "loss": 26.8434, "step": 108420 }, { "epoch": 0.21903546019061318, "grad_norm": 445.6435241699219, "learning_rate": 9.589742995799011e-06, "loss": 25.06, "step": 108430 }, { "epoch": 0.219055660823297, "grad_norm": 739.6090698242188, "learning_rate": 9.589604510420434e-06, "loss": 22.3647, "step": 108440 }, { "epoch": 0.2190758614559808, "grad_norm": 485.4827575683594, "learning_rate": 9.58946600267268e-06, "loss": 15.544, "step": 108450 }, { "epoch": 0.2190960620886646, "grad_norm": 1479.1304931640625, "learning_rate": 9.589327472556424e-06, "loss": 47.4186, "step": 108460 }, { "epoch": 0.21911626272134843, "grad_norm": 283.0572204589844, "learning_rate": 9.589188920072346e-06, "loss": 18.6419, "step": 108470 }, { "epoch": 0.21913646335403225, "grad_norm": 187.07830810546875, "learning_rate": 9.589050345221116e-06, "loss": 19.724, "step": 108480 }, { "epoch": 0.21915666398671607, "grad_norm": 414.17437744140625, "learning_rate": 9.58891174800341e-06, "loss": 21.7915, "step": 108490 }, { "epoch": 0.2191768646193999, "grad_norm": 627.4232177734375, "learning_rate": 9.588773128419907e-06, "loss": 30.9095, "step": 108500 }, { "epoch": 0.21919706525208368, "grad_norm": 338.1419372558594, "learning_rate": 9.588634486471279e-06, "loss": 17.5366, "step": 108510 }, { "epoch": 0.2192172658847675, "grad_norm": 997.3385009765625, "learning_rate": 9.588495822158203e-06, "loss": 29.2473, "step": 108520 }, { "epoch": 0.21923746651745132, "grad_norm": 406.22979736328125, "learning_rate": 9.588357135481355e-06, "loss": 26.403, "step": 108530 }, { "epoch": 0.21925766715013514, "grad_norm": 400.2430419921875, "learning_rate": 9.58821842644141e-06, "loss": 21.4057, "step": 108540 }, { "epoch": 0.21927786778281896, "grad_norm": 607.7499389648438, "learning_rate": 9.588079695039048e-06, "loss": 36.6994, "step": 108550 }, { "epoch": 0.21929806841550278, "grad_norm": 506.59906005859375, "learning_rate": 9.587940941274939e-06, "loss": 25.7518, "step": 108560 }, { "epoch": 0.21931826904818658, "grad_norm": 186.6425323486328, "learning_rate": 9.587802165149763e-06, "loss": 21.7186, "step": 108570 }, { "epoch": 0.2193384696808704, "grad_norm": 369.2998046875, "learning_rate": 9.587663366664197e-06, "loss": 22.8296, "step": 108580 }, { "epoch": 0.21935867031355422, "grad_norm": 115.20941162109375, "learning_rate": 9.587524545818914e-06, "loss": 23.3687, "step": 108590 }, { "epoch": 0.21937887094623804, "grad_norm": 725.7109985351562, "learning_rate": 9.587385702614593e-06, "loss": 20.6851, "step": 108600 }, { "epoch": 0.21939907157892186, "grad_norm": 943.8353271484375, "learning_rate": 9.587246837051912e-06, "loss": 28.3232, "step": 108610 }, { "epoch": 0.21941927221160568, "grad_norm": 109.12712860107422, "learning_rate": 9.587107949131545e-06, "loss": 10.8042, "step": 108620 }, { "epoch": 0.2194394728442895, "grad_norm": 523.9343872070312, "learning_rate": 9.586969038854172e-06, "loss": 22.0542, 
"step": 108630 }, { "epoch": 0.2194596734769733, "grad_norm": 389.647705078125, "learning_rate": 9.586830106220466e-06, "loss": 8.5891, "step": 108640 }, { "epoch": 0.2194798741096571, "grad_norm": 480.90130615234375, "learning_rate": 9.586691151231109e-06, "loss": 14.0215, "step": 108650 }, { "epoch": 0.21950007474234093, "grad_norm": 517.3112182617188, "learning_rate": 9.586552173886773e-06, "loss": 27.6435, "step": 108660 }, { "epoch": 0.21952027537502475, "grad_norm": 735.51953125, "learning_rate": 9.586413174188139e-06, "loss": 17.1136, "step": 108670 }, { "epoch": 0.21954047600770857, "grad_norm": 401.6905822753906, "learning_rate": 9.586274152135883e-06, "loss": 23.0881, "step": 108680 }, { "epoch": 0.2195606766403924, "grad_norm": 68.62858581542969, "learning_rate": 9.586135107730684e-06, "loss": 29.8915, "step": 108690 }, { "epoch": 0.21958087727307618, "grad_norm": 491.77764892578125, "learning_rate": 9.585996040973218e-06, "loss": 17.5886, "step": 108700 }, { "epoch": 0.21960107790576, "grad_norm": 234.37025451660156, "learning_rate": 9.585856951864163e-06, "loss": 28.1159, "step": 108710 }, { "epoch": 0.21962127853844382, "grad_norm": 604.2092895507812, "learning_rate": 9.585717840404198e-06, "loss": 15.2642, "step": 108720 }, { "epoch": 0.21964147917112764, "grad_norm": 735.1883544921875, "learning_rate": 9.585578706594e-06, "loss": 39.425, "step": 108730 }, { "epoch": 0.21966167980381146, "grad_norm": 660.2123413085938, "learning_rate": 9.585439550434249e-06, "loss": 34.6851, "step": 108740 }, { "epoch": 0.21968188043649528, "grad_norm": 500.07293701171875, "learning_rate": 9.58530037192562e-06, "loss": 33.1483, "step": 108750 }, { "epoch": 0.2197020810691791, "grad_norm": 293.2021484375, "learning_rate": 9.585161171068796e-06, "loss": 23.5174, "step": 108760 }, { "epoch": 0.2197222817018629, "grad_norm": 100.20855712890625, "learning_rate": 9.58502194786445e-06, "loss": 13.2763, "step": 108770 }, { "epoch": 0.2197424823345467, "grad_norm": 335.36566162109375, "learning_rate": 9.584882702313263e-06, "loss": 14.1443, "step": 108780 }, { "epoch": 0.21976268296723053, "grad_norm": 293.823974609375, "learning_rate": 9.584743434415915e-06, "loss": 19.8771, "step": 108790 }, { "epoch": 0.21978288359991435, "grad_norm": 374.0918273925781, "learning_rate": 9.584604144173084e-06, "loss": 16.1451, "step": 108800 }, { "epoch": 0.21980308423259817, "grad_norm": 234.1194305419922, "learning_rate": 9.584464831585446e-06, "loss": 19.1597, "step": 108810 }, { "epoch": 0.219823284865282, "grad_norm": 288.78277587890625, "learning_rate": 9.584325496653683e-06, "loss": 44.7845, "step": 108820 }, { "epoch": 0.21984348549796578, "grad_norm": 258.80322265625, "learning_rate": 9.584186139378476e-06, "loss": 18.7545, "step": 108830 }, { "epoch": 0.2198636861306496, "grad_norm": 431.63507080078125, "learning_rate": 9.5840467597605e-06, "loss": 33.6674, "step": 108840 }, { "epoch": 0.21988388676333342, "grad_norm": 25.456167221069336, "learning_rate": 9.583907357800436e-06, "loss": 24.8607, "step": 108850 }, { "epoch": 0.21990408739601724, "grad_norm": 363.77581787109375, "learning_rate": 9.583767933498964e-06, "loss": 32.6472, "step": 108860 }, { "epoch": 0.21992428802870106, "grad_norm": 593.9925537109375, "learning_rate": 9.583628486856761e-06, "loss": 29.4518, "step": 108870 }, { "epoch": 0.21994448866138488, "grad_norm": 221.8987274169922, "learning_rate": 9.583489017874512e-06, "loss": 22.8747, "step": 108880 }, { "epoch": 0.21996468929406868, "grad_norm": 495.2400207519531, "learning_rate": 
9.583349526552893e-06, "loss": 19.2236, "step": 108890 }, { "epoch": 0.2199848899267525, "grad_norm": 414.316162109375, "learning_rate": 9.583210012892582e-06, "loss": 19.9706, "step": 108900 }, { "epoch": 0.22000509055943632, "grad_norm": 664.9085083007812, "learning_rate": 9.583070476894263e-06, "loss": 24.9737, "step": 108910 }, { "epoch": 0.22002529119212014, "grad_norm": 620.5914916992188, "learning_rate": 9.582930918558613e-06, "loss": 22.0299, "step": 108920 }, { "epoch": 0.22004549182480396, "grad_norm": 344.242431640625, "learning_rate": 9.582791337886314e-06, "loss": 23.7912, "step": 108930 }, { "epoch": 0.22006569245748778, "grad_norm": 258.2727355957031, "learning_rate": 9.582651734878048e-06, "loss": 12.6917, "step": 108940 }, { "epoch": 0.2200858930901716, "grad_norm": 711.8133544921875, "learning_rate": 9.58251210953449e-06, "loss": 26.5246, "step": 108950 }, { "epoch": 0.2201060937228554, "grad_norm": 313.27752685546875, "learning_rate": 9.582372461856326e-06, "loss": 26.1567, "step": 108960 }, { "epoch": 0.2201262943555392, "grad_norm": 722.0053100585938, "learning_rate": 9.582232791844233e-06, "loss": 41.2081, "step": 108970 }, { "epoch": 0.22014649498822303, "grad_norm": 487.4915771484375, "learning_rate": 9.582093099498895e-06, "loss": 38.4402, "step": 108980 }, { "epoch": 0.22016669562090685, "grad_norm": 604.3970947265625, "learning_rate": 9.581953384820989e-06, "loss": 20.8286, "step": 108990 }, { "epoch": 0.22018689625359067, "grad_norm": 136.74668884277344, "learning_rate": 9.581813647811199e-06, "loss": 19.8767, "step": 109000 }, { "epoch": 0.2202070968862745, "grad_norm": 489.6549987792969, "learning_rate": 9.581673888470203e-06, "loss": 9.5, "step": 109010 }, { "epoch": 0.22022729751895828, "grad_norm": 349.5637512207031, "learning_rate": 9.581534106798687e-06, "loss": 11.5665, "step": 109020 }, { "epoch": 0.2202474981516421, "grad_norm": 837.957275390625, "learning_rate": 9.58139430279733e-06, "loss": 33.0956, "step": 109030 }, { "epoch": 0.22026769878432592, "grad_norm": 520.0826416015625, "learning_rate": 9.58125447646681e-06, "loss": 13.8629, "step": 109040 }, { "epoch": 0.22028789941700974, "grad_norm": 252.03729248046875, "learning_rate": 9.581114627807812e-06, "loss": 31.939, "step": 109050 }, { "epoch": 0.22030810004969356, "grad_norm": 1100.6593017578125, "learning_rate": 9.580974756821017e-06, "loss": 23.3874, "step": 109060 }, { "epoch": 0.22032830068237738, "grad_norm": 381.92694091796875, "learning_rate": 9.580834863507107e-06, "loss": 15.2103, "step": 109070 }, { "epoch": 0.2203485013150612, "grad_norm": 143.5346221923828, "learning_rate": 9.580694947866765e-06, "loss": 16.3813, "step": 109080 }, { "epoch": 0.220368701947745, "grad_norm": 438.9550476074219, "learning_rate": 9.58055500990067e-06, "loss": 32.5477, "step": 109090 }, { "epoch": 0.2203889025804288, "grad_norm": 772.0028686523438, "learning_rate": 9.580415049609503e-06, "loss": 22.2763, "step": 109100 }, { "epoch": 0.22040910321311263, "grad_norm": 436.929931640625, "learning_rate": 9.580275066993952e-06, "loss": 19.3724, "step": 109110 }, { "epoch": 0.22042930384579645, "grad_norm": 693.9835205078125, "learning_rate": 9.580135062054693e-06, "loss": 23.5535, "step": 109120 }, { "epoch": 0.22044950447848027, "grad_norm": 506.35821533203125, "learning_rate": 9.579995034792415e-06, "loss": 17.582, "step": 109130 }, { "epoch": 0.2204697051111641, "grad_norm": 585.5834350585938, "learning_rate": 9.579854985207794e-06, "loss": 21.895, "step": 109140 }, { "epoch": 0.22048990574384789, 
"grad_norm": 91.59484100341797, "learning_rate": 9.579714913301516e-06, "loss": 16.7803, "step": 109150 }, { "epoch": 0.2205101063765317, "grad_norm": 233.09005737304688, "learning_rate": 9.579574819074263e-06, "loss": 21.4606, "step": 109160 }, { "epoch": 0.22053030700921553, "grad_norm": 554.2740478515625, "learning_rate": 9.579434702526717e-06, "loss": 39.7076, "step": 109170 }, { "epoch": 0.22055050764189935, "grad_norm": 418.4535217285156, "learning_rate": 9.579294563659562e-06, "loss": 29.9358, "step": 109180 }, { "epoch": 0.22057070827458317, "grad_norm": 220.78564453125, "learning_rate": 9.579154402473482e-06, "loss": 44.5528, "step": 109190 }, { "epoch": 0.22059090890726699, "grad_norm": 6.106456279754639, "learning_rate": 9.579014218969158e-06, "loss": 27.3099, "step": 109200 }, { "epoch": 0.22061110953995078, "grad_norm": 783.509521484375, "learning_rate": 9.578874013147274e-06, "loss": 17.8169, "step": 109210 }, { "epoch": 0.2206313101726346, "grad_norm": 419.3775939941406, "learning_rate": 9.578733785008513e-06, "loss": 17.4024, "step": 109220 }, { "epoch": 0.22065151080531842, "grad_norm": 181.3259735107422, "learning_rate": 9.578593534553558e-06, "loss": 17.975, "step": 109230 }, { "epoch": 0.22067171143800224, "grad_norm": 487.99969482421875, "learning_rate": 9.578453261783096e-06, "loss": 30.2975, "step": 109240 }, { "epoch": 0.22069191207068606, "grad_norm": 329.6623229980469, "learning_rate": 9.578312966697807e-06, "loss": 16.3705, "step": 109250 }, { "epoch": 0.22071211270336988, "grad_norm": 591.29150390625, "learning_rate": 9.578172649298374e-06, "loss": 34.7328, "step": 109260 }, { "epoch": 0.2207323133360537, "grad_norm": 383.66900634765625, "learning_rate": 9.578032309585485e-06, "loss": 21.4675, "step": 109270 }, { "epoch": 0.2207525139687375, "grad_norm": 910.4567260742188, "learning_rate": 9.577891947559821e-06, "loss": 34.6472, "step": 109280 }, { "epoch": 0.2207727146014213, "grad_norm": 248.37745666503906, "learning_rate": 9.577751563222067e-06, "loss": 29.2419, "step": 109290 }, { "epoch": 0.22079291523410513, "grad_norm": 304.04144287109375, "learning_rate": 9.577611156572908e-06, "loss": 16.7587, "step": 109300 }, { "epoch": 0.22081311586678895, "grad_norm": 96.64410400390625, "learning_rate": 9.577470727613025e-06, "loss": 15.9196, "step": 109310 }, { "epoch": 0.22083331649947277, "grad_norm": 576.2413940429688, "learning_rate": 9.577330276343106e-06, "loss": 24.888, "step": 109320 }, { "epoch": 0.2208535171321566, "grad_norm": 207.7760009765625, "learning_rate": 9.577189802763836e-06, "loss": 25.2301, "step": 109330 }, { "epoch": 0.22087371776484038, "grad_norm": 104.967041015625, "learning_rate": 9.577049306875895e-06, "loss": 49.4909, "step": 109340 }, { "epoch": 0.2208939183975242, "grad_norm": 209.85760498046875, "learning_rate": 9.576908788679975e-06, "loss": 20.2544, "step": 109350 }, { "epoch": 0.22091411903020802, "grad_norm": 465.70330810546875, "learning_rate": 9.576768248176752e-06, "loss": 20.1185, "step": 109360 }, { "epoch": 0.22093431966289184, "grad_norm": 786.9861450195312, "learning_rate": 9.576627685366919e-06, "loss": 17.1637, "step": 109370 }, { "epoch": 0.22095452029557566, "grad_norm": 336.6026306152344, "learning_rate": 9.576487100251157e-06, "loss": 21.1663, "step": 109380 }, { "epoch": 0.22097472092825948, "grad_norm": 354.6676330566406, "learning_rate": 9.576346492830151e-06, "loss": 19.0305, "step": 109390 }, { "epoch": 0.22099492156094327, "grad_norm": 1076.187744140625, "learning_rate": 9.576205863104588e-06, "loss": 21.228, 
"step": 109400 }, { "epoch": 0.2210151221936271, "grad_norm": 614.33203125, "learning_rate": 9.576065211075153e-06, "loss": 30.4912, "step": 109410 }, { "epoch": 0.22103532282631091, "grad_norm": 833.4633178710938, "learning_rate": 9.575924536742532e-06, "loss": 22.7512, "step": 109420 }, { "epoch": 0.22105552345899473, "grad_norm": 281.7948303222656, "learning_rate": 9.575783840107407e-06, "loss": 30.2963, "step": 109430 }, { "epoch": 0.22107572409167855, "grad_norm": 354.9866943359375, "learning_rate": 9.575643121170468e-06, "loss": 16.2715, "step": 109440 }, { "epoch": 0.22109592472436237, "grad_norm": 432.01953125, "learning_rate": 9.5755023799324e-06, "loss": 33.8724, "step": 109450 }, { "epoch": 0.2211161253570462, "grad_norm": 1179.9603271484375, "learning_rate": 9.575361616393888e-06, "loss": 27.342, "step": 109460 }, { "epoch": 0.22113632598973, "grad_norm": 109.8602523803711, "learning_rate": 9.575220830555618e-06, "loss": 20.4152, "step": 109470 }, { "epoch": 0.2211565266224138, "grad_norm": 637.301025390625, "learning_rate": 9.575080022418276e-06, "loss": 15.0124, "step": 109480 }, { "epoch": 0.22117672725509763, "grad_norm": 335.2337341308594, "learning_rate": 9.574939191982549e-06, "loss": 17.1853, "step": 109490 }, { "epoch": 0.22119692788778145, "grad_norm": 272.91522216796875, "learning_rate": 9.574798339249124e-06, "loss": 24.9664, "step": 109500 }, { "epoch": 0.22121712852046527, "grad_norm": 354.1695556640625, "learning_rate": 9.574657464218688e-06, "loss": 42.1583, "step": 109510 }, { "epoch": 0.2212373291531491, "grad_norm": 758.5416259765625, "learning_rate": 9.574516566891925e-06, "loss": 29.1923, "step": 109520 }, { "epoch": 0.22125752978583288, "grad_norm": 67.76472473144531, "learning_rate": 9.574375647269523e-06, "loss": 34.9052, "step": 109530 }, { "epoch": 0.2212777304185167, "grad_norm": 564.2835693359375, "learning_rate": 9.574234705352167e-06, "loss": 16.1783, "step": 109540 }, { "epoch": 0.22129793105120052, "grad_norm": 139.2586669921875, "learning_rate": 9.574093741140549e-06, "loss": 18.3567, "step": 109550 }, { "epoch": 0.22131813168388434, "grad_norm": 230.43524169921875, "learning_rate": 9.573952754635351e-06, "loss": 15.0595, "step": 109560 }, { "epoch": 0.22133833231656816, "grad_norm": 1182.6629638671875, "learning_rate": 9.573811745837262e-06, "loss": 34.0825, "step": 109570 }, { "epoch": 0.22135853294925198, "grad_norm": 317.4469909667969, "learning_rate": 9.573670714746973e-06, "loss": 16.955, "step": 109580 }, { "epoch": 0.2213787335819358, "grad_norm": 520.1302490234375, "learning_rate": 9.573529661365163e-06, "loss": 26.3998, "step": 109590 }, { "epoch": 0.2213989342146196, "grad_norm": 367.7105407714844, "learning_rate": 9.573388585692525e-06, "loss": 15.3604, "step": 109600 }, { "epoch": 0.2214191348473034, "grad_norm": 625.9605102539062, "learning_rate": 9.573247487729747e-06, "loss": 23.8124, "step": 109610 }, { "epoch": 0.22143933547998723, "grad_norm": 314.0114440917969, "learning_rate": 9.573106367477515e-06, "loss": 21.104, "step": 109620 }, { "epoch": 0.22145953611267105, "grad_norm": 25.299861907958984, "learning_rate": 9.572965224936517e-06, "loss": 24.232, "step": 109630 }, { "epoch": 0.22147973674535487, "grad_norm": 476.4053649902344, "learning_rate": 9.572824060107442e-06, "loss": 16.1803, "step": 109640 }, { "epoch": 0.2214999373780387, "grad_norm": 353.9746398925781, "learning_rate": 9.572682872990975e-06, "loss": 28.4468, "step": 109650 }, { "epoch": 0.22152013801072248, "grad_norm": 579.5936279296875, "learning_rate": 
9.572541663587809e-06, "loss": 14.2607, "step": 109660 }, { "epoch": 0.2215403386434063, "grad_norm": 81.84984588623047, "learning_rate": 9.572400431898627e-06, "loss": 28.0746, "step": 109670 }, { "epoch": 0.22156053927609012, "grad_norm": 371.1416931152344, "learning_rate": 9.572259177924122e-06, "loss": 27.2328, "step": 109680 }, { "epoch": 0.22158073990877394, "grad_norm": 247.1607208251953, "learning_rate": 9.572117901664979e-06, "loss": 25.4636, "step": 109690 }, { "epoch": 0.22160094054145776, "grad_norm": 362.3720397949219, "learning_rate": 9.571976603121889e-06, "loss": 38.2739, "step": 109700 }, { "epoch": 0.22162114117414158, "grad_norm": 306.3987731933594, "learning_rate": 9.571835282295539e-06, "loss": 23.3747, "step": 109710 }, { "epoch": 0.22164134180682538, "grad_norm": 220.51995849609375, "learning_rate": 9.571693939186618e-06, "loss": 15.8963, "step": 109720 }, { "epoch": 0.2216615424395092, "grad_norm": 876.3275756835938, "learning_rate": 9.571552573795814e-06, "loss": 20.8997, "step": 109730 }, { "epoch": 0.22168174307219302, "grad_norm": 891.5435791015625, "learning_rate": 9.571411186123818e-06, "loss": 27.2309, "step": 109740 }, { "epoch": 0.22170194370487684, "grad_norm": 642.4542236328125, "learning_rate": 9.571269776171319e-06, "loss": 29.3748, "step": 109750 }, { "epoch": 0.22172214433756066, "grad_norm": 42.82487869262695, "learning_rate": 9.571128343939006e-06, "loss": 11.178, "step": 109760 }, { "epoch": 0.22174234497024448, "grad_norm": 449.6181945800781, "learning_rate": 9.570986889427567e-06, "loss": 23.855, "step": 109770 }, { "epoch": 0.2217625456029283, "grad_norm": 321.8900146484375, "learning_rate": 9.570845412637692e-06, "loss": 36.1449, "step": 109780 }, { "epoch": 0.2217827462356121, "grad_norm": 322.9187927246094, "learning_rate": 9.570703913570072e-06, "loss": 22.747, "step": 109790 }, { "epoch": 0.2218029468682959, "grad_norm": 166.61514282226562, "learning_rate": 9.570562392225395e-06, "loss": 7.5422, "step": 109800 }, { "epoch": 0.22182314750097973, "grad_norm": 532.9989013671875, "learning_rate": 9.570420848604351e-06, "loss": 18.9286, "step": 109810 }, { "epoch": 0.22184334813366355, "grad_norm": 379.5887756347656, "learning_rate": 9.57027928270763e-06, "loss": 21.7228, "step": 109820 }, { "epoch": 0.22186354876634737, "grad_norm": 145.89117431640625, "learning_rate": 9.570137694535922e-06, "loss": 29.015, "step": 109830 }, { "epoch": 0.2218837493990312, "grad_norm": 511.4658508300781, "learning_rate": 9.569996084089918e-06, "loss": 17.8443, "step": 109840 }, { "epoch": 0.22190395003171498, "grad_norm": 989.8679809570312, "learning_rate": 9.569854451370306e-06, "loss": 39.1116, "step": 109850 }, { "epoch": 0.2219241506643988, "grad_norm": 792.4555053710938, "learning_rate": 9.569712796377779e-06, "loss": 35.5257, "step": 109860 }, { "epoch": 0.22194435129708262, "grad_norm": 370.8437194824219, "learning_rate": 9.569571119113025e-06, "loss": 27.1154, "step": 109870 }, { "epoch": 0.22196455192976644, "grad_norm": 341.374755859375, "learning_rate": 9.569429419576737e-06, "loss": 33.4845, "step": 109880 }, { "epoch": 0.22198475256245026, "grad_norm": 71.42806243896484, "learning_rate": 9.569287697769602e-06, "loss": 14.3483, "step": 109890 }, { "epoch": 0.22200495319513408, "grad_norm": 348.98779296875, "learning_rate": 9.569145953692316e-06, "loss": 19.8431, "step": 109900 }, { "epoch": 0.2220251538278179, "grad_norm": 178.1228485107422, "learning_rate": 9.569004187345565e-06, "loss": 25.9737, "step": 109910 }, { "epoch": 0.2220453544605017, 
"grad_norm": 129.86044311523438, "learning_rate": 9.568862398730042e-06, "loss": 24.8706, "step": 109920 }, { "epoch": 0.2220655550931855, "grad_norm": 377.76983642578125, "learning_rate": 9.568720587846437e-06, "loss": 17.8867, "step": 109930 }, { "epoch": 0.22208575572586933, "grad_norm": 641.7771606445312, "learning_rate": 9.568578754695443e-06, "loss": 34.9395, "step": 109940 }, { "epoch": 0.22210595635855315, "grad_norm": 422.3647766113281, "learning_rate": 9.56843689927775e-06, "loss": 24.1158, "step": 109950 }, { "epoch": 0.22212615699123697, "grad_norm": 449.8459167480469, "learning_rate": 9.568295021594049e-06, "loss": 30.3078, "step": 109960 }, { "epoch": 0.2221463576239208, "grad_norm": 323.5790100097656, "learning_rate": 9.568153121645033e-06, "loss": 14.0972, "step": 109970 }, { "epoch": 0.22216655825660458, "grad_norm": 792.2882080078125, "learning_rate": 9.568011199431392e-06, "loss": 16.4563, "step": 109980 }, { "epoch": 0.2221867588892884, "grad_norm": 425.24822998046875, "learning_rate": 9.56786925495382e-06, "loss": 45.4373, "step": 109990 }, { "epoch": 0.22220695952197222, "grad_norm": 1241.8756103515625, "learning_rate": 9.567727288213005e-06, "loss": 38.42, "step": 110000 }, { "epoch": 0.22222716015465604, "grad_norm": 217.55137634277344, "learning_rate": 9.567585299209642e-06, "loss": 15.3444, "step": 110010 }, { "epoch": 0.22224736078733986, "grad_norm": 413.5858154296875, "learning_rate": 9.567443287944422e-06, "loss": 23.6093, "step": 110020 }, { "epoch": 0.22226756142002368, "grad_norm": 497.9522705078125, "learning_rate": 9.567301254418038e-06, "loss": 24.472, "step": 110030 }, { "epoch": 0.22228776205270748, "grad_norm": 576.4864501953125, "learning_rate": 9.567159198631181e-06, "loss": 27.0471, "step": 110040 }, { "epoch": 0.2223079626853913, "grad_norm": 466.5008850097656, "learning_rate": 9.567017120584545e-06, "loss": 35.0435, "step": 110050 }, { "epoch": 0.22232816331807512, "grad_norm": 166.7375946044922, "learning_rate": 9.566875020278822e-06, "loss": 33.9176, "step": 110060 }, { "epoch": 0.22234836395075894, "grad_norm": 338.6051025390625, "learning_rate": 9.566732897714703e-06, "loss": 25.5896, "step": 110070 }, { "epoch": 0.22236856458344276, "grad_norm": 231.14297485351562, "learning_rate": 9.56659075289288e-06, "loss": 21.4084, "step": 110080 }, { "epoch": 0.22238876521612658, "grad_norm": 303.2173767089844, "learning_rate": 9.566448585814051e-06, "loss": 21.725, "step": 110090 }, { "epoch": 0.2224089658488104, "grad_norm": 16.235166549682617, "learning_rate": 9.566306396478904e-06, "loss": 18.9133, "step": 110100 }, { "epoch": 0.2224291664814942, "grad_norm": 314.7041015625, "learning_rate": 9.566164184888134e-06, "loss": 25.7187, "step": 110110 }, { "epoch": 0.222449367114178, "grad_norm": 408.3448181152344, "learning_rate": 9.566021951042432e-06, "loss": 21.4768, "step": 110120 }, { "epoch": 0.22246956774686183, "grad_norm": 354.4145202636719, "learning_rate": 9.565879694942493e-06, "loss": 18.8502, "step": 110130 }, { "epoch": 0.22248976837954565, "grad_norm": 399.0462951660156, "learning_rate": 9.565737416589011e-06, "loss": 11.6276, "step": 110140 }, { "epoch": 0.22250996901222947, "grad_norm": 413.0665588378906, "learning_rate": 9.565595115982678e-06, "loss": 18.873, "step": 110150 }, { "epoch": 0.2225301696449133, "grad_norm": 591.3002319335938, "learning_rate": 9.56545279312419e-06, "loss": 22.1303, "step": 110160 }, { "epoch": 0.22255037027759708, "grad_norm": 359.10064697265625, "learning_rate": 9.565310448014236e-06, "loss": 24.5434, 
"step": 110170 }, { "epoch": 0.2225705709102809, "grad_norm": 223.58663940429688, "learning_rate": 9.565168080653514e-06, "loss": 22.9331, "step": 110180 }, { "epoch": 0.22259077154296472, "grad_norm": 206.38140869140625, "learning_rate": 9.565025691042717e-06, "loss": 42.607, "step": 110190 }, { "epoch": 0.22261097217564854, "grad_norm": 312.63934326171875, "learning_rate": 9.564883279182538e-06, "loss": 12.663, "step": 110200 }, { "epoch": 0.22263117280833236, "grad_norm": 666.5807495117188, "learning_rate": 9.56474084507367e-06, "loss": 17.0194, "step": 110210 }, { "epoch": 0.22265137344101618, "grad_norm": 292.62994384765625, "learning_rate": 9.56459838871681e-06, "loss": 27.7475, "step": 110220 }, { "epoch": 0.2226715740737, "grad_norm": 348.21539306640625, "learning_rate": 9.564455910112651e-06, "loss": 14.2087, "step": 110230 }, { "epoch": 0.2226917747063838, "grad_norm": 287.2896423339844, "learning_rate": 9.564313409261888e-06, "loss": 36.3649, "step": 110240 }, { "epoch": 0.2227119753390676, "grad_norm": 256.77691650390625, "learning_rate": 9.564170886165214e-06, "loss": 27.8815, "step": 110250 }, { "epoch": 0.22273217597175143, "grad_norm": 72.4677963256836, "learning_rate": 9.564028340823325e-06, "loss": 17.3781, "step": 110260 }, { "epoch": 0.22275237660443525, "grad_norm": 508.245361328125, "learning_rate": 9.563885773236917e-06, "loss": 17.9092, "step": 110270 }, { "epoch": 0.22277257723711907, "grad_norm": 628.5128784179688, "learning_rate": 9.563743183406683e-06, "loss": 23.3545, "step": 110280 }, { "epoch": 0.2227927778698029, "grad_norm": 436.4483337402344, "learning_rate": 9.563600571333316e-06, "loss": 24.9046, "step": 110290 }, { "epoch": 0.22281297850248669, "grad_norm": 4.252965450286865, "learning_rate": 9.563457937017514e-06, "loss": 32.6382, "step": 110300 }, { "epoch": 0.2228331791351705, "grad_norm": 171.72998046875, "learning_rate": 9.563315280459973e-06, "loss": 34.4523, "step": 110310 }, { "epoch": 0.22285337976785433, "grad_norm": 510.9077453613281, "learning_rate": 9.563172601661386e-06, "loss": 41.5938, "step": 110320 }, { "epoch": 0.22287358040053815, "grad_norm": 221.45542907714844, "learning_rate": 9.56302990062245e-06, "loss": 21.51, "step": 110330 }, { "epoch": 0.22289378103322197, "grad_norm": 317.7682800292969, "learning_rate": 9.562887177343858e-06, "loss": 17.6238, "step": 110340 }, { "epoch": 0.22291398166590579, "grad_norm": 966.6275634765625, "learning_rate": 9.562744431826307e-06, "loss": 27.6289, "step": 110350 }, { "epoch": 0.22293418229858958, "grad_norm": 270.346435546875, "learning_rate": 9.562601664070495e-06, "loss": 14.6713, "step": 110360 }, { "epoch": 0.2229543829312734, "grad_norm": 817.4618530273438, "learning_rate": 9.562458874077114e-06, "loss": 33.7798, "step": 110370 }, { "epoch": 0.22297458356395722, "grad_norm": 566.02294921875, "learning_rate": 9.562316061846863e-06, "loss": 23.0801, "step": 110380 }, { "epoch": 0.22299478419664104, "grad_norm": 520.538818359375, "learning_rate": 9.562173227380437e-06, "loss": 20.5937, "step": 110390 }, { "epoch": 0.22301498482932486, "grad_norm": 469.1568298339844, "learning_rate": 9.562030370678533e-06, "loss": 18.5103, "step": 110400 }, { "epoch": 0.22303518546200868, "grad_norm": 220.47048950195312, "learning_rate": 9.561887491741844e-06, "loss": 22.8271, "step": 110410 }, { "epoch": 0.2230553860946925, "grad_norm": 63.98490905761719, "learning_rate": 9.56174459057107e-06, "loss": 13.5171, "step": 110420 }, { "epoch": 0.2230755867273763, "grad_norm": 1099.8052978515625, "learning_rate": 
9.561601667166905e-06, "loss": 26.0947, "step": 110430 }, { "epoch": 0.2230957873600601, "grad_norm": 251.17816162109375, "learning_rate": 9.561458721530047e-06, "loss": 20.2432, "step": 110440 }, { "epoch": 0.22311598799274393, "grad_norm": 168.3972930908203, "learning_rate": 9.561315753661194e-06, "loss": 20.0466, "step": 110450 }, { "epoch": 0.22313618862542775, "grad_norm": 699.1183471679688, "learning_rate": 9.56117276356104e-06, "loss": 24.6622, "step": 110460 }, { "epoch": 0.22315638925811157, "grad_norm": 579.03759765625, "learning_rate": 9.561029751230282e-06, "loss": 24.6237, "step": 110470 }, { "epoch": 0.2231765898907954, "grad_norm": 277.62646484375, "learning_rate": 9.56088671666962e-06, "loss": 11.2887, "step": 110480 }, { "epoch": 0.22319679052347918, "grad_norm": 620.6677856445312, "learning_rate": 9.56074365987975e-06, "loss": 15.1699, "step": 110490 }, { "epoch": 0.223216991156163, "grad_norm": 936.4269409179688, "learning_rate": 9.560600580861366e-06, "loss": 35.6911, "step": 110500 }, { "epoch": 0.22323719178884682, "grad_norm": 552.4637451171875, "learning_rate": 9.560457479615168e-06, "loss": 24.9559, "step": 110510 }, { "epoch": 0.22325739242153064, "grad_norm": 576.7377319335938, "learning_rate": 9.560314356141856e-06, "loss": 22.4393, "step": 110520 }, { "epoch": 0.22327759305421446, "grad_norm": 358.72589111328125, "learning_rate": 9.560171210442123e-06, "loss": 20.9439, "step": 110530 }, { "epoch": 0.22329779368689828, "grad_norm": 249.48648071289062, "learning_rate": 9.56002804251667e-06, "loss": 15.8705, "step": 110540 }, { "epoch": 0.2233179943195821, "grad_norm": 308.5624694824219, "learning_rate": 9.559884852366191e-06, "loss": 27.0888, "step": 110550 }, { "epoch": 0.2233381949522659, "grad_norm": 187.78858947753906, "learning_rate": 9.55974163999139e-06, "loss": 35.8462, "step": 110560 }, { "epoch": 0.22335839558494971, "grad_norm": 439.64764404296875, "learning_rate": 9.559598405392959e-06, "loss": 17.1107, "step": 110570 }, { "epoch": 0.22337859621763353, "grad_norm": 146.07711791992188, "learning_rate": 9.559455148571597e-06, "loss": 28.0377, "step": 110580 }, { "epoch": 0.22339879685031735, "grad_norm": 270.3213806152344, "learning_rate": 9.559311869528005e-06, "loss": 28.6909, "step": 110590 }, { "epoch": 0.22341899748300117, "grad_norm": 200.6590576171875, "learning_rate": 9.55916856826288e-06, "loss": 25.6525, "step": 110600 }, { "epoch": 0.223439198115685, "grad_norm": 612.1190185546875, "learning_rate": 9.55902524477692e-06, "loss": 15.2635, "step": 110610 }, { "epoch": 0.2234593987483688, "grad_norm": 322.1373291015625, "learning_rate": 9.558881899070824e-06, "loss": 33.6983, "step": 110620 }, { "epoch": 0.2234795993810526, "grad_norm": 134.87986755371094, "learning_rate": 9.55873853114529e-06, "loss": 24.8128, "step": 110630 }, { "epoch": 0.22349980001373643, "grad_norm": 1127.307373046875, "learning_rate": 9.55859514100102e-06, "loss": 29.5784, "step": 110640 }, { "epoch": 0.22352000064642025, "grad_norm": 471.3376770019531, "learning_rate": 9.558451728638706e-06, "loss": 17.0943, "step": 110650 }, { "epoch": 0.22354020127910407, "grad_norm": 368.75640869140625, "learning_rate": 9.558308294059055e-06, "loss": 8.737, "step": 110660 }, { "epoch": 0.2235604019117879, "grad_norm": 265.4960632324219, "learning_rate": 9.55816483726276e-06, "loss": 29.3541, "step": 110670 }, { "epoch": 0.22358060254447168, "grad_norm": 170.4871063232422, "learning_rate": 9.558021358250523e-06, "loss": 40.0966, "step": 110680 }, { "epoch": 0.2236008031771555, 
"grad_norm": 931.2318725585938, "learning_rate": 9.557877857023043e-06, "loss": 21.5192, "step": 110690 }, { "epoch": 0.22362100380983932, "grad_norm": 1998.2803955078125, "learning_rate": 9.557734333581019e-06, "loss": 49.9292, "step": 110700 }, { "epoch": 0.22364120444252314, "grad_norm": 155.32125854492188, "learning_rate": 9.557590787925151e-06, "loss": 40.1303, "step": 110710 }, { "epoch": 0.22366140507520696, "grad_norm": 454.46685791015625, "learning_rate": 9.557447220056137e-06, "loss": 23.0269, "step": 110720 }, { "epoch": 0.22368160570789078, "grad_norm": 303.2588195800781, "learning_rate": 9.557303629974679e-06, "loss": 19.352, "step": 110730 }, { "epoch": 0.2237018063405746, "grad_norm": 259.6453552246094, "learning_rate": 9.557160017681475e-06, "loss": 16.9695, "step": 110740 }, { "epoch": 0.2237220069732584, "grad_norm": 341.01123046875, "learning_rate": 9.557016383177226e-06, "loss": 34.446, "step": 110750 }, { "epoch": 0.2237422076059422, "grad_norm": 861.7747192382812, "learning_rate": 9.556872726462634e-06, "loss": 16.9156, "step": 110760 }, { "epoch": 0.22376240823862603, "grad_norm": 589.8540649414062, "learning_rate": 9.556729047538395e-06, "loss": 35.2068, "step": 110770 }, { "epoch": 0.22378260887130985, "grad_norm": 404.4129638671875, "learning_rate": 9.556585346405211e-06, "loss": 21.4091, "step": 110780 }, { "epoch": 0.22380280950399367, "grad_norm": 726.181640625, "learning_rate": 9.556441623063783e-06, "loss": 26.9133, "step": 110790 }, { "epoch": 0.2238230101366775, "grad_norm": 148.09535217285156, "learning_rate": 9.556297877514812e-06, "loss": 19.4471, "step": 110800 }, { "epoch": 0.22384321076936128, "grad_norm": 372.90972900390625, "learning_rate": 9.556154109758998e-06, "loss": 23.5338, "step": 110810 }, { "epoch": 0.2238634114020451, "grad_norm": 181.201171875, "learning_rate": 9.55601031979704e-06, "loss": 15.8666, "step": 110820 }, { "epoch": 0.22388361203472892, "grad_norm": 878.591796875, "learning_rate": 9.55586650762964e-06, "loss": 44.4969, "step": 110830 }, { "epoch": 0.22390381266741274, "grad_norm": 212.8592987060547, "learning_rate": 9.555722673257502e-06, "loss": 21.3121, "step": 110840 }, { "epoch": 0.22392401330009656, "grad_norm": 12.053426742553711, "learning_rate": 9.555578816681321e-06, "loss": 22.3437, "step": 110850 }, { "epoch": 0.22394421393278038, "grad_norm": 232.9084930419922, "learning_rate": 9.555434937901801e-06, "loss": 29.2265, "step": 110860 }, { "epoch": 0.2239644145654642, "grad_norm": 596.0853271484375, "learning_rate": 9.555291036919647e-06, "loss": 28.1046, "step": 110870 }, { "epoch": 0.223984615198148, "grad_norm": 362.330322265625, "learning_rate": 9.555147113735555e-06, "loss": 17.273, "step": 110880 }, { "epoch": 0.22400481583083182, "grad_norm": 537.8699951171875, "learning_rate": 9.55500316835023e-06, "loss": 31.301, "step": 110890 }, { "epoch": 0.22402501646351564, "grad_norm": 532.1046142578125, "learning_rate": 9.554859200764371e-06, "loss": 14.2249, "step": 110900 }, { "epoch": 0.22404521709619946, "grad_norm": 143.5594024658203, "learning_rate": 9.55471521097868e-06, "loss": 13.2904, "step": 110910 }, { "epoch": 0.22406541772888328, "grad_norm": 9.103543281555176, "learning_rate": 9.554571198993858e-06, "loss": 15.9683, "step": 110920 }, { "epoch": 0.2240856183615671, "grad_norm": 0.0, "learning_rate": 9.55442716481061e-06, "loss": 38.8312, "step": 110930 }, { "epoch": 0.2241058189942509, "grad_norm": 301.2005310058594, "learning_rate": 9.554283108429639e-06, "loss": 13.2248, "step": 110940 }, { "epoch": 
0.2241260196269347, "grad_norm": 341.2347412109375, "learning_rate": 9.55413902985164e-06, "loss": 22.6971, "step": 110950 }, { "epoch": 0.22414622025961853, "grad_norm": 544.2408447265625, "learning_rate": 9.553994929077323e-06, "loss": 30.891, "step": 110960 }, { "epoch": 0.22416642089230235, "grad_norm": 157.01345825195312, "learning_rate": 9.553850806107387e-06, "loss": 17.0911, "step": 110970 }, { "epoch": 0.22418662152498617, "grad_norm": 134.6532440185547, "learning_rate": 9.553706660942532e-06, "loss": 25.8211, "step": 110980 }, { "epoch": 0.22420682215767, "grad_norm": 1391.4884033203125, "learning_rate": 9.553562493583466e-06, "loss": 25.441, "step": 110990 }, { "epoch": 0.22422702279035378, "grad_norm": 791.5779418945312, "learning_rate": 9.553418304030886e-06, "loss": 28.1263, "step": 111000 }, { "epoch": 0.2242472234230376, "grad_norm": 667.532470703125, "learning_rate": 9.553274092285498e-06, "loss": 20.6861, "step": 111010 }, { "epoch": 0.22426742405572142, "grad_norm": 198.34072875976562, "learning_rate": 9.553129858348006e-06, "loss": 27.3397, "step": 111020 }, { "epoch": 0.22428762468840524, "grad_norm": 674.15625, "learning_rate": 9.55298560221911e-06, "loss": 32.4214, "step": 111030 }, { "epoch": 0.22430782532108906, "grad_norm": 243.1038360595703, "learning_rate": 9.552841323899515e-06, "loss": 24.5726, "step": 111040 }, { "epoch": 0.22432802595377288, "grad_norm": 359.1784362792969, "learning_rate": 9.552697023389923e-06, "loss": 20.5158, "step": 111050 }, { "epoch": 0.2243482265864567, "grad_norm": 272.7997131347656, "learning_rate": 9.552552700691037e-06, "loss": 28.2356, "step": 111060 }, { "epoch": 0.2243684272191405, "grad_norm": 947.9569702148438, "learning_rate": 9.552408355803564e-06, "loss": 30.3974, "step": 111070 }, { "epoch": 0.2243886278518243, "grad_norm": 199.59693908691406, "learning_rate": 9.552263988728204e-06, "loss": 22.4818, "step": 111080 }, { "epoch": 0.22440882848450813, "grad_norm": 400.09222412109375, "learning_rate": 9.552119599465659e-06, "loss": 36.573, "step": 111090 }, { "epoch": 0.22442902911719195, "grad_norm": 525.7420043945312, "learning_rate": 9.551975188016638e-06, "loss": 29.9474, "step": 111100 }, { "epoch": 0.22444922974987577, "grad_norm": 146.42710876464844, "learning_rate": 9.55183075438184e-06, "loss": 12.032, "step": 111110 }, { "epoch": 0.2244694303825596, "grad_norm": 440.4906005859375, "learning_rate": 9.551686298561972e-06, "loss": 23.1909, "step": 111120 }, { "epoch": 0.22448963101524338, "grad_norm": 417.2303771972656, "learning_rate": 9.551541820557737e-06, "loss": 24.6403, "step": 111130 }, { "epoch": 0.2245098316479272, "grad_norm": 379.7185974121094, "learning_rate": 9.551397320369838e-06, "loss": 18.976, "step": 111140 }, { "epoch": 0.22453003228061102, "grad_norm": 175.88348388671875, "learning_rate": 9.551252797998982e-06, "loss": 11.4578, "step": 111150 }, { "epoch": 0.22455023291329484, "grad_norm": 307.9947814941406, "learning_rate": 9.55110825344587e-06, "loss": 14.3487, "step": 111160 }, { "epoch": 0.22457043354597866, "grad_norm": 229.0950927734375, "learning_rate": 9.550963686711212e-06, "loss": 31.3663, "step": 111170 }, { "epoch": 0.22459063417866248, "grad_norm": 649.1748657226562, "learning_rate": 9.550819097795706e-06, "loss": 28.7961, "step": 111180 }, { "epoch": 0.2246108348113463, "grad_norm": 199.32424926757812, "learning_rate": 9.55067448670006e-06, "loss": 7.9647, "step": 111190 }, { "epoch": 0.2246310354440301, "grad_norm": 837.6842041015625, "learning_rate": 9.550529853424979e-06, "loss": 
24.2685, "step": 111200 }, { "epoch": 0.22465123607671392, "grad_norm": 598.1119384765625, "learning_rate": 9.550385197971168e-06, "loss": 20.1005, "step": 111210 }, { "epoch": 0.22467143670939774, "grad_norm": 620.2124633789062, "learning_rate": 9.55024052033933e-06, "loss": 30.4976, "step": 111220 }, { "epoch": 0.22469163734208156, "grad_norm": 495.8719177246094, "learning_rate": 9.550095820530172e-06, "loss": 15.5378, "step": 111230 }, { "epoch": 0.22471183797476538, "grad_norm": 118.84636688232422, "learning_rate": 9.549951098544399e-06, "loss": 13.2547, "step": 111240 }, { "epoch": 0.2247320386074492, "grad_norm": 369.2759704589844, "learning_rate": 9.549806354382716e-06, "loss": 36.1086, "step": 111250 }, { "epoch": 0.224752239240133, "grad_norm": 393.2582092285156, "learning_rate": 9.54966158804583e-06, "loss": 21.1021, "step": 111260 }, { "epoch": 0.2247724398728168, "grad_norm": 916.140869140625, "learning_rate": 9.549516799534444e-06, "loss": 20.0107, "step": 111270 }, { "epoch": 0.22479264050550063, "grad_norm": 274.2108154296875, "learning_rate": 9.549371988849266e-06, "loss": 27.7032, "step": 111280 }, { "epoch": 0.22481284113818445, "grad_norm": 323.6493225097656, "learning_rate": 9.549227155991e-06, "loss": 27.2792, "step": 111290 }, { "epoch": 0.22483304177086827, "grad_norm": 489.6667785644531, "learning_rate": 9.549082300960351e-06, "loss": 15.4463, "step": 111300 }, { "epoch": 0.2248532424035521, "grad_norm": 504.2126159667969, "learning_rate": 9.54893742375803e-06, "loss": 16.0059, "step": 111310 }, { "epoch": 0.22487344303623588, "grad_norm": 856.1909790039062, "learning_rate": 9.548792524384735e-06, "loss": 38.3172, "step": 111320 }, { "epoch": 0.2248936436689197, "grad_norm": 282.4917297363281, "learning_rate": 9.54864760284118e-06, "loss": 19.7568, "step": 111330 }, { "epoch": 0.22491384430160352, "grad_norm": 543.8866577148438, "learning_rate": 9.548502659128069e-06, "loss": 21.8816, "step": 111340 }, { "epoch": 0.22493404493428734, "grad_norm": 304.6163635253906, "learning_rate": 9.548357693246107e-06, "loss": 31.6129, "step": 111350 }, { "epoch": 0.22495424556697116, "grad_norm": 413.11669921875, "learning_rate": 9.548212705196e-06, "loss": 17.6464, "step": 111360 }, { "epoch": 0.22497444619965498, "grad_norm": 845.2156982421875, "learning_rate": 9.548067694978457e-06, "loss": 49.6162, "step": 111370 }, { "epoch": 0.2249946468323388, "grad_norm": 89.01980590820312, "learning_rate": 9.547922662594183e-06, "loss": 25.6655, "step": 111380 }, { "epoch": 0.2250148474650226, "grad_norm": 380.6142578125, "learning_rate": 9.547777608043886e-06, "loss": 24.6077, "step": 111390 }, { "epoch": 0.2250350480977064, "grad_norm": 460.5263671875, "learning_rate": 9.547632531328273e-06, "loss": 31.8443, "step": 111400 }, { "epoch": 0.22505524873039023, "grad_norm": 510.1470031738281, "learning_rate": 9.54748743244805e-06, "loss": 33.939, "step": 111410 }, { "epoch": 0.22507544936307405, "grad_norm": 775.4287719726562, "learning_rate": 9.547342311403924e-06, "loss": 23.3685, "step": 111420 }, { "epoch": 0.22509564999575787, "grad_norm": 920.58544921875, "learning_rate": 9.547197168196605e-06, "loss": 27.1471, "step": 111430 }, { "epoch": 0.2251158506284417, "grad_norm": 850.289306640625, "learning_rate": 9.547052002826797e-06, "loss": 16.5851, "step": 111440 }, { "epoch": 0.22513605126112549, "grad_norm": 235.76002502441406, "learning_rate": 9.546906815295209e-06, "loss": 15.4825, "step": 111450 }, { "epoch": 0.2251562518938093, "grad_norm": 392.30694580078125, "learning_rate": 
9.54676160560255e-06, "loss": 28.6752, "step": 111460 }, { "epoch": 0.22517645252649313, "grad_norm": 790.8316650390625, "learning_rate": 9.546616373749525e-06, "loss": 22.8385, "step": 111470 }, { "epoch": 0.22519665315917695, "grad_norm": 321.32208251953125, "learning_rate": 9.546471119736845e-06, "loss": 22.9641, "step": 111480 }, { "epoch": 0.22521685379186077, "grad_norm": 283.69757080078125, "learning_rate": 9.546325843565213e-06, "loss": 22.2612, "step": 111490 }, { "epoch": 0.22523705442454459, "grad_norm": 750.2130737304688, "learning_rate": 9.546180545235344e-06, "loss": 35.2801, "step": 111500 }, { "epoch": 0.2252572550572284, "grad_norm": 676.2631225585938, "learning_rate": 9.54603522474794e-06, "loss": 36.5525, "step": 111510 }, { "epoch": 0.2252774556899122, "grad_norm": 475.3135986328125, "learning_rate": 9.545889882103712e-06, "loss": 21.8077, "step": 111520 }, { "epoch": 0.22529765632259602, "grad_norm": 381.10906982421875, "learning_rate": 9.545744517303368e-06, "loss": 18.8951, "step": 111530 }, { "epoch": 0.22531785695527984, "grad_norm": 235.5245819091797, "learning_rate": 9.545599130347618e-06, "loss": 20.7448, "step": 111540 }, { "epoch": 0.22533805758796366, "grad_norm": 642.7045288085938, "learning_rate": 9.545453721237167e-06, "loss": 40.0942, "step": 111550 }, { "epoch": 0.22535825822064748, "grad_norm": 204.07562255859375, "learning_rate": 9.545308289972727e-06, "loss": 26.127, "step": 111560 }, { "epoch": 0.2253784588533313, "grad_norm": 640.2852783203125, "learning_rate": 9.545162836555006e-06, "loss": 31.8185, "step": 111570 }, { "epoch": 0.2253986594860151, "grad_norm": 175.4241180419922, "learning_rate": 9.545017360984713e-06, "loss": 13.45, "step": 111580 }, { "epoch": 0.2254188601186989, "grad_norm": 404.0137023925781, "learning_rate": 9.544871863262556e-06, "loss": 22.8184, "step": 111590 }, { "epoch": 0.22543906075138273, "grad_norm": 473.1271667480469, "learning_rate": 9.544726343389245e-06, "loss": 17.9111, "step": 111600 }, { "epoch": 0.22545926138406655, "grad_norm": 268.3240661621094, "learning_rate": 9.544580801365488e-06, "loss": 22.2862, "step": 111610 }, { "epoch": 0.22547946201675037, "grad_norm": 25.966659545898438, "learning_rate": 9.544435237191996e-06, "loss": 8.5848, "step": 111620 }, { "epoch": 0.2254996626494342, "grad_norm": 814.0604858398438, "learning_rate": 9.544289650869477e-06, "loss": 37.6229, "step": 111630 }, { "epoch": 0.22551986328211798, "grad_norm": 266.2414855957031, "learning_rate": 9.544144042398643e-06, "loss": 17.7856, "step": 111640 }, { "epoch": 0.2255400639148018, "grad_norm": 972.8816528320312, "learning_rate": 9.543998411780202e-06, "loss": 21.9517, "step": 111650 }, { "epoch": 0.22556026454748562, "grad_norm": 123.94517517089844, "learning_rate": 9.543852759014863e-06, "loss": 27.2369, "step": 111660 }, { "epoch": 0.22558046518016944, "grad_norm": 396.4852294921875, "learning_rate": 9.543707084103337e-06, "loss": 18.2719, "step": 111670 }, { "epoch": 0.22560066581285326, "grad_norm": 260.7420959472656, "learning_rate": 9.543561387046333e-06, "loss": 22.495, "step": 111680 }, { "epoch": 0.22562086644553708, "grad_norm": 660.4745483398438, "learning_rate": 9.543415667844562e-06, "loss": 14.4435, "step": 111690 }, { "epoch": 0.2256410670782209, "grad_norm": 599.4949951171875, "learning_rate": 9.543269926498735e-06, "loss": 21.1961, "step": 111700 }, { "epoch": 0.2256612677109047, "grad_norm": 991.4645385742188, "learning_rate": 9.54312416300956e-06, "loss": 29.988, "step": 111710 }, { "epoch": 0.22568146834358851, 
"grad_norm": 1059.73779296875, "learning_rate": 9.542978377377752e-06, "loss": 19.3108, "step": 111720 }, { "epoch": 0.22570166897627233, "grad_norm": 53.69286346435547, "learning_rate": 9.542832569604014e-06, "loss": 19.8365, "step": 111730 }, { "epoch": 0.22572186960895615, "grad_norm": 449.1264343261719, "learning_rate": 9.542686739689064e-06, "loss": 31.5003, "step": 111740 }, { "epoch": 0.22574207024163997, "grad_norm": 1351.2528076171875, "learning_rate": 9.54254088763361e-06, "loss": 38.6739, "step": 111750 }, { "epoch": 0.2257622708743238, "grad_norm": 235.5785675048828, "learning_rate": 9.54239501343836e-06, "loss": 12.7008, "step": 111760 }, { "epoch": 0.2257824715070076, "grad_norm": 852.9555053710938, "learning_rate": 9.542249117104032e-06, "loss": 26.4823, "step": 111770 }, { "epoch": 0.2258026721396914, "grad_norm": 936.4501342773438, "learning_rate": 9.54210319863133e-06, "loss": 28.6118, "step": 111780 }, { "epoch": 0.22582287277237523, "grad_norm": 168.53668212890625, "learning_rate": 9.541957258020967e-06, "loss": 20.6891, "step": 111790 }, { "epoch": 0.22584307340505905, "grad_norm": 417.7174072265625, "learning_rate": 9.541811295273657e-06, "loss": 19.725, "step": 111800 }, { "epoch": 0.22586327403774287, "grad_norm": 171.90603637695312, "learning_rate": 9.541665310390109e-06, "loss": 25.9919, "step": 111810 }, { "epoch": 0.2258834746704267, "grad_norm": 428.6181335449219, "learning_rate": 9.541519303371034e-06, "loss": 14.9894, "step": 111820 }, { "epoch": 0.2259036753031105, "grad_norm": 104.92010498046875, "learning_rate": 9.541373274217145e-06, "loss": 19.1093, "step": 111830 }, { "epoch": 0.2259238759357943, "grad_norm": 191.61997985839844, "learning_rate": 9.541227222929155e-06, "loss": 20.3224, "step": 111840 }, { "epoch": 0.22594407656847812, "grad_norm": 79.96380615234375, "learning_rate": 9.541081149507774e-06, "loss": 25.1008, "step": 111850 }, { "epoch": 0.22596427720116194, "grad_norm": 336.1278076171875, "learning_rate": 9.540935053953713e-06, "loss": 29.8053, "step": 111860 }, { "epoch": 0.22598447783384576, "grad_norm": 9.312488555908203, "learning_rate": 9.540788936267686e-06, "loss": 26.7509, "step": 111870 }, { "epoch": 0.22600467846652958, "grad_norm": 24.288253784179688, "learning_rate": 9.540642796450403e-06, "loss": 18.424, "step": 111880 }, { "epoch": 0.2260248790992134, "grad_norm": 492.887451171875, "learning_rate": 9.540496634502581e-06, "loss": 17.8159, "step": 111890 }, { "epoch": 0.2260450797318972, "grad_norm": 566.306396484375, "learning_rate": 9.540350450424927e-06, "loss": 16.7127, "step": 111900 }, { "epoch": 0.226065280364581, "grad_norm": 519.557861328125, "learning_rate": 9.540204244218155e-06, "loss": 26.8343, "step": 111910 }, { "epoch": 0.22608548099726483, "grad_norm": 24.523590087890625, "learning_rate": 9.54005801588298e-06, "loss": 22.0106, "step": 111920 }, { "epoch": 0.22610568162994865, "grad_norm": 392.7301940917969, "learning_rate": 9.539911765420112e-06, "loss": 17.0939, "step": 111930 }, { "epoch": 0.22612588226263247, "grad_norm": 214.16812133789062, "learning_rate": 9.539765492830265e-06, "loss": 18.5989, "step": 111940 }, { "epoch": 0.2261460828953163, "grad_norm": 410.8782043457031, "learning_rate": 9.53961919811415e-06, "loss": 17.4185, "step": 111950 }, { "epoch": 0.22616628352800008, "grad_norm": 475.45269775390625, "learning_rate": 9.539472881272483e-06, "loss": 24.886, "step": 111960 }, { "epoch": 0.2261864841606839, "grad_norm": 322.9455261230469, "learning_rate": 9.539326542305975e-06, "loss": 15.8871, 
"step": 111970 }, { "epoch": 0.22620668479336772, "grad_norm": 180.3836669921875, "learning_rate": 9.53918018121534e-06, "loss": 23.3022, "step": 111980 }, { "epoch": 0.22622688542605154, "grad_norm": 707.36669921875, "learning_rate": 9.539033798001293e-06, "loss": 26.9615, "step": 111990 }, { "epoch": 0.22624708605873536, "grad_norm": 3.039818525314331, "learning_rate": 9.538887392664544e-06, "loss": 17.2103, "step": 112000 }, { "epoch": 0.22626728669141918, "grad_norm": 361.5117492675781, "learning_rate": 9.538740965205809e-06, "loss": 32.8633, "step": 112010 }, { "epoch": 0.226287487324103, "grad_norm": 146.8135986328125, "learning_rate": 9.538594515625802e-06, "loss": 18.9709, "step": 112020 }, { "epoch": 0.2263076879567868, "grad_norm": 872.6524658203125, "learning_rate": 9.538448043925234e-06, "loss": 24.574, "step": 112030 }, { "epoch": 0.22632788858947062, "grad_norm": 659.1326293945312, "learning_rate": 9.538301550104822e-06, "loss": 15.0339, "step": 112040 }, { "epoch": 0.22634808922215444, "grad_norm": 143.9538116455078, "learning_rate": 9.538155034165277e-06, "loss": 16.5316, "step": 112050 }, { "epoch": 0.22636828985483826, "grad_norm": 904.4579467773438, "learning_rate": 9.538008496107317e-06, "loss": 21.554, "step": 112060 }, { "epoch": 0.22638849048752208, "grad_norm": 102.9317626953125, "learning_rate": 9.537861935931651e-06, "loss": 18.2125, "step": 112070 }, { "epoch": 0.2264086911202059, "grad_norm": 378.7657165527344, "learning_rate": 9.537715353639e-06, "loss": 21.2663, "step": 112080 }, { "epoch": 0.2264288917528897, "grad_norm": 641.4647827148438, "learning_rate": 9.537568749230074e-06, "loss": 21.2919, "step": 112090 }, { "epoch": 0.2264490923855735, "grad_norm": 263.9422302246094, "learning_rate": 9.537422122705585e-06, "loss": 32.0067, "step": 112100 }, { "epoch": 0.22646929301825733, "grad_norm": 938.2413940429688, "learning_rate": 9.537275474066254e-06, "loss": 33.6979, "step": 112110 }, { "epoch": 0.22648949365094115, "grad_norm": 665.4387817382812, "learning_rate": 9.537128803312792e-06, "loss": 26.8343, "step": 112120 }, { "epoch": 0.22650969428362497, "grad_norm": 271.7997741699219, "learning_rate": 9.536982110445913e-06, "loss": 32.9056, "step": 112130 }, { "epoch": 0.2265298949163088, "grad_norm": 257.39288330078125, "learning_rate": 9.536835395466334e-06, "loss": 12.9188, "step": 112140 }, { "epoch": 0.2265500955489926, "grad_norm": 767.7734985351562, "learning_rate": 9.536688658374771e-06, "loss": 33.6596, "step": 112150 }, { "epoch": 0.2265702961816764, "grad_norm": 471.4989929199219, "learning_rate": 9.536541899171936e-06, "loss": 23.1471, "step": 112160 }, { "epoch": 0.22659049681436022, "grad_norm": 717.8173217773438, "learning_rate": 9.536395117858548e-06, "loss": 18.662, "step": 112170 }, { "epoch": 0.22661069744704404, "grad_norm": 242.91160583496094, "learning_rate": 9.53624831443532e-06, "loss": 29.4914, "step": 112180 }, { "epoch": 0.22663089807972786, "grad_norm": 279.1715087890625, "learning_rate": 9.536101488902966e-06, "loss": 19.7264, "step": 112190 }, { "epoch": 0.22665109871241168, "grad_norm": 438.48895263671875, "learning_rate": 9.535954641262206e-06, "loss": 30.4032, "step": 112200 }, { "epoch": 0.2266712993450955, "grad_norm": 118.90886688232422, "learning_rate": 9.535807771513751e-06, "loss": 12.928, "step": 112210 }, { "epoch": 0.2266914999777793, "grad_norm": 493.9158935546875, "learning_rate": 9.53566087965832e-06, "loss": 18.6299, "step": 112220 }, { "epoch": 0.2267117006104631, "grad_norm": 15.68353271484375, "learning_rate": 
9.535513965696628e-06, "loss": 13.0776, "step": 112230 }, { "epoch": 0.22673190124314693, "grad_norm": 35.68951416015625, "learning_rate": 9.535367029629392e-06, "loss": 18.3588, "step": 112240 }, { "epoch": 0.22675210187583075, "grad_norm": 236.45184326171875, "learning_rate": 9.535220071457325e-06, "loss": 12.6726, "step": 112250 }, { "epoch": 0.22677230250851457, "grad_norm": 247.4632110595703, "learning_rate": 9.535073091181148e-06, "loss": 32.0151, "step": 112260 }, { "epoch": 0.2267925031411984, "grad_norm": 541.4338989257812, "learning_rate": 9.534926088801572e-06, "loss": 16.6592, "step": 112270 }, { "epoch": 0.22681270377388218, "grad_norm": 585.0704345703125, "learning_rate": 9.534779064319318e-06, "loss": 21.0367, "step": 112280 }, { "epoch": 0.226832904406566, "grad_norm": 198.61337280273438, "learning_rate": 9.5346320177351e-06, "loss": 18.1143, "step": 112290 }, { "epoch": 0.22685310503924982, "grad_norm": 369.2171325683594, "learning_rate": 9.534484949049636e-06, "loss": 13.4298, "step": 112300 }, { "epoch": 0.22687330567193364, "grad_norm": 388.0574951171875, "learning_rate": 9.534337858263643e-06, "loss": 44.5849, "step": 112310 }, { "epoch": 0.22689350630461746, "grad_norm": 365.3808898925781, "learning_rate": 9.534190745377837e-06, "loss": 15.5231, "step": 112320 }, { "epoch": 0.22691370693730128, "grad_norm": 521.5186767578125, "learning_rate": 9.534043610392934e-06, "loss": 41.7762, "step": 112330 }, { "epoch": 0.2269339075699851, "grad_norm": 520.6649169921875, "learning_rate": 9.533896453309654e-06, "loss": 22.0477, "step": 112340 }, { "epoch": 0.2269541082026689, "grad_norm": 124.58840942382812, "learning_rate": 9.533749274128712e-06, "loss": 15.6566, "step": 112350 }, { "epoch": 0.22697430883535272, "grad_norm": 254.6902618408203, "learning_rate": 9.533602072850826e-06, "loss": 21.6187, "step": 112360 }, { "epoch": 0.22699450946803654, "grad_norm": 391.0091247558594, "learning_rate": 9.533454849476712e-06, "loss": 28.8004, "step": 112370 }, { "epoch": 0.22701471010072036, "grad_norm": 491.1453552246094, "learning_rate": 9.533307604007089e-06, "loss": 11.2923, "step": 112380 }, { "epoch": 0.22703491073340418, "grad_norm": 175.738525390625, "learning_rate": 9.533160336442677e-06, "loss": 36.2641, "step": 112390 }, { "epoch": 0.227055111366088, "grad_norm": 795.9658203125, "learning_rate": 9.53301304678419e-06, "loss": 27.7926, "step": 112400 }, { "epoch": 0.2270753119987718, "grad_norm": 3.568608283996582, "learning_rate": 9.532865735032346e-06, "loss": 12.5416, "step": 112410 }, { "epoch": 0.2270955126314556, "grad_norm": 279.2919006347656, "learning_rate": 9.532718401187866e-06, "loss": 16.607, "step": 112420 }, { "epoch": 0.22711571326413943, "grad_norm": 816.3533935546875, "learning_rate": 9.532571045251465e-06, "loss": 21.4067, "step": 112430 }, { "epoch": 0.22713591389682325, "grad_norm": 700.81884765625, "learning_rate": 9.532423667223863e-06, "loss": 32.1505, "step": 112440 }, { "epoch": 0.22715611452950707, "grad_norm": 813.9895629882812, "learning_rate": 9.532276267105778e-06, "loss": 14.7046, "step": 112450 }, { "epoch": 0.2271763151621909, "grad_norm": 821.1465454101562, "learning_rate": 9.532128844897928e-06, "loss": 28.3573, "step": 112460 }, { "epoch": 0.22719651579487468, "grad_norm": 235.25338745117188, "learning_rate": 9.531981400601032e-06, "loss": 13.526, "step": 112470 }, { "epoch": 0.2272167164275585, "grad_norm": 904.8468017578125, "learning_rate": 9.531833934215807e-06, "loss": 28.7744, "step": 112480 }, { "epoch": 0.22723691706024232, 
"grad_norm": 13.919620513916016, "learning_rate": 9.531686445742973e-06, "loss": 14.4584, "step": 112490 }, { "epoch": 0.22725711769292614, "grad_norm": 431.6491394042969, "learning_rate": 9.531538935183252e-06, "loss": 27.3013, "step": 112500 }, { "epoch": 0.22727731832560996, "grad_norm": 325.6951904296875, "learning_rate": 9.531391402537355e-06, "loss": 20.2664, "step": 112510 }, { "epoch": 0.22729751895829378, "grad_norm": 207.41888427734375, "learning_rate": 9.53124384780601e-06, "loss": 32.2594, "step": 112520 }, { "epoch": 0.2273177195909776, "grad_norm": 669.0628051757812, "learning_rate": 9.53109627098993e-06, "loss": 22.2678, "step": 112530 }, { "epoch": 0.2273379202236614, "grad_norm": 403.3323974609375, "learning_rate": 9.530948672089837e-06, "loss": 16.2452, "step": 112540 }, { "epoch": 0.2273581208563452, "grad_norm": 529.3194580078125, "learning_rate": 9.530801051106449e-06, "loss": 16.2755, "step": 112550 }, { "epoch": 0.22737832148902903, "grad_norm": 595.4201049804688, "learning_rate": 9.530653408040487e-06, "loss": 33.8954, "step": 112560 }, { "epoch": 0.22739852212171285, "grad_norm": 463.9022521972656, "learning_rate": 9.53050574289267e-06, "loss": 10.3893, "step": 112570 }, { "epoch": 0.22741872275439667, "grad_norm": 379.9695129394531, "learning_rate": 9.530358055663718e-06, "loss": 45.6644, "step": 112580 }, { "epoch": 0.2274389233870805, "grad_norm": 393.81353759765625, "learning_rate": 9.53021034635435e-06, "loss": 16.6902, "step": 112590 }, { "epoch": 0.22745912401976429, "grad_norm": 434.11199951171875, "learning_rate": 9.530062614965286e-06, "loss": 18.362, "step": 112600 }, { "epoch": 0.2274793246524481, "grad_norm": 171.09864807128906, "learning_rate": 9.529914861497244e-06, "loss": 21.0003, "step": 112610 }, { "epoch": 0.22749952528513193, "grad_norm": 449.3027038574219, "learning_rate": 9.52976708595095e-06, "loss": 32.4687, "step": 112620 }, { "epoch": 0.22751972591781575, "grad_norm": 510.5466613769531, "learning_rate": 9.529619288327118e-06, "loss": 23.3475, "step": 112630 }, { "epoch": 0.22753992655049957, "grad_norm": 550.9493408203125, "learning_rate": 9.529471468626472e-06, "loss": 16.5837, "step": 112640 }, { "epoch": 0.22756012718318339, "grad_norm": 85.97711944580078, "learning_rate": 9.529323626849731e-06, "loss": 23.2321, "step": 112650 }, { "epoch": 0.2275803278158672, "grad_norm": 622.16650390625, "learning_rate": 9.529175762997616e-06, "loss": 29.7396, "step": 112660 }, { "epoch": 0.227600528448551, "grad_norm": 308.7371826171875, "learning_rate": 9.529027877070848e-06, "loss": 16.2574, "step": 112670 }, { "epoch": 0.22762072908123482, "grad_norm": 538.2896728515625, "learning_rate": 9.528879969070148e-06, "loss": 14.8027, "step": 112680 }, { "epoch": 0.22764092971391864, "grad_norm": 121.7942123413086, "learning_rate": 9.528732038996236e-06, "loss": 16.3837, "step": 112690 }, { "epoch": 0.22766113034660246, "grad_norm": 317.30841064453125, "learning_rate": 9.528584086849832e-06, "loss": 15.4871, "step": 112700 }, { "epoch": 0.22768133097928628, "grad_norm": 615.7967529296875, "learning_rate": 9.528436112631659e-06, "loss": 20.6995, "step": 112710 }, { "epoch": 0.2277015316119701, "grad_norm": 272.8873596191406, "learning_rate": 9.528288116342439e-06, "loss": 20.939, "step": 112720 }, { "epoch": 0.2277217322446539, "grad_norm": 316.3938903808594, "learning_rate": 9.52814009798289e-06, "loss": 30.0582, "step": 112730 }, { "epoch": 0.2277419328773377, "grad_norm": 345.8398132324219, "learning_rate": 9.527992057553736e-06, "loss": 23.2847, 
"step": 112740 }, { "epoch": 0.22776213351002153, "grad_norm": 638.2636108398438, "learning_rate": 9.527843995055698e-06, "loss": 19.8952, "step": 112750 }, { "epoch": 0.22778233414270535, "grad_norm": 160.67169189453125, "learning_rate": 9.527695910489498e-06, "loss": 22.0684, "step": 112760 }, { "epoch": 0.22780253477538917, "grad_norm": 281.597900390625, "learning_rate": 9.527547803855858e-06, "loss": 26.0992, "step": 112770 }, { "epoch": 0.227822735408073, "grad_norm": 515.9508056640625, "learning_rate": 9.527399675155497e-06, "loss": 21.5287, "step": 112780 }, { "epoch": 0.22784293604075678, "grad_norm": 284.7909851074219, "learning_rate": 9.52725152438914e-06, "loss": 53.4256, "step": 112790 }, { "epoch": 0.2278631366734406, "grad_norm": 876.8087158203125, "learning_rate": 9.52710335155751e-06, "loss": 24.4455, "step": 112800 }, { "epoch": 0.22788333730612442, "grad_norm": 279.994140625, "learning_rate": 9.526955156661324e-06, "loss": 13.2789, "step": 112810 }, { "epoch": 0.22790353793880824, "grad_norm": 479.1186218261719, "learning_rate": 9.52680693970131e-06, "loss": 32.3463, "step": 112820 }, { "epoch": 0.22792373857149206, "grad_norm": 243.9750213623047, "learning_rate": 9.526658700678188e-06, "loss": 43.7144, "step": 112830 }, { "epoch": 0.22794393920417588, "grad_norm": 639.2210083007812, "learning_rate": 9.52651043959268e-06, "loss": 16.2402, "step": 112840 }, { "epoch": 0.2279641398368597, "grad_norm": 250.73939514160156, "learning_rate": 9.526362156445508e-06, "loss": 18.0421, "step": 112850 }, { "epoch": 0.2279843404695435, "grad_norm": 760.5935668945312, "learning_rate": 9.526213851237396e-06, "loss": 25.3497, "step": 112860 }, { "epoch": 0.22800454110222731, "grad_norm": 196.826904296875, "learning_rate": 9.526065523969068e-06, "loss": 21.9941, "step": 112870 }, { "epoch": 0.22802474173491113, "grad_norm": 480.93963623046875, "learning_rate": 9.525917174641246e-06, "loss": 33.3459, "step": 112880 }, { "epoch": 0.22804494236759495, "grad_norm": 358.2469177246094, "learning_rate": 9.525768803254651e-06, "loss": 20.38, "step": 112890 }, { "epoch": 0.22806514300027877, "grad_norm": 581.6536254882812, "learning_rate": 9.525620409810009e-06, "loss": 19.1054, "step": 112900 }, { "epoch": 0.2280853436329626, "grad_norm": 692.8551635742188, "learning_rate": 9.52547199430804e-06, "loss": 32.9187, "step": 112910 }, { "epoch": 0.2281055442656464, "grad_norm": 148.31716918945312, "learning_rate": 9.525323556749472e-06, "loss": 23.5628, "step": 112920 }, { "epoch": 0.2281257448983302, "grad_norm": 1818.9072265625, "learning_rate": 9.525175097135024e-06, "loss": 25.2567, "step": 112930 }, { "epoch": 0.22814594553101403, "grad_norm": 522.2452392578125, "learning_rate": 9.525026615465423e-06, "loss": 27.8579, "step": 112940 }, { "epoch": 0.22816614616369785, "grad_norm": 467.10198974609375, "learning_rate": 9.524878111741388e-06, "loss": 11.4076, "step": 112950 }, { "epoch": 0.22818634679638167, "grad_norm": 308.8968505859375, "learning_rate": 9.52472958596365e-06, "loss": 38.7424, "step": 112960 }, { "epoch": 0.2282065474290655, "grad_norm": 1003.9118041992188, "learning_rate": 9.524581038132927e-06, "loss": 52.8455, "step": 112970 }, { "epoch": 0.2282267480617493, "grad_norm": 84.62493896484375, "learning_rate": 9.524432468249944e-06, "loss": 29.7125, "step": 112980 }, { "epoch": 0.2282469486944331, "grad_norm": 271.1474609375, "learning_rate": 9.524283876315427e-06, "loss": 51.8876, "step": 112990 }, { "epoch": 0.22826714932711692, "grad_norm": 345.8349609375, "learning_rate": 
9.524135262330098e-06, "loss": 29.9309, "step": 113000 }, { "epoch": 0.22828734995980074, "grad_norm": 87.12869262695312, "learning_rate": 9.523986626294683e-06, "loss": 30.1322, "step": 113010 }, { "epoch": 0.22830755059248456, "grad_norm": 341.88641357421875, "learning_rate": 9.523837968209906e-06, "loss": 20.9121, "step": 113020 }, { "epoch": 0.22832775122516838, "grad_norm": 96.60131072998047, "learning_rate": 9.523689288076491e-06, "loss": 26.9941, "step": 113030 }, { "epoch": 0.2283479518578522, "grad_norm": 236.14979553222656, "learning_rate": 9.523540585895164e-06, "loss": 28.8258, "step": 113040 }, { "epoch": 0.228368152490536, "grad_norm": 124.12235260009766, "learning_rate": 9.523391861666649e-06, "loss": 15.0267, "step": 113050 }, { "epoch": 0.2283883531232198, "grad_norm": 914.7811889648438, "learning_rate": 9.52324311539167e-06, "loss": 31.5337, "step": 113060 }, { "epoch": 0.22840855375590363, "grad_norm": 457.1055603027344, "learning_rate": 9.523094347070951e-06, "loss": 28.5129, "step": 113070 }, { "epoch": 0.22842875438858745, "grad_norm": 285.68121337890625, "learning_rate": 9.522945556705221e-06, "loss": 25.1645, "step": 113080 }, { "epoch": 0.22844895502127127, "grad_norm": 228.176025390625, "learning_rate": 9.522796744295202e-06, "loss": 15.6948, "step": 113090 }, { "epoch": 0.2284691556539551, "grad_norm": 198.18495178222656, "learning_rate": 9.52264790984162e-06, "loss": 21.6688, "step": 113100 }, { "epoch": 0.22848935628663888, "grad_norm": 365.2386169433594, "learning_rate": 9.522499053345203e-06, "loss": 21.5408, "step": 113110 }, { "epoch": 0.2285095569193227, "grad_norm": 750.3790893554688, "learning_rate": 9.522350174806672e-06, "loss": 29.4794, "step": 113120 }, { "epoch": 0.22852975755200652, "grad_norm": 326.8194580078125, "learning_rate": 9.522201274226755e-06, "loss": 17.8257, "step": 113130 }, { "epoch": 0.22854995818469034, "grad_norm": 378.5831604003906, "learning_rate": 9.522052351606177e-06, "loss": 17.2237, "step": 113140 }, { "epoch": 0.22857015881737416, "grad_norm": 251.2360076904297, "learning_rate": 9.521903406945666e-06, "loss": 25.7183, "step": 113150 }, { "epoch": 0.22859035945005798, "grad_norm": 592.8699951171875, "learning_rate": 9.521754440245944e-06, "loss": 16.1047, "step": 113160 }, { "epoch": 0.2286105600827418, "grad_norm": 464.73004150390625, "learning_rate": 9.52160545150774e-06, "loss": 34.6223, "step": 113170 }, { "epoch": 0.2286307607154256, "grad_norm": 495.97625732421875, "learning_rate": 9.52145644073178e-06, "loss": 18.1199, "step": 113180 }, { "epoch": 0.22865096134810942, "grad_norm": 785.1619262695312, "learning_rate": 9.52130740791879e-06, "loss": 20.4017, "step": 113190 }, { "epoch": 0.22867116198079324, "grad_norm": 369.74822998046875, "learning_rate": 9.521158353069494e-06, "loss": 21.0754, "step": 113200 }, { "epoch": 0.22869136261347706, "grad_norm": 249.15182495117188, "learning_rate": 9.521009276184624e-06, "loss": 11.2801, "step": 113210 }, { "epoch": 0.22871156324616088, "grad_norm": 665.263671875, "learning_rate": 9.520860177264898e-06, "loss": 11.3987, "step": 113220 }, { "epoch": 0.2287317638788447, "grad_norm": 1030.80078125, "learning_rate": 9.520711056311052e-06, "loss": 33.1406, "step": 113230 }, { "epoch": 0.2287519645115285, "grad_norm": 1151.8558349609375, "learning_rate": 9.520561913323807e-06, "loss": 30.7249, "step": 113240 }, { "epoch": 0.2287721651442123, "grad_norm": 860.5791015625, "learning_rate": 9.520412748303894e-06, "loss": 33.5727, "step": 113250 }, { "epoch": 0.22879236577689613, 
"grad_norm": 982.9208984375, "learning_rate": 9.520263561252035e-06, "loss": 29.1938, "step": 113260 }, { "epoch": 0.22881256640957995, "grad_norm": 307.06475830078125, "learning_rate": 9.52011435216896e-06, "loss": 25.6368, "step": 113270 }, { "epoch": 0.22883276704226377, "grad_norm": 247.4782257080078, "learning_rate": 9.519965121055395e-06, "loss": 25.1266, "step": 113280 }, { "epoch": 0.2288529676749476, "grad_norm": 870.2067260742188, "learning_rate": 9.51981586791207e-06, "loss": 32.7014, "step": 113290 }, { "epoch": 0.2288731683076314, "grad_norm": 69.65391540527344, "learning_rate": 9.51966659273971e-06, "loss": 28.6963, "step": 113300 }, { "epoch": 0.2288933689403152, "grad_norm": 327.62384033203125, "learning_rate": 9.519517295539042e-06, "loss": 33.9486, "step": 113310 }, { "epoch": 0.22891356957299902, "grad_norm": 345.7291259765625, "learning_rate": 9.519367976310796e-06, "loss": 19.4765, "step": 113320 }, { "epoch": 0.22893377020568284, "grad_norm": 516.2244262695312, "learning_rate": 9.5192186350557e-06, "loss": 22.1515, "step": 113330 }, { "epoch": 0.22895397083836666, "grad_norm": 175.62525939941406, "learning_rate": 9.519069271774479e-06, "loss": 17.5981, "step": 113340 }, { "epoch": 0.22897417147105048, "grad_norm": 374.07574462890625, "learning_rate": 9.51891988646786e-06, "loss": 29.519, "step": 113350 }, { "epoch": 0.2289943721037343, "grad_norm": 430.2989807128906, "learning_rate": 9.51877047913658e-06, "loss": 24.8094, "step": 113360 }, { "epoch": 0.2290145727364181, "grad_norm": 630.27392578125, "learning_rate": 9.518621049781356e-06, "loss": 20.1694, "step": 113370 }, { "epoch": 0.2290347733691019, "grad_norm": 365.74542236328125, "learning_rate": 9.51847159840292e-06, "loss": 21.1702, "step": 113380 }, { "epoch": 0.22905497400178573, "grad_norm": 506.5300598144531, "learning_rate": 9.518322125002004e-06, "loss": 15.3547, "step": 113390 }, { "epoch": 0.22907517463446955, "grad_norm": 395.3695983886719, "learning_rate": 9.518172629579334e-06, "loss": 17.4382, "step": 113400 }, { "epoch": 0.22909537526715337, "grad_norm": 669.5462036132812, "learning_rate": 9.518023112135636e-06, "loss": 23.1995, "step": 113410 }, { "epoch": 0.2291155758998372, "grad_norm": 181.2217254638672, "learning_rate": 9.517873572671646e-06, "loss": 25.9407, "step": 113420 }, { "epoch": 0.22913577653252098, "grad_norm": 992.484619140625, "learning_rate": 9.517724011188083e-06, "loss": 29.2899, "step": 113430 }, { "epoch": 0.2291559771652048, "grad_norm": 842.7274169921875, "learning_rate": 9.517574427685686e-06, "loss": 32.8349, "step": 113440 }, { "epoch": 0.22917617779788862, "grad_norm": 422.0934753417969, "learning_rate": 9.517424822165175e-06, "loss": 26.1276, "step": 113450 }, { "epoch": 0.22919637843057244, "grad_norm": 738.347412109375, "learning_rate": 9.517275194627285e-06, "loss": 27.2194, "step": 113460 }, { "epoch": 0.22921657906325626, "grad_norm": 692.671875, "learning_rate": 9.517125545072743e-06, "loss": 17.9071, "step": 113470 }, { "epoch": 0.22923677969594008, "grad_norm": 302.58978271484375, "learning_rate": 9.51697587350228e-06, "loss": 22.6542, "step": 113480 }, { "epoch": 0.2292569803286239, "grad_norm": 560.926025390625, "learning_rate": 9.516826179916625e-06, "loss": 28.4785, "step": 113490 }, { "epoch": 0.2292771809613077, "grad_norm": 296.8754577636719, "learning_rate": 9.516676464316505e-06, "loss": 26.4145, "step": 113500 }, { "epoch": 0.22929738159399152, "grad_norm": 276.0968933105469, "learning_rate": 9.516526726702653e-06, "loss": 18.9407, "step": 113510 }, { 
"epoch": 0.22931758222667534, "grad_norm": 711.6101684570312, "learning_rate": 9.516376967075797e-06, "loss": 45.2008, "step": 113520 }, { "epoch": 0.22933778285935916, "grad_norm": 378.4603576660156, "learning_rate": 9.516227185436667e-06, "loss": 25.7412, "step": 113530 }, { "epoch": 0.22935798349204298, "grad_norm": 275.92950439453125, "learning_rate": 9.516077381785995e-06, "loss": 11.1533, "step": 113540 }, { "epoch": 0.2293781841247268, "grad_norm": 602.3226928710938, "learning_rate": 9.515927556124508e-06, "loss": 21.1007, "step": 113550 }, { "epoch": 0.2293983847574106, "grad_norm": 15.636978149414062, "learning_rate": 9.515777708452938e-06, "loss": 35.1794, "step": 113560 }, { "epoch": 0.2294185853900944, "grad_norm": 364.009521484375, "learning_rate": 9.515627838772016e-06, "loss": 11.867, "step": 113570 }, { "epoch": 0.22943878602277823, "grad_norm": 677.7067260742188, "learning_rate": 9.515477947082473e-06, "loss": 18.9565, "step": 113580 }, { "epoch": 0.22945898665546205, "grad_norm": 423.3853759765625, "learning_rate": 9.515328033385035e-06, "loss": 17.6075, "step": 113590 }, { "epoch": 0.22947918728814587, "grad_norm": 252.23199462890625, "learning_rate": 9.515178097680437e-06, "loss": 18.3802, "step": 113600 }, { "epoch": 0.2294993879208297, "grad_norm": 440.6402893066406, "learning_rate": 9.515028139969409e-06, "loss": 28.2897, "step": 113610 }, { "epoch": 0.2295195885535135, "grad_norm": 836.3695068359375, "learning_rate": 9.514878160252681e-06, "loss": 28.0076, "step": 113620 }, { "epoch": 0.2295397891861973, "grad_norm": 387.6072082519531, "learning_rate": 9.514728158530983e-06, "loss": 22.3299, "step": 113630 }, { "epoch": 0.22955998981888112, "grad_norm": 1034.2071533203125, "learning_rate": 9.51457813480505e-06, "loss": 23.7812, "step": 113640 }, { "epoch": 0.22958019045156494, "grad_norm": 415.7701721191406, "learning_rate": 9.514428089075611e-06, "loss": 31.8249, "step": 113650 }, { "epoch": 0.22960039108424876, "grad_norm": 496.410888671875, "learning_rate": 9.514278021343395e-06, "loss": 12.046, "step": 113660 }, { "epoch": 0.22962059171693258, "grad_norm": 748.201171875, "learning_rate": 9.514127931609136e-06, "loss": 28.4757, "step": 113670 }, { "epoch": 0.2296407923496164, "grad_norm": 80.744140625, "learning_rate": 9.513977819873565e-06, "loss": 25.2247, "step": 113680 }, { "epoch": 0.2296609929823002, "grad_norm": 148.37989807128906, "learning_rate": 9.513827686137415e-06, "loss": 43.3999, "step": 113690 }, { "epoch": 0.229681193614984, "grad_norm": 455.69915771484375, "learning_rate": 9.513677530401415e-06, "loss": 23.1143, "step": 113700 }, { "epoch": 0.22970139424766783, "grad_norm": 338.1597595214844, "learning_rate": 9.513527352666298e-06, "loss": 26.9409, "step": 113710 }, { "epoch": 0.22972159488035165, "grad_norm": 375.36627197265625, "learning_rate": 9.513377152932796e-06, "loss": 16.2152, "step": 113720 }, { "epoch": 0.22974179551303547, "grad_norm": 55.9153938293457, "learning_rate": 9.513226931201642e-06, "loss": 30.1076, "step": 113730 }, { "epoch": 0.2297619961457193, "grad_norm": 522.5823974609375, "learning_rate": 9.513076687473568e-06, "loss": 31.5706, "step": 113740 }, { "epoch": 0.22978219677840309, "grad_norm": 474.190185546875, "learning_rate": 9.512926421749305e-06, "loss": 28.7402, "step": 113750 }, { "epoch": 0.2298023974110869, "grad_norm": 568.06005859375, "learning_rate": 9.512776134029585e-06, "loss": 26.3172, "step": 113760 }, { "epoch": 0.22982259804377073, "grad_norm": 829.3409423828125, "learning_rate": 9.512625824315142e-06, 
"loss": 25.2735, "step": 113770 }, { "epoch": 0.22984279867645455, "grad_norm": 744.4042358398438, "learning_rate": 9.512475492606707e-06, "loss": 31.0177, "step": 113780 }, { "epoch": 0.22986299930913837, "grad_norm": 757.982177734375, "learning_rate": 9.512325138905015e-06, "loss": 19.734, "step": 113790 }, { "epoch": 0.22988319994182219, "grad_norm": 524.1359252929688, "learning_rate": 9.512174763210798e-06, "loss": 19.0216, "step": 113800 }, { "epoch": 0.229903400574506, "grad_norm": 266.0520324707031, "learning_rate": 9.512024365524788e-06, "loss": 29.473, "step": 113810 }, { "epoch": 0.2299236012071898, "grad_norm": 283.6138916015625, "learning_rate": 9.511873945847718e-06, "loss": 20.3686, "step": 113820 }, { "epoch": 0.22994380183987362, "grad_norm": 327.4731750488281, "learning_rate": 9.511723504180321e-06, "loss": 10.5312, "step": 113830 }, { "epoch": 0.22996400247255744, "grad_norm": 85.706298828125, "learning_rate": 9.511573040523332e-06, "loss": 29.0429, "step": 113840 }, { "epoch": 0.22998420310524126, "grad_norm": 209.13125610351562, "learning_rate": 9.511422554877482e-06, "loss": 52.1279, "step": 113850 }, { "epoch": 0.23000440373792508, "grad_norm": 306.1725158691406, "learning_rate": 9.511272047243507e-06, "loss": 23.8104, "step": 113860 }, { "epoch": 0.2300246043706089, "grad_norm": 101.19133758544922, "learning_rate": 9.51112151762214e-06, "loss": 23.7251, "step": 113870 }, { "epoch": 0.2300448050032927, "grad_norm": 710.2012329101562, "learning_rate": 9.510970966014112e-06, "loss": 20.1284, "step": 113880 }, { "epoch": 0.2300650056359765, "grad_norm": 807.711181640625, "learning_rate": 9.51082039242016e-06, "loss": 16.4794, "step": 113890 }, { "epoch": 0.23008520626866033, "grad_norm": 560.6378784179688, "learning_rate": 9.510669796841014e-06, "loss": 38.8351, "step": 113900 }, { "epoch": 0.23010540690134415, "grad_norm": 3355.491943359375, "learning_rate": 9.510519179277414e-06, "loss": 40.4513, "step": 113910 }, { "epoch": 0.23012560753402797, "grad_norm": 583.879638671875, "learning_rate": 9.510368539730089e-06, "loss": 26.1312, "step": 113920 }, { "epoch": 0.2301458081667118, "grad_norm": 146.33290100097656, "learning_rate": 9.510217878199773e-06, "loss": 30.3424, "step": 113930 }, { "epoch": 0.2301660087993956, "grad_norm": 609.0533447265625, "learning_rate": 9.510067194687205e-06, "loss": 26.2492, "step": 113940 }, { "epoch": 0.2301862094320794, "grad_norm": 240.16952514648438, "learning_rate": 9.509916489193114e-06, "loss": 28.5853, "step": 113950 }, { "epoch": 0.23020641006476322, "grad_norm": 862.90234375, "learning_rate": 9.50976576171824e-06, "loss": 21.525, "step": 113960 }, { "epoch": 0.23022661069744704, "grad_norm": 549.5601806640625, "learning_rate": 9.509615012263311e-06, "loss": 21.0239, "step": 113970 }, { "epoch": 0.23024681133013086, "grad_norm": 185.5392303466797, "learning_rate": 9.509464240829067e-06, "loss": 23.9225, "step": 113980 }, { "epoch": 0.23026701196281468, "grad_norm": 881.9199829101562, "learning_rate": 9.509313447416241e-06, "loss": 22.6354, "step": 113990 }, { "epoch": 0.2302872125954985, "grad_norm": 229.3199462890625, "learning_rate": 9.50916263202557e-06, "loss": 15.036, "step": 114000 }, { "epoch": 0.2303074132281823, "grad_norm": 208.56417846679688, "learning_rate": 9.509011794657785e-06, "loss": 18.8227, "step": 114010 }, { "epoch": 0.23032761386086611, "grad_norm": 432.16339111328125, "learning_rate": 9.508860935313623e-06, "loss": 19.2313, "step": 114020 }, { "epoch": 0.23034781449354993, "grad_norm": 704.3341064453125, 
"learning_rate": 9.508710053993822e-06, "loss": 25.9992, "step": 114030 }, { "epoch": 0.23036801512623375, "grad_norm": 382.67144775390625, "learning_rate": 9.508559150699115e-06, "loss": 12.15, "step": 114040 }, { "epoch": 0.23038821575891757, "grad_norm": 115.54539489746094, "learning_rate": 9.508408225430237e-06, "loss": 25.6531, "step": 114050 }, { "epoch": 0.2304084163916014, "grad_norm": 557.89892578125, "learning_rate": 9.508257278187923e-06, "loss": 25.4176, "step": 114060 }, { "epoch": 0.2304286170242852, "grad_norm": 154.92672729492188, "learning_rate": 9.50810630897291e-06, "loss": 14.019, "step": 114070 }, { "epoch": 0.230448817656969, "grad_norm": 530.8345336914062, "learning_rate": 9.507955317785935e-06, "loss": 21.4318, "step": 114080 }, { "epoch": 0.23046901828965283, "grad_norm": 230.95797729492188, "learning_rate": 9.50780430462773e-06, "loss": 28.5689, "step": 114090 }, { "epoch": 0.23048921892233665, "grad_norm": 389.99969482421875, "learning_rate": 9.507653269499035e-06, "loss": 27.033, "step": 114100 }, { "epoch": 0.23050941955502047, "grad_norm": 452.2672424316406, "learning_rate": 9.507502212400585e-06, "loss": 25.4473, "step": 114110 }, { "epoch": 0.2305296201877043, "grad_norm": 111.10558319091797, "learning_rate": 9.507351133333116e-06, "loss": 23.7443, "step": 114120 }, { "epoch": 0.2305498208203881, "grad_norm": 7.091848850250244, "learning_rate": 9.507200032297364e-06, "loss": 42.0829, "step": 114130 }, { "epoch": 0.2305700214530719, "grad_norm": 282.49462890625, "learning_rate": 9.507048909294065e-06, "loss": 30.353, "step": 114140 }, { "epoch": 0.23059022208575572, "grad_norm": 221.93344116210938, "learning_rate": 9.506897764323957e-06, "loss": 29.1922, "step": 114150 }, { "epoch": 0.23061042271843954, "grad_norm": 432.72589111328125, "learning_rate": 9.506746597387776e-06, "loss": 23.8334, "step": 114160 }, { "epoch": 0.23063062335112336, "grad_norm": 385.8450012207031, "learning_rate": 9.50659540848626e-06, "loss": 13.5499, "step": 114170 }, { "epoch": 0.23065082398380718, "grad_norm": 153.2083740234375, "learning_rate": 9.506444197620142e-06, "loss": 13.4851, "step": 114180 }, { "epoch": 0.230671024616491, "grad_norm": 226.3197479248047, "learning_rate": 9.506292964790162e-06, "loss": 15.1022, "step": 114190 }, { "epoch": 0.2306912252491748, "grad_norm": 421.44403076171875, "learning_rate": 9.506141709997058e-06, "loss": 17.9024, "step": 114200 }, { "epoch": 0.2307114258818586, "grad_norm": 657.6426391601562, "learning_rate": 9.505990433241565e-06, "loss": 17.4907, "step": 114210 }, { "epoch": 0.23073162651454243, "grad_norm": 714.735595703125, "learning_rate": 9.50583913452442e-06, "loss": 33.5603, "step": 114220 }, { "epoch": 0.23075182714722625, "grad_norm": 419.1542053222656, "learning_rate": 9.505687813846363e-06, "loss": 8.4052, "step": 114230 }, { "epoch": 0.23077202777991007, "grad_norm": 719.9962158203125, "learning_rate": 9.50553647120813e-06, "loss": 36.5737, "step": 114240 }, { "epoch": 0.2307922284125939, "grad_norm": 463.834228515625, "learning_rate": 9.505385106610458e-06, "loss": 25.4381, "step": 114250 }, { "epoch": 0.2308124290452777, "grad_norm": 253.3015899658203, "learning_rate": 9.505233720054086e-06, "loss": 25.6091, "step": 114260 }, { "epoch": 0.2308326296779615, "grad_norm": 309.06396484375, "learning_rate": 9.505082311539752e-06, "loss": 32.5767, "step": 114270 }, { "epoch": 0.23085283031064532, "grad_norm": 387.4048156738281, "learning_rate": 9.504930881068193e-06, "loss": 33.0901, "step": 114280 }, { "epoch": 
0.23087303094332914, "grad_norm": 509.7787170410156, "learning_rate": 9.504779428640146e-06, "loss": 15.3536, "step": 114290 }, { "epoch": 0.23089323157601296, "grad_norm": 419.689208984375, "learning_rate": 9.504627954256352e-06, "loss": 22.8543, "step": 114300 }, { "epoch": 0.23091343220869678, "grad_norm": 524.3058471679688, "learning_rate": 9.504476457917546e-06, "loss": 21.437, "step": 114310 }, { "epoch": 0.2309336328413806, "grad_norm": 582.3783569335938, "learning_rate": 9.504324939624469e-06, "loss": 27.2149, "step": 114320 }, { "epoch": 0.2309538334740644, "grad_norm": 572.2953491210938, "learning_rate": 9.504173399377858e-06, "loss": 13.3763, "step": 114330 }, { "epoch": 0.23097403410674822, "grad_norm": 831.9983520507812, "learning_rate": 9.504021837178452e-06, "loss": 20.36, "step": 114340 }, { "epoch": 0.23099423473943204, "grad_norm": 1011.7919921875, "learning_rate": 9.503870253026992e-06, "loss": 37.3983, "step": 114350 }, { "epoch": 0.23101443537211586, "grad_norm": 662.2227783203125, "learning_rate": 9.503718646924211e-06, "loss": 24.7651, "step": 114360 }, { "epoch": 0.23103463600479968, "grad_norm": 674.4508056640625, "learning_rate": 9.503567018870855e-06, "loss": 25.7229, "step": 114370 }, { "epoch": 0.2310548366374835, "grad_norm": 447.40557861328125, "learning_rate": 9.503415368867658e-06, "loss": 16.263, "step": 114380 }, { "epoch": 0.2310750372701673, "grad_norm": 412.7706298828125, "learning_rate": 9.503263696915361e-06, "loss": 20.0842, "step": 114390 }, { "epoch": 0.2310952379028511, "grad_norm": 934.6969604492188, "learning_rate": 9.503112003014702e-06, "loss": 33.9432, "step": 114400 }, { "epoch": 0.23111543853553493, "grad_norm": 325.6151123046875, "learning_rate": 9.502960287166423e-06, "loss": 20.7496, "step": 114410 }, { "epoch": 0.23113563916821875, "grad_norm": 72.51821899414062, "learning_rate": 9.50280854937126e-06, "loss": 21.2566, "step": 114420 }, { "epoch": 0.23115583980090257, "grad_norm": 172.7029266357422, "learning_rate": 9.502656789629956e-06, "loss": 18.6805, "step": 114430 }, { "epoch": 0.2311760404335864, "grad_norm": 1268.697265625, "learning_rate": 9.502505007943248e-06, "loss": 28.5747, "step": 114440 }, { "epoch": 0.2311962410662702, "grad_norm": 383.0333251953125, "learning_rate": 9.502353204311876e-06, "loss": 25.1674, "step": 114450 }, { "epoch": 0.231216441698954, "grad_norm": 565.036865234375, "learning_rate": 9.50220137873658e-06, "loss": 26.0444, "step": 114460 }, { "epoch": 0.23123664233163782, "grad_norm": 174.50863647460938, "learning_rate": 9.502049531218103e-06, "loss": 25.0231, "step": 114470 }, { "epoch": 0.23125684296432164, "grad_norm": 396.9252014160156, "learning_rate": 9.501897661757182e-06, "loss": 27.3269, "step": 114480 }, { "epoch": 0.23127704359700546, "grad_norm": 202.41270446777344, "learning_rate": 9.501745770354555e-06, "loss": 17.6822, "step": 114490 }, { "epoch": 0.23129724422968928, "grad_norm": 246.49981689453125, "learning_rate": 9.501593857010968e-06, "loss": 20.3343, "step": 114500 }, { "epoch": 0.2313174448623731, "grad_norm": 0.0, "learning_rate": 9.501441921727158e-06, "loss": 20.5623, "step": 114510 }, { "epoch": 0.2313376454950569, "grad_norm": 356.1559143066406, "learning_rate": 9.501289964503866e-06, "loss": 13.547, "step": 114520 }, { "epoch": 0.2313578461277407, "grad_norm": 146.77247619628906, "learning_rate": 9.501137985341832e-06, "loss": 23.4428, "step": 114530 }, { "epoch": 0.23137804676042453, "grad_norm": 193.9114227294922, "learning_rate": 9.500985984241797e-06, "loss": 40.4239, 
"step": 114540 }, { "epoch": 0.23139824739310835, "grad_norm": 916.8033447265625, "learning_rate": 9.500833961204504e-06, "loss": 27.8038, "step": 114550 }, { "epoch": 0.23141844802579217, "grad_norm": 481.2568359375, "learning_rate": 9.500681916230692e-06, "loss": 16.5165, "step": 114560 }, { "epoch": 0.231438648658476, "grad_norm": 603.3992309570312, "learning_rate": 9.500529849321101e-06, "loss": 30.3873, "step": 114570 }, { "epoch": 0.2314588492911598, "grad_norm": 615.4487915039062, "learning_rate": 9.500377760476473e-06, "loss": 29.7213, "step": 114580 }, { "epoch": 0.2314790499238436, "grad_norm": 243.64566040039062, "learning_rate": 9.500225649697551e-06, "loss": 15.1665, "step": 114590 }, { "epoch": 0.23149925055652743, "grad_norm": 710.8563232421875, "learning_rate": 9.500073516985074e-06, "loss": 35.1212, "step": 114600 }, { "epoch": 0.23151945118921125, "grad_norm": 417.65704345703125, "learning_rate": 9.499921362339786e-06, "loss": 23.8463, "step": 114610 }, { "epoch": 0.23153965182189506, "grad_norm": 76.81388854980469, "learning_rate": 9.499769185762425e-06, "loss": 34.4666, "step": 114620 }, { "epoch": 0.23155985245457888, "grad_norm": 228.5997314453125, "learning_rate": 9.499616987253734e-06, "loss": 39.6991, "step": 114630 }, { "epoch": 0.2315800530872627, "grad_norm": 573.35498046875, "learning_rate": 9.49946476681446e-06, "loss": 34.7777, "step": 114640 }, { "epoch": 0.2316002537199465, "grad_norm": 261.21356201171875, "learning_rate": 9.499312524445337e-06, "loss": 10.3347, "step": 114650 }, { "epoch": 0.23162045435263032, "grad_norm": 0.0, "learning_rate": 9.499160260147111e-06, "loss": 21.4552, "step": 114660 }, { "epoch": 0.23164065498531414, "grad_norm": 466.75244140625, "learning_rate": 9.499007973920523e-06, "loss": 24.9358, "step": 114670 }, { "epoch": 0.23166085561799796, "grad_norm": 160.8385772705078, "learning_rate": 9.498855665766316e-06, "loss": 15.6375, "step": 114680 }, { "epoch": 0.23168105625068178, "grad_norm": 639.6903076171875, "learning_rate": 9.498703335685233e-06, "loss": 28.7835, "step": 114690 }, { "epoch": 0.2317012568833656, "grad_norm": 398.8724060058594, "learning_rate": 9.498550983678016e-06, "loss": 25.5717, "step": 114700 }, { "epoch": 0.2317214575160494, "grad_norm": 409.29437255859375, "learning_rate": 9.498398609745405e-06, "loss": 27.9308, "step": 114710 }, { "epoch": 0.2317416581487332, "grad_norm": 391.4109802246094, "learning_rate": 9.498246213888148e-06, "loss": 16.9435, "step": 114720 }, { "epoch": 0.23176185878141703, "grad_norm": 312.2937316894531, "learning_rate": 9.498093796106982e-06, "loss": 31.0777, "step": 114730 }, { "epoch": 0.23178205941410085, "grad_norm": 407.076171875, "learning_rate": 9.497941356402653e-06, "loss": 16.9753, "step": 114740 }, { "epoch": 0.23180226004678467, "grad_norm": 283.9637756347656, "learning_rate": 9.497788894775903e-06, "loss": 19.82, "step": 114750 }, { "epoch": 0.2318224606794685, "grad_norm": 124.15992736816406, "learning_rate": 9.497636411227476e-06, "loss": 23.3035, "step": 114760 }, { "epoch": 0.2318426613121523, "grad_norm": 662.7410888671875, "learning_rate": 9.497483905758114e-06, "loss": 27.8046, "step": 114770 }, { "epoch": 0.2318628619448361, "grad_norm": 737.6326293945312, "learning_rate": 9.49733137836856e-06, "loss": 24.7341, "step": 114780 }, { "epoch": 0.23188306257751992, "grad_norm": 245.0981903076172, "learning_rate": 9.497178829059561e-06, "loss": 25.5373, "step": 114790 }, { "epoch": 0.23190326321020374, "grad_norm": 300.2393798828125, "learning_rate": 
9.497026257831856e-06, "loss": 23.1748, "step": 114800 }, { "epoch": 0.23192346384288756, "grad_norm": 321.494873046875, "learning_rate": 9.49687366468619e-06, "loss": 40.4303, "step": 114810 }, { "epoch": 0.23194366447557138, "grad_norm": 557.5247802734375, "learning_rate": 9.49672104962331e-06, "loss": 26.8173, "step": 114820 }, { "epoch": 0.2319638651082552, "grad_norm": 1040.2940673828125, "learning_rate": 9.496568412643954e-06, "loss": 45.3982, "step": 114830 }, { "epoch": 0.231984065740939, "grad_norm": 247.10780334472656, "learning_rate": 9.49641575374887e-06, "loss": 17.9488, "step": 114840 }, { "epoch": 0.23200426637362281, "grad_norm": 348.88427734375, "learning_rate": 9.496263072938801e-06, "loss": 16.5253, "step": 114850 }, { "epoch": 0.23202446700630663, "grad_norm": 170.89480590820312, "learning_rate": 9.49611037021449e-06, "loss": 18.8638, "step": 114860 }, { "epoch": 0.23204466763899045, "grad_norm": 639.7593383789062, "learning_rate": 9.495957645576685e-06, "loss": 32.4327, "step": 114870 }, { "epoch": 0.23206486827167427, "grad_norm": 160.24334716796875, "learning_rate": 9.495804899026126e-06, "loss": 21.9294, "step": 114880 }, { "epoch": 0.2320850689043581, "grad_norm": 84.15010070800781, "learning_rate": 9.49565213056356e-06, "loss": 25.3117, "step": 114890 }, { "epoch": 0.23210526953704191, "grad_norm": 325.7503967285156, "learning_rate": 9.495499340189729e-06, "loss": 37.906, "step": 114900 }, { "epoch": 0.2321254701697257, "grad_norm": 611.479248046875, "learning_rate": 9.495346527905381e-06, "loss": 32.5419, "step": 114910 }, { "epoch": 0.23214567080240953, "grad_norm": 536.5656127929688, "learning_rate": 9.495193693711259e-06, "loss": 21.853, "step": 114920 }, { "epoch": 0.23216587143509335, "grad_norm": 714.2391357421875, "learning_rate": 9.495040837608107e-06, "loss": 37.9951, "step": 114930 }, { "epoch": 0.23218607206777717, "grad_norm": 343.0403137207031, "learning_rate": 9.494887959596672e-06, "loss": 24.4986, "step": 114940 }, { "epoch": 0.23220627270046099, "grad_norm": 463.78173828125, "learning_rate": 9.4947350596777e-06, "loss": 22.8133, "step": 114950 }, { "epoch": 0.2322264733331448, "grad_norm": 211.89515686035156, "learning_rate": 9.494582137851932e-06, "loss": 23.9647, "step": 114960 }, { "epoch": 0.2322466739658286, "grad_norm": 44.057308197021484, "learning_rate": 9.494429194120117e-06, "loss": 26.3957, "step": 114970 }, { "epoch": 0.23226687459851242, "grad_norm": 346.5908203125, "learning_rate": 9.494276228482998e-06, "loss": 21.106, "step": 114980 }, { "epoch": 0.23228707523119624, "grad_norm": 586.0257568359375, "learning_rate": 9.494123240941321e-06, "loss": 25.8126, "step": 114990 }, { "epoch": 0.23230727586388006, "grad_norm": 766.7527465820312, "learning_rate": 9.493970231495836e-06, "loss": 28.4578, "step": 115000 }, { "epoch": 0.23232747649656388, "grad_norm": 518.5211181640625, "learning_rate": 9.493817200147282e-06, "loss": 32.7023, "step": 115010 }, { "epoch": 0.2323476771292477, "grad_norm": 354.9346008300781, "learning_rate": 9.493664146896411e-06, "loss": 20.4563, "step": 115020 }, { "epoch": 0.2323678777619315, "grad_norm": 339.10443115234375, "learning_rate": 9.493511071743963e-06, "loss": 24.1566, "step": 115030 }, { "epoch": 0.2323880783946153, "grad_norm": 731.3485107421875, "learning_rate": 9.493357974690689e-06, "loss": 30.5574, "step": 115040 }, { "epoch": 0.23240827902729913, "grad_norm": 395.5550231933594, "learning_rate": 9.493204855737332e-06, "loss": 27.9317, "step": 115050 }, { "epoch": 0.23242847965998295, 
"grad_norm": 115.83318328857422, "learning_rate": 9.49305171488464e-06, "loss": 15.4804, "step": 115060 }, { "epoch": 0.23244868029266677, "grad_norm": 0.0, "learning_rate": 9.492898552133358e-06, "loss": 13.9627, "step": 115070 }, { "epoch": 0.2324688809253506, "grad_norm": 338.2238464355469, "learning_rate": 9.492745367484234e-06, "loss": 10.4695, "step": 115080 }, { "epoch": 0.2324890815580344, "grad_norm": 604.24560546875, "learning_rate": 9.492592160938014e-06, "loss": 18.8031, "step": 115090 }, { "epoch": 0.2325092821907182, "grad_norm": 497.5995178222656, "learning_rate": 9.492438932495444e-06, "loss": 15.7976, "step": 115100 }, { "epoch": 0.23252948282340202, "grad_norm": 740.5108032226562, "learning_rate": 9.492285682157274e-06, "loss": 24.4216, "step": 115110 }, { "epoch": 0.23254968345608584, "grad_norm": 571.9484252929688, "learning_rate": 9.492132409924247e-06, "loss": 33.1764, "step": 115120 }, { "epoch": 0.23256988408876966, "grad_norm": 461.05108642578125, "learning_rate": 9.49197911579711e-06, "loss": 24.2657, "step": 115130 }, { "epoch": 0.23259008472145348, "grad_norm": 32.682518005371094, "learning_rate": 9.491825799776613e-06, "loss": 29.9697, "step": 115140 }, { "epoch": 0.2326102853541373, "grad_norm": 244.05482482910156, "learning_rate": 9.491672461863501e-06, "loss": 31.8098, "step": 115150 }, { "epoch": 0.2326304859868211, "grad_norm": 21.306440353393555, "learning_rate": 9.491519102058523e-06, "loss": 33.1123, "step": 115160 }, { "epoch": 0.23265068661950492, "grad_norm": 324.6059265136719, "learning_rate": 9.491365720362427e-06, "loss": 18.2033, "step": 115170 }, { "epoch": 0.23267088725218874, "grad_norm": 190.6021270751953, "learning_rate": 9.491212316775956e-06, "loss": 22.1311, "step": 115180 }, { "epoch": 0.23269108788487256, "grad_norm": 506.2206115722656, "learning_rate": 9.491058891299863e-06, "loss": 29.1972, "step": 115190 }, { "epoch": 0.23271128851755638, "grad_norm": 89.00489044189453, "learning_rate": 9.490905443934892e-06, "loss": 24.539, "step": 115200 }, { "epoch": 0.2327314891502402, "grad_norm": 453.9803771972656, "learning_rate": 9.490751974681795e-06, "loss": 14.0995, "step": 115210 }, { "epoch": 0.23275168978292402, "grad_norm": 576.4542236328125, "learning_rate": 9.490598483541316e-06, "loss": 38.7763, "step": 115220 }, { "epoch": 0.2327718904156078, "grad_norm": 409.8153381347656, "learning_rate": 9.490444970514205e-06, "loss": 21.8848, "step": 115230 }, { "epoch": 0.23279209104829163, "grad_norm": 360.01007080078125, "learning_rate": 9.49029143560121e-06, "loss": 29.0496, "step": 115240 }, { "epoch": 0.23281229168097545, "grad_norm": 32.68125534057617, "learning_rate": 9.490137878803078e-06, "loss": 19.6349, "step": 115250 }, { "epoch": 0.23283249231365927, "grad_norm": 586.7561645507812, "learning_rate": 9.48998430012056e-06, "loss": 16.7389, "step": 115260 }, { "epoch": 0.2328526929463431, "grad_norm": 709.1950073242188, "learning_rate": 9.489830699554403e-06, "loss": 26.743, "step": 115270 }, { "epoch": 0.2328728935790269, "grad_norm": 39.28255844116211, "learning_rate": 9.489677077105355e-06, "loss": 19.1558, "step": 115280 }, { "epoch": 0.2328930942117107, "grad_norm": 488.423095703125, "learning_rate": 9.489523432774165e-06, "loss": 28.9454, "step": 115290 }, { "epoch": 0.23291329484439452, "grad_norm": 145.56295776367188, "learning_rate": 9.489369766561584e-06, "loss": 22.6579, "step": 115300 }, { "epoch": 0.23293349547707834, "grad_norm": 838.90966796875, "learning_rate": 9.489216078468359e-06, "loss": 13.8322, "step": 115310 }, 
{ "epoch": 0.23295369610976216, "grad_norm": 531.090576171875, "learning_rate": 9.48906236849524e-06, "loss": 46.4313, "step": 115320 }, { "epoch": 0.23297389674244598, "grad_norm": 238.19915771484375, "learning_rate": 9.488908636642972e-06, "loss": 17.5851, "step": 115330 }, { "epoch": 0.2329940973751298, "grad_norm": 351.15106201171875, "learning_rate": 9.48875488291231e-06, "loss": 27.8192, "step": 115340 }, { "epoch": 0.2330142980078136, "grad_norm": 323.8350524902344, "learning_rate": 9.488601107304001e-06, "loss": 25.9669, "step": 115350 }, { "epoch": 0.2330344986404974, "grad_norm": 376.9538879394531, "learning_rate": 9.488447309818795e-06, "loss": 37.1908, "step": 115360 }, { "epoch": 0.23305469927318123, "grad_norm": 79.04154968261719, "learning_rate": 9.488293490457441e-06, "loss": 16.2454, "step": 115370 }, { "epoch": 0.23307489990586505, "grad_norm": 664.3616943359375, "learning_rate": 9.48813964922069e-06, "loss": 34.9776, "step": 115380 }, { "epoch": 0.23309510053854887, "grad_norm": 836.3585205078125, "learning_rate": 9.487985786109288e-06, "loss": 18.6744, "step": 115390 }, { "epoch": 0.2331153011712327, "grad_norm": 612.2698364257812, "learning_rate": 9.487831901123989e-06, "loss": 24.8608, "step": 115400 }, { "epoch": 0.2331355018039165, "grad_norm": 100.1115493774414, "learning_rate": 9.48767799426554e-06, "loss": 11.9513, "step": 115410 }, { "epoch": 0.2331557024366003, "grad_norm": 516.9166259765625, "learning_rate": 9.487524065534696e-06, "loss": 16.9859, "step": 115420 }, { "epoch": 0.23317590306928412, "grad_norm": 773.397705078125, "learning_rate": 9.487370114932201e-06, "loss": 24.2073, "step": 115430 }, { "epoch": 0.23319610370196794, "grad_norm": 295.19964599609375, "learning_rate": 9.48721614245881e-06, "loss": 35.1301, "step": 115440 }, { "epoch": 0.23321630433465176, "grad_norm": 247.9727020263672, "learning_rate": 9.487062148115272e-06, "loss": 28.4932, "step": 115450 }, { "epoch": 0.23323650496733558, "grad_norm": 312.9109802246094, "learning_rate": 9.486908131902336e-06, "loss": 26.1924, "step": 115460 }, { "epoch": 0.2332567056000194, "grad_norm": 381.10455322265625, "learning_rate": 9.486754093820755e-06, "loss": 20.8769, "step": 115470 }, { "epoch": 0.2332769062327032, "grad_norm": 469.810302734375, "learning_rate": 9.486600033871279e-06, "loss": 46.3613, "step": 115480 }, { "epoch": 0.23329710686538702, "grad_norm": 277.09747314453125, "learning_rate": 9.486445952054658e-06, "loss": 15.3754, "step": 115490 }, { "epoch": 0.23331730749807084, "grad_norm": 954.3956298828125, "learning_rate": 9.486291848371642e-06, "loss": 34.4752, "step": 115500 }, { "epoch": 0.23333750813075466, "grad_norm": 231.85748291015625, "learning_rate": 9.486137722822986e-06, "loss": 21.8481, "step": 115510 }, { "epoch": 0.23335770876343848, "grad_norm": 199.6193389892578, "learning_rate": 9.48598357540944e-06, "loss": 11.9974, "step": 115520 }, { "epoch": 0.2333779093961223, "grad_norm": 422.9839172363281, "learning_rate": 9.48582940613175e-06, "loss": 27.3381, "step": 115530 }, { "epoch": 0.2333981100288061, "grad_norm": 173.09454345703125, "learning_rate": 9.485675214990673e-06, "loss": 20.8606, "step": 115540 }, { "epoch": 0.2334183106614899, "grad_norm": 368.62884521484375, "learning_rate": 9.485521001986961e-06, "loss": 24.6399, "step": 115550 }, { "epoch": 0.23343851129417373, "grad_norm": 1182.2962646484375, "learning_rate": 9.485366767121363e-06, "loss": 54.2244, "step": 115560 }, { "epoch": 0.23345871192685755, "grad_norm": 276.2958679199219, "learning_rate": 
9.48521251039463e-06, "loss": 27.9185, "step": 115570 }, { "epoch": 0.23347891255954137, "grad_norm": 435.47625732421875, "learning_rate": 9.485058231807515e-06, "loss": 21.1052, "step": 115580 }, { "epoch": 0.2334991131922252, "grad_norm": 682.1556396484375, "learning_rate": 9.484903931360772e-06, "loss": 39.5824, "step": 115590 }, { "epoch": 0.233519313824909, "grad_norm": 487.5298156738281, "learning_rate": 9.484749609055151e-06, "loss": 24.5813, "step": 115600 }, { "epoch": 0.2335395144575928, "grad_norm": 287.06866455078125, "learning_rate": 9.484595264891403e-06, "loss": 23.584, "step": 115610 }, { "epoch": 0.23355971509027662, "grad_norm": 1571.0631103515625, "learning_rate": 9.484440898870282e-06, "loss": 31.6812, "step": 115620 }, { "epoch": 0.23357991572296044, "grad_norm": 50.13099670410156, "learning_rate": 9.484286510992541e-06, "loss": 17.1068, "step": 115630 }, { "epoch": 0.23360011635564426, "grad_norm": 374.7189636230469, "learning_rate": 9.48413210125893e-06, "loss": 26.3826, "step": 115640 }, { "epoch": 0.23362031698832808, "grad_norm": 165.21685791015625, "learning_rate": 9.483977669670204e-06, "loss": 17.2194, "step": 115650 }, { "epoch": 0.2336405176210119, "grad_norm": 311.05792236328125, "learning_rate": 9.483823216227115e-06, "loss": 29.0539, "step": 115660 }, { "epoch": 0.2336607182536957, "grad_norm": 547.1766357421875, "learning_rate": 9.483668740930413e-06, "loss": 25.1947, "step": 115670 }, { "epoch": 0.2336809188863795, "grad_norm": 82.5296630859375, "learning_rate": 9.483514243780856e-06, "loss": 17.5631, "step": 115680 }, { "epoch": 0.23370111951906333, "grad_norm": 161.6363525390625, "learning_rate": 9.483359724779194e-06, "loss": 35.9948, "step": 115690 }, { "epoch": 0.23372132015174715, "grad_norm": 442.1202697753906, "learning_rate": 9.48320518392618e-06, "loss": 30.1484, "step": 115700 }, { "epoch": 0.23374152078443097, "grad_norm": 950.1380615234375, "learning_rate": 9.483050621222567e-06, "loss": 19.3751, "step": 115710 }, { "epoch": 0.2337617214171148, "grad_norm": 682.4646606445312, "learning_rate": 9.482896036669111e-06, "loss": 50.0991, "step": 115720 }, { "epoch": 0.2337819220497986, "grad_norm": 361.4498596191406, "learning_rate": 9.482741430266563e-06, "loss": 25.1781, "step": 115730 }, { "epoch": 0.2338021226824824, "grad_norm": 712.1513061523438, "learning_rate": 9.482586802015673e-06, "loss": 20.1124, "step": 115740 }, { "epoch": 0.23382232331516623, "grad_norm": 644.6577758789062, "learning_rate": 9.482432151917204e-06, "loss": 30.7717, "step": 115750 }, { "epoch": 0.23384252394785005, "grad_norm": 428.7471618652344, "learning_rate": 9.482277479971902e-06, "loss": 20.5207, "step": 115760 }, { "epoch": 0.23386272458053387, "grad_norm": 1580.9759521484375, "learning_rate": 9.482122786180524e-06, "loss": 39.2061, "step": 115770 }, { "epoch": 0.23388292521321769, "grad_norm": 819.4335327148438, "learning_rate": 9.481968070543822e-06, "loss": 23.9595, "step": 115780 }, { "epoch": 0.2339031258459015, "grad_norm": 222.69732666015625, "learning_rate": 9.48181333306255e-06, "loss": 22.0195, "step": 115790 }, { "epoch": 0.2339233264785853, "grad_norm": 786.6558837890625, "learning_rate": 9.481658573737465e-06, "loss": 19.2621, "step": 115800 }, { "epoch": 0.23394352711126912, "grad_norm": 129.64132690429688, "learning_rate": 9.48150379256932e-06, "loss": 25.3186, "step": 115810 }, { "epoch": 0.23396372774395294, "grad_norm": 769.839599609375, "learning_rate": 9.48134898955887e-06, "loss": 19.8686, "step": 115820 }, { "epoch": 0.23398392837663676, 
"grad_norm": 260.1893615722656, "learning_rate": 9.481194164706866e-06, "loss": 26.4367, "step": 115830 }, { "epoch": 0.23400412900932058, "grad_norm": 141.46632385253906, "learning_rate": 9.481039318014068e-06, "loss": 20.8942, "step": 115840 }, { "epoch": 0.2340243296420044, "grad_norm": 426.383056640625, "learning_rate": 9.480884449481224e-06, "loss": 30.8326, "step": 115850 }, { "epoch": 0.2340445302746882, "grad_norm": 944.14208984375, "learning_rate": 9.480729559109096e-06, "loss": 45.3746, "step": 115860 }, { "epoch": 0.234064730907372, "grad_norm": 54.95215606689453, "learning_rate": 9.480574646898434e-06, "loss": 13.8032, "step": 115870 }, { "epoch": 0.23408493154005583, "grad_norm": 1639.69677734375, "learning_rate": 9.480419712849996e-06, "loss": 21.4958, "step": 115880 }, { "epoch": 0.23410513217273965, "grad_norm": 222.08889770507812, "learning_rate": 9.480264756964535e-06, "loss": 23.5316, "step": 115890 }, { "epoch": 0.23412533280542347, "grad_norm": 242.14276123046875, "learning_rate": 9.480109779242805e-06, "loss": 16.9658, "step": 115900 }, { "epoch": 0.2341455334381073, "grad_norm": 95.41576385498047, "learning_rate": 9.479954779685566e-06, "loss": 16.2513, "step": 115910 }, { "epoch": 0.2341657340707911, "grad_norm": 364.3272399902344, "learning_rate": 9.47979975829357e-06, "loss": 19.5354, "step": 115920 }, { "epoch": 0.2341859347034749, "grad_norm": 590.6675415039062, "learning_rate": 9.479644715067572e-06, "loss": 16.4956, "step": 115930 }, { "epoch": 0.23420613533615872, "grad_norm": 351.85833740234375, "learning_rate": 9.479489650008331e-06, "loss": 32.4146, "step": 115940 }, { "epoch": 0.23422633596884254, "grad_norm": 371.4349670410156, "learning_rate": 9.4793345631166e-06, "loss": 14.2059, "step": 115950 }, { "epoch": 0.23424653660152636, "grad_norm": 194.46673583984375, "learning_rate": 9.479179454393135e-06, "loss": 15.5061, "step": 115960 }, { "epoch": 0.23426673723421018, "grad_norm": 223.16856384277344, "learning_rate": 9.479024323838694e-06, "loss": 29.0022, "step": 115970 }, { "epoch": 0.234286937866894, "grad_norm": 44.55643081665039, "learning_rate": 9.478869171454031e-06, "loss": 16.332, "step": 115980 }, { "epoch": 0.2343071384995778, "grad_norm": 376.872314453125, "learning_rate": 9.478713997239902e-06, "loss": 33.6666, "step": 115990 }, { "epoch": 0.23432733913226161, "grad_norm": 296.7268981933594, "learning_rate": 9.478558801197065e-06, "loss": 21.58, "step": 116000 }, { "epoch": 0.23434753976494543, "grad_norm": 426.90374755859375, "learning_rate": 9.478403583326275e-06, "loss": 16.5128, "step": 116010 }, { "epoch": 0.23436774039762925, "grad_norm": 207.19375610351562, "learning_rate": 9.47824834362829e-06, "loss": 16.7943, "step": 116020 }, { "epoch": 0.23438794103031307, "grad_norm": 73.24336242675781, "learning_rate": 9.478093082103865e-06, "loss": 30.4707, "step": 116030 }, { "epoch": 0.2344081416629969, "grad_norm": 968.002197265625, "learning_rate": 9.477937798753757e-06, "loss": 29.8604, "step": 116040 }, { "epoch": 0.23442834229568071, "grad_norm": 612.5117797851562, "learning_rate": 9.477782493578724e-06, "loss": 14.9429, "step": 116050 }, { "epoch": 0.2344485429283645, "grad_norm": 384.3153076171875, "learning_rate": 9.477627166579523e-06, "loss": 26.4373, "step": 116060 }, { "epoch": 0.23446874356104833, "grad_norm": 462.5488586425781, "learning_rate": 9.47747181775691e-06, "loss": 37.0143, "step": 116070 }, { "epoch": 0.23448894419373215, "grad_norm": 1343.555908203125, "learning_rate": 9.477316447111642e-06, "loss": 29.2987, "step": 
116080 }, { "epoch": 0.23450914482641597, "grad_norm": 382.63250732421875, "learning_rate": 9.477161054644475e-06, "loss": 18.1374, "step": 116090 }, { "epoch": 0.2345293454590998, "grad_norm": 262.3546447753906, "learning_rate": 9.47700564035617e-06, "loss": 26.7119, "step": 116100 }, { "epoch": 0.2345495460917836, "grad_norm": 261.574951171875, "learning_rate": 9.476850204247483e-06, "loss": 43.5217, "step": 116110 }, { "epoch": 0.2345697467244674, "grad_norm": 491.40167236328125, "learning_rate": 9.47669474631917e-06, "loss": 18.6674, "step": 116120 }, { "epoch": 0.23458994735715122, "grad_norm": 467.8964538574219, "learning_rate": 9.476539266571988e-06, "loss": 21.3494, "step": 116130 }, { "epoch": 0.23461014798983504, "grad_norm": 394.40240478515625, "learning_rate": 9.4763837650067e-06, "loss": 32.0296, "step": 116140 }, { "epoch": 0.23463034862251886, "grad_norm": 301.9280090332031, "learning_rate": 9.476228241624059e-06, "loss": 28.1887, "step": 116150 }, { "epoch": 0.23465054925520268, "grad_norm": 350.1358947753906, "learning_rate": 9.476072696424825e-06, "loss": 29.1411, "step": 116160 }, { "epoch": 0.2346707498878865, "grad_norm": 184.35031127929688, "learning_rate": 9.475917129409755e-06, "loss": 24.4947, "step": 116170 }, { "epoch": 0.2346909505205703, "grad_norm": 555.5595703125, "learning_rate": 9.475761540579607e-06, "loss": 24.8404, "step": 116180 }, { "epoch": 0.2347111511532541, "grad_norm": 301.1944885253906, "learning_rate": 9.475605929935142e-06, "loss": 39.6237, "step": 116190 }, { "epoch": 0.23473135178593793, "grad_norm": 88.20915985107422, "learning_rate": 9.475450297477113e-06, "loss": 19.002, "step": 116200 }, { "epoch": 0.23475155241862175, "grad_norm": 466.5421447753906, "learning_rate": 9.475294643206285e-06, "loss": 15.9766, "step": 116210 }, { "epoch": 0.23477175305130557, "grad_norm": 6.774113655090332, "learning_rate": 9.475138967123414e-06, "loss": 12.3952, "step": 116220 }, { "epoch": 0.2347919536839894, "grad_norm": 230.72752380371094, "learning_rate": 9.474983269229256e-06, "loss": 12.4458, "step": 116230 }, { "epoch": 0.2348121543166732, "grad_norm": 268.2113037109375, "learning_rate": 9.474827549524574e-06, "loss": 14.7699, "step": 116240 }, { "epoch": 0.234832354949357, "grad_norm": 178.18630981445312, "learning_rate": 9.474671808010126e-06, "loss": 13.323, "step": 116250 }, { "epoch": 0.23485255558204082, "grad_norm": 458.60760498046875, "learning_rate": 9.47451604468667e-06, "loss": 19.6697, "step": 116260 }, { "epoch": 0.23487275621472464, "grad_norm": 317.4723205566406, "learning_rate": 9.474360259554965e-06, "loss": 22.8527, "step": 116270 }, { "epoch": 0.23489295684740846, "grad_norm": 272.6497802734375, "learning_rate": 9.47420445261577e-06, "loss": 26.309, "step": 116280 }, { "epoch": 0.23491315748009228, "grad_norm": 630.2426147460938, "learning_rate": 9.474048623869846e-06, "loss": 13.5902, "step": 116290 }, { "epoch": 0.2349333581127761, "grad_norm": 2414.823486328125, "learning_rate": 9.473892773317952e-06, "loss": 33.2801, "step": 116300 }, { "epoch": 0.2349535587454599, "grad_norm": 948.0327758789062, "learning_rate": 9.473736900960845e-06, "loss": 26.3757, "step": 116310 }, { "epoch": 0.23497375937814372, "grad_norm": 824.4536743164062, "learning_rate": 9.47358100679929e-06, "loss": 20.7127, "step": 116320 }, { "epoch": 0.23499396001082754, "grad_norm": 274.6886901855469, "learning_rate": 9.473425090834041e-06, "loss": 28.9095, "step": 116330 }, { "epoch": 0.23501416064351136, "grad_norm": 557.03466796875, "learning_rate": 
9.473269153065863e-06, "loss": 34.9061, "step": 116340 }, { "epoch": 0.23503436127619518, "grad_norm": 184.921142578125, "learning_rate": 9.473113193495513e-06, "loss": 24.6152, "step": 116350 }, { "epoch": 0.235054561908879, "grad_norm": 229.9757843017578, "learning_rate": 9.472957212123751e-06, "loss": 10.6152, "step": 116360 }, { "epoch": 0.23507476254156282, "grad_norm": 450.23809814453125, "learning_rate": 9.472801208951339e-06, "loss": 17.6649, "step": 116370 }, { "epoch": 0.2350949631742466, "grad_norm": 337.5602722167969, "learning_rate": 9.472645183979037e-06, "loss": 25.5706, "step": 116380 }, { "epoch": 0.23511516380693043, "grad_norm": 638.6205444335938, "learning_rate": 9.472489137207604e-06, "loss": 17.8359, "step": 116390 }, { "epoch": 0.23513536443961425, "grad_norm": 721.173583984375, "learning_rate": 9.4723330686378e-06, "loss": 23.6118, "step": 116400 }, { "epoch": 0.23515556507229807, "grad_norm": 544.9619140625, "learning_rate": 9.472176978270389e-06, "loss": 25.2253, "step": 116410 }, { "epoch": 0.2351757657049819, "grad_norm": 738.7373657226562, "learning_rate": 9.472020866106128e-06, "loss": 34.294, "step": 116420 }, { "epoch": 0.2351959663376657, "grad_norm": 765.0400390625, "learning_rate": 9.47186473214578e-06, "loss": 20.823, "step": 116430 }, { "epoch": 0.2352161669703495, "grad_norm": 480.0275573730469, "learning_rate": 9.471708576390108e-06, "loss": 22.5498, "step": 116440 }, { "epoch": 0.23523636760303332, "grad_norm": 566.5287475585938, "learning_rate": 9.47155239883987e-06, "loss": 21.7083, "step": 116450 }, { "epoch": 0.23525656823571714, "grad_norm": 627.3939208984375, "learning_rate": 9.471396199495825e-06, "loss": 22.2768, "step": 116460 }, { "epoch": 0.23527676886840096, "grad_norm": 845.6305541992188, "learning_rate": 9.471239978358741e-06, "loss": 25.5389, "step": 116470 }, { "epoch": 0.23529696950108478, "grad_norm": 421.12469482421875, "learning_rate": 9.471083735429374e-06, "loss": 43.7774, "step": 116480 }, { "epoch": 0.2353171701337686, "grad_norm": 229.1207733154297, "learning_rate": 9.470927470708486e-06, "loss": 52.3167, "step": 116490 }, { "epoch": 0.2353373707664524, "grad_norm": 416.5636901855469, "learning_rate": 9.470771184196842e-06, "loss": 65.1665, "step": 116500 }, { "epoch": 0.2353575713991362, "grad_norm": 441.8174743652344, "learning_rate": 9.4706148758952e-06, "loss": 18.8124, "step": 116510 }, { "epoch": 0.23537777203182003, "grad_norm": 510.05340576171875, "learning_rate": 9.470458545804325e-06, "loss": 34.6727, "step": 116520 }, { "epoch": 0.23539797266450385, "grad_norm": 434.70062255859375, "learning_rate": 9.470302193924975e-06, "loss": 22.0199, "step": 116530 }, { "epoch": 0.23541817329718767, "grad_norm": 411.9683837890625, "learning_rate": 9.470145820257915e-06, "loss": 32.3334, "step": 116540 }, { "epoch": 0.2354383739298715, "grad_norm": 462.7821350097656, "learning_rate": 9.469989424803907e-06, "loss": 12.0787, "step": 116550 }, { "epoch": 0.2354585745625553, "grad_norm": 46.51176834106445, "learning_rate": 9.469833007563712e-06, "loss": 14.8491, "step": 116560 }, { "epoch": 0.2354787751952391, "grad_norm": 171.05972290039062, "learning_rate": 9.469676568538094e-06, "loss": 31.8924, "step": 116570 }, { "epoch": 0.23549897582792292, "grad_norm": 293.23077392578125, "learning_rate": 9.469520107727815e-06, "loss": 24.2429, "step": 116580 }, { "epoch": 0.23551917646060674, "grad_norm": 632.8076782226562, "learning_rate": 9.469363625133634e-06, "loss": 15.9344, "step": 116590 }, { "epoch": 0.23553937709329056, 
"grad_norm": 186.47857666015625, "learning_rate": 9.46920712075632e-06, "loss": 31.9356, "step": 116600 }, { "epoch": 0.23555957772597438, "grad_norm": 368.3692626953125, "learning_rate": 9.469050594596631e-06, "loss": 15.9615, "step": 116610 }, { "epoch": 0.2355797783586582, "grad_norm": 337.5687255859375, "learning_rate": 9.468894046655332e-06, "loss": 22.172, "step": 116620 }, { "epoch": 0.235599978991342, "grad_norm": 372.5582580566406, "learning_rate": 9.468737476933186e-06, "loss": 17.3395, "step": 116630 }, { "epoch": 0.23562017962402582, "grad_norm": 458.6085205078125, "learning_rate": 9.468580885430953e-06, "loss": 17.8977, "step": 116640 }, { "epoch": 0.23564038025670964, "grad_norm": 527.5307006835938, "learning_rate": 9.468424272149402e-06, "loss": 31.6, "step": 116650 }, { "epoch": 0.23566058088939346, "grad_norm": 0.0, "learning_rate": 9.46826763708929e-06, "loss": 15.7834, "step": 116660 }, { "epoch": 0.23568078152207728, "grad_norm": 751.5640258789062, "learning_rate": 9.468110980251386e-06, "loss": 25.4928, "step": 116670 }, { "epoch": 0.2357009821547611, "grad_norm": 623.1112060546875, "learning_rate": 9.467954301636451e-06, "loss": 24.9689, "step": 116680 }, { "epoch": 0.23572118278744492, "grad_norm": 209.68223571777344, "learning_rate": 9.467797601245246e-06, "loss": 21.5656, "step": 116690 }, { "epoch": 0.2357413834201287, "grad_norm": 492.0634460449219, "learning_rate": 9.46764087907854e-06, "loss": 16.8456, "step": 116700 }, { "epoch": 0.23576158405281253, "grad_norm": 536.7673950195312, "learning_rate": 9.467484135137093e-06, "loss": 32.4083, "step": 116710 }, { "epoch": 0.23578178468549635, "grad_norm": 1734.8011474609375, "learning_rate": 9.46732736942167e-06, "loss": 30.417, "step": 116720 }, { "epoch": 0.23580198531818017, "grad_norm": 538.3679809570312, "learning_rate": 9.467170581933037e-06, "loss": 31.2273, "step": 116730 }, { "epoch": 0.235822185950864, "grad_norm": 232.93023681640625, "learning_rate": 9.467013772671953e-06, "loss": 12.2296, "step": 116740 }, { "epoch": 0.2358423865835478, "grad_norm": 820.4888305664062, "learning_rate": 9.46685694163919e-06, "loss": 20.1756, "step": 116750 }, { "epoch": 0.2358625872162316, "grad_norm": 378.5989990234375, "learning_rate": 9.466700088835505e-06, "loss": 24.1628, "step": 116760 }, { "epoch": 0.23588278784891542, "grad_norm": 0.0, "learning_rate": 9.466543214261666e-06, "loss": 19.2676, "step": 116770 }, { "epoch": 0.23590298848159924, "grad_norm": 143.71340942382812, "learning_rate": 9.466386317918436e-06, "loss": 19.0917, "step": 116780 }, { "epoch": 0.23592318911428306, "grad_norm": 734.0843505859375, "learning_rate": 9.466229399806583e-06, "loss": 24.1006, "step": 116790 }, { "epoch": 0.23594338974696688, "grad_norm": 654.5097045898438, "learning_rate": 9.46607245992687e-06, "loss": 30.1238, "step": 116800 }, { "epoch": 0.2359635903796507, "grad_norm": 198.99844360351562, "learning_rate": 9.465915498280058e-06, "loss": 18.8972, "step": 116810 }, { "epoch": 0.2359837910123345, "grad_norm": 635.9158935546875, "learning_rate": 9.465758514866919e-06, "loss": 33.3588, "step": 116820 }, { "epoch": 0.2360039916450183, "grad_norm": 253.74960327148438, "learning_rate": 9.465601509688212e-06, "loss": 21.6923, "step": 116830 }, { "epoch": 0.23602419227770213, "grad_norm": 268.11517333984375, "learning_rate": 9.465444482744708e-06, "loss": 20.9421, "step": 116840 }, { "epoch": 0.23604439291038595, "grad_norm": 155.50680541992188, "learning_rate": 9.465287434037167e-06, "loss": 21.0103, "step": 116850 }, { "epoch": 
0.23606459354306977, "grad_norm": 969.3081665039062, "learning_rate": 9.465130363566357e-06, "loss": 24.801, "step": 116860 }, { "epoch": 0.2360847941757536, "grad_norm": 793.8674926757812, "learning_rate": 9.464973271333042e-06, "loss": 15.4893, "step": 116870 }, { "epoch": 0.2361049948084374, "grad_norm": 338.47918701171875, "learning_rate": 9.464816157337991e-06, "loss": 29.3621, "step": 116880 }, { "epoch": 0.2361251954411212, "grad_norm": 641.8385009765625, "learning_rate": 9.464659021581966e-06, "loss": 21.0026, "step": 116890 }, { "epoch": 0.23614539607380503, "grad_norm": 145.12539672851562, "learning_rate": 9.464501864065735e-06, "loss": 20.4296, "step": 116900 }, { "epoch": 0.23616559670648885, "grad_norm": 417.63482666015625, "learning_rate": 9.464344684790063e-06, "loss": 27.0664, "step": 116910 }, { "epoch": 0.23618579733917267, "grad_norm": 582.3400268554688, "learning_rate": 9.464187483755718e-06, "loss": 33.1719, "step": 116920 }, { "epoch": 0.23620599797185649, "grad_norm": 356.63433837890625, "learning_rate": 9.464030260963463e-06, "loss": 16.0722, "step": 116930 }, { "epoch": 0.2362261986045403, "grad_norm": 431.3008728027344, "learning_rate": 9.463873016414066e-06, "loss": 18.8018, "step": 116940 }, { "epoch": 0.2362463992372241, "grad_norm": 230.9427490234375, "learning_rate": 9.463715750108293e-06, "loss": 19.6771, "step": 116950 }, { "epoch": 0.23626659986990792, "grad_norm": 4518.6083984375, "learning_rate": 9.463558462046912e-06, "loss": 30.278, "step": 116960 }, { "epoch": 0.23628680050259174, "grad_norm": 729.4716796875, "learning_rate": 9.463401152230688e-06, "loss": 18.7407, "step": 116970 }, { "epoch": 0.23630700113527556, "grad_norm": 413.8880920410156, "learning_rate": 9.463243820660389e-06, "loss": 23.859, "step": 116980 }, { "epoch": 0.23632720176795938, "grad_norm": 14.609228134155273, "learning_rate": 9.463086467336779e-06, "loss": 22.6864, "step": 116990 }, { "epoch": 0.2363474024006432, "grad_norm": 275.2424011230469, "learning_rate": 9.46292909226063e-06, "loss": 20.4147, "step": 117000 }, { "epoch": 0.23636760303332702, "grad_norm": 254.34274291992188, "learning_rate": 9.462771695432702e-06, "loss": 15.3926, "step": 117010 }, { "epoch": 0.2363878036660108, "grad_norm": 345.982421875, "learning_rate": 9.462614276853767e-06, "loss": 31.9936, "step": 117020 }, { "epoch": 0.23640800429869463, "grad_norm": 89.97821807861328, "learning_rate": 9.462456836524593e-06, "loss": 24.8761, "step": 117030 }, { "epoch": 0.23642820493137845, "grad_norm": 177.04977416992188, "learning_rate": 9.462299374445944e-06, "loss": 21.204, "step": 117040 }, { "epoch": 0.23644840556406227, "grad_norm": 456.4011535644531, "learning_rate": 9.46214189061859e-06, "loss": 35.3761, "step": 117050 }, { "epoch": 0.2364686061967461, "grad_norm": 292.1644287109375, "learning_rate": 9.461984385043297e-06, "loss": 20.2125, "step": 117060 }, { "epoch": 0.2364888068294299, "grad_norm": 743.0880126953125, "learning_rate": 9.461826857720835e-06, "loss": 34.9542, "step": 117070 }, { "epoch": 0.2365090074621137, "grad_norm": 0.0, "learning_rate": 9.461669308651968e-06, "loss": 25.9587, "step": 117080 }, { "epoch": 0.23652920809479752, "grad_norm": 336.8008117675781, "learning_rate": 9.461511737837467e-06, "loss": 17.3046, "step": 117090 }, { "epoch": 0.23654940872748134, "grad_norm": 651.3720092773438, "learning_rate": 9.461354145278098e-06, "loss": 23.1151, "step": 117100 }, { "epoch": 0.23656960936016516, "grad_norm": 353.6770935058594, "learning_rate": 9.46119653097463e-06, "loss": 20.6156, 
"step": 117110 }, { "epoch": 0.23658980999284898, "grad_norm": 910.6161499023438, "learning_rate": 9.461038894927833e-06, "loss": 36.3535, "step": 117120 }, { "epoch": 0.2366100106255328, "grad_norm": 339.4697265625, "learning_rate": 9.460881237138472e-06, "loss": 13.3867, "step": 117130 }, { "epoch": 0.2366302112582166, "grad_norm": 675.8087158203125, "learning_rate": 9.460723557607317e-06, "loss": 29.5929, "step": 117140 }, { "epoch": 0.23665041189090041, "grad_norm": 351.864501953125, "learning_rate": 9.460565856335136e-06, "loss": 22.8608, "step": 117150 }, { "epoch": 0.23667061252358423, "grad_norm": 287.1529541015625, "learning_rate": 9.460408133322698e-06, "loss": 37.3685, "step": 117160 }, { "epoch": 0.23669081315626805, "grad_norm": 905.3443603515625, "learning_rate": 9.460250388570772e-06, "loss": 51.5777, "step": 117170 }, { "epoch": 0.23671101378895187, "grad_norm": 815.42626953125, "learning_rate": 9.460092622080128e-06, "loss": 40.0098, "step": 117180 }, { "epoch": 0.2367312144216357, "grad_norm": 6.8798604011535645, "learning_rate": 9.459934833851531e-06, "loss": 15.9156, "step": 117190 }, { "epoch": 0.23675141505431951, "grad_norm": 303.4665222167969, "learning_rate": 9.459777023885754e-06, "loss": 32.0095, "step": 117200 }, { "epoch": 0.2367716156870033, "grad_norm": 525.3986206054688, "learning_rate": 9.459619192183565e-06, "loss": 28.3864, "step": 117210 }, { "epoch": 0.23679181631968713, "grad_norm": 805.0297241210938, "learning_rate": 9.459461338745733e-06, "loss": 12.2511, "step": 117220 }, { "epoch": 0.23681201695237095, "grad_norm": 108.30651092529297, "learning_rate": 9.459303463573027e-06, "loss": 29.9991, "step": 117230 }, { "epoch": 0.23683221758505477, "grad_norm": 545.5927734375, "learning_rate": 9.459145566666216e-06, "loss": 27.7245, "step": 117240 }, { "epoch": 0.2368524182177386, "grad_norm": 832.1116333007812, "learning_rate": 9.458987648026071e-06, "loss": 17.0151, "step": 117250 }, { "epoch": 0.2368726188504224, "grad_norm": 125.13150024414062, "learning_rate": 9.458829707653362e-06, "loss": 33.1239, "step": 117260 }, { "epoch": 0.2368928194831062, "grad_norm": 186.43370056152344, "learning_rate": 9.458671745548855e-06, "loss": 16.7115, "step": 117270 }, { "epoch": 0.23691302011579002, "grad_norm": 708.531005859375, "learning_rate": 9.458513761713324e-06, "loss": 21.2725, "step": 117280 }, { "epoch": 0.23693322074847384, "grad_norm": 810.84375, "learning_rate": 9.45835575614754e-06, "loss": 29.8557, "step": 117290 }, { "epoch": 0.23695342138115766, "grad_norm": 277.344970703125, "learning_rate": 9.458197728852268e-06, "loss": 16.7018, "step": 117300 }, { "epoch": 0.23697362201384148, "grad_norm": 448.7570495605469, "learning_rate": 9.458039679828281e-06, "loss": 24.4709, "step": 117310 }, { "epoch": 0.2369938226465253, "grad_norm": 330.569091796875, "learning_rate": 9.457881609076352e-06, "loss": 16.9305, "step": 117320 }, { "epoch": 0.23701402327920912, "grad_norm": 63.78606414794922, "learning_rate": 9.457723516597247e-06, "loss": 27.3925, "step": 117330 }, { "epoch": 0.2370342239118929, "grad_norm": 265.64031982421875, "learning_rate": 9.457565402391738e-06, "loss": 8.668, "step": 117340 }, { "epoch": 0.23705442454457673, "grad_norm": 503.2449645996094, "learning_rate": 9.457407266460595e-06, "loss": 23.1339, "step": 117350 }, { "epoch": 0.23707462517726055, "grad_norm": 528.9575805664062, "learning_rate": 9.45724910880459e-06, "loss": 24.2766, "step": 117360 }, { "epoch": 0.23709482580994437, "grad_norm": 511.1745910644531, "learning_rate": 
9.457090929424495e-06, "loss": 27.5194, "step": 117370 }, { "epoch": 0.2371150264426282, "grad_norm": 244.69229125976562, "learning_rate": 9.456932728321078e-06, "loss": 17.6802, "step": 117380 }, { "epoch": 0.237135227075312, "grad_norm": 433.9642028808594, "learning_rate": 9.456774505495112e-06, "loss": 11.6906, "step": 117390 }, { "epoch": 0.2371554277079958, "grad_norm": 514.8110961914062, "learning_rate": 9.456616260947367e-06, "loss": 20.3018, "step": 117400 }, { "epoch": 0.23717562834067962, "grad_norm": 294.82403564453125, "learning_rate": 9.456457994678616e-06, "loss": 19.9833, "step": 117410 }, { "epoch": 0.23719582897336344, "grad_norm": 672.5086669921875, "learning_rate": 9.456299706689627e-06, "loss": 31.9594, "step": 117420 }, { "epoch": 0.23721602960604726, "grad_norm": 38.29913330078125, "learning_rate": 9.456141396981176e-06, "loss": 19.7459, "step": 117430 }, { "epoch": 0.23723623023873108, "grad_norm": 317.1351318359375, "learning_rate": 9.455983065554032e-06, "loss": 25.9537, "step": 117440 }, { "epoch": 0.2372564308714149, "grad_norm": 591.7095947265625, "learning_rate": 9.455824712408967e-06, "loss": 25.0482, "step": 117450 }, { "epoch": 0.2372766315040987, "grad_norm": 1024.886962890625, "learning_rate": 9.455666337546751e-06, "loss": 53.5332, "step": 117460 }, { "epoch": 0.23729683213678252, "grad_norm": 530.8159790039062, "learning_rate": 9.45550794096816e-06, "loss": 19.6555, "step": 117470 }, { "epoch": 0.23731703276946634, "grad_norm": 665.9408569335938, "learning_rate": 9.455349522673962e-06, "loss": 39.3288, "step": 117480 }, { "epoch": 0.23733723340215016, "grad_norm": 522.2376098632812, "learning_rate": 9.455191082664931e-06, "loss": 20.3337, "step": 117490 }, { "epoch": 0.23735743403483398, "grad_norm": 383.4013977050781, "learning_rate": 9.45503262094184e-06, "loss": 29.2384, "step": 117500 }, { "epoch": 0.2373776346675178, "grad_norm": 446.2220153808594, "learning_rate": 9.45487413750546e-06, "loss": 33.0276, "step": 117510 }, { "epoch": 0.23739783530020162, "grad_norm": 641.8443603515625, "learning_rate": 9.454715632356564e-06, "loss": 23.3312, "step": 117520 }, { "epoch": 0.2374180359328854, "grad_norm": 696.3156127929688, "learning_rate": 9.454557105495922e-06, "loss": 21.8034, "step": 117530 }, { "epoch": 0.23743823656556923, "grad_norm": 398.4124450683594, "learning_rate": 9.454398556924312e-06, "loss": 29.9253, "step": 117540 }, { "epoch": 0.23745843719825305, "grad_norm": 1015.231689453125, "learning_rate": 9.454239986642503e-06, "loss": 20.2801, "step": 117550 }, { "epoch": 0.23747863783093687, "grad_norm": 324.8863525390625, "learning_rate": 9.454081394651267e-06, "loss": 15.2749, "step": 117560 }, { "epoch": 0.2374988384636207, "grad_norm": 670.9252319335938, "learning_rate": 9.453922780951382e-06, "loss": 22.9192, "step": 117570 }, { "epoch": 0.2375190390963045, "grad_norm": 64.24264526367188, "learning_rate": 9.453764145543614e-06, "loss": 24.5582, "step": 117580 }, { "epoch": 0.2375392397289883, "grad_norm": 348.1638488769531, "learning_rate": 9.453605488428741e-06, "loss": 15.0601, "step": 117590 }, { "epoch": 0.23755944036167212, "grad_norm": 530.2588500976562, "learning_rate": 9.453446809607534e-06, "loss": 32.7515, "step": 117600 }, { "epoch": 0.23757964099435594, "grad_norm": 1041.0078125, "learning_rate": 9.453288109080768e-06, "loss": 29.4589, "step": 117610 }, { "epoch": 0.23759984162703976, "grad_norm": 236.34274291992188, "learning_rate": 9.453129386849216e-06, "loss": 29.0242, "step": 117620 }, { "epoch": 0.23762004225972358, 
"grad_norm": 474.6619873046875, "learning_rate": 9.452970642913652e-06, "loss": 13.05, "step": 117630 }, { "epoch": 0.2376402428924074, "grad_norm": 379.6215515136719, "learning_rate": 9.452811877274848e-06, "loss": 15.9084, "step": 117640 }, { "epoch": 0.23766044352509122, "grad_norm": 482.5552673339844, "learning_rate": 9.45265308993358e-06, "loss": 14.5373, "step": 117650 }, { "epoch": 0.237680644157775, "grad_norm": 498.3376159667969, "learning_rate": 9.452494280890621e-06, "loss": 37.3605, "step": 117660 }, { "epoch": 0.23770084479045883, "grad_norm": 502.6356201171875, "learning_rate": 9.452335450146744e-06, "loss": 24.045, "step": 117670 }, { "epoch": 0.23772104542314265, "grad_norm": 480.24542236328125, "learning_rate": 9.452176597702724e-06, "loss": 26.7962, "step": 117680 }, { "epoch": 0.23774124605582647, "grad_norm": 397.1744079589844, "learning_rate": 9.452017723559337e-06, "loss": 24.1105, "step": 117690 }, { "epoch": 0.2377614466885103, "grad_norm": 0.5460114479064941, "learning_rate": 9.451858827717354e-06, "loss": 24.0886, "step": 117700 }, { "epoch": 0.2377816473211941, "grad_norm": 29.42403221130371, "learning_rate": 9.451699910177551e-06, "loss": 23.887, "step": 117710 }, { "epoch": 0.2378018479538779, "grad_norm": 366.1441955566406, "learning_rate": 9.451540970940703e-06, "loss": 14.9171, "step": 117720 }, { "epoch": 0.23782204858656172, "grad_norm": 42.585025787353516, "learning_rate": 9.451382010007584e-06, "loss": 26.0252, "step": 117730 }, { "epoch": 0.23784224921924554, "grad_norm": 572.30419921875, "learning_rate": 9.45122302737897e-06, "loss": 22.0163, "step": 117740 }, { "epoch": 0.23786244985192936, "grad_norm": 498.8094787597656, "learning_rate": 9.451064023055634e-06, "loss": 18.0851, "step": 117750 }, { "epoch": 0.23788265048461318, "grad_norm": 382.15118408203125, "learning_rate": 9.450904997038351e-06, "loss": 20.9569, "step": 117760 }, { "epoch": 0.237902851117297, "grad_norm": 671.4730834960938, "learning_rate": 9.450745949327897e-06, "loss": 15.1307, "step": 117770 }, { "epoch": 0.2379230517499808, "grad_norm": 457.44891357421875, "learning_rate": 9.450586879925048e-06, "loss": 17.5412, "step": 117780 }, { "epoch": 0.23794325238266462, "grad_norm": 503.4642333984375, "learning_rate": 9.450427788830578e-06, "loss": 20.7526, "step": 117790 }, { "epoch": 0.23796345301534844, "grad_norm": 639.3961181640625, "learning_rate": 9.450268676045261e-06, "loss": 31.7642, "step": 117800 }, { "epoch": 0.23798365364803226, "grad_norm": 872.4010620117188, "learning_rate": 9.450109541569879e-06, "loss": 25.558, "step": 117810 }, { "epoch": 0.23800385428071608, "grad_norm": 937.5293579101562, "learning_rate": 9.4499503854052e-06, "loss": 25.0143, "step": 117820 }, { "epoch": 0.2380240549133999, "grad_norm": 587.4414672851562, "learning_rate": 9.449791207552001e-06, "loss": 15.5493, "step": 117830 }, { "epoch": 0.23804425554608372, "grad_norm": 441.7513122558594, "learning_rate": 9.44963200801106e-06, "loss": 14.2472, "step": 117840 }, { "epoch": 0.2380644561787675, "grad_norm": 603.2429809570312, "learning_rate": 9.449472786783153e-06, "loss": 22.1237, "step": 117850 }, { "epoch": 0.23808465681145133, "grad_norm": 518.7314453125, "learning_rate": 9.449313543869056e-06, "loss": 28.391, "step": 117860 }, { "epoch": 0.23810485744413515, "grad_norm": 611.9623413085938, "learning_rate": 9.449154279269543e-06, "loss": 25.6668, "step": 117870 }, { "epoch": 0.23812505807681897, "grad_norm": 445.8013000488281, "learning_rate": 9.448994992985393e-06, "loss": 16.1492, "step": 
117880 }, { "epoch": 0.2381452587095028, "grad_norm": 466.94317626953125, "learning_rate": 9.44883568501738e-06, "loss": 12.4663, "step": 117890 }, { "epoch": 0.2381654593421866, "grad_norm": 209.4217529296875, "learning_rate": 9.448676355366282e-06, "loss": 26.3823, "step": 117900 }, { "epoch": 0.2381856599748704, "grad_norm": 551.5216674804688, "learning_rate": 9.448517004032876e-06, "loss": 14.8769, "step": 117910 }, { "epoch": 0.23820586060755422, "grad_norm": 620.2489013671875, "learning_rate": 9.448357631017934e-06, "loss": 20.2437, "step": 117920 }, { "epoch": 0.23822606124023804, "grad_norm": 348.79095458984375, "learning_rate": 9.44819823632224e-06, "loss": 23.0824, "step": 117930 }, { "epoch": 0.23824626187292186, "grad_norm": 201.4728546142578, "learning_rate": 9.448038819946566e-06, "loss": 16.3949, "step": 117940 }, { "epoch": 0.23826646250560568, "grad_norm": 835.5105590820312, "learning_rate": 9.447879381891691e-06, "loss": 34.25, "step": 117950 }, { "epoch": 0.2382866631382895, "grad_norm": 349.479736328125, "learning_rate": 9.447719922158391e-06, "loss": 15.4773, "step": 117960 }, { "epoch": 0.23830686377097332, "grad_norm": 626.60205078125, "learning_rate": 9.447560440747443e-06, "loss": 31.5524, "step": 117970 }, { "epoch": 0.2383270644036571, "grad_norm": 294.943359375, "learning_rate": 9.447400937659625e-06, "loss": 48.2904, "step": 117980 }, { "epoch": 0.23834726503634093, "grad_norm": 209.84666442871094, "learning_rate": 9.447241412895714e-06, "loss": 23.8491, "step": 117990 }, { "epoch": 0.23836746566902475, "grad_norm": 969.4122924804688, "learning_rate": 9.44708186645649e-06, "loss": 37.3985, "step": 118000 }, { "epoch": 0.23838766630170857, "grad_norm": 94.50203704833984, "learning_rate": 9.446922298342725e-06, "loss": 27.4426, "step": 118010 }, { "epoch": 0.2384078669343924, "grad_norm": 392.3721618652344, "learning_rate": 9.446762708555202e-06, "loss": 21.8015, "step": 118020 }, { "epoch": 0.2384280675670762, "grad_norm": 428.6141052246094, "learning_rate": 9.446603097094696e-06, "loss": 26.3162, "step": 118030 }, { "epoch": 0.23844826819976, "grad_norm": 472.3879699707031, "learning_rate": 9.446443463961986e-06, "loss": 10.2355, "step": 118040 }, { "epoch": 0.23846846883244383, "grad_norm": 272.6233215332031, "learning_rate": 9.44628380915785e-06, "loss": 7.925, "step": 118050 }, { "epoch": 0.23848866946512765, "grad_norm": 1400.2432861328125, "learning_rate": 9.446124132683066e-06, "loss": 30.2269, "step": 118060 }, { "epoch": 0.23850887009781147, "grad_norm": 491.30865478515625, "learning_rate": 9.445964434538412e-06, "loss": 26.4811, "step": 118070 }, { "epoch": 0.23852907073049529, "grad_norm": 474.2497863769531, "learning_rate": 9.445804714724667e-06, "loss": 30.6656, "step": 118080 }, { "epoch": 0.2385492713631791, "grad_norm": 9.611652374267578, "learning_rate": 9.44564497324261e-06, "loss": 28.9517, "step": 118090 }, { "epoch": 0.2385694719958629, "grad_norm": 590.2717895507812, "learning_rate": 9.445485210093018e-06, "loss": 19.1242, "step": 118100 }, { "epoch": 0.23858967262854672, "grad_norm": 244.5931854248047, "learning_rate": 9.445325425276668e-06, "loss": 14.1212, "step": 118110 }, { "epoch": 0.23860987326123054, "grad_norm": 709.9424438476562, "learning_rate": 9.445165618794343e-06, "loss": 21.64, "step": 118120 }, { "epoch": 0.23863007389391436, "grad_norm": 316.25927734375, "learning_rate": 9.44500579064682e-06, "loss": 18.8832, "step": 118130 }, { "epoch": 0.23865027452659818, "grad_norm": 217.62611389160156, "learning_rate": 
9.444845940834876e-06, "loss": 18.0076, "step": 118140 }, { "epoch": 0.238670475159282, "grad_norm": 251.6680145263672, "learning_rate": 9.444686069359294e-06, "loss": 27.4444, "step": 118150 }, { "epoch": 0.23869067579196582, "grad_norm": 362.588623046875, "learning_rate": 9.444526176220851e-06, "loss": 34.5831, "step": 118160 }, { "epoch": 0.2387108764246496, "grad_norm": 298.76190185546875, "learning_rate": 9.444366261420328e-06, "loss": 12.2111, "step": 118170 }, { "epoch": 0.23873107705733343, "grad_norm": 59.43899917602539, "learning_rate": 9.4442063249585e-06, "loss": 26.4651, "step": 118180 }, { "epoch": 0.23875127769001725, "grad_norm": 327.2350769042969, "learning_rate": 9.44404636683615e-06, "loss": 28.7903, "step": 118190 }, { "epoch": 0.23877147832270107, "grad_norm": 409.3868103027344, "learning_rate": 9.443886387054058e-06, "loss": 33.7824, "step": 118200 }, { "epoch": 0.2387916789553849, "grad_norm": 632.2372436523438, "learning_rate": 9.443726385613003e-06, "loss": 14.7928, "step": 118210 }, { "epoch": 0.2388118795880687, "grad_norm": 587.41357421875, "learning_rate": 9.443566362513763e-06, "loss": 23.6732, "step": 118220 }, { "epoch": 0.2388320802207525, "grad_norm": 1263.5318603515625, "learning_rate": 9.44340631775712e-06, "loss": 46.6303, "step": 118230 }, { "epoch": 0.23885228085343632, "grad_norm": 158.19912719726562, "learning_rate": 9.443246251343855e-06, "loss": 19.1064, "step": 118240 }, { "epoch": 0.23887248148612014, "grad_norm": 843.1229248046875, "learning_rate": 9.443086163274745e-06, "loss": 38.6345, "step": 118250 }, { "epoch": 0.23889268211880396, "grad_norm": 163.65879821777344, "learning_rate": 9.442926053550572e-06, "loss": 18.5162, "step": 118260 }, { "epoch": 0.23891288275148778, "grad_norm": 1377.5970458984375, "learning_rate": 9.442765922172117e-06, "loss": 23.9794, "step": 118270 }, { "epoch": 0.2389330833841716, "grad_norm": 83.8387680053711, "learning_rate": 9.442605769140159e-06, "loss": 18.4275, "step": 118280 }, { "epoch": 0.23895328401685542, "grad_norm": 246.1664581298828, "learning_rate": 9.44244559445548e-06, "loss": 19.1218, "step": 118290 }, { "epoch": 0.23897348464953921, "grad_norm": 283.3323974609375, "learning_rate": 9.44228539811886e-06, "loss": 34.5833, "step": 118300 }, { "epoch": 0.23899368528222303, "grad_norm": 349.25634765625, "learning_rate": 9.44212518013108e-06, "loss": 23.3768, "step": 118310 }, { "epoch": 0.23901388591490685, "grad_norm": 243.878173828125, "learning_rate": 9.44196494049292e-06, "loss": 25.8638, "step": 118320 }, { "epoch": 0.23903408654759067, "grad_norm": 558.2456665039062, "learning_rate": 9.44180467920516e-06, "loss": 25.0371, "step": 118330 }, { "epoch": 0.2390542871802745, "grad_norm": 84.71350860595703, "learning_rate": 9.441644396268586e-06, "loss": 18.0213, "step": 118340 }, { "epoch": 0.23907448781295831, "grad_norm": 7.51152229309082, "learning_rate": 9.441484091683975e-06, "loss": 14.0777, "step": 118350 }, { "epoch": 0.2390946884456421, "grad_norm": 434.8461608886719, "learning_rate": 9.441323765452107e-06, "loss": 12.1847, "step": 118360 }, { "epoch": 0.23911488907832593, "grad_norm": 642.7026977539062, "learning_rate": 9.441163417573768e-06, "loss": 23.7775, "step": 118370 }, { "epoch": 0.23913508971100975, "grad_norm": 748.6219482421875, "learning_rate": 9.441003048049734e-06, "loss": 27.3327, "step": 118380 }, { "epoch": 0.23915529034369357, "grad_norm": 625.2716674804688, "learning_rate": 9.440842656880792e-06, "loss": 22.5399, "step": 118390 }, { "epoch": 0.2391754909763774, 
"grad_norm": 180.45635986328125, "learning_rate": 9.440682244067724e-06, "loss": 23.0293, "step": 118400 }, { "epoch": 0.2391956916090612, "grad_norm": 221.6510772705078, "learning_rate": 9.440521809611307e-06, "loss": 18.2394, "step": 118410 }, { "epoch": 0.239215892241745, "grad_norm": 1343.8333740234375, "learning_rate": 9.440361353512325e-06, "loss": 33.6273, "step": 118420 }, { "epoch": 0.23923609287442882, "grad_norm": 658.0180053710938, "learning_rate": 9.44020087577156e-06, "loss": 21.0594, "step": 118430 }, { "epoch": 0.23925629350711264, "grad_norm": 436.8914794921875, "learning_rate": 9.440040376389795e-06, "loss": 20.5959, "step": 118440 }, { "epoch": 0.23927649413979646, "grad_norm": 496.1158142089844, "learning_rate": 9.439879855367813e-06, "loss": 21.0232, "step": 118450 }, { "epoch": 0.23929669477248028, "grad_norm": 676.6805419921875, "learning_rate": 9.439719312706393e-06, "loss": 15.3883, "step": 118460 }, { "epoch": 0.2393168954051641, "grad_norm": 498.2493591308594, "learning_rate": 9.43955874840632e-06, "loss": 34.6778, "step": 118470 }, { "epoch": 0.23933709603784792, "grad_norm": 743.6807861328125, "learning_rate": 9.439398162468376e-06, "loss": 26.3646, "step": 118480 }, { "epoch": 0.2393572966705317, "grad_norm": 175.42564392089844, "learning_rate": 9.439237554893344e-06, "loss": 18.911, "step": 118490 }, { "epoch": 0.23937749730321553, "grad_norm": 300.9667663574219, "learning_rate": 9.439076925682006e-06, "loss": 31.7426, "step": 118500 }, { "epoch": 0.23939769793589935, "grad_norm": 286.52227783203125, "learning_rate": 9.438916274835148e-06, "loss": 37.3805, "step": 118510 }, { "epoch": 0.23941789856858317, "grad_norm": 279.5403747558594, "learning_rate": 9.438755602353549e-06, "loss": 8.7395, "step": 118520 }, { "epoch": 0.239438099201267, "grad_norm": 511.781005859375, "learning_rate": 9.438594908237993e-06, "loss": 13.8962, "step": 118530 }, { "epoch": 0.2394582998339508, "grad_norm": 442.2402038574219, "learning_rate": 9.438434192489263e-06, "loss": 16.4463, "step": 118540 }, { "epoch": 0.2394785004666346, "grad_norm": 737.2451171875, "learning_rate": 9.438273455108145e-06, "loss": 21.8331, "step": 118550 }, { "epoch": 0.23949870109931842, "grad_norm": 558.0885009765625, "learning_rate": 9.43811269609542e-06, "loss": 17.9035, "step": 118560 }, { "epoch": 0.23951890173200224, "grad_norm": 815.43798828125, "learning_rate": 9.43795191545187e-06, "loss": 16.2661, "step": 118570 }, { "epoch": 0.23953910236468606, "grad_norm": 859.9912719726562, "learning_rate": 9.437791113178283e-06, "loss": 17.6522, "step": 118580 }, { "epoch": 0.23955930299736988, "grad_norm": 528.0261840820312, "learning_rate": 9.43763028927544e-06, "loss": 20.2724, "step": 118590 }, { "epoch": 0.2395795036300537, "grad_norm": 719.481201171875, "learning_rate": 9.437469443744124e-06, "loss": 29.6069, "step": 118600 }, { "epoch": 0.2395997042627375, "grad_norm": 405.55743408203125, "learning_rate": 9.437308576585121e-06, "loss": 24.4762, "step": 118610 }, { "epoch": 0.23961990489542132, "grad_norm": 184.1614227294922, "learning_rate": 9.437147687799213e-06, "loss": 26.7506, "step": 118620 }, { "epoch": 0.23964010552810514, "grad_norm": 239.3774871826172, "learning_rate": 9.436986777387187e-06, "loss": 18.1325, "step": 118630 }, { "epoch": 0.23966030616078896, "grad_norm": 587.8845825195312, "learning_rate": 9.436825845349826e-06, "loss": 26.7064, "step": 118640 }, { "epoch": 0.23968050679347278, "grad_norm": 848.9060668945312, "learning_rate": 9.436664891687911e-06, "loss": 29.1183, "step": 
118650 }, { "epoch": 0.2397007074261566, "grad_norm": 127.81281280517578, "learning_rate": 9.436503916402234e-06, "loss": 21.8435, "step": 118660 }, { "epoch": 0.23972090805884042, "grad_norm": 753.8478393554688, "learning_rate": 9.436342919493571e-06, "loss": 31.7494, "step": 118670 }, { "epoch": 0.2397411086915242, "grad_norm": 482.82415771484375, "learning_rate": 9.436181900962713e-06, "loss": 28.5088, "step": 118680 }, { "epoch": 0.23976130932420803, "grad_norm": 286.2489318847656, "learning_rate": 9.43602086081044e-06, "loss": 15.6981, "step": 118690 }, { "epoch": 0.23978150995689185, "grad_norm": 228.73312377929688, "learning_rate": 9.435859799037541e-06, "loss": 15.802, "step": 118700 }, { "epoch": 0.23980171058957567, "grad_norm": 841.956298828125, "learning_rate": 9.4356987156448e-06, "loss": 22.0875, "step": 118710 }, { "epoch": 0.2398219112222595, "grad_norm": 455.1575012207031, "learning_rate": 9.435537610633002e-06, "loss": 25.2311, "step": 118720 }, { "epoch": 0.2398421118549433, "grad_norm": 403.8409729003906, "learning_rate": 9.435376484002927e-06, "loss": 21.4939, "step": 118730 }, { "epoch": 0.2398623124876271, "grad_norm": 1453.3387451171875, "learning_rate": 9.43521533575537e-06, "loss": 37.6629, "step": 118740 }, { "epoch": 0.23988251312031092, "grad_norm": 278.9483337402344, "learning_rate": 9.43505416589111e-06, "loss": 15.3473, "step": 118750 }, { "epoch": 0.23990271375299474, "grad_norm": 823.5610961914062, "learning_rate": 9.434892974410932e-06, "loss": 31.414, "step": 118760 }, { "epoch": 0.23992291438567856, "grad_norm": 374.9403381347656, "learning_rate": 9.434731761315625e-06, "loss": 28.9586, "step": 118770 }, { "epoch": 0.23994311501836238, "grad_norm": 307.4246520996094, "learning_rate": 9.434570526605974e-06, "loss": 15.3941, "step": 118780 }, { "epoch": 0.2399633156510462, "grad_norm": 424.46905517578125, "learning_rate": 9.434409270282762e-06, "loss": 26.6051, "step": 118790 }, { "epoch": 0.23998351628373002, "grad_norm": 362.89013671875, "learning_rate": 9.43424799234678e-06, "loss": 28.606, "step": 118800 }, { "epoch": 0.2400037169164138, "grad_norm": 671.39501953125, "learning_rate": 9.43408669279881e-06, "loss": 12.6954, "step": 118810 }, { "epoch": 0.24002391754909763, "grad_norm": 168.6937255859375, "learning_rate": 9.433925371639639e-06, "loss": 19.6997, "step": 118820 }, { "epoch": 0.24004411818178145, "grad_norm": 288.7983703613281, "learning_rate": 9.433764028870053e-06, "loss": 25.7118, "step": 118830 }, { "epoch": 0.24006431881446527, "grad_norm": 322.3669738769531, "learning_rate": 9.433602664490838e-06, "loss": 10.3236, "step": 118840 }, { "epoch": 0.2400845194471491, "grad_norm": 250.17198181152344, "learning_rate": 9.433441278502784e-06, "loss": 19.0235, "step": 118850 }, { "epoch": 0.2401047200798329, "grad_norm": 538.3212890625, "learning_rate": 9.433279870906673e-06, "loss": 23.5239, "step": 118860 }, { "epoch": 0.2401249207125167, "grad_norm": 864.250732421875, "learning_rate": 9.433118441703293e-06, "loss": 26.127, "step": 118870 }, { "epoch": 0.24014512134520052, "grad_norm": 504.7637939453125, "learning_rate": 9.432956990893434e-06, "loss": 26.1027, "step": 118880 }, { "epoch": 0.24016532197788434, "grad_norm": 338.23028564453125, "learning_rate": 9.432795518477878e-06, "loss": 12.1233, "step": 118890 }, { "epoch": 0.24018552261056816, "grad_norm": 782.1828002929688, "learning_rate": 9.432634024457414e-06, "loss": 17.7826, "step": 118900 }, { "epoch": 0.24020572324325198, "grad_norm": 355.4798278808594, "learning_rate": 
9.43247250883283e-06, "loss": 31.2056, "step": 118910 }, { "epoch": 0.2402259238759358, "grad_norm": 203.5426483154297, "learning_rate": 9.432310971604914e-06, "loss": 19.9125, "step": 118920 }, { "epoch": 0.2402461245086196, "grad_norm": 439.1073303222656, "learning_rate": 9.432149412774452e-06, "loss": 21.6993, "step": 118930 }, { "epoch": 0.24026632514130342, "grad_norm": 28.7581787109375, "learning_rate": 9.431987832342228e-06, "loss": 7.5493, "step": 118940 }, { "epoch": 0.24028652577398724, "grad_norm": 434.7237243652344, "learning_rate": 9.431826230309035e-06, "loss": 24.2173, "step": 118950 }, { "epoch": 0.24030672640667106, "grad_norm": 136.76669311523438, "learning_rate": 9.431664606675659e-06, "loss": 33.7794, "step": 118960 }, { "epoch": 0.24032692703935488, "grad_norm": 550.2783203125, "learning_rate": 9.431502961442887e-06, "loss": 19.0586, "step": 118970 }, { "epoch": 0.2403471276720387, "grad_norm": 470.1292724609375, "learning_rate": 9.431341294611506e-06, "loss": 23.7827, "step": 118980 }, { "epoch": 0.24036732830472252, "grad_norm": 397.0516662597656, "learning_rate": 9.431179606182306e-06, "loss": 33.7048, "step": 118990 }, { "epoch": 0.2403875289374063, "grad_norm": 419.5294494628906, "learning_rate": 9.431017896156074e-06, "loss": 21.6062, "step": 119000 }, { "epoch": 0.24040772957009013, "grad_norm": 433.27874755859375, "learning_rate": 9.430856164533598e-06, "loss": 29.5763, "step": 119010 }, { "epoch": 0.24042793020277395, "grad_norm": 420.1417236328125, "learning_rate": 9.430694411315667e-06, "loss": 21.6851, "step": 119020 }, { "epoch": 0.24044813083545777, "grad_norm": 90.33391571044922, "learning_rate": 9.430532636503067e-06, "loss": 13.6489, "step": 119030 }, { "epoch": 0.2404683314681416, "grad_norm": 480.8397216796875, "learning_rate": 9.43037084009659e-06, "loss": 16.3171, "step": 119040 }, { "epoch": 0.2404885321008254, "grad_norm": 325.5364685058594, "learning_rate": 9.430209022097024e-06, "loss": 25.2489, "step": 119050 }, { "epoch": 0.2405087327335092, "grad_norm": 162.39744567871094, "learning_rate": 9.430047182505152e-06, "loss": 18.4027, "step": 119060 }, { "epoch": 0.24052893336619302, "grad_norm": 401.30877685546875, "learning_rate": 9.429885321321772e-06, "loss": 24.318, "step": 119070 }, { "epoch": 0.24054913399887684, "grad_norm": 362.2718505859375, "learning_rate": 9.429723438547666e-06, "loss": 27.725, "step": 119080 }, { "epoch": 0.24056933463156066, "grad_norm": 667.8294677734375, "learning_rate": 9.429561534183627e-06, "loss": 21.12, "step": 119090 }, { "epoch": 0.24058953526424448, "grad_norm": 592.030029296875, "learning_rate": 9.429399608230441e-06, "loss": 33.3193, "step": 119100 }, { "epoch": 0.2406097358969283, "grad_norm": 246.8064727783203, "learning_rate": 9.429237660688896e-06, "loss": 17.4129, "step": 119110 }, { "epoch": 0.24062993652961212, "grad_norm": 442.88714599609375, "learning_rate": 9.429075691559788e-06, "loss": 14.079, "step": 119120 }, { "epoch": 0.2406501371622959, "grad_norm": 565.0037841796875, "learning_rate": 9.4289137008439e-06, "loss": 29.0246, "step": 119130 }, { "epoch": 0.24067033779497973, "grad_norm": 500.62115478515625, "learning_rate": 9.428751688542025e-06, "loss": 18.6905, "step": 119140 }, { "epoch": 0.24069053842766355, "grad_norm": 153.9894256591797, "learning_rate": 9.428589654654951e-06, "loss": 25.6751, "step": 119150 }, { "epoch": 0.24071073906034737, "grad_norm": 24.473669052124023, "learning_rate": 9.428427599183467e-06, "loss": 10.0738, "step": 119160 }, { "epoch": 0.2407309396930312, 
"grad_norm": 326.5309753417969, "learning_rate": 9.428265522128366e-06, "loss": 14.4481, "step": 119170 }, { "epoch": 0.240751140325715, "grad_norm": 373.0992126464844, "learning_rate": 9.428103423490434e-06, "loss": 27.2887, "step": 119180 }, { "epoch": 0.2407713409583988, "grad_norm": 8.004453659057617, "learning_rate": 9.427941303270464e-06, "loss": 11.0246, "step": 119190 }, { "epoch": 0.24079154159108263, "grad_norm": 314.0025329589844, "learning_rate": 9.427779161469246e-06, "loss": 24.9284, "step": 119200 }, { "epoch": 0.24081174222376645, "grad_norm": 228.9098358154297, "learning_rate": 9.427616998087568e-06, "loss": 36.9234, "step": 119210 }, { "epoch": 0.24083194285645027, "grad_norm": 47.23506164550781, "learning_rate": 9.427454813126222e-06, "loss": 30.2929, "step": 119220 }, { "epoch": 0.24085214348913409, "grad_norm": 195.05516052246094, "learning_rate": 9.427292606585998e-06, "loss": 17.1163, "step": 119230 }, { "epoch": 0.2408723441218179, "grad_norm": 244.37149047851562, "learning_rate": 9.427130378467689e-06, "loss": 29.368, "step": 119240 }, { "epoch": 0.2408925447545017, "grad_norm": 667.91455078125, "learning_rate": 9.42696812877208e-06, "loss": 21.5214, "step": 119250 }, { "epoch": 0.24091274538718552, "grad_norm": 169.0312957763672, "learning_rate": 9.426805857499968e-06, "loss": 17.9621, "step": 119260 }, { "epoch": 0.24093294601986934, "grad_norm": 0.0, "learning_rate": 9.426643564652139e-06, "loss": 20.2011, "step": 119270 }, { "epoch": 0.24095314665255316, "grad_norm": 694.5930786132812, "learning_rate": 9.426481250229387e-06, "loss": 19.3448, "step": 119280 }, { "epoch": 0.24097334728523698, "grad_norm": 168.7473602294922, "learning_rate": 9.426318914232503e-06, "loss": 20.6237, "step": 119290 }, { "epoch": 0.2409935479179208, "grad_norm": 223.9930877685547, "learning_rate": 9.426156556662276e-06, "loss": 37.335, "step": 119300 }, { "epoch": 0.24101374855060462, "grad_norm": 709.3331298828125, "learning_rate": 9.425994177519501e-06, "loss": 35.3504, "step": 119310 }, { "epoch": 0.2410339491832884, "grad_norm": 148.9415740966797, "learning_rate": 9.425831776804966e-06, "loss": 20.1295, "step": 119320 }, { "epoch": 0.24105414981597223, "grad_norm": 130.87818908691406, "learning_rate": 9.425669354519464e-06, "loss": 10.6013, "step": 119330 }, { "epoch": 0.24107435044865605, "grad_norm": 484.9830627441406, "learning_rate": 9.425506910663785e-06, "loss": 27.6242, "step": 119340 }, { "epoch": 0.24109455108133987, "grad_norm": 152.82427978515625, "learning_rate": 9.425344445238723e-06, "loss": 11.1665, "step": 119350 }, { "epoch": 0.2411147517140237, "grad_norm": 394.40301513671875, "learning_rate": 9.425181958245069e-06, "loss": 26.7533, "step": 119360 }, { "epoch": 0.2411349523467075, "grad_norm": 547.2018432617188, "learning_rate": 9.425019449683614e-06, "loss": 29.0329, "step": 119370 }, { "epoch": 0.2411551529793913, "grad_norm": 49.635162353515625, "learning_rate": 9.424856919555152e-06, "loss": 17.9029, "step": 119380 }, { "epoch": 0.24117535361207512, "grad_norm": 227.09913635253906, "learning_rate": 9.424694367860475e-06, "loss": 15.4209, "step": 119390 }, { "epoch": 0.24119555424475894, "grad_norm": 470.6295166015625, "learning_rate": 9.424531794600372e-06, "loss": 15.6939, "step": 119400 }, { "epoch": 0.24121575487744276, "grad_norm": 377.6591796875, "learning_rate": 9.424369199775639e-06, "loss": 28.1742, "step": 119410 }, { "epoch": 0.24123595551012658, "grad_norm": 477.1115417480469, "learning_rate": 9.424206583387066e-06, "loss": 28.7678, "step": 119420 
}, { "epoch": 0.2412561561428104, "grad_norm": 404.892822265625, "learning_rate": 9.424043945435449e-06, "loss": 32.6534, "step": 119430 }, { "epoch": 0.24127635677549422, "grad_norm": 431.056884765625, "learning_rate": 9.423881285921576e-06, "loss": 16.7084, "step": 119440 }, { "epoch": 0.24129655740817801, "grad_norm": 587.3072509765625, "learning_rate": 9.423718604846243e-06, "loss": 28.7116, "step": 119450 }, { "epoch": 0.24131675804086183, "grad_norm": 286.3149719238281, "learning_rate": 9.423555902210241e-06, "loss": 29.833, "step": 119460 }, { "epoch": 0.24133695867354565, "grad_norm": 488.9961853027344, "learning_rate": 9.423393178014366e-06, "loss": 23.0982, "step": 119470 }, { "epoch": 0.24135715930622947, "grad_norm": 1299.418701171875, "learning_rate": 9.423230432259409e-06, "loss": 19.3942, "step": 119480 }, { "epoch": 0.2413773599389133, "grad_norm": 489.6849060058594, "learning_rate": 9.423067664946162e-06, "loss": 15.4288, "step": 119490 }, { "epoch": 0.24139756057159711, "grad_norm": 479.0276794433594, "learning_rate": 9.42290487607542e-06, "loss": 21.8808, "step": 119500 }, { "epoch": 0.2414177612042809, "grad_norm": 454.5273742675781, "learning_rate": 9.422742065647976e-06, "loss": 21.8085, "step": 119510 }, { "epoch": 0.24143796183696473, "grad_norm": 1.8563333749771118, "learning_rate": 9.422579233664624e-06, "loss": 15.3194, "step": 119520 }, { "epoch": 0.24145816246964855, "grad_norm": 672.4547119140625, "learning_rate": 9.422416380126157e-06, "loss": 31.2583, "step": 119530 }, { "epoch": 0.24147836310233237, "grad_norm": 327.16064453125, "learning_rate": 9.42225350503337e-06, "loss": 23.3336, "step": 119540 }, { "epoch": 0.2414985637350162, "grad_norm": 458.59814453125, "learning_rate": 9.422090608387056e-06, "loss": 41.3047, "step": 119550 }, { "epoch": 0.2415187643677, "grad_norm": 359.75482177734375, "learning_rate": 9.421927690188006e-06, "loss": 23.0521, "step": 119560 }, { "epoch": 0.2415389650003838, "grad_norm": 286.3177795410156, "learning_rate": 9.421764750437019e-06, "loss": 19.8835, "step": 119570 }, { "epoch": 0.24155916563306762, "grad_norm": 489.7880554199219, "learning_rate": 9.421601789134887e-06, "loss": 29.9945, "step": 119580 }, { "epoch": 0.24157936626575144, "grad_norm": 194.4234619140625, "learning_rate": 9.421438806282402e-06, "loss": 17.1849, "step": 119590 }, { "epoch": 0.24159956689843526, "grad_norm": 0.0, "learning_rate": 9.421275801880363e-06, "loss": 28.6513, "step": 119600 }, { "epoch": 0.24161976753111908, "grad_norm": 222.85772705078125, "learning_rate": 9.42111277592956e-06, "loss": 16.1013, "step": 119610 }, { "epoch": 0.2416399681638029, "grad_norm": 537.223388671875, "learning_rate": 9.42094972843079e-06, "loss": 32.1397, "step": 119620 }, { "epoch": 0.24166016879648672, "grad_norm": 357.1800231933594, "learning_rate": 9.420786659384849e-06, "loss": 13.7829, "step": 119630 }, { "epoch": 0.2416803694291705, "grad_norm": 674.0662841796875, "learning_rate": 9.420623568792528e-06, "loss": 9.7583, "step": 119640 }, { "epoch": 0.24170057006185433, "grad_norm": 674.779296875, "learning_rate": 9.420460456654625e-06, "loss": 43.7078, "step": 119650 }, { "epoch": 0.24172077069453815, "grad_norm": 269.4070129394531, "learning_rate": 9.420297322971934e-06, "loss": 42.7253, "step": 119660 }, { "epoch": 0.24174097132722197, "grad_norm": 273.6134948730469, "learning_rate": 9.420134167745249e-06, "loss": 17.3662, "step": 119670 }, { "epoch": 0.2417611719599058, "grad_norm": 364.1605224609375, "learning_rate": 9.419970990975366e-06, "loss": 
33.5243, "step": 119680 }, { "epoch": 0.2417813725925896, "grad_norm": 81.76612854003906, "learning_rate": 9.41980779266308e-06, "loss": 25.4665, "step": 119690 }, { "epoch": 0.2418015732252734, "grad_norm": 155.71640014648438, "learning_rate": 9.419644572809189e-06, "loss": 25.1629, "step": 119700 }, { "epoch": 0.24182177385795722, "grad_norm": 261.5838623046875, "learning_rate": 9.419481331414485e-06, "loss": 29.5781, "step": 119710 }, { "epoch": 0.24184197449064104, "grad_norm": 535.605224609375, "learning_rate": 9.419318068479765e-06, "loss": 26.7493, "step": 119720 }, { "epoch": 0.24186217512332486, "grad_norm": 575.5614624023438, "learning_rate": 9.419154784005826e-06, "loss": 24.8415, "step": 119730 }, { "epoch": 0.24188237575600868, "grad_norm": 468.4418029785156, "learning_rate": 9.418991477993461e-06, "loss": 30.0817, "step": 119740 }, { "epoch": 0.2419025763886925, "grad_norm": 400.21563720703125, "learning_rate": 9.418828150443469e-06, "loss": 32.9474, "step": 119750 }, { "epoch": 0.24192277702137632, "grad_norm": 758.7032470703125, "learning_rate": 9.418664801356643e-06, "loss": 23.37, "step": 119760 }, { "epoch": 0.24194297765406012, "grad_norm": 685.31591796875, "learning_rate": 9.418501430733781e-06, "loss": 35.0462, "step": 119770 }, { "epoch": 0.24196317828674394, "grad_norm": 859.9779663085938, "learning_rate": 9.418338038575678e-06, "loss": 21.0738, "step": 119780 }, { "epoch": 0.24198337891942776, "grad_norm": 309.0888977050781, "learning_rate": 9.418174624883134e-06, "loss": 18.5668, "step": 119790 }, { "epoch": 0.24200357955211158, "grad_norm": 359.8492431640625, "learning_rate": 9.418011189656942e-06, "loss": 34.0436, "step": 119800 }, { "epoch": 0.2420237801847954, "grad_norm": 372.9681091308594, "learning_rate": 9.417847732897897e-06, "loss": 26.2594, "step": 119810 }, { "epoch": 0.24204398081747922, "grad_norm": 289.66851806640625, "learning_rate": 9.4176842546068e-06, "loss": 19.6268, "step": 119820 }, { "epoch": 0.242064181450163, "grad_norm": 827.5708618164062, "learning_rate": 9.417520754784445e-06, "loss": 30.1527, "step": 119830 }, { "epoch": 0.24208438208284683, "grad_norm": 680.2725219726562, "learning_rate": 9.41735723343163e-06, "loss": 34.8086, "step": 119840 }, { "epoch": 0.24210458271553065, "grad_norm": 652.0809326171875, "learning_rate": 9.417193690549151e-06, "loss": 27.441, "step": 119850 }, { "epoch": 0.24212478334821447, "grad_norm": 362.1368103027344, "learning_rate": 9.417030126137807e-06, "loss": 20.5512, "step": 119860 }, { "epoch": 0.2421449839808983, "grad_norm": 154.55960083007812, "learning_rate": 9.416866540198393e-06, "loss": 26.823, "step": 119870 }, { "epoch": 0.2421651846135821, "grad_norm": 470.00567626953125, "learning_rate": 9.416702932731707e-06, "loss": 44.4742, "step": 119880 }, { "epoch": 0.2421853852462659, "grad_norm": 920.4050903320312, "learning_rate": 9.416539303738546e-06, "loss": 26.894, "step": 119890 }, { "epoch": 0.24220558587894972, "grad_norm": 629.051025390625, "learning_rate": 9.41637565321971e-06, "loss": 26.9012, "step": 119900 }, { "epoch": 0.24222578651163354, "grad_norm": 538.2350463867188, "learning_rate": 9.416211981175993e-06, "loss": 39.9106, "step": 119910 }, { "epoch": 0.24224598714431736, "grad_norm": 532.4452514648438, "learning_rate": 9.416048287608195e-06, "loss": 13.5495, "step": 119920 }, { "epoch": 0.24226618777700118, "grad_norm": 203.54214477539062, "learning_rate": 9.415884572517113e-06, "loss": 17.1925, "step": 119930 }, { "epoch": 0.242286388409685, "grad_norm": 463.0719909667969, 
"learning_rate": 9.415720835903546e-06, "loss": 28.3784, "step": 119940 }, { "epoch": 0.24230658904236882, "grad_norm": 352.2186584472656, "learning_rate": 9.41555707776829e-06, "loss": 30.7774, "step": 119950 }, { "epoch": 0.2423267896750526, "grad_norm": 860.6155395507812, "learning_rate": 9.415393298112145e-06, "loss": 60.4257, "step": 119960 }, { "epoch": 0.24234699030773643, "grad_norm": 319.4090576171875, "learning_rate": 9.415229496935909e-06, "loss": 28.4482, "step": 119970 }, { "epoch": 0.24236719094042025, "grad_norm": 486.388916015625, "learning_rate": 9.41506567424038e-06, "loss": 23.7481, "step": 119980 }, { "epoch": 0.24238739157310407, "grad_norm": 812.5471801757812, "learning_rate": 9.414901830026355e-06, "loss": 36.6252, "step": 119990 }, { "epoch": 0.2424075922057879, "grad_norm": 485.1556091308594, "learning_rate": 9.414737964294636e-06, "loss": 25.9231, "step": 120000 }, { "epoch": 0.2424277928384717, "grad_norm": 369.75054931640625, "learning_rate": 9.414574077046019e-06, "loss": 11.3446, "step": 120010 }, { "epoch": 0.2424479934711555, "grad_norm": 216.30418395996094, "learning_rate": 9.414410168281303e-06, "loss": 28.49, "step": 120020 }, { "epoch": 0.24246819410383932, "grad_norm": 412.84197998046875, "learning_rate": 9.414246238001286e-06, "loss": 21.8082, "step": 120030 }, { "epoch": 0.24248839473652314, "grad_norm": 370.929443359375, "learning_rate": 9.414082286206769e-06, "loss": 21.473, "step": 120040 }, { "epoch": 0.24250859536920696, "grad_norm": 827.7217407226562, "learning_rate": 9.41391831289855e-06, "loss": 21.2329, "step": 120050 }, { "epoch": 0.24252879600189078, "grad_norm": 259.69818115234375, "learning_rate": 9.41375431807743e-06, "loss": 38.319, "step": 120060 }, { "epoch": 0.2425489966345746, "grad_norm": 640.1962280273438, "learning_rate": 9.413590301744207e-06, "loss": 19.2748, "step": 120070 }, { "epoch": 0.24256919726725842, "grad_norm": 366.4822998046875, "learning_rate": 9.413426263899677e-06, "loss": 15.9771, "step": 120080 }, { "epoch": 0.24258939789994222, "grad_norm": 819.957763671875, "learning_rate": 9.413262204544645e-06, "loss": 13.7612, "step": 120090 }, { "epoch": 0.24260959853262604, "grad_norm": 292.8883361816406, "learning_rate": 9.41309812367991e-06, "loss": 17.4417, "step": 120100 }, { "epoch": 0.24262979916530986, "grad_norm": 463.2019958496094, "learning_rate": 9.412934021306267e-06, "loss": 30.2767, "step": 120110 }, { "epoch": 0.24264999979799368, "grad_norm": 233.68606567382812, "learning_rate": 9.412769897424519e-06, "loss": 17.6851, "step": 120120 }, { "epoch": 0.2426702004306775, "grad_norm": 2112.52294921875, "learning_rate": 9.412605752035467e-06, "loss": 32.6304, "step": 120130 }, { "epoch": 0.24269040106336132, "grad_norm": 206.77734375, "learning_rate": 9.412441585139908e-06, "loss": 25.8283, "step": 120140 }, { "epoch": 0.2427106016960451, "grad_norm": 519.0888671875, "learning_rate": 9.412277396738647e-06, "loss": 23.6317, "step": 120150 }, { "epoch": 0.24273080232872893, "grad_norm": 284.4267272949219, "learning_rate": 9.41211318683248e-06, "loss": 25.1953, "step": 120160 }, { "epoch": 0.24275100296141275, "grad_norm": 525.6731567382812, "learning_rate": 9.411948955422207e-06, "loss": 28.4889, "step": 120170 }, { "epoch": 0.24277120359409657, "grad_norm": 1001.0545043945312, "learning_rate": 9.411784702508631e-06, "loss": 26.5903, "step": 120180 }, { "epoch": 0.2427914042267804, "grad_norm": 105.27946472167969, "learning_rate": 9.41162042809255e-06, "loss": 8.426, "step": 120190 }, { "epoch": 0.2428116048594642, 
"grad_norm": 738.8587646484375, "learning_rate": 9.411456132174768e-06, "loss": 37.9383, "step": 120200 }, { "epoch": 0.242831805492148, "grad_norm": 464.0848693847656, "learning_rate": 9.411291814756082e-06, "loss": 24.2003, "step": 120210 }, { "epoch": 0.24285200612483182, "grad_norm": 1074.05517578125, "learning_rate": 9.411127475837297e-06, "loss": 23.8898, "step": 120220 }, { "epoch": 0.24287220675751564, "grad_norm": 698.5719604492188, "learning_rate": 9.410963115419209e-06, "loss": 31.1965, "step": 120230 }, { "epoch": 0.24289240739019946, "grad_norm": 708.5223999023438, "learning_rate": 9.410798733502624e-06, "loss": 27.1567, "step": 120240 }, { "epoch": 0.24291260802288328, "grad_norm": 225.2885284423828, "learning_rate": 9.41063433008834e-06, "loss": 26.5465, "step": 120250 }, { "epoch": 0.2429328086555671, "grad_norm": 242.16827392578125, "learning_rate": 9.410469905177159e-06, "loss": 23.8723, "step": 120260 }, { "epoch": 0.24295300928825092, "grad_norm": 489.69720458984375, "learning_rate": 9.410305458769882e-06, "loss": 27.86, "step": 120270 }, { "epoch": 0.2429732099209347, "grad_norm": 210.8108673095703, "learning_rate": 9.410140990867313e-06, "loss": 18.1833, "step": 120280 }, { "epoch": 0.24299341055361853, "grad_norm": 102.53578186035156, "learning_rate": 9.40997650147025e-06, "loss": 11.5105, "step": 120290 }, { "epoch": 0.24301361118630235, "grad_norm": 482.7730712890625, "learning_rate": 9.409811990579498e-06, "loss": 38.5142, "step": 120300 }, { "epoch": 0.24303381181898617, "grad_norm": 496.66717529296875, "learning_rate": 9.409647458195857e-06, "loss": 19.2649, "step": 120310 }, { "epoch": 0.24305401245167, "grad_norm": 339.0413818359375, "learning_rate": 9.409482904320128e-06, "loss": 14.2474, "step": 120320 }, { "epoch": 0.2430742130843538, "grad_norm": 334.78167724609375, "learning_rate": 9.409318328953115e-06, "loss": 18.835, "step": 120330 }, { "epoch": 0.2430944137170376, "grad_norm": 608.3538208007812, "learning_rate": 9.409153732095617e-06, "loss": 20.304, "step": 120340 }, { "epoch": 0.24311461434972143, "grad_norm": 52.93294143676758, "learning_rate": 9.408989113748442e-06, "loss": 22.8354, "step": 120350 }, { "epoch": 0.24313481498240525, "grad_norm": 441.4234619140625, "learning_rate": 9.408824473912387e-06, "loss": 13.467, "step": 120360 }, { "epoch": 0.24315501561508907, "grad_norm": 390.3205261230469, "learning_rate": 9.408659812588257e-06, "loss": 26.7265, "step": 120370 }, { "epoch": 0.24317521624777289, "grad_norm": 246.2841339111328, "learning_rate": 9.408495129776851e-06, "loss": 21.4439, "step": 120380 }, { "epoch": 0.2431954168804567, "grad_norm": 686.0664672851562, "learning_rate": 9.408330425478978e-06, "loss": 35.6674, "step": 120390 }, { "epoch": 0.24321561751314053, "grad_norm": 456.25872802734375, "learning_rate": 9.408165699695435e-06, "loss": 26.4402, "step": 120400 }, { "epoch": 0.24323581814582432, "grad_norm": 776.8385620117188, "learning_rate": 9.408000952427028e-06, "loss": 24.7706, "step": 120410 }, { "epoch": 0.24325601877850814, "grad_norm": 295.9007263183594, "learning_rate": 9.40783618367456e-06, "loss": 12.6116, "step": 120420 }, { "epoch": 0.24327621941119196, "grad_norm": 102.0827407836914, "learning_rate": 9.40767139343883e-06, "loss": 14.6564, "step": 120430 }, { "epoch": 0.24329642004387578, "grad_norm": 109.94143676757812, "learning_rate": 9.407506581720647e-06, "loss": 20.5629, "step": 120440 }, { "epoch": 0.2433166206765596, "grad_norm": 721.9949951171875, "learning_rate": 9.407341748520811e-06, "loss": 37.1303, 
"step": 120450 }, { "epoch": 0.24333682130924342, "grad_norm": 252.1715087890625, "learning_rate": 9.407176893840125e-06, "loss": 17.8643, "step": 120460 }, { "epoch": 0.2433570219419272, "grad_norm": 108.42328643798828, "learning_rate": 9.407012017679393e-06, "loss": 18.0431, "step": 120470 }, { "epoch": 0.24337722257461103, "grad_norm": 608.70947265625, "learning_rate": 9.40684712003942e-06, "loss": 27.4288, "step": 120480 }, { "epoch": 0.24339742320729485, "grad_norm": 679.6499633789062, "learning_rate": 9.40668220092101e-06, "loss": 22.7119, "step": 120490 }, { "epoch": 0.24341762383997867, "grad_norm": 501.53582763671875, "learning_rate": 9.406517260324962e-06, "loss": 25.1944, "step": 120500 }, { "epoch": 0.2434378244726625, "grad_norm": 473.3844299316406, "learning_rate": 9.406352298252085e-06, "loss": 37.3264, "step": 120510 }, { "epoch": 0.2434580251053463, "grad_norm": 537.971923828125, "learning_rate": 9.406187314703182e-06, "loss": 28.715, "step": 120520 }, { "epoch": 0.2434782257380301, "grad_norm": 425.8844909667969, "learning_rate": 9.406022309679055e-06, "loss": 18.4034, "step": 120530 }, { "epoch": 0.24349842637071392, "grad_norm": 538.1511840820312, "learning_rate": 9.40585728318051e-06, "loss": 17.8412, "step": 120540 }, { "epoch": 0.24351862700339774, "grad_norm": 154.53443908691406, "learning_rate": 9.405692235208353e-06, "loss": 24.7516, "step": 120550 }, { "epoch": 0.24353882763608156, "grad_norm": 360.3779602050781, "learning_rate": 9.405527165763384e-06, "loss": 26.9894, "step": 120560 }, { "epoch": 0.24355902826876538, "grad_norm": 817.6304321289062, "learning_rate": 9.40536207484641e-06, "loss": 21.4723, "step": 120570 }, { "epoch": 0.2435792289014492, "grad_norm": 281.68701171875, "learning_rate": 9.405196962458235e-06, "loss": 12.8594, "step": 120580 }, { "epoch": 0.24359942953413302, "grad_norm": 612.75634765625, "learning_rate": 9.405031828599666e-06, "loss": 39.7872, "step": 120590 }, { "epoch": 0.24361963016681681, "grad_norm": 75.3757095336914, "learning_rate": 9.404866673271506e-06, "loss": 7.8291, "step": 120600 }, { "epoch": 0.24363983079950063, "grad_norm": 498.9643249511719, "learning_rate": 9.40470149647456e-06, "loss": 20.3929, "step": 120610 }, { "epoch": 0.24366003143218445, "grad_norm": 166.3463134765625, "learning_rate": 9.404536298209633e-06, "loss": 20.5384, "step": 120620 }, { "epoch": 0.24368023206486827, "grad_norm": 358.24346923828125, "learning_rate": 9.40437107847753e-06, "loss": 18.2434, "step": 120630 }, { "epoch": 0.2437004326975521, "grad_norm": 562.0745849609375, "learning_rate": 9.404205837279057e-06, "loss": 15.5326, "step": 120640 }, { "epoch": 0.24372063333023591, "grad_norm": 441.2648010253906, "learning_rate": 9.404040574615018e-06, "loss": 24.7184, "step": 120650 }, { "epoch": 0.2437408339629197, "grad_norm": 253.6767578125, "learning_rate": 9.40387529048622e-06, "loss": 19.4828, "step": 120660 }, { "epoch": 0.24376103459560353, "grad_norm": 235.23439025878906, "learning_rate": 9.403709984893469e-06, "loss": 20.0599, "step": 120670 }, { "epoch": 0.24378123522828735, "grad_norm": 363.85443115234375, "learning_rate": 9.403544657837569e-06, "loss": 16.9035, "step": 120680 }, { "epoch": 0.24380143586097117, "grad_norm": 475.27203369140625, "learning_rate": 9.403379309319325e-06, "loss": 17.1978, "step": 120690 }, { "epoch": 0.243821636493655, "grad_norm": 208.4928741455078, "learning_rate": 9.403213939339546e-06, "loss": 26.1766, "step": 120700 }, { "epoch": 0.2438418371263388, "grad_norm": 297.894775390625, "learning_rate": 
9.403048547899034e-06, "loss": 19.4729, "step": 120710 }, { "epoch": 0.24386203775902263, "grad_norm": 898.6644897460938, "learning_rate": 9.402883134998601e-06, "loss": 29.4924, "step": 120720 }, { "epoch": 0.24388223839170642, "grad_norm": 350.4821472167969, "learning_rate": 9.402717700639047e-06, "loss": 17.4218, "step": 120730 }, { "epoch": 0.24390243902439024, "grad_norm": 542.610595703125, "learning_rate": 9.402552244821181e-06, "loss": 23.77, "step": 120740 }, { "epoch": 0.24392263965707406, "grad_norm": 475.7983093261719, "learning_rate": 9.40238676754581e-06, "loss": 10.7983, "step": 120750 }, { "epoch": 0.24394284028975788, "grad_norm": 776.457275390625, "learning_rate": 9.402221268813741e-06, "loss": 29.9458, "step": 120760 }, { "epoch": 0.2439630409224417, "grad_norm": 663.1685791015625, "learning_rate": 9.402055748625779e-06, "loss": 16.0243, "step": 120770 }, { "epoch": 0.24398324155512552, "grad_norm": 687.3117065429688, "learning_rate": 9.40189020698273e-06, "loss": 18.4069, "step": 120780 }, { "epoch": 0.2440034421878093, "grad_norm": 642.6618041992188, "learning_rate": 9.4017246438854e-06, "loss": 20.7356, "step": 120790 }, { "epoch": 0.24402364282049313, "grad_norm": 304.1505432128906, "learning_rate": 9.401559059334601e-06, "loss": 16.3073, "step": 120800 }, { "epoch": 0.24404384345317695, "grad_norm": 92.07533264160156, "learning_rate": 9.401393453331138e-06, "loss": 15.306, "step": 120810 }, { "epoch": 0.24406404408586077, "grad_norm": 159.5988311767578, "learning_rate": 9.401227825875814e-06, "loss": 27.5869, "step": 120820 }, { "epoch": 0.2440842447185446, "grad_norm": 394.03057861328125, "learning_rate": 9.401062176969442e-06, "loss": 17.108, "step": 120830 }, { "epoch": 0.2441044453512284, "grad_norm": 89.00894165039062, "learning_rate": 9.400896506612824e-06, "loss": 32.4733, "step": 120840 }, { "epoch": 0.2441246459839122, "grad_norm": 211.0887908935547, "learning_rate": 9.400730814806774e-06, "loss": 34.7247, "step": 120850 }, { "epoch": 0.24414484661659602, "grad_norm": 295.0898132324219, "learning_rate": 9.400565101552093e-06, "loss": 27.0561, "step": 120860 }, { "epoch": 0.24416504724927984, "grad_norm": 402.59906005859375, "learning_rate": 9.400399366849591e-06, "loss": 25.1451, "step": 120870 }, { "epoch": 0.24418524788196366, "grad_norm": 315.1714782714844, "learning_rate": 9.400233610700078e-06, "loss": 13.9015, "step": 120880 }, { "epoch": 0.24420544851464748, "grad_norm": 17.67019271850586, "learning_rate": 9.400067833104358e-06, "loss": 41.4506, "step": 120890 }, { "epoch": 0.2442256491473313, "grad_norm": 234.94468688964844, "learning_rate": 9.399902034063244e-06, "loss": 18.8753, "step": 120900 }, { "epoch": 0.24424584978001512, "grad_norm": 137.276123046875, "learning_rate": 9.399736213577537e-06, "loss": 17.0852, "step": 120910 }, { "epoch": 0.24426605041269892, "grad_norm": 332.58282470703125, "learning_rate": 9.399570371648052e-06, "loss": 28.0115, "step": 120920 }, { "epoch": 0.24428625104538274, "grad_norm": 497.4787902832031, "learning_rate": 9.399404508275596e-06, "loss": 21.4659, "step": 120930 }, { "epoch": 0.24430645167806656, "grad_norm": 437.379638671875, "learning_rate": 9.399238623460973e-06, "loss": 10.7566, "step": 120940 }, { "epoch": 0.24432665231075038, "grad_norm": 479.4838562011719, "learning_rate": 9.399072717204995e-06, "loss": 21.467, "step": 120950 }, { "epoch": 0.2443468529434342, "grad_norm": 0.0, "learning_rate": 9.398906789508474e-06, "loss": 12.9235, "step": 120960 }, { "epoch": 0.24436705357611802, "grad_norm": 
714.1256103515625, "learning_rate": 9.39874084037221e-06, "loss": 32.9326, "step": 120970 }, { "epoch": 0.2443872542088018, "grad_norm": 171.83334350585938, "learning_rate": 9.39857486979702e-06, "loss": 16.5461, "step": 120980 }, { "epoch": 0.24440745484148563, "grad_norm": 504.98468017578125, "learning_rate": 9.398408877783707e-06, "loss": 17.338, "step": 120990 }, { "epoch": 0.24442765547416945, "grad_norm": 526.178955078125, "learning_rate": 9.398242864333084e-06, "loss": 23.0164, "step": 121000 }, { "epoch": 0.24444785610685327, "grad_norm": 173.3816375732422, "learning_rate": 9.398076829445958e-06, "loss": 20.6809, "step": 121010 }, { "epoch": 0.2444680567395371, "grad_norm": 407.1698913574219, "learning_rate": 9.397910773123139e-06, "loss": 7.2511, "step": 121020 }, { "epoch": 0.2444882573722209, "grad_norm": 299.86993408203125, "learning_rate": 9.397744695365435e-06, "loss": 24.5106, "step": 121030 }, { "epoch": 0.24450845800490473, "grad_norm": 1071.1138916015625, "learning_rate": 9.39757859617366e-06, "loss": 34.3683, "step": 121040 }, { "epoch": 0.24452865863758852, "grad_norm": 149.59170532226562, "learning_rate": 9.397412475548619e-06, "loss": 12.2644, "step": 121050 }, { "epoch": 0.24454885927027234, "grad_norm": 867.1926879882812, "learning_rate": 9.397246333491121e-06, "loss": 26.0245, "step": 121060 }, { "epoch": 0.24456905990295616, "grad_norm": 202.2417449951172, "learning_rate": 9.39708017000198e-06, "loss": 46.8165, "step": 121070 }, { "epoch": 0.24458926053563998, "grad_norm": 95.05743408203125, "learning_rate": 9.396913985082003e-06, "loss": 17.9895, "step": 121080 }, { "epoch": 0.2446094611683238, "grad_norm": 987.2474365234375, "learning_rate": 9.396747778732001e-06, "loss": 23.2907, "step": 121090 }, { "epoch": 0.24462966180100762, "grad_norm": 492.4799499511719, "learning_rate": 9.396581550952781e-06, "loss": 42.5354, "step": 121100 }, { "epoch": 0.2446498624336914, "grad_norm": 224.4016876220703, "learning_rate": 9.396415301745158e-06, "loss": 29.2263, "step": 121110 }, { "epoch": 0.24467006306637523, "grad_norm": 596.227294921875, "learning_rate": 9.39624903110994e-06, "loss": 22.3733, "step": 121120 }, { "epoch": 0.24469026369905905, "grad_norm": 203.60916137695312, "learning_rate": 9.396082739047938e-06, "loss": 14.8306, "step": 121130 }, { "epoch": 0.24471046433174287, "grad_norm": 499.0995178222656, "learning_rate": 9.39591642555996e-06, "loss": 11.4995, "step": 121140 }, { "epoch": 0.2447306649644267, "grad_norm": 265.4461975097656, "learning_rate": 9.39575009064682e-06, "loss": 19.1836, "step": 121150 }, { "epoch": 0.2447508655971105, "grad_norm": 507.4720153808594, "learning_rate": 9.395583734309327e-06, "loss": 13.8517, "step": 121160 }, { "epoch": 0.2447710662297943, "grad_norm": 416.8929748535156, "learning_rate": 9.39541735654829e-06, "loss": 38.9064, "step": 121170 }, { "epoch": 0.24479126686247812, "grad_norm": 363.3443298339844, "learning_rate": 9.395250957364526e-06, "loss": 18.1596, "step": 121180 }, { "epoch": 0.24481146749516194, "grad_norm": 585.190185546875, "learning_rate": 9.395084536758838e-06, "loss": 13.4733, "step": 121190 }, { "epoch": 0.24483166812784576, "grad_norm": 266.3756103515625, "learning_rate": 9.394918094732044e-06, "loss": 15.6169, "step": 121200 }, { "epoch": 0.24485186876052958, "grad_norm": 80.72816467285156, "learning_rate": 9.394751631284951e-06, "loss": 14.0201, "step": 121210 }, { "epoch": 0.2448720693932134, "grad_norm": 325.4801330566406, "learning_rate": 9.39458514641837e-06, "loss": 27.5059, "step": 121220 }, { 
"epoch": 0.24489227002589722, "grad_norm": 257.91668701171875, "learning_rate": 9.394418640133116e-06, "loss": 8.8437, "step": 121230 }, { "epoch": 0.24491247065858102, "grad_norm": 191.24307250976562, "learning_rate": 9.394252112429998e-06, "loss": 13.821, "step": 121240 }, { "epoch": 0.24493267129126484, "grad_norm": 654.3187255859375, "learning_rate": 9.394085563309827e-06, "loss": 39.5012, "step": 121250 }, { "epoch": 0.24495287192394866, "grad_norm": 134.41928100585938, "learning_rate": 9.393918992773418e-06, "loss": 30.6244, "step": 121260 }, { "epoch": 0.24497307255663248, "grad_norm": 289.5005798339844, "learning_rate": 9.393752400821578e-06, "loss": 34.7754, "step": 121270 }, { "epoch": 0.2449932731893163, "grad_norm": 314.14031982421875, "learning_rate": 9.393585787455125e-06, "loss": 27.0609, "step": 121280 }, { "epoch": 0.24501347382200012, "grad_norm": 803.166259765625, "learning_rate": 9.393419152674866e-06, "loss": 15.494, "step": 121290 }, { "epoch": 0.2450336744546839, "grad_norm": 485.0401611328125, "learning_rate": 9.393252496481615e-06, "loss": 19.0768, "step": 121300 }, { "epoch": 0.24505387508736773, "grad_norm": 142.10533142089844, "learning_rate": 9.393085818876184e-06, "loss": 16.7439, "step": 121310 }, { "epoch": 0.24507407572005155, "grad_norm": 271.49945068359375, "learning_rate": 9.392919119859387e-06, "loss": 9.3258, "step": 121320 }, { "epoch": 0.24509427635273537, "grad_norm": 1029.8131103515625, "learning_rate": 9.392752399432032e-06, "loss": 30.2744, "step": 121330 }, { "epoch": 0.2451144769854192, "grad_norm": 349.37835693359375, "learning_rate": 9.392585657594938e-06, "loss": 17.3984, "step": 121340 }, { "epoch": 0.245134677618103, "grad_norm": 407.53228759765625, "learning_rate": 9.392418894348912e-06, "loss": 24.1279, "step": 121350 }, { "epoch": 0.24515487825078683, "grad_norm": 308.86920166015625, "learning_rate": 9.39225210969477e-06, "loss": 24.4006, "step": 121360 }, { "epoch": 0.24517507888347062, "grad_norm": 78.63629150390625, "learning_rate": 9.392085303633322e-06, "loss": 21.8307, "step": 121370 }, { "epoch": 0.24519527951615444, "grad_norm": 640.35888671875, "learning_rate": 9.391918476165385e-06, "loss": 27.6322, "step": 121380 }, { "epoch": 0.24521548014883826, "grad_norm": 98.9591064453125, "learning_rate": 9.39175162729177e-06, "loss": 17.5419, "step": 121390 }, { "epoch": 0.24523568078152208, "grad_norm": 1276.994140625, "learning_rate": 9.39158475701329e-06, "loss": 32.9141, "step": 121400 }, { "epoch": 0.2452558814142059, "grad_norm": 712.490478515625, "learning_rate": 9.391417865330759e-06, "loss": 26.0464, "step": 121410 }, { "epoch": 0.24527608204688972, "grad_norm": 421.4114685058594, "learning_rate": 9.391250952244987e-06, "loss": 15.0933, "step": 121420 }, { "epoch": 0.2452962826795735, "grad_norm": 512.715576171875, "learning_rate": 9.391084017756794e-06, "loss": 31.4504, "step": 121430 }, { "epoch": 0.24531648331225733, "grad_norm": 669.548583984375, "learning_rate": 9.390917061866988e-06, "loss": 32.6564, "step": 121440 }, { "epoch": 0.24533668394494115, "grad_norm": 459.4366760253906, "learning_rate": 9.390750084576387e-06, "loss": 36.9348, "step": 121450 }, { "epoch": 0.24535688457762497, "grad_norm": 218.767822265625, "learning_rate": 9.3905830858858e-06, "loss": 17.6133, "step": 121460 }, { "epoch": 0.2453770852103088, "grad_norm": 693.1647338867188, "learning_rate": 9.390416065796045e-06, "loss": 29.3941, "step": 121470 }, { "epoch": 0.2453972858429926, "grad_norm": 509.7262878417969, "learning_rate": 
9.390249024307934e-06, "loss": 17.0121, "step": 121480 }, { "epoch": 0.2454174864756764, "grad_norm": 480.4788513183594, "learning_rate": 9.390081961422283e-06, "loss": 16.8307, "step": 121490 }, { "epoch": 0.24543768710836023, "grad_norm": 754.36474609375, "learning_rate": 9.389914877139903e-06, "loss": 28.9384, "step": 121500 }, { "epoch": 0.24545788774104405, "grad_norm": 223.61109924316406, "learning_rate": 9.389747771461612e-06, "loss": 20.6062, "step": 121510 }, { "epoch": 0.24547808837372787, "grad_norm": 598.3352661132812, "learning_rate": 9.389580644388222e-06, "loss": 15.6587, "step": 121520 }, { "epoch": 0.24549828900641169, "grad_norm": 1525.0599365234375, "learning_rate": 9.38941349592055e-06, "loss": 52.0996, "step": 121530 }, { "epoch": 0.2455184896390955, "grad_norm": 81.1236572265625, "learning_rate": 9.389246326059406e-06, "loss": 20.4734, "step": 121540 }, { "epoch": 0.24553869027177933, "grad_norm": 391.3033142089844, "learning_rate": 9.38907913480561e-06, "loss": 23.5889, "step": 121550 }, { "epoch": 0.24555889090446312, "grad_norm": 527.8965454101562, "learning_rate": 9.388911922159973e-06, "loss": 21.0002, "step": 121560 }, { "epoch": 0.24557909153714694, "grad_norm": 635.4254760742188, "learning_rate": 9.388744688123313e-06, "loss": 20.7065, "step": 121570 }, { "epoch": 0.24559929216983076, "grad_norm": 361.8155517578125, "learning_rate": 9.388577432696441e-06, "loss": 16.9074, "step": 121580 }, { "epoch": 0.24561949280251458, "grad_norm": 853.1696166992188, "learning_rate": 9.388410155880178e-06, "loss": 23.3569, "step": 121590 }, { "epoch": 0.2456396934351984, "grad_norm": 162.77798461914062, "learning_rate": 9.388242857675336e-06, "loss": 19.8373, "step": 121600 }, { "epoch": 0.24565989406788222, "grad_norm": 18.374544143676758, "learning_rate": 9.388075538082729e-06, "loss": 17.2537, "step": 121610 }, { "epoch": 0.245680094700566, "grad_norm": 259.9162292480469, "learning_rate": 9.387908197103175e-06, "loss": 14.7752, "step": 121620 }, { "epoch": 0.24570029533324983, "grad_norm": 359.5132141113281, "learning_rate": 9.38774083473749e-06, "loss": 20.5081, "step": 121630 }, { "epoch": 0.24572049596593365, "grad_norm": 135.332275390625, "learning_rate": 9.387573450986485e-06, "loss": 33.3474, "step": 121640 }, { "epoch": 0.24574069659861747, "grad_norm": 75.51333618164062, "learning_rate": 9.38740604585098e-06, "loss": 28.3052, "step": 121650 }, { "epoch": 0.2457608972313013, "grad_norm": 229.2150421142578, "learning_rate": 9.387238619331791e-06, "loss": 25.0102, "step": 121660 }, { "epoch": 0.2457810978639851, "grad_norm": 449.738037109375, "learning_rate": 9.387071171429734e-06, "loss": 21.5454, "step": 121670 }, { "epoch": 0.2458012984966689, "grad_norm": 23.67981719970703, "learning_rate": 9.386903702145622e-06, "loss": 23.7492, "step": 121680 }, { "epoch": 0.24582149912935272, "grad_norm": 591.4840698242188, "learning_rate": 9.386736211480276e-06, "loss": 20.9584, "step": 121690 }, { "epoch": 0.24584169976203654, "grad_norm": 771.146728515625, "learning_rate": 9.386568699434509e-06, "loss": 16.8184, "step": 121700 }, { "epoch": 0.24586190039472036, "grad_norm": 372.983154296875, "learning_rate": 9.386401166009135e-06, "loss": 21.9896, "step": 121710 }, { "epoch": 0.24588210102740418, "grad_norm": 2.5158207416534424, "learning_rate": 9.386233611204979e-06, "loss": 17.8203, "step": 121720 }, { "epoch": 0.245902301660088, "grad_norm": 437.3326721191406, "learning_rate": 9.386066035022849e-06, "loss": 32.8128, "step": 121730 }, { "epoch": 0.24592250229277182, 
"grad_norm": 124.55708312988281, "learning_rate": 9.385898437463565e-06, "loss": 15.7339, "step": 121740 }, { "epoch": 0.24594270292545561, "grad_norm": 569.1013793945312, "learning_rate": 9.385730818527945e-06, "loss": 21.1294, "step": 121750 }, { "epoch": 0.24596290355813943, "grad_norm": 438.2088928222656, "learning_rate": 9.385563178216804e-06, "loss": 42.4981, "step": 121760 }, { "epoch": 0.24598310419082325, "grad_norm": 476.3517150878906, "learning_rate": 9.38539551653096e-06, "loss": 33.1509, "step": 121770 }, { "epoch": 0.24600330482350707, "grad_norm": 438.19091796875, "learning_rate": 9.385227833471232e-06, "loss": 16.9427, "step": 121780 }, { "epoch": 0.2460235054561909, "grad_norm": 627.431396484375, "learning_rate": 9.385060129038434e-06, "loss": 23.1106, "step": 121790 }, { "epoch": 0.24604370608887471, "grad_norm": 863.0065307617188, "learning_rate": 9.384892403233384e-06, "loss": 35.0544, "step": 121800 }, { "epoch": 0.2460639067215585, "grad_norm": 797.5457763671875, "learning_rate": 9.384724656056902e-06, "loss": 32.2254, "step": 121810 }, { "epoch": 0.24608410735424233, "grad_norm": 339.25347900390625, "learning_rate": 9.384556887509802e-06, "loss": 25.5886, "step": 121820 }, { "epoch": 0.24610430798692615, "grad_norm": 117.08829498291016, "learning_rate": 9.384389097592904e-06, "loss": 18.4469, "step": 121830 }, { "epoch": 0.24612450861960997, "grad_norm": 681.5288696289062, "learning_rate": 9.384221286307028e-06, "loss": 18.808, "step": 121840 }, { "epoch": 0.2461447092522938, "grad_norm": 913.95703125, "learning_rate": 9.384053453652986e-06, "loss": 37.2574, "step": 121850 }, { "epoch": 0.2461649098849776, "grad_norm": 273.5444030761719, "learning_rate": 9.3838855996316e-06, "loss": 17.6005, "step": 121860 }, { "epoch": 0.24618511051766143, "grad_norm": 410.3824462890625, "learning_rate": 9.383717724243688e-06, "loss": 16.0778, "step": 121870 }, { "epoch": 0.24620531115034522, "grad_norm": 311.57318115234375, "learning_rate": 9.383549827490066e-06, "loss": 41.0608, "step": 121880 }, { "epoch": 0.24622551178302904, "grad_norm": 471.83331298828125, "learning_rate": 9.383381909371555e-06, "loss": 26.9694, "step": 121890 }, { "epoch": 0.24624571241571286, "grad_norm": 129.34288024902344, "learning_rate": 9.383213969888972e-06, "loss": 26.0415, "step": 121900 }, { "epoch": 0.24626591304839668, "grad_norm": 260.0030517578125, "learning_rate": 9.383046009043134e-06, "loss": 26.3524, "step": 121910 }, { "epoch": 0.2462861136810805, "grad_norm": 292.06964111328125, "learning_rate": 9.382878026834865e-06, "loss": 16.6176, "step": 121920 }, { "epoch": 0.24630631431376432, "grad_norm": 267.7262268066406, "learning_rate": 9.382710023264978e-06, "loss": 10.9908, "step": 121930 }, { "epoch": 0.2463265149464481, "grad_norm": 552.8212280273438, "learning_rate": 9.382541998334293e-06, "loss": 24.4882, "step": 121940 }, { "epoch": 0.24634671557913193, "grad_norm": 392.6438903808594, "learning_rate": 9.382373952043631e-06, "loss": 23.7668, "step": 121950 }, { "epoch": 0.24636691621181575, "grad_norm": 157.22264099121094, "learning_rate": 9.38220588439381e-06, "loss": 18.662, "step": 121960 }, { "epoch": 0.24638711684449957, "grad_norm": 660.43310546875, "learning_rate": 9.38203779538565e-06, "loss": 15.036, "step": 121970 }, { "epoch": 0.2464073174771834, "grad_norm": 428.3371887207031, "learning_rate": 9.381869685019967e-06, "loss": 25.8523, "step": 121980 }, { "epoch": 0.2464275181098672, "grad_norm": 245.2991943359375, "learning_rate": 9.381701553297584e-06, "loss": 11.6478, "step": 
121990 }, { "epoch": 0.246447718742551, "grad_norm": 1.7876890897750854, "learning_rate": 9.381533400219319e-06, "loss": 32.3656, "step": 122000 }, { "epoch": 0.24646791937523482, "grad_norm": 264.4454040527344, "learning_rate": 9.38136522578599e-06, "loss": 26.1722, "step": 122010 }, { "epoch": 0.24648812000791864, "grad_norm": 1047.522705078125, "learning_rate": 9.381197029998422e-06, "loss": 60.2953, "step": 122020 }, { "epoch": 0.24650832064060246, "grad_norm": 1142.136474609375, "learning_rate": 9.381028812857426e-06, "loss": 33.6777, "step": 122030 }, { "epoch": 0.24652852127328628, "grad_norm": 295.67657470703125, "learning_rate": 9.38086057436383e-06, "loss": 26.6534, "step": 122040 }, { "epoch": 0.2465487219059701, "grad_norm": 701.2647705078125, "learning_rate": 9.38069231451845e-06, "loss": 26.1704, "step": 122050 }, { "epoch": 0.24656892253865392, "grad_norm": 89.219970703125, "learning_rate": 9.380524033322108e-06, "loss": 17.7089, "step": 122060 }, { "epoch": 0.24658912317133772, "grad_norm": 161.54380798339844, "learning_rate": 9.380355730775623e-06, "loss": 26.5697, "step": 122070 }, { "epoch": 0.24660932380402154, "grad_norm": 879.8580322265625, "learning_rate": 9.380187406879815e-06, "loss": 44.7753, "step": 122080 }, { "epoch": 0.24662952443670536, "grad_norm": 297.9383544921875, "learning_rate": 9.380019061635506e-06, "loss": 18.0576, "step": 122090 }, { "epoch": 0.24664972506938918, "grad_norm": 502.1921691894531, "learning_rate": 9.379850695043513e-06, "loss": 15.0395, "step": 122100 }, { "epoch": 0.246669925702073, "grad_norm": 479.7265319824219, "learning_rate": 9.37968230710466e-06, "loss": 12.569, "step": 122110 }, { "epoch": 0.24669012633475682, "grad_norm": 135.19354248046875, "learning_rate": 9.379513897819768e-06, "loss": 24.4919, "step": 122120 }, { "epoch": 0.2467103269674406, "grad_norm": 159.18887329101562, "learning_rate": 9.379345467189655e-06, "loss": 14.196, "step": 122130 }, { "epoch": 0.24673052760012443, "grad_norm": 530.904052734375, "learning_rate": 9.379177015215145e-06, "loss": 22.6805, "step": 122140 }, { "epoch": 0.24675072823280825, "grad_norm": 529.093017578125, "learning_rate": 9.379008541897054e-06, "loss": 25.6806, "step": 122150 }, { "epoch": 0.24677092886549207, "grad_norm": 518.6043701171875, "learning_rate": 9.378840047236209e-06, "loss": 21.9906, "step": 122160 }, { "epoch": 0.2467911294981759, "grad_norm": 847.4872436523438, "learning_rate": 9.378671531233428e-06, "loss": 32.1995, "step": 122170 }, { "epoch": 0.2468113301308597, "grad_norm": 609.507080078125, "learning_rate": 9.378502993889533e-06, "loss": 16.1266, "step": 122180 }, { "epoch": 0.24683153076354353, "grad_norm": 308.99468994140625, "learning_rate": 9.378334435205345e-06, "loss": 19.4652, "step": 122190 }, { "epoch": 0.24685173139622732, "grad_norm": 234.73680114746094, "learning_rate": 9.378165855181687e-06, "loss": 28.5967, "step": 122200 }, { "epoch": 0.24687193202891114, "grad_norm": 882.1802978515625, "learning_rate": 9.377997253819378e-06, "loss": 30.3689, "step": 122210 }, { "epoch": 0.24689213266159496, "grad_norm": 647.518798828125, "learning_rate": 9.377828631119243e-06, "loss": 29.7149, "step": 122220 }, { "epoch": 0.24691233329427878, "grad_norm": 196.408203125, "learning_rate": 9.377659987082101e-06, "loss": 21.2224, "step": 122230 }, { "epoch": 0.2469325339269626, "grad_norm": 687.9680786132812, "learning_rate": 9.377491321708777e-06, "loss": 42.0738, "step": 122240 }, { "epoch": 0.24695273455964642, "grad_norm": 221.20986938476562, "learning_rate": 
9.37732263500009e-06, "loss": 34.4007, "step": 122250 }, { "epoch": 0.2469729351923302, "grad_norm": 421.7748107910156, "learning_rate": 9.377153926956864e-06, "loss": 11.6844, "step": 122260 }, { "epoch": 0.24699313582501403, "grad_norm": 608.4430541992188, "learning_rate": 9.376985197579919e-06, "loss": 38.0833, "step": 122270 }, { "epoch": 0.24701333645769785, "grad_norm": 37.26884841918945, "learning_rate": 9.37681644687008e-06, "loss": 13.1641, "step": 122280 }, { "epoch": 0.24703353709038167, "grad_norm": 871.173828125, "learning_rate": 9.37664767482817e-06, "loss": 29.0666, "step": 122290 }, { "epoch": 0.2470537377230655, "grad_norm": 231.88839721679688, "learning_rate": 9.376478881455008e-06, "loss": 50.6193, "step": 122300 }, { "epoch": 0.2470739383557493, "grad_norm": 58.269229888916016, "learning_rate": 9.37631006675142e-06, "loss": 27.0972, "step": 122310 }, { "epoch": 0.2470941389884331, "grad_norm": 155.5174102783203, "learning_rate": 9.376141230718228e-06, "loss": 34.523, "step": 122320 }, { "epoch": 0.24711433962111692, "grad_norm": 484.3507995605469, "learning_rate": 9.375972373356253e-06, "loss": 36.5884, "step": 122330 }, { "epoch": 0.24713454025380074, "grad_norm": 343.76629638671875, "learning_rate": 9.375803494666319e-06, "loss": 18.2555, "step": 122340 }, { "epoch": 0.24715474088648456, "grad_norm": 183.2716064453125, "learning_rate": 9.37563459464925e-06, "loss": 33.7194, "step": 122350 }, { "epoch": 0.24717494151916838, "grad_norm": 570.3797607421875, "learning_rate": 9.37546567330587e-06, "loss": 28.8831, "step": 122360 }, { "epoch": 0.2471951421518522, "grad_norm": 331.79754638671875, "learning_rate": 9.375296730636999e-06, "loss": 19.3407, "step": 122370 }, { "epoch": 0.24721534278453602, "grad_norm": 270.3473815917969, "learning_rate": 9.375127766643464e-06, "loss": 25.9224, "step": 122380 }, { "epoch": 0.24723554341721982, "grad_norm": 555.08349609375, "learning_rate": 9.374958781326085e-06, "loss": 20.2447, "step": 122390 }, { "epoch": 0.24725574404990364, "grad_norm": 406.95281982421875, "learning_rate": 9.37478977468569e-06, "loss": 22.6607, "step": 122400 }, { "epoch": 0.24727594468258746, "grad_norm": 434.1924743652344, "learning_rate": 9.374620746723097e-06, "loss": 14.5743, "step": 122410 }, { "epoch": 0.24729614531527128, "grad_norm": 72.60065460205078, "learning_rate": 9.374451697439137e-06, "loss": 28.7614, "step": 122420 }, { "epoch": 0.2473163459479551, "grad_norm": 458.9649963378906, "learning_rate": 9.374282626834627e-06, "loss": 29.1018, "step": 122430 }, { "epoch": 0.24733654658063892, "grad_norm": 328.2051086425781, "learning_rate": 9.374113534910396e-06, "loss": 12.9496, "step": 122440 }, { "epoch": 0.2473567472133227, "grad_norm": 322.018798828125, "learning_rate": 9.373944421667264e-06, "loss": 16.3701, "step": 122450 }, { "epoch": 0.24737694784600653, "grad_norm": 463.0480651855469, "learning_rate": 9.37377528710606e-06, "loss": 33.54, "step": 122460 }, { "epoch": 0.24739714847869035, "grad_norm": 196.13043212890625, "learning_rate": 9.373606131227604e-06, "loss": 29.395, "step": 122470 }, { "epoch": 0.24741734911137417, "grad_norm": 521.9775390625, "learning_rate": 9.373436954032722e-06, "loss": 22.9881, "step": 122480 }, { "epoch": 0.247437549744058, "grad_norm": 784.3359985351562, "learning_rate": 9.373267755522239e-06, "loss": 23.227, "step": 122490 }, { "epoch": 0.2474577503767418, "grad_norm": 486.8966064453125, "learning_rate": 9.37309853569698e-06, "loss": 14.8526, "step": 122500 }, { "epoch": 0.24747795100942563, "grad_norm": 
637.5076904296875, "learning_rate": 9.372929294557768e-06, "loss": 25.1422, "step": 122510 }, { "epoch": 0.24749815164210942, "grad_norm": 1082.929443359375, "learning_rate": 9.37276003210543e-06, "loss": 28.4446, "step": 122520 }, { "epoch": 0.24751835227479324, "grad_norm": 687.2918701171875, "learning_rate": 9.37259074834079e-06, "loss": 25.4553, "step": 122530 }, { "epoch": 0.24753855290747706, "grad_norm": 328.588134765625, "learning_rate": 9.372421443264672e-06, "loss": 22.607, "step": 122540 }, { "epoch": 0.24755875354016088, "grad_norm": 397.86151123046875, "learning_rate": 9.372252116877904e-06, "loss": 19.548, "step": 122550 }, { "epoch": 0.2475789541728447, "grad_norm": 283.01861572265625, "learning_rate": 9.372082769181307e-06, "loss": 27.8385, "step": 122560 }, { "epoch": 0.24759915480552852, "grad_norm": 219.72633361816406, "learning_rate": 9.371913400175711e-06, "loss": 13.4904, "step": 122570 }, { "epoch": 0.2476193554382123, "grad_norm": 226.8003692626953, "learning_rate": 9.371744009861938e-06, "loss": 20.0402, "step": 122580 }, { "epoch": 0.24763955607089613, "grad_norm": 317.58880615234375, "learning_rate": 9.371574598240816e-06, "loss": 16.201, "step": 122590 }, { "epoch": 0.24765975670357995, "grad_norm": 123.96427917480469, "learning_rate": 9.371405165313169e-06, "loss": 12.6909, "step": 122600 }, { "epoch": 0.24767995733626377, "grad_norm": 538.021240234375, "learning_rate": 9.371235711079824e-06, "loss": 27.7117, "step": 122610 }, { "epoch": 0.2477001579689476, "grad_norm": 649.9598388671875, "learning_rate": 9.371066235541607e-06, "loss": 19.2145, "step": 122620 }, { "epoch": 0.2477203586016314, "grad_norm": 320.6629943847656, "learning_rate": 9.37089673869934e-06, "loss": 30.1592, "step": 122630 }, { "epoch": 0.2477405592343152, "grad_norm": 710.37548828125, "learning_rate": 9.370727220553854e-06, "loss": 33.0565, "step": 122640 }, { "epoch": 0.24776075986699903, "grad_norm": 1021.4251098632812, "learning_rate": 9.370557681105975e-06, "loss": 23.931, "step": 122650 }, { "epoch": 0.24778096049968285, "grad_norm": 335.6628112792969, "learning_rate": 9.370388120356527e-06, "loss": 27.4605, "step": 122660 }, { "epoch": 0.24780116113236667, "grad_norm": 286.7644958496094, "learning_rate": 9.370218538306338e-06, "loss": 21.0476, "step": 122670 }, { "epoch": 0.24782136176505049, "grad_norm": 302.48785400390625, "learning_rate": 9.370048934956232e-06, "loss": 26.8166, "step": 122680 }, { "epoch": 0.2478415623977343, "grad_norm": 848.8372802734375, "learning_rate": 9.36987931030704e-06, "loss": 35.0775, "step": 122690 }, { "epoch": 0.24786176303041813, "grad_norm": 293.80426025390625, "learning_rate": 9.369709664359585e-06, "loss": 27.3197, "step": 122700 }, { "epoch": 0.24788196366310192, "grad_norm": 5.1223015785217285, "learning_rate": 9.369539997114694e-06, "loss": 23.6149, "step": 122710 }, { "epoch": 0.24790216429578574, "grad_norm": 380.6856384277344, "learning_rate": 9.369370308573198e-06, "loss": 17.1666, "step": 122720 }, { "epoch": 0.24792236492846956, "grad_norm": 444.42645263671875, "learning_rate": 9.36920059873592e-06, "loss": 18.0811, "step": 122730 }, { "epoch": 0.24794256556115338, "grad_norm": 275.97589111328125, "learning_rate": 9.369030867603686e-06, "loss": 18.1146, "step": 122740 }, { "epoch": 0.2479627661938372, "grad_norm": 192.14076232910156, "learning_rate": 9.368861115177327e-06, "loss": 20.6096, "step": 122750 }, { "epoch": 0.24798296682652102, "grad_norm": 1214.2664794921875, "learning_rate": 9.36869134145767e-06, "loss": 37.8635, "step": 
122760 }, { "epoch": 0.2480031674592048, "grad_norm": 194.01011657714844, "learning_rate": 9.36852154644554e-06, "loss": 25.4198, "step": 122770 }, { "epoch": 0.24802336809188863, "grad_norm": 304.0776672363281, "learning_rate": 9.368351730141764e-06, "loss": 18.8762, "step": 122780 }, { "epoch": 0.24804356872457245, "grad_norm": 606.027587890625, "learning_rate": 9.368181892547174e-06, "loss": 29.1206, "step": 122790 }, { "epoch": 0.24806376935725627, "grad_norm": 583.8778686523438, "learning_rate": 9.368012033662594e-06, "loss": 26.1224, "step": 122800 }, { "epoch": 0.2480839699899401, "grad_norm": 216.15966796875, "learning_rate": 9.367842153488853e-06, "loss": 14.5165, "step": 122810 }, { "epoch": 0.2481041706226239, "grad_norm": 140.9263916015625, "learning_rate": 9.36767225202678e-06, "loss": 29.1583, "step": 122820 }, { "epoch": 0.24812437125530773, "grad_norm": 489.19024658203125, "learning_rate": 9.367502329277203e-06, "loss": 16.5625, "step": 122830 }, { "epoch": 0.24814457188799152, "grad_norm": 784.4625854492188, "learning_rate": 9.367332385240949e-06, "loss": 17.7571, "step": 122840 }, { "epoch": 0.24816477252067534, "grad_norm": 1200.139892578125, "learning_rate": 9.367162419918845e-06, "loss": 31.9097, "step": 122850 }, { "epoch": 0.24818497315335916, "grad_norm": 1665.3638916015625, "learning_rate": 9.366992433311722e-06, "loss": 26.635, "step": 122860 }, { "epoch": 0.24820517378604298, "grad_norm": 333.7507019042969, "learning_rate": 9.366822425420407e-06, "loss": 23.2256, "step": 122870 }, { "epoch": 0.2482253744187268, "grad_norm": 670.1807861328125, "learning_rate": 9.36665239624573e-06, "loss": 36.5895, "step": 122880 }, { "epoch": 0.24824557505141062, "grad_norm": 127.76399993896484, "learning_rate": 9.366482345788519e-06, "loss": 25.4427, "step": 122890 }, { "epoch": 0.24826577568409441, "grad_norm": 239.83836364746094, "learning_rate": 9.366312274049602e-06, "loss": 21.146, "step": 122900 }, { "epoch": 0.24828597631677823, "grad_norm": 47.40636444091797, "learning_rate": 9.366142181029808e-06, "loss": 17.1439, "step": 122910 }, { "epoch": 0.24830617694946205, "grad_norm": 257.0614013671875, "learning_rate": 9.365972066729967e-06, "loss": 24.4304, "step": 122920 }, { "epoch": 0.24832637758214587, "grad_norm": 262.1944885253906, "learning_rate": 9.365801931150909e-06, "loss": 12.9803, "step": 122930 }, { "epoch": 0.2483465782148297, "grad_norm": 593.2332763671875, "learning_rate": 9.36563177429346e-06, "loss": 23.0006, "step": 122940 }, { "epoch": 0.24836677884751351, "grad_norm": 267.6748046875, "learning_rate": 9.365461596158451e-06, "loss": 18.7587, "step": 122950 }, { "epoch": 0.2483869794801973, "grad_norm": 326.822021484375, "learning_rate": 9.365291396746714e-06, "loss": 17.3896, "step": 122960 }, { "epoch": 0.24840718011288113, "grad_norm": 348.2276916503906, "learning_rate": 9.365121176059075e-06, "loss": 14.8069, "step": 122970 }, { "epoch": 0.24842738074556495, "grad_norm": 256.3633117675781, "learning_rate": 9.364950934096365e-06, "loss": 15.0468, "step": 122980 }, { "epoch": 0.24844758137824877, "grad_norm": 666.3392944335938, "learning_rate": 9.364780670859412e-06, "loss": 25.8556, "step": 122990 }, { "epoch": 0.2484677820109326, "grad_norm": 477.95025634765625, "learning_rate": 9.364610386349048e-06, "loss": 27.4389, "step": 123000 }, { "epoch": 0.2484879826436164, "grad_norm": 380.0039367675781, "learning_rate": 9.364440080566104e-06, "loss": 16.6239, "step": 123010 }, { "epoch": 0.24850818327630023, "grad_norm": 472.1536560058594, "learning_rate": 
9.364269753511407e-06, "loss": 37.1813, "step": 123020 }, { "epoch": 0.24852838390898402, "grad_norm": 291.4718322753906, "learning_rate": 9.36409940518579e-06, "loss": 33.4035, "step": 123030 }, { "epoch": 0.24854858454166784, "grad_norm": 438.97589111328125, "learning_rate": 9.363929035590081e-06, "loss": 27.887, "step": 123040 }, { "epoch": 0.24856878517435166, "grad_norm": 222.2158203125, "learning_rate": 9.36375864472511e-06, "loss": 39.406, "step": 123050 }, { "epoch": 0.24858898580703548, "grad_norm": 177.656005859375, "learning_rate": 9.363588232591709e-06, "loss": 37.6204, "step": 123060 }, { "epoch": 0.2486091864397193, "grad_norm": 834.9160766601562, "learning_rate": 9.363417799190708e-06, "loss": 30.6445, "step": 123070 }, { "epoch": 0.24862938707240312, "grad_norm": 207.18080139160156, "learning_rate": 9.363247344522939e-06, "loss": 17.49, "step": 123080 }, { "epoch": 0.2486495877050869, "grad_norm": 387.3349914550781, "learning_rate": 9.363076868589232e-06, "loss": 12.6426, "step": 123090 }, { "epoch": 0.24866978833777073, "grad_norm": 316.5187683105469, "learning_rate": 9.362906371390416e-06, "loss": 18.9102, "step": 123100 }, { "epoch": 0.24868998897045455, "grad_norm": 1221.362060546875, "learning_rate": 9.362735852927324e-06, "loss": 41.7353, "step": 123110 }, { "epoch": 0.24871018960313837, "grad_norm": 835.676025390625, "learning_rate": 9.362565313200786e-06, "loss": 30.4273, "step": 123120 }, { "epoch": 0.2487303902358222, "grad_norm": 370.47784423828125, "learning_rate": 9.362394752211636e-06, "loss": 8.4214, "step": 123130 }, { "epoch": 0.248750590868506, "grad_norm": 692.7476806640625, "learning_rate": 9.3622241699607e-06, "loss": 20.3629, "step": 123140 }, { "epoch": 0.24877079150118983, "grad_norm": 908.5383911132812, "learning_rate": 9.362053566448816e-06, "loss": 24.5112, "step": 123150 }, { "epoch": 0.24879099213387362, "grad_norm": 200.86033630371094, "learning_rate": 9.36188294167681e-06, "loss": 15.0903, "step": 123160 }, { "epoch": 0.24881119276655744, "grad_norm": 141.50022888183594, "learning_rate": 9.361712295645515e-06, "loss": 20.2606, "step": 123170 }, { "epoch": 0.24883139339924126, "grad_norm": 587.0171508789062, "learning_rate": 9.361541628355763e-06, "loss": 25.998, "step": 123180 }, { "epoch": 0.24885159403192508, "grad_norm": 357.0301818847656, "learning_rate": 9.361370939808387e-06, "loss": 12.2195, "step": 123190 }, { "epoch": 0.2488717946646089, "grad_norm": 718.8137817382812, "learning_rate": 9.361200230004219e-06, "loss": 34.9343, "step": 123200 }, { "epoch": 0.24889199529729272, "grad_norm": 170.9770965576172, "learning_rate": 9.36102949894409e-06, "loss": 13.2904, "step": 123210 }, { "epoch": 0.24891219592997652, "grad_norm": 665.4484252929688, "learning_rate": 9.36085874662883e-06, "loss": 26.2902, "step": 123220 }, { "epoch": 0.24893239656266034, "grad_norm": 572.6903076171875, "learning_rate": 9.360687973059274e-06, "loss": 29.4998, "step": 123230 }, { "epoch": 0.24895259719534416, "grad_norm": 405.1957092285156, "learning_rate": 9.360517178236255e-06, "loss": 24.1565, "step": 123240 }, { "epoch": 0.24897279782802798, "grad_norm": 488.4898376464844, "learning_rate": 9.360346362160604e-06, "loss": 21.1325, "step": 123250 }, { "epoch": 0.2489929984607118, "grad_norm": 320.693603515625, "learning_rate": 9.360175524833153e-06, "loss": 34.7406, "step": 123260 }, { "epoch": 0.24901319909339562, "grad_norm": 60.05967330932617, "learning_rate": 9.360004666254735e-06, "loss": 30.0965, "step": 123270 }, { "epoch": 0.2490333997260794, 
"grad_norm": 175.448486328125, "learning_rate": 9.359833786426183e-06, "loss": 15.8951, "step": 123280 }, { "epoch": 0.24905360035876323, "grad_norm": 278.36834716796875, "learning_rate": 9.35966288534833e-06, "loss": 12.8098, "step": 123290 }, { "epoch": 0.24907380099144705, "grad_norm": 524.27392578125, "learning_rate": 9.35949196302201e-06, "loss": 25.2071, "step": 123300 }, { "epoch": 0.24909400162413087, "grad_norm": 353.5171203613281, "learning_rate": 9.359321019448054e-06, "loss": 18.8964, "step": 123310 }, { "epoch": 0.2491142022568147, "grad_norm": 693.8262939453125, "learning_rate": 9.359150054627298e-06, "loss": 17.0254, "step": 123320 }, { "epoch": 0.2491344028894985, "grad_norm": 403.408935546875, "learning_rate": 9.35897906856057e-06, "loss": 11.0137, "step": 123330 }, { "epoch": 0.24915460352218233, "grad_norm": 1126.0416259765625, "learning_rate": 9.358808061248708e-06, "loss": 32.328, "step": 123340 }, { "epoch": 0.24917480415486612, "grad_norm": 249.3668670654297, "learning_rate": 9.358637032692546e-06, "loss": 12.516, "step": 123350 }, { "epoch": 0.24919500478754994, "grad_norm": 617.3007202148438, "learning_rate": 9.358465982892913e-06, "loss": 23.9673, "step": 123360 }, { "epoch": 0.24921520542023376, "grad_norm": 24.10763931274414, "learning_rate": 9.358294911850648e-06, "loss": 10.9903, "step": 123370 }, { "epoch": 0.24923540605291758, "grad_norm": 424.51885986328125, "learning_rate": 9.35812381956658e-06, "loss": 10.8367, "step": 123380 }, { "epoch": 0.2492556066856014, "grad_norm": 338.3468322753906, "learning_rate": 9.357952706041545e-06, "loss": 47.2815, "step": 123390 }, { "epoch": 0.24927580731828522, "grad_norm": 124.84664154052734, "learning_rate": 9.357781571276379e-06, "loss": 34.763, "step": 123400 }, { "epoch": 0.249296007950969, "grad_norm": 654.6494750976562, "learning_rate": 9.357610415271913e-06, "loss": 26.2736, "step": 123410 }, { "epoch": 0.24931620858365283, "grad_norm": 230.8025360107422, "learning_rate": 9.357439238028982e-06, "loss": 24.442, "step": 123420 }, { "epoch": 0.24933640921633665, "grad_norm": 689.7447509765625, "learning_rate": 9.357268039548422e-06, "loss": 25.5089, "step": 123430 }, { "epoch": 0.24935660984902047, "grad_norm": 71.48304748535156, "learning_rate": 9.357096819831065e-06, "loss": 14.6578, "step": 123440 }, { "epoch": 0.2493768104817043, "grad_norm": 1084.497802734375, "learning_rate": 9.356925578877748e-06, "loss": 25.6389, "step": 123450 }, { "epoch": 0.2493970111143881, "grad_norm": 68.59516143798828, "learning_rate": 9.3567543166893e-06, "loss": 28.2204, "step": 123460 }, { "epoch": 0.24941721174707193, "grad_norm": 457.4917297363281, "learning_rate": 9.356583033266565e-06, "loss": 27.1434, "step": 123470 }, { "epoch": 0.24943741237975572, "grad_norm": 414.35943603515625, "learning_rate": 9.356411728610368e-06, "loss": 22.7297, "step": 123480 }, { "epoch": 0.24945761301243954, "grad_norm": 540.7085571289062, "learning_rate": 9.356240402721552e-06, "loss": 25.5983, "step": 123490 }, { "epoch": 0.24947781364512336, "grad_norm": 24.585803985595703, "learning_rate": 9.356069055600949e-06, "loss": 20.0636, "step": 123500 }, { "epoch": 0.24949801427780718, "grad_norm": 492.99542236328125, "learning_rate": 9.35589768724939e-06, "loss": 25.6995, "step": 123510 }, { "epoch": 0.249518214910491, "grad_norm": 631.908447265625, "learning_rate": 9.355726297667717e-06, "loss": 34.1041, "step": 123520 }, { "epoch": 0.24953841554317482, "grad_norm": 354.6398620605469, "learning_rate": 9.355554886856762e-06, "loss": 20.5488, "step": 
123530 }, { "epoch": 0.24955861617585862, "grad_norm": 272.0207214355469, "learning_rate": 9.355383454817362e-06, "loss": 19.0143, "step": 123540 }, { "epoch": 0.24957881680854244, "grad_norm": 254.9385528564453, "learning_rate": 9.355212001550349e-06, "loss": 32.9845, "step": 123550 }, { "epoch": 0.24959901744122626, "grad_norm": 309.18115234375, "learning_rate": 9.35504052705656e-06, "loss": 25.1024, "step": 123560 }, { "epoch": 0.24961921807391008, "grad_norm": 370.5217590332031, "learning_rate": 9.354869031336835e-06, "loss": 22.137, "step": 123570 }, { "epoch": 0.2496394187065939, "grad_norm": 237.80308532714844, "learning_rate": 9.354697514392005e-06, "loss": 37.1087, "step": 123580 }, { "epoch": 0.24965961933927772, "grad_norm": 332.1741027832031, "learning_rate": 9.354525976222907e-06, "loss": 23.1131, "step": 123590 }, { "epoch": 0.2496798199719615, "grad_norm": 216.78390502929688, "learning_rate": 9.354354416830377e-06, "loss": 32.0582, "step": 123600 }, { "epoch": 0.24970002060464533, "grad_norm": 353.84637451171875, "learning_rate": 9.354182836215252e-06, "loss": 20.9798, "step": 123610 }, { "epoch": 0.24972022123732915, "grad_norm": 361.56280517578125, "learning_rate": 9.35401123437837e-06, "loss": 28.2848, "step": 123620 }, { "epoch": 0.24974042187001297, "grad_norm": 722.0396118164062, "learning_rate": 9.353839611320563e-06, "loss": 35.3713, "step": 123630 }, { "epoch": 0.2497606225026968, "grad_norm": 759.531982421875, "learning_rate": 9.35366796704267e-06, "loss": 68.8292, "step": 123640 }, { "epoch": 0.2497808231353806, "grad_norm": 540.5938720703125, "learning_rate": 9.353496301545529e-06, "loss": 36.0932, "step": 123650 }, { "epoch": 0.24980102376806443, "grad_norm": 1160.757080078125, "learning_rate": 9.353324614829974e-06, "loss": 30.2268, "step": 123660 }, { "epoch": 0.24982122440074822, "grad_norm": 377.4170837402344, "learning_rate": 9.353152906896842e-06, "loss": 10.1157, "step": 123670 }, { "epoch": 0.24984142503343204, "grad_norm": 586.1427001953125, "learning_rate": 9.352981177746972e-06, "loss": 30.9146, "step": 123680 }, { "epoch": 0.24986162566611586, "grad_norm": 490.35498046875, "learning_rate": 9.3528094273812e-06, "loss": 16.9506, "step": 123690 }, { "epoch": 0.24988182629879968, "grad_norm": 178.823974609375, "learning_rate": 9.352637655800362e-06, "loss": 7.6823, "step": 123700 }, { "epoch": 0.2499020269314835, "grad_norm": 260.7527770996094, "learning_rate": 9.352465863005295e-06, "loss": 27.2347, "step": 123710 }, { "epoch": 0.24992222756416732, "grad_norm": 626.5932006835938, "learning_rate": 9.35229404899684e-06, "loss": 21.2371, "step": 123720 }, { "epoch": 0.2499424281968511, "grad_norm": 637.3138427734375, "learning_rate": 9.352122213775829e-06, "loss": 27.2617, "step": 123730 }, { "epoch": 0.24996262882953493, "grad_norm": 320.4120178222656, "learning_rate": 9.351950357343103e-06, "loss": 6.5722, "step": 123740 }, { "epoch": 0.24998282946221875, "grad_norm": 1096.346435546875, "learning_rate": 9.351778479699499e-06, "loss": 36.8681, "step": 123750 }, { "epoch": 0.25000303009490255, "grad_norm": 263.7565002441406, "learning_rate": 9.351606580845854e-06, "loss": 15.5306, "step": 123760 }, { "epoch": 0.25002323072758637, "grad_norm": 611.5940551757812, "learning_rate": 9.351434660783007e-06, "loss": 17.9882, "step": 123770 }, { "epoch": 0.2500434313602702, "grad_norm": 200.8699493408203, "learning_rate": 9.351262719511796e-06, "loss": 11.1526, "step": 123780 }, { "epoch": 0.250063631992954, "grad_norm": 279.4588928222656, "learning_rate": 
9.351090757033056e-06, "loss": 21.3229, "step": 123790 }, { "epoch": 0.2500838326256378, "grad_norm": 146.48106384277344, "learning_rate": 9.35091877334763e-06, "loss": 30.026, "step": 123800 }, { "epoch": 0.25010403325832165, "grad_norm": 462.09674072265625, "learning_rate": 9.350746768456351e-06, "loss": 17.7745, "step": 123810 }, { "epoch": 0.25012423389100547, "grad_norm": 535.4681396484375, "learning_rate": 9.350574742360062e-06, "loss": 21.889, "step": 123820 }, { "epoch": 0.2501444345236893, "grad_norm": 523.9403686523438, "learning_rate": 9.350402695059597e-06, "loss": 28.2628, "step": 123830 }, { "epoch": 0.2501646351563731, "grad_norm": 716.45166015625, "learning_rate": 9.3502306265558e-06, "loss": 36.8315, "step": 123840 }, { "epoch": 0.2501848357890569, "grad_norm": 176.59716796875, "learning_rate": 9.350058536849505e-06, "loss": 14.8663, "step": 123850 }, { "epoch": 0.25020503642174075, "grad_norm": 352.4153137207031, "learning_rate": 9.349886425941553e-06, "loss": 16.6689, "step": 123860 }, { "epoch": 0.25022523705442457, "grad_norm": 814.4611206054688, "learning_rate": 9.34971429383278e-06, "loss": 49.609, "step": 123870 }, { "epoch": 0.2502454376871084, "grad_norm": 696.630859375, "learning_rate": 9.349542140524029e-06, "loss": 31.2652, "step": 123880 }, { "epoch": 0.25026563831979215, "grad_norm": 253.35569763183594, "learning_rate": 9.349369966016135e-06, "loss": 19.0699, "step": 123890 }, { "epoch": 0.25028583895247597, "grad_norm": 776.6314086914062, "learning_rate": 9.349197770309942e-06, "loss": 22.1644, "step": 123900 }, { "epoch": 0.2503060395851598, "grad_norm": 338.4505310058594, "learning_rate": 9.349025553406286e-06, "loss": 18.2984, "step": 123910 }, { "epoch": 0.2503262402178436, "grad_norm": 505.5835876464844, "learning_rate": 9.348853315306006e-06, "loss": 16.0944, "step": 123920 }, { "epoch": 0.25034644085052743, "grad_norm": 213.57562255859375, "learning_rate": 9.348681056009942e-06, "loss": 17.9506, "step": 123930 }, { "epoch": 0.25036664148321125, "grad_norm": 196.11744689941406, "learning_rate": 9.348508775518935e-06, "loss": 13.4443, "step": 123940 }, { "epoch": 0.25038684211589507, "grad_norm": 486.9748229980469, "learning_rate": 9.348336473833824e-06, "loss": 21.9713, "step": 123950 }, { "epoch": 0.2504070427485789, "grad_norm": 202.58990478515625, "learning_rate": 9.348164150955448e-06, "loss": 18.8582, "step": 123960 }, { "epoch": 0.2504272433812627, "grad_norm": 446.57952880859375, "learning_rate": 9.347991806884646e-06, "loss": 19.8669, "step": 123970 }, { "epoch": 0.25044744401394653, "grad_norm": 685.4554443359375, "learning_rate": 9.347819441622261e-06, "loss": 8.7122, "step": 123980 }, { "epoch": 0.25046764464663035, "grad_norm": 937.5147094726562, "learning_rate": 9.347647055169132e-06, "loss": 33.5275, "step": 123990 }, { "epoch": 0.25048784527931417, "grad_norm": 887.2894897460938, "learning_rate": 9.347474647526095e-06, "loss": 24.8595, "step": 124000 }, { "epoch": 0.250508045911998, "grad_norm": 0.0, "learning_rate": 9.347302218693997e-06, "loss": 18.8776, "step": 124010 }, { "epoch": 0.25052824654468175, "grad_norm": 3751.490478515625, "learning_rate": 9.347129768673675e-06, "loss": 37.2533, "step": 124020 }, { "epoch": 0.2505484471773656, "grad_norm": 355.5738525390625, "learning_rate": 9.346957297465968e-06, "loss": 15.4733, "step": 124030 }, { "epoch": 0.2505686478100494, "grad_norm": 49.08369445800781, "learning_rate": 9.34678480507172e-06, "loss": 20.9675, "step": 124040 }, { "epoch": 0.2505888484427332, "grad_norm": 
257.855224609375, "learning_rate": 9.34661229149177e-06, "loss": 11.4117, "step": 124050 }, { "epoch": 0.25060904907541703, "grad_norm": 481.6152038574219, "learning_rate": 9.346439756726959e-06, "loss": 29.9393, "step": 124060 }, { "epoch": 0.25062924970810085, "grad_norm": 562.333984375, "learning_rate": 9.346267200778127e-06, "loss": 25.6238, "step": 124070 }, { "epoch": 0.2506494503407847, "grad_norm": 262.5706787109375, "learning_rate": 9.346094623646116e-06, "loss": 36.1854, "step": 124080 }, { "epoch": 0.2506696509734685, "grad_norm": 798.1765747070312, "learning_rate": 9.345922025331765e-06, "loss": 18.6843, "step": 124090 }, { "epoch": 0.2506898516061523, "grad_norm": 656.6163940429688, "learning_rate": 9.34574940583592e-06, "loss": 50.7797, "step": 124100 }, { "epoch": 0.25071005223883613, "grad_norm": 737.9656372070312, "learning_rate": 9.345576765159419e-06, "loss": 14.9178, "step": 124110 }, { "epoch": 0.25073025287151995, "grad_norm": 159.78631591796875, "learning_rate": 9.345404103303104e-06, "loss": 20.8397, "step": 124120 }, { "epoch": 0.2507504535042038, "grad_norm": 250.57003784179688, "learning_rate": 9.345231420267816e-06, "loss": 30.9404, "step": 124130 }, { "epoch": 0.2507706541368876, "grad_norm": 323.52801513671875, "learning_rate": 9.345058716054396e-06, "loss": 29.3399, "step": 124140 }, { "epoch": 0.25079085476957136, "grad_norm": 485.9049377441406, "learning_rate": 9.344885990663689e-06, "loss": 18.454, "step": 124150 }, { "epoch": 0.2508110554022552, "grad_norm": 185.63893127441406, "learning_rate": 9.344713244096533e-06, "loss": 28.1718, "step": 124160 }, { "epoch": 0.250831256034939, "grad_norm": 491.8743591308594, "learning_rate": 9.344540476353772e-06, "loss": 27.9859, "step": 124170 }, { "epoch": 0.2508514566676228, "grad_norm": 513.3983764648438, "learning_rate": 9.344367687436246e-06, "loss": 17.946, "step": 124180 }, { "epoch": 0.25087165730030664, "grad_norm": 173.89114379882812, "learning_rate": 9.344194877344802e-06, "loss": 16.4617, "step": 124190 }, { "epoch": 0.25089185793299046, "grad_norm": 221.052001953125, "learning_rate": 9.344022046080277e-06, "loss": 12.404, "step": 124200 }, { "epoch": 0.2509120585656743, "grad_norm": 184.27113342285156, "learning_rate": 9.343849193643517e-06, "loss": 25.8977, "step": 124210 }, { "epoch": 0.2509322591983581, "grad_norm": 214.9903564453125, "learning_rate": 9.343676320035362e-06, "loss": 15.2502, "step": 124220 }, { "epoch": 0.2509524598310419, "grad_norm": 739.7628784179688, "learning_rate": 9.343503425256655e-06, "loss": 29.346, "step": 124230 }, { "epoch": 0.25097266046372574, "grad_norm": 279.92120361328125, "learning_rate": 9.343330509308239e-06, "loss": 18.9458, "step": 124240 }, { "epoch": 0.25099286109640956, "grad_norm": 189.4911346435547, "learning_rate": 9.343157572190957e-06, "loss": 20.5485, "step": 124250 }, { "epoch": 0.2510130617290934, "grad_norm": 346.7785339355469, "learning_rate": 9.342984613905653e-06, "loss": 23.0963, "step": 124260 }, { "epoch": 0.2510332623617772, "grad_norm": 107.39372253417969, "learning_rate": 9.342811634453168e-06, "loss": 10.2146, "step": 124270 }, { "epoch": 0.25105346299446096, "grad_norm": 411.5723571777344, "learning_rate": 9.342638633834344e-06, "loss": 17.3026, "step": 124280 }, { "epoch": 0.2510736636271448, "grad_norm": 599.7190551757812, "learning_rate": 9.342465612050028e-06, "loss": 21.0134, "step": 124290 }, { "epoch": 0.2510938642598286, "grad_norm": 708.0547485351562, "learning_rate": 9.342292569101061e-06, "loss": 30.2688, "step": 124300 }, { 
"epoch": 0.2511140648925124, "grad_norm": 707.1171264648438, "learning_rate": 9.342119504988287e-06, "loss": 29.5308, "step": 124310 }, { "epoch": 0.25113426552519624, "grad_norm": 379.90093994140625, "learning_rate": 9.341946419712549e-06, "loss": 23.1947, "step": 124320 }, { "epoch": 0.25115446615788006, "grad_norm": 552.807861328125, "learning_rate": 9.341773313274689e-06, "loss": 12.9583, "step": 124330 }, { "epoch": 0.2511746667905639, "grad_norm": 649.8052368164062, "learning_rate": 9.341600185675555e-06, "loss": 31.7203, "step": 124340 }, { "epoch": 0.2511948674232477, "grad_norm": 326.8381042480469, "learning_rate": 9.341427036915987e-06, "loss": 29.4605, "step": 124350 }, { "epoch": 0.2512150680559315, "grad_norm": 362.11376953125, "learning_rate": 9.34125386699683e-06, "loss": 21.7367, "step": 124360 }, { "epoch": 0.25123526868861534, "grad_norm": 742.8120727539062, "learning_rate": 9.341080675918927e-06, "loss": 14.9553, "step": 124370 }, { "epoch": 0.25125546932129916, "grad_norm": 259.9765930175781, "learning_rate": 9.340907463683126e-06, "loss": 33.5456, "step": 124380 }, { "epoch": 0.251275669953983, "grad_norm": 598.995849609375, "learning_rate": 9.340734230290267e-06, "loss": 28.3483, "step": 124390 }, { "epoch": 0.25129587058666675, "grad_norm": 745.3855590820312, "learning_rate": 9.340560975741198e-06, "loss": 24.1055, "step": 124400 }, { "epoch": 0.25131607121935057, "grad_norm": 18.715286254882812, "learning_rate": 9.340387700036758e-06, "loss": 20.7292, "step": 124410 }, { "epoch": 0.2513362718520344, "grad_norm": 491.3661804199219, "learning_rate": 9.340214403177797e-06, "loss": 20.2669, "step": 124420 }, { "epoch": 0.2513564724847182, "grad_norm": 192.1144256591797, "learning_rate": 9.340041085165157e-06, "loss": 20.6693, "step": 124430 }, { "epoch": 0.251376673117402, "grad_norm": 286.21240234375, "learning_rate": 9.339867745999682e-06, "loss": 18.1965, "step": 124440 }, { "epoch": 0.25139687375008585, "grad_norm": 494.5196228027344, "learning_rate": 9.339694385682219e-06, "loss": 12.9852, "step": 124450 }, { "epoch": 0.25141707438276967, "grad_norm": 502.58599853515625, "learning_rate": 9.339521004213611e-06, "loss": 18.5937, "step": 124460 }, { "epoch": 0.2514372750154535, "grad_norm": 535.0, "learning_rate": 9.339347601594704e-06, "loss": 25.7947, "step": 124470 }, { "epoch": 0.2514574756481373, "grad_norm": 841.1640014648438, "learning_rate": 9.339174177826345e-06, "loss": 15.6669, "step": 124480 }, { "epoch": 0.2514776762808211, "grad_norm": 568.3220825195312, "learning_rate": 9.339000732909376e-06, "loss": 17.6957, "step": 124490 }, { "epoch": 0.25149787691350495, "grad_norm": 348.40625, "learning_rate": 9.338827266844643e-06, "loss": 22.942, "step": 124500 }, { "epoch": 0.25151807754618877, "grad_norm": 431.1079406738281, "learning_rate": 9.338653779632993e-06, "loss": 18.2831, "step": 124510 }, { "epoch": 0.2515382781788726, "grad_norm": 1128.4862060546875, "learning_rate": 9.33848027127527e-06, "loss": 31.4034, "step": 124520 }, { "epoch": 0.25155847881155635, "grad_norm": 49.614112854003906, "learning_rate": 9.33830674177232e-06, "loss": 14.1368, "step": 124530 }, { "epoch": 0.25157867944424017, "grad_norm": 329.799560546875, "learning_rate": 9.338133191124992e-06, "loss": 52.1327, "step": 124540 }, { "epoch": 0.251598880076924, "grad_norm": 521.410888671875, "learning_rate": 9.337959619334125e-06, "loss": 25.7208, "step": 124550 }, { "epoch": 0.2516190807096078, "grad_norm": 449.039794921875, "learning_rate": 9.337786026400573e-06, "loss": 20.2129, 
"step": 124560 }, { "epoch": 0.25163928134229163, "grad_norm": 72.53112030029297, "learning_rate": 9.337612412325174e-06, "loss": 28.8025, "step": 124570 }, { "epoch": 0.25165948197497545, "grad_norm": 627.7360229492188, "learning_rate": 9.33743877710878e-06, "loss": 21.4003, "step": 124580 }, { "epoch": 0.25167968260765927, "grad_norm": 595.9915771484375, "learning_rate": 9.337265120752236e-06, "loss": 25.925, "step": 124590 }, { "epoch": 0.2516998832403431, "grad_norm": 402.636474609375, "learning_rate": 9.337091443256388e-06, "loss": 13.7396, "step": 124600 }, { "epoch": 0.2517200838730269, "grad_norm": 628.917724609375, "learning_rate": 9.336917744622083e-06, "loss": 26.3352, "step": 124610 }, { "epoch": 0.25174028450571073, "grad_norm": 202.30224609375, "learning_rate": 9.336744024850165e-06, "loss": 28.0125, "step": 124620 }, { "epoch": 0.25176048513839455, "grad_norm": 97.6500473022461, "learning_rate": 9.336570283941483e-06, "loss": 19.9929, "step": 124630 }, { "epoch": 0.25178068577107837, "grad_norm": 143.10940551757812, "learning_rate": 9.336396521896884e-06, "loss": 19.2747, "step": 124640 }, { "epoch": 0.2518008864037622, "grad_norm": 663.7200317382812, "learning_rate": 9.336222738717214e-06, "loss": 22.4514, "step": 124650 }, { "epoch": 0.25182108703644596, "grad_norm": 553.7361450195312, "learning_rate": 9.33604893440332e-06, "loss": 23.7986, "step": 124660 }, { "epoch": 0.2518412876691298, "grad_norm": 317.4814758300781, "learning_rate": 9.33587510895605e-06, "loss": 30.1541, "step": 124670 }, { "epoch": 0.2518614883018136, "grad_norm": 479.4104919433594, "learning_rate": 9.335701262376249e-06, "loss": 38.8416, "step": 124680 }, { "epoch": 0.2518816889344974, "grad_norm": 298.1420593261719, "learning_rate": 9.335527394664767e-06, "loss": 29.4055, "step": 124690 }, { "epoch": 0.25190188956718124, "grad_norm": 212.33372497558594, "learning_rate": 9.33535350582245e-06, "loss": 12.1949, "step": 124700 }, { "epoch": 0.25192209019986506, "grad_norm": 507.652099609375, "learning_rate": 9.335179595850147e-06, "loss": 34.6706, "step": 124710 }, { "epoch": 0.2519422908325489, "grad_norm": 91.93364715576172, "learning_rate": 9.335005664748704e-06, "loss": 15.2404, "step": 124720 }, { "epoch": 0.2519624914652327, "grad_norm": 547.1591186523438, "learning_rate": 9.33483171251897e-06, "loss": 21.646, "step": 124730 }, { "epoch": 0.2519826920979165, "grad_norm": 287.1502380371094, "learning_rate": 9.33465773916179e-06, "loss": 15.2263, "step": 124740 }, { "epoch": 0.25200289273060034, "grad_norm": 267.3254699707031, "learning_rate": 9.334483744678015e-06, "loss": 14.3156, "step": 124750 }, { "epoch": 0.25202309336328416, "grad_norm": 462.9267883300781, "learning_rate": 9.33430972906849e-06, "loss": 22.0699, "step": 124760 }, { "epoch": 0.252043293995968, "grad_norm": 1060.0771484375, "learning_rate": 9.334135692334067e-06, "loss": 29.6059, "step": 124770 }, { "epoch": 0.2520634946286518, "grad_norm": 1192.935791015625, "learning_rate": 9.333961634475593e-06, "loss": 31.8512, "step": 124780 }, { "epoch": 0.25208369526133556, "grad_norm": 671.791748046875, "learning_rate": 9.333787555493915e-06, "loss": 20.1712, "step": 124790 }, { "epoch": 0.2521038958940194, "grad_norm": 606.4688720703125, "learning_rate": 9.333613455389883e-06, "loss": 14.9991, "step": 124800 }, { "epoch": 0.2521240965267032, "grad_norm": 583.3821411132812, "learning_rate": 9.333439334164343e-06, "loss": 18.1324, "step": 124810 }, { "epoch": 0.252144297159387, "grad_norm": 616.3452758789062, "learning_rate": 
9.333265191818145e-06, "loss": 25.5548, "step": 124820 }, { "epoch": 0.25216449779207084, "grad_norm": 96.70596313476562, "learning_rate": 9.333091028352138e-06, "loss": 53.8161, "step": 124830 }, { "epoch": 0.25218469842475466, "grad_norm": 740.4271240234375, "learning_rate": 9.332916843767173e-06, "loss": 23.7683, "step": 124840 }, { "epoch": 0.2522048990574385, "grad_norm": 479.810302734375, "learning_rate": 9.332742638064094e-06, "loss": 27.6112, "step": 124850 }, { "epoch": 0.2522250996901223, "grad_norm": 184.2608184814453, "learning_rate": 9.332568411243754e-06, "loss": 19.7329, "step": 124860 }, { "epoch": 0.2522453003228061, "grad_norm": 362.44598388671875, "learning_rate": 9.332394163307003e-06, "loss": 27.9859, "step": 124870 }, { "epoch": 0.25226550095548994, "grad_norm": 198.32406616210938, "learning_rate": 9.332219894254686e-06, "loss": 19.4701, "step": 124880 }, { "epoch": 0.25228570158817376, "grad_norm": 43.71586227416992, "learning_rate": 9.332045604087656e-06, "loss": 21.2054, "step": 124890 }, { "epoch": 0.2523059022208576, "grad_norm": 165.63328552246094, "learning_rate": 9.33187129280676e-06, "loss": 11.1213, "step": 124900 }, { "epoch": 0.25232610285354135, "grad_norm": 0.5600117444992065, "learning_rate": 9.331696960412849e-06, "loss": 23.7717, "step": 124910 }, { "epoch": 0.25234630348622517, "grad_norm": 377.10498046875, "learning_rate": 9.331522606906773e-06, "loss": 16.4109, "step": 124920 }, { "epoch": 0.252366504118909, "grad_norm": 45.88465118408203, "learning_rate": 9.331348232289382e-06, "loss": 15.0914, "step": 124930 }, { "epoch": 0.2523867047515928, "grad_norm": 474.8992919921875, "learning_rate": 9.331173836561522e-06, "loss": 51.2465, "step": 124940 }, { "epoch": 0.2524069053842766, "grad_norm": 520.2603759765625, "learning_rate": 9.330999419724048e-06, "loss": 30.8441, "step": 124950 }, { "epoch": 0.25242710601696045, "grad_norm": 631.8528442382812, "learning_rate": 9.330824981777808e-06, "loss": 20.9879, "step": 124960 }, { "epoch": 0.25244730664964427, "grad_norm": 455.13140869140625, "learning_rate": 9.330650522723653e-06, "loss": 29.3983, "step": 124970 }, { "epoch": 0.2524675072823281, "grad_norm": 847.7002563476562, "learning_rate": 9.33047604256243e-06, "loss": 30.7107, "step": 124980 }, { "epoch": 0.2524877079150119, "grad_norm": 339.4563903808594, "learning_rate": 9.330301541294994e-06, "loss": 33.6187, "step": 124990 }, { "epoch": 0.2525079085476957, "grad_norm": 318.11883544921875, "learning_rate": 9.330127018922195e-06, "loss": 17.0702, "step": 125000 }, { "epoch": 0.25252810918037955, "grad_norm": 336.95672607421875, "learning_rate": 9.32995247544488e-06, "loss": 23.9262, "step": 125010 }, { "epoch": 0.25254830981306337, "grad_norm": 269.0525817871094, "learning_rate": 9.329777910863902e-06, "loss": 13.5515, "step": 125020 }, { "epoch": 0.2525685104457472, "grad_norm": 295.6583251953125, "learning_rate": 9.32960332518011e-06, "loss": 23.0723, "step": 125030 }, { "epoch": 0.25258871107843095, "grad_norm": 90.92164611816406, "learning_rate": 9.32942871839436e-06, "loss": 15.5435, "step": 125040 }, { "epoch": 0.25260891171111477, "grad_norm": 1134.6337890625, "learning_rate": 9.329254090507498e-06, "loss": 18.1013, "step": 125050 }, { "epoch": 0.2526291123437986, "grad_norm": 399.2601318359375, "learning_rate": 9.329079441520377e-06, "loss": 28.3305, "step": 125060 }, { "epoch": 0.2526493129764824, "grad_norm": 131.95816040039062, "learning_rate": 9.328904771433846e-06, "loss": 51.0927, "step": 125070 }, { "epoch": 0.25266951360916623, 
"grad_norm": 6.422798156738281, "learning_rate": 9.32873008024876e-06, "loss": 15.6498, "step": 125080 }, { "epoch": 0.25268971424185005, "grad_norm": 252.98773193359375, "learning_rate": 9.328555367965969e-06, "loss": 16.4012, "step": 125090 }, { "epoch": 0.25270991487453387, "grad_norm": 435.1174011230469, "learning_rate": 9.328380634586322e-06, "loss": 45.6191, "step": 125100 }, { "epoch": 0.2527301155072177, "grad_norm": 405.8626708984375, "learning_rate": 9.328205880110675e-06, "loss": 27.0458, "step": 125110 }, { "epoch": 0.2527503161399015, "grad_norm": 456.6902160644531, "learning_rate": 9.328031104539876e-06, "loss": 40.6759, "step": 125120 }, { "epoch": 0.25277051677258533, "grad_norm": 5385.3447265625, "learning_rate": 9.32785630787478e-06, "loss": 39.5671, "step": 125130 }, { "epoch": 0.25279071740526915, "grad_norm": 1896.0120849609375, "learning_rate": 9.327681490116233e-06, "loss": 34.2355, "step": 125140 }, { "epoch": 0.25281091803795297, "grad_norm": 389.5846252441406, "learning_rate": 9.327506651265096e-06, "loss": 18.4686, "step": 125150 }, { "epoch": 0.2528311186706368, "grad_norm": 295.4189453125, "learning_rate": 9.327331791322214e-06, "loss": 32.8881, "step": 125160 }, { "epoch": 0.25285131930332055, "grad_norm": 358.54852294921875, "learning_rate": 9.327156910288444e-06, "loss": 49.5071, "step": 125170 }, { "epoch": 0.2528715199360044, "grad_norm": 456.89776611328125, "learning_rate": 9.326982008164633e-06, "loss": 11.1545, "step": 125180 }, { "epoch": 0.2528917205686882, "grad_norm": 459.1197204589844, "learning_rate": 9.326807084951639e-06, "loss": 26.3985, "step": 125190 }, { "epoch": 0.252911921201372, "grad_norm": 272.07696533203125, "learning_rate": 9.326632140650311e-06, "loss": 15.2639, "step": 125200 }, { "epoch": 0.25293212183405583, "grad_norm": 327.7573547363281, "learning_rate": 9.326457175261503e-06, "loss": 43.4969, "step": 125210 }, { "epoch": 0.25295232246673965, "grad_norm": 609.4105224609375, "learning_rate": 9.326282188786066e-06, "loss": 28.7113, "step": 125220 }, { "epoch": 0.2529725230994235, "grad_norm": 177.5101318359375, "learning_rate": 9.326107181224857e-06, "loss": 33.3363, "step": 125230 }, { "epoch": 0.2529927237321073, "grad_norm": 382.63250732421875, "learning_rate": 9.325932152578726e-06, "loss": 33.746, "step": 125240 }, { "epoch": 0.2530129243647911, "grad_norm": 206.65513610839844, "learning_rate": 9.325757102848523e-06, "loss": 25.7514, "step": 125250 }, { "epoch": 0.25303312499747493, "grad_norm": 332.5850524902344, "learning_rate": 9.325582032035108e-06, "loss": 19.6042, "step": 125260 }, { "epoch": 0.25305332563015875, "grad_norm": 314.7016296386719, "learning_rate": 9.325406940139327e-06, "loss": 20.9479, "step": 125270 }, { "epoch": 0.2530735262628426, "grad_norm": 500.2131652832031, "learning_rate": 9.32523182716204e-06, "loss": 20.6874, "step": 125280 }, { "epoch": 0.2530937268955264, "grad_norm": 303.85430908203125, "learning_rate": 9.325056693104099e-06, "loss": 24.6704, "step": 125290 }, { "epoch": 0.25311392752821016, "grad_norm": 716.2509155273438, "learning_rate": 9.324881537966355e-06, "loss": 20.6399, "step": 125300 }, { "epoch": 0.253134128160894, "grad_norm": 337.0372009277344, "learning_rate": 9.324706361749663e-06, "loss": 31.2655, "step": 125310 }, { "epoch": 0.2531543287935778, "grad_norm": 490.2641296386719, "learning_rate": 9.324531164454876e-06, "loss": 41.4906, "step": 125320 }, { "epoch": 0.2531745294262616, "grad_norm": 355.2698059082031, "learning_rate": 9.324355946082848e-06, "loss": 26.2124, "step": 
125330 }, { "epoch": 0.25319473005894544, "grad_norm": 927.72900390625, "learning_rate": 9.324180706634434e-06, "loss": 31.8564, "step": 125340 }, { "epoch": 0.25321493069162926, "grad_norm": 335.15618896484375, "learning_rate": 9.32400544611049e-06, "loss": 34.1606, "step": 125350 }, { "epoch": 0.2532351313243131, "grad_norm": 701.79248046875, "learning_rate": 9.323830164511865e-06, "loss": 31.1435, "step": 125360 }, { "epoch": 0.2532553319569969, "grad_norm": 432.52545166015625, "learning_rate": 9.323654861839418e-06, "loss": 14.9118, "step": 125370 }, { "epoch": 0.2532755325896807, "grad_norm": 340.4264221191406, "learning_rate": 9.323479538094001e-06, "loss": 23.3531, "step": 125380 }, { "epoch": 0.25329573322236454, "grad_norm": 368.6153564453125, "learning_rate": 9.323304193276468e-06, "loss": 13.5629, "step": 125390 }, { "epoch": 0.25331593385504836, "grad_norm": 346.3034362792969, "learning_rate": 9.323128827387675e-06, "loss": 28.6539, "step": 125400 }, { "epoch": 0.2533361344877322, "grad_norm": 82.48759460449219, "learning_rate": 9.322953440428478e-06, "loss": 24.1632, "step": 125410 }, { "epoch": 0.253356335120416, "grad_norm": 127.63971710205078, "learning_rate": 9.322778032399728e-06, "loss": 10.0801, "step": 125420 }, { "epoch": 0.25337653575309976, "grad_norm": 126.68302917480469, "learning_rate": 9.322602603302285e-06, "loss": 15.9136, "step": 125430 }, { "epoch": 0.2533967363857836, "grad_norm": 214.1927032470703, "learning_rate": 9.322427153136999e-06, "loss": 12.6137, "step": 125440 }, { "epoch": 0.2534169370184674, "grad_norm": 501.4695739746094, "learning_rate": 9.322251681904728e-06, "loss": 17.3749, "step": 125450 }, { "epoch": 0.2534371376511512, "grad_norm": 473.4718933105469, "learning_rate": 9.322076189606326e-06, "loss": 23.6231, "step": 125460 }, { "epoch": 0.25345733828383504, "grad_norm": 274.3175048828125, "learning_rate": 9.32190067624265e-06, "loss": 24.7301, "step": 125470 }, { "epoch": 0.25347753891651886, "grad_norm": 624.3567504882812, "learning_rate": 9.321725141814553e-06, "loss": 34.4827, "step": 125480 }, { "epoch": 0.2534977395492027, "grad_norm": 136.22207641601562, "learning_rate": 9.321549586322894e-06, "loss": 30.056, "step": 125490 }, { "epoch": 0.2535179401818865, "grad_norm": 246.750732421875, "learning_rate": 9.321374009768525e-06, "loss": 9.8188, "step": 125500 }, { "epoch": 0.2535381408145703, "grad_norm": 372.09375, "learning_rate": 9.321198412152303e-06, "loss": 15.8852, "step": 125510 }, { "epoch": 0.25355834144725414, "grad_norm": 243.27505493164062, "learning_rate": 9.321022793475082e-06, "loss": 20.912, "step": 125520 }, { "epoch": 0.25357854207993796, "grad_norm": 719.737060546875, "learning_rate": 9.320847153737724e-06, "loss": 21.6542, "step": 125530 }, { "epoch": 0.2535987427126218, "grad_norm": 2670.60546875, "learning_rate": 9.320671492941079e-06, "loss": 86.2205, "step": 125540 }, { "epoch": 0.25361894334530555, "grad_norm": 165.8459014892578, "learning_rate": 9.320495811086005e-06, "loss": 29.4207, "step": 125550 }, { "epoch": 0.25363914397798937, "grad_norm": 102.05403900146484, "learning_rate": 9.320320108173359e-06, "loss": 11.2042, "step": 125560 }, { "epoch": 0.2536593446106732, "grad_norm": 253.1824951171875, "learning_rate": 9.320144384203997e-06, "loss": 15.9787, "step": 125570 }, { "epoch": 0.253679545243357, "grad_norm": 28.418079376220703, "learning_rate": 9.319968639178775e-06, "loss": 22.1027, "step": 125580 }, { "epoch": 0.2536997458760408, "grad_norm": 580.892333984375, "learning_rate": 9.31979287309855e-06, 
"loss": 37.5344, "step": 125590 }, { "epoch": 0.25371994650872465, "grad_norm": 705.298095703125, "learning_rate": 9.319617085964177e-06, "loss": 30.1274, "step": 125600 }, { "epoch": 0.25374014714140847, "grad_norm": 263.25457763671875, "learning_rate": 9.319441277776515e-06, "loss": 18.3837, "step": 125610 }, { "epoch": 0.2537603477740923, "grad_norm": 440.63330078125, "learning_rate": 9.31926544853642e-06, "loss": 21.0317, "step": 125620 }, { "epoch": 0.2537805484067761, "grad_norm": 188.52142333984375, "learning_rate": 9.319089598244751e-06, "loss": 21.7909, "step": 125630 }, { "epoch": 0.2538007490394599, "grad_norm": 318.95556640625, "learning_rate": 9.318913726902361e-06, "loss": 10.416, "step": 125640 }, { "epoch": 0.25382094967214375, "grad_norm": 202.9970245361328, "learning_rate": 9.31873783451011e-06, "loss": 26.4527, "step": 125650 }, { "epoch": 0.25384115030482757, "grad_norm": 312.7405090332031, "learning_rate": 9.318561921068856e-06, "loss": 23.1204, "step": 125660 }, { "epoch": 0.2538613509375114, "grad_norm": 516.2415771484375, "learning_rate": 9.318385986579453e-06, "loss": 32.1379, "step": 125670 }, { "epoch": 0.25388155157019515, "grad_norm": 94.18694305419922, "learning_rate": 9.31821003104276e-06, "loss": 13.0049, "step": 125680 }, { "epoch": 0.25390175220287897, "grad_norm": 390.0401306152344, "learning_rate": 9.318034054459637e-06, "loss": 22.9145, "step": 125690 }, { "epoch": 0.2539219528355628, "grad_norm": 178.14857482910156, "learning_rate": 9.317858056830938e-06, "loss": 26.2758, "step": 125700 }, { "epoch": 0.2539421534682466, "grad_norm": 623.6525268554688, "learning_rate": 9.317682038157523e-06, "loss": 26.7489, "step": 125710 }, { "epoch": 0.25396235410093043, "grad_norm": 785.6990966796875, "learning_rate": 9.31750599844025e-06, "loss": 42.065, "step": 125720 }, { "epoch": 0.25398255473361425, "grad_norm": 441.115966796875, "learning_rate": 9.317329937679976e-06, "loss": 15.2299, "step": 125730 }, { "epoch": 0.25400275536629807, "grad_norm": 411.2340087890625, "learning_rate": 9.31715385587756e-06, "loss": 20.9233, "step": 125740 }, { "epoch": 0.2540229559989819, "grad_norm": 0.0, "learning_rate": 9.316977753033858e-06, "loss": 26.4595, "step": 125750 }, { "epoch": 0.2540431566316657, "grad_norm": 417.7156677246094, "learning_rate": 9.316801629149732e-06, "loss": 21.4012, "step": 125760 }, { "epoch": 0.25406335726434953, "grad_norm": 297.7471008300781, "learning_rate": 9.316625484226039e-06, "loss": 22.0466, "step": 125770 }, { "epoch": 0.25408355789703335, "grad_norm": 514.2144165039062, "learning_rate": 9.316449318263635e-06, "loss": 12.2763, "step": 125780 }, { "epoch": 0.25410375852971717, "grad_norm": 429.86260986328125, "learning_rate": 9.316273131263382e-06, "loss": 33.9502, "step": 125790 }, { "epoch": 0.254123959162401, "grad_norm": 487.69451904296875, "learning_rate": 9.316096923226135e-06, "loss": 16.7534, "step": 125800 }, { "epoch": 0.25414415979508476, "grad_norm": 408.4544677734375, "learning_rate": 9.315920694152758e-06, "loss": 37.152, "step": 125810 }, { "epoch": 0.2541643604277686, "grad_norm": 348.5964050292969, "learning_rate": 9.315744444044105e-06, "loss": 27.8744, "step": 125820 }, { "epoch": 0.2541845610604524, "grad_norm": 360.5810546875, "learning_rate": 9.315568172901038e-06, "loss": 33.8791, "step": 125830 }, { "epoch": 0.2542047616931362, "grad_norm": 145.67672729492188, "learning_rate": 9.315391880724414e-06, "loss": 27.9125, "step": 125840 }, { "epoch": 0.25422496232582004, "grad_norm": 315.48248291015625, "learning_rate": 
9.315215567515095e-06, "loss": 12.6001, "step": 125850 }, { "epoch": 0.25424516295850386, "grad_norm": 364.5586242675781, "learning_rate": 9.315039233273937e-06, "loss": 28.8424, "step": 125860 }, { "epoch": 0.2542653635911877, "grad_norm": 731.7398681640625, "learning_rate": 9.314862878001802e-06, "loss": 19.6772, "step": 125870 }, { "epoch": 0.2542855642238715, "grad_norm": 22.68682098388672, "learning_rate": 9.314686501699548e-06, "loss": 35.0526, "step": 125880 }, { "epoch": 0.2543057648565553, "grad_norm": 1074.5439453125, "learning_rate": 9.314510104368036e-06, "loss": 56.1964, "step": 125890 }, { "epoch": 0.25432596548923914, "grad_norm": 579.945556640625, "learning_rate": 9.314333686008125e-06, "loss": 8.7587, "step": 125900 }, { "epoch": 0.25434616612192296, "grad_norm": 301.3296203613281, "learning_rate": 9.314157246620677e-06, "loss": 28.6569, "step": 125910 }, { "epoch": 0.2543663667546068, "grad_norm": 433.71417236328125, "learning_rate": 9.313980786206547e-06, "loss": 14.9204, "step": 125920 }, { "epoch": 0.2543865673872906, "grad_norm": 531.1284790039062, "learning_rate": 9.313804304766598e-06, "loss": 24.0472, "step": 125930 }, { "epoch": 0.25440676801997436, "grad_norm": 549.3108520507812, "learning_rate": 9.31362780230169e-06, "loss": 25.5688, "step": 125940 }, { "epoch": 0.2544269686526582, "grad_norm": 15.446022987365723, "learning_rate": 9.313451278812684e-06, "loss": 21.3832, "step": 125950 }, { "epoch": 0.254447169285342, "grad_norm": 416.72601318359375, "learning_rate": 9.31327473430044e-06, "loss": 14.7539, "step": 125960 }, { "epoch": 0.2544673699180258, "grad_norm": 204.7281494140625, "learning_rate": 9.313098168765818e-06, "loss": 18.962, "step": 125970 }, { "epoch": 0.25448757055070964, "grad_norm": 1.3953946828842163, "learning_rate": 9.312921582209678e-06, "loss": 22.8346, "step": 125980 }, { "epoch": 0.25450777118339346, "grad_norm": 380.04534912109375, "learning_rate": 9.31274497463288e-06, "loss": 17.6816, "step": 125990 }, { "epoch": 0.2545279718160773, "grad_norm": 1047.90966796875, "learning_rate": 9.312568346036288e-06, "loss": 39.1876, "step": 126000 }, { "epoch": 0.2545481724487611, "grad_norm": 304.3274230957031, "learning_rate": 9.31239169642076e-06, "loss": 18.6807, "step": 126010 }, { "epoch": 0.2545683730814449, "grad_norm": 605.7481689453125, "learning_rate": 9.312215025787159e-06, "loss": 27.6769, "step": 126020 }, { "epoch": 0.25458857371412874, "grad_norm": 185.8279571533203, "learning_rate": 9.312038334136345e-06, "loss": 11.9562, "step": 126030 }, { "epoch": 0.25460877434681256, "grad_norm": 270.0262451171875, "learning_rate": 9.311861621469178e-06, "loss": 15.7294, "step": 126040 }, { "epoch": 0.2546289749794964, "grad_norm": 432.89654541015625, "learning_rate": 9.31168488778652e-06, "loss": 16.5122, "step": 126050 }, { "epoch": 0.2546491756121802, "grad_norm": 430.0558776855469, "learning_rate": 9.311508133089234e-06, "loss": 20.3825, "step": 126060 }, { "epoch": 0.25466937624486397, "grad_norm": 550.1329345703125, "learning_rate": 9.311331357378181e-06, "loss": 35.2528, "step": 126070 }, { "epoch": 0.2546895768775478, "grad_norm": 407.1488037109375, "learning_rate": 9.31115456065422e-06, "loss": 13.0109, "step": 126080 }, { "epoch": 0.2547097775102316, "grad_norm": 107.07001495361328, "learning_rate": 9.310977742918215e-06, "loss": 27.5119, "step": 126090 }, { "epoch": 0.2547299781429154, "grad_norm": 417.6630554199219, "learning_rate": 9.31080090417103e-06, "loss": 17.8397, "step": 126100 }, { "epoch": 0.25475017877559925, "grad_norm": 
360.2039489746094, "learning_rate": 9.310624044413521e-06, "loss": 23.1929, "step": 126110 }, { "epoch": 0.25477037940828307, "grad_norm": 299.057373046875, "learning_rate": 9.310447163646554e-06, "loss": 25.1292, "step": 126120 }, { "epoch": 0.2547905800409669, "grad_norm": 311.0969543457031, "learning_rate": 9.31027026187099e-06, "loss": 17.6882, "step": 126130 }, { "epoch": 0.2548107806736507, "grad_norm": 32.40909194946289, "learning_rate": 9.31009333908769e-06, "loss": 18.7168, "step": 126140 }, { "epoch": 0.2548309813063345, "grad_norm": 319.6306457519531, "learning_rate": 9.309916395297523e-06, "loss": 26.3919, "step": 126150 }, { "epoch": 0.25485118193901835, "grad_norm": 417.1777038574219, "learning_rate": 9.309739430501341e-06, "loss": 21.3647, "step": 126160 }, { "epoch": 0.25487138257170217, "grad_norm": 487.8786926269531, "learning_rate": 9.309562444700016e-06, "loss": 17.336, "step": 126170 }, { "epoch": 0.254891583204386, "grad_norm": 91.7564926147461, "learning_rate": 9.309385437894402e-06, "loss": 11.7092, "step": 126180 }, { "epoch": 0.25491178383706975, "grad_norm": 182.48744201660156, "learning_rate": 9.309208410085368e-06, "loss": 26.1819, "step": 126190 }, { "epoch": 0.25493198446975357, "grad_norm": 602.0535888671875, "learning_rate": 9.309031361273775e-06, "loss": 24.8527, "step": 126200 }, { "epoch": 0.2549521851024374, "grad_norm": 328.72174072265625, "learning_rate": 9.308854291460487e-06, "loss": 16.4376, "step": 126210 }, { "epoch": 0.2549723857351212, "grad_norm": 898.3261108398438, "learning_rate": 9.308677200646364e-06, "loss": 26.2678, "step": 126220 }, { "epoch": 0.25499258636780503, "grad_norm": 438.5179443359375, "learning_rate": 9.308500088832271e-06, "loss": 34.204, "step": 126230 }, { "epoch": 0.25501278700048885, "grad_norm": 160.48248291015625, "learning_rate": 9.308322956019073e-06, "loss": 14.5189, "step": 126240 }, { "epoch": 0.25503298763317267, "grad_norm": 188.25396728515625, "learning_rate": 9.30814580220763e-06, "loss": 23.2258, "step": 126250 }, { "epoch": 0.2550531882658565, "grad_norm": 122.05780792236328, "learning_rate": 9.307968627398807e-06, "loss": 15.8943, "step": 126260 }, { "epoch": 0.2550733888985403, "grad_norm": 178.4373321533203, "learning_rate": 9.307791431593468e-06, "loss": 35.2254, "step": 126270 }, { "epoch": 0.25509358953122413, "grad_norm": 380.3116455078125, "learning_rate": 9.307614214792474e-06, "loss": 21.2028, "step": 126280 }, { "epoch": 0.25511379016390795, "grad_norm": 366.8847351074219, "learning_rate": 9.307436976996692e-06, "loss": 23.8709, "step": 126290 }, { "epoch": 0.25513399079659177, "grad_norm": 671.3023071289062, "learning_rate": 9.307259718206984e-06, "loss": 17.903, "step": 126300 }, { "epoch": 0.2551541914292756, "grad_norm": 427.435791015625, "learning_rate": 9.307082438424216e-06, "loss": 14.3205, "step": 126310 }, { "epoch": 0.25517439206195935, "grad_norm": 150.7323760986328, "learning_rate": 9.30690513764925e-06, "loss": 13.4545, "step": 126320 }, { "epoch": 0.2551945926946432, "grad_norm": 492.7773132324219, "learning_rate": 9.30672781588295e-06, "loss": 35.5066, "step": 126330 }, { "epoch": 0.255214793327327, "grad_norm": 252.93759155273438, "learning_rate": 9.306550473126182e-06, "loss": 16.4858, "step": 126340 }, { "epoch": 0.2552349939600108, "grad_norm": 233.88818359375, "learning_rate": 9.30637310937981e-06, "loss": 12.5319, "step": 126350 }, { "epoch": 0.25525519459269463, "grad_norm": 158.14404296875, "learning_rate": 9.306195724644695e-06, "loss": 17.5493, "step": 126360 }, { 
"epoch": 0.25527539522537845, "grad_norm": 16.396745681762695, "learning_rate": 9.306018318921707e-06, "loss": 28.2651, "step": 126370 }, { "epoch": 0.2552955958580623, "grad_norm": 534.8278198242188, "learning_rate": 9.305840892211705e-06, "loss": 41.8798, "step": 126380 }, { "epoch": 0.2553157964907461, "grad_norm": 567.7081909179688, "learning_rate": 9.30566344451556e-06, "loss": 32.1383, "step": 126390 }, { "epoch": 0.2553359971234299, "grad_norm": 482.4659118652344, "learning_rate": 9.305485975834132e-06, "loss": 24.9557, "step": 126400 }, { "epoch": 0.25535619775611373, "grad_norm": 480.99163818359375, "learning_rate": 9.305308486168288e-06, "loss": 24.6496, "step": 126410 }, { "epoch": 0.25537639838879755, "grad_norm": 14.627530097961426, "learning_rate": 9.305130975518893e-06, "loss": 12.2533, "step": 126420 }, { "epoch": 0.2553965990214814, "grad_norm": 611.604736328125, "learning_rate": 9.304953443886811e-06, "loss": 41.7902, "step": 126430 }, { "epoch": 0.2554167996541652, "grad_norm": 248.27984619140625, "learning_rate": 9.304775891272908e-06, "loss": 24.2118, "step": 126440 }, { "epoch": 0.25543700028684896, "grad_norm": 324.9423828125, "learning_rate": 9.30459831767805e-06, "loss": 24.758, "step": 126450 }, { "epoch": 0.2554572009195328, "grad_norm": 61.277671813964844, "learning_rate": 9.304420723103101e-06, "loss": 16.0402, "step": 126460 }, { "epoch": 0.2554774015522166, "grad_norm": 127.63201141357422, "learning_rate": 9.304243107548928e-06, "loss": 31.894, "step": 126470 }, { "epoch": 0.2554976021849004, "grad_norm": 521.2554321289062, "learning_rate": 9.304065471016396e-06, "loss": 23.6338, "step": 126480 }, { "epoch": 0.25551780281758424, "grad_norm": 381.10595703125, "learning_rate": 9.303887813506372e-06, "loss": 14.5358, "step": 126490 }, { "epoch": 0.25553800345026806, "grad_norm": 433.2705078125, "learning_rate": 9.30371013501972e-06, "loss": 21.2155, "step": 126500 }, { "epoch": 0.2555582040829519, "grad_norm": 291.43621826171875, "learning_rate": 9.303532435557305e-06, "loss": 20.5499, "step": 126510 }, { "epoch": 0.2555784047156357, "grad_norm": 344.5585632324219, "learning_rate": 9.303354715119997e-06, "loss": 28.2469, "step": 126520 }, { "epoch": 0.2555986053483195, "grad_norm": 677.4943237304688, "learning_rate": 9.30317697370866e-06, "loss": 29.1504, "step": 126530 }, { "epoch": 0.25561880598100334, "grad_norm": 710.51171875, "learning_rate": 9.302999211324159e-06, "loss": 22.6089, "step": 126540 }, { "epoch": 0.25563900661368716, "grad_norm": 515.8234252929688, "learning_rate": 9.302821427967363e-06, "loss": 46.5509, "step": 126550 }, { "epoch": 0.255659207246371, "grad_norm": 169.32244873046875, "learning_rate": 9.302643623639136e-06, "loss": 31.265, "step": 126560 }, { "epoch": 0.2556794078790548, "grad_norm": 19.139883041381836, "learning_rate": 9.302465798340347e-06, "loss": 23.1725, "step": 126570 }, { "epoch": 0.25569960851173856, "grad_norm": 740.4823608398438, "learning_rate": 9.302287952071862e-06, "loss": 37.6291, "step": 126580 }, { "epoch": 0.2557198091444224, "grad_norm": 231.51718139648438, "learning_rate": 9.302110084834545e-06, "loss": 25.9045, "step": 126590 }, { "epoch": 0.2557400097771062, "grad_norm": 248.5474853515625, "learning_rate": 9.301932196629267e-06, "loss": 29.5799, "step": 126600 }, { "epoch": 0.25576021040979, "grad_norm": 477.0473327636719, "learning_rate": 9.301754287456894e-06, "loss": 20.7088, "step": 126610 }, { "epoch": 0.25578041104247384, "grad_norm": 1489.47998046875, "learning_rate": 9.301576357318291e-06, "loss": 
27.1703, "step": 126620 }, { "epoch": 0.25580061167515766, "grad_norm": 218.8579559326172, "learning_rate": 9.301398406214326e-06, "loss": 30.6336, "step": 126630 }, { "epoch": 0.2558208123078415, "grad_norm": 362.53033447265625, "learning_rate": 9.301220434145868e-06, "loss": 23.0781, "step": 126640 }, { "epoch": 0.2558410129405253, "grad_norm": 491.1239318847656, "learning_rate": 9.301042441113784e-06, "loss": 20.6586, "step": 126650 }, { "epoch": 0.2558612135732091, "grad_norm": 650.5254516601562, "learning_rate": 9.300864427118938e-06, "loss": 21.4127, "step": 126660 }, { "epoch": 0.25588141420589294, "grad_norm": 504.1291198730469, "learning_rate": 9.300686392162203e-06, "loss": 22.4969, "step": 126670 }, { "epoch": 0.25590161483857676, "grad_norm": 773.797607421875, "learning_rate": 9.300508336244443e-06, "loss": 27.0239, "step": 126680 }, { "epoch": 0.2559218154712606, "grad_norm": 350.9950866699219, "learning_rate": 9.300330259366528e-06, "loss": 9.2837, "step": 126690 }, { "epoch": 0.2559420161039444, "grad_norm": 721.0562744140625, "learning_rate": 9.300152161529325e-06, "loss": 21.5986, "step": 126700 }, { "epoch": 0.25596221673662817, "grad_norm": 304.818603515625, "learning_rate": 9.299974042733701e-06, "loss": 22.3794, "step": 126710 }, { "epoch": 0.255982417369312, "grad_norm": 330.103759765625, "learning_rate": 9.299795902980524e-06, "loss": 35.504, "step": 126720 }, { "epoch": 0.2560026180019958, "grad_norm": 726.6624145507812, "learning_rate": 9.299617742270665e-06, "loss": 13.4531, "step": 126730 }, { "epoch": 0.2560228186346796, "grad_norm": 817.6095581054688, "learning_rate": 9.29943956060499e-06, "loss": 24.4785, "step": 126740 }, { "epoch": 0.25604301926736345, "grad_norm": 472.0202331542969, "learning_rate": 9.299261357984368e-06, "loss": 31.5839, "step": 126750 }, { "epoch": 0.25606321990004727, "grad_norm": 377.89208984375, "learning_rate": 9.299083134409667e-06, "loss": 19.9071, "step": 126760 }, { "epoch": 0.2560834205327311, "grad_norm": 1075.21435546875, "learning_rate": 9.298904889881757e-06, "loss": 26.7717, "step": 126770 }, { "epoch": 0.2561036211654149, "grad_norm": 332.4275817871094, "learning_rate": 9.298726624401507e-06, "loss": 21.1105, "step": 126780 }, { "epoch": 0.2561238217980987, "grad_norm": 422.2454528808594, "learning_rate": 9.298548337969784e-06, "loss": 24.6704, "step": 126790 }, { "epoch": 0.25614402243078255, "grad_norm": 1162.3631591796875, "learning_rate": 9.298370030587456e-06, "loss": 32.4518, "step": 126800 }, { "epoch": 0.25616422306346637, "grad_norm": 221.30853271484375, "learning_rate": 9.298191702255395e-06, "loss": 22.9621, "step": 126810 }, { "epoch": 0.2561844236961502, "grad_norm": 205.13914489746094, "learning_rate": 9.298013352974469e-06, "loss": 22.1188, "step": 126820 }, { "epoch": 0.25620462432883395, "grad_norm": 349.7917785644531, "learning_rate": 9.297834982745548e-06, "loss": 29.8036, "step": 126830 }, { "epoch": 0.25622482496151777, "grad_norm": 614.1033935546875, "learning_rate": 9.2976565915695e-06, "loss": 30.9366, "step": 126840 }, { "epoch": 0.2562450255942016, "grad_norm": 349.589111328125, "learning_rate": 9.297478179447195e-06, "loss": 21.0276, "step": 126850 }, { "epoch": 0.2562652262268854, "grad_norm": 590.1732788085938, "learning_rate": 9.297299746379503e-06, "loss": 23.7506, "step": 126860 }, { "epoch": 0.25628542685956923, "grad_norm": 904.5769653320312, "learning_rate": 9.297121292367293e-06, "loss": 31.6848, "step": 126870 }, { "epoch": 0.25630562749225305, "grad_norm": 446.6373291015625, 
"learning_rate": 9.296942817411433e-06, "loss": 17.5534, "step": 126880 }, { "epoch": 0.25632582812493687, "grad_norm": 3.2007405757904053, "learning_rate": 9.296764321512797e-06, "loss": 34.6667, "step": 126890 }, { "epoch": 0.2563460287576207, "grad_norm": 352.6800842285156, "learning_rate": 9.296585804672253e-06, "loss": 14.5502, "step": 126900 }, { "epoch": 0.2563662293903045, "grad_norm": 555.1561889648438, "learning_rate": 9.29640726689067e-06, "loss": 42.8015, "step": 126910 }, { "epoch": 0.25638643002298833, "grad_norm": 1103.8218994140625, "learning_rate": 9.29622870816892e-06, "loss": 51.0637, "step": 126920 }, { "epoch": 0.25640663065567215, "grad_norm": 283.2348327636719, "learning_rate": 9.29605012850787e-06, "loss": 31.336, "step": 126930 }, { "epoch": 0.25642683128835597, "grad_norm": 283.6893310546875, "learning_rate": 9.295871527908396e-06, "loss": 27.0819, "step": 126940 }, { "epoch": 0.2564470319210398, "grad_norm": 938.28369140625, "learning_rate": 9.295692906371362e-06, "loss": 51.4435, "step": 126950 }, { "epoch": 0.25646723255372356, "grad_norm": 370.732421875, "learning_rate": 9.295514263897644e-06, "loss": 15.1923, "step": 126960 }, { "epoch": 0.2564874331864074, "grad_norm": 837.4765014648438, "learning_rate": 9.29533560048811e-06, "loss": 30.7102, "step": 126970 }, { "epoch": 0.2565076338190912, "grad_norm": 291.0663146972656, "learning_rate": 9.295156916143631e-06, "loss": 33.8184, "step": 126980 }, { "epoch": 0.256527834451775, "grad_norm": 300.3931579589844, "learning_rate": 9.294978210865078e-06, "loss": 22.4324, "step": 126990 }, { "epoch": 0.25654803508445884, "grad_norm": 324.9526062011719, "learning_rate": 9.294799484653323e-06, "loss": 20.5186, "step": 127000 }, { "epoch": 0.25656823571714266, "grad_norm": 558.15576171875, "learning_rate": 9.294620737509235e-06, "loss": 25.6781, "step": 127010 }, { "epoch": 0.2565884363498265, "grad_norm": 526.6491088867188, "learning_rate": 9.294441969433688e-06, "loss": 26.2408, "step": 127020 }, { "epoch": 0.2566086369825103, "grad_norm": 279.3838806152344, "learning_rate": 9.294263180427549e-06, "loss": 22.4848, "step": 127030 }, { "epoch": 0.2566288376151941, "grad_norm": 297.49139404296875, "learning_rate": 9.294084370491695e-06, "loss": 24.0509, "step": 127040 }, { "epoch": 0.25664903824787794, "grad_norm": 768.2383422851562, "learning_rate": 9.293905539626992e-06, "loss": 31.2574, "step": 127050 }, { "epoch": 0.25666923888056176, "grad_norm": 235.2180938720703, "learning_rate": 9.293726687834318e-06, "loss": 22.5829, "step": 127060 }, { "epoch": 0.2566894395132456, "grad_norm": 491.4215087890625, "learning_rate": 9.293547815114537e-06, "loss": 16.4592, "step": 127070 }, { "epoch": 0.2567096401459294, "grad_norm": 306.5160827636719, "learning_rate": 9.293368921468526e-06, "loss": 22.34, "step": 127080 }, { "epoch": 0.25672984077861316, "grad_norm": 455.01708984375, "learning_rate": 9.293190006897156e-06, "loss": 23.3811, "step": 127090 }, { "epoch": 0.256750041411297, "grad_norm": 402.4524230957031, "learning_rate": 9.293011071401299e-06, "loss": 17.4413, "step": 127100 }, { "epoch": 0.2567702420439808, "grad_norm": 406.9162902832031, "learning_rate": 9.292832114981825e-06, "loss": 26.5433, "step": 127110 }, { "epoch": 0.2567904426766646, "grad_norm": 460.7688293457031, "learning_rate": 9.29265313763961e-06, "loss": 25.3499, "step": 127120 }, { "epoch": 0.25681064330934844, "grad_norm": 385.5970764160156, "learning_rate": 9.292474139375523e-06, "loss": 19.3667, "step": 127130 }, { "epoch": 0.25683084394203226, 
"grad_norm": 479.5018615722656, "learning_rate": 9.292295120190438e-06, "loss": 21.7879, "step": 127140 }, { "epoch": 0.2568510445747161, "grad_norm": 410.8296203613281, "learning_rate": 9.292116080085226e-06, "loss": 14.9632, "step": 127150 }, { "epoch": 0.2568712452073999, "grad_norm": 256.1704406738281, "learning_rate": 9.291937019060762e-06, "loss": 30.5486, "step": 127160 }, { "epoch": 0.2568914458400837, "grad_norm": 314.48760986328125, "learning_rate": 9.291757937117917e-06, "loss": 18.2682, "step": 127170 }, { "epoch": 0.25691164647276754, "grad_norm": 363.6204528808594, "learning_rate": 9.291578834257565e-06, "loss": 26.4588, "step": 127180 }, { "epoch": 0.25693184710545136, "grad_norm": 143.50018310546875, "learning_rate": 9.291399710480578e-06, "loss": 21.9556, "step": 127190 }, { "epoch": 0.2569520477381352, "grad_norm": 328.93560791015625, "learning_rate": 9.291220565787829e-06, "loss": 25.0751, "step": 127200 }, { "epoch": 0.256972248370819, "grad_norm": 155.34873962402344, "learning_rate": 9.291041400180193e-06, "loss": 19.5352, "step": 127210 }, { "epoch": 0.25699244900350277, "grad_norm": 182.27647399902344, "learning_rate": 9.29086221365854e-06, "loss": 40.0581, "step": 127220 }, { "epoch": 0.2570126496361866, "grad_norm": 35.468082427978516, "learning_rate": 9.290683006223745e-06, "loss": 19.2183, "step": 127230 }, { "epoch": 0.2570328502688704, "grad_norm": 217.8312530517578, "learning_rate": 9.290503777876683e-06, "loss": 26.7485, "step": 127240 }, { "epoch": 0.2570530509015542, "grad_norm": 244.3412628173828, "learning_rate": 9.290324528618225e-06, "loss": 20.2491, "step": 127250 }, { "epoch": 0.25707325153423805, "grad_norm": 340.1300964355469, "learning_rate": 9.290145258449243e-06, "loss": 25.7546, "step": 127260 }, { "epoch": 0.25709345216692187, "grad_norm": 624.1416625976562, "learning_rate": 9.289965967370617e-06, "loss": 32.9281, "step": 127270 }, { "epoch": 0.2571136527996057, "grad_norm": 398.3768615722656, "learning_rate": 9.289786655383215e-06, "loss": 20.2076, "step": 127280 }, { "epoch": 0.2571338534322895, "grad_norm": 447.0444641113281, "learning_rate": 9.289607322487914e-06, "loss": 20.0938, "step": 127290 }, { "epoch": 0.2571540540649733, "grad_norm": 764.7389526367188, "learning_rate": 9.289427968685588e-06, "loss": 20.0609, "step": 127300 }, { "epoch": 0.25717425469765715, "grad_norm": 594.932373046875, "learning_rate": 9.28924859397711e-06, "loss": 23.3758, "step": 127310 }, { "epoch": 0.25719445533034097, "grad_norm": 153.19818115234375, "learning_rate": 9.289069198363353e-06, "loss": 15.733, "step": 127320 }, { "epoch": 0.2572146559630248, "grad_norm": 650.463134765625, "learning_rate": 9.288889781845193e-06, "loss": 26.4759, "step": 127330 }, { "epoch": 0.2572348565957086, "grad_norm": 384.6339111328125, "learning_rate": 9.288710344423505e-06, "loss": 21.3026, "step": 127340 }, { "epoch": 0.25725505722839237, "grad_norm": 1784.262939453125, "learning_rate": 9.288530886099165e-06, "loss": 30.8165, "step": 127350 }, { "epoch": 0.2572752578610762, "grad_norm": 328.6500549316406, "learning_rate": 9.288351406873044e-06, "loss": 18.8066, "step": 127360 }, { "epoch": 0.25729545849376, "grad_norm": 528.574462890625, "learning_rate": 9.288171906746018e-06, "loss": 15.9671, "step": 127370 }, { "epoch": 0.25731565912644383, "grad_norm": 514.7537231445312, "learning_rate": 9.287992385718963e-06, "loss": 15.6263, "step": 127380 }, { "epoch": 0.25733585975912765, "grad_norm": 248.57421875, "learning_rate": 9.287812843792752e-06, "loss": 25.0234, "step": 
127390 }, { "epoch": 0.25735606039181147, "grad_norm": 666.0214233398438, "learning_rate": 9.287633280968263e-06, "loss": 30.8517, "step": 127400 }, { "epoch": 0.2573762610244953, "grad_norm": 346.4957580566406, "learning_rate": 9.287453697246367e-06, "loss": 20.1712, "step": 127410 }, { "epoch": 0.2573964616571791, "grad_norm": 343.5994873046875, "learning_rate": 9.287274092627944e-06, "loss": 31.8904, "step": 127420 }, { "epoch": 0.25741666228986293, "grad_norm": 892.2613525390625, "learning_rate": 9.287094467113866e-06, "loss": 46.5281, "step": 127430 }, { "epoch": 0.25743686292254675, "grad_norm": 586.280517578125, "learning_rate": 9.28691482070501e-06, "loss": 36.4094, "step": 127440 }, { "epoch": 0.25745706355523057, "grad_norm": 852.3339233398438, "learning_rate": 9.28673515340225e-06, "loss": 37.6252, "step": 127450 }, { "epoch": 0.2574772641879144, "grad_norm": 217.954833984375, "learning_rate": 9.286555465206463e-06, "loss": 18.7746, "step": 127460 }, { "epoch": 0.25749746482059815, "grad_norm": 298.8985290527344, "learning_rate": 9.286375756118526e-06, "loss": 18.3606, "step": 127470 }, { "epoch": 0.257517665453282, "grad_norm": 375.60205078125, "learning_rate": 9.286196026139311e-06, "loss": 28.7833, "step": 127480 }, { "epoch": 0.2575378660859658, "grad_norm": 376.5921936035156, "learning_rate": 9.2860162752697e-06, "loss": 14.3099, "step": 127490 }, { "epoch": 0.2575580667186496, "grad_norm": 249.449462890625, "learning_rate": 9.285836503510562e-06, "loss": 19.4126, "step": 127500 }, { "epoch": 0.25757826735133343, "grad_norm": 9.907649993896484, "learning_rate": 9.285656710862778e-06, "loss": 33.7773, "step": 127510 }, { "epoch": 0.25759846798401725, "grad_norm": 627.5342407226562, "learning_rate": 9.285476897327223e-06, "loss": 22.5205, "step": 127520 }, { "epoch": 0.2576186686167011, "grad_norm": 223.77120971679688, "learning_rate": 9.285297062904774e-06, "loss": 21.9005, "step": 127530 }, { "epoch": 0.2576388692493849, "grad_norm": 185.26669311523438, "learning_rate": 9.285117207596307e-06, "loss": 18.3743, "step": 127540 }, { "epoch": 0.2576590698820687, "grad_norm": 319.34002685546875, "learning_rate": 9.284937331402697e-06, "loss": 32.4146, "step": 127550 }, { "epoch": 0.25767927051475253, "grad_norm": 1139.358642578125, "learning_rate": 9.284757434324823e-06, "loss": 24.3872, "step": 127560 }, { "epoch": 0.25769947114743635, "grad_norm": 46.01262283325195, "learning_rate": 9.284577516363561e-06, "loss": 34.8342, "step": 127570 }, { "epoch": 0.2577196717801202, "grad_norm": 78.2344741821289, "learning_rate": 9.284397577519788e-06, "loss": 43.596, "step": 127580 }, { "epoch": 0.257739872412804, "grad_norm": 695.995849609375, "learning_rate": 9.28421761779438e-06, "loss": 30.5016, "step": 127590 }, { "epoch": 0.25776007304548776, "grad_norm": 519.5594482421875, "learning_rate": 9.284037637188215e-06, "loss": 21.8365, "step": 127600 }, { "epoch": 0.2577802736781716, "grad_norm": 239.10166931152344, "learning_rate": 9.28385763570217e-06, "loss": 21.9353, "step": 127610 }, { "epoch": 0.2578004743108554, "grad_norm": 716.4600219726562, "learning_rate": 9.283677613337124e-06, "loss": 21.1781, "step": 127620 }, { "epoch": 0.2578206749435392, "grad_norm": 349.0670471191406, "learning_rate": 9.283497570093952e-06, "loss": 12.2865, "step": 127630 }, { "epoch": 0.25784087557622304, "grad_norm": 222.88279724121094, "learning_rate": 9.283317505973533e-06, "loss": 26.176, "step": 127640 }, { "epoch": 0.25786107620890686, "grad_norm": 289.0815734863281, "learning_rate": 
9.283137420976742e-06, "loss": 22.7614, "step": 127650 }, { "epoch": 0.2578812768415907, "grad_norm": 1140.44091796875, "learning_rate": 9.282957315104462e-06, "loss": 34.3784, "step": 127660 }, { "epoch": 0.2579014774742745, "grad_norm": 181.1317138671875, "learning_rate": 9.282777188357563e-06, "loss": 22.6653, "step": 127670 }, { "epoch": 0.2579216781069583, "grad_norm": 420.8912353515625, "learning_rate": 9.282597040736932e-06, "loss": 22.2477, "step": 127680 }, { "epoch": 0.25794187873964214, "grad_norm": 6.192861557006836, "learning_rate": 9.282416872243441e-06, "loss": 22.3886, "step": 127690 }, { "epoch": 0.25796207937232596, "grad_norm": 312.1874694824219, "learning_rate": 9.282236682877968e-06, "loss": 29.201, "step": 127700 }, { "epoch": 0.2579822800050098, "grad_norm": 299.1607666015625, "learning_rate": 9.282056472641393e-06, "loss": 18.8325, "step": 127710 }, { "epoch": 0.2580024806376936, "grad_norm": 181.7383270263672, "learning_rate": 9.281876241534595e-06, "loss": 19.3202, "step": 127720 }, { "epoch": 0.25802268127037736, "grad_norm": 542.5490112304688, "learning_rate": 9.28169598955845e-06, "loss": 31.3417, "step": 127730 }, { "epoch": 0.2580428819030612, "grad_norm": 238.58201599121094, "learning_rate": 9.28151571671384e-06, "loss": 24.5436, "step": 127740 }, { "epoch": 0.258063082535745, "grad_norm": 390.826904296875, "learning_rate": 9.281335423001641e-06, "loss": 21.4078, "step": 127750 }, { "epoch": 0.2580832831684288, "grad_norm": 618.39111328125, "learning_rate": 9.281155108422732e-06, "loss": 21.4185, "step": 127760 }, { "epoch": 0.25810348380111264, "grad_norm": 194.64149475097656, "learning_rate": 9.280974772977994e-06, "loss": 14.8643, "step": 127770 }, { "epoch": 0.25812368443379646, "grad_norm": 726.81005859375, "learning_rate": 9.280794416668303e-06, "loss": 26.2176, "step": 127780 }, { "epoch": 0.2581438850664803, "grad_norm": 223.7606964111328, "learning_rate": 9.280614039494538e-06, "loss": 15.3907, "step": 127790 }, { "epoch": 0.2581640856991641, "grad_norm": 294.724609375, "learning_rate": 9.280433641457582e-06, "loss": 15.0769, "step": 127800 }, { "epoch": 0.2581842863318479, "grad_norm": 269.91461181640625, "learning_rate": 9.28025322255831e-06, "loss": 16.6367, "step": 127810 }, { "epoch": 0.25820448696453174, "grad_norm": 277.8019714355469, "learning_rate": 9.280072782797602e-06, "loss": 9.468, "step": 127820 }, { "epoch": 0.25822468759721556, "grad_norm": 363.867919921875, "learning_rate": 9.279892322176341e-06, "loss": 11.1287, "step": 127830 }, { "epoch": 0.2582448882298994, "grad_norm": 55.111881256103516, "learning_rate": 9.279711840695401e-06, "loss": 43.9801, "step": 127840 }, { "epoch": 0.2582650888625832, "grad_norm": 228.3421630859375, "learning_rate": 9.279531338355666e-06, "loss": 15.5273, "step": 127850 }, { "epoch": 0.25828528949526697, "grad_norm": 594.6398315429688, "learning_rate": 9.279350815158014e-06, "loss": 27.1824, "step": 127860 }, { "epoch": 0.2583054901279508, "grad_norm": 611.1942749023438, "learning_rate": 9.279170271103326e-06, "loss": 16.6828, "step": 127870 }, { "epoch": 0.2583256907606346, "grad_norm": 245.9205322265625, "learning_rate": 9.27898970619248e-06, "loss": 17.6414, "step": 127880 }, { "epoch": 0.2583458913933184, "grad_norm": 314.4803771972656, "learning_rate": 9.278809120426358e-06, "loss": 16.0779, "step": 127890 }, { "epoch": 0.25836609202600225, "grad_norm": 169.61856079101562, "learning_rate": 9.278628513805838e-06, "loss": 16.6567, "step": 127900 }, { "epoch": 0.25838629265868607, "grad_norm": 
659.0407104492188, "learning_rate": 9.278447886331803e-06, "loss": 27.8375, "step": 127910 }, { "epoch": 0.2584064932913699, "grad_norm": 297.7413024902344, "learning_rate": 9.27826723800513e-06, "loss": 19.5705, "step": 127920 }, { "epoch": 0.2584266939240537, "grad_norm": 366.10211181640625, "learning_rate": 9.278086568826702e-06, "loss": 23.52, "step": 127930 }, { "epoch": 0.2584468945567375, "grad_norm": 121.39274597167969, "learning_rate": 9.277905878797401e-06, "loss": 8.4138, "step": 127940 }, { "epoch": 0.25846709518942135, "grad_norm": 106.3432846069336, "learning_rate": 9.277725167918103e-06, "loss": 24.0516, "step": 127950 }, { "epoch": 0.25848729582210517, "grad_norm": 202.67037963867188, "learning_rate": 9.277544436189693e-06, "loss": 24.5088, "step": 127960 }, { "epoch": 0.258507496454789, "grad_norm": 521.38427734375, "learning_rate": 9.27736368361305e-06, "loss": 14.0373, "step": 127970 }, { "epoch": 0.25852769708747275, "grad_norm": 328.37091064453125, "learning_rate": 9.277182910189056e-06, "loss": 19.4296, "step": 127980 }, { "epoch": 0.25854789772015657, "grad_norm": 70.05731964111328, "learning_rate": 9.27700211591859e-06, "loss": 22.5398, "step": 127990 }, { "epoch": 0.2585680983528404, "grad_norm": 293.0453186035156, "learning_rate": 9.276821300802535e-06, "loss": 14.6514, "step": 128000 }, { "epoch": 0.2585882989855242, "grad_norm": 384.33099365234375, "learning_rate": 9.27664046484177e-06, "loss": 23.3837, "step": 128010 }, { "epoch": 0.25860849961820803, "grad_norm": 344.4331359863281, "learning_rate": 9.27645960803718e-06, "loss": 24.3928, "step": 128020 }, { "epoch": 0.25862870025089185, "grad_norm": 467.5977783203125, "learning_rate": 9.276278730389642e-06, "loss": 21.3627, "step": 128030 }, { "epoch": 0.25864890088357567, "grad_norm": 633.8275146484375, "learning_rate": 9.276097831900044e-06, "loss": 24.7434, "step": 128040 }, { "epoch": 0.2586691015162595, "grad_norm": 797.9624633789062, "learning_rate": 9.275916912569261e-06, "loss": 13.6561, "step": 128050 }, { "epoch": 0.2586893021489433, "grad_norm": 675.3600463867188, "learning_rate": 9.27573597239818e-06, "loss": 27.3217, "step": 128060 }, { "epoch": 0.25870950278162713, "grad_norm": 677.53515625, "learning_rate": 9.275555011387679e-06, "loss": 9.3107, "step": 128070 }, { "epoch": 0.25872970341431095, "grad_norm": 60.00661087036133, "learning_rate": 9.275374029538639e-06, "loss": 35.8117, "step": 128080 }, { "epoch": 0.25874990404699477, "grad_norm": 431.9416809082031, "learning_rate": 9.275193026851947e-06, "loss": 16.7078, "step": 128090 }, { "epoch": 0.2587701046796786, "grad_norm": 206.30618286132812, "learning_rate": 9.275012003328483e-06, "loss": 21.6261, "step": 128100 }, { "epoch": 0.25879030531236236, "grad_norm": 149.74598693847656, "learning_rate": 9.274830958969129e-06, "loss": 23.0925, "step": 128110 }, { "epoch": 0.2588105059450462, "grad_norm": 473.61517333984375, "learning_rate": 9.274649893774768e-06, "loss": 36.3299, "step": 128120 }, { "epoch": 0.25883070657773, "grad_norm": 210.2192840576172, "learning_rate": 9.27446880774628e-06, "loss": 39.463, "step": 128130 }, { "epoch": 0.2588509072104138, "grad_norm": 338.50628662109375, "learning_rate": 9.27428770088455e-06, "loss": 14.9153, "step": 128140 }, { "epoch": 0.25887110784309764, "grad_norm": 15.398112297058105, "learning_rate": 9.27410657319046e-06, "loss": 27.2724, "step": 128150 }, { "epoch": 0.25889130847578146, "grad_norm": 289.1322937011719, "learning_rate": 9.273925424664894e-06, "loss": 14.9957, "step": 128160 }, { "epoch": 
0.2589115091084653, "grad_norm": 551.4632568359375, "learning_rate": 9.273744255308733e-06, "loss": 24.0425, "step": 128170 }, { "epoch": 0.2589317097411491, "grad_norm": 342.7764892578125, "learning_rate": 9.273563065122862e-06, "loss": 21.7269, "step": 128180 }, { "epoch": 0.2589519103738329, "grad_norm": 715.4881591796875, "learning_rate": 9.27338185410816e-06, "loss": 23.4469, "step": 128190 }, { "epoch": 0.25897211100651674, "grad_norm": 710.1723022460938, "learning_rate": 9.273200622265516e-06, "loss": 23.1727, "step": 128200 }, { "epoch": 0.25899231163920056, "grad_norm": 335.8462219238281, "learning_rate": 9.27301936959581e-06, "loss": 26.6075, "step": 128210 }, { "epoch": 0.2590125122718844, "grad_norm": 757.0194091796875, "learning_rate": 9.272838096099926e-06, "loss": 38.6605, "step": 128220 }, { "epoch": 0.2590327129045682, "grad_norm": 285.6708068847656, "learning_rate": 9.272656801778745e-06, "loss": 21.8727, "step": 128230 }, { "epoch": 0.25905291353725196, "grad_norm": 182.349365234375, "learning_rate": 9.272475486633155e-06, "loss": 21.5104, "step": 128240 }, { "epoch": 0.2590731141699358, "grad_norm": 310.44403076171875, "learning_rate": 9.272294150664039e-06, "loss": 43.0297, "step": 128250 }, { "epoch": 0.2590933148026196, "grad_norm": 414.88885498046875, "learning_rate": 9.272112793872277e-06, "loss": 18.6812, "step": 128260 }, { "epoch": 0.2591135154353034, "grad_norm": 254.77940368652344, "learning_rate": 9.271931416258756e-06, "loss": 31.6537, "step": 128270 }, { "epoch": 0.25913371606798724, "grad_norm": 203.3861541748047, "learning_rate": 9.27175001782436e-06, "loss": 17.7203, "step": 128280 }, { "epoch": 0.25915391670067106, "grad_norm": 674.7282104492188, "learning_rate": 9.271568598569971e-06, "loss": 23.375, "step": 128290 }, { "epoch": 0.2591741173333549, "grad_norm": 314.7342529296875, "learning_rate": 9.271387158496477e-06, "loss": 13.6596, "step": 128300 }, { "epoch": 0.2591943179660387, "grad_norm": 298.5931701660156, "learning_rate": 9.271205697604759e-06, "loss": 11.1612, "step": 128310 }, { "epoch": 0.2592145185987225, "grad_norm": 680.8109130859375, "learning_rate": 9.271024215895702e-06, "loss": 25.9474, "step": 128320 }, { "epoch": 0.25923471923140634, "grad_norm": 501.0088195800781, "learning_rate": 9.270842713370192e-06, "loss": 20.2804, "step": 128330 }, { "epoch": 0.25925491986409016, "grad_norm": 278.8302917480469, "learning_rate": 9.270661190029112e-06, "loss": 15.063, "step": 128340 }, { "epoch": 0.259275120496774, "grad_norm": 368.35577392578125, "learning_rate": 9.270479645873347e-06, "loss": 26.0217, "step": 128350 }, { "epoch": 0.2592953211294578, "grad_norm": 692.879638671875, "learning_rate": 9.270298080903782e-06, "loss": 25.3173, "step": 128360 }, { "epoch": 0.25931552176214157, "grad_norm": 437.3133239746094, "learning_rate": 9.270116495121303e-06, "loss": 39.2009, "step": 128370 }, { "epoch": 0.2593357223948254, "grad_norm": 466.69622802734375, "learning_rate": 9.269934888526793e-06, "loss": 25.8048, "step": 128380 }, { "epoch": 0.2593559230275092, "grad_norm": 87.55073547363281, "learning_rate": 9.269753261121139e-06, "loss": 21.5006, "step": 128390 }, { "epoch": 0.259376123660193, "grad_norm": 210.5568084716797, "learning_rate": 9.269571612905227e-06, "loss": 15.77, "step": 128400 }, { "epoch": 0.25939632429287685, "grad_norm": 611.5077514648438, "learning_rate": 9.269389943879938e-06, "loss": 18.144, "step": 128410 }, { "epoch": 0.25941652492556067, "grad_norm": 155.47767639160156, "learning_rate": 9.269208254046161e-06, "loss": 
22.0572, "step": 128420 }, { "epoch": 0.2594367255582445, "grad_norm": 497.9201965332031, "learning_rate": 9.269026543404782e-06, "loss": 18.4371, "step": 128430 }, { "epoch": 0.2594569261909283, "grad_norm": 448.6292724609375, "learning_rate": 9.268844811956683e-06, "loss": 11.5372, "step": 128440 }, { "epoch": 0.2594771268236121, "grad_norm": 618.7467651367188, "learning_rate": 9.268663059702753e-06, "loss": 36.2373, "step": 128450 }, { "epoch": 0.25949732745629595, "grad_norm": 165.6209259033203, "learning_rate": 9.268481286643878e-06, "loss": 25.0362, "step": 128460 }, { "epoch": 0.25951752808897977, "grad_norm": 292.6033630371094, "learning_rate": 9.268299492780942e-06, "loss": 11.3669, "step": 128470 }, { "epoch": 0.2595377287216636, "grad_norm": 1330.187744140625, "learning_rate": 9.268117678114833e-06, "loss": 37.4719, "step": 128480 }, { "epoch": 0.2595579293543474, "grad_norm": 211.21011352539062, "learning_rate": 9.267935842646437e-06, "loss": 21.5859, "step": 128490 }, { "epoch": 0.25957812998703117, "grad_norm": 214.05702209472656, "learning_rate": 9.267753986376638e-06, "loss": 30.5191, "step": 128500 }, { "epoch": 0.259598330619715, "grad_norm": 1050.858642578125, "learning_rate": 9.267572109306325e-06, "loss": 30.533, "step": 128510 }, { "epoch": 0.2596185312523988, "grad_norm": 405.3336181640625, "learning_rate": 9.26739021143638e-06, "loss": 40.4927, "step": 128520 }, { "epoch": 0.25963873188508263, "grad_norm": 607.3820190429688, "learning_rate": 9.267208292767696e-06, "loss": 24.9273, "step": 128530 }, { "epoch": 0.25965893251776645, "grad_norm": 551.8255004882812, "learning_rate": 9.267026353301155e-06, "loss": 32.4756, "step": 128540 }, { "epoch": 0.25967913315045027, "grad_norm": 1008.7666015625, "learning_rate": 9.266844393037644e-06, "loss": 35.6306, "step": 128550 }, { "epoch": 0.2596993337831341, "grad_norm": 679.9075927734375, "learning_rate": 9.266662411978052e-06, "loss": 16.6644, "step": 128560 }, { "epoch": 0.2597195344158179, "grad_norm": 0.0, "learning_rate": 9.266480410123264e-06, "loss": 23.0932, "step": 128570 }, { "epoch": 0.25973973504850173, "grad_norm": 347.7939758300781, "learning_rate": 9.266298387474169e-06, "loss": 22.2822, "step": 128580 }, { "epoch": 0.25975993568118555, "grad_norm": 508.5229187011719, "learning_rate": 9.266116344031652e-06, "loss": 13.1423, "step": 128590 }, { "epoch": 0.25978013631386937, "grad_norm": 478.3236083984375, "learning_rate": 9.265934279796602e-06, "loss": 20.7805, "step": 128600 }, { "epoch": 0.2598003369465532, "grad_norm": 407.348388671875, "learning_rate": 9.265752194769906e-06, "loss": 18.2017, "step": 128610 }, { "epoch": 0.25982053757923695, "grad_norm": 0.0, "learning_rate": 9.265570088952452e-06, "loss": 22.6018, "step": 128620 }, { "epoch": 0.2598407382119208, "grad_norm": 640.04296875, "learning_rate": 9.265387962345125e-06, "loss": 40.0134, "step": 128630 }, { "epoch": 0.2598609388446046, "grad_norm": 108.46620178222656, "learning_rate": 9.265205814948814e-06, "loss": 21.3176, "step": 128640 }, { "epoch": 0.2598811394772884, "grad_norm": 237.70730590820312, "learning_rate": 9.265023646764409e-06, "loss": 26.6285, "step": 128650 }, { "epoch": 0.25990134010997223, "grad_norm": 1318.1649169921875, "learning_rate": 9.264841457792795e-06, "loss": 34.5219, "step": 128660 }, { "epoch": 0.25992154074265605, "grad_norm": 185.10508728027344, "learning_rate": 9.264659248034861e-06, "loss": 39.616, "step": 128670 }, { "epoch": 0.2599417413753399, "grad_norm": 173.03404235839844, "learning_rate": 
9.264477017491496e-06, "loss": 9.3008, "step": 128680 }, { "epoch": 0.2599619420080237, "grad_norm": 412.54022216796875, "learning_rate": 9.264294766163587e-06, "loss": 26.4566, "step": 128690 }, { "epoch": 0.2599821426407075, "grad_norm": 589.6472778320312, "learning_rate": 9.264112494052022e-06, "loss": 18.9564, "step": 128700 }, { "epoch": 0.26000234327339133, "grad_norm": 354.973388671875, "learning_rate": 9.26393020115769e-06, "loss": 36.5721, "step": 128710 }, { "epoch": 0.26002254390607515, "grad_norm": 221.44171142578125, "learning_rate": 9.26374788748148e-06, "loss": 23.2721, "step": 128720 }, { "epoch": 0.260042744538759, "grad_norm": 152.46949768066406, "learning_rate": 9.263565553024279e-06, "loss": 30.7172, "step": 128730 }, { "epoch": 0.2600629451714428, "grad_norm": 396.7294921875, "learning_rate": 9.263383197786978e-06, "loss": 18.8694, "step": 128740 }, { "epoch": 0.26008314580412656, "grad_norm": 647.7705078125, "learning_rate": 9.263200821770462e-06, "loss": 24.1541, "step": 128750 }, { "epoch": 0.2601033464368104, "grad_norm": 296.6133728027344, "learning_rate": 9.263018424975624e-06, "loss": 23.7705, "step": 128760 }, { "epoch": 0.2601235470694942, "grad_norm": 296.7532958984375, "learning_rate": 9.262836007403352e-06, "loss": 18.1715, "step": 128770 }, { "epoch": 0.260143747702178, "grad_norm": 285.9989318847656, "learning_rate": 9.262653569054532e-06, "loss": 22.2177, "step": 128780 }, { "epoch": 0.26016394833486184, "grad_norm": 214.42108154296875, "learning_rate": 9.262471109930056e-06, "loss": 14.9034, "step": 128790 }, { "epoch": 0.26018414896754566, "grad_norm": 420.13201904296875, "learning_rate": 9.262288630030814e-06, "loss": 33.405, "step": 128800 }, { "epoch": 0.2602043496002295, "grad_norm": 779.2055053710938, "learning_rate": 9.262106129357693e-06, "loss": 22.0531, "step": 128810 }, { "epoch": 0.2602245502329133, "grad_norm": 295.20928955078125, "learning_rate": 9.261923607911584e-06, "loss": 19.7791, "step": 128820 }, { "epoch": 0.2602447508655971, "grad_norm": 365.7851257324219, "learning_rate": 9.261741065693377e-06, "loss": 19.8517, "step": 128830 }, { "epoch": 0.26026495149828094, "grad_norm": 494.4757080078125, "learning_rate": 9.26155850270396e-06, "loss": 12.8213, "step": 128840 }, { "epoch": 0.26028515213096476, "grad_norm": 258.4847412109375, "learning_rate": 9.261375918944224e-06, "loss": 18.8568, "step": 128850 }, { "epoch": 0.2603053527636486, "grad_norm": 369.2894287109375, "learning_rate": 9.261193314415058e-06, "loss": 16.7122, "step": 128860 }, { "epoch": 0.2603255533963324, "grad_norm": 539.140869140625, "learning_rate": 9.261010689117353e-06, "loss": 40.7588, "step": 128870 }, { "epoch": 0.26034575402901616, "grad_norm": 216.14306640625, "learning_rate": 9.260828043051999e-06, "loss": 24.3153, "step": 128880 }, { "epoch": 0.2603659546617, "grad_norm": 368.29034423828125, "learning_rate": 9.260645376219887e-06, "loss": 14.8069, "step": 128890 }, { "epoch": 0.2603861552943838, "grad_norm": 886.6476440429688, "learning_rate": 9.260462688621906e-06, "loss": 20.6665, "step": 128900 }, { "epoch": 0.2604063559270676, "grad_norm": 649.2222290039062, "learning_rate": 9.260279980258945e-06, "loss": 21.8367, "step": 128910 }, { "epoch": 0.26042655655975144, "grad_norm": 171.771728515625, "learning_rate": 9.260097251131896e-06, "loss": 13.6678, "step": 128920 }, { "epoch": 0.26044675719243526, "grad_norm": 389.5770263671875, "learning_rate": 9.259914501241651e-06, "loss": 22.7834, "step": 128930 }, { "epoch": 0.2604669578251191, "grad_norm": 
237.25209045410156, "learning_rate": 9.259731730589099e-06, "loss": 21.3835, "step": 128940 }, { "epoch": 0.2604871584578029, "grad_norm": 442.96246337890625, "learning_rate": 9.25954893917513e-06, "loss": 21.5349, "step": 128950 }, { "epoch": 0.2605073590904867, "grad_norm": 955.6649780273438, "learning_rate": 9.259366127000637e-06, "loss": 15.3926, "step": 128960 }, { "epoch": 0.26052755972317054, "grad_norm": 894.2137451171875, "learning_rate": 9.259183294066512e-06, "loss": 25.5139, "step": 128970 }, { "epoch": 0.26054776035585436, "grad_norm": 378.1171569824219, "learning_rate": 9.259000440373643e-06, "loss": 34.2166, "step": 128980 }, { "epoch": 0.2605679609885382, "grad_norm": 427.4106140136719, "learning_rate": 9.258817565922919e-06, "loss": 25.1068, "step": 128990 }, { "epoch": 0.260588161621222, "grad_norm": 396.2770690917969, "learning_rate": 9.25863467071524e-06, "loss": 17.0746, "step": 129000 }, { "epoch": 0.26060836225390577, "grad_norm": 768.878662109375, "learning_rate": 9.258451754751488e-06, "loss": 27.6665, "step": 129010 }, { "epoch": 0.2606285628865896, "grad_norm": 377.0856628417969, "learning_rate": 9.25826881803256e-06, "loss": 23.5891, "step": 129020 }, { "epoch": 0.2606487635192734, "grad_norm": 942.701171875, "learning_rate": 9.258085860559348e-06, "loss": 33.9537, "step": 129030 }, { "epoch": 0.2606689641519572, "grad_norm": 1123.97509765625, "learning_rate": 9.257902882332739e-06, "loss": 17.8747, "step": 129040 }, { "epoch": 0.26068916478464105, "grad_norm": 260.6478576660156, "learning_rate": 9.25771988335363e-06, "loss": 28.6881, "step": 129050 }, { "epoch": 0.26070936541732487, "grad_norm": 357.36151123046875, "learning_rate": 9.25753686362291e-06, "loss": 33.5757, "step": 129060 }, { "epoch": 0.2607295660500087, "grad_norm": 423.8187255859375, "learning_rate": 9.257353823141472e-06, "loss": 16.5748, "step": 129070 }, { "epoch": 0.2607497666826925, "grad_norm": 384.4149475097656, "learning_rate": 9.257170761910208e-06, "loss": 24.709, "step": 129080 }, { "epoch": 0.2607699673153763, "grad_norm": 1516.5589599609375, "learning_rate": 9.25698767993001e-06, "loss": 45.4619, "step": 129090 }, { "epoch": 0.26079016794806015, "grad_norm": 548.0400390625, "learning_rate": 9.256804577201768e-06, "loss": 39.1325, "step": 129100 }, { "epoch": 0.26081036858074397, "grad_norm": 751.7698974609375, "learning_rate": 9.25662145372638e-06, "loss": 33.602, "step": 129110 }, { "epoch": 0.2608305692134278, "grad_norm": 709.4901733398438, "learning_rate": 9.256438309504733e-06, "loss": 31.2311, "step": 129120 }, { "epoch": 0.2608507698461116, "grad_norm": 785.0630493164062, "learning_rate": 9.256255144537724e-06, "loss": 16.7887, "step": 129130 }, { "epoch": 0.26087097047879537, "grad_norm": 260.6936340332031, "learning_rate": 9.256071958826243e-06, "loss": 19.8295, "step": 129140 }, { "epoch": 0.2608911711114792, "grad_norm": 613.6012573242188, "learning_rate": 9.255888752371182e-06, "loss": 25.0147, "step": 129150 }, { "epoch": 0.260911371744163, "grad_norm": 464.50225830078125, "learning_rate": 9.255705525173437e-06, "loss": 21.9297, "step": 129160 }, { "epoch": 0.26093157237684683, "grad_norm": 494.2185974121094, "learning_rate": 9.255522277233899e-06, "loss": 31.056, "step": 129170 }, { "epoch": 0.26095177300953065, "grad_norm": 172.20408630371094, "learning_rate": 9.255339008553462e-06, "loss": 11.3265, "step": 129180 }, { "epoch": 0.26097197364221447, "grad_norm": 808.3148193359375, "learning_rate": 9.255155719133016e-06, "loss": 41.8549, "step": 129190 }, { "epoch": 
0.2609921742748983, "grad_norm": 479.7608642578125, "learning_rate": 9.25497240897346e-06, "loss": 26.3807, "step": 129200 }, { "epoch": 0.2610123749075821, "grad_norm": 312.90435791015625, "learning_rate": 9.254789078075684e-06, "loss": 20.9368, "step": 129210 }, { "epoch": 0.26103257554026593, "grad_norm": 249.3673095703125, "learning_rate": 9.254605726440582e-06, "loss": 28.2333, "step": 129220 }, { "epoch": 0.26105277617294975, "grad_norm": 8.875776290893555, "learning_rate": 9.254422354069048e-06, "loss": 47.21, "step": 129230 }, { "epoch": 0.26107297680563357, "grad_norm": 676.8912963867188, "learning_rate": 9.254238960961975e-06, "loss": 26.9934, "step": 129240 }, { "epoch": 0.2610931774383174, "grad_norm": 288.67999267578125, "learning_rate": 9.254055547120258e-06, "loss": 19.4308, "step": 129250 }, { "epoch": 0.26111337807100116, "grad_norm": 215.28472900390625, "learning_rate": 9.253872112544788e-06, "loss": 16.4805, "step": 129260 }, { "epoch": 0.261133578703685, "grad_norm": 202.24591064453125, "learning_rate": 9.253688657236463e-06, "loss": 29.6428, "step": 129270 }, { "epoch": 0.2611537793363688, "grad_norm": 246.48748779296875, "learning_rate": 9.253505181196176e-06, "loss": 34.1483, "step": 129280 }, { "epoch": 0.2611739799690526, "grad_norm": 745.95166015625, "learning_rate": 9.25332168442482e-06, "loss": 15.1879, "step": 129290 }, { "epoch": 0.26119418060173644, "grad_norm": 805.440185546875, "learning_rate": 9.25313816692329e-06, "loss": 24.1355, "step": 129300 }, { "epoch": 0.26121438123442026, "grad_norm": 663.4802856445312, "learning_rate": 9.252954628692479e-06, "loss": 19.649, "step": 129310 }, { "epoch": 0.2612345818671041, "grad_norm": 197.54844665527344, "learning_rate": 9.252771069733285e-06, "loss": 24.2695, "step": 129320 }, { "epoch": 0.2612547824997879, "grad_norm": 470.1011047363281, "learning_rate": 9.2525874900466e-06, "loss": 17.2617, "step": 129330 }, { "epoch": 0.2612749831324717, "grad_norm": 385.9554443359375, "learning_rate": 9.252403889633319e-06, "loss": 15.2263, "step": 129340 }, { "epoch": 0.26129518376515554, "grad_norm": 262.08868408203125, "learning_rate": 9.252220268494336e-06, "loss": 25.9276, "step": 129350 }, { "epoch": 0.26131538439783936, "grad_norm": 526.8399658203125, "learning_rate": 9.25203662663055e-06, "loss": 27.2101, "step": 129360 }, { "epoch": 0.2613355850305232, "grad_norm": 315.19610595703125, "learning_rate": 9.251852964042852e-06, "loss": 14.869, "step": 129370 }, { "epoch": 0.261355785663207, "grad_norm": 349.370361328125, "learning_rate": 9.251669280732137e-06, "loss": 40.9022, "step": 129380 }, { "epoch": 0.26137598629589076, "grad_norm": 304.42108154296875, "learning_rate": 9.251485576699302e-06, "loss": 45.1989, "step": 129390 }, { "epoch": 0.2613961869285746, "grad_norm": 348.2828369140625, "learning_rate": 9.251301851945244e-06, "loss": 16.924, "step": 129400 }, { "epoch": 0.2614163875612584, "grad_norm": 498.5204162597656, "learning_rate": 9.251118106470855e-06, "loss": 28.0091, "step": 129410 }, { "epoch": 0.2614365881939422, "grad_norm": 599.5923461914062, "learning_rate": 9.250934340277031e-06, "loss": 19.8481, "step": 129420 }, { "epoch": 0.26145678882662604, "grad_norm": 218.1306610107422, "learning_rate": 9.250750553364669e-06, "loss": 21.7586, "step": 129430 }, { "epoch": 0.26147698945930986, "grad_norm": 538.0509643554688, "learning_rate": 9.250566745734666e-06, "loss": 11.41, "step": 129440 }, { "epoch": 0.2614971900919937, "grad_norm": 306.4842529296875, "learning_rate": 9.250382917387915e-06, "loss": 
24.8353, "step": 129450 }, { "epoch": 0.2615173907246775, "grad_norm": 590.1456298828125, "learning_rate": 9.250199068325314e-06, "loss": 23.3935, "step": 129460 }, { "epoch": 0.2615375913573613, "grad_norm": 461.6360778808594, "learning_rate": 9.250015198547757e-06, "loss": 15.0752, "step": 129470 }, { "epoch": 0.26155779199004514, "grad_norm": 509.09942626953125, "learning_rate": 9.249831308056141e-06, "loss": 30.315, "step": 129480 }, { "epoch": 0.26157799262272896, "grad_norm": 353.9985656738281, "learning_rate": 9.249647396851364e-06, "loss": 24.9397, "step": 129490 }, { "epoch": 0.2615981932554128, "grad_norm": 305.169921875, "learning_rate": 9.24946346493432e-06, "loss": 22.4525, "step": 129500 }, { "epoch": 0.2616183938880966, "grad_norm": 231.9904327392578, "learning_rate": 9.249279512305907e-06, "loss": 10.315, "step": 129510 }, { "epoch": 0.26163859452078037, "grad_norm": 152.6187286376953, "learning_rate": 9.249095538967021e-06, "loss": 20.6671, "step": 129520 }, { "epoch": 0.2616587951534642, "grad_norm": 474.48828125, "learning_rate": 9.248911544918559e-06, "loss": 35.5178, "step": 129530 }, { "epoch": 0.261678995786148, "grad_norm": 339.6426696777344, "learning_rate": 9.248727530161417e-06, "loss": 16.1868, "step": 129540 }, { "epoch": 0.2616991964188318, "grad_norm": 502.6196594238281, "learning_rate": 9.248543494696493e-06, "loss": 32.4252, "step": 129550 }, { "epoch": 0.26171939705151565, "grad_norm": 104.3818359375, "learning_rate": 9.248359438524683e-06, "loss": 17.978, "step": 129560 }, { "epoch": 0.26173959768419947, "grad_norm": 536.156005859375, "learning_rate": 9.248175361646884e-06, "loss": 20.8205, "step": 129570 }, { "epoch": 0.2617597983168833, "grad_norm": 112.09039306640625, "learning_rate": 9.247991264063994e-06, "loss": 12.219, "step": 129580 }, { "epoch": 0.2617799989495671, "grad_norm": 381.011962890625, "learning_rate": 9.247807145776909e-06, "loss": 23.9607, "step": 129590 }, { "epoch": 0.2618001995822509, "grad_norm": 86.920166015625, "learning_rate": 9.247623006786529e-06, "loss": 16.1657, "step": 129600 }, { "epoch": 0.26182040021493475, "grad_norm": 379.3994140625, "learning_rate": 9.247438847093747e-06, "loss": 22.3879, "step": 129610 }, { "epoch": 0.26184060084761857, "grad_norm": 583.8914184570312, "learning_rate": 9.247254666699465e-06, "loss": 21.4429, "step": 129620 }, { "epoch": 0.2618608014803024, "grad_norm": 155.83740234375, "learning_rate": 9.247070465604578e-06, "loss": 22.1425, "step": 129630 }, { "epoch": 0.2618810021129862, "grad_norm": 0.0, "learning_rate": 9.246886243809985e-06, "loss": 25.3997, "step": 129640 }, { "epoch": 0.26190120274566997, "grad_norm": 722.5323486328125, "learning_rate": 9.246702001316584e-06, "loss": 23.2897, "step": 129650 }, { "epoch": 0.2619214033783538, "grad_norm": 293.03741455078125, "learning_rate": 9.246517738125271e-06, "loss": 18.687, "step": 129660 }, { "epoch": 0.2619416040110376, "grad_norm": 763.05859375, "learning_rate": 9.246333454236946e-06, "loss": 30.6125, "step": 129670 }, { "epoch": 0.26196180464372143, "grad_norm": 316.6152038574219, "learning_rate": 9.246149149652507e-06, "loss": 24.5053, "step": 129680 }, { "epoch": 0.26198200527640525, "grad_norm": 319.44903564453125, "learning_rate": 9.245964824372855e-06, "loss": 20.3691, "step": 129690 }, { "epoch": 0.26200220590908907, "grad_norm": 428.3174133300781, "learning_rate": 9.245780478398883e-06, "loss": 29.8476, "step": 129700 }, { "epoch": 0.2620224065417729, "grad_norm": 305.292236328125, "learning_rate": 9.245596111731492e-06, "loss": 
15.4894, "step": 129710 }, { "epoch": 0.2620426071744567, "grad_norm": 107.9541015625, "learning_rate": 9.245411724371578e-06, "loss": 18.3254, "step": 129720 }, { "epoch": 0.26206280780714053, "grad_norm": 519.9327392578125, "learning_rate": 9.245227316320046e-06, "loss": 14.6097, "step": 129730 }, { "epoch": 0.26208300843982435, "grad_norm": 408.0529479980469, "learning_rate": 9.245042887577789e-06, "loss": 22.4248, "step": 129740 }, { "epoch": 0.26210320907250817, "grad_norm": 772.3802490234375, "learning_rate": 9.244858438145709e-06, "loss": 46.9441, "step": 129750 }, { "epoch": 0.262123409705192, "grad_norm": 654.0269775390625, "learning_rate": 9.244673968024701e-06, "loss": 31.8628, "step": 129760 }, { "epoch": 0.2621436103378758, "grad_norm": 159.417724609375, "learning_rate": 9.24448947721567e-06, "loss": 22.3518, "step": 129770 }, { "epoch": 0.2621638109705596, "grad_norm": 395.1319274902344, "learning_rate": 9.24430496571951e-06, "loss": 15.4356, "step": 129780 }, { "epoch": 0.2621840116032434, "grad_norm": 196.5967559814453, "learning_rate": 9.244120433537126e-06, "loss": 13.7517, "step": 129790 }, { "epoch": 0.2622042122359272, "grad_norm": 434.8796081542969, "learning_rate": 9.24393588066941e-06, "loss": 33.0948, "step": 129800 }, { "epoch": 0.26222441286861103, "grad_norm": 1161.2562255859375, "learning_rate": 9.243751307117266e-06, "loss": 22.9127, "step": 129810 }, { "epoch": 0.26224461350129485, "grad_norm": 59.643428802490234, "learning_rate": 9.243566712881593e-06, "loss": 17.4929, "step": 129820 }, { "epoch": 0.2622648141339787, "grad_norm": 390.9410400390625, "learning_rate": 9.243382097963292e-06, "loss": 18.3477, "step": 129830 }, { "epoch": 0.2622850147666625, "grad_norm": 0.0, "learning_rate": 9.24319746236326e-06, "loss": 15.5247, "step": 129840 }, { "epoch": 0.2623052153993463, "grad_norm": 256.6327209472656, "learning_rate": 9.243012806082398e-06, "loss": 36.769, "step": 129850 }, { "epoch": 0.26232541603203013, "grad_norm": 471.3330383300781, "learning_rate": 9.242828129121607e-06, "loss": 15.0537, "step": 129860 }, { "epoch": 0.26234561666471395, "grad_norm": 514.5746459960938, "learning_rate": 9.242643431481783e-06, "loss": 26.198, "step": 129870 }, { "epoch": 0.2623658172973978, "grad_norm": 481.345458984375, "learning_rate": 9.242458713163834e-06, "loss": 22.4653, "step": 129880 }, { "epoch": 0.2623860179300816, "grad_norm": 155.65939331054688, "learning_rate": 9.242273974168655e-06, "loss": 30.5696, "step": 129890 }, { "epoch": 0.26240621856276536, "grad_norm": 383.5204162597656, "learning_rate": 9.242089214497146e-06, "loss": 36.1662, "step": 129900 }, { "epoch": 0.2624264191954492, "grad_norm": 235.37841796875, "learning_rate": 9.241904434150208e-06, "loss": 29.4283, "step": 129910 }, { "epoch": 0.262446619828133, "grad_norm": 1004.9755249023438, "learning_rate": 9.241719633128743e-06, "loss": 55.9024, "step": 129920 }, { "epoch": 0.2624668204608168, "grad_norm": 759.134765625, "learning_rate": 9.241534811433651e-06, "loss": 29.0743, "step": 129930 }, { "epoch": 0.26248702109350064, "grad_norm": 522.8464965820312, "learning_rate": 9.241349969065834e-06, "loss": 14.5997, "step": 129940 }, { "epoch": 0.26250722172618446, "grad_norm": 476.12823486328125, "learning_rate": 9.241165106026189e-06, "loss": 32.0307, "step": 129950 }, { "epoch": 0.2625274223588683, "grad_norm": 395.36431884765625, "learning_rate": 9.24098022231562e-06, "loss": 29.4751, "step": 129960 }, { "epoch": 0.2625476229915521, "grad_norm": 204.78297424316406, "learning_rate": 
9.24079531793503e-06, "loss": 25.3252, "step": 129970 }, { "epoch": 0.2625678236242359, "grad_norm": 29.498233795166016, "learning_rate": 9.24061039288532e-06, "loss": 14.5478, "step": 129980 }, { "epoch": 0.26258802425691974, "grad_norm": 4.016162872314453, "learning_rate": 9.240425447167384e-06, "loss": 22.3436, "step": 129990 }, { "epoch": 0.26260822488960356, "grad_norm": 320.4252014160156, "learning_rate": 9.24024048078213e-06, "loss": 31.1223, "step": 130000 }, { "epoch": 0.2626284255222874, "grad_norm": 149.36740112304688, "learning_rate": 9.24005549373046e-06, "loss": 11.1488, "step": 130010 }, { "epoch": 0.2626486261549712, "grad_norm": 415.9408264160156, "learning_rate": 9.239870486013272e-06, "loss": 30.8264, "step": 130020 }, { "epoch": 0.26266882678765496, "grad_norm": 311.9599609375, "learning_rate": 9.23968545763147e-06, "loss": 23.694, "step": 130030 }, { "epoch": 0.2626890274203388, "grad_norm": 177.8672637939453, "learning_rate": 9.239500408585956e-06, "loss": 24.0172, "step": 130040 }, { "epoch": 0.2627092280530226, "grad_norm": 159.47459411621094, "learning_rate": 9.239315338877632e-06, "loss": 16.004, "step": 130050 }, { "epoch": 0.2627294286857064, "grad_norm": 143.70700073242188, "learning_rate": 9.239130248507398e-06, "loss": 10.5977, "step": 130060 }, { "epoch": 0.26274962931839024, "grad_norm": 714.693603515625, "learning_rate": 9.238945137476157e-06, "loss": 23.609, "step": 130070 }, { "epoch": 0.26276982995107406, "grad_norm": 313.3095703125, "learning_rate": 9.23876000578481e-06, "loss": 29.8307, "step": 130080 }, { "epoch": 0.2627900305837579, "grad_norm": 882.6751708984375, "learning_rate": 9.238574853434264e-06, "loss": 17.9319, "step": 130090 }, { "epoch": 0.2628102312164417, "grad_norm": 421.0577087402344, "learning_rate": 9.238389680425417e-06, "loss": 31.1081, "step": 130100 }, { "epoch": 0.2628304318491255, "grad_norm": 305.2528991699219, "learning_rate": 9.238204486759172e-06, "loss": 28.0084, "step": 130110 }, { "epoch": 0.26285063248180934, "grad_norm": 252.5142364501953, "learning_rate": 9.238019272436434e-06, "loss": 24.6949, "step": 130120 }, { "epoch": 0.26287083311449316, "grad_norm": 502.39666748046875, "learning_rate": 9.237834037458102e-06, "loss": 16.0274, "step": 130130 }, { "epoch": 0.262891033747177, "grad_norm": 710.8877563476562, "learning_rate": 9.237648781825082e-06, "loss": 31.8371, "step": 130140 }, { "epoch": 0.2629112343798608, "grad_norm": 328.3483581542969, "learning_rate": 9.237463505538277e-06, "loss": 29.8537, "step": 130150 }, { "epoch": 0.26293143501254457, "grad_norm": 227.37306213378906, "learning_rate": 9.237278208598587e-06, "loss": 22.7519, "step": 130160 }, { "epoch": 0.2629516356452284, "grad_norm": 210.96592712402344, "learning_rate": 9.237092891006918e-06, "loss": 17.8267, "step": 130170 }, { "epoch": 0.2629718362779122, "grad_norm": 525.88037109375, "learning_rate": 9.236907552764171e-06, "loss": 16.9909, "step": 130180 }, { "epoch": 0.262992036910596, "grad_norm": 43.60026931762695, "learning_rate": 9.236722193871252e-06, "loss": 16.2976, "step": 130190 }, { "epoch": 0.26301223754327985, "grad_norm": 512.9727172851562, "learning_rate": 9.236536814329062e-06, "loss": 21.3238, "step": 130200 }, { "epoch": 0.26303243817596367, "grad_norm": 68.06608581542969, "learning_rate": 9.236351414138505e-06, "loss": 14.0629, "step": 130210 }, { "epoch": 0.2630526388086475, "grad_norm": 297.6448669433594, "learning_rate": 9.236165993300486e-06, "loss": 15.0702, "step": 130220 }, { "epoch": 0.2630728394413313, "grad_norm": 
377.9416809082031, "learning_rate": 9.235980551815907e-06, "loss": 14.0996, "step": 130230 }, { "epoch": 0.2630930400740151, "grad_norm": 254.04336547851562, "learning_rate": 9.235795089685673e-06, "loss": 25.687, "step": 130240 }, { "epoch": 0.26311324070669895, "grad_norm": 416.20892333984375, "learning_rate": 9.235609606910687e-06, "loss": 22.9083, "step": 130250 }, { "epoch": 0.26313344133938277, "grad_norm": 98.65184783935547, "learning_rate": 9.235424103491853e-06, "loss": 21.522, "step": 130260 }, { "epoch": 0.2631536419720666, "grad_norm": 712.9963989257812, "learning_rate": 9.235238579430077e-06, "loss": 20.6476, "step": 130270 }, { "epoch": 0.2631738426047504, "grad_norm": 431.2131042480469, "learning_rate": 9.235053034726261e-06, "loss": 16.2226, "step": 130280 }, { "epoch": 0.26319404323743417, "grad_norm": 429.3744812011719, "learning_rate": 9.23486746938131e-06, "loss": 23.2903, "step": 130290 }, { "epoch": 0.263214243870118, "grad_norm": 937.98583984375, "learning_rate": 9.234681883396129e-06, "loss": 16.3498, "step": 130300 }, { "epoch": 0.2632344445028018, "grad_norm": 419.97271728515625, "learning_rate": 9.234496276771622e-06, "loss": 12.2704, "step": 130310 }, { "epoch": 0.26325464513548563, "grad_norm": 261.0768127441406, "learning_rate": 9.234310649508694e-06, "loss": 18.5051, "step": 130320 }, { "epoch": 0.26327484576816945, "grad_norm": 640.5191650390625, "learning_rate": 9.23412500160825e-06, "loss": 30.3407, "step": 130330 }, { "epoch": 0.26329504640085327, "grad_norm": 0.0, "learning_rate": 9.233939333071193e-06, "loss": 20.475, "step": 130340 }, { "epoch": 0.2633152470335371, "grad_norm": 270.3042297363281, "learning_rate": 9.233753643898428e-06, "loss": 20.3726, "step": 130350 }, { "epoch": 0.2633354476662209, "grad_norm": 309.3485107421875, "learning_rate": 9.233567934090864e-06, "loss": 10.1988, "step": 130360 }, { "epoch": 0.26335564829890473, "grad_norm": 881.8538208007812, "learning_rate": 9.233382203649402e-06, "loss": 43.7343, "step": 130370 }, { "epoch": 0.26337584893158855, "grad_norm": 261.611572265625, "learning_rate": 9.23319645257495e-06, "loss": 19.0534, "step": 130380 }, { "epoch": 0.26339604956427237, "grad_norm": 305.3958435058594, "learning_rate": 9.233010680868409e-06, "loss": 22.8678, "step": 130390 }, { "epoch": 0.2634162501969562, "grad_norm": 885.7476806640625, "learning_rate": 9.232824888530689e-06, "loss": 20.3841, "step": 130400 }, { "epoch": 0.26343645082964, "grad_norm": 751.0313110351562, "learning_rate": 9.232639075562695e-06, "loss": 25.4823, "step": 130410 }, { "epoch": 0.2634566514623238, "grad_norm": 535.940673828125, "learning_rate": 9.23245324196533e-06, "loss": 33.4305, "step": 130420 }, { "epoch": 0.2634768520950076, "grad_norm": 330.66009521484375, "learning_rate": 9.232267387739502e-06, "loss": 24.7655, "step": 130430 }, { "epoch": 0.2634970527276914, "grad_norm": 475.0093078613281, "learning_rate": 9.232081512886116e-06, "loss": 17.1832, "step": 130440 }, { "epoch": 0.26351725336037524, "grad_norm": 1036.0198974609375, "learning_rate": 9.231895617406076e-06, "loss": 17.8261, "step": 130450 }, { "epoch": 0.26353745399305906, "grad_norm": 1016.4810180664062, "learning_rate": 9.231709701300293e-06, "loss": 38.8252, "step": 130460 }, { "epoch": 0.2635576546257429, "grad_norm": 725.9237060546875, "learning_rate": 9.23152376456967e-06, "loss": 24.6006, "step": 130470 }, { "epoch": 0.2635778552584267, "grad_norm": 305.17742919921875, "learning_rate": 9.231337807215111e-06, "loss": 23.3218, "step": 130480 }, { "epoch": 
0.2635980558911105, "grad_norm": 383.8266296386719, "learning_rate": 9.231151829237527e-06, "loss": 30.612, "step": 130490 }, { "epoch": 0.26361825652379434, "grad_norm": 311.66461181640625, "learning_rate": 9.230965830637821e-06, "loss": 16.6466, "step": 130500 }, { "epoch": 0.26363845715647816, "grad_norm": 391.4200439453125, "learning_rate": 9.230779811416901e-06, "loss": 13.5666, "step": 130510 }, { "epoch": 0.263658657789162, "grad_norm": 273.90948486328125, "learning_rate": 9.230593771575673e-06, "loss": 19.3705, "step": 130520 }, { "epoch": 0.2636788584218458, "grad_norm": 291.2491760253906, "learning_rate": 9.230407711115043e-06, "loss": 14.9211, "step": 130530 }, { "epoch": 0.26369905905452956, "grad_norm": 964.318115234375, "learning_rate": 9.230221630035921e-06, "loss": 18.9726, "step": 130540 }, { "epoch": 0.2637192596872134, "grad_norm": 253.50123596191406, "learning_rate": 9.230035528339212e-06, "loss": 15.7515, "step": 130550 }, { "epoch": 0.2637394603198972, "grad_norm": 256.3843688964844, "learning_rate": 9.229849406025821e-06, "loss": 24.3174, "step": 130560 }, { "epoch": 0.263759660952581, "grad_norm": 745.5055541992188, "learning_rate": 9.22966326309666e-06, "loss": 23.5996, "step": 130570 }, { "epoch": 0.26377986158526484, "grad_norm": 312.48614501953125, "learning_rate": 9.22947709955263e-06, "loss": 9.0544, "step": 130580 }, { "epoch": 0.26380006221794866, "grad_norm": 367.24407958984375, "learning_rate": 9.229290915394643e-06, "loss": 22.1031, "step": 130590 }, { "epoch": 0.2638202628506325, "grad_norm": 169.04710388183594, "learning_rate": 9.229104710623604e-06, "loss": 17.1724, "step": 130600 }, { "epoch": 0.2638404634833163, "grad_norm": 228.10475158691406, "learning_rate": 9.228918485240423e-06, "loss": 16.7043, "step": 130610 }, { "epoch": 0.2638606641160001, "grad_norm": 189.06100463867188, "learning_rate": 9.228732239246005e-06, "loss": 16.8849, "step": 130620 }, { "epoch": 0.26388086474868394, "grad_norm": 416.0789489746094, "learning_rate": 9.22854597264126e-06, "loss": 13.3817, "step": 130630 }, { "epoch": 0.26390106538136776, "grad_norm": 662.724609375, "learning_rate": 9.228359685427095e-06, "loss": 47.7258, "step": 130640 }, { "epoch": 0.2639212660140516, "grad_norm": 282.7579650878906, "learning_rate": 9.228173377604417e-06, "loss": 20.3818, "step": 130650 }, { "epoch": 0.2639414666467354, "grad_norm": 272.92059326171875, "learning_rate": 9.227987049174133e-06, "loss": 15.7969, "step": 130660 }, { "epoch": 0.26396166727941917, "grad_norm": 249.759521484375, "learning_rate": 9.227800700137156e-06, "loss": 39.5092, "step": 130670 }, { "epoch": 0.263981867912103, "grad_norm": 421.3340148925781, "learning_rate": 9.22761433049439e-06, "loss": 15.4833, "step": 130680 }, { "epoch": 0.2640020685447868, "grad_norm": 301.9707946777344, "learning_rate": 9.227427940246744e-06, "loss": 17.5809, "step": 130690 }, { "epoch": 0.2640222691774706, "grad_norm": 400.67236328125, "learning_rate": 9.227241529395127e-06, "loss": 26.925, "step": 130700 }, { "epoch": 0.26404246981015445, "grad_norm": 619.1936645507812, "learning_rate": 9.22705509794045e-06, "loss": 19.8829, "step": 130710 }, { "epoch": 0.26406267044283827, "grad_norm": 857.1577758789062, "learning_rate": 9.226868645883616e-06, "loss": 34.858, "step": 130720 }, { "epoch": 0.2640828710755221, "grad_norm": 753.0050048828125, "learning_rate": 9.226682173225537e-06, "loss": 16.9288, "step": 130730 }, { "epoch": 0.2641030717082059, "grad_norm": 195.84051513671875, "learning_rate": 9.226495679967123e-06, "loss": 
29.0746, "step": 130740 }, { "epoch": 0.2641232723408897, "grad_norm": 890.2536010742188, "learning_rate": 9.226309166109281e-06, "loss": 37.294, "step": 130750 }, { "epoch": 0.26414347297357355, "grad_norm": 576.2210693359375, "learning_rate": 9.226122631652921e-06, "loss": 26.1176, "step": 130760 }, { "epoch": 0.26416367360625737, "grad_norm": 340.2896728515625, "learning_rate": 9.225936076598952e-06, "loss": 13.9397, "step": 130770 }, { "epoch": 0.2641838742389412, "grad_norm": 226.89231872558594, "learning_rate": 9.225749500948283e-06, "loss": 16.8131, "step": 130780 }, { "epoch": 0.264204074871625, "grad_norm": 303.71234130859375, "learning_rate": 9.225562904701823e-06, "loss": 37.1931, "step": 130790 }, { "epoch": 0.26422427550430877, "grad_norm": 440.4466552734375, "learning_rate": 9.225376287860484e-06, "loss": 25.3327, "step": 130800 }, { "epoch": 0.2642444761369926, "grad_norm": 573.6793212890625, "learning_rate": 9.22518965042517e-06, "loss": 28.6933, "step": 130810 }, { "epoch": 0.2642646767696764, "grad_norm": 279.36041259765625, "learning_rate": 9.225002992396797e-06, "loss": 22.1667, "step": 130820 }, { "epoch": 0.26428487740236023, "grad_norm": 210.71812438964844, "learning_rate": 9.22481631377627e-06, "loss": 14.5621, "step": 130830 }, { "epoch": 0.26430507803504405, "grad_norm": 578.2293701171875, "learning_rate": 9.224629614564502e-06, "loss": 15.9755, "step": 130840 }, { "epoch": 0.26432527866772787, "grad_norm": 100.01342010498047, "learning_rate": 9.224442894762401e-06, "loss": 24.9578, "step": 130850 }, { "epoch": 0.2643454793004117, "grad_norm": 2.627403974533081, "learning_rate": 9.224256154370878e-06, "loss": 21.8294, "step": 130860 }, { "epoch": 0.2643656799330955, "grad_norm": 225.63209533691406, "learning_rate": 9.224069393390843e-06, "loss": 25.4549, "step": 130870 }, { "epoch": 0.26438588056577933, "grad_norm": 549.8444213867188, "learning_rate": 9.223882611823205e-06, "loss": 32.2733, "step": 130880 }, { "epoch": 0.26440608119846315, "grad_norm": 439.44549560546875, "learning_rate": 9.223695809668876e-06, "loss": 19.6963, "step": 130890 }, { "epoch": 0.26442628183114697, "grad_norm": 125.24061584472656, "learning_rate": 9.223508986928766e-06, "loss": 18.845, "step": 130900 }, { "epoch": 0.2644464824638308, "grad_norm": 212.72653198242188, "learning_rate": 9.223322143603786e-06, "loss": 27.8434, "step": 130910 }, { "epoch": 0.2644666830965146, "grad_norm": 1187.4769287109375, "learning_rate": 9.223135279694845e-06, "loss": 17.0523, "step": 130920 }, { "epoch": 0.2644868837291984, "grad_norm": 213.18798828125, "learning_rate": 9.222948395202855e-06, "loss": 18.4288, "step": 130930 }, { "epoch": 0.2645070843618822, "grad_norm": 890.8534545898438, "learning_rate": 9.222761490128726e-06, "loss": 28.4153, "step": 130940 }, { "epoch": 0.264527284994566, "grad_norm": 285.16400146484375, "learning_rate": 9.222574564473372e-06, "loss": 23.9872, "step": 130950 }, { "epoch": 0.26454748562724983, "grad_norm": 436.7076110839844, "learning_rate": 9.222387618237701e-06, "loss": 28.0928, "step": 130960 }, { "epoch": 0.26456768625993365, "grad_norm": 523.7005004882812, "learning_rate": 9.222200651422624e-06, "loss": 18.3557, "step": 130970 }, { "epoch": 0.2645878868926175, "grad_norm": 363.5169372558594, "learning_rate": 9.222013664029053e-06, "loss": 17.662, "step": 130980 }, { "epoch": 0.2646080875253013, "grad_norm": 249.8449249267578, "learning_rate": 9.2218266560579e-06, "loss": 19.4438, "step": 130990 }, { "epoch": 0.2646282881579851, "grad_norm": 404.82635498046875, 
"learning_rate": 9.221639627510076e-06, "loss": 14.5118, "step": 131000 }, { "epoch": 0.26464848879066893, "grad_norm": 347.28900146484375, "learning_rate": 9.221452578386492e-06, "loss": 29.3885, "step": 131010 }, { "epoch": 0.26466868942335275, "grad_norm": 310.66021728515625, "learning_rate": 9.221265508688061e-06, "loss": 17.9626, "step": 131020 }, { "epoch": 0.2646888900560366, "grad_norm": 421.09246826171875, "learning_rate": 9.221078418415692e-06, "loss": 19.3031, "step": 131030 }, { "epoch": 0.2647090906887204, "grad_norm": 373.6748352050781, "learning_rate": 9.220891307570301e-06, "loss": 11.5313, "step": 131040 }, { "epoch": 0.26472929132140416, "grad_norm": 375.723876953125, "learning_rate": 9.220704176152798e-06, "loss": 16.8964, "step": 131050 }, { "epoch": 0.264749491954088, "grad_norm": 82.238525390625, "learning_rate": 9.220517024164092e-06, "loss": 14.9036, "step": 131060 }, { "epoch": 0.2647696925867718, "grad_norm": 649.5223999023438, "learning_rate": 9.2203298516051e-06, "loss": 16.9638, "step": 131070 }, { "epoch": 0.2647898932194556, "grad_norm": 438.2021789550781, "learning_rate": 9.220142658476732e-06, "loss": 24.1164, "step": 131080 }, { "epoch": 0.26481009385213944, "grad_norm": 409.599365234375, "learning_rate": 9.2199554447799e-06, "loss": 13.8918, "step": 131090 }, { "epoch": 0.26483029448482326, "grad_norm": 563.10546875, "learning_rate": 9.219768210515518e-06, "loss": 19.8584, "step": 131100 }, { "epoch": 0.2648504951175071, "grad_norm": 448.8885803222656, "learning_rate": 9.219580955684495e-06, "loss": 30.1777, "step": 131110 }, { "epoch": 0.2648706957501909, "grad_norm": 115.77897644042969, "learning_rate": 9.21939368028775e-06, "loss": 25.812, "step": 131120 }, { "epoch": 0.2648908963828747, "grad_norm": 526.2110595703125, "learning_rate": 9.21920638432619e-06, "loss": 17.335, "step": 131130 }, { "epoch": 0.26491109701555854, "grad_norm": 590.3601684570312, "learning_rate": 9.219019067800728e-06, "loss": 31.8216, "step": 131140 }, { "epoch": 0.26493129764824236, "grad_norm": 239.01412963867188, "learning_rate": 9.218831730712281e-06, "loss": 25.5872, "step": 131150 }, { "epoch": 0.2649514982809262, "grad_norm": 281.72625732421875, "learning_rate": 9.218644373061759e-06, "loss": 28.0989, "step": 131160 }, { "epoch": 0.26497169891361, "grad_norm": 679.1547241210938, "learning_rate": 9.218456994850076e-06, "loss": 27.0523, "step": 131170 }, { "epoch": 0.26499189954629376, "grad_norm": 516.555908203125, "learning_rate": 9.218269596078145e-06, "loss": 18.7261, "step": 131180 }, { "epoch": 0.2650121001789776, "grad_norm": 611.985595703125, "learning_rate": 9.21808217674688e-06, "loss": 29.438, "step": 131190 }, { "epoch": 0.2650323008116614, "grad_norm": 415.1947326660156, "learning_rate": 9.217894736857195e-06, "loss": 32.3591, "step": 131200 }, { "epoch": 0.2650525014443452, "grad_norm": 424.1458435058594, "learning_rate": 9.217707276410002e-06, "loss": 19.8318, "step": 131210 }, { "epoch": 0.26507270207702904, "grad_norm": 426.78851318359375, "learning_rate": 9.217519795406214e-06, "loss": 18.5401, "step": 131220 }, { "epoch": 0.26509290270971286, "grad_norm": 233.7823486328125, "learning_rate": 9.217332293846747e-06, "loss": 15.6772, "step": 131230 }, { "epoch": 0.2651131033423967, "grad_norm": 418.2563781738281, "learning_rate": 9.217144771732515e-06, "loss": 17.4979, "step": 131240 }, { "epoch": 0.2651333039750805, "grad_norm": 775.1187133789062, "learning_rate": 9.21695722906443e-06, "loss": 35.5911, "step": 131250 }, { "epoch": 0.2651535046077643, 
"grad_norm": 476.4820556640625, "learning_rate": 9.216769665843406e-06, "loss": 20.8171, "step": 131260 }, { "epoch": 0.26517370524044814, "grad_norm": 411.09490966796875, "learning_rate": 9.216582082070359e-06, "loss": 25.5806, "step": 131270 }, { "epoch": 0.26519390587313196, "grad_norm": 180.24594116210938, "learning_rate": 9.216394477746202e-06, "loss": 22.324, "step": 131280 }, { "epoch": 0.2652141065058158, "grad_norm": 604.4251098632812, "learning_rate": 9.21620685287185e-06, "loss": 23.6203, "step": 131290 }, { "epoch": 0.2652343071384996, "grad_norm": 639.6996459960938, "learning_rate": 9.216019207448216e-06, "loss": 25.4689, "step": 131300 }, { "epoch": 0.26525450777118337, "grad_norm": 288.1322937011719, "learning_rate": 9.215831541476217e-06, "loss": 27.2807, "step": 131310 }, { "epoch": 0.2652747084038672, "grad_norm": 364.5860900878906, "learning_rate": 9.215643854956766e-06, "loss": 34.1475, "step": 131320 }, { "epoch": 0.265294909036551, "grad_norm": 18.383115768432617, "learning_rate": 9.215456147890778e-06, "loss": 25.5754, "step": 131330 }, { "epoch": 0.2653151096692348, "grad_norm": 436.0059509277344, "learning_rate": 9.215268420279168e-06, "loss": 24.1076, "step": 131340 }, { "epoch": 0.26533531030191865, "grad_norm": 48.3547248840332, "learning_rate": 9.215080672122854e-06, "loss": 16.4566, "step": 131350 }, { "epoch": 0.26535551093460247, "grad_norm": 45.05363082885742, "learning_rate": 9.214892903422745e-06, "loss": 26.8547, "step": 131360 }, { "epoch": 0.2653757115672863, "grad_norm": 28.296865463256836, "learning_rate": 9.214705114179759e-06, "loss": 24.4135, "step": 131370 }, { "epoch": 0.2653959121999701, "grad_norm": 1304.309814453125, "learning_rate": 9.214517304394813e-06, "loss": 23.6039, "step": 131380 }, { "epoch": 0.2654161128326539, "grad_norm": 82.30618286132812, "learning_rate": 9.214329474068818e-06, "loss": 15.2886, "step": 131390 }, { "epoch": 0.26543631346533775, "grad_norm": 674.7920532226562, "learning_rate": 9.214141623202694e-06, "loss": 29.9275, "step": 131400 }, { "epoch": 0.26545651409802157, "grad_norm": 362.3443603515625, "learning_rate": 9.213953751797355e-06, "loss": 15.6839, "step": 131410 }, { "epoch": 0.2654767147307054, "grad_norm": 501.14617919921875, "learning_rate": 9.213765859853717e-06, "loss": 16.9059, "step": 131420 }, { "epoch": 0.2654969153633892, "grad_norm": 593.780517578125, "learning_rate": 9.213577947372694e-06, "loss": 25.2237, "step": 131430 }, { "epoch": 0.26551711599607297, "grad_norm": 464.27447509765625, "learning_rate": 9.213390014355204e-06, "loss": 31.1519, "step": 131440 }, { "epoch": 0.2655373166287568, "grad_norm": 238.30935668945312, "learning_rate": 9.213202060802162e-06, "loss": 51.9815, "step": 131450 }, { "epoch": 0.2655575172614406, "grad_norm": 99.66514587402344, "learning_rate": 9.213014086714484e-06, "loss": 20.4605, "step": 131460 }, { "epoch": 0.26557771789412443, "grad_norm": 178.2960205078125, "learning_rate": 9.212826092093085e-06, "loss": 15.421, "step": 131470 }, { "epoch": 0.26559791852680825, "grad_norm": 472.04132080078125, "learning_rate": 9.212638076938885e-06, "loss": 19.2474, "step": 131480 }, { "epoch": 0.26561811915949207, "grad_norm": 318.9012451171875, "learning_rate": 9.212450041252797e-06, "loss": 26.4302, "step": 131490 }, { "epoch": 0.2656383197921759, "grad_norm": 324.2199401855469, "learning_rate": 9.21226198503574e-06, "loss": 23.9791, "step": 131500 }, { "epoch": 0.2656585204248597, "grad_norm": 645.5361938476562, "learning_rate": 9.212073908288626e-06, "loss": 16.8702, 
"step": 131510 }, { "epoch": 0.26567872105754353, "grad_norm": 997.70751953125, "learning_rate": 9.211885811012376e-06, "loss": 20.7685, "step": 131520 }, { "epoch": 0.26569892169022735, "grad_norm": 432.2757263183594, "learning_rate": 9.211697693207905e-06, "loss": 30.3959, "step": 131530 }, { "epoch": 0.26571912232291117, "grad_norm": 308.8063049316406, "learning_rate": 9.21150955487613e-06, "loss": 19.4911, "step": 131540 }, { "epoch": 0.265739322955595, "grad_norm": 431.20013427734375, "learning_rate": 9.21132139601797e-06, "loss": 40.1249, "step": 131550 }, { "epoch": 0.2657595235882788, "grad_norm": 701.2637329101562, "learning_rate": 9.211133216634339e-06, "loss": 26.772, "step": 131560 }, { "epoch": 0.2657797242209626, "grad_norm": 452.8524169921875, "learning_rate": 9.210945016726155e-06, "loss": 18.2151, "step": 131570 }, { "epoch": 0.2657999248536464, "grad_norm": 106.77520751953125, "learning_rate": 9.210756796294335e-06, "loss": 27.5683, "step": 131580 }, { "epoch": 0.2658201254863302, "grad_norm": 386.91998291015625, "learning_rate": 9.2105685553398e-06, "loss": 35.6824, "step": 131590 }, { "epoch": 0.26584032611901404, "grad_norm": 327.1443176269531, "learning_rate": 9.210380293863462e-06, "loss": 24.1842, "step": 131600 }, { "epoch": 0.26586052675169786, "grad_norm": 212.36473083496094, "learning_rate": 9.210192011866242e-06, "loss": 30.5665, "step": 131610 }, { "epoch": 0.2658807273843817, "grad_norm": 524.0457153320312, "learning_rate": 9.210003709349058e-06, "loss": 28.7755, "step": 131620 }, { "epoch": 0.2659009280170655, "grad_norm": 935.5753173828125, "learning_rate": 9.209815386312824e-06, "loss": 27.792, "step": 131630 }, { "epoch": 0.2659211286497493, "grad_norm": 70.88928985595703, "learning_rate": 9.209627042758462e-06, "loss": 17.7293, "step": 131640 }, { "epoch": 0.26594132928243314, "grad_norm": 119.21331024169922, "learning_rate": 9.209438678686888e-06, "loss": 18.8387, "step": 131650 }, { "epoch": 0.26596152991511696, "grad_norm": 415.9164733886719, "learning_rate": 9.209250294099021e-06, "loss": 23.0434, "step": 131660 }, { "epoch": 0.2659817305478008, "grad_norm": 566.3864135742188, "learning_rate": 9.209061888995777e-06, "loss": 14.7185, "step": 131670 }, { "epoch": 0.2660019311804846, "grad_norm": 713.2189331054688, "learning_rate": 9.208873463378078e-06, "loss": 20.9568, "step": 131680 }, { "epoch": 0.26602213181316836, "grad_norm": 374.5089416503906, "learning_rate": 9.208685017246839e-06, "loss": 20.8218, "step": 131690 }, { "epoch": 0.2660423324458522, "grad_norm": 434.3398132324219, "learning_rate": 9.208496550602979e-06, "loss": 24.3201, "step": 131700 }, { "epoch": 0.266062533078536, "grad_norm": 401.7296142578125, "learning_rate": 9.208308063447418e-06, "loss": 17.9843, "step": 131710 }, { "epoch": 0.2660827337112198, "grad_norm": 180.731689453125, "learning_rate": 9.208119555781074e-06, "loss": 24.8958, "step": 131720 }, { "epoch": 0.26610293434390364, "grad_norm": 126.87080383300781, "learning_rate": 9.207931027604867e-06, "loss": 15.78, "step": 131730 }, { "epoch": 0.26612313497658746, "grad_norm": 481.7561950683594, "learning_rate": 9.207742478919713e-06, "loss": 17.5088, "step": 131740 }, { "epoch": 0.2661433356092713, "grad_norm": 46.55644989013672, "learning_rate": 9.207553909726532e-06, "loss": 19.1678, "step": 131750 }, { "epoch": 0.2661635362419551, "grad_norm": 709.1443481445312, "learning_rate": 9.207365320026244e-06, "loss": 26.2364, "step": 131760 }, { "epoch": 0.2661837368746389, "grad_norm": 76.94552612304688, "learning_rate": 
9.207176709819768e-06, "loss": 21.8475, "step": 131770 }, { "epoch": 0.26620393750732274, "grad_norm": 422.1698303222656, "learning_rate": 9.206988079108023e-06, "loss": 22.4855, "step": 131780 }, { "epoch": 0.26622413814000656, "grad_norm": 344.1897277832031, "learning_rate": 9.206799427891928e-06, "loss": 14.6986, "step": 131790 }, { "epoch": 0.2662443387726904, "grad_norm": 1501.330078125, "learning_rate": 9.206610756172402e-06, "loss": 38.14, "step": 131800 }, { "epoch": 0.2662645394053742, "grad_norm": 646.0186767578125, "learning_rate": 9.206422063950368e-06, "loss": 40.8852, "step": 131810 }, { "epoch": 0.26628474003805797, "grad_norm": 139.66990661621094, "learning_rate": 9.206233351226742e-06, "loss": 22.6399, "step": 131820 }, { "epoch": 0.2663049406707418, "grad_norm": 247.84478759765625, "learning_rate": 9.206044618002443e-06, "loss": 22.2184, "step": 131830 }, { "epoch": 0.2663251413034256, "grad_norm": 1052.5062255859375, "learning_rate": 9.205855864278394e-06, "loss": 26.3953, "step": 131840 }, { "epoch": 0.2663453419361094, "grad_norm": 373.505615234375, "learning_rate": 9.205667090055513e-06, "loss": 27.9726, "step": 131850 }, { "epoch": 0.26636554256879325, "grad_norm": 596.9949951171875, "learning_rate": 9.205478295334722e-06, "loss": 24.5974, "step": 131860 }, { "epoch": 0.26638574320147707, "grad_norm": 127.57281494140625, "learning_rate": 9.20528948011694e-06, "loss": 15.9013, "step": 131870 }, { "epoch": 0.2664059438341609, "grad_norm": 152.38870239257812, "learning_rate": 9.205100644403084e-06, "loss": 25.7809, "step": 131880 }, { "epoch": 0.2664261444668447, "grad_norm": 296.1481018066406, "learning_rate": 9.20491178819408e-06, "loss": 12.8902, "step": 131890 }, { "epoch": 0.2664463450995285, "grad_norm": 191.02944946289062, "learning_rate": 9.204722911490847e-06, "loss": 14.6816, "step": 131900 }, { "epoch": 0.26646654573221235, "grad_norm": 81.29207611083984, "learning_rate": 9.204534014294302e-06, "loss": 13.4297, "step": 131910 }, { "epoch": 0.26648674636489617, "grad_norm": 520.2135620117188, "learning_rate": 9.204345096605369e-06, "loss": 20.1724, "step": 131920 }, { "epoch": 0.26650694699758, "grad_norm": 408.005126953125, "learning_rate": 9.204156158424969e-06, "loss": 29.1905, "step": 131930 }, { "epoch": 0.2665271476302638, "grad_norm": 641.2085571289062, "learning_rate": 9.20396719975402e-06, "loss": 21.5415, "step": 131940 }, { "epoch": 0.26654734826294757, "grad_norm": 627.3342895507812, "learning_rate": 9.203778220593447e-06, "loss": 23.7676, "step": 131950 }, { "epoch": 0.2665675488956314, "grad_norm": 519.7887573242188, "learning_rate": 9.203589220944166e-06, "loss": 29.4992, "step": 131960 }, { "epoch": 0.2665877495283152, "grad_norm": 183.91842651367188, "learning_rate": 9.203400200807104e-06, "loss": 22.6261, "step": 131970 }, { "epoch": 0.26660795016099903, "grad_norm": 587.1560668945312, "learning_rate": 9.203211160183177e-06, "loss": 26.334, "step": 131980 }, { "epoch": 0.26662815079368285, "grad_norm": 272.0841369628906, "learning_rate": 9.20302209907331e-06, "loss": 23.0761, "step": 131990 }, { "epoch": 0.26664835142636667, "grad_norm": 351.566162109375, "learning_rate": 9.202833017478421e-06, "loss": 13.8574, "step": 132000 }, { "epoch": 0.2666685520590505, "grad_norm": 220.77423095703125, "learning_rate": 9.202643915399436e-06, "loss": 22.7003, "step": 132010 }, { "epoch": 0.2666887526917343, "grad_norm": 679.429443359375, "learning_rate": 9.202454792837273e-06, "loss": 17.5129, "step": 132020 }, { "epoch": 0.26670895332441813, 
"grad_norm": 395.39178466796875, "learning_rate": 9.202265649792856e-06, "loss": 27.1673, "step": 132030 }, { "epoch": 0.26672915395710195, "grad_norm": 610.6573486328125, "learning_rate": 9.202076486267106e-06, "loss": 46.3144, "step": 132040 }, { "epoch": 0.26674935458978577, "grad_norm": 292.31298828125, "learning_rate": 9.201887302260943e-06, "loss": 37.9318, "step": 132050 }, { "epoch": 0.2667695552224696, "grad_norm": 377.9103088378906, "learning_rate": 9.201698097775291e-06, "loss": 11.6902, "step": 132060 }, { "epoch": 0.2667897558551534, "grad_norm": 771.6697998046875, "learning_rate": 9.201508872811074e-06, "loss": 24.8368, "step": 132070 }, { "epoch": 0.2668099564878372, "grad_norm": 394.54052734375, "learning_rate": 9.201319627369211e-06, "loss": 23.2955, "step": 132080 }, { "epoch": 0.266830157120521, "grad_norm": 488.073486328125, "learning_rate": 9.201130361450627e-06, "loss": 17.0227, "step": 132090 }, { "epoch": 0.2668503577532048, "grad_norm": 606.3373413085938, "learning_rate": 9.200941075056242e-06, "loss": 15.1066, "step": 132100 }, { "epoch": 0.26687055838588863, "grad_norm": 695.5035400390625, "learning_rate": 9.20075176818698e-06, "loss": 19.2674, "step": 132110 }, { "epoch": 0.26689075901857245, "grad_norm": 316.67376708984375, "learning_rate": 9.200562440843763e-06, "loss": 24.817, "step": 132120 }, { "epoch": 0.2669109596512563, "grad_norm": 676.6490478515625, "learning_rate": 9.200373093027515e-06, "loss": 23.4508, "step": 132130 }, { "epoch": 0.2669311602839401, "grad_norm": 386.1720886230469, "learning_rate": 9.200183724739158e-06, "loss": 21.1068, "step": 132140 }, { "epoch": 0.2669513609166239, "grad_norm": 33.5107536315918, "learning_rate": 9.199994335979613e-06, "loss": 16.8658, "step": 132150 }, { "epoch": 0.26697156154930773, "grad_norm": 335.3405456542969, "learning_rate": 9.199804926749807e-06, "loss": 24.8775, "step": 132160 }, { "epoch": 0.26699176218199155, "grad_norm": 572.8211059570312, "learning_rate": 9.19961549705066e-06, "loss": 13.9328, "step": 132170 }, { "epoch": 0.2670119628146754, "grad_norm": 433.8130798339844, "learning_rate": 9.199426046883097e-06, "loss": 27.4959, "step": 132180 }, { "epoch": 0.2670321634473592, "grad_norm": 777.7250366210938, "learning_rate": 9.19923657624804e-06, "loss": 23.5044, "step": 132190 }, { "epoch": 0.267052364080043, "grad_norm": 34.473838806152344, "learning_rate": 9.199047085146415e-06, "loss": 19.792, "step": 132200 }, { "epoch": 0.2670725647127268, "grad_norm": 1079.24560546875, "learning_rate": 9.198857573579143e-06, "loss": 17.6298, "step": 132210 }, { "epoch": 0.2670927653454106, "grad_norm": 273.13128662109375, "learning_rate": 9.198668041547149e-06, "loss": 29.453, "step": 132220 }, { "epoch": 0.2671129659780944, "grad_norm": 406.24365234375, "learning_rate": 9.198478489051355e-06, "loss": 20.6822, "step": 132230 }, { "epoch": 0.26713316661077824, "grad_norm": 374.2793884277344, "learning_rate": 9.198288916092685e-06, "loss": 16.7359, "step": 132240 }, { "epoch": 0.26715336724346206, "grad_norm": 336.56878662109375, "learning_rate": 9.198099322672066e-06, "loss": 28.721, "step": 132250 }, { "epoch": 0.2671735678761459, "grad_norm": 131.42808532714844, "learning_rate": 9.19790970879042e-06, "loss": 23.3139, "step": 132260 }, { "epoch": 0.2671937685088297, "grad_norm": 454.2491149902344, "learning_rate": 9.19772007444867e-06, "loss": 20.6341, "step": 132270 }, { "epoch": 0.2672139691415135, "grad_norm": 4854.48876953125, "learning_rate": 9.197530419647744e-06, "loss": 33.9443, "step": 132280 }, { 
"epoch": 0.26723416977419734, "grad_norm": 425.8404846191406, "learning_rate": 9.197340744388562e-06, "loss": 16.7814, "step": 132290 }, { "epoch": 0.26725437040688116, "grad_norm": 169.8668212890625, "learning_rate": 9.197151048672051e-06, "loss": 19.8753, "step": 132300 }, { "epoch": 0.267274571039565, "grad_norm": 472.1898193359375, "learning_rate": 9.196961332499133e-06, "loss": 19.4039, "step": 132310 }, { "epoch": 0.2672947716722488, "grad_norm": 320.7469787597656, "learning_rate": 9.196771595870736e-06, "loss": 21.0947, "step": 132320 }, { "epoch": 0.26731497230493256, "grad_norm": 298.8196105957031, "learning_rate": 9.196581838787784e-06, "loss": 17.3034, "step": 132330 }, { "epoch": 0.2673351729376164, "grad_norm": 276.4595031738281, "learning_rate": 9.196392061251199e-06, "loss": 22.1888, "step": 132340 }, { "epoch": 0.2673553735703002, "grad_norm": 245.24012756347656, "learning_rate": 9.196202263261908e-06, "loss": 16.4098, "step": 132350 }, { "epoch": 0.267375574202984, "grad_norm": 432.42022705078125, "learning_rate": 9.196012444820839e-06, "loss": 20.4767, "step": 132360 }, { "epoch": 0.26739577483566784, "grad_norm": 470.5956115722656, "learning_rate": 9.195822605928913e-06, "loss": 30.7396, "step": 132370 }, { "epoch": 0.26741597546835166, "grad_norm": 514.9091796875, "learning_rate": 9.195632746587055e-06, "loss": 21.2021, "step": 132380 }, { "epoch": 0.2674361761010355, "grad_norm": 293.6721496582031, "learning_rate": 9.195442866796194e-06, "loss": 22.6942, "step": 132390 }, { "epoch": 0.2674563767337193, "grad_norm": 331.6898193359375, "learning_rate": 9.195252966557252e-06, "loss": 24.5026, "step": 132400 }, { "epoch": 0.2674765773664031, "grad_norm": 282.0184631347656, "learning_rate": 9.195063045871156e-06, "loss": 15.5731, "step": 132410 }, { "epoch": 0.26749677799908694, "grad_norm": 510.8322448730469, "learning_rate": 9.194873104738831e-06, "loss": 19.1523, "step": 132420 }, { "epoch": 0.26751697863177076, "grad_norm": 401.6566162109375, "learning_rate": 9.194683143161205e-06, "loss": 31.8837, "step": 132430 }, { "epoch": 0.2675371792644546, "grad_norm": 135.264404296875, "learning_rate": 9.1944931611392e-06, "loss": 29.447, "step": 132440 }, { "epoch": 0.2675573798971384, "grad_norm": 187.27471923828125, "learning_rate": 9.194303158673744e-06, "loss": 25.0556, "step": 132450 }, { "epoch": 0.26757758052982217, "grad_norm": 222.26097106933594, "learning_rate": 9.194113135765766e-06, "loss": 20.4969, "step": 132460 }, { "epoch": 0.267597781162506, "grad_norm": 746.4765625, "learning_rate": 9.193923092416187e-06, "loss": 34.7272, "step": 132470 }, { "epoch": 0.2676179817951898, "grad_norm": 783.6100463867188, "learning_rate": 9.193733028625936e-06, "loss": 14.6561, "step": 132480 }, { "epoch": 0.2676381824278736, "grad_norm": 592.0213012695312, "learning_rate": 9.193542944395938e-06, "loss": 28.9808, "step": 132490 }, { "epoch": 0.26765838306055745, "grad_norm": 630.4940185546875, "learning_rate": 9.193352839727122e-06, "loss": 22.5505, "step": 132500 }, { "epoch": 0.26767858369324127, "grad_norm": 450.4183654785156, "learning_rate": 9.193162714620411e-06, "loss": 11.2748, "step": 132510 }, { "epoch": 0.2676987843259251, "grad_norm": 647.8392333984375, "learning_rate": 9.192972569076734e-06, "loss": 19.0283, "step": 132520 }, { "epoch": 0.2677189849586089, "grad_norm": 295.0071716308594, "learning_rate": 9.192782403097018e-06, "loss": 10.4461, "step": 132530 }, { "epoch": 0.2677391855912927, "grad_norm": 437.8892822265625, "learning_rate": 9.192592216682189e-06, 
"loss": 36.5702, "step": 132540 }, { "epoch": 0.26775938622397655, "grad_norm": 276.1405944824219, "learning_rate": 9.192402009833174e-06, "loss": 15.4697, "step": 132550 }, { "epoch": 0.26777958685666037, "grad_norm": 1536.82763671875, "learning_rate": 9.192211782550899e-06, "loss": 32.0363, "step": 132560 }, { "epoch": 0.2677997874893442, "grad_norm": 1108.8671875, "learning_rate": 9.192021534836293e-06, "loss": 30.7985, "step": 132570 }, { "epoch": 0.267819988122028, "grad_norm": 796.626953125, "learning_rate": 9.191831266690284e-06, "loss": 27.7736, "step": 132580 }, { "epoch": 0.26784018875471177, "grad_norm": 288.6631164550781, "learning_rate": 9.191640978113796e-06, "loss": 21.322, "step": 132590 }, { "epoch": 0.2678603893873956, "grad_norm": 299.3996276855469, "learning_rate": 9.191450669107758e-06, "loss": 54.9828, "step": 132600 }, { "epoch": 0.2678805900200794, "grad_norm": 1362.4151611328125, "learning_rate": 9.191260339673099e-06, "loss": 19.2313, "step": 132610 }, { "epoch": 0.26790079065276323, "grad_norm": 383.589111328125, "learning_rate": 9.191069989810743e-06, "loss": 21.1399, "step": 132620 }, { "epoch": 0.26792099128544705, "grad_norm": 230.8157196044922, "learning_rate": 9.190879619521623e-06, "loss": 23.5327, "step": 132630 }, { "epoch": 0.26794119191813087, "grad_norm": 0.0, "learning_rate": 9.190689228806664e-06, "loss": 19.4128, "step": 132640 }, { "epoch": 0.2679613925508147, "grad_norm": 0.0, "learning_rate": 9.190498817666793e-06, "loss": 17.6414, "step": 132650 }, { "epoch": 0.2679815931834985, "grad_norm": 165.3369903564453, "learning_rate": 9.190308386102938e-06, "loss": 15.453, "step": 132660 }, { "epoch": 0.26800179381618233, "grad_norm": 664.3617553710938, "learning_rate": 9.19011793411603e-06, "loss": 37.5341, "step": 132670 }, { "epoch": 0.26802199444886615, "grad_norm": 253.08544921875, "learning_rate": 9.189927461706994e-06, "loss": 24.1027, "step": 132680 }, { "epoch": 0.26804219508154997, "grad_norm": 647.598388671875, "learning_rate": 9.189736968876761e-06, "loss": 24.0179, "step": 132690 }, { "epoch": 0.2680623957142338, "grad_norm": 519.2431640625, "learning_rate": 9.189546455626258e-06, "loss": 25.4059, "step": 132700 }, { "epoch": 0.2680825963469176, "grad_norm": 7.84843111038208, "learning_rate": 9.189355921956414e-06, "loss": 28.0975, "step": 132710 }, { "epoch": 0.2681027969796014, "grad_norm": 201.0693817138672, "learning_rate": 9.189165367868157e-06, "loss": 24.6074, "step": 132720 }, { "epoch": 0.2681229976122852, "grad_norm": 301.8020935058594, "learning_rate": 9.188974793362412e-06, "loss": 26.2875, "step": 132730 }, { "epoch": 0.268143198244969, "grad_norm": 321.9702453613281, "learning_rate": 9.188784198440118e-06, "loss": 11.4118, "step": 132740 }, { "epoch": 0.26816339887765284, "grad_norm": 556.8260498046875, "learning_rate": 9.188593583102195e-06, "loss": 18.8817, "step": 132750 }, { "epoch": 0.26818359951033666, "grad_norm": 342.0984191894531, "learning_rate": 9.188402947349575e-06, "loss": 17.7907, "step": 132760 }, { "epoch": 0.2682038001430205, "grad_norm": 652.8034057617188, "learning_rate": 9.188212291183187e-06, "loss": 17.4182, "step": 132770 }, { "epoch": 0.2682240007757043, "grad_norm": 213.09066772460938, "learning_rate": 9.18802161460396e-06, "loss": 29.8512, "step": 132780 }, { "epoch": 0.2682442014083881, "grad_norm": 92.26563262939453, "learning_rate": 9.187830917612826e-06, "loss": 22.9727, "step": 132790 }, { "epoch": 0.26826440204107194, "grad_norm": 98.51717376708984, "learning_rate": 9.18764020021071e-06, 
"loss": 57.7267, "step": 132800 }, { "epoch": 0.26828460267375576, "grad_norm": 133.5460968017578, "learning_rate": 9.187449462398544e-06, "loss": 16.3387, "step": 132810 }, { "epoch": 0.2683048033064396, "grad_norm": 669.2384643554688, "learning_rate": 9.187258704177255e-06, "loss": 36.3648, "step": 132820 }, { "epoch": 0.2683250039391234, "grad_norm": 61.76363754272461, "learning_rate": 9.187067925547779e-06, "loss": 16.2125, "step": 132830 }, { "epoch": 0.2683452045718072, "grad_norm": 265.6653137207031, "learning_rate": 9.186877126511039e-06, "loss": 17.4552, "step": 132840 }, { "epoch": 0.268365405204491, "grad_norm": 110.96267700195312, "learning_rate": 9.186686307067968e-06, "loss": 25.6673, "step": 132850 }, { "epoch": 0.2683856058371748, "grad_norm": 352.1205139160156, "learning_rate": 9.186495467219497e-06, "loss": 43.148, "step": 132860 }, { "epoch": 0.2684058064698586, "grad_norm": 223.99241638183594, "learning_rate": 9.186304606966554e-06, "loss": 26.3819, "step": 132870 }, { "epoch": 0.26842600710254244, "grad_norm": 573.003662109375, "learning_rate": 9.186113726310068e-06, "loss": 26.4017, "step": 132880 }, { "epoch": 0.26844620773522626, "grad_norm": 751.8842163085938, "learning_rate": 9.185922825250975e-06, "loss": 20.1029, "step": 132890 }, { "epoch": 0.2684664083679101, "grad_norm": 246.11575317382812, "learning_rate": 9.1857319037902e-06, "loss": 18.4643, "step": 132900 }, { "epoch": 0.2684866090005939, "grad_norm": 263.97454833984375, "learning_rate": 9.185540961928677e-06, "loss": 27.4157, "step": 132910 }, { "epoch": 0.2685068096332777, "grad_norm": 643.5626220703125, "learning_rate": 9.185349999667333e-06, "loss": 17.9134, "step": 132920 }, { "epoch": 0.26852701026596154, "grad_norm": 255.15863037109375, "learning_rate": 9.185159017007101e-06, "loss": 11.7951, "step": 132930 }, { "epoch": 0.26854721089864536, "grad_norm": 820.3862915039062, "learning_rate": 9.184968013948912e-06, "loss": 21.1888, "step": 132940 }, { "epoch": 0.2685674115313292, "grad_norm": 384.4031066894531, "learning_rate": 9.184776990493696e-06, "loss": 18.4046, "step": 132950 }, { "epoch": 0.268587612164013, "grad_norm": 441.9537658691406, "learning_rate": 9.184585946642384e-06, "loss": 14.0234, "step": 132960 }, { "epoch": 0.26860781279669677, "grad_norm": 234.75209045410156, "learning_rate": 9.184394882395909e-06, "loss": 34.9167, "step": 132970 }, { "epoch": 0.2686280134293806, "grad_norm": 244.43898010253906, "learning_rate": 9.1842037977552e-06, "loss": 24.3913, "step": 132980 }, { "epoch": 0.2686482140620644, "grad_norm": 195.8040771484375, "learning_rate": 9.18401269272119e-06, "loss": 52.0898, "step": 132990 }, { "epoch": 0.2686684146947482, "grad_norm": 517.8396606445312, "learning_rate": 9.18382156729481e-06, "loss": 16.1742, "step": 133000 }, { "epoch": 0.26868861532743205, "grad_norm": 535.0672607421875, "learning_rate": 9.18363042147699e-06, "loss": 15.3108, "step": 133010 }, { "epoch": 0.26870881596011587, "grad_norm": 183.6185760498047, "learning_rate": 9.183439255268662e-06, "loss": 33.4402, "step": 133020 }, { "epoch": 0.2687290165927997, "grad_norm": 231.61705017089844, "learning_rate": 9.18324806867076e-06, "loss": 30.3354, "step": 133030 }, { "epoch": 0.2687492172254835, "grad_norm": 334.2143249511719, "learning_rate": 9.183056861684215e-06, "loss": 11.6259, "step": 133040 }, { "epoch": 0.2687694178581673, "grad_norm": 646.425048828125, "learning_rate": 9.182865634309956e-06, "loss": 13.3922, "step": 133050 }, { "epoch": 0.26878961849085115, "grad_norm": 570.08935546875, 
"learning_rate": 9.182674386548918e-06, "loss": 38.6776, "step": 133060 }, { "epoch": 0.26880981912353497, "grad_norm": 369.7538757324219, "learning_rate": 9.182483118402032e-06, "loss": 15.7544, "step": 133070 }, { "epoch": 0.2688300197562188, "grad_norm": 914.4676513671875, "learning_rate": 9.18229182987023e-06, "loss": 15.9988, "step": 133080 }, { "epoch": 0.2688502203889026, "grad_norm": 158.03488159179688, "learning_rate": 9.182100520954446e-06, "loss": 29.8605, "step": 133090 }, { "epoch": 0.26887042102158637, "grad_norm": 623.6317749023438, "learning_rate": 9.181909191655613e-06, "loss": 22.6347, "step": 133100 }, { "epoch": 0.2688906216542702, "grad_norm": 444.5938415527344, "learning_rate": 9.18171784197466e-06, "loss": 35.1076, "step": 133110 }, { "epoch": 0.268910822286954, "grad_norm": 306.80548095703125, "learning_rate": 9.18152647191252e-06, "loss": 25.4284, "step": 133120 }, { "epoch": 0.26893102291963783, "grad_norm": 552.109130859375, "learning_rate": 9.181335081470128e-06, "loss": 23.2609, "step": 133130 }, { "epoch": 0.26895122355232165, "grad_norm": 421.8555603027344, "learning_rate": 9.181143670648418e-06, "loss": 21.4386, "step": 133140 }, { "epoch": 0.26897142418500547, "grad_norm": 510.4923400878906, "learning_rate": 9.18095223944832e-06, "loss": 22.8459, "step": 133150 }, { "epoch": 0.2689916248176893, "grad_norm": 135.2384796142578, "learning_rate": 9.180760787870766e-06, "loss": 26.551, "step": 133160 }, { "epoch": 0.2690118254503731, "grad_norm": 858.7987060546875, "learning_rate": 9.180569315916693e-06, "loss": 28.7734, "step": 133170 }, { "epoch": 0.26903202608305693, "grad_norm": 1593.05810546875, "learning_rate": 9.180377823587031e-06, "loss": 34.3314, "step": 133180 }, { "epoch": 0.26905222671574075, "grad_norm": 417.97918701171875, "learning_rate": 9.180186310882715e-06, "loss": 28.1385, "step": 133190 }, { "epoch": 0.26907242734842457, "grad_norm": 1240.343994140625, "learning_rate": 9.179994777804677e-06, "loss": 28.292, "step": 133200 }, { "epoch": 0.2690926279811084, "grad_norm": 388.9484558105469, "learning_rate": 9.179803224353854e-06, "loss": 25.5228, "step": 133210 }, { "epoch": 0.2691128286137922, "grad_norm": 617.9100341796875, "learning_rate": 9.179611650531174e-06, "loss": 18.4147, "step": 133220 }, { "epoch": 0.269133029246476, "grad_norm": 370.5556640625, "learning_rate": 9.179420056337576e-06, "loss": 18.3568, "step": 133230 }, { "epoch": 0.2691532298791598, "grad_norm": 315.80926513671875, "learning_rate": 9.179228441773991e-06, "loss": 18.8859, "step": 133240 }, { "epoch": 0.2691734305118436, "grad_norm": 442.8673095703125, "learning_rate": 9.179036806841352e-06, "loss": 10.576, "step": 133250 }, { "epoch": 0.26919363114452743, "grad_norm": 329.0492858886719, "learning_rate": 9.178845151540597e-06, "loss": 16.9877, "step": 133260 }, { "epoch": 0.26921383177721125, "grad_norm": 255.6985321044922, "learning_rate": 9.178653475872655e-06, "loss": 14.3121, "step": 133270 }, { "epoch": 0.2692340324098951, "grad_norm": 881.1149291992188, "learning_rate": 9.178461779838464e-06, "loss": 35.6185, "step": 133280 }, { "epoch": 0.2692542330425789, "grad_norm": 518.59814453125, "learning_rate": 9.178270063438957e-06, "loss": 27.3528, "step": 133290 }, { "epoch": 0.2692744336752627, "grad_norm": 671.7938842773438, "learning_rate": 9.178078326675069e-06, "loss": 37.0915, "step": 133300 }, { "epoch": 0.26929463430794653, "grad_norm": 283.6918029785156, "learning_rate": 9.177886569547731e-06, "loss": 24.2198, "step": 133310 }, { "epoch": 
0.26931483494063035, "grad_norm": 653.0701904296875, "learning_rate": 9.177694792057884e-06, "loss": 25.0911, "step": 133320 }, { "epoch": 0.2693350355733142, "grad_norm": 474.48077392578125, "learning_rate": 9.177502994206457e-06, "loss": 15.433, "step": 133330 }, { "epoch": 0.269355236205998, "grad_norm": 461.8171691894531, "learning_rate": 9.17731117599439e-06, "loss": 20.6739, "step": 133340 }, { "epoch": 0.2693754368386818, "grad_norm": 582.8025512695312, "learning_rate": 9.177119337422613e-06, "loss": 19.432, "step": 133350 }, { "epoch": 0.2693956374713656, "grad_norm": 454.8699645996094, "learning_rate": 9.176927478492064e-06, "loss": 24.2648, "step": 133360 }, { "epoch": 0.2694158381040494, "grad_norm": 294.0679016113281, "learning_rate": 9.176735599203676e-06, "loss": 23.7354, "step": 133370 }, { "epoch": 0.2694360387367332, "grad_norm": 270.6733093261719, "learning_rate": 9.176543699558384e-06, "loss": 17.6682, "step": 133380 }, { "epoch": 0.26945623936941704, "grad_norm": 750.679443359375, "learning_rate": 9.176351779557127e-06, "loss": 26.0452, "step": 133390 }, { "epoch": 0.26947644000210086, "grad_norm": 440.5780334472656, "learning_rate": 9.176159839200838e-06, "loss": 19.2952, "step": 133400 }, { "epoch": 0.2694966406347847, "grad_norm": 197.2433624267578, "learning_rate": 9.175967878490451e-06, "loss": 14.8968, "step": 133410 }, { "epoch": 0.2695168412674685, "grad_norm": 172.82565307617188, "learning_rate": 9.175775897426904e-06, "loss": 20.906, "step": 133420 }, { "epoch": 0.2695370419001523, "grad_norm": 650.2919311523438, "learning_rate": 9.17558389601113e-06, "loss": 25.1461, "step": 133430 }, { "epoch": 0.26955724253283614, "grad_norm": 189.42684936523438, "learning_rate": 9.175391874244068e-06, "loss": 14.5088, "step": 133440 }, { "epoch": 0.26957744316551996, "grad_norm": 327.37689208984375, "learning_rate": 9.175199832126654e-06, "loss": 30.4333, "step": 133450 }, { "epoch": 0.2695976437982038, "grad_norm": 107.6774673461914, "learning_rate": 9.17500776965982e-06, "loss": 20.1457, "step": 133460 }, { "epoch": 0.2696178444308876, "grad_norm": 139.43043518066406, "learning_rate": 9.174815686844506e-06, "loss": 32.9828, "step": 133470 }, { "epoch": 0.2696380450635714, "grad_norm": 438.24945068359375, "learning_rate": 9.174623583681644e-06, "loss": 32.7648, "step": 133480 }, { "epoch": 0.2696582456962552, "grad_norm": 530.69580078125, "learning_rate": 9.174431460172175e-06, "loss": 10.383, "step": 133490 }, { "epoch": 0.269678446328939, "grad_norm": 491.65020751953125, "learning_rate": 9.174239316317034e-06, "loss": 17.2912, "step": 133500 }, { "epoch": 0.2696986469616228, "grad_norm": 390.62896728515625, "learning_rate": 9.174047152117155e-06, "loss": 19.479, "step": 133510 }, { "epoch": 0.26971884759430664, "grad_norm": 521.3779907226562, "learning_rate": 9.173854967573479e-06, "loss": 45.7823, "step": 133520 }, { "epoch": 0.26973904822699046, "grad_norm": 359.65325927734375, "learning_rate": 9.173662762686937e-06, "loss": 16.5244, "step": 133530 }, { "epoch": 0.2697592488596743, "grad_norm": 682.971435546875, "learning_rate": 9.17347053745847e-06, "loss": 25.2273, "step": 133540 }, { "epoch": 0.2697794494923581, "grad_norm": 806.0768432617188, "learning_rate": 9.173278291889016e-06, "loss": 17.358, "step": 133550 }, { "epoch": 0.2697996501250419, "grad_norm": 363.98028564453125, "learning_rate": 9.173086025979507e-06, "loss": 12.2788, "step": 133560 }, { "epoch": 0.26981985075772574, "grad_norm": 244.05532836914062, "learning_rate": 9.172893739730884e-06, "loss": 
20.116, "step": 133570 }, { "epoch": 0.26984005139040956, "grad_norm": 459.2618103027344, "learning_rate": 9.172701433144083e-06, "loss": 24.0229, "step": 133580 }, { "epoch": 0.2698602520230934, "grad_norm": 186.79541015625, "learning_rate": 9.172509106220041e-06, "loss": 19.4491, "step": 133590 }, { "epoch": 0.2698804526557772, "grad_norm": 157.8845977783203, "learning_rate": 9.172316758959695e-06, "loss": 15.3868, "step": 133600 }, { "epoch": 0.26990065328846097, "grad_norm": 559.3065185546875, "learning_rate": 9.172124391363986e-06, "loss": 31.9676, "step": 133610 }, { "epoch": 0.2699208539211448, "grad_norm": 125.10926818847656, "learning_rate": 9.171932003433845e-06, "loss": 19.5996, "step": 133620 }, { "epoch": 0.2699410545538286, "grad_norm": 1056.4173583984375, "learning_rate": 9.171739595170217e-06, "loss": 33.6187, "step": 133630 }, { "epoch": 0.2699612551865124, "grad_norm": 651.4199829101562, "learning_rate": 9.171547166574034e-06, "loss": 13.5563, "step": 133640 }, { "epoch": 0.26998145581919625, "grad_norm": 200.57957458496094, "learning_rate": 9.171354717646238e-06, "loss": 32.7947, "step": 133650 }, { "epoch": 0.27000165645188007, "grad_norm": 61.84878921508789, "learning_rate": 9.171162248387762e-06, "loss": 25.86, "step": 133660 }, { "epoch": 0.2700218570845639, "grad_norm": 167.61216735839844, "learning_rate": 9.170969758799549e-06, "loss": 23.9729, "step": 133670 }, { "epoch": 0.2700420577172477, "grad_norm": 478.9454040527344, "learning_rate": 9.170777248882535e-06, "loss": 17.3073, "step": 133680 }, { "epoch": 0.2700622583499315, "grad_norm": 547.905029296875, "learning_rate": 9.170584718637658e-06, "loss": 21.9839, "step": 133690 }, { "epoch": 0.27008245898261535, "grad_norm": 379.87994384765625, "learning_rate": 9.170392168065858e-06, "loss": 10.2162, "step": 133700 }, { "epoch": 0.27010265961529917, "grad_norm": 1176.78515625, "learning_rate": 9.17019959716807e-06, "loss": 18.8268, "step": 133710 }, { "epoch": 0.270122860247983, "grad_norm": 758.3802490234375, "learning_rate": 9.170007005945236e-06, "loss": 33.3501, "step": 133720 }, { "epoch": 0.2701430608806668, "grad_norm": 91.6874008178711, "learning_rate": 9.169814394398295e-06, "loss": 21.1327, "step": 133730 }, { "epoch": 0.27016326151335057, "grad_norm": 210.44874572753906, "learning_rate": 9.169621762528184e-06, "loss": 25.161, "step": 133740 }, { "epoch": 0.2701834621460344, "grad_norm": 211.814453125, "learning_rate": 9.169429110335842e-06, "loss": 23.5289, "step": 133750 }, { "epoch": 0.2702036627787182, "grad_norm": 88.92313385009766, "learning_rate": 9.169236437822208e-06, "loss": 34.3629, "step": 133760 }, { "epoch": 0.27022386341140203, "grad_norm": 279.8443298339844, "learning_rate": 9.169043744988222e-06, "loss": 12.3574, "step": 133770 }, { "epoch": 0.27024406404408585, "grad_norm": 261.79803466796875, "learning_rate": 9.16885103183482e-06, "loss": 15.2551, "step": 133780 }, { "epoch": 0.27026426467676967, "grad_norm": 185.6011199951172, "learning_rate": 9.168658298362947e-06, "loss": 23.5912, "step": 133790 }, { "epoch": 0.2702844653094535, "grad_norm": 358.6379089355469, "learning_rate": 9.168465544573538e-06, "loss": 8.0604, "step": 133800 }, { "epoch": 0.2703046659421373, "grad_norm": 409.3935546875, "learning_rate": 9.168272770467532e-06, "loss": 21.4261, "step": 133810 }, { "epoch": 0.27032486657482113, "grad_norm": 677.9612426757812, "learning_rate": 9.16807997604587e-06, "loss": 17.9283, "step": 133820 }, { "epoch": 0.27034506720750495, "grad_norm": 464.4529113769531, "learning_rate": 
9.167887161309495e-06, "loss": 18.2088, "step": 133830 }, { "epoch": 0.27036526784018877, "grad_norm": 249.7884063720703, "learning_rate": 9.16769432625934e-06, "loss": 21.463, "step": 133840 }, { "epoch": 0.2703854684728726, "grad_norm": 675.5252075195312, "learning_rate": 9.16750147089635e-06, "loss": 16.9006, "step": 133850 }, { "epoch": 0.2704056691055564, "grad_norm": 269.93756103515625, "learning_rate": 9.167308595221463e-06, "loss": 17.7645, "step": 133860 }, { "epoch": 0.2704258697382402, "grad_norm": 845.3773193359375, "learning_rate": 9.167115699235618e-06, "loss": 34.0293, "step": 133870 }, { "epoch": 0.270446070370924, "grad_norm": 373.9945373535156, "learning_rate": 9.166922782939759e-06, "loss": 26.0111, "step": 133880 }, { "epoch": 0.2704662710036078, "grad_norm": 642.9016723632812, "learning_rate": 9.166729846334822e-06, "loss": 46.0158, "step": 133890 }, { "epoch": 0.27048647163629164, "grad_norm": 731.1618041992188, "learning_rate": 9.16653688942175e-06, "loss": 36.8734, "step": 133900 }, { "epoch": 0.27050667226897546, "grad_norm": 348.7421875, "learning_rate": 9.166343912201482e-06, "loss": 17.0303, "step": 133910 }, { "epoch": 0.2705268729016593, "grad_norm": 570.551025390625, "learning_rate": 9.166150914674959e-06, "loss": 31.2026, "step": 133920 }, { "epoch": 0.2705470735343431, "grad_norm": 746.3660888671875, "learning_rate": 9.165957896843122e-06, "loss": 16.837, "step": 133930 }, { "epoch": 0.2705672741670269, "grad_norm": 195.4158935546875, "learning_rate": 9.165764858706912e-06, "loss": 21.4735, "step": 133940 }, { "epoch": 0.27058747479971074, "grad_norm": 401.3896789550781, "learning_rate": 9.165571800267267e-06, "loss": 18.1618, "step": 133950 }, { "epoch": 0.27060767543239456, "grad_norm": 773.892822265625, "learning_rate": 9.165378721525133e-06, "loss": 36.3603, "step": 133960 }, { "epoch": 0.2706278760650784, "grad_norm": 1609.6444091796875, "learning_rate": 9.165185622481447e-06, "loss": 28.9816, "step": 133970 }, { "epoch": 0.2706480766977622, "grad_norm": 354.135009765625, "learning_rate": 9.164992503137152e-06, "loss": 15.0174, "step": 133980 }, { "epoch": 0.270668277330446, "grad_norm": 4.084848880767822, "learning_rate": 9.16479936349319e-06, "loss": 27.4202, "step": 133990 }, { "epoch": 0.2706884779631298, "grad_norm": 920.7405395507812, "learning_rate": 9.164606203550498e-06, "loss": 28.7478, "step": 134000 }, { "epoch": 0.2707086785958136, "grad_norm": 434.6065979003906, "learning_rate": 9.164413023310022e-06, "loss": 24.9194, "step": 134010 }, { "epoch": 0.2707288792284974, "grad_norm": 458.9222717285156, "learning_rate": 9.164219822772702e-06, "loss": 26.233, "step": 134020 }, { "epoch": 0.27074907986118124, "grad_norm": 144.0622100830078, "learning_rate": 9.16402660193948e-06, "loss": 25.8593, "step": 134030 }, { "epoch": 0.27076928049386506, "grad_norm": 705.0419921875, "learning_rate": 9.163833360811296e-06, "loss": 16.5194, "step": 134040 }, { "epoch": 0.2707894811265489, "grad_norm": 458.0263977050781, "learning_rate": 9.163640099389095e-06, "loss": 23.7073, "step": 134050 }, { "epoch": 0.2708096817592327, "grad_norm": 214.73873901367188, "learning_rate": 9.163446817673817e-06, "loss": 14.6718, "step": 134060 }, { "epoch": 0.2708298823919165, "grad_norm": 265.9853515625, "learning_rate": 9.163253515666403e-06, "loss": 20.0908, "step": 134070 }, { "epoch": 0.27085008302460034, "grad_norm": 945.6927490234375, "learning_rate": 9.163060193367797e-06, "loss": 21.8016, "step": 134080 }, { "epoch": 0.27087028365728416, "grad_norm": 
280.96636962890625, "learning_rate": 9.16286685077894e-06, "loss": 17.5716, "step": 134090 }, { "epoch": 0.270890484289968, "grad_norm": 894.2539672851562, "learning_rate": 9.162673487900775e-06, "loss": 14.1572, "step": 134100 }, { "epoch": 0.2709106849226518, "grad_norm": 303.4360046386719, "learning_rate": 9.162480104734244e-06, "loss": 14.7613, "step": 134110 }, { "epoch": 0.27093088555533557, "grad_norm": 802.8580932617188, "learning_rate": 9.162286701280292e-06, "loss": 23.2513, "step": 134120 }, { "epoch": 0.2709510861880194, "grad_norm": 619.9885864257812, "learning_rate": 9.162093277539856e-06, "loss": 16.6658, "step": 134130 }, { "epoch": 0.2709712868207032, "grad_norm": 267.7410888671875, "learning_rate": 9.161899833513885e-06, "loss": 18.987, "step": 134140 }, { "epoch": 0.270991487453387, "grad_norm": 340.4080810546875, "learning_rate": 9.161706369203319e-06, "loss": 14.3383, "step": 134150 }, { "epoch": 0.27101168808607085, "grad_norm": 481.16546630859375, "learning_rate": 9.161512884609099e-06, "loss": 14.2786, "step": 134160 }, { "epoch": 0.27103188871875467, "grad_norm": 882.806396484375, "learning_rate": 9.16131937973217e-06, "loss": 24.3084, "step": 134170 }, { "epoch": 0.2710520893514385, "grad_norm": 558.1138916015625, "learning_rate": 9.161125854573476e-06, "loss": 31.4807, "step": 134180 }, { "epoch": 0.2710722899841223, "grad_norm": 636.9767456054688, "learning_rate": 9.160932309133959e-06, "loss": 28.0938, "step": 134190 }, { "epoch": 0.2710924906168061, "grad_norm": 818.5437622070312, "learning_rate": 9.160738743414564e-06, "loss": 30.3742, "step": 134200 }, { "epoch": 0.27111269124948995, "grad_norm": 775.7115478515625, "learning_rate": 9.160545157416231e-06, "loss": 24.856, "step": 134210 }, { "epoch": 0.27113289188217377, "grad_norm": 518.9937133789062, "learning_rate": 9.160351551139906e-06, "loss": 21.8429, "step": 134220 }, { "epoch": 0.2711530925148576, "grad_norm": 245.49343872070312, "learning_rate": 9.160157924586532e-06, "loss": 21.4535, "step": 134230 }, { "epoch": 0.2711732931475414, "grad_norm": 350.6053161621094, "learning_rate": 9.159964277757054e-06, "loss": 17.8508, "step": 134240 }, { "epoch": 0.27119349378022517, "grad_norm": 300.557373046875, "learning_rate": 9.159770610652413e-06, "loss": 34.6474, "step": 134250 }, { "epoch": 0.271213694412909, "grad_norm": 151.58203125, "learning_rate": 9.159576923273557e-06, "loss": 28.7178, "step": 134260 }, { "epoch": 0.2712338950455928, "grad_norm": 730.8589477539062, "learning_rate": 9.159383215621425e-06, "loss": 48.4156, "step": 134270 }, { "epoch": 0.27125409567827663, "grad_norm": 250.2581024169922, "learning_rate": 9.159189487696965e-06, "loss": 28.0459, "step": 134280 }, { "epoch": 0.27127429631096045, "grad_norm": 508.3044738769531, "learning_rate": 9.15899573950112e-06, "loss": 25.1482, "step": 134290 }, { "epoch": 0.27129449694364427, "grad_norm": 683.8849487304688, "learning_rate": 9.158801971034832e-06, "loss": 36.1386, "step": 134300 }, { "epoch": 0.2713146975763281, "grad_norm": 217.94737243652344, "learning_rate": 9.15860818229905e-06, "loss": 21.7075, "step": 134310 }, { "epoch": 0.2713348982090119, "grad_norm": 409.0342712402344, "learning_rate": 9.158414373294715e-06, "loss": 13.7616, "step": 134320 }, { "epoch": 0.27135509884169573, "grad_norm": 251.45237731933594, "learning_rate": 9.158220544022773e-06, "loss": 21.7063, "step": 134330 }, { "epoch": 0.27137529947437955, "grad_norm": 232.46438598632812, "learning_rate": 9.15802669448417e-06, "loss": 20.2642, "step": 134340 }, { "epoch": 
0.27139550010706337, "grad_norm": 563.8030395507812, "learning_rate": 9.157832824679846e-06, "loss": 22.5836, "step": 134350 }, { "epoch": 0.2714157007397472, "grad_norm": 425.8003234863281, "learning_rate": 9.15763893461075e-06, "loss": 39.1836, "step": 134360 }, { "epoch": 0.271435901372431, "grad_norm": 379.83837890625, "learning_rate": 9.157445024277829e-06, "loss": 16.1051, "step": 134370 }, { "epoch": 0.2714561020051148, "grad_norm": 508.3604736328125, "learning_rate": 9.157251093682022e-06, "loss": 18.5233, "step": 134380 }, { "epoch": 0.2714763026377986, "grad_norm": 325.0518798828125, "learning_rate": 9.15705714282428e-06, "loss": 14.0072, "step": 134390 }, { "epoch": 0.2714965032704824, "grad_norm": 327.50885009765625, "learning_rate": 9.156863171705543e-06, "loss": 31.057, "step": 134400 }, { "epoch": 0.27151670390316623, "grad_norm": 574.4484252929688, "learning_rate": 9.15666918032676e-06, "loss": 24.4694, "step": 134410 }, { "epoch": 0.27153690453585005, "grad_norm": 356.52581787109375, "learning_rate": 9.156475168688876e-06, "loss": 21.091, "step": 134420 }, { "epoch": 0.2715571051685339, "grad_norm": 76.50200653076172, "learning_rate": 9.156281136792836e-06, "loss": 24.1895, "step": 134430 }, { "epoch": 0.2715773058012177, "grad_norm": 751.059814453125, "learning_rate": 9.156087084639587e-06, "loss": 25.2717, "step": 134440 }, { "epoch": 0.2715975064339015, "grad_norm": 241.04539489746094, "learning_rate": 9.155893012230072e-06, "loss": 18.1675, "step": 134450 }, { "epoch": 0.27161770706658533, "grad_norm": 484.7392883300781, "learning_rate": 9.15569891956524e-06, "loss": 18.552, "step": 134460 }, { "epoch": 0.27163790769926915, "grad_norm": 369.731201171875, "learning_rate": 9.155504806646033e-06, "loss": 25.0763, "step": 134470 }, { "epoch": 0.271658108331953, "grad_norm": 166.63563537597656, "learning_rate": 9.1553106734734e-06, "loss": 23.8329, "step": 134480 }, { "epoch": 0.2716783089646368, "grad_norm": 72.80184173583984, "learning_rate": 9.155116520048289e-06, "loss": 16.3994, "step": 134490 }, { "epoch": 0.2716985095973206, "grad_norm": 1032.9742431640625, "learning_rate": 9.154922346371641e-06, "loss": 33.3965, "step": 134500 }, { "epoch": 0.2717187102300044, "grad_norm": 591.19677734375, "learning_rate": 9.154728152444408e-06, "loss": 14.1618, "step": 134510 }, { "epoch": 0.2717389108626882, "grad_norm": 377.9610290527344, "learning_rate": 9.154533938267534e-06, "loss": 17.8524, "step": 134520 }, { "epoch": 0.271759111495372, "grad_norm": 242.5041961669922, "learning_rate": 9.154339703841963e-06, "loss": 30.3299, "step": 134530 }, { "epoch": 0.27177931212805584, "grad_norm": 560.8853759765625, "learning_rate": 9.154145449168647e-06, "loss": 19.1206, "step": 134540 }, { "epoch": 0.27179951276073966, "grad_norm": 125.90225219726562, "learning_rate": 9.153951174248528e-06, "loss": 32.8272, "step": 134550 }, { "epoch": 0.2718197133934235, "grad_norm": 555.1868286132812, "learning_rate": 9.153756879082556e-06, "loss": 25.5203, "step": 134560 }, { "epoch": 0.2718399140261073, "grad_norm": 46.10591125488281, "learning_rate": 9.153562563671676e-06, "loss": 13.7072, "step": 134570 }, { "epoch": 0.2718601146587911, "grad_norm": 78.36112976074219, "learning_rate": 9.153368228016836e-06, "loss": 13.7849, "step": 134580 }, { "epoch": 0.27188031529147494, "grad_norm": 419.6697692871094, "learning_rate": 9.153173872118984e-06, "loss": 32.6198, "step": 134590 }, { "epoch": 0.27190051592415876, "grad_norm": 435.4535827636719, "learning_rate": 9.152979495979064e-06, "loss": 
16.4727, "step": 134600 }, { "epoch": 0.2719207165568426, "grad_norm": 292.64007568359375, "learning_rate": 9.152785099598027e-06, "loss": 25.1713, "step": 134610 }, { "epoch": 0.2719409171895264, "grad_norm": 492.0068664550781, "learning_rate": 9.152590682976818e-06, "loss": 16.8073, "step": 134620 }, { "epoch": 0.2719611178222102, "grad_norm": 193.37913513183594, "learning_rate": 9.152396246116387e-06, "loss": 39.7841, "step": 134630 }, { "epoch": 0.271981318454894, "grad_norm": 422.52130126953125, "learning_rate": 9.15220178901768e-06, "loss": 17.9294, "step": 134640 }, { "epoch": 0.2720015190875778, "grad_norm": 340.7070007324219, "learning_rate": 9.152007311681645e-06, "loss": 22.5974, "step": 134650 }, { "epoch": 0.2720217197202616, "grad_norm": 427.43450927734375, "learning_rate": 9.151812814109231e-06, "loss": 22.4301, "step": 134660 }, { "epoch": 0.27204192035294544, "grad_norm": 319.4281005859375, "learning_rate": 9.151618296301383e-06, "loss": 14.1845, "step": 134670 }, { "epoch": 0.27206212098562926, "grad_norm": 189.7301788330078, "learning_rate": 9.151423758259052e-06, "loss": 29.2532, "step": 134680 }, { "epoch": 0.2720823216183131, "grad_norm": 566.9752807617188, "learning_rate": 9.151229199983185e-06, "loss": 29.6061, "step": 134690 }, { "epoch": 0.2721025222509969, "grad_norm": 69.69047546386719, "learning_rate": 9.15103462147473e-06, "loss": 15.4756, "step": 134700 }, { "epoch": 0.2721227228836807, "grad_norm": 808.6002807617188, "learning_rate": 9.150840022734637e-06, "loss": 22.1024, "step": 134710 }, { "epoch": 0.27214292351636454, "grad_norm": 136.4609832763672, "learning_rate": 9.150645403763852e-06, "loss": 17.2627, "step": 134720 }, { "epoch": 0.27216312414904836, "grad_norm": 1079.5589599609375, "learning_rate": 9.150450764563324e-06, "loss": 45.1729, "step": 134730 }, { "epoch": 0.2721833247817322, "grad_norm": 314.94708251953125, "learning_rate": 9.150256105134003e-06, "loss": 10.2344, "step": 134740 }, { "epoch": 0.272203525414416, "grad_norm": 426.2752380371094, "learning_rate": 9.150061425476839e-06, "loss": 22.0265, "step": 134750 }, { "epoch": 0.27222372604709977, "grad_norm": 384.5572204589844, "learning_rate": 9.149866725592777e-06, "loss": 28.3392, "step": 134760 }, { "epoch": 0.2722439266797836, "grad_norm": 354.8988342285156, "learning_rate": 9.149672005482768e-06, "loss": 24.0602, "step": 134770 }, { "epoch": 0.2722641273124674, "grad_norm": 716.0555419921875, "learning_rate": 9.149477265147762e-06, "loss": 24.6733, "step": 134780 }, { "epoch": 0.2722843279451512, "grad_norm": 312.19732666015625, "learning_rate": 9.149282504588706e-06, "loss": 38.7751, "step": 134790 }, { "epoch": 0.27230452857783505, "grad_norm": 202.42605590820312, "learning_rate": 9.14908772380655e-06, "loss": 17.9143, "step": 134800 }, { "epoch": 0.27232472921051887, "grad_norm": 249.84127807617188, "learning_rate": 9.148892922802245e-06, "loss": 41.8903, "step": 134810 }, { "epoch": 0.2723449298432027, "grad_norm": 289.1515808105469, "learning_rate": 9.148698101576737e-06, "loss": 36.8615, "step": 134820 }, { "epoch": 0.2723651304758865, "grad_norm": 840.993408203125, "learning_rate": 9.14850326013098e-06, "loss": 24.2888, "step": 134830 }, { "epoch": 0.2723853311085703, "grad_norm": 388.4644470214844, "learning_rate": 9.14830839846592e-06, "loss": 20.4274, "step": 134840 }, { "epoch": 0.27240553174125415, "grad_norm": 184.5399627685547, "learning_rate": 9.148113516582508e-06, "loss": 10.6888, "step": 134850 }, { "epoch": 0.27242573237393797, "grad_norm": 655.6101684570312, 
"learning_rate": 9.147918614481693e-06, "loss": 29.1236, "step": 134860 }, { "epoch": 0.2724459330066218, "grad_norm": 158.5425567626953, "learning_rate": 9.147723692164428e-06, "loss": 23.4754, "step": 134870 }, { "epoch": 0.2724661336393056, "grad_norm": 248.134765625, "learning_rate": 9.14752874963166e-06, "loss": 22.1439, "step": 134880 }, { "epoch": 0.27248633427198937, "grad_norm": 336.59246826171875, "learning_rate": 9.147333786884338e-06, "loss": 21.4644, "step": 134890 }, { "epoch": 0.2725065349046732, "grad_norm": 280.18597412109375, "learning_rate": 9.147138803923417e-06, "loss": 20.4713, "step": 134900 }, { "epoch": 0.272526735537357, "grad_norm": 463.82501220703125, "learning_rate": 9.146943800749842e-06, "loss": 23.571, "step": 134910 }, { "epoch": 0.27254693617004083, "grad_norm": 390.9377746582031, "learning_rate": 9.146748777364567e-06, "loss": 20.9144, "step": 134920 }, { "epoch": 0.27256713680272465, "grad_norm": 326.0651550292969, "learning_rate": 9.146553733768541e-06, "loss": 13.6753, "step": 134930 }, { "epoch": 0.27258733743540847, "grad_norm": 304.681396484375, "learning_rate": 9.146358669962717e-06, "loss": 14.8385, "step": 134940 }, { "epoch": 0.2726075380680923, "grad_norm": 377.82598876953125, "learning_rate": 9.146163585948041e-06, "loss": 24.0972, "step": 134950 }, { "epoch": 0.2726277387007761, "grad_norm": 358.47674560546875, "learning_rate": 9.145968481725466e-06, "loss": 21.5306, "step": 134960 }, { "epoch": 0.27264793933345993, "grad_norm": 305.32037353515625, "learning_rate": 9.145773357295947e-06, "loss": 26.0249, "step": 134970 }, { "epoch": 0.27266813996614375, "grad_norm": 463.0208740234375, "learning_rate": 9.14557821266043e-06, "loss": 15.3451, "step": 134980 }, { "epoch": 0.27268834059882757, "grad_norm": 291.29364013671875, "learning_rate": 9.145383047819867e-06, "loss": 18.1568, "step": 134990 }, { "epoch": 0.2727085412315114, "grad_norm": 193.42633056640625, "learning_rate": 9.145187862775208e-06, "loss": 17.3333, "step": 135000 }, { "epoch": 0.2727287418641952, "grad_norm": 1213.7652587890625, "learning_rate": 9.144992657527409e-06, "loss": 28.9438, "step": 135010 }, { "epoch": 0.272748942496879, "grad_norm": 187.8778076171875, "learning_rate": 9.144797432077418e-06, "loss": 15.6976, "step": 135020 }, { "epoch": 0.2727691431295628, "grad_norm": 19.426258087158203, "learning_rate": 9.144602186426186e-06, "loss": 8.3403, "step": 135030 }, { "epoch": 0.2727893437622466, "grad_norm": 417.7514343261719, "learning_rate": 9.144406920574667e-06, "loss": 20.5598, "step": 135040 }, { "epoch": 0.27280954439493044, "grad_norm": 381.2732238769531, "learning_rate": 9.14421163452381e-06, "loss": 22.3431, "step": 135050 }, { "epoch": 0.27282974502761426, "grad_norm": 57.7036247253418, "learning_rate": 9.144016328274569e-06, "loss": 20.7668, "step": 135060 }, { "epoch": 0.2728499456602981, "grad_norm": 513.7007446289062, "learning_rate": 9.143821001827895e-06, "loss": 17.7617, "step": 135070 }, { "epoch": 0.2728701462929819, "grad_norm": 185.3470458984375, "learning_rate": 9.14362565518474e-06, "loss": 14.2232, "step": 135080 }, { "epoch": 0.2728903469256657, "grad_norm": 332.8409729003906, "learning_rate": 9.143430288346056e-06, "loss": 12.8592, "step": 135090 }, { "epoch": 0.27291054755834954, "grad_norm": 732.3867797851562, "learning_rate": 9.143234901312794e-06, "loss": 35.1978, "step": 135100 }, { "epoch": 0.27293074819103336, "grad_norm": 365.0626525878906, "learning_rate": 9.143039494085911e-06, "loss": 11.9089, "step": 135110 }, { "epoch": 
0.2729509488237172, "grad_norm": 65.0551528930664, "learning_rate": 9.142844066666352e-06, "loss": 17.9208, "step": 135120 }, { "epoch": 0.272971149456401, "grad_norm": 631.0284423828125, "learning_rate": 9.142648619055074e-06, "loss": 20.9481, "step": 135130 }, { "epoch": 0.2729913500890848, "grad_norm": 689.35205078125, "learning_rate": 9.142453151253033e-06, "loss": 22.1548, "step": 135140 }, { "epoch": 0.2730115507217686, "grad_norm": 425.0822448730469, "learning_rate": 9.142257663261173e-06, "loss": 11.7964, "step": 135150 }, { "epoch": 0.2730317513544524, "grad_norm": 463.693359375, "learning_rate": 9.142062155080455e-06, "loss": 32.4459, "step": 135160 }, { "epoch": 0.2730519519871362, "grad_norm": 76.3325424194336, "learning_rate": 9.141866626711826e-06, "loss": 26.7998, "step": 135170 }, { "epoch": 0.27307215261982004, "grad_norm": 222.16542053222656, "learning_rate": 9.141671078156242e-06, "loss": 16.4523, "step": 135180 }, { "epoch": 0.27309235325250386, "grad_norm": 256.1183776855469, "learning_rate": 9.141475509414656e-06, "loss": 12.4589, "step": 135190 }, { "epoch": 0.2731125538851877, "grad_norm": 406.1766357421875, "learning_rate": 9.141279920488021e-06, "loss": 33.166, "step": 135200 }, { "epoch": 0.2731327545178715, "grad_norm": 577.5496215820312, "learning_rate": 9.141084311377291e-06, "loss": 17.6375, "step": 135210 }, { "epoch": 0.2731529551505553, "grad_norm": 258.7943115234375, "learning_rate": 9.140888682083416e-06, "loss": 22.6671, "step": 135220 }, { "epoch": 0.27317315578323914, "grad_norm": 568.5740966796875, "learning_rate": 9.140693032607353e-06, "loss": 23.2073, "step": 135230 }, { "epoch": 0.27319335641592296, "grad_norm": 217.25416564941406, "learning_rate": 9.140497362950054e-06, "loss": 20.6969, "step": 135240 }, { "epoch": 0.2732135570486068, "grad_norm": 450.7647705078125, "learning_rate": 9.140301673112472e-06, "loss": 20.2831, "step": 135250 }, { "epoch": 0.2732337576812906, "grad_norm": 381.1827392578125, "learning_rate": 9.140105963095563e-06, "loss": 21.1977, "step": 135260 }, { "epoch": 0.2732539583139744, "grad_norm": 104.4590072631836, "learning_rate": 9.13991023290028e-06, "loss": 51.9077, "step": 135270 }, { "epoch": 0.2732741589466582, "grad_norm": 251.69143676757812, "learning_rate": 9.139714482527577e-06, "loss": 17.3639, "step": 135280 }, { "epoch": 0.273294359579342, "grad_norm": 194.8394012451172, "learning_rate": 9.139518711978406e-06, "loss": 26.8899, "step": 135290 }, { "epoch": 0.2733145602120258, "grad_norm": 258.308349609375, "learning_rate": 9.139322921253724e-06, "loss": 32.5457, "step": 135300 }, { "epoch": 0.27333476084470965, "grad_norm": 736.0946044921875, "learning_rate": 9.139127110354484e-06, "loss": 29.4364, "step": 135310 }, { "epoch": 0.27335496147739347, "grad_norm": 966.8939819335938, "learning_rate": 9.13893127928164e-06, "loss": 32.0747, "step": 135320 }, { "epoch": 0.2733751621100773, "grad_norm": 491.54620361328125, "learning_rate": 9.138735428036147e-06, "loss": 22.1527, "step": 135330 }, { "epoch": 0.2733953627427611, "grad_norm": 543.1093139648438, "learning_rate": 9.13853955661896e-06, "loss": 35.6467, "step": 135340 }, { "epoch": 0.2734155633754449, "grad_norm": 560.1389770507812, "learning_rate": 9.138343665031033e-06, "loss": 27.8347, "step": 135350 }, { "epoch": 0.27343576400812875, "grad_norm": 732.5491943359375, "learning_rate": 9.13814775327332e-06, "loss": 23.0842, "step": 135360 }, { "epoch": 0.27345596464081257, "grad_norm": 508.7496337890625, "learning_rate": 9.137951821346779e-06, "loss": 31.3813, 
"step": 135370 }, { "epoch": 0.2734761652734964, "grad_norm": 120.45158386230469, "learning_rate": 9.13775586925236e-06, "loss": 31.4389, "step": 135380 }, { "epoch": 0.2734963659061802, "grad_norm": 483.6618347167969, "learning_rate": 9.137559896991023e-06, "loss": 22.703, "step": 135390 }, { "epoch": 0.27351656653886397, "grad_norm": 91.70558166503906, "learning_rate": 9.13736390456372e-06, "loss": 24.9209, "step": 135400 }, { "epoch": 0.2735367671715478, "grad_norm": 452.6220397949219, "learning_rate": 9.137167891971407e-06, "loss": 19.4814, "step": 135410 }, { "epoch": 0.2735569678042316, "grad_norm": 547.836669921875, "learning_rate": 9.136971859215041e-06, "loss": 36.5402, "step": 135420 }, { "epoch": 0.27357716843691543, "grad_norm": 333.5689697265625, "learning_rate": 9.136775806295572e-06, "loss": 19.448, "step": 135430 }, { "epoch": 0.27359736906959925, "grad_norm": 519.5537109375, "learning_rate": 9.136579733213963e-06, "loss": 30.3864, "step": 135440 }, { "epoch": 0.27361756970228307, "grad_norm": 525.2999267578125, "learning_rate": 9.136383639971166e-06, "loss": 11.6618, "step": 135450 }, { "epoch": 0.2736377703349669, "grad_norm": 274.5901794433594, "learning_rate": 9.136187526568134e-06, "loss": 17.5567, "step": 135460 }, { "epoch": 0.2736579709676507, "grad_norm": 351.3760986328125, "learning_rate": 9.135991393005827e-06, "loss": 31.458, "step": 135470 }, { "epoch": 0.27367817160033453, "grad_norm": 220.59791564941406, "learning_rate": 9.135795239285201e-06, "loss": 20.3189, "step": 135480 }, { "epoch": 0.27369837223301835, "grad_norm": 749.9730834960938, "learning_rate": 9.13559906540721e-06, "loss": 38.2688, "step": 135490 }, { "epoch": 0.27371857286570217, "grad_norm": 588.3572998046875, "learning_rate": 9.13540287137281e-06, "loss": 25.7997, "step": 135500 }, { "epoch": 0.273738773498386, "grad_norm": 754.5552978515625, "learning_rate": 9.135206657182958e-06, "loss": 12.5387, "step": 135510 }, { "epoch": 0.2737589741310698, "grad_norm": 377.010498046875, "learning_rate": 9.13501042283861e-06, "loss": 12.7068, "step": 135520 }, { "epoch": 0.2737791747637536, "grad_norm": 462.2884521484375, "learning_rate": 9.134814168340725e-06, "loss": 19.7204, "step": 135530 }, { "epoch": 0.2737993753964374, "grad_norm": 245.97598266601562, "learning_rate": 9.134617893690253e-06, "loss": 11.4352, "step": 135540 }, { "epoch": 0.2738195760291212, "grad_norm": 366.9969482421875, "learning_rate": 9.13442159888816e-06, "loss": 31.2602, "step": 135550 }, { "epoch": 0.27383977666180503, "grad_norm": 567.2673950195312, "learning_rate": 9.134225283935395e-06, "loss": 19.0601, "step": 135560 }, { "epoch": 0.27385997729448885, "grad_norm": 236.04896545410156, "learning_rate": 9.134028948832915e-06, "loss": 19.0393, "step": 135570 }, { "epoch": 0.2738801779271727, "grad_norm": 197.89019775390625, "learning_rate": 9.133832593581683e-06, "loss": 17.6415, "step": 135580 }, { "epoch": 0.2739003785598565, "grad_norm": 616.6891479492188, "learning_rate": 9.133636218182652e-06, "loss": 45.4994, "step": 135590 }, { "epoch": 0.2739205791925403, "grad_norm": 350.0677490234375, "learning_rate": 9.133439822636779e-06, "loss": 28.2331, "step": 135600 }, { "epoch": 0.27394077982522413, "grad_norm": 313.6117248535156, "learning_rate": 9.13324340694502e-06, "loss": 18.7268, "step": 135610 }, { "epoch": 0.27396098045790795, "grad_norm": 572.9440307617188, "learning_rate": 9.133046971108335e-06, "loss": 10.22, "step": 135620 }, { "epoch": 0.2739811810905918, "grad_norm": 337.5039367675781, "learning_rate": 
9.13285051512768e-06, "loss": 17.1278, "step": 135630 }, { "epoch": 0.2740013817232756, "grad_norm": 347.3039855957031, "learning_rate": 9.132654039004015e-06, "loss": 14.5573, "step": 135640 }, { "epoch": 0.2740215823559594, "grad_norm": 186.5449676513672, "learning_rate": 9.132457542738292e-06, "loss": 9.6061, "step": 135650 }, { "epoch": 0.2740417829886432, "grad_norm": 577.7155151367188, "learning_rate": 9.132261026331473e-06, "loss": 20.9212, "step": 135660 }, { "epoch": 0.274061983621327, "grad_norm": 642.8598022460938, "learning_rate": 9.132064489784516e-06, "loss": 19.4279, "step": 135670 }, { "epoch": 0.2740821842540108, "grad_norm": 127.74354553222656, "learning_rate": 9.131867933098379e-06, "loss": 25.3777, "step": 135680 }, { "epoch": 0.27410238488669464, "grad_norm": 364.76190185546875, "learning_rate": 9.131671356274016e-06, "loss": 24.1008, "step": 135690 }, { "epoch": 0.27412258551937846, "grad_norm": 451.497802734375, "learning_rate": 9.13147475931239e-06, "loss": 13.1718, "step": 135700 }, { "epoch": 0.2741427861520623, "grad_norm": 819.51318359375, "learning_rate": 9.131278142214457e-06, "loss": 19.6711, "step": 135710 }, { "epoch": 0.2741629867847461, "grad_norm": 478.2668151855469, "learning_rate": 9.131081504981175e-06, "loss": 15.8314, "step": 135720 }, { "epoch": 0.2741831874174299, "grad_norm": 291.201416015625, "learning_rate": 9.1308848476135e-06, "loss": 44.0003, "step": 135730 }, { "epoch": 0.27420338805011374, "grad_norm": 237.51837158203125, "learning_rate": 9.130688170112398e-06, "loss": 30.6632, "step": 135740 }, { "epoch": 0.27422358868279756, "grad_norm": 571.5438232421875, "learning_rate": 9.130491472478819e-06, "loss": 17.9365, "step": 135750 }, { "epoch": 0.2742437893154814, "grad_norm": 790.66259765625, "learning_rate": 9.130294754713728e-06, "loss": 20.6232, "step": 135760 }, { "epoch": 0.2742639899481652, "grad_norm": 28.99637222290039, "learning_rate": 9.13009801681808e-06, "loss": 30.6997, "step": 135770 }, { "epoch": 0.274284190580849, "grad_norm": 304.7511291503906, "learning_rate": 9.129901258792838e-06, "loss": 53.585, "step": 135780 }, { "epoch": 0.2743043912135328, "grad_norm": 62.51104736328125, "learning_rate": 9.129704480638955e-06, "loss": 16.246, "step": 135790 }, { "epoch": 0.2743245918462166, "grad_norm": 206.17721557617188, "learning_rate": 9.129507682357393e-06, "loss": 11.2183, "step": 135800 }, { "epoch": 0.2743447924789004, "grad_norm": 688.3556518554688, "learning_rate": 9.129310863949113e-06, "loss": 46.0043, "step": 135810 }, { "epoch": 0.27436499311158424, "grad_norm": 758.4866333007812, "learning_rate": 9.129114025415071e-06, "loss": 28.1612, "step": 135820 }, { "epoch": 0.27438519374426806, "grad_norm": 221.9129180908203, "learning_rate": 9.12891716675623e-06, "loss": 29.148, "step": 135830 }, { "epoch": 0.2744053943769519, "grad_norm": 1008.09033203125, "learning_rate": 9.128720287973547e-06, "loss": 23.7683, "step": 135840 }, { "epoch": 0.2744255950096357, "grad_norm": 1713.9359130859375, "learning_rate": 9.128523389067983e-06, "loss": 31.9497, "step": 135850 }, { "epoch": 0.2744457956423195, "grad_norm": 713.1209106445312, "learning_rate": 9.128326470040495e-06, "loss": 21.8295, "step": 135860 }, { "epoch": 0.27446599627500334, "grad_norm": 228.49380493164062, "learning_rate": 9.128129530892044e-06, "loss": 15.5112, "step": 135870 }, { "epoch": 0.27448619690768716, "grad_norm": 429.3661804199219, "learning_rate": 9.127932571623592e-06, "loss": 18.5801, "step": 135880 }, { "epoch": 0.274506397540371, "grad_norm": 
302.1600341796875, "learning_rate": 9.127735592236097e-06, "loss": 14.2539, "step": 135890 }, { "epoch": 0.2745265981730548, "grad_norm": 334.8915100097656, "learning_rate": 9.12753859273052e-06, "loss": 24.5498, "step": 135900 }, { "epoch": 0.2745467988057386, "grad_norm": 91.45170593261719, "learning_rate": 9.127341573107819e-06, "loss": 21.5034, "step": 135910 }, { "epoch": 0.2745669994384224, "grad_norm": 446.6305236816406, "learning_rate": 9.127144533368956e-06, "loss": 23.2795, "step": 135920 }, { "epoch": 0.2745872000711062, "grad_norm": 672.91455078125, "learning_rate": 9.126947473514891e-06, "loss": 45.7162, "step": 135930 }, { "epoch": 0.27460740070379, "grad_norm": 316.0632629394531, "learning_rate": 9.126750393546587e-06, "loss": 28.0067, "step": 135940 }, { "epoch": 0.27462760133647385, "grad_norm": 331.94342041015625, "learning_rate": 9.126553293465e-06, "loss": 22.472, "step": 135950 }, { "epoch": 0.27464780196915767, "grad_norm": 296.5798645019531, "learning_rate": 9.126356173271092e-06, "loss": 28.1315, "step": 135960 }, { "epoch": 0.2746680026018415, "grad_norm": 1046.6729736328125, "learning_rate": 9.126159032965825e-06, "loss": 34.1038, "step": 135970 }, { "epoch": 0.2746882032345253, "grad_norm": 691.593994140625, "learning_rate": 9.125961872550159e-06, "loss": 32.512, "step": 135980 }, { "epoch": 0.2747084038672091, "grad_norm": 184.54931640625, "learning_rate": 9.125764692025057e-06, "loss": 17.5683, "step": 135990 }, { "epoch": 0.27472860449989295, "grad_norm": 220.64520263671875, "learning_rate": 9.125567491391476e-06, "loss": 16.7871, "step": 136000 }, { "epoch": 0.27474880513257677, "grad_norm": 221.02293395996094, "learning_rate": 9.125370270650381e-06, "loss": 11.8503, "step": 136010 }, { "epoch": 0.2747690057652606, "grad_norm": 388.2686462402344, "learning_rate": 9.125173029802732e-06, "loss": 23.263, "step": 136020 }, { "epoch": 0.2747892063979444, "grad_norm": 117.8719253540039, "learning_rate": 9.124975768849489e-06, "loss": 14.5228, "step": 136030 }, { "epoch": 0.27480940703062817, "grad_norm": 450.4751281738281, "learning_rate": 9.124778487791615e-06, "loss": 17.4647, "step": 136040 }, { "epoch": 0.274829607663312, "grad_norm": 318.2745361328125, "learning_rate": 9.12458118663007e-06, "loss": 16.2264, "step": 136050 }, { "epoch": 0.2748498082959958, "grad_norm": 423.7740173339844, "learning_rate": 9.124383865365817e-06, "loss": 17.5846, "step": 136060 }, { "epoch": 0.27487000892867963, "grad_norm": 872.8343505859375, "learning_rate": 9.124186523999818e-06, "loss": 40.837, "step": 136070 }, { "epoch": 0.27489020956136345, "grad_norm": 488.3759460449219, "learning_rate": 9.123989162533032e-06, "loss": 21.9602, "step": 136080 }, { "epoch": 0.27491041019404727, "grad_norm": 255.27935791015625, "learning_rate": 9.123791780966425e-06, "loss": 24.8364, "step": 136090 }, { "epoch": 0.2749306108267311, "grad_norm": 222.71697998046875, "learning_rate": 9.123594379300956e-06, "loss": 24.8809, "step": 136100 }, { "epoch": 0.2749508114594149, "grad_norm": 311.8800964355469, "learning_rate": 9.12339695753759e-06, "loss": 21.3448, "step": 136110 }, { "epoch": 0.27497101209209873, "grad_norm": 176.24217224121094, "learning_rate": 9.123199515677285e-06, "loss": 17.7472, "step": 136120 }, { "epoch": 0.27499121272478255, "grad_norm": 332.91845703125, "learning_rate": 9.123002053721006e-06, "loss": 17.2235, "step": 136130 }, { "epoch": 0.27501141335746637, "grad_norm": 164.54354858398438, "learning_rate": 9.122804571669717e-06, "loss": 24.1397, "step": 136140 }, { "epoch": 
0.2750316139901502, "grad_norm": 601.98193359375, "learning_rate": 9.122607069524377e-06, "loss": 17.1016, "step": 136150 }, { "epoch": 0.275051814622834, "grad_norm": 716.9262084960938, "learning_rate": 9.122409547285948e-06, "loss": 17.5064, "step": 136160 }, { "epoch": 0.2750720152555178, "grad_norm": 343.188720703125, "learning_rate": 9.122212004955399e-06, "loss": 14.1656, "step": 136170 }, { "epoch": 0.2750922158882016, "grad_norm": 482.0539245605469, "learning_rate": 9.122014442533686e-06, "loss": 22.3766, "step": 136180 }, { "epoch": 0.2751124165208854, "grad_norm": 420.0506591796875, "learning_rate": 9.121816860021776e-06, "loss": 21.8825, "step": 136190 }, { "epoch": 0.27513261715356924, "grad_norm": 434.53692626953125, "learning_rate": 9.12161925742063e-06, "loss": 23.3705, "step": 136200 }, { "epoch": 0.27515281778625306, "grad_norm": 308.1763916015625, "learning_rate": 9.121421634731211e-06, "loss": 20.2883, "step": 136210 }, { "epoch": 0.2751730184189369, "grad_norm": 109.4878158569336, "learning_rate": 9.121223991954484e-06, "loss": 26.7603, "step": 136220 }, { "epoch": 0.2751932190516207, "grad_norm": 296.6226806640625, "learning_rate": 9.12102632909141e-06, "loss": 14.9261, "step": 136230 }, { "epoch": 0.2752134196843045, "grad_norm": 22.82421875, "learning_rate": 9.120828646142954e-06, "loss": 12.6671, "step": 136240 }, { "epoch": 0.27523362031698834, "grad_norm": 100.18248748779297, "learning_rate": 9.120630943110078e-06, "loss": 23.0864, "step": 136250 }, { "epoch": 0.27525382094967216, "grad_norm": 198.57740783691406, "learning_rate": 9.120433219993749e-06, "loss": 22.5658, "step": 136260 }, { "epoch": 0.275274021582356, "grad_norm": 517.2141723632812, "learning_rate": 9.120235476794926e-06, "loss": 18.6785, "step": 136270 }, { "epoch": 0.2752942222150398, "grad_norm": 132.4513702392578, "learning_rate": 9.120037713514575e-06, "loss": 22.0669, "step": 136280 }, { "epoch": 0.2753144228477236, "grad_norm": 476.9325256347656, "learning_rate": 9.119839930153663e-06, "loss": 26.4279, "step": 136290 }, { "epoch": 0.2753346234804074, "grad_norm": 94.77568817138672, "learning_rate": 9.119642126713147e-06, "loss": 36.7561, "step": 136300 }, { "epoch": 0.2753548241130912, "grad_norm": 828.9990234375, "learning_rate": 9.119444303193997e-06, "loss": 17.9698, "step": 136310 }, { "epoch": 0.275375024745775, "grad_norm": 971.7246704101562, "learning_rate": 9.119246459597173e-06, "loss": 28.4067, "step": 136320 }, { "epoch": 0.27539522537845884, "grad_norm": 543.5722045898438, "learning_rate": 9.119048595923643e-06, "loss": 19.151, "step": 136330 }, { "epoch": 0.27541542601114266, "grad_norm": 295.9839172363281, "learning_rate": 9.11885071217437e-06, "loss": 34.4547, "step": 136340 }, { "epoch": 0.2754356266438265, "grad_norm": 841.3892211914062, "learning_rate": 9.11865280835032e-06, "loss": 24.7327, "step": 136350 }, { "epoch": 0.2754558272765103, "grad_norm": 487.9488220214844, "learning_rate": 9.118454884452452e-06, "loss": 35.7545, "step": 136360 }, { "epoch": 0.2754760279091941, "grad_norm": 439.5728454589844, "learning_rate": 9.118256940481735e-06, "loss": 14.4582, "step": 136370 }, { "epoch": 0.27549622854187794, "grad_norm": 16.771465301513672, "learning_rate": 9.118058976439134e-06, "loss": 16.6588, "step": 136380 }, { "epoch": 0.27551642917456176, "grad_norm": 1496.6934814453125, "learning_rate": 9.117860992325613e-06, "loss": 37.2434, "step": 136390 }, { "epoch": 0.2755366298072456, "grad_norm": 310.22509765625, "learning_rate": 9.117662988142138e-06, "loss": 22.0505, 
"step": 136400 }, { "epoch": 0.2755568304399294, "grad_norm": 385.6380615234375, "learning_rate": 9.117464963889672e-06, "loss": 25.1847, "step": 136410 }, { "epoch": 0.2755770310726132, "grad_norm": 446.22418212890625, "learning_rate": 9.117266919569181e-06, "loss": 24.8115, "step": 136420 }, { "epoch": 0.275597231705297, "grad_norm": 458.6966857910156, "learning_rate": 9.11706885518163e-06, "loss": 18.2814, "step": 136430 }, { "epoch": 0.2756174323379808, "grad_norm": 117.55816650390625, "learning_rate": 9.116870770727986e-06, "loss": 17.5085, "step": 136440 }, { "epoch": 0.2756376329706646, "grad_norm": 279.3009338378906, "learning_rate": 9.116672666209211e-06, "loss": 19.4594, "step": 136450 }, { "epoch": 0.27565783360334845, "grad_norm": 293.4078674316406, "learning_rate": 9.116474541626277e-06, "loss": 32.4162, "step": 136460 }, { "epoch": 0.27567803423603227, "grad_norm": 301.1860656738281, "learning_rate": 9.116276396980141e-06, "loss": 14.9995, "step": 136470 }, { "epoch": 0.2756982348687161, "grad_norm": 8.642691612243652, "learning_rate": 9.116078232271774e-06, "loss": 24.8244, "step": 136480 }, { "epoch": 0.2757184355013999, "grad_norm": 378.1164245605469, "learning_rate": 9.115880047502142e-06, "loss": 11.2514, "step": 136490 }, { "epoch": 0.2757386361340837, "grad_norm": 299.1815185546875, "learning_rate": 9.115681842672211e-06, "loss": 18.7732, "step": 136500 }, { "epoch": 0.27575883676676755, "grad_norm": 486.540283203125, "learning_rate": 9.115483617782943e-06, "loss": 32.1482, "step": 136510 }, { "epoch": 0.27577903739945137, "grad_norm": 346.2464904785156, "learning_rate": 9.115285372835309e-06, "loss": 27.7063, "step": 136520 }, { "epoch": 0.2757992380321352, "grad_norm": 408.13946533203125, "learning_rate": 9.115087107830272e-06, "loss": 35.1097, "step": 136530 }, { "epoch": 0.275819438664819, "grad_norm": 242.6774444580078, "learning_rate": 9.114888822768801e-06, "loss": 13.6486, "step": 136540 }, { "epoch": 0.2758396392975028, "grad_norm": 289.8144226074219, "learning_rate": 9.11469051765186e-06, "loss": 19.7283, "step": 136550 }, { "epoch": 0.2758598399301866, "grad_norm": 250.8406219482422, "learning_rate": 9.114492192480416e-06, "loss": 17.4547, "step": 136560 }, { "epoch": 0.2758800405628704, "grad_norm": 597.64599609375, "learning_rate": 9.114293847255437e-06, "loss": 23.1336, "step": 136570 }, { "epoch": 0.27590024119555423, "grad_norm": 477.0276184082031, "learning_rate": 9.114095481977887e-06, "loss": 30.7469, "step": 136580 }, { "epoch": 0.27592044182823805, "grad_norm": 411.8534240722656, "learning_rate": 9.113897096648735e-06, "loss": 17.9106, "step": 136590 }, { "epoch": 0.27594064246092187, "grad_norm": 419.3416442871094, "learning_rate": 9.11369869126895e-06, "loss": 23.661, "step": 136600 }, { "epoch": 0.2759608430936057, "grad_norm": 491.56884765625, "learning_rate": 9.113500265839495e-06, "loss": 28.9299, "step": 136610 }, { "epoch": 0.2759810437262895, "grad_norm": 1747.8265380859375, "learning_rate": 9.113301820361339e-06, "loss": 41.6999, "step": 136620 }, { "epoch": 0.27600124435897333, "grad_norm": 372.26727294921875, "learning_rate": 9.113103354835448e-06, "loss": 30.6698, "step": 136630 }, { "epoch": 0.27602144499165715, "grad_norm": 421.3455810546875, "learning_rate": 9.112904869262791e-06, "loss": 28.1034, "step": 136640 }, { "epoch": 0.27604164562434097, "grad_norm": 437.5903625488281, "learning_rate": 9.112706363644334e-06, "loss": 42.669, "step": 136650 }, { "epoch": 0.2760618462570248, "grad_norm": 630.2725830078125, "learning_rate": 
9.112507837981045e-06, "loss": 28.7041, "step": 136660 }, { "epoch": 0.2760820468897086, "grad_norm": 268.4493408203125, "learning_rate": 9.112309292273891e-06, "loss": 31.938, "step": 136670 }, { "epoch": 0.2761022475223924, "grad_norm": 440.1805114746094, "learning_rate": 9.112110726523841e-06, "loss": 28.1518, "step": 136680 }, { "epoch": 0.2761224481550762, "grad_norm": 338.7571716308594, "learning_rate": 9.111912140731862e-06, "loss": 26.5386, "step": 136690 }, { "epoch": 0.27614264878776, "grad_norm": 194.66127014160156, "learning_rate": 9.111713534898923e-06, "loss": 12.6745, "step": 136700 }, { "epoch": 0.27616284942044383, "grad_norm": 289.3456726074219, "learning_rate": 9.111514909025988e-06, "loss": 29.55, "step": 136710 }, { "epoch": 0.27618305005312765, "grad_norm": 634.5272216796875, "learning_rate": 9.11131626311403e-06, "loss": 16.9974, "step": 136720 }, { "epoch": 0.2762032506858115, "grad_norm": 572.50439453125, "learning_rate": 9.111117597164016e-06, "loss": 25.0887, "step": 136730 }, { "epoch": 0.2762234513184953, "grad_norm": 303.0616760253906, "learning_rate": 9.110918911176911e-06, "loss": 12.9918, "step": 136740 }, { "epoch": 0.2762436519511791, "grad_norm": 376.22662353515625, "learning_rate": 9.110720205153688e-06, "loss": 17.0476, "step": 136750 }, { "epoch": 0.27626385258386293, "grad_norm": 270.92962646484375, "learning_rate": 9.110521479095314e-06, "loss": 27.9115, "step": 136760 }, { "epoch": 0.27628405321654675, "grad_norm": 160.26510620117188, "learning_rate": 9.110322733002755e-06, "loss": 30.4222, "step": 136770 }, { "epoch": 0.2763042538492306, "grad_norm": 377.6893005371094, "learning_rate": 9.11012396687698e-06, "loss": 22.0914, "step": 136780 }, { "epoch": 0.2763244544819144, "grad_norm": 424.3580017089844, "learning_rate": 9.10992518071896e-06, "loss": 38.3696, "step": 136790 }, { "epoch": 0.2763446551145982, "grad_norm": 164.9799041748047, "learning_rate": 9.109726374529666e-06, "loss": 15.7721, "step": 136800 }, { "epoch": 0.276364855747282, "grad_norm": 457.60418701171875, "learning_rate": 9.109527548310062e-06, "loss": 11.6539, "step": 136810 }, { "epoch": 0.2763850563799658, "grad_norm": 350.32342529296875, "learning_rate": 9.10932870206112e-06, "loss": 25.4811, "step": 136820 }, { "epoch": 0.2764052570126496, "grad_norm": 198.0208740234375, "learning_rate": 9.109129835783808e-06, "loss": 15.2665, "step": 136830 }, { "epoch": 0.27642545764533344, "grad_norm": 669.3509521484375, "learning_rate": 9.108930949479096e-06, "loss": 18.8688, "step": 136840 }, { "epoch": 0.27644565827801726, "grad_norm": 413.8048400878906, "learning_rate": 9.108732043147952e-06, "loss": 13.1434, "step": 136850 }, { "epoch": 0.2764658589107011, "grad_norm": 299.4035949707031, "learning_rate": 9.108533116791347e-06, "loss": 10.0517, "step": 136860 }, { "epoch": 0.2764860595433849, "grad_norm": 273.9521179199219, "learning_rate": 9.108334170410249e-06, "loss": 18.742, "step": 136870 }, { "epoch": 0.2765062601760687, "grad_norm": 104.94783020019531, "learning_rate": 9.10813520400563e-06, "loss": 22.7361, "step": 136880 }, { "epoch": 0.27652646080875254, "grad_norm": 210.21517944335938, "learning_rate": 9.107936217578457e-06, "loss": 18.2072, "step": 136890 }, { "epoch": 0.27654666144143636, "grad_norm": 998.265380859375, "learning_rate": 9.107737211129702e-06, "loss": 39.4067, "step": 136900 }, { "epoch": 0.2765668620741202, "grad_norm": 459.36737060546875, "learning_rate": 9.107538184660333e-06, "loss": 14.1219, "step": 136910 }, { "epoch": 0.276587062706804, "grad_norm": 
110.8598861694336, "learning_rate": 9.107339138171321e-06, "loss": 16.6522, "step": 136920 }, { "epoch": 0.2766072633394878, "grad_norm": 385.3807373046875, "learning_rate": 9.107140071663637e-06, "loss": 20.694, "step": 136930 }, { "epoch": 0.2766274639721716, "grad_norm": 422.10626220703125, "learning_rate": 9.106940985138251e-06, "loss": 18.0311, "step": 136940 }, { "epoch": 0.2766476646048554, "grad_norm": 811.509033203125, "learning_rate": 9.106741878596132e-06, "loss": 29.9824, "step": 136950 }, { "epoch": 0.2766678652375392, "grad_norm": 368.9154968261719, "learning_rate": 9.10654275203825e-06, "loss": 12.0087, "step": 136960 }, { "epoch": 0.27668806587022304, "grad_norm": 143.35433959960938, "learning_rate": 9.106343605465578e-06, "loss": 25.1284, "step": 136970 }, { "epoch": 0.27670826650290686, "grad_norm": 767.0995483398438, "learning_rate": 9.106144438879086e-06, "loss": 34.238, "step": 136980 }, { "epoch": 0.2767284671355907, "grad_norm": 163.25469970703125, "learning_rate": 9.105945252279743e-06, "loss": 24.4687, "step": 136990 }, { "epoch": 0.2767486677682745, "grad_norm": 538.8115234375, "learning_rate": 9.10574604566852e-06, "loss": 27.7695, "step": 137000 }, { "epoch": 0.2767688684009583, "grad_norm": 482.6521301269531, "learning_rate": 9.105546819046391e-06, "loss": 14.6953, "step": 137010 }, { "epoch": 0.27678906903364214, "grad_norm": 463.677734375, "learning_rate": 9.105347572414323e-06, "loss": 15.1554, "step": 137020 }, { "epoch": 0.27680926966632596, "grad_norm": 182.11375427246094, "learning_rate": 9.10514830577329e-06, "loss": 17.1029, "step": 137030 }, { "epoch": 0.2768294702990098, "grad_norm": 751.0150756835938, "learning_rate": 9.104949019124261e-06, "loss": 30.9878, "step": 137040 }, { "epoch": 0.2768496709316936, "grad_norm": 570.7824096679688, "learning_rate": 9.104749712468208e-06, "loss": 24.0046, "step": 137050 }, { "epoch": 0.2768698715643774, "grad_norm": 13508.3525390625, "learning_rate": 9.104550385806103e-06, "loss": 39.5268, "step": 137060 }, { "epoch": 0.2768900721970612, "grad_norm": 792.635009765625, "learning_rate": 9.104351039138919e-06, "loss": 27.5066, "step": 137070 }, { "epoch": 0.276910272829745, "grad_norm": 724.1927490234375, "learning_rate": 9.104151672467625e-06, "loss": 27.1776, "step": 137080 }, { "epoch": 0.27693047346242883, "grad_norm": 339.5395812988281, "learning_rate": 9.103952285793192e-06, "loss": 26.116, "step": 137090 }, { "epoch": 0.27695067409511265, "grad_norm": 293.49639892578125, "learning_rate": 9.103752879116595e-06, "loss": 27.5327, "step": 137100 }, { "epoch": 0.27697087472779647, "grad_norm": 701.0228881835938, "learning_rate": 9.103553452438803e-06, "loss": 21.5859, "step": 137110 }, { "epoch": 0.2769910753604803, "grad_norm": 801.4207763671875, "learning_rate": 9.10335400576079e-06, "loss": 15.4929, "step": 137120 }, { "epoch": 0.2770112759931641, "grad_norm": 552.5881958007812, "learning_rate": 9.103154539083527e-06, "loss": 29.8185, "step": 137130 }, { "epoch": 0.27703147662584793, "grad_norm": 1147.41943359375, "learning_rate": 9.102955052407986e-06, "loss": 24.181, "step": 137140 }, { "epoch": 0.27705167725853175, "grad_norm": 297.5654296875, "learning_rate": 9.102755545735141e-06, "loss": 21.4677, "step": 137150 }, { "epoch": 0.27707187789121557, "grad_norm": 833.3944702148438, "learning_rate": 9.102556019065962e-06, "loss": 27.3052, "step": 137160 }, { "epoch": 0.2770920785238994, "grad_norm": 398.9085388183594, "learning_rate": 9.102356472401424e-06, "loss": 10.619, "step": 137170 }, { "epoch": 
0.2771122791565832, "grad_norm": 830.4307861328125, "learning_rate": 9.102156905742497e-06, "loss": 29.5127, "step": 137180 }, { "epoch": 0.27713247978926697, "grad_norm": 403.244873046875, "learning_rate": 9.101957319090153e-06, "loss": 26.0385, "step": 137190 }, { "epoch": 0.2771526804219508, "grad_norm": 763.9085083007812, "learning_rate": 9.101757712445369e-06, "loss": 28.5399, "step": 137200 }, { "epoch": 0.2771728810546346, "grad_norm": 391.4627380371094, "learning_rate": 9.101558085809114e-06, "loss": 21.6998, "step": 137210 }, { "epoch": 0.27719308168731843, "grad_norm": 383.41033935546875, "learning_rate": 9.101358439182364e-06, "loss": 11.3443, "step": 137220 }, { "epoch": 0.27721328232000225, "grad_norm": 249.82388305664062, "learning_rate": 9.101158772566088e-06, "loss": 14.4067, "step": 137230 }, { "epoch": 0.27723348295268607, "grad_norm": 342.18780517578125, "learning_rate": 9.100959085961263e-06, "loss": 38.9308, "step": 137240 }, { "epoch": 0.2772536835853699, "grad_norm": 528.7169189453125, "learning_rate": 9.100759379368863e-06, "loss": 25.9011, "step": 137250 }, { "epoch": 0.2772738842180537, "grad_norm": 38.9533805847168, "learning_rate": 9.100559652789856e-06, "loss": 19.0543, "step": 137260 }, { "epoch": 0.27729408485073753, "grad_norm": 216.45538330078125, "learning_rate": 9.100359906225219e-06, "loss": 24.176, "step": 137270 }, { "epoch": 0.27731428548342135, "grad_norm": 231.65589904785156, "learning_rate": 9.100160139675925e-06, "loss": 15.8339, "step": 137280 }, { "epoch": 0.27733448611610517, "grad_norm": 677.2991333007812, "learning_rate": 9.099960353142948e-06, "loss": 14.7094, "step": 137290 }, { "epoch": 0.277354686748789, "grad_norm": 459.41119384765625, "learning_rate": 9.099760546627262e-06, "loss": 27.8189, "step": 137300 }, { "epoch": 0.2773748873814728, "grad_norm": 320.6719970703125, "learning_rate": 9.099560720129842e-06, "loss": 19.2016, "step": 137310 }, { "epoch": 0.2773950880141566, "grad_norm": 900.2733154296875, "learning_rate": 9.099360873651658e-06, "loss": 24.9501, "step": 137320 }, { "epoch": 0.2774152886468404, "grad_norm": 383.8463134765625, "learning_rate": 9.099161007193686e-06, "loss": 16.2387, "step": 137330 }, { "epoch": 0.2774354892795242, "grad_norm": 370.25323486328125, "learning_rate": 9.098961120756902e-06, "loss": 19.4532, "step": 137340 }, { "epoch": 0.27745568991220804, "grad_norm": 474.8215637207031, "learning_rate": 9.098761214342277e-06, "loss": 18.6614, "step": 137350 }, { "epoch": 0.27747589054489186, "grad_norm": 214.69796752929688, "learning_rate": 9.098561287950788e-06, "loss": 22.1373, "step": 137360 }, { "epoch": 0.2774960911775757, "grad_norm": 696.3637084960938, "learning_rate": 9.098361341583408e-06, "loss": 30.7538, "step": 137370 }, { "epoch": 0.2775162918102595, "grad_norm": 328.6319885253906, "learning_rate": 9.098161375241112e-06, "loss": 26.2747, "step": 137380 }, { "epoch": 0.2775364924429433, "grad_norm": 0.0, "learning_rate": 9.097961388924875e-06, "loss": 20.9066, "step": 137390 }, { "epoch": 0.27755669307562714, "grad_norm": 348.576904296875, "learning_rate": 9.09776138263567e-06, "loss": 30.0631, "step": 137400 }, { "epoch": 0.27757689370831096, "grad_norm": 211.87579345703125, "learning_rate": 9.097561356374473e-06, "loss": 13.9718, "step": 137410 }, { "epoch": 0.2775970943409948, "grad_norm": 936.2708740234375, "learning_rate": 9.097361310142261e-06, "loss": 32.1798, "step": 137420 }, { "epoch": 0.2776172949736786, "grad_norm": 265.9165344238281, "learning_rate": 9.097161243940005e-06, "loss": 
16.4136, "step": 137430 }, { "epoch": 0.2776374956063624, "grad_norm": 676.0292358398438, "learning_rate": 9.096961157768681e-06, "loss": 26.7054, "step": 137440 }, { "epoch": 0.2776576962390462, "grad_norm": 174.90513610839844, "learning_rate": 9.096761051629268e-06, "loss": 19.1602, "step": 137450 }, { "epoch": 0.27767789687173, "grad_norm": 109.43218994140625, "learning_rate": 9.096560925522738e-06, "loss": 21.11, "step": 137460 }, { "epoch": 0.2776980975044138, "grad_norm": 197.34710693359375, "learning_rate": 9.096360779450066e-06, "loss": 33.6084, "step": 137470 }, { "epoch": 0.27771829813709764, "grad_norm": 387.3228759765625, "learning_rate": 9.096160613412228e-06, "loss": 16.9318, "step": 137480 }, { "epoch": 0.27773849876978146, "grad_norm": 310.5277099609375, "learning_rate": 9.095960427410202e-06, "loss": 13.8329, "step": 137490 }, { "epoch": 0.2777586994024653, "grad_norm": 1074.7540283203125, "learning_rate": 9.09576022144496e-06, "loss": 28.8925, "step": 137500 }, { "epoch": 0.2777789000351491, "grad_norm": 7.782865047454834, "learning_rate": 9.09555999551748e-06, "loss": 15.7964, "step": 137510 }, { "epoch": 0.2777991006678329, "grad_norm": 603.712646484375, "learning_rate": 9.095359749628736e-06, "loss": 19.4232, "step": 137520 }, { "epoch": 0.27781930130051674, "grad_norm": 296.1772155761719, "learning_rate": 9.095159483779707e-06, "loss": 32.3323, "step": 137530 }, { "epoch": 0.27783950193320056, "grad_norm": 277.2585144042969, "learning_rate": 9.094959197971367e-06, "loss": 27.0107, "step": 137540 }, { "epoch": 0.2778597025658844, "grad_norm": 67.9524154663086, "learning_rate": 9.09475889220469e-06, "loss": 17.7709, "step": 137550 }, { "epoch": 0.2778799031985682, "grad_norm": 344.07452392578125, "learning_rate": 9.094558566480659e-06, "loss": 16.1814, "step": 137560 }, { "epoch": 0.277900103831252, "grad_norm": 481.0843505859375, "learning_rate": 9.094358220800243e-06, "loss": 24.1806, "step": 137570 }, { "epoch": 0.2779203044639358, "grad_norm": 259.8028259277344, "learning_rate": 9.094157855164424e-06, "loss": 28.0218, "step": 137580 }, { "epoch": 0.2779405050966196, "grad_norm": 381.5344543457031, "learning_rate": 9.093957469574175e-06, "loss": 18.8146, "step": 137590 }, { "epoch": 0.2779607057293034, "grad_norm": 344.8265380859375, "learning_rate": 9.093757064030473e-06, "loss": 28.887, "step": 137600 }, { "epoch": 0.27798090636198725, "grad_norm": 1606.595458984375, "learning_rate": 9.093556638534298e-06, "loss": 23.4497, "step": 137610 }, { "epoch": 0.27800110699467107, "grad_norm": 228.54640197753906, "learning_rate": 9.093356193086622e-06, "loss": 14.7104, "step": 137620 }, { "epoch": 0.2780213076273549, "grad_norm": 235.8044891357422, "learning_rate": 9.093155727688426e-06, "loss": 28.2329, "step": 137630 }, { "epoch": 0.2780415082600387, "grad_norm": 619.3360595703125, "learning_rate": 9.092955242340684e-06, "loss": 29.1266, "step": 137640 }, { "epoch": 0.2780617088927225, "grad_norm": 522.7439575195312, "learning_rate": 9.092754737044375e-06, "loss": 13.6259, "step": 137650 }, { "epoch": 0.27808190952540635, "grad_norm": 677.4970703125, "learning_rate": 9.092554211800476e-06, "loss": 32.3102, "step": 137660 }, { "epoch": 0.27810211015809017, "grad_norm": 244.76707458496094, "learning_rate": 9.092353666609963e-06, "loss": 22.9451, "step": 137670 }, { "epoch": 0.278122310790774, "grad_norm": 917.2261962890625, "learning_rate": 9.092153101473818e-06, "loss": 19.912, "step": 137680 }, { "epoch": 0.2781425114234578, "grad_norm": 399.02978515625, "learning_rate": 
9.091952516393012e-06, "loss": 20.9459, "step": 137690 }, { "epoch": 0.2781627120561416, "grad_norm": 675.6434326171875, "learning_rate": 9.091751911368524e-06, "loss": 28.4803, "step": 137700 }, { "epoch": 0.2781829126888254, "grad_norm": 127.59564971923828, "learning_rate": 9.091551286401337e-06, "loss": 24.397, "step": 137710 }, { "epoch": 0.2782031133215092, "grad_norm": 432.05291748046875, "learning_rate": 9.091350641492424e-06, "loss": 31.1793, "step": 137720 }, { "epoch": 0.27822331395419303, "grad_norm": 359.1927795410156, "learning_rate": 9.091149976642765e-06, "loss": 19.6272, "step": 137730 }, { "epoch": 0.27824351458687685, "grad_norm": 586.7080688476562, "learning_rate": 9.090949291853337e-06, "loss": 19.5357, "step": 137740 }, { "epoch": 0.27826371521956067, "grad_norm": 376.0263977050781, "learning_rate": 9.090748587125118e-06, "loss": 15.923, "step": 137750 }, { "epoch": 0.2782839158522445, "grad_norm": 484.933837890625, "learning_rate": 9.090547862459087e-06, "loss": 27.1255, "step": 137760 }, { "epoch": 0.2783041164849283, "grad_norm": 451.0625, "learning_rate": 9.09034711785622e-06, "loss": 19.3612, "step": 137770 }, { "epoch": 0.27832431711761213, "grad_norm": 354.6343078613281, "learning_rate": 9.090146353317499e-06, "loss": 17.4342, "step": 137780 }, { "epoch": 0.27834451775029595, "grad_norm": 375.70721435546875, "learning_rate": 9.0899455688439e-06, "loss": 16.687, "step": 137790 }, { "epoch": 0.27836471838297977, "grad_norm": 717.6085205078125, "learning_rate": 9.089744764436404e-06, "loss": 21.4564, "step": 137800 }, { "epoch": 0.2783849190156636, "grad_norm": 658.6801147460938, "learning_rate": 9.089543940095985e-06, "loss": 24.0651, "step": 137810 }, { "epoch": 0.2784051196483474, "grad_norm": 304.2969665527344, "learning_rate": 9.089343095823628e-06, "loss": 23.4212, "step": 137820 }, { "epoch": 0.2784253202810312, "grad_norm": 1553.240234375, "learning_rate": 9.089142231620306e-06, "loss": 32.3794, "step": 137830 }, { "epoch": 0.278445520913715, "grad_norm": 465.6623840332031, "learning_rate": 9.088941347487004e-06, "loss": 19.0398, "step": 137840 }, { "epoch": 0.2784657215463988, "grad_norm": 793.8300170898438, "learning_rate": 9.088740443424695e-06, "loss": 30.4577, "step": 137850 }, { "epoch": 0.27848592217908263, "grad_norm": 864.763671875, "learning_rate": 9.088539519434362e-06, "loss": 25.3503, "step": 137860 }, { "epoch": 0.27850612281176645, "grad_norm": 302.22894287109375, "learning_rate": 9.088338575516983e-06, "loss": 12.4432, "step": 137870 }, { "epoch": 0.2785263234444503, "grad_norm": 372.04144287109375, "learning_rate": 9.088137611673538e-06, "loss": 34.8318, "step": 137880 }, { "epoch": 0.2785465240771341, "grad_norm": 272.3525390625, "learning_rate": 9.087936627905005e-06, "loss": 17.5451, "step": 137890 }, { "epoch": 0.2785667247098179, "grad_norm": 1021.4248046875, "learning_rate": 9.087735624212365e-06, "loss": 19.9586, "step": 137900 }, { "epoch": 0.27858692534250173, "grad_norm": 513.5504150390625, "learning_rate": 9.087534600596599e-06, "loss": 22.0206, "step": 137910 }, { "epoch": 0.27860712597518555, "grad_norm": 225.71331787109375, "learning_rate": 9.087333557058684e-06, "loss": 18.2251, "step": 137920 }, { "epoch": 0.2786273266078694, "grad_norm": 397.6352233886719, "learning_rate": 9.087132493599601e-06, "loss": 14.128, "step": 137930 }, { "epoch": 0.2786475272405532, "grad_norm": 425.21856689453125, "learning_rate": 9.08693141022033e-06, "loss": 18.2089, "step": 137940 }, { "epoch": 0.278667727873237, "grad_norm": 
590.8599853515625, "learning_rate": 9.08673030692185e-06, "loss": 20.5031, "step": 137950 }, { "epoch": 0.2786879285059208, "grad_norm": 184.47247314453125, "learning_rate": 9.086529183705144e-06, "loss": 14.7618, "step": 137960 }, { "epoch": 0.2787081291386046, "grad_norm": 136.02565002441406, "learning_rate": 9.086328040571189e-06, "loss": 18.2466, "step": 137970 }, { "epoch": 0.2787283297712884, "grad_norm": 681.7166748046875, "learning_rate": 9.086126877520967e-06, "loss": 15.8043, "step": 137980 }, { "epoch": 0.27874853040397224, "grad_norm": 695.03955078125, "learning_rate": 9.085925694555457e-06, "loss": 18.401, "step": 137990 }, { "epoch": 0.27876873103665606, "grad_norm": 77.13236236572266, "learning_rate": 9.085724491675642e-06, "loss": 23.4363, "step": 138000 }, { "epoch": 0.2787889316693399, "grad_norm": 1009.24853515625, "learning_rate": 9.085523268882504e-06, "loss": 22.7571, "step": 138010 }, { "epoch": 0.2788091323020237, "grad_norm": 735.2975463867188, "learning_rate": 9.085322026177017e-06, "loss": 33.2772, "step": 138020 }, { "epoch": 0.2788293329347075, "grad_norm": 371.4656982421875, "learning_rate": 9.085120763560168e-06, "loss": 12.3176, "step": 138030 }, { "epoch": 0.27884953356739134, "grad_norm": 639.376953125, "learning_rate": 9.084919481032935e-06, "loss": 32.2983, "step": 138040 }, { "epoch": 0.27886973420007516, "grad_norm": 554.71484375, "learning_rate": 9.084718178596301e-06, "loss": 40.4096, "step": 138050 }, { "epoch": 0.278889934832759, "grad_norm": 355.3971252441406, "learning_rate": 9.084516856251244e-06, "loss": 28.8127, "step": 138060 }, { "epoch": 0.2789101354654428, "grad_norm": 373.5417785644531, "learning_rate": 9.084315513998749e-06, "loss": 22.5024, "step": 138070 }, { "epoch": 0.2789303360981266, "grad_norm": 237.6198272705078, "learning_rate": 9.084114151839795e-06, "loss": 37.6147, "step": 138080 }, { "epoch": 0.2789505367308104, "grad_norm": 759.7806396484375, "learning_rate": 9.083912769775364e-06, "loss": 18.7683, "step": 138090 }, { "epoch": 0.2789707373634942, "grad_norm": 548.9072265625, "learning_rate": 9.083711367806438e-06, "loss": 17.0632, "step": 138100 }, { "epoch": 0.278990937996178, "grad_norm": 146.3411865234375, "learning_rate": 9.083509945933996e-06, "loss": 17.3576, "step": 138110 }, { "epoch": 0.27901113862886184, "grad_norm": 173.25881958007812, "learning_rate": 9.083308504159025e-06, "loss": 18.1152, "step": 138120 }, { "epoch": 0.27903133926154566, "grad_norm": 540.9036865234375, "learning_rate": 9.083107042482502e-06, "loss": 11.4908, "step": 138130 }, { "epoch": 0.2790515398942295, "grad_norm": 356.0698547363281, "learning_rate": 9.08290556090541e-06, "loss": 15.1667, "step": 138140 }, { "epoch": 0.2790717405269133, "grad_norm": 373.5164489746094, "learning_rate": 9.082704059428732e-06, "loss": 21.7914, "step": 138150 }, { "epoch": 0.2790919411595971, "grad_norm": 646.2813720703125, "learning_rate": 9.08250253805345e-06, "loss": 41.9151, "step": 138160 }, { "epoch": 0.27911214179228094, "grad_norm": 8.045919418334961, "learning_rate": 9.082300996780543e-06, "loss": 15.8566, "step": 138170 }, { "epoch": 0.27913234242496476, "grad_norm": 744.6547241210938, "learning_rate": 9.082099435611001e-06, "loss": 21.6561, "step": 138180 }, { "epoch": 0.2791525430576486, "grad_norm": 943.8947143554688, "learning_rate": 9.081897854545798e-06, "loss": 19.71, "step": 138190 }, { "epoch": 0.2791727436903324, "grad_norm": 116.42237854003906, "learning_rate": 9.08169625358592e-06, "loss": 23.7724, "step": 138200 }, { "epoch": 
0.2791929443230162, "grad_norm": 559.2406005859375, "learning_rate": 9.08149463273235e-06, "loss": 18.6301, "step": 138210 }, { "epoch": 0.2792131449557, "grad_norm": 552.3639526367188, "learning_rate": 9.081292991986072e-06, "loss": 42.6683, "step": 138220 }, { "epoch": 0.2792333455883838, "grad_norm": 318.72149658203125, "learning_rate": 9.081091331348065e-06, "loss": 15.5614, "step": 138230 }, { "epoch": 0.27925354622106763, "grad_norm": 508.3001403808594, "learning_rate": 9.080889650819313e-06, "loss": 21.2383, "step": 138240 }, { "epoch": 0.27927374685375145, "grad_norm": 664.1058349609375, "learning_rate": 9.080687950400801e-06, "loss": 22.6868, "step": 138250 }, { "epoch": 0.27929394748643527, "grad_norm": 145.73680114746094, "learning_rate": 9.08048623009351e-06, "loss": 17.7912, "step": 138260 }, { "epoch": 0.2793141481191191, "grad_norm": 633.4774169921875, "learning_rate": 9.080284489898428e-06, "loss": 16.7837, "step": 138270 }, { "epoch": 0.2793343487518029, "grad_norm": 212.75233459472656, "learning_rate": 9.08008272981653e-06, "loss": 26.669, "step": 138280 }, { "epoch": 0.27935454938448673, "grad_norm": 201.02984619140625, "learning_rate": 9.079880949848804e-06, "loss": 26.8011, "step": 138290 }, { "epoch": 0.27937475001717055, "grad_norm": 254.2630615234375, "learning_rate": 9.079679149996235e-06, "loss": 26.1649, "step": 138300 }, { "epoch": 0.27939495064985437, "grad_norm": 217.7797088623047, "learning_rate": 9.079477330259803e-06, "loss": 12.8471, "step": 138310 }, { "epoch": 0.2794151512825382, "grad_norm": 1549.8648681640625, "learning_rate": 9.079275490640494e-06, "loss": 29.4622, "step": 138320 }, { "epoch": 0.279435351915222, "grad_norm": 4345.27587890625, "learning_rate": 9.079073631139291e-06, "loss": 31.5164, "step": 138330 }, { "epoch": 0.27945555254790583, "grad_norm": 664.3687133789062, "learning_rate": 9.078871751757176e-06, "loss": 25.7238, "step": 138340 }, { "epoch": 0.2794757531805896, "grad_norm": 471.45782470703125, "learning_rate": 9.078669852495138e-06, "loss": 29.2242, "step": 138350 }, { "epoch": 0.2794959538132734, "grad_norm": 520.059814453125, "learning_rate": 9.078467933354156e-06, "loss": 34.4871, "step": 138360 }, { "epoch": 0.27951615444595723, "grad_norm": 167.5902557373047, "learning_rate": 9.078265994335216e-06, "loss": 23.6625, "step": 138370 }, { "epoch": 0.27953635507864105, "grad_norm": 268.4201965332031, "learning_rate": 9.078064035439303e-06, "loss": 21.1321, "step": 138380 }, { "epoch": 0.2795565557113249, "grad_norm": 643.2801513671875, "learning_rate": 9.0778620566674e-06, "loss": 24.712, "step": 138390 }, { "epoch": 0.2795767563440087, "grad_norm": 452.7569274902344, "learning_rate": 9.077660058020492e-06, "loss": 43.9378, "step": 138400 }, { "epoch": 0.2795969569766925, "grad_norm": 463.3872985839844, "learning_rate": 9.077458039499563e-06, "loss": 12.4162, "step": 138410 }, { "epoch": 0.27961715760937633, "grad_norm": 669.3692626953125, "learning_rate": 9.077256001105598e-06, "loss": 32.6533, "step": 138420 }, { "epoch": 0.27963735824206015, "grad_norm": 565.0306396484375, "learning_rate": 9.077053942839581e-06, "loss": 26.3031, "step": 138430 }, { "epoch": 0.27965755887474397, "grad_norm": 234.78082275390625, "learning_rate": 9.0768518647025e-06, "loss": 29.5895, "step": 138440 }, { "epoch": 0.2796777595074278, "grad_norm": 0.1866646558046341, "learning_rate": 9.076649766695336e-06, "loss": 21.0687, "step": 138450 }, { "epoch": 0.2796979601401116, "grad_norm": 282.2435607910156, "learning_rate": 9.076447648819076e-06, "loss": 
20.6133, "step": 138460 }, { "epoch": 0.2797181607727954, "grad_norm": 282.5113525390625, "learning_rate": 9.076245511074704e-06, "loss": 20.6862, "step": 138470 }, { "epoch": 0.2797383614054792, "grad_norm": 1906.7191162109375, "learning_rate": 9.076043353463205e-06, "loss": 14.3788, "step": 138480 }, { "epoch": 0.279758562038163, "grad_norm": 716.705810546875, "learning_rate": 9.075841175985566e-06, "loss": 37.1254, "step": 138490 }, { "epoch": 0.27977876267084684, "grad_norm": 408.14013671875, "learning_rate": 9.07563897864277e-06, "loss": 27.3324, "step": 138500 }, { "epoch": 0.27979896330353066, "grad_norm": 444.34307861328125, "learning_rate": 9.075436761435807e-06, "loss": 19.5062, "step": 138510 }, { "epoch": 0.2798191639362145, "grad_norm": 1251.0943603515625, "learning_rate": 9.075234524365658e-06, "loss": 23.5936, "step": 138520 }, { "epoch": 0.2798393645688983, "grad_norm": 965.272705078125, "learning_rate": 9.07503226743331e-06, "loss": 36.7323, "step": 138530 }, { "epoch": 0.2798595652015821, "grad_norm": 1015.2049560546875, "learning_rate": 9.07482999063975e-06, "loss": 30.3238, "step": 138540 }, { "epoch": 0.27987976583426594, "grad_norm": 231.09129333496094, "learning_rate": 9.07462769398596e-06, "loss": 30.4964, "step": 138550 }, { "epoch": 0.27989996646694976, "grad_norm": 487.2249755859375, "learning_rate": 9.074425377472932e-06, "loss": 25.6819, "step": 138560 }, { "epoch": 0.2799201670996336, "grad_norm": 166.27435302734375, "learning_rate": 9.074223041101647e-06, "loss": 23.7328, "step": 138570 }, { "epoch": 0.2799403677323174, "grad_norm": 586.8052368164062, "learning_rate": 9.074020684873095e-06, "loss": 39.2396, "step": 138580 }, { "epoch": 0.2799605683650012, "grad_norm": 268.6707458496094, "learning_rate": 9.073818308788258e-06, "loss": 15.8867, "step": 138590 }, { "epoch": 0.279980768997685, "grad_norm": 324.8249816894531, "learning_rate": 9.073615912848126e-06, "loss": 19.836, "step": 138600 }, { "epoch": 0.2800009696303688, "grad_norm": 879.8435668945312, "learning_rate": 9.073413497053683e-06, "loss": 31.2466, "step": 138610 }, { "epoch": 0.2800211702630526, "grad_norm": 170.1151123046875, "learning_rate": 9.073211061405918e-06, "loss": 19.4436, "step": 138620 }, { "epoch": 0.28004137089573644, "grad_norm": 181.6733856201172, "learning_rate": 9.073008605905816e-06, "loss": 23.0449, "step": 138630 }, { "epoch": 0.28006157152842026, "grad_norm": 318.5815124511719, "learning_rate": 9.072806130554364e-06, "loss": 13.7624, "step": 138640 }, { "epoch": 0.2800817721611041, "grad_norm": 287.2176208496094, "learning_rate": 9.072603635352548e-06, "loss": 34.9895, "step": 138650 }, { "epoch": 0.2801019727937879, "grad_norm": 193.97384643554688, "learning_rate": 9.072401120301356e-06, "loss": 16.6459, "step": 138660 }, { "epoch": 0.2801221734264717, "grad_norm": 148.6316680908203, "learning_rate": 9.072198585401775e-06, "loss": 11.2359, "step": 138670 }, { "epoch": 0.28014237405915554, "grad_norm": 617.3121337890625, "learning_rate": 9.071996030654793e-06, "loss": 31.7572, "step": 138680 }, { "epoch": 0.28016257469183936, "grad_norm": 847.959716796875, "learning_rate": 9.071793456061395e-06, "loss": 39.0562, "step": 138690 }, { "epoch": 0.2801827753245232, "grad_norm": 59.63226318359375, "learning_rate": 9.07159086162257e-06, "loss": 31.0103, "step": 138700 }, { "epoch": 0.280202975957207, "grad_norm": 471.9793701171875, "learning_rate": 9.071388247339306e-06, "loss": 13.428, "step": 138710 }, { "epoch": 0.2802231765898908, "grad_norm": 292.8183898925781, 
"learning_rate": 9.071185613212588e-06, "loss": 29.688, "step": 138720 }, { "epoch": 0.2802433772225746, "grad_norm": 637.3397827148438, "learning_rate": 9.070982959243406e-06, "loss": 16.313, "step": 138730 }, { "epoch": 0.2802635778552584, "grad_norm": 118.97696685791016, "learning_rate": 9.070780285432746e-06, "loss": 23.1142, "step": 138740 }, { "epoch": 0.2802837784879422, "grad_norm": 723.663818359375, "learning_rate": 9.070577591781598e-06, "loss": 19.9465, "step": 138750 }, { "epoch": 0.28030397912062605, "grad_norm": 360.73968505859375, "learning_rate": 9.070374878290946e-06, "loss": 15.4039, "step": 138760 }, { "epoch": 0.28032417975330987, "grad_norm": 250.6209716796875, "learning_rate": 9.070172144961781e-06, "loss": 19.9679, "step": 138770 }, { "epoch": 0.2803443803859937, "grad_norm": 270.03662109375, "learning_rate": 9.069969391795093e-06, "loss": 28.7409, "step": 138780 }, { "epoch": 0.2803645810186775, "grad_norm": 890.7096557617188, "learning_rate": 9.069766618791867e-06, "loss": 20.5492, "step": 138790 }, { "epoch": 0.2803847816513613, "grad_norm": 922.1776123046875, "learning_rate": 9.069563825953092e-06, "loss": 30.5126, "step": 138800 }, { "epoch": 0.28040498228404515, "grad_norm": 695.0859985351562, "learning_rate": 9.069361013279755e-06, "loss": 41.9807, "step": 138810 }, { "epoch": 0.28042518291672897, "grad_norm": 163.199951171875, "learning_rate": 9.069158180772848e-06, "loss": 7.1529, "step": 138820 }, { "epoch": 0.2804453835494128, "grad_norm": 417.4488220214844, "learning_rate": 9.068955328433355e-06, "loss": 16.0531, "step": 138830 }, { "epoch": 0.2804655841820966, "grad_norm": 230.33570861816406, "learning_rate": 9.06875245626227e-06, "loss": 32.4096, "step": 138840 }, { "epoch": 0.2804857848147804, "grad_norm": 559.3120727539062, "learning_rate": 9.068549564260578e-06, "loss": 22.6386, "step": 138850 }, { "epoch": 0.2805059854474642, "grad_norm": 552.2367553710938, "learning_rate": 9.068346652429269e-06, "loss": 34.1713, "step": 138860 }, { "epoch": 0.280526186080148, "grad_norm": 238.18087768554688, "learning_rate": 9.068143720769332e-06, "loss": 12.5275, "step": 138870 }, { "epoch": 0.28054638671283183, "grad_norm": 458.226318359375, "learning_rate": 9.067940769281755e-06, "loss": 22.525, "step": 138880 }, { "epoch": 0.28056658734551565, "grad_norm": 296.74176025390625, "learning_rate": 9.067737797967528e-06, "loss": 25.6449, "step": 138890 }, { "epoch": 0.28058678797819947, "grad_norm": 254.4502716064453, "learning_rate": 9.06753480682764e-06, "loss": 14.2461, "step": 138900 }, { "epoch": 0.2806069886108833, "grad_norm": 358.54150390625, "learning_rate": 9.067331795863084e-06, "loss": 17.6906, "step": 138910 }, { "epoch": 0.2806271892435671, "grad_norm": 799.5117797851562, "learning_rate": 9.067128765074842e-06, "loss": 20.4168, "step": 138920 }, { "epoch": 0.28064738987625093, "grad_norm": 278.828369140625, "learning_rate": 9.06692571446391e-06, "loss": 14.3315, "step": 138930 }, { "epoch": 0.28066759050893475, "grad_norm": 706.9019775390625, "learning_rate": 9.066722644031274e-06, "loss": 27.403, "step": 138940 }, { "epoch": 0.28068779114161857, "grad_norm": 457.7975769042969, "learning_rate": 9.066519553777926e-06, "loss": 22.5423, "step": 138950 }, { "epoch": 0.2807079917743024, "grad_norm": 620.1854858398438, "learning_rate": 9.066316443704854e-06, "loss": 26.5021, "step": 138960 }, { "epoch": 0.2807281924069862, "grad_norm": 176.0418243408203, "learning_rate": 9.06611331381305e-06, "loss": 12.7354, "step": 138970 }, { "epoch": 0.28074839303967003, 
"grad_norm": 388.2792663574219, "learning_rate": 9.065910164103502e-06, "loss": 16.8222, "step": 138980 }, { "epoch": 0.2807685936723538, "grad_norm": 433.3446350097656, "learning_rate": 9.065706994577203e-06, "loss": 66.8012, "step": 138990 }, { "epoch": 0.2807887943050376, "grad_norm": 432.26007080078125, "learning_rate": 9.065503805235139e-06, "loss": 21.6146, "step": 139000 }, { "epoch": 0.28080899493772143, "grad_norm": 430.3299560546875, "learning_rate": 9.065300596078304e-06, "loss": 23.9434, "step": 139010 }, { "epoch": 0.28082919557040525, "grad_norm": 265.5904846191406, "learning_rate": 9.065097367107685e-06, "loss": 19.374, "step": 139020 }, { "epoch": 0.2808493962030891, "grad_norm": 606.0962524414062, "learning_rate": 9.064894118324276e-06, "loss": 31.1727, "step": 139030 }, { "epoch": 0.2808695968357729, "grad_norm": 290.9197082519531, "learning_rate": 9.064690849729066e-06, "loss": 22.6817, "step": 139040 }, { "epoch": 0.2808897974684567, "grad_norm": 345.8620300292969, "learning_rate": 9.064487561323046e-06, "loss": 20.0112, "step": 139050 }, { "epoch": 0.28090999810114053, "grad_norm": 339.62493896484375, "learning_rate": 9.064284253107206e-06, "loss": 20.9928, "step": 139060 }, { "epoch": 0.28093019873382435, "grad_norm": 123.88328552246094, "learning_rate": 9.064080925082536e-06, "loss": 18.6651, "step": 139070 }, { "epoch": 0.2809503993665082, "grad_norm": 475.6920471191406, "learning_rate": 9.063877577250031e-06, "loss": 22.357, "step": 139080 }, { "epoch": 0.280970599999192, "grad_norm": 600.6383666992188, "learning_rate": 9.063674209610678e-06, "loss": 30.9068, "step": 139090 }, { "epoch": 0.2809908006318758, "grad_norm": 262.7354431152344, "learning_rate": 9.06347082216547e-06, "loss": 17.4674, "step": 139100 }, { "epoch": 0.2810110012645596, "grad_norm": 695.3836669921875, "learning_rate": 9.0632674149154e-06, "loss": 46.7981, "step": 139110 }, { "epoch": 0.2810312018972434, "grad_norm": 5.711061000823975, "learning_rate": 9.063063987861455e-06, "loss": 12.0205, "step": 139120 }, { "epoch": 0.2810514025299272, "grad_norm": 6.8529133796691895, "learning_rate": 9.06286054100463e-06, "loss": 25.1362, "step": 139130 }, { "epoch": 0.28107160316261104, "grad_norm": 20.48434066772461, "learning_rate": 9.062657074345916e-06, "loss": 18.4941, "step": 139140 }, { "epoch": 0.28109180379529486, "grad_norm": 228.85006713867188, "learning_rate": 9.062453587886302e-06, "loss": 16.1669, "step": 139150 }, { "epoch": 0.2811120044279787, "grad_norm": 438.3404846191406, "learning_rate": 9.062250081626784e-06, "loss": 29.6202, "step": 139160 }, { "epoch": 0.2811322050606625, "grad_norm": 402.7679443359375, "learning_rate": 9.062046555568351e-06, "loss": 15.0169, "step": 139170 }, { "epoch": 0.2811524056933463, "grad_norm": 184.88658142089844, "learning_rate": 9.061843009711995e-06, "loss": 13.5044, "step": 139180 }, { "epoch": 0.28117260632603014, "grad_norm": 285.018310546875, "learning_rate": 9.06163944405871e-06, "loss": 23.2025, "step": 139190 }, { "epoch": 0.28119280695871396, "grad_norm": 1371.1322021484375, "learning_rate": 9.061435858609486e-06, "loss": 44.7656, "step": 139200 }, { "epoch": 0.2812130075913978, "grad_norm": 818.6122436523438, "learning_rate": 9.061232253365317e-06, "loss": 26.9839, "step": 139210 }, { "epoch": 0.2812332082240816, "grad_norm": 695.9334106445312, "learning_rate": 9.061028628327196e-06, "loss": 24.9003, "step": 139220 }, { "epoch": 0.2812534088567654, "grad_norm": 635.3130493164062, "learning_rate": 9.060824983496113e-06, "loss": 16.1886, "step": 
139230 }, { "epoch": 0.2812736094894492, "grad_norm": 412.47625732421875, "learning_rate": 9.06062131887306e-06, "loss": 18.6965, "step": 139240 }, { "epoch": 0.281293810122133, "grad_norm": 949.825439453125, "learning_rate": 9.060417634459032e-06, "loss": 19.2848, "step": 139250 }, { "epoch": 0.2813140107548168, "grad_norm": 741.3657836914062, "learning_rate": 9.060213930255023e-06, "loss": 26.5962, "step": 139260 }, { "epoch": 0.28133421138750064, "grad_norm": 598.5442504882812, "learning_rate": 9.06001020626202e-06, "loss": 22.5946, "step": 139270 }, { "epoch": 0.28135441202018446, "grad_norm": 534.9511108398438, "learning_rate": 9.059806462481022e-06, "loss": 26.616, "step": 139280 }, { "epoch": 0.2813746126528683, "grad_norm": 536.8895263671875, "learning_rate": 9.05960269891302e-06, "loss": 13.6408, "step": 139290 }, { "epoch": 0.2813948132855521, "grad_norm": 338.2876892089844, "learning_rate": 9.059398915559005e-06, "loss": 17.3098, "step": 139300 }, { "epoch": 0.2814150139182359, "grad_norm": 281.8960266113281, "learning_rate": 9.059195112419972e-06, "loss": 27.7513, "step": 139310 }, { "epoch": 0.28143521455091974, "grad_norm": 353.2132873535156, "learning_rate": 9.058991289496916e-06, "loss": 18.9002, "step": 139320 }, { "epoch": 0.28145541518360356, "grad_norm": 0.0, "learning_rate": 9.058787446790828e-06, "loss": 15.0055, "step": 139330 }, { "epoch": 0.2814756158162874, "grad_norm": 259.71734619140625, "learning_rate": 9.058583584302702e-06, "loss": 33.1441, "step": 139340 }, { "epoch": 0.2814958164489712, "grad_norm": 348.2009582519531, "learning_rate": 9.058379702033533e-06, "loss": 15.4112, "step": 139350 }, { "epoch": 0.281516017081655, "grad_norm": 293.4024963378906, "learning_rate": 9.058175799984312e-06, "loss": 11.7142, "step": 139360 }, { "epoch": 0.2815362177143388, "grad_norm": 1326.43310546875, "learning_rate": 9.057971878156036e-06, "loss": 49.4331, "step": 139370 }, { "epoch": 0.2815564183470226, "grad_norm": 396.4537353515625, "learning_rate": 9.057767936549696e-06, "loss": 19.5482, "step": 139380 }, { "epoch": 0.28157661897970643, "grad_norm": 334.4983825683594, "learning_rate": 9.057563975166288e-06, "loss": 20.0406, "step": 139390 }, { "epoch": 0.28159681961239025, "grad_norm": 573.329833984375, "learning_rate": 9.057359994006806e-06, "loss": 20.9532, "step": 139400 }, { "epoch": 0.28161702024507407, "grad_norm": 308.3233947753906, "learning_rate": 9.057155993072241e-06, "loss": 32.7891, "step": 139410 }, { "epoch": 0.2816372208777579, "grad_norm": 390.9920959472656, "learning_rate": 9.056951972363592e-06, "loss": 13.0427, "step": 139420 }, { "epoch": 0.2816574215104417, "grad_norm": 325.1952209472656, "learning_rate": 9.056747931881851e-06, "loss": 38.5052, "step": 139430 }, { "epoch": 0.28167762214312553, "grad_norm": 296.2378845214844, "learning_rate": 9.056543871628012e-06, "loss": 15.9121, "step": 139440 }, { "epoch": 0.28169782277580935, "grad_norm": 494.32855224609375, "learning_rate": 9.056339791603069e-06, "loss": 16.8407, "step": 139450 }, { "epoch": 0.28171802340849317, "grad_norm": 99.52635955810547, "learning_rate": 9.056135691808019e-06, "loss": 24.2374, "step": 139460 }, { "epoch": 0.281738224041177, "grad_norm": 389.7237854003906, "learning_rate": 9.055931572243857e-06, "loss": 27.4766, "step": 139470 }, { "epoch": 0.2817584246738608, "grad_norm": 279.0994873046875, "learning_rate": 9.055727432911574e-06, "loss": 56.0306, "step": 139480 }, { "epoch": 0.28177862530654463, "grad_norm": 112.3465347290039, "learning_rate": 9.055523273812168e-06, 
"loss": 30.8866, "step": 139490 }, { "epoch": 0.2817988259392284, "grad_norm": 353.2060241699219, "learning_rate": 9.055319094946633e-06, "loss": 21.7786, "step": 139500 }, { "epoch": 0.2818190265719122, "grad_norm": 356.8995361328125, "learning_rate": 9.055114896315966e-06, "loss": 31.1258, "step": 139510 }, { "epoch": 0.28183922720459603, "grad_norm": 319.8045654296875, "learning_rate": 9.05491067792116e-06, "loss": 21.9368, "step": 139520 }, { "epoch": 0.28185942783727985, "grad_norm": 171.35629272460938, "learning_rate": 9.054706439763212e-06, "loss": 21.618, "step": 139530 }, { "epoch": 0.2818796284699637, "grad_norm": 315.6375427246094, "learning_rate": 9.054502181843117e-06, "loss": 24.4054, "step": 139540 }, { "epoch": 0.2818998291026475, "grad_norm": 468.3456726074219, "learning_rate": 9.054297904161868e-06, "loss": 14.5097, "step": 139550 }, { "epoch": 0.2819200297353313, "grad_norm": 512.71728515625, "learning_rate": 9.054093606720464e-06, "loss": 27.3539, "step": 139560 }, { "epoch": 0.28194023036801513, "grad_norm": 235.35549926757812, "learning_rate": 9.0538892895199e-06, "loss": 23.005, "step": 139570 }, { "epoch": 0.28196043100069895, "grad_norm": 728.3839721679688, "learning_rate": 9.053684952561171e-06, "loss": 27.9579, "step": 139580 }, { "epoch": 0.2819806316333828, "grad_norm": 335.0770568847656, "learning_rate": 9.053480595845272e-06, "loss": 11.292, "step": 139590 }, { "epoch": 0.2820008322660666, "grad_norm": 27.807125091552734, "learning_rate": 9.0532762193732e-06, "loss": 24.8045, "step": 139600 }, { "epoch": 0.2820210328987504, "grad_norm": 260.2357177734375, "learning_rate": 9.053071823145953e-06, "loss": 34.6502, "step": 139610 }, { "epoch": 0.28204123353143423, "grad_norm": 377.25225830078125, "learning_rate": 9.052867407164525e-06, "loss": 43.1483, "step": 139620 }, { "epoch": 0.282061434164118, "grad_norm": 743.7612915039062, "learning_rate": 9.052662971429912e-06, "loss": 17.927, "step": 139630 }, { "epoch": 0.2820816347968018, "grad_norm": 158.10980224609375, "learning_rate": 9.052458515943112e-06, "loss": 7.0445, "step": 139640 }, { "epoch": 0.28210183542948564, "grad_norm": 349.80194091796875, "learning_rate": 9.052254040705121e-06, "loss": 24.8305, "step": 139650 }, { "epoch": 0.28212203606216946, "grad_norm": 393.1520080566406, "learning_rate": 9.052049545716934e-06, "loss": 19.4659, "step": 139660 }, { "epoch": 0.2821422366948533, "grad_norm": 244.4481201171875, "learning_rate": 9.05184503097955e-06, "loss": 40.7217, "step": 139670 }, { "epoch": 0.2821624373275371, "grad_norm": 627.539794921875, "learning_rate": 9.051640496493965e-06, "loss": 34.6599, "step": 139680 }, { "epoch": 0.2821826379602209, "grad_norm": 333.36944580078125, "learning_rate": 9.051435942261175e-06, "loss": 14.3166, "step": 139690 }, { "epoch": 0.28220283859290474, "grad_norm": 468.92840576171875, "learning_rate": 9.051231368282177e-06, "loss": 23.9827, "step": 139700 }, { "epoch": 0.28222303922558856, "grad_norm": 821.8056640625, "learning_rate": 9.051026774557969e-06, "loss": 23.295, "step": 139710 }, { "epoch": 0.2822432398582724, "grad_norm": 744.9319458007812, "learning_rate": 9.05082216108955e-06, "loss": 20.0495, "step": 139720 }, { "epoch": 0.2822634404909562, "grad_norm": 281.4646911621094, "learning_rate": 9.050617527877911e-06, "loss": 18.5791, "step": 139730 }, { "epoch": 0.28228364112364, "grad_norm": 574.1758422851562, "learning_rate": 9.050412874924057e-06, "loss": 30.0051, "step": 139740 }, { "epoch": 0.2823038417563238, "grad_norm": 341.85565185546875, 
"learning_rate": 9.050208202228981e-06, "loss": 37.8916, "step": 139750 }, { "epoch": 0.2823240423890076, "grad_norm": 107.9791488647461, "learning_rate": 9.05000350979368e-06, "loss": 26.4139, "step": 139760 }, { "epoch": 0.2823442430216914, "grad_norm": 432.2236328125, "learning_rate": 9.049798797619156e-06, "loss": 27.9242, "step": 139770 }, { "epoch": 0.28236444365437524, "grad_norm": 397.84930419921875, "learning_rate": 9.049594065706401e-06, "loss": 30.808, "step": 139780 }, { "epoch": 0.28238464428705906, "grad_norm": 633.5519409179688, "learning_rate": 9.049389314056417e-06, "loss": 28.3848, "step": 139790 }, { "epoch": 0.2824048449197429, "grad_norm": 509.9869384765625, "learning_rate": 9.0491845426702e-06, "loss": 15.8071, "step": 139800 }, { "epoch": 0.2824250455524267, "grad_norm": 164.17051696777344, "learning_rate": 9.04897975154875e-06, "loss": 31.5073, "step": 139810 }, { "epoch": 0.2824452461851105, "grad_norm": 286.58984375, "learning_rate": 9.048774940693062e-06, "loss": 16.4875, "step": 139820 }, { "epoch": 0.28246544681779434, "grad_norm": 671.1658935546875, "learning_rate": 9.048570110104137e-06, "loss": 17.9476, "step": 139830 }, { "epoch": 0.28248564745047816, "grad_norm": 312.6763610839844, "learning_rate": 9.048365259782973e-06, "loss": 14.704, "step": 139840 }, { "epoch": 0.282505848083162, "grad_norm": 166.3192596435547, "learning_rate": 9.048160389730565e-06, "loss": 19.6797, "step": 139850 }, { "epoch": 0.2825260487158458, "grad_norm": 304.2613525390625, "learning_rate": 9.047955499947916e-06, "loss": 14.4554, "step": 139860 }, { "epoch": 0.2825462493485296, "grad_norm": 648.1076049804688, "learning_rate": 9.047750590436023e-06, "loss": 19.2086, "step": 139870 }, { "epoch": 0.2825664499812134, "grad_norm": 841.6949462890625, "learning_rate": 9.047545661195885e-06, "loss": 22.0263, "step": 139880 }, { "epoch": 0.2825866506138972, "grad_norm": 1014.7114868164062, "learning_rate": 9.0473407122285e-06, "loss": 35.6883, "step": 139890 }, { "epoch": 0.282606851246581, "grad_norm": 728.3817138671875, "learning_rate": 9.047135743534866e-06, "loss": 14.7379, "step": 139900 }, { "epoch": 0.28262705187926485, "grad_norm": 318.85577392578125, "learning_rate": 9.046930755115986e-06, "loss": 19.7937, "step": 139910 }, { "epoch": 0.28264725251194867, "grad_norm": 655.446533203125, "learning_rate": 9.046725746972855e-06, "loss": 19.0856, "step": 139920 }, { "epoch": 0.2826674531446325, "grad_norm": 319.9189758300781, "learning_rate": 9.046520719106473e-06, "loss": 25.6961, "step": 139930 }, { "epoch": 0.2826876537773163, "grad_norm": 443.02099609375, "learning_rate": 9.04631567151784e-06, "loss": 40.0771, "step": 139940 }, { "epoch": 0.2827078544100001, "grad_norm": 827.03564453125, "learning_rate": 9.046110604207955e-06, "loss": 19.5551, "step": 139950 }, { "epoch": 0.28272805504268395, "grad_norm": 507.2126159667969, "learning_rate": 9.045905517177817e-06, "loss": 25.4966, "step": 139960 }, { "epoch": 0.28274825567536777, "grad_norm": 327.4715270996094, "learning_rate": 9.045700410428428e-06, "loss": 13.62, "step": 139970 }, { "epoch": 0.2827684563080516, "grad_norm": 394.3174743652344, "learning_rate": 9.045495283960784e-06, "loss": 13.4917, "step": 139980 }, { "epoch": 0.2827886569407354, "grad_norm": 676.4134521484375, "learning_rate": 9.045290137775888e-06, "loss": 36.4538, "step": 139990 }, { "epoch": 0.2828088575734192, "grad_norm": 719.8438720703125, "learning_rate": 9.045084971874738e-06, "loss": 15.936, "step": 140000 }, { "epoch": 0.282829058206103, "grad_norm": 
334.0062255859375, "learning_rate": 9.044879786258335e-06, "loss": 23.4169, "step": 140010 }, { "epoch": 0.2828492588387868, "grad_norm": 398.6730651855469, "learning_rate": 9.044674580927678e-06, "loss": 36.6501, "step": 140020 }, { "epoch": 0.28286945947147063, "grad_norm": 305.77349853515625, "learning_rate": 9.044469355883767e-06, "loss": 13.6044, "step": 140030 }, { "epoch": 0.28288966010415445, "grad_norm": 385.3360900878906, "learning_rate": 9.044264111127603e-06, "loss": 23.8349, "step": 140040 }, { "epoch": 0.28290986073683827, "grad_norm": 458.33624267578125, "learning_rate": 9.044058846660187e-06, "loss": 18.4828, "step": 140050 }, { "epoch": 0.2829300613695221, "grad_norm": 472.4216003417969, "learning_rate": 9.043853562482518e-06, "loss": 21.9815, "step": 140060 }, { "epoch": 0.2829502620022059, "grad_norm": 281.8251037597656, "learning_rate": 9.043648258595598e-06, "loss": 23.9108, "step": 140070 }, { "epoch": 0.28297046263488973, "grad_norm": 350.5022277832031, "learning_rate": 9.043442935000428e-06, "loss": 19.8607, "step": 140080 }, { "epoch": 0.28299066326757355, "grad_norm": 1277.1827392578125, "learning_rate": 9.043237591698006e-06, "loss": 38.2556, "step": 140090 }, { "epoch": 0.28301086390025737, "grad_norm": 534.0780639648438, "learning_rate": 9.043032228689333e-06, "loss": 24.3573, "step": 140100 }, { "epoch": 0.2830310645329412, "grad_norm": 429.0807189941406, "learning_rate": 9.042826845975413e-06, "loss": 30.0201, "step": 140110 }, { "epoch": 0.283051265165625, "grad_norm": 502.4304504394531, "learning_rate": 9.042621443557244e-06, "loss": 13.0569, "step": 140120 }, { "epoch": 0.28307146579830883, "grad_norm": 355.1749572753906, "learning_rate": 9.042416021435831e-06, "loss": 32.5609, "step": 140130 }, { "epoch": 0.2830916664309926, "grad_norm": 948.6965942382812, "learning_rate": 9.042210579612171e-06, "loss": 43.2929, "step": 140140 }, { "epoch": 0.2831118670636764, "grad_norm": 305.7770690917969, "learning_rate": 9.042005118087267e-06, "loss": 14.1311, "step": 140150 }, { "epoch": 0.28313206769636023, "grad_norm": 245.43408203125, "learning_rate": 9.041799636862119e-06, "loss": 25.9679, "step": 140160 }, { "epoch": 0.28315226832904405, "grad_norm": 552.7593383789062, "learning_rate": 9.041594135937731e-06, "loss": 19.4616, "step": 140170 }, { "epoch": 0.2831724689617279, "grad_norm": 943.1445922851562, "learning_rate": 9.041388615315102e-06, "loss": 23.4781, "step": 140180 }, { "epoch": 0.2831926695944117, "grad_norm": 281.3905334472656, "learning_rate": 9.041183074995238e-06, "loss": 22.9952, "step": 140190 }, { "epoch": 0.2832128702270955, "grad_norm": 256.47320556640625, "learning_rate": 9.040977514979136e-06, "loss": 16.3178, "step": 140200 }, { "epoch": 0.28323307085977933, "grad_norm": 19.432308197021484, "learning_rate": 9.0407719352678e-06, "loss": 16.9027, "step": 140210 }, { "epoch": 0.28325327149246315, "grad_norm": 283.8543395996094, "learning_rate": 9.040566335862231e-06, "loss": 25.2738, "step": 140220 }, { "epoch": 0.283273472125147, "grad_norm": 888.1716918945312, "learning_rate": 9.040360716763432e-06, "loss": 20.5632, "step": 140230 }, { "epoch": 0.2832936727578308, "grad_norm": 191.85345458984375, "learning_rate": 9.040155077972406e-06, "loss": 15.5759, "step": 140240 }, { "epoch": 0.2833138733905146, "grad_norm": 1257.1053466796875, "learning_rate": 9.039949419490152e-06, "loss": 23.4082, "step": 140250 }, { "epoch": 0.2833340740231984, "grad_norm": 715.1559448242188, "learning_rate": 9.039743741317677e-06, "loss": 16.0674, "step": 140260 
}, { "epoch": 0.2833542746558822, "grad_norm": 528.466064453125, "learning_rate": 9.03953804345598e-06, "loss": 22.3576, "step": 140270 }, { "epoch": 0.283374475288566, "grad_norm": 364.2476501464844, "learning_rate": 9.039332325906065e-06, "loss": 19.1027, "step": 140280 }, { "epoch": 0.28339467592124984, "grad_norm": 474.8454895019531, "learning_rate": 9.039126588668934e-06, "loss": 22.6441, "step": 140290 }, { "epoch": 0.28341487655393366, "grad_norm": 594.119140625, "learning_rate": 9.038920831745587e-06, "loss": 16.2823, "step": 140300 }, { "epoch": 0.2834350771866175, "grad_norm": 572.68798828125, "learning_rate": 9.038715055137033e-06, "loss": 18.5837, "step": 140310 }, { "epoch": 0.2834552778193013, "grad_norm": 257.25201416015625, "learning_rate": 9.038509258844271e-06, "loss": 28.9023, "step": 140320 }, { "epoch": 0.2834754784519851, "grad_norm": 457.9421081542969, "learning_rate": 9.038303442868304e-06, "loss": 27.8972, "step": 140330 }, { "epoch": 0.28349567908466894, "grad_norm": 736.3229370117188, "learning_rate": 9.038097607210136e-06, "loss": 19.0042, "step": 140340 }, { "epoch": 0.28351587971735276, "grad_norm": 359.7777404785156, "learning_rate": 9.037891751870772e-06, "loss": 21.475, "step": 140350 }, { "epoch": 0.2835360803500366, "grad_norm": 800.7240600585938, "learning_rate": 9.037685876851211e-06, "loss": 26.0903, "step": 140360 }, { "epoch": 0.2835562809827204, "grad_norm": 1088.6314697265625, "learning_rate": 9.03747998215246e-06, "loss": 21.6544, "step": 140370 }, { "epoch": 0.2835764816154042, "grad_norm": 114.48328399658203, "learning_rate": 9.03727406777552e-06, "loss": 11.678, "step": 140380 }, { "epoch": 0.283596682248088, "grad_norm": 609.7506713867188, "learning_rate": 9.037068133721396e-06, "loss": 19.2222, "step": 140390 }, { "epoch": 0.2836168828807718, "grad_norm": 35.8396110534668, "learning_rate": 9.036862179991092e-06, "loss": 6.4215, "step": 140400 }, { "epoch": 0.2836370835134556, "grad_norm": 424.3556823730469, "learning_rate": 9.036656206585612e-06, "loss": 13.0586, "step": 140410 }, { "epoch": 0.28365728414613944, "grad_norm": 688.9608154296875, "learning_rate": 9.036450213505958e-06, "loss": 32.8414, "step": 140420 }, { "epoch": 0.28367748477882326, "grad_norm": 552.4269409179688, "learning_rate": 9.036244200753136e-06, "loss": 17.1787, "step": 140430 }, { "epoch": 0.2836976854115071, "grad_norm": 90.48094940185547, "learning_rate": 9.036038168328149e-06, "loss": 19.5705, "step": 140440 }, { "epoch": 0.2837178860441909, "grad_norm": 329.4688415527344, "learning_rate": 9.035832116232002e-06, "loss": 35.406, "step": 140450 }, { "epoch": 0.2837380866768747, "grad_norm": 0.4015864133834839, "learning_rate": 9.035626044465699e-06, "loss": 21.8288, "step": 140460 }, { "epoch": 0.28375828730955854, "grad_norm": 221.49996948242188, "learning_rate": 9.035419953030244e-06, "loss": 16.4197, "step": 140470 }, { "epoch": 0.28377848794224236, "grad_norm": 128.67318725585938, "learning_rate": 9.03521384192664e-06, "loss": 15.8318, "step": 140480 }, { "epoch": 0.2837986885749262, "grad_norm": 575.8938598632812, "learning_rate": 9.035007711155894e-06, "loss": 19.0869, "step": 140490 }, { "epoch": 0.28381888920761, "grad_norm": 581.9172973632812, "learning_rate": 9.03480156071901e-06, "loss": 25.2945, "step": 140500 }, { "epoch": 0.2838390898402938, "grad_norm": 393.1622314453125, "learning_rate": 9.034595390616993e-06, "loss": 16.7882, "step": 140510 }, { "epoch": 0.2838592904729776, "grad_norm": 96.71426391601562, "learning_rate": 9.034389200850847e-06, 
"loss": 33.8566, "step": 140520 }, { "epoch": 0.2838794911056614, "grad_norm": 195.7721710205078, "learning_rate": 9.034182991421578e-06, "loss": 16.3035, "step": 140530 }, { "epoch": 0.28389969173834523, "grad_norm": 679.16748046875, "learning_rate": 9.033976762330189e-06, "loss": 18.6678, "step": 140540 }, { "epoch": 0.28391989237102905, "grad_norm": 224.84298706054688, "learning_rate": 9.033770513577688e-06, "loss": 17.4794, "step": 140550 }, { "epoch": 0.28394009300371287, "grad_norm": 60.07780456542969, "learning_rate": 9.033564245165077e-06, "loss": 15.1955, "step": 140560 }, { "epoch": 0.2839602936363967, "grad_norm": 541.4494018554688, "learning_rate": 9.033357957093366e-06, "loss": 25.8799, "step": 140570 }, { "epoch": 0.2839804942690805, "grad_norm": 325.6931457519531, "learning_rate": 9.033151649363555e-06, "loss": 31.2157, "step": 140580 }, { "epoch": 0.28400069490176433, "grad_norm": 218.31802368164062, "learning_rate": 9.032945321976652e-06, "loss": 15.9393, "step": 140590 }, { "epoch": 0.28402089553444815, "grad_norm": 689.8706665039062, "learning_rate": 9.032738974933663e-06, "loss": 33.0374, "step": 140600 }, { "epoch": 0.28404109616713197, "grad_norm": 117.66937255859375, "learning_rate": 9.032532608235594e-06, "loss": 34.8357, "step": 140610 }, { "epoch": 0.2840612967998158, "grad_norm": 549.00048828125, "learning_rate": 9.03232622188345e-06, "loss": 20.1212, "step": 140620 }, { "epoch": 0.2840814974324996, "grad_norm": 464.6247863769531, "learning_rate": 9.032119815878237e-06, "loss": 27.099, "step": 140630 }, { "epoch": 0.28410169806518343, "grad_norm": 440.7333984375, "learning_rate": 9.03191339022096e-06, "loss": 21.3173, "step": 140640 }, { "epoch": 0.2841218986978672, "grad_norm": 401.0006408691406, "learning_rate": 9.031706944912627e-06, "loss": 19.9862, "step": 140650 }, { "epoch": 0.284142099330551, "grad_norm": 198.79434204101562, "learning_rate": 9.031500479954243e-06, "loss": 16.9328, "step": 140660 }, { "epoch": 0.28416229996323483, "grad_norm": 517.9761962890625, "learning_rate": 9.031293995346814e-06, "loss": 28.2514, "step": 140670 }, { "epoch": 0.28418250059591865, "grad_norm": 277.5168762207031, "learning_rate": 9.03108749109135e-06, "loss": 20.5854, "step": 140680 }, { "epoch": 0.2842027012286025, "grad_norm": 648.0955810546875, "learning_rate": 9.030880967188852e-06, "loss": 15.7541, "step": 140690 }, { "epoch": 0.2842229018612863, "grad_norm": 616.1392822265625, "learning_rate": 9.03067442364033e-06, "loss": 37.7645, "step": 140700 }, { "epoch": 0.2842431024939701, "grad_norm": 267.5377502441406, "learning_rate": 9.030467860446789e-06, "loss": 15.694, "step": 140710 }, { "epoch": 0.28426330312665393, "grad_norm": 653.78662109375, "learning_rate": 9.030261277609235e-06, "loss": 23.0132, "step": 140720 }, { "epoch": 0.28428350375933775, "grad_norm": 454.54779052734375, "learning_rate": 9.030054675128679e-06, "loss": 49.4573, "step": 140730 }, { "epoch": 0.2843037043920216, "grad_norm": 576.9923706054688, "learning_rate": 9.029848053006125e-06, "loss": 31.1583, "step": 140740 }, { "epoch": 0.2843239050247054, "grad_norm": 234.82867431640625, "learning_rate": 9.02964141124258e-06, "loss": 20.9029, "step": 140750 }, { "epoch": 0.2843441056573892, "grad_norm": 392.6349792480469, "learning_rate": 9.02943474983905e-06, "loss": 17.4374, "step": 140760 }, { "epoch": 0.28436430629007303, "grad_norm": 635.7166748046875, "learning_rate": 9.029228068796546e-06, "loss": 41.5775, "step": 140770 }, { "epoch": 0.2843845069227568, "grad_norm": 418.59637451171875, 
"learning_rate": 9.029021368116072e-06, "loss": 19.3102, "step": 140780 }, { "epoch": 0.2844047075554406, "grad_norm": 481.26287841796875, "learning_rate": 9.028814647798635e-06, "loss": 18.2928, "step": 140790 }, { "epoch": 0.28442490818812444, "grad_norm": 540.19091796875, "learning_rate": 9.028607907845247e-06, "loss": 26.6712, "step": 140800 }, { "epoch": 0.28444510882080826, "grad_norm": 444.8948669433594, "learning_rate": 9.028401148256911e-06, "loss": 23.629, "step": 140810 }, { "epoch": 0.2844653094534921, "grad_norm": 227.0774383544922, "learning_rate": 9.028194369034638e-06, "loss": 16.6439, "step": 140820 }, { "epoch": 0.2844855100861759, "grad_norm": 362.9493713378906, "learning_rate": 9.027987570179432e-06, "loss": 12.54, "step": 140830 }, { "epoch": 0.2845057107188597, "grad_norm": 231.91323852539062, "learning_rate": 9.027780751692303e-06, "loss": 12.8618, "step": 140840 }, { "epoch": 0.28452591135154354, "grad_norm": 499.4613342285156, "learning_rate": 9.02757391357426e-06, "loss": 16.7643, "step": 140850 }, { "epoch": 0.28454611198422736, "grad_norm": 215.88856506347656, "learning_rate": 9.027367055826311e-06, "loss": 23.9327, "step": 140860 }, { "epoch": 0.2845663126169112, "grad_norm": 279.5829162597656, "learning_rate": 9.027160178449464e-06, "loss": 14.2983, "step": 140870 }, { "epoch": 0.284586513249595, "grad_norm": 83.72539520263672, "learning_rate": 9.026953281444725e-06, "loss": 26.6484, "step": 140880 }, { "epoch": 0.2846067138822788, "grad_norm": 370.7086486816406, "learning_rate": 9.026746364813105e-06, "loss": 14.8684, "step": 140890 }, { "epoch": 0.2846269145149626, "grad_norm": 390.4755859375, "learning_rate": 9.026539428555609e-06, "loss": 21.4055, "step": 140900 }, { "epoch": 0.2846471151476464, "grad_norm": 791.41162109375, "learning_rate": 9.026332472673251e-06, "loss": 27.1457, "step": 140910 }, { "epoch": 0.2846673157803302, "grad_norm": 373.7838439941406, "learning_rate": 9.026125497167037e-06, "loss": 15.93, "step": 140920 }, { "epoch": 0.28468751641301404, "grad_norm": 488.69012451171875, "learning_rate": 9.025918502037975e-06, "loss": 9.9848, "step": 140930 }, { "epoch": 0.28470771704569786, "grad_norm": 950.9098510742188, "learning_rate": 9.025711487287074e-06, "loss": 44.7586, "step": 140940 }, { "epoch": 0.2847279176783817, "grad_norm": 307.61773681640625, "learning_rate": 9.025504452915345e-06, "loss": 16.8016, "step": 140950 }, { "epoch": 0.2847481183110655, "grad_norm": 256.02911376953125, "learning_rate": 9.025297398923794e-06, "loss": 16.7141, "step": 140960 }, { "epoch": 0.2847683189437493, "grad_norm": 547.9686279296875, "learning_rate": 9.025090325313432e-06, "loss": 25.2908, "step": 140970 }, { "epoch": 0.28478851957643314, "grad_norm": 124.6249771118164, "learning_rate": 9.024883232085268e-06, "loss": 34.0416, "step": 140980 }, { "epoch": 0.28480872020911696, "grad_norm": 574.1445922851562, "learning_rate": 9.024676119240312e-06, "loss": 22.9915, "step": 140990 }, { "epoch": 0.2848289208418008, "grad_norm": 161.6092529296875, "learning_rate": 9.02446898677957e-06, "loss": 22.989, "step": 141000 }, { "epoch": 0.2848491214744846, "grad_norm": 197.62750244140625, "learning_rate": 9.024261834704058e-06, "loss": 21.8625, "step": 141010 }, { "epoch": 0.2848693221071684, "grad_norm": 256.64434814453125, "learning_rate": 9.02405466301478e-06, "loss": 28.2441, "step": 141020 }, { "epoch": 0.2848895227398522, "grad_norm": 761.4275512695312, "learning_rate": 9.023847471712748e-06, "loss": 29.7179, "step": 141030 }, { "epoch": 0.284909723372536, 
"grad_norm": 660.5105590820312, "learning_rate": 9.023640260798972e-06, "loss": 29.1144, "step": 141040 }, { "epoch": 0.2849299240052198, "grad_norm": 680.9800415039062, "learning_rate": 9.02343303027446e-06, "loss": 31.2829, "step": 141050 }, { "epoch": 0.28495012463790365, "grad_norm": 192.0106201171875, "learning_rate": 9.023225780140223e-06, "loss": 21.9095, "step": 141060 }, { "epoch": 0.28497032527058747, "grad_norm": 1290.7042236328125, "learning_rate": 9.023018510397274e-06, "loss": 41.4337, "step": 141070 }, { "epoch": 0.2849905259032713, "grad_norm": 739.5303955078125, "learning_rate": 9.022811221046618e-06, "loss": 15.2573, "step": 141080 }, { "epoch": 0.2850107265359551, "grad_norm": 291.6741638183594, "learning_rate": 9.02260391208927e-06, "loss": 39.7825, "step": 141090 }, { "epoch": 0.2850309271686389, "grad_norm": 676.3077392578125, "learning_rate": 9.022396583526238e-06, "loss": 28.4242, "step": 141100 }, { "epoch": 0.28505112780132275, "grad_norm": 11.59755802154541, "learning_rate": 9.022189235358533e-06, "loss": 16.0288, "step": 141110 }, { "epoch": 0.28507132843400657, "grad_norm": 877.0466918945312, "learning_rate": 9.021981867587165e-06, "loss": 23.9562, "step": 141120 }, { "epoch": 0.2850915290666904, "grad_norm": 525.8517456054688, "learning_rate": 9.021774480213145e-06, "loss": 16.4442, "step": 141130 }, { "epoch": 0.2851117296993742, "grad_norm": 696.8245239257812, "learning_rate": 9.021567073237486e-06, "loss": 12.7827, "step": 141140 }, { "epoch": 0.285131930332058, "grad_norm": 606.3016967773438, "learning_rate": 9.021359646661194e-06, "loss": 12.2285, "step": 141150 }, { "epoch": 0.2851521309647418, "grad_norm": 445.4494323730469, "learning_rate": 9.021152200485283e-06, "loss": 17.6888, "step": 141160 }, { "epoch": 0.2851723315974256, "grad_norm": 507.1750183105469, "learning_rate": 9.020944734710767e-06, "loss": 21.5278, "step": 141170 }, { "epoch": 0.28519253223010943, "grad_norm": 14.19613265991211, "learning_rate": 9.02073724933865e-06, "loss": 15.0407, "step": 141180 }, { "epoch": 0.28521273286279325, "grad_norm": 309.0068664550781, "learning_rate": 9.02052974436995e-06, "loss": 20.7985, "step": 141190 }, { "epoch": 0.28523293349547707, "grad_norm": 594.0772094726562, "learning_rate": 9.020322219805674e-06, "loss": 24.369, "step": 141200 }, { "epoch": 0.2852531341281609, "grad_norm": 814.9288940429688, "learning_rate": 9.020114675646835e-06, "loss": 30.1738, "step": 141210 }, { "epoch": 0.2852733347608447, "grad_norm": 711.9208984375, "learning_rate": 9.019907111894447e-06, "loss": 29.0659, "step": 141220 }, { "epoch": 0.28529353539352853, "grad_norm": 406.48309326171875, "learning_rate": 9.019699528549518e-06, "loss": 32.3264, "step": 141230 }, { "epoch": 0.28531373602621235, "grad_norm": 513.768310546875, "learning_rate": 9.01949192561306e-06, "loss": 24.5212, "step": 141240 }, { "epoch": 0.28533393665889617, "grad_norm": 657.7827758789062, "learning_rate": 9.019284303086086e-06, "loss": 13.5329, "step": 141250 }, { "epoch": 0.28535413729158, "grad_norm": 1400.362548828125, "learning_rate": 9.01907666096961e-06, "loss": 36.6206, "step": 141260 }, { "epoch": 0.2853743379242638, "grad_norm": 339.25982666015625, "learning_rate": 9.018868999264641e-06, "loss": 25.5105, "step": 141270 }, { "epoch": 0.28539453855694763, "grad_norm": 371.290771484375, "learning_rate": 9.018661317972191e-06, "loss": 13.5509, "step": 141280 }, { "epoch": 0.2854147391896314, "grad_norm": 496.48199462890625, "learning_rate": 9.018453617093273e-06, "loss": 50.5274, "step": 141290 
}, { "epoch": 0.2854349398223152, "grad_norm": 525.364501953125, "learning_rate": 9.0182458966289e-06, "loss": 17.7513, "step": 141300 }, { "epoch": 0.28545514045499903, "grad_norm": 375.22796630859375, "learning_rate": 9.018038156580084e-06, "loss": 21.2563, "step": 141310 }, { "epoch": 0.28547534108768285, "grad_norm": 336.7750244140625, "learning_rate": 9.017830396947838e-06, "loss": 16.0464, "step": 141320 }, { "epoch": 0.2854955417203667, "grad_norm": 600.1476440429688, "learning_rate": 9.017622617733173e-06, "loss": 13.1931, "step": 141330 }, { "epoch": 0.2855157423530505, "grad_norm": 163.05801391601562, "learning_rate": 9.017414818937101e-06, "loss": 45.0232, "step": 141340 }, { "epoch": 0.2855359429857343, "grad_norm": 403.7227783203125, "learning_rate": 9.017207000560639e-06, "loss": 15.3571, "step": 141350 }, { "epoch": 0.28555614361841813, "grad_norm": 1307.354736328125, "learning_rate": 9.016999162604795e-06, "loss": 23.5802, "step": 141360 }, { "epoch": 0.28557634425110195, "grad_norm": 614.9104614257812, "learning_rate": 9.016791305070587e-06, "loss": 23.8888, "step": 141370 }, { "epoch": 0.2855965448837858, "grad_norm": 285.7597351074219, "learning_rate": 9.016583427959025e-06, "loss": 15.5126, "step": 141380 }, { "epoch": 0.2856167455164696, "grad_norm": 300.525146484375, "learning_rate": 9.01637553127112e-06, "loss": 17.0144, "step": 141390 }, { "epoch": 0.2856369461491534, "grad_norm": 405.881591796875, "learning_rate": 9.01616761500789e-06, "loss": 19.6971, "step": 141400 }, { "epoch": 0.28565714678183723, "grad_norm": 221.88356018066406, "learning_rate": 9.015959679170346e-06, "loss": 18.885, "step": 141410 }, { "epoch": 0.285677347414521, "grad_norm": 1110.6405029296875, "learning_rate": 9.015751723759501e-06, "loss": 27.1684, "step": 141420 }, { "epoch": 0.2856975480472048, "grad_norm": 419.91925048828125, "learning_rate": 9.01554374877637e-06, "loss": 19.0117, "step": 141430 }, { "epoch": 0.28571774867988864, "grad_norm": 137.30470275878906, "learning_rate": 9.015335754221964e-06, "loss": 17.3682, "step": 141440 }, { "epoch": 0.28573794931257246, "grad_norm": 208.8695831298828, "learning_rate": 9.015127740097301e-06, "loss": 16.6924, "step": 141450 }, { "epoch": 0.2857581499452563, "grad_norm": 245.57373046875, "learning_rate": 9.01491970640339e-06, "loss": 19.0873, "step": 141460 }, { "epoch": 0.2857783505779401, "grad_norm": 533.674560546875, "learning_rate": 9.014711653141248e-06, "loss": 24.1613, "step": 141470 }, { "epoch": 0.2857985512106239, "grad_norm": 384.47943115234375, "learning_rate": 9.014503580311889e-06, "loss": 16.388, "step": 141480 }, { "epoch": 0.28581875184330774, "grad_norm": 268.9631042480469, "learning_rate": 9.014295487916325e-06, "loss": 25.5681, "step": 141490 }, { "epoch": 0.28583895247599156, "grad_norm": 0.9439303278923035, "learning_rate": 9.014087375955574e-06, "loss": 24.6955, "step": 141500 }, { "epoch": 0.2858591531086754, "grad_norm": 642.2147827148438, "learning_rate": 9.013879244430645e-06, "loss": 34.832, "step": 141510 }, { "epoch": 0.2858793537413592, "grad_norm": 511.16973876953125, "learning_rate": 9.013671093342557e-06, "loss": 19.2557, "step": 141520 }, { "epoch": 0.285899554374043, "grad_norm": 667.150634765625, "learning_rate": 9.013462922692324e-06, "loss": 30.6257, "step": 141530 }, { "epoch": 0.2859197550067268, "grad_norm": 486.5180358886719, "learning_rate": 9.013254732480958e-06, "loss": 21.1171, "step": 141540 }, { "epoch": 0.2859399556394106, "grad_norm": 584.9976196289062, "learning_rate": 
9.013046522709477e-06, "loss": 30.7921, "step": 141550 }, { "epoch": 0.2859601562720944, "grad_norm": 173.3647003173828, "learning_rate": 9.01283829337889e-06, "loss": 53.3171, "step": 141560 }, { "epoch": 0.28598035690477824, "grad_norm": 139.5582275390625, "learning_rate": 9.01263004449022e-06, "loss": 26.5207, "step": 141570 }, { "epoch": 0.28600055753746206, "grad_norm": 278.63323974609375, "learning_rate": 9.012421776044477e-06, "loss": 36.4622, "step": 141580 }, { "epoch": 0.2860207581701459, "grad_norm": 0.0, "learning_rate": 9.012213488042677e-06, "loss": 20.3394, "step": 141590 }, { "epoch": 0.2860409588028297, "grad_norm": 104.30828094482422, "learning_rate": 9.012005180485834e-06, "loss": 18.8843, "step": 141600 }, { "epoch": 0.2860611594355135, "grad_norm": 943.4236450195312, "learning_rate": 9.011796853374964e-06, "loss": 32.3506, "step": 141610 }, { "epoch": 0.28608136006819734, "grad_norm": 237.1873779296875, "learning_rate": 9.011588506711085e-06, "loss": 30.9954, "step": 141620 }, { "epoch": 0.28610156070088116, "grad_norm": 619.3309936523438, "learning_rate": 9.011380140495207e-06, "loss": 28.4549, "step": 141630 }, { "epoch": 0.286121761333565, "grad_norm": 1272.03759765625, "learning_rate": 9.01117175472835e-06, "loss": 26.1937, "step": 141640 }, { "epoch": 0.2861419619662488, "grad_norm": 331.3840637207031, "learning_rate": 9.010963349411529e-06, "loss": 28.1638, "step": 141650 }, { "epoch": 0.2861621625989326, "grad_norm": 72.40825653076172, "learning_rate": 9.01075492454576e-06, "loss": 15.7668, "step": 141660 }, { "epoch": 0.2861823632316164, "grad_norm": 522.8346557617188, "learning_rate": 9.010546480132055e-06, "loss": 21.3102, "step": 141670 }, { "epoch": 0.2862025638643002, "grad_norm": 296.7470397949219, "learning_rate": 9.010338016171434e-06, "loss": 11.4901, "step": 141680 }, { "epoch": 0.28622276449698403, "grad_norm": 593.6846923828125, "learning_rate": 9.010129532664914e-06, "loss": 28.7207, "step": 141690 }, { "epoch": 0.28624296512966785, "grad_norm": 339.2178649902344, "learning_rate": 9.009921029613506e-06, "loss": 15.8317, "step": 141700 }, { "epoch": 0.28626316576235167, "grad_norm": 195.95559692382812, "learning_rate": 9.00971250701823e-06, "loss": 36.6844, "step": 141710 }, { "epoch": 0.2862833663950355, "grad_norm": 855.398681640625, "learning_rate": 9.009503964880105e-06, "loss": 18.0744, "step": 141720 }, { "epoch": 0.2863035670277193, "grad_norm": 88.88728332519531, "learning_rate": 9.00929540320014e-06, "loss": 18.5225, "step": 141730 }, { "epoch": 0.28632376766040313, "grad_norm": 361.8889465332031, "learning_rate": 9.009086821979358e-06, "loss": 28.5676, "step": 141740 }, { "epoch": 0.28634396829308695, "grad_norm": 212.55238342285156, "learning_rate": 9.00887822121877e-06, "loss": 11.9381, "step": 141750 }, { "epoch": 0.28636416892577077, "grad_norm": 184.14773559570312, "learning_rate": 9.008669600919399e-06, "loss": 14.0141, "step": 141760 }, { "epoch": 0.2863843695584546, "grad_norm": 583.1585693359375, "learning_rate": 9.008460961082257e-06, "loss": 26.22, "step": 141770 }, { "epoch": 0.2864045701911384, "grad_norm": 423.38079833984375, "learning_rate": 9.008252301708362e-06, "loss": 13.1701, "step": 141780 }, { "epoch": 0.28642477082382223, "grad_norm": 623.1273193359375, "learning_rate": 9.008043622798732e-06, "loss": 32.882, "step": 141790 }, { "epoch": 0.286444971456506, "grad_norm": 553.2904663085938, "learning_rate": 9.007834924354384e-06, "loss": 28.11, "step": 141800 }, { "epoch": 0.2864651720891898, "grad_norm": 
922.823974609375, "learning_rate": 9.007626206376335e-06, "loss": 22.7093, "step": 141810 }, { "epoch": 0.28648537272187363, "grad_norm": 689.900390625, "learning_rate": 9.0074174688656e-06, "loss": 18.8748, "step": 141820 }, { "epoch": 0.28650557335455745, "grad_norm": 110.99595642089844, "learning_rate": 9.007208711823198e-06, "loss": 23.6381, "step": 141830 }, { "epoch": 0.2865257739872413, "grad_norm": 679.4481201171875, "learning_rate": 9.006999935250149e-06, "loss": 15.933, "step": 141840 }, { "epoch": 0.2865459746199251, "grad_norm": 518.9588012695312, "learning_rate": 9.006791139147468e-06, "loss": 24.9338, "step": 141850 }, { "epoch": 0.2865661752526089, "grad_norm": 557.93701171875, "learning_rate": 9.006582323516172e-06, "loss": 25.8751, "step": 141860 }, { "epoch": 0.28658637588529273, "grad_norm": 420.7420349121094, "learning_rate": 9.006373488357281e-06, "loss": 32.2564, "step": 141870 }, { "epoch": 0.28660657651797655, "grad_norm": 334.7980041503906, "learning_rate": 9.00616463367181e-06, "loss": 21.5728, "step": 141880 }, { "epoch": 0.2866267771506604, "grad_norm": 0.0, "learning_rate": 9.005955759460779e-06, "loss": 22.1354, "step": 141890 }, { "epoch": 0.2866469777833442, "grad_norm": 324.70465087890625, "learning_rate": 9.005746865725206e-06, "loss": 22.3069, "step": 141900 }, { "epoch": 0.286667178416028, "grad_norm": 480.416748046875, "learning_rate": 9.005537952466108e-06, "loss": 36.1294, "step": 141910 }, { "epoch": 0.28668737904871183, "grad_norm": 208.1507568359375, "learning_rate": 9.005329019684503e-06, "loss": 36.0992, "step": 141920 }, { "epoch": 0.2867075796813956, "grad_norm": 257.6912841796875, "learning_rate": 9.005120067381413e-06, "loss": 25.5808, "step": 141930 }, { "epoch": 0.2867277803140794, "grad_norm": 467.33734130859375, "learning_rate": 9.004911095557852e-06, "loss": 27.5341, "step": 141940 }, { "epoch": 0.28674798094676324, "grad_norm": 251.2129364013672, "learning_rate": 9.00470210421484e-06, "loss": 38.4227, "step": 141950 }, { "epoch": 0.28676818157944706, "grad_norm": 0.7161155343055725, "learning_rate": 9.004493093353394e-06, "loss": 33.6804, "step": 141960 }, { "epoch": 0.2867883822121309, "grad_norm": 168.31422424316406, "learning_rate": 9.004284062974537e-06, "loss": 19.0771, "step": 141970 }, { "epoch": 0.2868085828448147, "grad_norm": 114.95464324951172, "learning_rate": 9.004075013079284e-06, "loss": 22.4422, "step": 141980 }, { "epoch": 0.2868287834774985, "grad_norm": 479.05218505859375, "learning_rate": 9.003865943668656e-06, "loss": 16.7318, "step": 141990 }, { "epoch": 0.28684898411018234, "grad_norm": 178.01829528808594, "learning_rate": 9.003656854743667e-06, "loss": 22.3476, "step": 142000 }, { "epoch": 0.28686918474286616, "grad_norm": 576.8214111328125, "learning_rate": 9.003447746305345e-06, "loss": 30.2682, "step": 142010 }, { "epoch": 0.28688938537555, "grad_norm": 400.33575439453125, "learning_rate": 9.003238618354702e-06, "loss": 15.8093, "step": 142020 }, { "epoch": 0.2869095860082338, "grad_norm": 393.9613952636719, "learning_rate": 9.003029470892759e-06, "loss": 25.6593, "step": 142030 }, { "epoch": 0.2869297866409176, "grad_norm": 152.47128295898438, "learning_rate": 9.002820303920537e-06, "loss": 19.8718, "step": 142040 }, { "epoch": 0.28694998727360144, "grad_norm": 577.7484741210938, "learning_rate": 9.002611117439054e-06, "loss": 17.9984, "step": 142050 }, { "epoch": 0.2869701879062852, "grad_norm": 184.30430603027344, "learning_rate": 9.00240191144933e-06, "loss": 20.9967, "step": 142060 }, { "epoch": 
0.286990388538969, "grad_norm": 239.4911651611328, "learning_rate": 9.002192685952385e-06, "loss": 20.1448, "step": 142070 }, { "epoch": 0.28701058917165284, "grad_norm": 244.6146697998047, "learning_rate": 9.001983440949236e-06, "loss": 15.4028, "step": 142080 }, { "epoch": 0.28703078980433666, "grad_norm": 150.16650390625, "learning_rate": 9.001774176440908e-06, "loss": 16.8485, "step": 142090 }, { "epoch": 0.2870509904370205, "grad_norm": 309.9389953613281, "learning_rate": 9.001564892428416e-06, "loss": 31.5917, "step": 142100 }, { "epoch": 0.2870711910697043, "grad_norm": 561.6334228515625, "learning_rate": 9.001355588912784e-06, "loss": 21.6469, "step": 142110 }, { "epoch": 0.2870913917023881, "grad_norm": 728.5511474609375, "learning_rate": 9.001146265895028e-06, "loss": 18.9681, "step": 142120 }, { "epoch": 0.28711159233507194, "grad_norm": 467.7611083984375, "learning_rate": 9.000936923376171e-06, "loss": 11.793, "step": 142130 }, { "epoch": 0.28713179296775576, "grad_norm": 562.4061889648438, "learning_rate": 9.000727561357234e-06, "loss": 17.7584, "step": 142140 }, { "epoch": 0.2871519936004396, "grad_norm": 128.87075805664062, "learning_rate": 9.000518179839236e-06, "loss": 13.9033, "step": 142150 }, { "epoch": 0.2871721942331234, "grad_norm": 0.0, "learning_rate": 9.000308778823196e-06, "loss": 14.2383, "step": 142160 }, { "epoch": 0.2871923948658072, "grad_norm": 4.180148601531982, "learning_rate": 9.000099358310137e-06, "loss": 21.0602, "step": 142170 }, { "epoch": 0.287212595498491, "grad_norm": 281.80340576171875, "learning_rate": 8.99988991830108e-06, "loss": 18.984, "step": 142180 }, { "epoch": 0.2872327961311748, "grad_norm": 313.60968017578125, "learning_rate": 8.999680458797042e-06, "loss": 21.767, "step": 142190 }, { "epoch": 0.2872529967638586, "grad_norm": 511.8515319824219, "learning_rate": 8.999470979799048e-06, "loss": 25.0589, "step": 142200 }, { "epoch": 0.28727319739654245, "grad_norm": 44.609806060791016, "learning_rate": 8.999261481308117e-06, "loss": 25.8067, "step": 142210 }, { "epoch": 0.28729339802922627, "grad_norm": 176.3469696044922, "learning_rate": 8.999051963325271e-06, "loss": 13.9836, "step": 142220 }, { "epoch": 0.2873135986619101, "grad_norm": 246.97377014160156, "learning_rate": 8.998842425851531e-06, "loss": 10.2865, "step": 142230 }, { "epoch": 0.2873337992945939, "grad_norm": 147.90199279785156, "learning_rate": 8.998632868887918e-06, "loss": 11.0603, "step": 142240 }, { "epoch": 0.2873539999272777, "grad_norm": 778.6231689453125, "learning_rate": 8.998423292435455e-06, "loss": 36.8754, "step": 142250 }, { "epoch": 0.28737420055996155, "grad_norm": 309.0158386230469, "learning_rate": 8.998213696495159e-06, "loss": 30.2746, "step": 142260 }, { "epoch": 0.28739440119264537, "grad_norm": 857.2860717773438, "learning_rate": 8.998004081068055e-06, "loss": 34.1955, "step": 142270 }, { "epoch": 0.2874146018253292, "grad_norm": 651.5060424804688, "learning_rate": 8.997794446155165e-06, "loss": 17.9878, "step": 142280 }, { "epoch": 0.287434802458013, "grad_norm": 777.4243774414062, "learning_rate": 8.997584791757508e-06, "loss": 32.8457, "step": 142290 }, { "epoch": 0.2874550030906968, "grad_norm": 1243.1383056640625, "learning_rate": 8.99737511787611e-06, "loss": 41.3604, "step": 142300 }, { "epoch": 0.2874752037233806, "grad_norm": 152.16807556152344, "learning_rate": 8.997165424511988e-06, "loss": 33.1704, "step": 142310 }, { "epoch": 0.2874954043560644, "grad_norm": 183.40115356445312, "learning_rate": 8.996955711666168e-06, "loss": 21.6837, 
"step": 142320 }, { "epoch": 0.28751560498874823, "grad_norm": 527.2123413085938, "learning_rate": 8.996745979339671e-06, "loss": 13.4808, "step": 142330 }, { "epoch": 0.28753580562143205, "grad_norm": 312.4104919433594, "learning_rate": 8.996536227533519e-06, "loss": 17.1448, "step": 142340 }, { "epoch": 0.28755600625411587, "grad_norm": 532.3212280273438, "learning_rate": 8.996326456248732e-06, "loss": 22.7429, "step": 142350 }, { "epoch": 0.2875762068867997, "grad_norm": 267.4195251464844, "learning_rate": 8.996116665486337e-06, "loss": 38.7647, "step": 142360 }, { "epoch": 0.2875964075194835, "grad_norm": 457.72979736328125, "learning_rate": 8.995906855247354e-06, "loss": 25.3404, "step": 142370 }, { "epoch": 0.28761660815216733, "grad_norm": 1294.712158203125, "learning_rate": 8.995697025532803e-06, "loss": 52.108, "step": 142380 }, { "epoch": 0.28763680878485115, "grad_norm": 603.0121459960938, "learning_rate": 8.995487176343711e-06, "loss": 27.2448, "step": 142390 }, { "epoch": 0.28765700941753497, "grad_norm": 536.1098022460938, "learning_rate": 8.9952773076811e-06, "loss": 16.5084, "step": 142400 }, { "epoch": 0.2876772100502188, "grad_norm": 55.689697265625, "learning_rate": 8.99506741954599e-06, "loss": 21.7152, "step": 142410 }, { "epoch": 0.2876974106829026, "grad_norm": 58.7491455078125, "learning_rate": 8.994857511939408e-06, "loss": 23.1094, "step": 142420 }, { "epoch": 0.28771761131558643, "grad_norm": 644.9966430664062, "learning_rate": 8.994647584862374e-06, "loss": 12.4131, "step": 142430 }, { "epoch": 0.2877378119482702, "grad_norm": 346.7236633300781, "learning_rate": 8.994437638315912e-06, "loss": 28.0715, "step": 142440 }, { "epoch": 0.287758012580954, "grad_norm": 579.5534057617188, "learning_rate": 8.994227672301046e-06, "loss": 11.6123, "step": 142450 }, { "epoch": 0.28777821321363783, "grad_norm": 572.34326171875, "learning_rate": 8.994017686818799e-06, "loss": 21.997, "step": 142460 }, { "epoch": 0.28779841384632165, "grad_norm": 464.645751953125, "learning_rate": 8.993807681870192e-06, "loss": 23.9822, "step": 142470 }, { "epoch": 0.2878186144790055, "grad_norm": 271.6661376953125, "learning_rate": 8.993597657456252e-06, "loss": 20.6079, "step": 142480 }, { "epoch": 0.2878388151116893, "grad_norm": 319.6411437988281, "learning_rate": 8.993387613578003e-06, "loss": 36.7985, "step": 142490 }, { "epoch": 0.2878590157443731, "grad_norm": 785.7559814453125, "learning_rate": 8.993177550236464e-06, "loss": 21.1795, "step": 142500 }, { "epoch": 0.28787921637705693, "grad_norm": 372.37860107421875, "learning_rate": 8.992967467432665e-06, "loss": 27.2943, "step": 142510 }, { "epoch": 0.28789941700974075, "grad_norm": 1.3575628995895386, "learning_rate": 8.992757365167625e-06, "loss": 20.1759, "step": 142520 }, { "epoch": 0.2879196176424246, "grad_norm": 253.48573303222656, "learning_rate": 8.99254724344237e-06, "loss": 25.6765, "step": 142530 }, { "epoch": 0.2879398182751084, "grad_norm": 319.9288330078125, "learning_rate": 8.992337102257925e-06, "loss": 26.6335, "step": 142540 }, { "epoch": 0.2879600189077922, "grad_norm": 591.0591430664062, "learning_rate": 8.992126941615314e-06, "loss": 45.3629, "step": 142550 }, { "epoch": 0.28798021954047603, "grad_norm": 356.8609924316406, "learning_rate": 8.991916761515557e-06, "loss": 29.198, "step": 142560 }, { "epoch": 0.2880004201731598, "grad_norm": 448.464111328125, "learning_rate": 8.991706561959684e-06, "loss": 16.613, "step": 142570 }, { "epoch": 0.2880206208058436, "grad_norm": 941.5521240234375, "learning_rate": 
8.991496342948718e-06, "loss": 20.0772, "step": 142580 }, { "epoch": 0.28804082143852744, "grad_norm": 897.8544311523438, "learning_rate": 8.991286104483682e-06, "loss": 26.1854, "step": 142590 }, { "epoch": 0.28806102207121126, "grad_norm": 336.386474609375, "learning_rate": 8.991075846565603e-06, "loss": 14.872, "step": 142600 }, { "epoch": 0.2880812227038951, "grad_norm": 149.27305603027344, "learning_rate": 8.990865569195502e-06, "loss": 26.0966, "step": 142610 }, { "epoch": 0.2881014233365789, "grad_norm": 629.7431640625, "learning_rate": 8.990655272374409e-06, "loss": 18.5056, "step": 142620 }, { "epoch": 0.2881216239692627, "grad_norm": 362.06134033203125, "learning_rate": 8.990444956103343e-06, "loss": 16.4395, "step": 142630 }, { "epoch": 0.28814182460194654, "grad_norm": 153.1188201904297, "learning_rate": 8.990234620383335e-06, "loss": 22.6376, "step": 142640 }, { "epoch": 0.28816202523463036, "grad_norm": 414.06201171875, "learning_rate": 8.990024265215405e-06, "loss": 20.5046, "step": 142650 }, { "epoch": 0.2881822258673142, "grad_norm": 434.1939697265625, "learning_rate": 8.989813890600582e-06, "loss": 26.4005, "step": 142660 }, { "epoch": 0.288202426499998, "grad_norm": 587.8204956054688, "learning_rate": 8.989603496539891e-06, "loss": 22.4086, "step": 142670 }, { "epoch": 0.2882226271326818, "grad_norm": 277.9704895019531, "learning_rate": 8.989393083034355e-06, "loss": 13.3265, "step": 142680 }, { "epoch": 0.28824282776536564, "grad_norm": 635.5704345703125, "learning_rate": 8.989182650085003e-06, "loss": 23.5692, "step": 142690 }, { "epoch": 0.2882630283980494, "grad_norm": 290.620361328125, "learning_rate": 8.988972197692857e-06, "loss": 22.3352, "step": 142700 }, { "epoch": 0.2882832290307332, "grad_norm": 462.3267517089844, "learning_rate": 8.988761725858942e-06, "loss": 25.7847, "step": 142710 }, { "epoch": 0.28830342966341704, "grad_norm": 304.3821716308594, "learning_rate": 8.988551234584289e-06, "loss": 22.0378, "step": 142720 }, { "epoch": 0.28832363029610086, "grad_norm": 393.9117431640625, "learning_rate": 8.988340723869921e-06, "loss": 26.1371, "step": 142730 }, { "epoch": 0.2883438309287847, "grad_norm": 212.9825439453125, "learning_rate": 8.988130193716864e-06, "loss": 22.2655, "step": 142740 }, { "epoch": 0.2883640315614685, "grad_norm": 495.86407470703125, "learning_rate": 8.987919644126145e-06, "loss": 19.0947, "step": 142750 }, { "epoch": 0.2883842321941523, "grad_norm": 599.6559448242188, "learning_rate": 8.987709075098786e-06, "loss": 16.4004, "step": 142760 }, { "epoch": 0.28840443282683614, "grad_norm": 1017.9681396484375, "learning_rate": 8.98749848663582e-06, "loss": 23.2595, "step": 142770 }, { "epoch": 0.28842463345951996, "grad_norm": 381.4158935546875, "learning_rate": 8.987287878738269e-06, "loss": 20.6209, "step": 142780 }, { "epoch": 0.2884448340922038, "grad_norm": 277.0904846191406, "learning_rate": 8.987077251407159e-06, "loss": 29.096, "step": 142790 }, { "epoch": 0.2884650347248876, "grad_norm": 234.6095733642578, "learning_rate": 8.986866604643518e-06, "loss": 14.6361, "step": 142800 }, { "epoch": 0.2884852353575714, "grad_norm": 325.8774108886719, "learning_rate": 8.986655938448373e-06, "loss": 34.7078, "step": 142810 }, { "epoch": 0.2885054359902552, "grad_norm": 349.6905822753906, "learning_rate": 8.986445252822752e-06, "loss": 23.9932, "step": 142820 }, { "epoch": 0.288525636622939, "grad_norm": 509.04443359375, "learning_rate": 8.986234547767681e-06, "loss": 26.4596, "step": 142830 }, { "epoch": 0.28854583725562283, "grad_norm": 
397.302734375, "learning_rate": 8.986023823284184e-06, "loss": 13.2624, "step": 142840 }, { "epoch": 0.28856603788830665, "grad_norm": 255.59266662597656, "learning_rate": 8.985813079373293e-06, "loss": 30.9302, "step": 142850 }, { "epoch": 0.28858623852099047, "grad_norm": 546.9246826171875, "learning_rate": 8.98560231603603e-06, "loss": 13.2987, "step": 142860 }, { "epoch": 0.2886064391536743, "grad_norm": 270.22564697265625, "learning_rate": 8.985391533273425e-06, "loss": 17.018, "step": 142870 }, { "epoch": 0.2886266397863581, "grad_norm": 462.64117431640625, "learning_rate": 8.985180731086505e-06, "loss": 22.4081, "step": 142880 }, { "epoch": 0.28864684041904193, "grad_norm": 352.8821105957031, "learning_rate": 8.984969909476299e-06, "loss": 26.6187, "step": 142890 }, { "epoch": 0.28866704105172575, "grad_norm": 926.951416015625, "learning_rate": 8.984759068443832e-06, "loss": 34.7425, "step": 142900 }, { "epoch": 0.28868724168440957, "grad_norm": 411.7799072265625, "learning_rate": 8.984548207990133e-06, "loss": 20.4422, "step": 142910 }, { "epoch": 0.2887074423170934, "grad_norm": 561.7974243164062, "learning_rate": 8.984337328116228e-06, "loss": 10.887, "step": 142920 }, { "epoch": 0.2887276429497772, "grad_norm": 259.2440490722656, "learning_rate": 8.984126428823147e-06, "loss": 16.6807, "step": 142930 }, { "epoch": 0.28874784358246103, "grad_norm": 502.75067138671875, "learning_rate": 8.983915510111918e-06, "loss": 20.0843, "step": 142940 }, { "epoch": 0.2887680442151448, "grad_norm": 260.4075927734375, "learning_rate": 8.983704571983568e-06, "loss": 21.9718, "step": 142950 }, { "epoch": 0.2887882448478286, "grad_norm": 736.0562744140625, "learning_rate": 8.983493614439123e-06, "loss": 23.5602, "step": 142960 }, { "epoch": 0.28880844548051243, "grad_norm": 48.65081024169922, "learning_rate": 8.983282637479613e-06, "loss": 24.7249, "step": 142970 }, { "epoch": 0.28882864611319625, "grad_norm": 494.50213623046875, "learning_rate": 8.983071641106068e-06, "loss": 24.5031, "step": 142980 }, { "epoch": 0.2888488467458801, "grad_norm": 534.8903198242188, "learning_rate": 8.982860625319514e-06, "loss": 10.7974, "step": 142990 }, { "epoch": 0.2888690473785639, "grad_norm": 884.2274169921875, "learning_rate": 8.982649590120982e-06, "loss": 32.7576, "step": 143000 }, { "epoch": 0.2888892480112477, "grad_norm": 830.9031372070312, "learning_rate": 8.982438535511498e-06, "loss": 22.6672, "step": 143010 }, { "epoch": 0.28890944864393153, "grad_norm": 499.8160400390625, "learning_rate": 8.982227461492092e-06, "loss": 26.981, "step": 143020 }, { "epoch": 0.28892964927661535, "grad_norm": 910.0860595703125, "learning_rate": 8.982016368063793e-06, "loss": 31.4334, "step": 143030 }, { "epoch": 0.2889498499092992, "grad_norm": 146.77008056640625, "learning_rate": 8.981805255227627e-06, "loss": 12.6157, "step": 143040 }, { "epoch": 0.288970050541983, "grad_norm": 354.0091247558594, "learning_rate": 8.981594122984628e-06, "loss": 35.6246, "step": 143050 }, { "epoch": 0.2889902511746668, "grad_norm": 370.31414794921875, "learning_rate": 8.98138297133582e-06, "loss": 21.4761, "step": 143060 }, { "epoch": 0.28901045180735063, "grad_norm": 293.3982849121094, "learning_rate": 8.981171800282233e-06, "loss": 14.7823, "step": 143070 }, { "epoch": 0.2890306524400344, "grad_norm": 309.86907958984375, "learning_rate": 8.9809606098249e-06, "loss": 13.7215, "step": 143080 }, { "epoch": 0.2890508530727182, "grad_norm": 441.5501403808594, "learning_rate": 8.980749399964847e-06, "loss": 14.571, "step": 143090 }, { 
"epoch": 0.28907105370540204, "grad_norm": 792.845458984375, "learning_rate": 8.980538170703104e-06, "loss": 20.5297, "step": 143100 }, { "epoch": 0.28909125433808586, "grad_norm": 323.4634094238281, "learning_rate": 8.9803269220407e-06, "loss": 18.91, "step": 143110 }, { "epoch": 0.2891114549707697, "grad_norm": 197.86825561523438, "learning_rate": 8.980115653978667e-06, "loss": 27.4354, "step": 143120 }, { "epoch": 0.2891316556034535, "grad_norm": 439.68585205078125, "learning_rate": 8.979904366518034e-06, "loss": 17.8669, "step": 143130 }, { "epoch": 0.2891518562361373, "grad_norm": 39.589786529541016, "learning_rate": 8.979693059659826e-06, "loss": 44.6112, "step": 143140 }, { "epoch": 0.28917205686882114, "grad_norm": 235.7417755126953, "learning_rate": 8.97948173340508e-06, "loss": 11.0216, "step": 143150 }, { "epoch": 0.28919225750150496, "grad_norm": 420.8698425292969, "learning_rate": 8.97927038775482e-06, "loss": 28.2996, "step": 143160 }, { "epoch": 0.2892124581341888, "grad_norm": 192.67665100097656, "learning_rate": 8.979059022710081e-06, "loss": 20.0547, "step": 143170 }, { "epoch": 0.2892326587668726, "grad_norm": 587.1990356445312, "learning_rate": 8.97884763827189e-06, "loss": 27.0522, "step": 143180 }, { "epoch": 0.2892528593995564, "grad_norm": 497.73663330078125, "learning_rate": 8.97863623444128e-06, "loss": 32.4231, "step": 143190 }, { "epoch": 0.28927306003224024, "grad_norm": 328.3630065917969, "learning_rate": 8.978424811219277e-06, "loss": 18.4167, "step": 143200 }, { "epoch": 0.289293260664924, "grad_norm": 114.45890808105469, "learning_rate": 8.978213368606916e-06, "loss": 9.9748, "step": 143210 }, { "epoch": 0.2893134612976078, "grad_norm": 535.30615234375, "learning_rate": 8.978001906605226e-06, "loss": 13.5249, "step": 143220 }, { "epoch": 0.28933366193029164, "grad_norm": 593.1908569335938, "learning_rate": 8.977790425215234e-06, "loss": 31.4074, "step": 143230 }, { "epoch": 0.28935386256297546, "grad_norm": 209.8313446044922, "learning_rate": 8.977578924437976e-06, "loss": 17.6172, "step": 143240 }, { "epoch": 0.2893740631956593, "grad_norm": 634.3653564453125, "learning_rate": 8.97736740427448e-06, "loss": 29.8174, "step": 143250 }, { "epoch": 0.2893942638283431, "grad_norm": 753.666259765625, "learning_rate": 8.977155864725778e-06, "loss": 22.5378, "step": 143260 }, { "epoch": 0.2894144644610269, "grad_norm": 473.6728210449219, "learning_rate": 8.976944305792901e-06, "loss": 18.1003, "step": 143270 }, { "epoch": 0.28943466509371074, "grad_norm": 1239.0589599609375, "learning_rate": 8.97673272747688e-06, "loss": 55.9816, "step": 143280 }, { "epoch": 0.28945486572639456, "grad_norm": 180.95069885253906, "learning_rate": 8.976521129778746e-06, "loss": 14.7166, "step": 143290 }, { "epoch": 0.2894750663590784, "grad_norm": 52.53523635864258, "learning_rate": 8.97630951269953e-06, "loss": 15.5059, "step": 143300 }, { "epoch": 0.2894952669917622, "grad_norm": 511.3759765625, "learning_rate": 8.976097876240263e-06, "loss": 35.4166, "step": 143310 }, { "epoch": 0.289515467624446, "grad_norm": 558.9628295898438, "learning_rate": 8.975886220401978e-06, "loss": 14.7336, "step": 143320 }, { "epoch": 0.2895356682571298, "grad_norm": 391.6131286621094, "learning_rate": 8.975674545185704e-06, "loss": 25.8173, "step": 143330 }, { "epoch": 0.2895558688898136, "grad_norm": 269.6672058105469, "learning_rate": 8.975462850592476e-06, "loss": 16.7628, "step": 143340 }, { "epoch": 0.2895760695224974, "grad_norm": 11.58529281616211, "learning_rate": 8.975251136623326e-06, "loss": 
23.6183, "step": 143350 }, { "epoch": 0.28959627015518125, "grad_norm": 594.9649658203125, "learning_rate": 8.975039403279282e-06, "loss": 21.1069, "step": 143360 }, { "epoch": 0.28961647078786507, "grad_norm": 178.17930603027344, "learning_rate": 8.974827650561378e-06, "loss": 23.1182, "step": 143370 }, { "epoch": 0.2896366714205489, "grad_norm": 606.8555297851562, "learning_rate": 8.974615878470646e-06, "loss": 49.7317, "step": 143380 }, { "epoch": 0.2896568720532327, "grad_norm": 132.47006225585938, "learning_rate": 8.97440408700812e-06, "loss": 16.5019, "step": 143390 }, { "epoch": 0.2896770726859165, "grad_norm": 65.81047821044922, "learning_rate": 8.97419227617483e-06, "loss": 20.2708, "step": 143400 }, { "epoch": 0.28969727331860035, "grad_norm": 431.75469970703125, "learning_rate": 8.973980445971806e-06, "loss": 34.7638, "step": 143410 }, { "epoch": 0.28971747395128417, "grad_norm": 456.8233947753906, "learning_rate": 8.973768596400085e-06, "loss": 24.3499, "step": 143420 }, { "epoch": 0.289737674583968, "grad_norm": 57.23274612426758, "learning_rate": 8.973556727460699e-06, "loss": 33.2259, "step": 143430 }, { "epoch": 0.2897578752166518, "grad_norm": 810.389892578125, "learning_rate": 8.973344839154678e-06, "loss": 26.781, "step": 143440 }, { "epoch": 0.2897780758493356, "grad_norm": 571.724365234375, "learning_rate": 8.973132931483057e-06, "loss": 27.9718, "step": 143450 }, { "epoch": 0.2897982764820194, "grad_norm": 459.9219665527344, "learning_rate": 8.972921004446868e-06, "loss": 18.6552, "step": 143460 }, { "epoch": 0.2898184771147032, "grad_norm": 1089.4761962890625, "learning_rate": 8.972709058047145e-06, "loss": 32.9153, "step": 143470 }, { "epoch": 0.28983867774738703, "grad_norm": 186.7657470703125, "learning_rate": 8.972497092284918e-06, "loss": 25.3111, "step": 143480 }, { "epoch": 0.28985887838007085, "grad_norm": 149.28268432617188, "learning_rate": 8.972285107161222e-06, "loss": 18.3322, "step": 143490 }, { "epoch": 0.28987907901275467, "grad_norm": 242.9874267578125, "learning_rate": 8.972073102677091e-06, "loss": 21.4952, "step": 143500 }, { "epoch": 0.2898992796454385, "grad_norm": 21.775375366210938, "learning_rate": 8.971861078833558e-06, "loss": 18.7088, "step": 143510 }, { "epoch": 0.2899194802781223, "grad_norm": 34.676124572753906, "learning_rate": 8.971649035631655e-06, "loss": 10.497, "step": 143520 }, { "epoch": 0.28993968091080613, "grad_norm": 41.31591796875, "learning_rate": 8.971436973072416e-06, "loss": 15.0646, "step": 143530 }, { "epoch": 0.28995988154348995, "grad_norm": 91.02080535888672, "learning_rate": 8.971224891156876e-06, "loss": 28.2071, "step": 143540 }, { "epoch": 0.28998008217617377, "grad_norm": 1039.9039306640625, "learning_rate": 8.971012789886066e-06, "loss": 23.3713, "step": 143550 }, { "epoch": 0.2900002828088576, "grad_norm": 561.6629638671875, "learning_rate": 8.970800669261022e-06, "loss": 26.126, "step": 143560 }, { "epoch": 0.2900204834415414, "grad_norm": 701.478271484375, "learning_rate": 8.970588529282778e-06, "loss": 22.7546, "step": 143570 }, { "epoch": 0.29004068407422523, "grad_norm": 411.79571533203125, "learning_rate": 8.970376369952366e-06, "loss": 17.5618, "step": 143580 }, { "epoch": 0.290060884706909, "grad_norm": 343.6632385253906, "learning_rate": 8.97016419127082e-06, "loss": 21.3977, "step": 143590 }, { "epoch": 0.2900810853395928, "grad_norm": 387.7538146972656, "learning_rate": 8.969951993239177e-06, "loss": 28.7602, "step": 143600 }, { "epoch": 0.29010128597227663, "grad_norm": 326.79339599609375, 
"learning_rate": 8.96973977585847e-06, "loss": 22.5106, "step": 143610 }, { "epoch": 0.29012148660496045, "grad_norm": 814.3696899414062, "learning_rate": 8.969527539129732e-06, "loss": 23.4005, "step": 143620 }, { "epoch": 0.2901416872376443, "grad_norm": 354.3022155761719, "learning_rate": 8.969315283053998e-06, "loss": 25.594, "step": 143630 }, { "epoch": 0.2901618878703281, "grad_norm": 608.0245361328125, "learning_rate": 8.969103007632302e-06, "loss": 14.8101, "step": 143640 }, { "epoch": 0.2901820885030119, "grad_norm": 335.25665283203125, "learning_rate": 8.96889071286568e-06, "loss": 12.4076, "step": 143650 }, { "epoch": 0.29020228913569573, "grad_norm": 464.4058837890625, "learning_rate": 8.968678398755165e-06, "loss": 24.3785, "step": 143660 }, { "epoch": 0.29022248976837955, "grad_norm": 201.16714477539062, "learning_rate": 8.968466065301796e-06, "loss": 12.0195, "step": 143670 }, { "epoch": 0.2902426904010634, "grad_norm": 879.4492797851562, "learning_rate": 8.968253712506602e-06, "loss": 31.0383, "step": 143680 }, { "epoch": 0.2902628910337472, "grad_norm": 343.1659240722656, "learning_rate": 8.968041340370622e-06, "loss": 22.9296, "step": 143690 }, { "epoch": 0.290283091666431, "grad_norm": 1271.926025390625, "learning_rate": 8.96782894889489e-06, "loss": 32.2323, "step": 143700 }, { "epoch": 0.29030329229911483, "grad_norm": 215.3273468017578, "learning_rate": 8.967616538080438e-06, "loss": 18.3563, "step": 143710 }, { "epoch": 0.2903234929317986, "grad_norm": 413.6395263671875, "learning_rate": 8.967404107928309e-06, "loss": 20.5151, "step": 143720 }, { "epoch": 0.2903436935644824, "grad_norm": 713.0821533203125, "learning_rate": 8.96719165843953e-06, "loss": 44.3929, "step": 143730 }, { "epoch": 0.29036389419716624, "grad_norm": 465.6328125, "learning_rate": 8.966979189615142e-06, "loss": 15.2196, "step": 143740 }, { "epoch": 0.29038409482985006, "grad_norm": 299.3564758300781, "learning_rate": 8.966766701456177e-06, "loss": 12.7935, "step": 143750 }, { "epoch": 0.2904042954625339, "grad_norm": 209.91482543945312, "learning_rate": 8.966554193963673e-06, "loss": 33.006, "step": 143760 }, { "epoch": 0.2904244960952177, "grad_norm": 354.65521240234375, "learning_rate": 8.966341667138663e-06, "loss": 19.442, "step": 143770 }, { "epoch": 0.2904446967279015, "grad_norm": 226.30235290527344, "learning_rate": 8.966129120982188e-06, "loss": 22.6549, "step": 143780 }, { "epoch": 0.29046489736058534, "grad_norm": 105.86489868164062, "learning_rate": 8.965916555495278e-06, "loss": 8.8837, "step": 143790 }, { "epoch": 0.29048509799326916, "grad_norm": 402.1645202636719, "learning_rate": 8.965703970678974e-06, "loss": 28.6892, "step": 143800 }, { "epoch": 0.290505298625953, "grad_norm": 830.6857299804688, "learning_rate": 8.965491366534309e-06, "loss": 21.4636, "step": 143810 }, { "epoch": 0.2905254992586368, "grad_norm": 241.9054718017578, "learning_rate": 8.96527874306232e-06, "loss": 12.2601, "step": 143820 }, { "epoch": 0.2905456998913206, "grad_norm": 332.3222961425781, "learning_rate": 8.965066100264042e-06, "loss": 44.2617, "step": 143830 }, { "epoch": 0.29056590052400444, "grad_norm": 409.7407531738281, "learning_rate": 8.964853438140515e-06, "loss": 15.4268, "step": 143840 }, { "epoch": 0.2905861011566882, "grad_norm": 420.72845458984375, "learning_rate": 8.96464075669277e-06, "loss": 16.8882, "step": 143850 }, { "epoch": 0.290606301789372, "grad_norm": 0.21845011413097382, "learning_rate": 8.96442805592185e-06, "loss": 20.7173, "step": 143860 }, { "epoch": 0.29062650242205584, 
"grad_norm": 325.0856018066406, "learning_rate": 8.964215335828788e-06, "loss": 24.7074, "step": 143870 }, { "epoch": 0.29064670305473966, "grad_norm": 572.4677124023438, "learning_rate": 8.96400259641462e-06, "loss": 19.0564, "step": 143880 }, { "epoch": 0.2906669036874235, "grad_norm": 535.1879272460938, "learning_rate": 8.963789837680386e-06, "loss": 40.4394, "step": 143890 }, { "epoch": 0.2906871043201073, "grad_norm": 136.49729919433594, "learning_rate": 8.963577059627117e-06, "loss": 20.6042, "step": 143900 }, { "epoch": 0.2907073049527911, "grad_norm": 331.70404052734375, "learning_rate": 8.963364262255859e-06, "loss": 18.4239, "step": 143910 }, { "epoch": 0.29072750558547494, "grad_norm": 315.2754821777344, "learning_rate": 8.963151445567642e-06, "loss": 23.5942, "step": 143920 }, { "epoch": 0.29074770621815876, "grad_norm": 149.0545654296875, "learning_rate": 8.962938609563506e-06, "loss": 31.3397, "step": 143930 }, { "epoch": 0.2907679068508426, "grad_norm": 820.7781372070312, "learning_rate": 8.962725754244487e-06, "loss": 31.2748, "step": 143940 }, { "epoch": 0.2907881074835264, "grad_norm": 659.9220581054688, "learning_rate": 8.962512879611624e-06, "loss": 23.4115, "step": 143950 }, { "epoch": 0.2908083081162102, "grad_norm": 373.0547180175781, "learning_rate": 8.962299985665955e-06, "loss": 9.2079, "step": 143960 }, { "epoch": 0.290828508748894, "grad_norm": 562.0675659179688, "learning_rate": 8.962087072408514e-06, "loss": 21.1904, "step": 143970 }, { "epoch": 0.2908487093815778, "grad_norm": 1321.6712646484375, "learning_rate": 8.961874139840342e-06, "loss": 56.7481, "step": 143980 }, { "epoch": 0.29086891001426163, "grad_norm": 0.0, "learning_rate": 8.961661187962477e-06, "loss": 21.7907, "step": 143990 }, { "epoch": 0.29088911064694545, "grad_norm": 125.6926498413086, "learning_rate": 8.961448216775955e-06, "loss": 41.779, "step": 144000 }, { "epoch": 0.29090931127962927, "grad_norm": 380.9176025390625, "learning_rate": 8.961235226281815e-06, "loss": 25.6484, "step": 144010 }, { "epoch": 0.2909295119123131, "grad_norm": 240.7569122314453, "learning_rate": 8.961022216481094e-06, "loss": 17.7789, "step": 144020 }, { "epoch": 0.2909497125449969, "grad_norm": 227.57423400878906, "learning_rate": 8.960809187374833e-06, "loss": 21.8197, "step": 144030 }, { "epoch": 0.29096991317768073, "grad_norm": 223.62075805664062, "learning_rate": 8.960596138964065e-06, "loss": 18.3656, "step": 144040 }, { "epoch": 0.29099011381036455, "grad_norm": 381.46173095703125, "learning_rate": 8.960383071249837e-06, "loss": 10.363, "step": 144050 }, { "epoch": 0.29101031444304837, "grad_norm": 324.1127014160156, "learning_rate": 8.960169984233179e-06, "loss": 19.3931, "step": 144060 }, { "epoch": 0.2910305150757322, "grad_norm": 562.385498046875, "learning_rate": 8.959956877915132e-06, "loss": 13.9735, "step": 144070 }, { "epoch": 0.291050715708416, "grad_norm": 302.4697265625, "learning_rate": 8.959743752296736e-06, "loss": 18.8322, "step": 144080 }, { "epoch": 0.29107091634109983, "grad_norm": 515.53515625, "learning_rate": 8.959530607379032e-06, "loss": 35.0527, "step": 144090 }, { "epoch": 0.2910911169737836, "grad_norm": 760.4889526367188, "learning_rate": 8.959317443163054e-06, "loss": 21.2706, "step": 144100 }, { "epoch": 0.2911113176064674, "grad_norm": 123.36358642578125, "learning_rate": 8.959104259649842e-06, "loss": 24.0755, "step": 144110 }, { "epoch": 0.29113151823915123, "grad_norm": 1043.007080078125, "learning_rate": 8.958891056840438e-06, "loss": 56.0429, "step": 144120 }, { 
"epoch": 0.29115171887183505, "grad_norm": 55.80719757080078, "learning_rate": 8.958677834735879e-06, "loss": 13.53, "step": 144130 }, { "epoch": 0.2911719195045189, "grad_norm": 256.8143310546875, "learning_rate": 8.958464593337202e-06, "loss": 12.0759, "step": 144140 }, { "epoch": 0.2911921201372027, "grad_norm": 180.5006561279297, "learning_rate": 8.95825133264545e-06, "loss": 27.4772, "step": 144150 }, { "epoch": 0.2912123207698865, "grad_norm": 268.6878662109375, "learning_rate": 8.958038052661661e-06, "loss": 7.3291, "step": 144160 }, { "epoch": 0.29123252140257033, "grad_norm": 890.093017578125, "learning_rate": 8.957824753386877e-06, "loss": 27.2381, "step": 144170 }, { "epoch": 0.29125272203525415, "grad_norm": 610.2390747070312, "learning_rate": 8.957611434822133e-06, "loss": 24.5062, "step": 144180 }, { "epoch": 0.291272922667938, "grad_norm": 383.61016845703125, "learning_rate": 8.95739809696847e-06, "loss": 13.5431, "step": 144190 }, { "epoch": 0.2912931233006218, "grad_norm": 855.97705078125, "learning_rate": 8.957184739826929e-06, "loss": 24.0863, "step": 144200 }, { "epoch": 0.2913133239333056, "grad_norm": 446.4833068847656, "learning_rate": 8.95697136339855e-06, "loss": 22.1917, "step": 144210 }, { "epoch": 0.29133352456598943, "grad_norm": 461.7430419921875, "learning_rate": 8.956757967684372e-06, "loss": 16.1331, "step": 144220 }, { "epoch": 0.2913537251986732, "grad_norm": 43.587120056152344, "learning_rate": 8.956544552685437e-06, "loss": 31.9169, "step": 144230 }, { "epoch": 0.291373925831357, "grad_norm": 200.32955932617188, "learning_rate": 8.956331118402784e-06, "loss": 17.4206, "step": 144240 }, { "epoch": 0.29139412646404084, "grad_norm": 9.927374839782715, "learning_rate": 8.956117664837452e-06, "loss": 16.6463, "step": 144250 }, { "epoch": 0.29141432709672466, "grad_norm": 229.5208282470703, "learning_rate": 8.955904191990481e-06, "loss": 21.387, "step": 144260 }, { "epoch": 0.2914345277294085, "grad_norm": 1290.9556884765625, "learning_rate": 8.955690699862913e-06, "loss": 22.9719, "step": 144270 }, { "epoch": 0.2914547283620923, "grad_norm": 347.4378662109375, "learning_rate": 8.955477188455791e-06, "loss": 12.1614, "step": 144280 }, { "epoch": 0.2914749289947761, "grad_norm": 386.7215576171875, "learning_rate": 8.95526365777015e-06, "loss": 30.4455, "step": 144290 }, { "epoch": 0.29149512962745994, "grad_norm": 568.8643188476562, "learning_rate": 8.955050107807035e-06, "loss": 42.8156, "step": 144300 }, { "epoch": 0.29151533026014376, "grad_norm": 462.5457458496094, "learning_rate": 8.954836538567486e-06, "loss": 18.1224, "step": 144310 }, { "epoch": 0.2915355308928276, "grad_norm": 862.5878295898438, "learning_rate": 8.954622950052543e-06, "loss": 33.7564, "step": 144320 }, { "epoch": 0.2915557315255114, "grad_norm": 1225.79931640625, "learning_rate": 8.954409342263246e-06, "loss": 34.9419, "step": 144330 }, { "epoch": 0.2915759321581952, "grad_norm": 536.2356567382812, "learning_rate": 8.95419571520064e-06, "loss": 23.5325, "step": 144340 }, { "epoch": 0.29159613279087904, "grad_norm": 125.20826721191406, "learning_rate": 8.95398206886576e-06, "loss": 17.5736, "step": 144350 }, { "epoch": 0.2916163334235628, "grad_norm": 201.34152221679688, "learning_rate": 8.953768403259655e-06, "loss": 13.0423, "step": 144360 }, { "epoch": 0.2916365340562466, "grad_norm": 343.0006408691406, "learning_rate": 8.95355471838336e-06, "loss": 28.9384, "step": 144370 }, { "epoch": 0.29165673468893044, "grad_norm": 450.1666564941406, "learning_rate": 8.953341014237919e-06, 
"loss": 39.7002, "step": 144380 }, { "epoch": 0.29167693532161426, "grad_norm": 436.2256164550781, "learning_rate": 8.953127290824374e-06, "loss": 23.7114, "step": 144390 }, { "epoch": 0.2916971359542981, "grad_norm": 500.2027587890625, "learning_rate": 8.952913548143766e-06, "loss": 14.5103, "step": 144400 }, { "epoch": 0.2917173365869819, "grad_norm": 147.0856475830078, "learning_rate": 8.952699786197137e-06, "loss": 23.2955, "step": 144410 }, { "epoch": 0.2917375372196657, "grad_norm": 286.50390625, "learning_rate": 8.952486004985527e-06, "loss": 35.0774, "step": 144420 }, { "epoch": 0.29175773785234954, "grad_norm": 254.57830810546875, "learning_rate": 8.95227220450998e-06, "loss": 20.6649, "step": 144430 }, { "epoch": 0.29177793848503336, "grad_norm": 275.4768371582031, "learning_rate": 8.952058384771539e-06, "loss": 12.5758, "step": 144440 }, { "epoch": 0.2917981391177172, "grad_norm": 416.6864013671875, "learning_rate": 8.951844545771244e-06, "loss": 16.3182, "step": 144450 }, { "epoch": 0.291818339750401, "grad_norm": 607.0198364257812, "learning_rate": 8.951630687510137e-06, "loss": 21.5984, "step": 144460 }, { "epoch": 0.2918385403830848, "grad_norm": 393.8330383300781, "learning_rate": 8.951416809989263e-06, "loss": 32.2409, "step": 144470 }, { "epoch": 0.29185874101576864, "grad_norm": 738.3124389648438, "learning_rate": 8.951202913209662e-06, "loss": 34.3631, "step": 144480 }, { "epoch": 0.2918789416484524, "grad_norm": 426.9834899902344, "learning_rate": 8.950988997172378e-06, "loss": 23.0125, "step": 144490 }, { "epoch": 0.2918991422811362, "grad_norm": 616.0340576171875, "learning_rate": 8.950775061878453e-06, "loss": 27.9565, "step": 144500 }, { "epoch": 0.29191934291382005, "grad_norm": 239.58522033691406, "learning_rate": 8.950561107328927e-06, "loss": 47.7109, "step": 144510 }, { "epoch": 0.29193954354650387, "grad_norm": 119.57493591308594, "learning_rate": 8.950347133524849e-06, "loss": 29.4281, "step": 144520 }, { "epoch": 0.2919597441791877, "grad_norm": 58.043556213378906, "learning_rate": 8.950133140467256e-06, "loss": 32.7095, "step": 144530 }, { "epoch": 0.2919799448118715, "grad_norm": 207.09091186523438, "learning_rate": 8.949919128157194e-06, "loss": 24.4547, "step": 144540 }, { "epoch": 0.2920001454445553, "grad_norm": 609.0300903320312, "learning_rate": 8.949705096595704e-06, "loss": 25.7507, "step": 144550 }, { "epoch": 0.29202034607723915, "grad_norm": 578.8982543945312, "learning_rate": 8.94949104578383e-06, "loss": 13.9327, "step": 144560 }, { "epoch": 0.29204054670992297, "grad_norm": 423.1669616699219, "learning_rate": 8.949276975722617e-06, "loss": 20.6589, "step": 144570 }, { "epoch": 0.2920607473426068, "grad_norm": 114.99697875976562, "learning_rate": 8.949062886413106e-06, "loss": 31.3314, "step": 144580 }, { "epoch": 0.2920809479752906, "grad_norm": 261.10162353515625, "learning_rate": 8.948848777856342e-06, "loss": 15.5353, "step": 144590 }, { "epoch": 0.2921011486079744, "grad_norm": 252.13076782226562, "learning_rate": 8.94863465005337e-06, "loss": 20.2258, "step": 144600 }, { "epoch": 0.2921213492406582, "grad_norm": 600.653076171875, "learning_rate": 8.948420503005229e-06, "loss": 35.3409, "step": 144610 }, { "epoch": 0.292141549873342, "grad_norm": 275.3252258300781, "learning_rate": 8.948206336712966e-06, "loss": 31.1141, "step": 144620 }, { "epoch": 0.29216175050602583, "grad_norm": 526.3372802734375, "learning_rate": 8.947992151177625e-06, "loss": 22.6549, "step": 144630 }, { "epoch": 0.29218195113870965, "grad_norm": 313.9520263671875, 
"learning_rate": 8.947777946400247e-06, "loss": 10.2737, "step": 144640 }, { "epoch": 0.29220215177139347, "grad_norm": 196.8408660888672, "learning_rate": 8.94756372238188e-06, "loss": 30.4515, "step": 144650 }, { "epoch": 0.2922223524040773, "grad_norm": 272.763427734375, "learning_rate": 8.947349479123565e-06, "loss": 36.4658, "step": 144660 }, { "epoch": 0.2922425530367611, "grad_norm": 284.01666259765625, "learning_rate": 8.947135216626349e-06, "loss": 20.4259, "step": 144670 }, { "epoch": 0.29226275366944493, "grad_norm": 351.39532470703125, "learning_rate": 8.946920934891274e-06, "loss": 15.2977, "step": 144680 }, { "epoch": 0.29228295430212875, "grad_norm": 790.0850830078125, "learning_rate": 8.946706633919385e-06, "loss": 32.3869, "step": 144690 }, { "epoch": 0.29230315493481257, "grad_norm": 377.062255859375, "learning_rate": 8.946492313711725e-06, "loss": 41.3869, "step": 144700 }, { "epoch": 0.2923233555674964, "grad_norm": 509.7406311035156, "learning_rate": 8.946277974269342e-06, "loss": 28.8467, "step": 144710 }, { "epoch": 0.2923435562001802, "grad_norm": 600.3405151367188, "learning_rate": 8.94606361559328e-06, "loss": 23.0266, "step": 144720 }, { "epoch": 0.29236375683286403, "grad_norm": 650.2919921875, "learning_rate": 8.945849237684578e-06, "loss": 21.252, "step": 144730 }, { "epoch": 0.2923839574655478, "grad_norm": 862.9744873046875, "learning_rate": 8.94563484054429e-06, "loss": 31.7525, "step": 144740 }, { "epoch": 0.2924041580982316, "grad_norm": 197.72608947753906, "learning_rate": 8.945420424173455e-06, "loss": 16.2845, "step": 144750 }, { "epoch": 0.29242435873091543, "grad_norm": 309.2283630371094, "learning_rate": 8.945205988573117e-06, "loss": 24.8, "step": 144760 }, { "epoch": 0.29244455936359925, "grad_norm": 273.2706604003906, "learning_rate": 8.944991533744327e-06, "loss": 11.5869, "step": 144770 }, { "epoch": 0.2924647599962831, "grad_norm": 470.6202087402344, "learning_rate": 8.944777059688125e-06, "loss": 18.9063, "step": 144780 }, { "epoch": 0.2924849606289669, "grad_norm": 32.0260124206543, "learning_rate": 8.944562566405558e-06, "loss": 18.1894, "step": 144790 }, { "epoch": 0.2925051612616507, "grad_norm": 569.5053100585938, "learning_rate": 8.944348053897672e-06, "loss": 22.1403, "step": 144800 }, { "epoch": 0.29252536189433453, "grad_norm": 104.93385314941406, "learning_rate": 8.94413352216551e-06, "loss": 26.3922, "step": 144810 }, { "epoch": 0.29254556252701835, "grad_norm": 833.8599243164062, "learning_rate": 8.943918971210122e-06, "loss": 21.0204, "step": 144820 }, { "epoch": 0.2925657631597022, "grad_norm": 487.80242919921875, "learning_rate": 8.943704401032551e-06, "loss": 20.9569, "step": 144830 }, { "epoch": 0.292585963792386, "grad_norm": 386.6275939941406, "learning_rate": 8.943489811633843e-06, "loss": 12.6614, "step": 144840 }, { "epoch": 0.2926061644250698, "grad_norm": 1075.7357177734375, "learning_rate": 8.943275203015042e-06, "loss": 35.2753, "step": 144850 }, { "epoch": 0.29262636505775363, "grad_norm": 349.0261535644531, "learning_rate": 8.943060575177197e-06, "loss": 16.3996, "step": 144860 }, { "epoch": 0.2926465656904374, "grad_norm": 108.65377044677734, "learning_rate": 8.942845928121356e-06, "loss": 12.0128, "step": 144870 }, { "epoch": 0.2926667663231212, "grad_norm": 484.78271484375, "learning_rate": 8.942631261848558e-06, "loss": 37.5167, "step": 144880 }, { "epoch": 0.29268696695580504, "grad_norm": 464.53704833984375, "learning_rate": 8.942416576359855e-06, "loss": 20.0881, "step": 144890 }, { "epoch": 
0.29270716758848886, "grad_norm": 372.568359375, "learning_rate": 8.942201871656292e-06, "loss": 28.2813, "step": 144900 }, { "epoch": 0.2927273682211727, "grad_norm": 501.3120422363281, "learning_rate": 8.941987147738915e-06, "loss": 21.7532, "step": 144910 }, { "epoch": 0.2927475688538565, "grad_norm": 224.8948211669922, "learning_rate": 8.94177240460877e-06, "loss": 14.4497, "step": 144920 }, { "epoch": 0.2927677694865403, "grad_norm": 110.44869995117188, "learning_rate": 8.941557642266906e-06, "loss": 18.5177, "step": 144930 }, { "epoch": 0.29278797011922414, "grad_norm": 367.12896728515625, "learning_rate": 8.941342860714368e-06, "loss": 16.2311, "step": 144940 }, { "epoch": 0.29280817075190796, "grad_norm": 429.4028625488281, "learning_rate": 8.9411280599522e-06, "loss": 23.6019, "step": 144950 }, { "epoch": 0.2928283713845918, "grad_norm": 17.152137756347656, "learning_rate": 8.940913239981454e-06, "loss": 23.7691, "step": 144960 }, { "epoch": 0.2928485720172756, "grad_norm": 934.8453979492188, "learning_rate": 8.940698400803177e-06, "loss": 26.2601, "step": 144970 }, { "epoch": 0.2928687726499594, "grad_norm": 434.5068359375, "learning_rate": 8.94048354241841e-06, "loss": 22.1481, "step": 144980 }, { "epoch": 0.29288897328264324, "grad_norm": 381.8167419433594, "learning_rate": 8.940268664828207e-06, "loss": 19.2977, "step": 144990 }, { "epoch": 0.292909173915327, "grad_norm": 287.8057556152344, "learning_rate": 8.94005376803361e-06, "loss": 18.1322, "step": 145000 }, { "epoch": 0.2929293745480108, "grad_norm": 185.08685302734375, "learning_rate": 8.939838852035672e-06, "loss": 16.3741, "step": 145010 }, { "epoch": 0.29294957518069464, "grad_norm": 1287.8944091796875, "learning_rate": 8.939623916835434e-06, "loss": 20.9493, "step": 145020 }, { "epoch": 0.29296977581337846, "grad_norm": 635.646484375, "learning_rate": 8.939408962433949e-06, "loss": 25.3974, "step": 145030 }, { "epoch": 0.2929899764460623, "grad_norm": 374.07794189453125, "learning_rate": 8.939193988832261e-06, "loss": 10.0691, "step": 145040 }, { "epoch": 0.2930101770787461, "grad_norm": 107.63578033447266, "learning_rate": 8.93897899603142e-06, "loss": 16.6006, "step": 145050 }, { "epoch": 0.2930303777114299, "grad_norm": 519.5859985351562, "learning_rate": 8.938763984032473e-06, "loss": 13.1343, "step": 145060 }, { "epoch": 0.29305057834411374, "grad_norm": 450.7736511230469, "learning_rate": 8.938548952836469e-06, "loss": 25.0087, "step": 145070 }, { "epoch": 0.29307077897679756, "grad_norm": 198.88833618164062, "learning_rate": 8.938333902444454e-06, "loss": 14.4416, "step": 145080 }, { "epoch": 0.2930909796094814, "grad_norm": 757.00146484375, "learning_rate": 8.938118832857476e-06, "loss": 25.4719, "step": 145090 }, { "epoch": 0.2931111802421652, "grad_norm": 715.574951171875, "learning_rate": 8.937903744076587e-06, "loss": 16.1636, "step": 145100 }, { "epoch": 0.293131380874849, "grad_norm": 634.7432250976562, "learning_rate": 8.937688636102832e-06, "loss": 51.1023, "step": 145110 }, { "epoch": 0.29315158150753284, "grad_norm": 302.01226806640625, "learning_rate": 8.93747350893726e-06, "loss": 16.4928, "step": 145120 }, { "epoch": 0.2931717821402166, "grad_norm": 257.2762145996094, "learning_rate": 8.937258362580918e-06, "loss": 21.9717, "step": 145130 }, { "epoch": 0.29319198277290043, "grad_norm": 430.455078125, "learning_rate": 8.937043197034858e-06, "loss": 21.8851, "step": 145140 }, { "epoch": 0.29321218340558425, "grad_norm": 2753.68017578125, "learning_rate": 8.936828012300127e-06, "loss": 30.6224, 
"step": 145150 }, { "epoch": 0.29323238403826807, "grad_norm": 273.1519775390625, "learning_rate": 8.936612808377773e-06, "loss": 9.0698, "step": 145160 }, { "epoch": 0.2932525846709519, "grad_norm": 439.8018798828125, "learning_rate": 8.936397585268848e-06, "loss": 21.0634, "step": 145170 }, { "epoch": 0.2932727853036357, "grad_norm": 994.3154907226562, "learning_rate": 8.936182342974396e-06, "loss": 31.0293, "step": 145180 }, { "epoch": 0.29329298593631953, "grad_norm": 262.0361328125, "learning_rate": 8.93596708149547e-06, "loss": 15.7493, "step": 145190 }, { "epoch": 0.29331318656900335, "grad_norm": 222.2875213623047, "learning_rate": 8.935751800833117e-06, "loss": 21.3349, "step": 145200 }, { "epoch": 0.29333338720168717, "grad_norm": 220.3922119140625, "learning_rate": 8.935536500988387e-06, "loss": 12.4372, "step": 145210 }, { "epoch": 0.293353587834371, "grad_norm": 252.5332794189453, "learning_rate": 8.93532118196233e-06, "loss": 19.8117, "step": 145220 }, { "epoch": 0.2933737884670548, "grad_norm": 239.65103149414062, "learning_rate": 8.935105843755994e-06, "loss": 19.7695, "step": 145230 }, { "epoch": 0.29339398909973863, "grad_norm": 196.02635192871094, "learning_rate": 8.93489048637043e-06, "loss": 10.8984, "step": 145240 }, { "epoch": 0.2934141897324224, "grad_norm": 86.53240203857422, "learning_rate": 8.934675109806688e-06, "loss": 37.7658, "step": 145250 }, { "epoch": 0.2934343903651062, "grad_norm": 453.9691467285156, "learning_rate": 8.934459714065815e-06, "loss": 14.615, "step": 145260 }, { "epoch": 0.29345459099779003, "grad_norm": 163.30526733398438, "learning_rate": 8.934244299148864e-06, "loss": 24.815, "step": 145270 }, { "epoch": 0.29347479163047385, "grad_norm": 213.64715576171875, "learning_rate": 8.934028865056883e-06, "loss": 39.1907, "step": 145280 }, { "epoch": 0.2934949922631577, "grad_norm": 767.2720947265625, "learning_rate": 8.933813411790922e-06, "loss": 28.3336, "step": 145290 }, { "epoch": 0.2935151928958415, "grad_norm": 313.1923828125, "learning_rate": 8.933597939352031e-06, "loss": 41.4924, "step": 145300 }, { "epoch": 0.2935353935285253, "grad_norm": 12.70064926147461, "learning_rate": 8.93338244774126e-06, "loss": 12.5341, "step": 145310 }, { "epoch": 0.29355559416120913, "grad_norm": 296.32940673828125, "learning_rate": 8.933166936959664e-06, "loss": 32.5023, "step": 145320 }, { "epoch": 0.29357579479389295, "grad_norm": 381.9779968261719, "learning_rate": 8.932951407008286e-06, "loss": 14.9476, "step": 145330 }, { "epoch": 0.2935959954265768, "grad_norm": 471.7348327636719, "learning_rate": 8.93273585788818e-06, "loss": 32.7502, "step": 145340 }, { "epoch": 0.2936161960592606, "grad_norm": 45.068382263183594, "learning_rate": 8.932520289600396e-06, "loss": 24.3668, "step": 145350 }, { "epoch": 0.2936363966919444, "grad_norm": 211.95819091796875, "learning_rate": 8.932304702145988e-06, "loss": 14.7878, "step": 145360 }, { "epoch": 0.29365659732462823, "grad_norm": 274.7302551269531, "learning_rate": 8.932089095526003e-06, "loss": 15.4987, "step": 145370 }, { "epoch": 0.293676797957312, "grad_norm": 265.11651611328125, "learning_rate": 8.93187346974149e-06, "loss": 23.715, "step": 145380 }, { "epoch": 0.2936969985899958, "grad_norm": 323.3000793457031, "learning_rate": 8.931657824793505e-06, "loss": 26.2044, "step": 145390 }, { "epoch": 0.29371719922267964, "grad_norm": 263.9230651855469, "learning_rate": 8.931442160683094e-06, "loss": 8.327, "step": 145400 }, { "epoch": 0.29373739985536346, "grad_norm": 499.3628234863281, "learning_rate": 
8.931226477411314e-06, "loss": 32.8783, "step": 145410 }, { "epoch": 0.2937576004880473, "grad_norm": 671.4661865234375, "learning_rate": 8.931010774979212e-06, "loss": 22.3906, "step": 145420 }, { "epoch": 0.2937778011207311, "grad_norm": 299.31536865234375, "learning_rate": 8.93079505338784e-06, "loss": 16.962, "step": 145430 }, { "epoch": 0.2937980017534149, "grad_norm": 276.3232116699219, "learning_rate": 8.93057931263825e-06, "loss": 54.696, "step": 145440 }, { "epoch": 0.29381820238609874, "grad_norm": 556.1576538085938, "learning_rate": 8.930363552731491e-06, "loss": 23.0526, "step": 145450 }, { "epoch": 0.29383840301878256, "grad_norm": 420.9582214355469, "learning_rate": 8.930147773668618e-06, "loss": 27.9474, "step": 145460 }, { "epoch": 0.2938586036514664, "grad_norm": 86.83731079101562, "learning_rate": 8.929931975450683e-06, "loss": 16.0631, "step": 145470 }, { "epoch": 0.2938788042841502, "grad_norm": 621.8159790039062, "learning_rate": 8.929716158078734e-06, "loss": 27.9742, "step": 145480 }, { "epoch": 0.293899004916834, "grad_norm": 1001.0625610351562, "learning_rate": 8.929500321553825e-06, "loss": 44.1198, "step": 145490 }, { "epoch": 0.29391920554951784, "grad_norm": 743.4075317382812, "learning_rate": 8.92928446587701e-06, "loss": 18.8021, "step": 145500 }, { "epoch": 0.2939394061822016, "grad_norm": 1123.14453125, "learning_rate": 8.929068591049338e-06, "loss": 22.9869, "step": 145510 }, { "epoch": 0.2939596068148854, "grad_norm": 406.63653564453125, "learning_rate": 8.928852697071863e-06, "loss": 25.7997, "step": 145520 }, { "epoch": 0.29397980744756924, "grad_norm": 313.7357177734375, "learning_rate": 8.928636783945635e-06, "loss": 25.3153, "step": 145530 }, { "epoch": 0.29400000808025306, "grad_norm": 552.120849609375, "learning_rate": 8.928420851671708e-06, "loss": 29.8396, "step": 145540 }, { "epoch": 0.2940202087129369, "grad_norm": 415.21307373046875, "learning_rate": 8.928204900251136e-06, "loss": 20.4346, "step": 145550 }, { "epoch": 0.2940404093456207, "grad_norm": 126.5959243774414, "learning_rate": 8.92798892968497e-06, "loss": 19.7, "step": 145560 }, { "epoch": 0.2940606099783045, "grad_norm": 903.931396484375, "learning_rate": 8.92777293997426e-06, "loss": 22.6722, "step": 145570 }, { "epoch": 0.29408081061098834, "grad_norm": 274.7602233886719, "learning_rate": 8.92755693112006e-06, "loss": 22.2178, "step": 145580 }, { "epoch": 0.29410101124367216, "grad_norm": 242.60911560058594, "learning_rate": 8.927340903123428e-06, "loss": 19.2611, "step": 145590 }, { "epoch": 0.294121211876356, "grad_norm": 233.6172637939453, "learning_rate": 8.92712485598541e-06, "loss": 39.959, "step": 145600 }, { "epoch": 0.2941414125090398, "grad_norm": 424.9320983886719, "learning_rate": 8.926908789707063e-06, "loss": 16.9217, "step": 145610 }, { "epoch": 0.2941616131417236, "grad_norm": 238.45736694335938, "learning_rate": 8.926692704289437e-06, "loss": 25.0203, "step": 145620 }, { "epoch": 0.29418181377440744, "grad_norm": 551.928955078125, "learning_rate": 8.926476599733588e-06, "loss": 18.3843, "step": 145630 }, { "epoch": 0.2942020144070912, "grad_norm": 366.42462158203125, "learning_rate": 8.926260476040568e-06, "loss": 24.9385, "step": 145640 }, { "epoch": 0.294222215039775, "grad_norm": 356.678955078125, "learning_rate": 8.926044333211433e-06, "loss": 30.3019, "step": 145650 }, { "epoch": 0.29424241567245885, "grad_norm": 158.8470458984375, "learning_rate": 8.925828171247231e-06, "loss": 28.3491, "step": 145660 }, { "epoch": 0.29426261630514267, "grad_norm": 
369.3500671386719, "learning_rate": 8.925611990149021e-06, "loss": 30.4343, "step": 145670 }, { "epoch": 0.2942828169378265, "grad_norm": 333.98699951171875, "learning_rate": 8.925395789917852e-06, "loss": 18.3424, "step": 145680 }, { "epoch": 0.2943030175705103, "grad_norm": 172.48178100585938, "learning_rate": 8.925179570554783e-06, "loss": 11.635, "step": 145690 }, { "epoch": 0.2943232182031941, "grad_norm": 372.4821472167969, "learning_rate": 8.924963332060863e-06, "loss": 15.4591, "step": 145700 }, { "epoch": 0.29434341883587795, "grad_norm": 472.9522399902344, "learning_rate": 8.924747074437147e-06, "loss": 28.8564, "step": 145710 }, { "epoch": 0.29436361946856177, "grad_norm": 702.295654296875, "learning_rate": 8.92453079768469e-06, "loss": 48.8211, "step": 145720 }, { "epoch": 0.2943838201012456, "grad_norm": 602.8131713867188, "learning_rate": 8.924314501804548e-06, "loss": 36.8326, "step": 145730 }, { "epoch": 0.2944040207339294, "grad_norm": 306.16607666015625, "learning_rate": 8.924098186797771e-06, "loss": 21.1611, "step": 145740 }, { "epoch": 0.2944242213666132, "grad_norm": 175.8815460205078, "learning_rate": 8.923881852665416e-06, "loss": 15.8559, "step": 145750 }, { "epoch": 0.29444442199929705, "grad_norm": 296.03253173828125, "learning_rate": 8.923665499408535e-06, "loss": 18.4978, "step": 145760 }, { "epoch": 0.2944646226319808, "grad_norm": 342.70672607421875, "learning_rate": 8.923449127028187e-06, "loss": 9.7858, "step": 145770 }, { "epoch": 0.29448482326466463, "grad_norm": 288.128662109375, "learning_rate": 8.923232735525422e-06, "loss": 21.0356, "step": 145780 }, { "epoch": 0.29450502389734845, "grad_norm": 79.99772644042969, "learning_rate": 8.923016324901298e-06, "loss": 18.7125, "step": 145790 }, { "epoch": 0.29452522453003227, "grad_norm": 0.5629855394363403, "learning_rate": 8.922799895156868e-06, "loss": 16.9756, "step": 145800 }, { "epoch": 0.2945454251627161, "grad_norm": 999.4471435546875, "learning_rate": 8.922583446293186e-06, "loss": 31.4349, "step": 145810 }, { "epoch": 0.2945656257953999, "grad_norm": 725.2561645507812, "learning_rate": 8.922366978311307e-06, "loss": 28.9004, "step": 145820 }, { "epoch": 0.29458582642808373, "grad_norm": 516.2388916015625, "learning_rate": 8.92215049121229e-06, "loss": 40.1277, "step": 145830 }, { "epoch": 0.29460602706076755, "grad_norm": 145.9407196044922, "learning_rate": 8.921933984997186e-06, "loss": 21.9316, "step": 145840 }, { "epoch": 0.29462622769345137, "grad_norm": 517.9457397460938, "learning_rate": 8.921717459667052e-06, "loss": 22.3559, "step": 145850 }, { "epoch": 0.2946464283261352, "grad_norm": 548.7401123046875, "learning_rate": 8.921500915222941e-06, "loss": 21.2759, "step": 145860 }, { "epoch": 0.294666628958819, "grad_norm": 221.891845703125, "learning_rate": 8.921284351665911e-06, "loss": 9.9675, "step": 145870 }, { "epoch": 0.29468682959150283, "grad_norm": 143.00672912597656, "learning_rate": 8.921067768997018e-06, "loss": 16.967, "step": 145880 }, { "epoch": 0.2947070302241866, "grad_norm": 348.74945068359375, "learning_rate": 8.920851167217315e-06, "loss": 16.3741, "step": 145890 }, { "epoch": 0.2947272308568704, "grad_norm": 277.1033630371094, "learning_rate": 8.920634546327857e-06, "loss": 25.7971, "step": 145900 }, { "epoch": 0.29474743148955423, "grad_norm": 335.9961242675781, "learning_rate": 8.920417906329704e-06, "loss": 26.0592, "step": 145910 }, { "epoch": 0.29476763212223805, "grad_norm": 605.5419921875, "learning_rate": 8.92020124722391e-06, "loss": 21.711, "step": 145920 }, { 
"epoch": 0.2947878327549219, "grad_norm": 459.62896728515625, "learning_rate": 8.91998456901153e-06, "loss": 26.0456, "step": 145930 }, { "epoch": 0.2948080333876057, "grad_norm": 669.4099731445312, "learning_rate": 8.91976787169362e-06, "loss": 28.3342, "step": 145940 }, { "epoch": 0.2948282340202895, "grad_norm": 224.82894897460938, "learning_rate": 8.919551155271239e-06, "loss": 26.5072, "step": 145950 }, { "epoch": 0.29484843465297333, "grad_norm": 590.4454345703125, "learning_rate": 8.91933441974544e-06, "loss": 23.0178, "step": 145960 }, { "epoch": 0.29486863528565715, "grad_norm": 161.39234924316406, "learning_rate": 8.91911766511728e-06, "loss": 19.901, "step": 145970 }, { "epoch": 0.294888835918341, "grad_norm": 406.33294677734375, "learning_rate": 8.918900891387814e-06, "loss": 13.1816, "step": 145980 }, { "epoch": 0.2949090365510248, "grad_norm": 575.1746826171875, "learning_rate": 8.918684098558102e-06, "loss": 24.3348, "step": 145990 }, { "epoch": 0.2949292371837086, "grad_norm": 355.184326171875, "learning_rate": 8.9184672866292e-06, "loss": 16.0829, "step": 146000 }, { "epoch": 0.29494943781639243, "grad_norm": 437.41790771484375, "learning_rate": 8.918250455602162e-06, "loss": 22.8996, "step": 146010 }, { "epoch": 0.2949696384490762, "grad_norm": 291.26556396484375, "learning_rate": 8.918033605478047e-06, "loss": 35.5031, "step": 146020 }, { "epoch": 0.29498983908176, "grad_norm": 368.38018798828125, "learning_rate": 8.917816736257912e-06, "loss": 23.9374, "step": 146030 }, { "epoch": 0.29501003971444384, "grad_norm": 412.8968200683594, "learning_rate": 8.917599847942813e-06, "loss": 28.5956, "step": 146040 }, { "epoch": 0.29503024034712766, "grad_norm": 43.92367172241211, "learning_rate": 8.917382940533809e-06, "loss": 17.0223, "step": 146050 }, { "epoch": 0.2950504409798115, "grad_norm": 411.7793273925781, "learning_rate": 8.917166014031953e-06, "loss": 20.9394, "step": 146060 }, { "epoch": 0.2950706416124953, "grad_norm": 287.5653991699219, "learning_rate": 8.916949068438307e-06, "loss": 17.0937, "step": 146070 }, { "epoch": 0.2950908422451791, "grad_norm": 612.794189453125, "learning_rate": 8.916732103753924e-06, "loss": 25.5027, "step": 146080 }, { "epoch": 0.29511104287786294, "grad_norm": 0.0, "learning_rate": 8.916515119979867e-06, "loss": 22.0467, "step": 146090 }, { "epoch": 0.29513124351054676, "grad_norm": 159.05609130859375, "learning_rate": 8.916298117117188e-06, "loss": 24.5293, "step": 146100 }, { "epoch": 0.2951514441432306, "grad_norm": 143.80015563964844, "learning_rate": 8.916081095166947e-06, "loss": 17.7001, "step": 146110 }, { "epoch": 0.2951716447759144, "grad_norm": 102.78569793701172, "learning_rate": 8.915864054130203e-06, "loss": 11.5246, "step": 146120 }, { "epoch": 0.2951918454085982, "grad_norm": 456.70733642578125, "learning_rate": 8.915646994008011e-06, "loss": 35.5333, "step": 146130 }, { "epoch": 0.29521204604128204, "grad_norm": 394.3199768066406, "learning_rate": 8.915429914801433e-06, "loss": 23.1958, "step": 146140 }, { "epoch": 0.2952322466739658, "grad_norm": 768.4058837890625, "learning_rate": 8.915212816511521e-06, "loss": 25.0435, "step": 146150 }, { "epoch": 0.2952524473066496, "grad_norm": 392.62481689453125, "learning_rate": 8.91499569913934e-06, "loss": 30.6829, "step": 146160 }, { "epoch": 0.29527264793933344, "grad_norm": 235.93899536132812, "learning_rate": 8.914778562685941e-06, "loss": 10.8239, "step": 146170 }, { "epoch": 0.29529284857201726, "grad_norm": 323.7956848144531, "learning_rate": 8.91456140715239e-06, "loss": 
32.8595, "step": 146180 }, { "epoch": 0.2953130492047011, "grad_norm": 651.843994140625, "learning_rate": 8.914344232539739e-06, "loss": 31.1947, "step": 146190 }, { "epoch": 0.2953332498373849, "grad_norm": 262.5066223144531, "learning_rate": 8.91412703884905e-06, "loss": 17.1492, "step": 146200 }, { "epoch": 0.2953534504700687, "grad_norm": 1024.361083984375, "learning_rate": 8.91390982608138e-06, "loss": 23.8093, "step": 146210 }, { "epoch": 0.29537365110275254, "grad_norm": 264.75152587890625, "learning_rate": 8.91369259423779e-06, "loss": 17.0897, "step": 146220 }, { "epoch": 0.29539385173543636, "grad_norm": 138.51217651367188, "learning_rate": 8.913475343319333e-06, "loss": 21.5543, "step": 146230 }, { "epoch": 0.2954140523681202, "grad_norm": 593.5452880859375, "learning_rate": 8.913258073327075e-06, "loss": 20.6775, "step": 146240 }, { "epoch": 0.295434253000804, "grad_norm": 193.46112060546875, "learning_rate": 8.91304078426207e-06, "loss": 20.7935, "step": 146250 }, { "epoch": 0.2954544536334878, "grad_norm": 550.3673095703125, "learning_rate": 8.91282347612538e-06, "loss": 19.0618, "step": 146260 }, { "epoch": 0.29547465426617164, "grad_norm": 324.4193115234375, "learning_rate": 8.912606148918063e-06, "loss": 20.8737, "step": 146270 }, { "epoch": 0.2954948548988554, "grad_norm": 659.3085327148438, "learning_rate": 8.912388802641177e-06, "loss": 24.7531, "step": 146280 }, { "epoch": 0.29551505553153923, "grad_norm": 236.26272583007812, "learning_rate": 8.912171437295785e-06, "loss": 24.2559, "step": 146290 }, { "epoch": 0.29553525616422305, "grad_norm": 218.22898864746094, "learning_rate": 8.911954052882941e-06, "loss": 11.1286, "step": 146300 }, { "epoch": 0.29555545679690687, "grad_norm": 396.6911315917969, "learning_rate": 8.91173664940371e-06, "loss": 28.2047, "step": 146310 }, { "epoch": 0.2955756574295907, "grad_norm": 277.3451843261719, "learning_rate": 8.911519226859147e-06, "loss": 21.9935, "step": 146320 }, { "epoch": 0.2955958580622745, "grad_norm": 625.2225952148438, "learning_rate": 8.911301785250315e-06, "loss": 28.9494, "step": 146330 }, { "epoch": 0.29561605869495833, "grad_norm": 394.705078125, "learning_rate": 8.911084324578272e-06, "loss": 31.6344, "step": 146340 }, { "epoch": 0.29563625932764215, "grad_norm": 129.0830841064453, "learning_rate": 8.910866844844077e-06, "loss": 59.7862, "step": 146350 }, { "epoch": 0.29565645996032597, "grad_norm": 264.7086181640625, "learning_rate": 8.910649346048792e-06, "loss": 25.4121, "step": 146360 }, { "epoch": 0.2956766605930098, "grad_norm": 287.8779296875, "learning_rate": 8.910431828193478e-06, "loss": 24.0402, "step": 146370 }, { "epoch": 0.2956968612256936, "grad_norm": 304.1589660644531, "learning_rate": 8.910214291279192e-06, "loss": 12.6678, "step": 146380 }, { "epoch": 0.29571706185837743, "grad_norm": 302.09661865234375, "learning_rate": 8.909996735306996e-06, "loss": 43.1045, "step": 146390 }, { "epoch": 0.2957372624910612, "grad_norm": 724.1412353515625, "learning_rate": 8.909779160277951e-06, "loss": 23.2681, "step": 146400 }, { "epoch": 0.295757463123745, "grad_norm": 452.8772277832031, "learning_rate": 8.909561566193118e-06, "loss": 20.4508, "step": 146410 }, { "epoch": 0.29577766375642883, "grad_norm": 228.56248474121094, "learning_rate": 8.909343953053553e-06, "loss": 27.4691, "step": 146420 }, { "epoch": 0.29579786438911265, "grad_norm": 1085.392578125, "learning_rate": 8.90912632086032e-06, "loss": 25.6713, "step": 146430 }, { "epoch": 0.2958180650217965, "grad_norm": 541.2378540039062, 
"learning_rate": 8.90890866961448e-06, "loss": 23.7755, "step": 146440 }, { "epoch": 0.2958382656544803, "grad_norm": 225.4510040283203, "learning_rate": 8.908690999317094e-06, "loss": 42.209, "step": 146450 }, { "epoch": 0.2958584662871641, "grad_norm": 419.2010192871094, "learning_rate": 8.90847330996922e-06, "loss": 22.9083, "step": 146460 }, { "epoch": 0.29587866691984793, "grad_norm": 149.35641479492188, "learning_rate": 8.908255601571924e-06, "loss": 20.6513, "step": 146470 }, { "epoch": 0.29589886755253175, "grad_norm": 933.8770141601562, "learning_rate": 8.908037874126263e-06, "loss": 46.9795, "step": 146480 }, { "epoch": 0.2959190681852156, "grad_norm": 146.07037353515625, "learning_rate": 8.9078201276333e-06, "loss": 43.3425, "step": 146490 }, { "epoch": 0.2959392688178994, "grad_norm": 274.5592041015625, "learning_rate": 8.907602362094094e-06, "loss": 14.9558, "step": 146500 }, { "epoch": 0.2959594694505832, "grad_norm": 332.6141357421875, "learning_rate": 8.90738457750971e-06, "loss": 10.5094, "step": 146510 }, { "epoch": 0.29597967008326703, "grad_norm": 655.5591430664062, "learning_rate": 8.907166773881207e-06, "loss": 35.6411, "step": 146520 }, { "epoch": 0.2959998707159508, "grad_norm": 507.4541931152344, "learning_rate": 8.906948951209647e-06, "loss": 10.5311, "step": 146530 }, { "epoch": 0.2960200713486346, "grad_norm": 349.426513671875, "learning_rate": 8.90673110949609e-06, "loss": 27.6748, "step": 146540 }, { "epoch": 0.29604027198131844, "grad_norm": 283.3293762207031, "learning_rate": 8.9065132487416e-06, "loss": 40.9241, "step": 146550 }, { "epoch": 0.29606047261400226, "grad_norm": 685.6849975585938, "learning_rate": 8.90629536894724e-06, "loss": 20.4005, "step": 146560 }, { "epoch": 0.2960806732466861, "grad_norm": 315.76190185546875, "learning_rate": 8.906077470114068e-06, "loss": 23.1389, "step": 146570 }, { "epoch": 0.2961008738793699, "grad_norm": 399.6425476074219, "learning_rate": 8.90585955224315e-06, "loss": 25.1947, "step": 146580 }, { "epoch": 0.2961210745120537, "grad_norm": 505.26568603515625, "learning_rate": 8.905641615335545e-06, "loss": 17.7144, "step": 146590 }, { "epoch": 0.29614127514473754, "grad_norm": 1093.8341064453125, "learning_rate": 8.905423659392316e-06, "loss": 33.1093, "step": 146600 }, { "epoch": 0.29616147577742136, "grad_norm": 436.3739929199219, "learning_rate": 8.905205684414527e-06, "loss": 21.3882, "step": 146610 }, { "epoch": 0.2961816764101052, "grad_norm": 295.1983947753906, "learning_rate": 8.90498769040324e-06, "loss": 22.0839, "step": 146620 }, { "epoch": 0.296201877042789, "grad_norm": 543.0384521484375, "learning_rate": 8.904769677359515e-06, "loss": 20.9607, "step": 146630 }, { "epoch": 0.2962220776754728, "grad_norm": 438.29888916015625, "learning_rate": 8.904551645284416e-06, "loss": 11.9037, "step": 146640 }, { "epoch": 0.29624227830815664, "grad_norm": 358.1011657714844, "learning_rate": 8.904333594179007e-06, "loss": 22.6588, "step": 146650 }, { "epoch": 0.2962624789408404, "grad_norm": 712.97119140625, "learning_rate": 8.904115524044349e-06, "loss": 14.9611, "step": 146660 }, { "epoch": 0.2962826795735242, "grad_norm": 395.3158264160156, "learning_rate": 8.903897434881506e-06, "loss": 43.3063, "step": 146670 }, { "epoch": 0.29630288020620804, "grad_norm": 605.7266235351562, "learning_rate": 8.90367932669154e-06, "loss": 25.3787, "step": 146680 }, { "epoch": 0.29632308083889186, "grad_norm": 711.9656982421875, "learning_rate": 8.903461199475514e-06, "loss": 17.2008, "step": 146690 }, { "epoch": 0.2963432814715757, 
"grad_norm": 259.397216796875, "learning_rate": 8.903243053234492e-06, "loss": 12.5827, "step": 146700 }, { "epoch": 0.2963634821042595, "grad_norm": 398.7919616699219, "learning_rate": 8.903024887969536e-06, "loss": 15.5399, "step": 146710 }, { "epoch": 0.2963836827369433, "grad_norm": 586.203125, "learning_rate": 8.90280670368171e-06, "loss": 27.2687, "step": 146720 }, { "epoch": 0.29640388336962714, "grad_norm": 206.72064208984375, "learning_rate": 8.902588500372078e-06, "loss": 25.3577, "step": 146730 }, { "epoch": 0.29642408400231096, "grad_norm": 519.1009521484375, "learning_rate": 8.902370278041705e-06, "loss": 16.6351, "step": 146740 }, { "epoch": 0.2964442846349948, "grad_norm": 291.5577392578125, "learning_rate": 8.902152036691649e-06, "loss": 43.2595, "step": 146750 }, { "epoch": 0.2964644852676786, "grad_norm": 444.7674255371094, "learning_rate": 8.90193377632298e-06, "loss": 35.0919, "step": 146760 }, { "epoch": 0.2964846859003624, "grad_norm": 510.2618103027344, "learning_rate": 8.901715496936758e-06, "loss": 26.654, "step": 146770 }, { "epoch": 0.29650488653304624, "grad_norm": 448.94219970703125, "learning_rate": 8.901497198534048e-06, "loss": 27.6202, "step": 146780 }, { "epoch": 0.29652508716573, "grad_norm": 230.44442749023438, "learning_rate": 8.901278881115914e-06, "loss": 12.4462, "step": 146790 }, { "epoch": 0.2965452877984138, "grad_norm": 39.407379150390625, "learning_rate": 8.90106054468342e-06, "loss": 17.1074, "step": 146800 }, { "epoch": 0.29656548843109765, "grad_norm": 57.10234832763672, "learning_rate": 8.90084218923763e-06, "loss": 29.6358, "step": 146810 }, { "epoch": 0.29658568906378147, "grad_norm": 647.422119140625, "learning_rate": 8.900623814779605e-06, "loss": 12.2245, "step": 146820 }, { "epoch": 0.2966058896964653, "grad_norm": 0.0, "learning_rate": 8.900405421310416e-06, "loss": 16.0647, "step": 146830 }, { "epoch": 0.2966260903291491, "grad_norm": 728.9190673828125, "learning_rate": 8.900187008831124e-06, "loss": 19.3653, "step": 146840 }, { "epoch": 0.2966462909618329, "grad_norm": 444.3294372558594, "learning_rate": 8.899968577342794e-06, "loss": 12.0394, "step": 146850 }, { "epoch": 0.29666649159451675, "grad_norm": 257.6813659667969, "learning_rate": 8.89975012684649e-06, "loss": 28.4348, "step": 146860 }, { "epoch": 0.29668669222720057, "grad_norm": 249.75827026367188, "learning_rate": 8.899531657343275e-06, "loss": 19.7214, "step": 146870 }, { "epoch": 0.2967068928598844, "grad_norm": 151.40345764160156, "learning_rate": 8.899313168834216e-06, "loss": 23.5436, "step": 146880 }, { "epoch": 0.2967270934925682, "grad_norm": 457.6058044433594, "learning_rate": 8.899094661320378e-06, "loss": 20.3676, "step": 146890 }, { "epoch": 0.296747294125252, "grad_norm": 564.66748046875, "learning_rate": 8.898876134802827e-06, "loss": 25.6532, "step": 146900 }, { "epoch": 0.29676749475793585, "grad_norm": 54.622901916503906, "learning_rate": 8.898657589282625e-06, "loss": 26.233, "step": 146910 }, { "epoch": 0.2967876953906196, "grad_norm": 814.7896118164062, "learning_rate": 8.89843902476084e-06, "loss": 24.8433, "step": 146920 }, { "epoch": 0.29680789602330343, "grad_norm": 574.2201538085938, "learning_rate": 8.898220441238534e-06, "loss": 13.9479, "step": 146930 }, { "epoch": 0.29682809665598725, "grad_norm": 289.9844665527344, "learning_rate": 8.898001838716777e-06, "loss": 18.3098, "step": 146940 }, { "epoch": 0.29684829728867107, "grad_norm": 357.9109802246094, "learning_rate": 8.897783217196629e-06, "loss": 24.6636, "step": 146950 }, { "epoch": 
0.2968684979213549, "grad_norm": 343.20648193359375, "learning_rate": 8.89756457667916e-06, "loss": 23.2273, "step": 146960 }, { "epoch": 0.2968886985540387, "grad_norm": 252.51998901367188, "learning_rate": 8.897345917165434e-06, "loss": 31.9909, "step": 146970 }, { "epoch": 0.29690889918672253, "grad_norm": 548.3547973632812, "learning_rate": 8.897127238656517e-06, "loss": 13.3891, "step": 146980 }, { "epoch": 0.29692909981940635, "grad_norm": 433.0122375488281, "learning_rate": 8.896908541153475e-06, "loss": 12.8016, "step": 146990 }, { "epoch": 0.29694930045209017, "grad_norm": 140.67532348632812, "learning_rate": 8.896689824657371e-06, "loss": 14.628, "step": 147000 }, { "epoch": 0.296969501084774, "grad_norm": 253.27105712890625, "learning_rate": 8.896471089169275e-06, "loss": 19.5172, "step": 147010 }, { "epoch": 0.2969897017174578, "grad_norm": 1135.7210693359375, "learning_rate": 8.896252334690251e-06, "loss": 26.0944, "step": 147020 }, { "epoch": 0.29700990235014163, "grad_norm": 180.94195556640625, "learning_rate": 8.896033561221367e-06, "loss": 21.1015, "step": 147030 }, { "epoch": 0.2970301029828254, "grad_norm": 133.50790405273438, "learning_rate": 8.895814768763686e-06, "loss": 14.5759, "step": 147040 }, { "epoch": 0.2970503036155092, "grad_norm": 666.6173706054688, "learning_rate": 8.895595957318277e-06, "loss": 30.8302, "step": 147050 }, { "epoch": 0.29707050424819303, "grad_norm": 83.84063720703125, "learning_rate": 8.895377126886206e-06, "loss": 26.6543, "step": 147060 }, { "epoch": 0.29709070488087685, "grad_norm": 358.62750244140625, "learning_rate": 8.89515827746854e-06, "loss": 23.4706, "step": 147070 }, { "epoch": 0.2971109055135607, "grad_norm": 51.10934829711914, "learning_rate": 8.894939409066344e-06, "loss": 14.4996, "step": 147080 }, { "epoch": 0.2971311061462445, "grad_norm": 561.810791015625, "learning_rate": 8.894720521680687e-06, "loss": 34.3612, "step": 147090 }, { "epoch": 0.2971513067789283, "grad_norm": 375.919677734375, "learning_rate": 8.894501615312633e-06, "loss": 14.518, "step": 147100 }, { "epoch": 0.29717150741161213, "grad_norm": 137.87319946289062, "learning_rate": 8.894282689963252e-06, "loss": 27.5433, "step": 147110 }, { "epoch": 0.29719170804429595, "grad_norm": 270.0392761230469, "learning_rate": 8.894063745633607e-06, "loss": 15.0138, "step": 147120 }, { "epoch": 0.2972119086769798, "grad_norm": 785.6463623046875, "learning_rate": 8.89384478232477e-06, "loss": 31.8076, "step": 147130 }, { "epoch": 0.2972321093096636, "grad_norm": 504.37017822265625, "learning_rate": 8.893625800037803e-06, "loss": 34.1459, "step": 147140 }, { "epoch": 0.2972523099423474, "grad_norm": 259.37835693359375, "learning_rate": 8.89340679877378e-06, "loss": 26.7039, "step": 147150 }, { "epoch": 0.29727251057503123, "grad_norm": 425.8026123046875, "learning_rate": 8.893187778533763e-06, "loss": 31.1731, "step": 147160 }, { "epoch": 0.297292711207715, "grad_norm": 129.9631805419922, "learning_rate": 8.892968739318819e-06, "loss": 41.2735, "step": 147170 }, { "epoch": 0.2973129118403988, "grad_norm": 100.39238739013672, "learning_rate": 8.89274968113002e-06, "loss": 13.9167, "step": 147180 }, { "epoch": 0.29733311247308264, "grad_norm": 529.4312744140625, "learning_rate": 8.89253060396843e-06, "loss": 37.019, "step": 147190 }, { "epoch": 0.29735331310576646, "grad_norm": 103.83464050292969, "learning_rate": 8.892311507835118e-06, "loss": 18.8964, "step": 147200 }, { "epoch": 0.2973735137384503, "grad_norm": 496.5546875, "learning_rate": 8.892092392731152e-06, "loss": 
26.1287, "step": 147210 }, { "epoch": 0.2973937143711341, "grad_norm": 546.2858276367188, "learning_rate": 8.891873258657599e-06, "loss": 28.8668, "step": 147220 }, { "epoch": 0.2974139150038179, "grad_norm": 275.6580810546875, "learning_rate": 8.891654105615528e-06, "loss": 22.4762, "step": 147230 }, { "epoch": 0.29743411563650174, "grad_norm": 370.995849609375, "learning_rate": 8.891434933606009e-06, "loss": 20.5459, "step": 147240 }, { "epoch": 0.29745431626918556, "grad_norm": 343.0150451660156, "learning_rate": 8.891215742630106e-06, "loss": 13.0418, "step": 147250 }, { "epoch": 0.2974745169018694, "grad_norm": 122.11812591552734, "learning_rate": 8.890996532688889e-06, "loss": 19.9959, "step": 147260 }, { "epoch": 0.2974947175345532, "grad_norm": 338.8207702636719, "learning_rate": 8.890777303783428e-06, "loss": 20.0461, "step": 147270 }, { "epoch": 0.297514918167237, "grad_norm": 336.1431884765625, "learning_rate": 8.89055805591479e-06, "loss": 18.2996, "step": 147280 }, { "epoch": 0.29753511879992084, "grad_norm": 902.4846801757812, "learning_rate": 8.890338789084043e-06, "loss": 28.9435, "step": 147290 }, { "epoch": 0.2975553194326046, "grad_norm": 1083.5321044921875, "learning_rate": 8.890119503292258e-06, "loss": 25.1867, "step": 147300 }, { "epoch": 0.2975755200652884, "grad_norm": 0.0, "learning_rate": 8.889900198540502e-06, "loss": 19.9641, "step": 147310 }, { "epoch": 0.29759572069797224, "grad_norm": 239.86764526367188, "learning_rate": 8.889680874829845e-06, "loss": 26.0744, "step": 147320 }, { "epoch": 0.29761592133065606, "grad_norm": 229.78494262695312, "learning_rate": 8.889461532161353e-06, "loss": 23.5497, "step": 147330 }, { "epoch": 0.2976361219633399, "grad_norm": 688.3148803710938, "learning_rate": 8.889242170536099e-06, "loss": 18.7986, "step": 147340 }, { "epoch": 0.2976563225960237, "grad_norm": 238.5793914794922, "learning_rate": 8.88902278995515e-06, "loss": 22.7555, "step": 147350 }, { "epoch": 0.2976765232287075, "grad_norm": 391.3150634765625, "learning_rate": 8.888803390419576e-06, "loss": 15.5628, "step": 147360 }, { "epoch": 0.29769672386139134, "grad_norm": 465.0777282714844, "learning_rate": 8.888583971930444e-06, "loss": 34.3493, "step": 147370 }, { "epoch": 0.29771692449407516, "grad_norm": 865.205078125, "learning_rate": 8.888364534488828e-06, "loss": 25.9289, "step": 147380 }, { "epoch": 0.297737125126759, "grad_norm": 502.6608581542969, "learning_rate": 8.888145078095794e-06, "loss": 15.0483, "step": 147390 }, { "epoch": 0.2977573257594428, "grad_norm": 227.12208557128906, "learning_rate": 8.887925602752411e-06, "loss": 13.7662, "step": 147400 }, { "epoch": 0.2977775263921266, "grad_norm": 412.6221923828125, "learning_rate": 8.887706108459751e-06, "loss": 21.061, "step": 147410 }, { "epoch": 0.29779772702481044, "grad_norm": 470.05987548828125, "learning_rate": 8.887486595218884e-06, "loss": 26.8712, "step": 147420 }, { "epoch": 0.2978179276574942, "grad_norm": 443.9976501464844, "learning_rate": 8.887267063030876e-06, "loss": 18.5763, "step": 147430 }, { "epoch": 0.29783812829017803, "grad_norm": 462.3858642578125, "learning_rate": 8.887047511896803e-06, "loss": 15.802, "step": 147440 }, { "epoch": 0.29785832892286185, "grad_norm": 401.3211364746094, "learning_rate": 8.886827941817731e-06, "loss": 27.9149, "step": 147450 }, { "epoch": 0.29787852955554567, "grad_norm": 356.8927917480469, "learning_rate": 8.88660835279473e-06, "loss": 24.4112, "step": 147460 }, { "epoch": 0.2978987301882295, "grad_norm": 128.1566619873047, "learning_rate": 
8.886388744828872e-06, "loss": 11.9876, "step": 147470 }, { "epoch": 0.2979189308209133, "grad_norm": 1048.524169921875, "learning_rate": 8.886169117921227e-06, "loss": 57.3669, "step": 147480 }, { "epoch": 0.29793913145359713, "grad_norm": 193.65866088867188, "learning_rate": 8.885949472072864e-06, "loss": 26.0427, "step": 147490 }, { "epoch": 0.29795933208628095, "grad_norm": 240.7947998046875, "learning_rate": 8.885729807284855e-06, "loss": 18.3831, "step": 147500 }, { "epoch": 0.29797953271896477, "grad_norm": 370.0865173339844, "learning_rate": 8.88551012355827e-06, "loss": 17.7053, "step": 147510 }, { "epoch": 0.2979997333516486, "grad_norm": 282.04217529296875, "learning_rate": 8.885290420894182e-06, "loss": 15.4468, "step": 147520 }, { "epoch": 0.2980199339843324, "grad_norm": 364.1242370605469, "learning_rate": 8.885070699293656e-06, "loss": 19.4559, "step": 147530 }, { "epoch": 0.29804013461701623, "grad_norm": 586.850830078125, "learning_rate": 8.884850958757769e-06, "loss": 13.9707, "step": 147540 }, { "epoch": 0.29806033524970005, "grad_norm": 236.7564697265625, "learning_rate": 8.88463119928759e-06, "loss": 28.6807, "step": 147550 }, { "epoch": 0.2980805358823838, "grad_norm": 388.3937072753906, "learning_rate": 8.88441142088419e-06, "loss": 29.3031, "step": 147560 }, { "epoch": 0.29810073651506763, "grad_norm": 646.3626098632812, "learning_rate": 8.884191623548636e-06, "loss": 30.6844, "step": 147570 }, { "epoch": 0.29812093714775145, "grad_norm": 497.60528564453125, "learning_rate": 8.883971807282007e-06, "loss": 19.792, "step": 147580 }, { "epoch": 0.2981411377804353, "grad_norm": 762.6091918945312, "learning_rate": 8.88375197208537e-06, "loss": 20.2638, "step": 147590 }, { "epoch": 0.2981613384131191, "grad_norm": 59.60248947143555, "learning_rate": 8.883532117959797e-06, "loss": 47.3193, "step": 147600 }, { "epoch": 0.2981815390458029, "grad_norm": 185.5955352783203, "learning_rate": 8.883312244906358e-06, "loss": 10.3251, "step": 147610 }, { "epoch": 0.29820173967848673, "grad_norm": 255.9095458984375, "learning_rate": 8.883092352926126e-06, "loss": 28.2199, "step": 147620 }, { "epoch": 0.29822194031117055, "grad_norm": 423.7047424316406, "learning_rate": 8.882872442020174e-06, "loss": 32.0141, "step": 147630 }, { "epoch": 0.2982421409438544, "grad_norm": 435.6179504394531, "learning_rate": 8.882652512189574e-06, "loss": 17.5903, "step": 147640 }, { "epoch": 0.2982623415765382, "grad_norm": 102.59506225585938, "learning_rate": 8.882432563435394e-06, "loss": 12.5239, "step": 147650 }, { "epoch": 0.298282542209222, "grad_norm": 311.9666442871094, "learning_rate": 8.88221259575871e-06, "loss": 28.5527, "step": 147660 }, { "epoch": 0.29830274284190583, "grad_norm": 383.766845703125, "learning_rate": 8.881992609160592e-06, "loss": 11.1076, "step": 147670 }, { "epoch": 0.2983229434745896, "grad_norm": 399.8511657714844, "learning_rate": 8.881772603642113e-06, "loss": 23.4477, "step": 147680 }, { "epoch": 0.2983431441072734, "grad_norm": 371.61968994140625, "learning_rate": 8.881552579204345e-06, "loss": 27.2733, "step": 147690 }, { "epoch": 0.29836334473995724, "grad_norm": 425.9482116699219, "learning_rate": 8.88133253584836e-06, "loss": 28.8364, "step": 147700 }, { "epoch": 0.29838354537264106, "grad_norm": 488.075927734375, "learning_rate": 8.881112473575231e-06, "loss": 33.4234, "step": 147710 }, { "epoch": 0.2984037460053249, "grad_norm": 410.5538330078125, "learning_rate": 8.880892392386032e-06, "loss": 57.9526, "step": 147720 }, { "epoch": 0.2984239466380087, 
"grad_norm": 73.84367370605469, "learning_rate": 8.880672292281834e-06, "loss": 17.895, "step": 147730 }, { "epoch": 0.2984441472706925, "grad_norm": 290.07086181640625, "learning_rate": 8.880452173263708e-06, "loss": 17.2092, "step": 147740 }, { "epoch": 0.29846434790337634, "grad_norm": 390.3843994140625, "learning_rate": 8.88023203533273e-06, "loss": 19.0085, "step": 147750 }, { "epoch": 0.29848454853606016, "grad_norm": 766.4041748046875, "learning_rate": 8.880011878489972e-06, "loss": 20.1722, "step": 147760 }, { "epoch": 0.298504749168744, "grad_norm": 318.4327392578125, "learning_rate": 8.879791702736507e-06, "loss": 22.6863, "step": 147770 }, { "epoch": 0.2985249498014278, "grad_norm": 563.6470336914062, "learning_rate": 8.879571508073407e-06, "loss": 24.1672, "step": 147780 }, { "epoch": 0.2985451504341116, "grad_norm": 455.07293701171875, "learning_rate": 8.879351294501746e-06, "loss": 18.4838, "step": 147790 }, { "epoch": 0.29856535106679544, "grad_norm": 524.3452758789062, "learning_rate": 8.879131062022598e-06, "loss": 23.3109, "step": 147800 }, { "epoch": 0.2985855516994792, "grad_norm": 765.2113037109375, "learning_rate": 8.878910810637036e-06, "loss": 22.2379, "step": 147810 }, { "epoch": 0.298605752332163, "grad_norm": 489.6396484375, "learning_rate": 8.878690540346132e-06, "loss": 10.7837, "step": 147820 }, { "epoch": 0.29862595296484684, "grad_norm": 752.63232421875, "learning_rate": 8.878470251150959e-06, "loss": 21.7687, "step": 147830 }, { "epoch": 0.29864615359753066, "grad_norm": 952.3746337890625, "learning_rate": 8.878249943052595e-06, "loss": 39.7389, "step": 147840 }, { "epoch": 0.2986663542302145, "grad_norm": 238.0981903076172, "learning_rate": 8.87802961605211e-06, "loss": 16.2032, "step": 147850 }, { "epoch": 0.2986865548628983, "grad_norm": 119.70855712890625, "learning_rate": 8.877809270150582e-06, "loss": 17.705, "step": 147860 }, { "epoch": 0.2987067554955821, "grad_norm": 541.710205078125, "learning_rate": 8.877588905349079e-06, "loss": 39.0836, "step": 147870 }, { "epoch": 0.29872695612826594, "grad_norm": 1121.7535400390625, "learning_rate": 8.877368521648678e-06, "loss": 34.2635, "step": 147880 }, { "epoch": 0.29874715676094976, "grad_norm": 455.7093505859375, "learning_rate": 8.877148119050453e-06, "loss": 19.5688, "step": 147890 }, { "epoch": 0.2987673573936336, "grad_norm": 16.635032653808594, "learning_rate": 8.87692769755548e-06, "loss": 15.6266, "step": 147900 }, { "epoch": 0.2987875580263174, "grad_norm": 498.6807556152344, "learning_rate": 8.876707257164829e-06, "loss": 17.2583, "step": 147910 }, { "epoch": 0.2988077586590012, "grad_norm": 802.8692016601562, "learning_rate": 8.87648679787958e-06, "loss": 16.1728, "step": 147920 }, { "epoch": 0.29882795929168504, "grad_norm": 298.41729736328125, "learning_rate": 8.876266319700802e-06, "loss": 13.2344, "step": 147930 }, { "epoch": 0.2988481599243688, "grad_norm": 512.8638305664062, "learning_rate": 8.876045822629573e-06, "loss": 29.2116, "step": 147940 }, { "epoch": 0.2988683605570526, "grad_norm": 811.17822265625, "learning_rate": 8.875825306666968e-06, "loss": 22.1756, "step": 147950 }, { "epoch": 0.29888856118973645, "grad_norm": 91.45901489257812, "learning_rate": 8.87560477181406e-06, "loss": 24.5615, "step": 147960 }, { "epoch": 0.29890876182242027, "grad_norm": 496.5906982421875, "learning_rate": 8.875384218071923e-06, "loss": 10.5712, "step": 147970 }, { "epoch": 0.2989289624551041, "grad_norm": 80.80033874511719, "learning_rate": 8.875163645441633e-06, "loss": 21.25, "step": 147980 }, 
{ "epoch": 0.2989491630877879, "grad_norm": 359.9295654296875, "learning_rate": 8.874943053924267e-06, "loss": 13.4884, "step": 147990 }, { "epoch": 0.2989693637204717, "grad_norm": 264.8037109375, "learning_rate": 8.874722443520898e-06, "loss": 30.6023, "step": 148000 }, { "epoch": 0.29898956435315555, "grad_norm": 248.23143005371094, "learning_rate": 8.874501814232603e-06, "loss": 31.3907, "step": 148010 }, { "epoch": 0.29900976498583937, "grad_norm": 447.3085021972656, "learning_rate": 8.874281166060454e-06, "loss": 18.9093, "step": 148020 }, { "epoch": 0.2990299656185232, "grad_norm": 581.6533203125, "learning_rate": 8.87406049900553e-06, "loss": 18.1009, "step": 148030 }, { "epoch": 0.299050166251207, "grad_norm": 369.7279357910156, "learning_rate": 8.873839813068904e-06, "loss": 12.6975, "step": 148040 }, { "epoch": 0.2990703668838908, "grad_norm": 758.1390380859375, "learning_rate": 8.873619108251654e-06, "loss": 21.9412, "step": 148050 }, { "epoch": 0.29909056751657465, "grad_norm": 0.9877247214317322, "learning_rate": 8.873398384554852e-06, "loss": 14.0648, "step": 148060 }, { "epoch": 0.2991107681492584, "grad_norm": 266.4160461425781, "learning_rate": 8.873177641979578e-06, "loss": 20.1138, "step": 148070 }, { "epoch": 0.29913096878194223, "grad_norm": 93.53788757324219, "learning_rate": 8.872956880526906e-06, "loss": 21.1116, "step": 148080 }, { "epoch": 0.29915116941462605, "grad_norm": 732.2179565429688, "learning_rate": 8.87273610019791e-06, "loss": 29.8653, "step": 148090 }, { "epoch": 0.29917137004730987, "grad_norm": 551.4814453125, "learning_rate": 8.872515300993669e-06, "loss": 17.968, "step": 148100 }, { "epoch": 0.2991915706799937, "grad_norm": 377.6685791015625, "learning_rate": 8.872294482915259e-06, "loss": 18.0758, "step": 148110 }, { "epoch": 0.2992117713126775, "grad_norm": 522.7916259765625, "learning_rate": 8.872073645963755e-06, "loss": 28.6316, "step": 148120 }, { "epoch": 0.29923197194536133, "grad_norm": 286.47064208984375, "learning_rate": 8.871852790140234e-06, "loss": 16.396, "step": 148130 }, { "epoch": 0.29925217257804515, "grad_norm": 335.798095703125, "learning_rate": 8.87163191544577e-06, "loss": 17.9535, "step": 148140 }, { "epoch": 0.29927237321072897, "grad_norm": 338.4599304199219, "learning_rate": 8.871411021881444e-06, "loss": 23.8329, "step": 148150 }, { "epoch": 0.2992925738434128, "grad_norm": 804.56787109375, "learning_rate": 8.871190109448329e-06, "loss": 31.3936, "step": 148160 }, { "epoch": 0.2993127744760966, "grad_norm": 795.2534790039062, "learning_rate": 8.870969178147502e-06, "loss": 15.8334, "step": 148170 }, { "epoch": 0.29933297510878043, "grad_norm": 404.1622009277344, "learning_rate": 8.870748227980044e-06, "loss": 21.6559, "step": 148180 }, { "epoch": 0.29935317574146425, "grad_norm": 2237.97802734375, "learning_rate": 8.870527258947025e-06, "loss": 46.9274, "step": 148190 }, { "epoch": 0.299373376374148, "grad_norm": 591.6390380859375, "learning_rate": 8.870306271049527e-06, "loss": 23.5487, "step": 148200 }, { "epoch": 0.29939357700683183, "grad_norm": 681.0844116210938, "learning_rate": 8.870085264288626e-06, "loss": 28.6523, "step": 148210 }, { "epoch": 0.29941377763951565, "grad_norm": 174.54888916015625, "learning_rate": 8.869864238665398e-06, "loss": 16.711, "step": 148220 }, { "epoch": 0.2994339782721995, "grad_norm": 1209.55517578125, "learning_rate": 8.86964319418092e-06, "loss": 32.4493, "step": 148230 }, { "epoch": 0.2994541789048833, "grad_norm": 856.8101806640625, "learning_rate": 8.869422130836274e-06, "loss": 
44.8244, "step": 148240 }, { "epoch": 0.2994743795375671, "grad_norm": 1115.9072265625, "learning_rate": 8.869201048632531e-06, "loss": 43.5113, "step": 148250 }, { "epoch": 0.29949458017025093, "grad_norm": 380.28070068359375, "learning_rate": 8.868979947570773e-06, "loss": 21.9477, "step": 148260 }, { "epoch": 0.29951478080293475, "grad_norm": 366.782958984375, "learning_rate": 8.868758827652075e-06, "loss": 17.2486, "step": 148270 }, { "epoch": 0.2995349814356186, "grad_norm": 350.6100158691406, "learning_rate": 8.868537688877516e-06, "loss": 21.9206, "step": 148280 }, { "epoch": 0.2995551820683024, "grad_norm": 352.701171875, "learning_rate": 8.868316531248173e-06, "loss": 24.8186, "step": 148290 }, { "epoch": 0.2995753827009862, "grad_norm": 913.413818359375, "learning_rate": 8.868095354765125e-06, "loss": 17.7333, "step": 148300 }, { "epoch": 0.29959558333367003, "grad_norm": 442.60107421875, "learning_rate": 8.867874159429448e-06, "loss": 17.0529, "step": 148310 }, { "epoch": 0.2996157839663538, "grad_norm": 433.0865173339844, "learning_rate": 8.867652945242225e-06, "loss": 25.0527, "step": 148320 }, { "epoch": 0.2996359845990376, "grad_norm": 532.3131713867188, "learning_rate": 8.867431712204527e-06, "loss": 24.9076, "step": 148330 }, { "epoch": 0.29965618523172144, "grad_norm": 742.48876953125, "learning_rate": 8.867210460317437e-06, "loss": 31.1177, "step": 148340 }, { "epoch": 0.29967638586440526, "grad_norm": 769.7986450195312, "learning_rate": 8.866989189582033e-06, "loss": 33.9652, "step": 148350 }, { "epoch": 0.2996965864970891, "grad_norm": 303.25518798828125, "learning_rate": 8.86676789999939e-06, "loss": 16.3814, "step": 148360 }, { "epoch": 0.2997167871297729, "grad_norm": 131.83396911621094, "learning_rate": 8.866546591570593e-06, "loss": 9.7804, "step": 148370 }, { "epoch": 0.2997369877624567, "grad_norm": 61.998233795166016, "learning_rate": 8.866325264296716e-06, "loss": 15.5828, "step": 148380 }, { "epoch": 0.29975718839514054, "grad_norm": 341.0106201171875, "learning_rate": 8.866103918178837e-06, "loss": 16.5429, "step": 148390 }, { "epoch": 0.29977738902782436, "grad_norm": 355.2975769042969, "learning_rate": 8.865882553218036e-06, "loss": 20.0055, "step": 148400 }, { "epoch": 0.2997975896605082, "grad_norm": 152.00953674316406, "learning_rate": 8.865661169415396e-06, "loss": 18.1553, "step": 148410 }, { "epoch": 0.299817790293192, "grad_norm": 523.288330078125, "learning_rate": 8.865439766771988e-06, "loss": 13.4392, "step": 148420 }, { "epoch": 0.2998379909258758, "grad_norm": 258.62744140625, "learning_rate": 8.865218345288897e-06, "loss": 16.0073, "step": 148430 }, { "epoch": 0.29985819155855964, "grad_norm": 260.50823974609375, "learning_rate": 8.864996904967202e-06, "loss": 24.4141, "step": 148440 }, { "epoch": 0.2998783921912434, "grad_norm": 297.6823425292969, "learning_rate": 8.864775445807979e-06, "loss": 24.0066, "step": 148450 }, { "epoch": 0.2998985928239272, "grad_norm": 170.50486755371094, "learning_rate": 8.86455396781231e-06, "loss": 32.7914, "step": 148460 }, { "epoch": 0.29991879345661104, "grad_norm": 201.24354553222656, "learning_rate": 8.864332470981274e-06, "loss": 18.1797, "step": 148470 }, { "epoch": 0.29993899408929486, "grad_norm": 599.9772338867188, "learning_rate": 8.86411095531595e-06, "loss": 24.4635, "step": 148480 }, { "epoch": 0.2999591947219787, "grad_norm": 25.860544204711914, "learning_rate": 8.863889420817418e-06, "loss": 20.8214, "step": 148490 }, { "epoch": 0.2999793953546625, "grad_norm": 277.68365478515625, 
"learning_rate": 8.863667867486756e-06, "loss": 14.4374, "step": 148500 }, { "epoch": 0.2999995959873463, "grad_norm": 1149.8858642578125, "learning_rate": 8.863446295325047e-06, "loss": 31.039, "step": 148510 }, { "epoch": 0.30001979662003014, "grad_norm": 527.5618286132812, "learning_rate": 8.863224704333368e-06, "loss": 17.3079, "step": 148520 }, { "epoch": 0.30003999725271396, "grad_norm": 315.8086242675781, "learning_rate": 8.863003094512801e-06, "loss": 18.0132, "step": 148530 }, { "epoch": 0.3000601978853978, "grad_norm": 202.37364196777344, "learning_rate": 8.862781465864427e-06, "loss": 17.174, "step": 148540 }, { "epoch": 0.3000803985180816, "grad_norm": 254.23915100097656, "learning_rate": 8.862559818389322e-06, "loss": 24.6349, "step": 148550 }, { "epoch": 0.3001005991507654, "grad_norm": 620.2166748046875, "learning_rate": 8.862338152088573e-06, "loss": 31.1625, "step": 148560 }, { "epoch": 0.30012079978344924, "grad_norm": 253.61141967773438, "learning_rate": 8.862116466963251e-06, "loss": 20.4655, "step": 148570 }, { "epoch": 0.300141000416133, "grad_norm": 197.57196044921875, "learning_rate": 8.861894763014444e-06, "loss": 16.0904, "step": 148580 }, { "epoch": 0.30016120104881683, "grad_norm": 282.3333435058594, "learning_rate": 8.861673040243231e-06, "loss": 26.0632, "step": 148590 }, { "epoch": 0.30018140168150065, "grad_norm": 331.51220703125, "learning_rate": 8.861451298650692e-06, "loss": 22.1169, "step": 148600 }, { "epoch": 0.30020160231418447, "grad_norm": 673.6158447265625, "learning_rate": 8.861229538237908e-06, "loss": 22.2989, "step": 148610 }, { "epoch": 0.3002218029468683, "grad_norm": 127.45361328125, "learning_rate": 8.861007759005959e-06, "loss": 38.9359, "step": 148620 }, { "epoch": 0.3002420035795521, "grad_norm": 285.5706787109375, "learning_rate": 8.860785960955926e-06, "loss": 36.556, "step": 148630 }, { "epoch": 0.30026220421223593, "grad_norm": 534.787353515625, "learning_rate": 8.860564144088891e-06, "loss": 23.1304, "step": 148640 }, { "epoch": 0.30028240484491975, "grad_norm": 253.01190185546875, "learning_rate": 8.860342308405933e-06, "loss": 46.5173, "step": 148650 }, { "epoch": 0.30030260547760357, "grad_norm": 52.00938415527344, "learning_rate": 8.860120453908138e-06, "loss": 39.1426, "step": 148660 }, { "epoch": 0.3003228061102874, "grad_norm": 403.021240234375, "learning_rate": 8.859898580596581e-06, "loss": 24.2871, "step": 148670 }, { "epoch": 0.3003430067429712, "grad_norm": 446.07373046875, "learning_rate": 8.859676688472349e-06, "loss": 29.1645, "step": 148680 }, { "epoch": 0.30036320737565503, "grad_norm": 36.96280288696289, "learning_rate": 8.85945477753652e-06, "loss": 22.4767, "step": 148690 }, { "epoch": 0.30038340800833885, "grad_norm": 534.2410278320312, "learning_rate": 8.859232847790175e-06, "loss": 27.2594, "step": 148700 }, { "epoch": 0.3004036086410226, "grad_norm": 391.3323059082031, "learning_rate": 8.859010899234399e-06, "loss": 14.8472, "step": 148710 }, { "epoch": 0.30042380927370643, "grad_norm": 955.7097778320312, "learning_rate": 8.85878893187027e-06, "loss": 21.2886, "step": 148720 }, { "epoch": 0.30044400990639025, "grad_norm": 309.7611389160156, "learning_rate": 8.858566945698874e-06, "loss": 25.0879, "step": 148730 }, { "epoch": 0.3004642105390741, "grad_norm": 423.8357849121094, "learning_rate": 8.858344940721291e-06, "loss": 32.2133, "step": 148740 }, { "epoch": 0.3004844111717579, "grad_norm": 368.7214660644531, "learning_rate": 8.858122916938601e-06, "loss": 27.5963, "step": 148750 }, { "epoch": 
0.3005046118044417, "grad_norm": 1165.051025390625, "learning_rate": 8.857900874351888e-06, "loss": 13.9224, "step": 148760 }, { "epoch": 0.30052481243712553, "grad_norm": 721.8985595703125, "learning_rate": 8.857678812962234e-06, "loss": 40.7466, "step": 148770 }, { "epoch": 0.30054501306980935, "grad_norm": 1.9701683521270752, "learning_rate": 8.857456732770723e-06, "loss": 12.1633, "step": 148780 }, { "epoch": 0.3005652137024932, "grad_norm": 347.9275207519531, "learning_rate": 8.857234633778434e-06, "loss": 26.421, "step": 148790 }, { "epoch": 0.300585414335177, "grad_norm": 676.7006225585938, "learning_rate": 8.857012515986452e-06, "loss": 20.2109, "step": 148800 }, { "epoch": 0.3006056149678608, "grad_norm": 375.9146728515625, "learning_rate": 8.856790379395858e-06, "loss": 18.1709, "step": 148810 }, { "epoch": 0.30062581560054463, "grad_norm": 849.3779907226562, "learning_rate": 8.856568224007736e-06, "loss": 29.7461, "step": 148820 }, { "epoch": 0.3006460162332284, "grad_norm": 428.88885498046875, "learning_rate": 8.856346049823169e-06, "loss": 20.8408, "step": 148830 }, { "epoch": 0.3006662168659122, "grad_norm": 131.7697296142578, "learning_rate": 8.856123856843236e-06, "loss": 25.3181, "step": 148840 }, { "epoch": 0.30068641749859604, "grad_norm": 74.23992156982422, "learning_rate": 8.855901645069026e-06, "loss": 33.1354, "step": 148850 }, { "epoch": 0.30070661813127986, "grad_norm": 176.134521484375, "learning_rate": 8.855679414501619e-06, "loss": 16.2658, "step": 148860 }, { "epoch": 0.3007268187639637, "grad_norm": 615.5466918945312, "learning_rate": 8.855457165142096e-06, "loss": 13.6649, "step": 148870 }, { "epoch": 0.3007470193966475, "grad_norm": 1026.47607421875, "learning_rate": 8.855234896991544e-06, "loss": 36.0267, "step": 148880 }, { "epoch": 0.3007672200293313, "grad_norm": 62.51200866699219, "learning_rate": 8.855012610051045e-06, "loss": 20.6455, "step": 148890 }, { "epoch": 0.30078742066201514, "grad_norm": 173.38348388671875, "learning_rate": 8.854790304321682e-06, "loss": 12.8308, "step": 148900 }, { "epoch": 0.30080762129469896, "grad_norm": 337.7551574707031, "learning_rate": 8.854567979804538e-06, "loss": 14.6456, "step": 148910 }, { "epoch": 0.3008278219273828, "grad_norm": 418.7915954589844, "learning_rate": 8.854345636500698e-06, "loss": 27.0897, "step": 148920 }, { "epoch": 0.3008480225600666, "grad_norm": 219.07815551757812, "learning_rate": 8.854123274411243e-06, "loss": 17.9353, "step": 148930 }, { "epoch": 0.3008682231927504, "grad_norm": 667.05419921875, "learning_rate": 8.85390089353726e-06, "loss": 20.7854, "step": 148940 }, { "epoch": 0.30088842382543424, "grad_norm": 573.677978515625, "learning_rate": 8.853678493879832e-06, "loss": 39.0861, "step": 148950 }, { "epoch": 0.300908624458118, "grad_norm": 249.09042358398438, "learning_rate": 8.853456075440041e-06, "loss": 19.4665, "step": 148960 }, { "epoch": 0.3009288250908018, "grad_norm": 94.33660888671875, "learning_rate": 8.853233638218974e-06, "loss": 26.8469, "step": 148970 }, { "epoch": 0.30094902572348564, "grad_norm": 193.69461059570312, "learning_rate": 8.853011182217712e-06, "loss": 23.3123, "step": 148980 }, { "epoch": 0.30096922635616946, "grad_norm": 241.58082580566406, "learning_rate": 8.852788707437343e-06, "loss": 17.7294, "step": 148990 }, { "epoch": 0.3009894269888533, "grad_norm": 17.932788848876953, "learning_rate": 8.852566213878947e-06, "loss": 18.8882, "step": 149000 }, { "epoch": 0.3010096276215371, "grad_norm": 572.9925537109375, "learning_rate": 8.852343701543611e-06, 
"loss": 16.8183, "step": 149010 }, { "epoch": 0.3010298282542209, "grad_norm": 693.1661987304688, "learning_rate": 8.85212117043242e-06, "loss": 34.758, "step": 149020 }, { "epoch": 0.30105002888690474, "grad_norm": 305.8961181640625, "learning_rate": 8.851898620546456e-06, "loss": 16.6461, "step": 149030 }, { "epoch": 0.30107022951958856, "grad_norm": 335.46087646484375, "learning_rate": 8.851676051886805e-06, "loss": 28.4095, "step": 149040 }, { "epoch": 0.3010904301522724, "grad_norm": 290.3029479980469, "learning_rate": 8.851453464454555e-06, "loss": 57.3568, "step": 149050 }, { "epoch": 0.3011106307849562, "grad_norm": 184.10365295410156, "learning_rate": 8.851230858250785e-06, "loss": 14.5301, "step": 149060 }, { "epoch": 0.30113083141764, "grad_norm": 259.8624572753906, "learning_rate": 8.851008233276586e-06, "loss": 24.3177, "step": 149070 }, { "epoch": 0.30115103205032384, "grad_norm": 420.3094787597656, "learning_rate": 8.850785589533038e-06, "loss": 21.9963, "step": 149080 }, { "epoch": 0.3011712326830076, "grad_norm": 785.2965698242188, "learning_rate": 8.850562927021227e-06, "loss": 22.0542, "step": 149090 }, { "epoch": 0.3011914333156914, "grad_norm": 389.8663635253906, "learning_rate": 8.85034024574224e-06, "loss": 26.2576, "step": 149100 }, { "epoch": 0.30121163394837525, "grad_norm": 51.002445220947266, "learning_rate": 8.850117545697163e-06, "loss": 24.099, "step": 149110 }, { "epoch": 0.30123183458105907, "grad_norm": 1020.509765625, "learning_rate": 8.849894826887078e-06, "loss": 27.3531, "step": 149120 }, { "epoch": 0.3012520352137429, "grad_norm": 355.1165466308594, "learning_rate": 8.849672089313074e-06, "loss": 17.6457, "step": 149130 }, { "epoch": 0.3012722358464267, "grad_norm": 596.9937744140625, "learning_rate": 8.849449332976235e-06, "loss": 28.6967, "step": 149140 }, { "epoch": 0.3012924364791105, "grad_norm": 451.7833251953125, "learning_rate": 8.849226557877647e-06, "loss": 22.5846, "step": 149150 }, { "epoch": 0.30131263711179435, "grad_norm": 796.8441772460938, "learning_rate": 8.849003764018395e-06, "loss": 20.1726, "step": 149160 }, { "epoch": 0.30133283774447817, "grad_norm": 398.1156311035156, "learning_rate": 8.848780951399566e-06, "loss": 22.2843, "step": 149170 }, { "epoch": 0.301353038377162, "grad_norm": 278.8240661621094, "learning_rate": 8.848558120022246e-06, "loss": 17.262, "step": 149180 }, { "epoch": 0.3013732390098458, "grad_norm": 442.8303527832031, "learning_rate": 8.84833526988752e-06, "loss": 22.3545, "step": 149190 }, { "epoch": 0.3013934396425296, "grad_norm": 583.061767578125, "learning_rate": 8.848112400996473e-06, "loss": 29.7904, "step": 149200 }, { "epoch": 0.30141364027521345, "grad_norm": 318.1643371582031, "learning_rate": 8.847889513350195e-06, "loss": 19.487, "step": 149210 }, { "epoch": 0.3014338409078972, "grad_norm": 699.6163940429688, "learning_rate": 8.847666606949768e-06, "loss": 34.3073, "step": 149220 }, { "epoch": 0.30145404154058103, "grad_norm": 454.4139709472656, "learning_rate": 8.847443681796283e-06, "loss": 25.9923, "step": 149230 }, { "epoch": 0.30147424217326485, "grad_norm": 275.9908752441406, "learning_rate": 8.847220737890823e-06, "loss": 16.7415, "step": 149240 }, { "epoch": 0.30149444280594867, "grad_norm": 469.0674133300781, "learning_rate": 8.846997775234476e-06, "loss": 16.5077, "step": 149250 }, { "epoch": 0.3015146434386325, "grad_norm": 214.13780212402344, "learning_rate": 8.846774793828328e-06, "loss": 23.2627, "step": 149260 }, { "epoch": 0.3015348440713163, "grad_norm": 91.90431213378906, 
"learning_rate": 8.846551793673467e-06, "loss": 14.4711, "step": 149270 }, { "epoch": 0.30155504470400013, "grad_norm": 679.1061401367188, "learning_rate": 8.846328774770978e-06, "loss": 26.7924, "step": 149280 }, { "epoch": 0.30157524533668395, "grad_norm": 333.43511962890625, "learning_rate": 8.84610573712195e-06, "loss": 30.4296, "step": 149290 }, { "epoch": 0.30159544596936777, "grad_norm": 172.83203125, "learning_rate": 8.84588268072747e-06, "loss": 21.8123, "step": 149300 }, { "epoch": 0.3016156466020516, "grad_norm": 50.759910583496094, "learning_rate": 8.845659605588622e-06, "loss": 10.55, "step": 149310 }, { "epoch": 0.3016358472347354, "grad_norm": 423.1531677246094, "learning_rate": 8.845436511706497e-06, "loss": 27.9608, "step": 149320 }, { "epoch": 0.30165604786741923, "grad_norm": 1031.787841796875, "learning_rate": 8.84521339908218e-06, "loss": 21.9223, "step": 149330 }, { "epoch": 0.30167624850010305, "grad_norm": 473.7039489746094, "learning_rate": 8.84499026771676e-06, "loss": 37.165, "step": 149340 }, { "epoch": 0.3016964491327868, "grad_norm": 576.4017944335938, "learning_rate": 8.844767117611324e-06, "loss": 26.5359, "step": 149350 }, { "epoch": 0.30171664976547063, "grad_norm": 297.1952209472656, "learning_rate": 8.844543948766958e-06, "loss": 15.7599, "step": 149360 }, { "epoch": 0.30173685039815445, "grad_norm": 466.27398681640625, "learning_rate": 8.844320761184753e-06, "loss": 15.319, "step": 149370 }, { "epoch": 0.3017570510308383, "grad_norm": 417.2751770019531, "learning_rate": 8.844097554865792e-06, "loss": 27.3471, "step": 149380 }, { "epoch": 0.3017772516635221, "grad_norm": 739.0093994140625, "learning_rate": 8.84387432981117e-06, "loss": 21.3136, "step": 149390 }, { "epoch": 0.3017974522962059, "grad_norm": 340.4967956542969, "learning_rate": 8.843651086021966e-06, "loss": 17.8877, "step": 149400 }, { "epoch": 0.30181765292888973, "grad_norm": 435.0361633300781, "learning_rate": 8.843427823499275e-06, "loss": 13.4663, "step": 149410 }, { "epoch": 0.30183785356157355, "grad_norm": 152.16822814941406, "learning_rate": 8.843204542244184e-06, "loss": 16.7841, "step": 149420 }, { "epoch": 0.3018580541942574, "grad_norm": 545.1543579101562, "learning_rate": 8.842981242257779e-06, "loss": 12.907, "step": 149430 }, { "epoch": 0.3018782548269412, "grad_norm": 474.6297912597656, "learning_rate": 8.84275792354115e-06, "loss": 26.8501, "step": 149440 }, { "epoch": 0.301898455459625, "grad_norm": 616.8176879882812, "learning_rate": 8.842534586095383e-06, "loss": 25.1576, "step": 149450 }, { "epoch": 0.30191865609230883, "grad_norm": 318.8073425292969, "learning_rate": 8.842311229921571e-06, "loss": 17.5954, "step": 149460 }, { "epoch": 0.3019388567249926, "grad_norm": 375.3653869628906, "learning_rate": 8.8420878550208e-06, "loss": 21.3203, "step": 149470 }, { "epoch": 0.3019590573576764, "grad_norm": 268.08160400390625, "learning_rate": 8.841864461394158e-06, "loss": 24.1415, "step": 149480 }, { "epoch": 0.30197925799036024, "grad_norm": 27.583419799804688, "learning_rate": 8.841641049042732e-06, "loss": 34.7693, "step": 149490 }, { "epoch": 0.30199945862304406, "grad_norm": 709.3275146484375, "learning_rate": 8.841417617967618e-06, "loss": 42.0899, "step": 149500 }, { "epoch": 0.3020196592557279, "grad_norm": 278.5901794433594, "learning_rate": 8.841194168169897e-06, "loss": 16.8193, "step": 149510 }, { "epoch": 0.3020398598884117, "grad_norm": 536.4020385742188, "learning_rate": 8.840970699650665e-06, "loss": 22.6882, "step": 149520 }, { "epoch": 0.3020600605210955, 
"grad_norm": 147.03111267089844, "learning_rate": 8.840747212411005e-06, "loss": 22.2375, "step": 149530 }, { "epoch": 0.30208026115377934, "grad_norm": 389.63531494140625, "learning_rate": 8.84052370645201e-06, "loss": 41.2673, "step": 149540 }, { "epoch": 0.30210046178646316, "grad_norm": 603.1446533203125, "learning_rate": 8.840300181774767e-06, "loss": 20.7329, "step": 149550 }, { "epoch": 0.302120662419147, "grad_norm": 198.4102020263672, "learning_rate": 8.840076638380368e-06, "loss": 23.4251, "step": 149560 }, { "epoch": 0.3021408630518308, "grad_norm": 312.51983642578125, "learning_rate": 8.8398530762699e-06, "loss": 11.6308, "step": 149570 }, { "epoch": 0.3021610636845146, "grad_norm": 326.4072265625, "learning_rate": 8.839629495444455e-06, "loss": 25.1864, "step": 149580 }, { "epoch": 0.30218126431719844, "grad_norm": 297.5825500488281, "learning_rate": 8.83940589590512e-06, "loss": 24.1519, "step": 149590 }, { "epoch": 0.3022014649498822, "grad_norm": 606.0067749023438, "learning_rate": 8.83918227765299e-06, "loss": 14.6197, "step": 149600 }, { "epoch": 0.302221665582566, "grad_norm": 760.7696533203125, "learning_rate": 8.838958640689146e-06, "loss": 20.4481, "step": 149610 }, { "epoch": 0.30224186621524984, "grad_norm": 314.8655700683594, "learning_rate": 8.838734985014686e-06, "loss": 14.067, "step": 149620 }, { "epoch": 0.30226206684793366, "grad_norm": 378.96044921875, "learning_rate": 8.838511310630697e-06, "loss": 15.6507, "step": 149630 }, { "epoch": 0.3022822674806175, "grad_norm": 649.549560546875, "learning_rate": 8.83828761753827e-06, "loss": 13.1199, "step": 149640 }, { "epoch": 0.3023024681133013, "grad_norm": 782.0072631835938, "learning_rate": 8.838063905738495e-06, "loss": 34.4812, "step": 149650 }, { "epoch": 0.3023226687459851, "grad_norm": 651.4970092773438, "learning_rate": 8.83784017523246e-06, "loss": 16.3783, "step": 149660 }, { "epoch": 0.30234286937866894, "grad_norm": 219.46450805664062, "learning_rate": 8.837616426021259e-06, "loss": 30.7405, "step": 149670 }, { "epoch": 0.30236307001135276, "grad_norm": 335.22442626953125, "learning_rate": 8.837392658105981e-06, "loss": 15.6363, "step": 149680 }, { "epoch": 0.3023832706440366, "grad_norm": 191.90707397460938, "learning_rate": 8.837168871487715e-06, "loss": 21.2069, "step": 149690 }, { "epoch": 0.3024034712767204, "grad_norm": 288.7420959472656, "learning_rate": 8.836945066167556e-06, "loss": 21.0821, "step": 149700 }, { "epoch": 0.3024236719094042, "grad_norm": 437.4322509765625, "learning_rate": 8.83672124214659e-06, "loss": 48.1713, "step": 149710 }, { "epoch": 0.30244387254208804, "grad_norm": 1047.5965576171875, "learning_rate": 8.83649739942591e-06, "loss": 14.8326, "step": 149720 }, { "epoch": 0.3024640731747718, "grad_norm": 359.870849609375, "learning_rate": 8.836273538006608e-06, "loss": 19.7465, "step": 149730 }, { "epoch": 0.30248427380745563, "grad_norm": 127.4852523803711, "learning_rate": 8.836049657889774e-06, "loss": 24.2413, "step": 149740 }, { "epoch": 0.30250447444013945, "grad_norm": 557.8663330078125, "learning_rate": 8.8358257590765e-06, "loss": 16.4603, "step": 149750 }, { "epoch": 0.30252467507282327, "grad_norm": 460.479248046875, "learning_rate": 8.835601841567874e-06, "loss": 18.994, "step": 149760 }, { "epoch": 0.3025448757055071, "grad_norm": 961.6781616210938, "learning_rate": 8.835377905364992e-06, "loss": 11.2767, "step": 149770 }, { "epoch": 0.3025650763381909, "grad_norm": 1103.5975341796875, "learning_rate": 8.835153950468943e-06, "loss": 45.3997, "step": 149780 }, { 
"epoch": 0.30258527697087473, "grad_norm": 306.8983459472656, "learning_rate": 8.834929976880818e-06, "loss": 22.383, "step": 149790 }, { "epoch": 0.30260547760355855, "grad_norm": 438.7554016113281, "learning_rate": 8.834705984601708e-06, "loss": 15.0056, "step": 149800 }, { "epoch": 0.30262567823624237, "grad_norm": 559.1272583007812, "learning_rate": 8.834481973632709e-06, "loss": 27.3405, "step": 149810 }, { "epoch": 0.3026458788689262, "grad_norm": 867.9369506835938, "learning_rate": 8.83425794397491e-06, "loss": 19.3661, "step": 149820 }, { "epoch": 0.30266607950161, "grad_norm": 116.16118621826172, "learning_rate": 8.8340338956294e-06, "loss": 21.6849, "step": 149830 }, { "epoch": 0.30268628013429383, "grad_norm": 685.396240234375, "learning_rate": 8.833809828597275e-06, "loss": 10.8325, "step": 149840 }, { "epoch": 0.30270648076697765, "grad_norm": 608.677978515625, "learning_rate": 8.833585742879627e-06, "loss": 22.01, "step": 149850 }, { "epoch": 0.3027266813996614, "grad_norm": 234.95603942871094, "learning_rate": 8.833361638477546e-06, "loss": 25.5733, "step": 149860 }, { "epoch": 0.30274688203234523, "grad_norm": 517.9550170898438, "learning_rate": 8.833137515392125e-06, "loss": 52.3825, "step": 149870 }, { "epoch": 0.30276708266502905, "grad_norm": 1143.35498046875, "learning_rate": 8.832913373624458e-06, "loss": 25.8712, "step": 149880 }, { "epoch": 0.3027872832977129, "grad_norm": 573.56396484375, "learning_rate": 8.832689213175636e-06, "loss": 25.1794, "step": 149890 }, { "epoch": 0.3028074839303967, "grad_norm": 238.27435302734375, "learning_rate": 8.83246503404675e-06, "loss": 17.7551, "step": 149900 }, { "epoch": 0.3028276845630805, "grad_norm": 483.5244445800781, "learning_rate": 8.832240836238894e-06, "loss": 24.2964, "step": 149910 }, { "epoch": 0.30284788519576433, "grad_norm": 274.0011291503906, "learning_rate": 8.832016619753164e-06, "loss": 30.2346, "step": 149920 }, { "epoch": 0.30286808582844815, "grad_norm": 139.33047485351562, "learning_rate": 8.831792384590646e-06, "loss": 18.8885, "step": 149930 }, { "epoch": 0.302888286461132, "grad_norm": 232.16989135742188, "learning_rate": 8.831568130752439e-06, "loss": 21.3329, "step": 149940 }, { "epoch": 0.3029084870938158, "grad_norm": 214.23403930664062, "learning_rate": 8.831343858239634e-06, "loss": 10.0199, "step": 149950 }, { "epoch": 0.3029286877264996, "grad_norm": 383.21649169921875, "learning_rate": 8.831119567053323e-06, "loss": 21.5086, "step": 149960 }, { "epoch": 0.30294888835918343, "grad_norm": 685.4547119140625, "learning_rate": 8.8308952571946e-06, "loss": 30.082, "step": 149970 }, { "epoch": 0.30296908899186725, "grad_norm": 372.8331298828125, "learning_rate": 8.830670928664558e-06, "loss": 19.3065, "step": 149980 }, { "epoch": 0.302989289624551, "grad_norm": 211.326904296875, "learning_rate": 8.83044658146429e-06, "loss": 20.4998, "step": 149990 }, { "epoch": 0.30300949025723484, "grad_norm": 581.1596069335938, "learning_rate": 8.83022221559489e-06, "loss": 16.4117, "step": 150000 }, { "epoch": 0.30302969088991866, "grad_norm": 486.1495056152344, "learning_rate": 8.829997831057454e-06, "loss": 33.2905, "step": 150010 }, { "epoch": 0.3030498915226025, "grad_norm": 880.4935302734375, "learning_rate": 8.82977342785307e-06, "loss": 48.0187, "step": 150020 }, { "epoch": 0.3030700921552863, "grad_norm": 184.04718017578125, "learning_rate": 8.829549005982836e-06, "loss": 24.734, "step": 150030 }, { "epoch": 0.3030902927879701, "grad_norm": 627.1402587890625, "learning_rate": 8.829324565447844e-06, 
"loss": 14.1204, "step": 150040 }, { "epoch": 0.30311049342065394, "grad_norm": 205.56370544433594, "learning_rate": 8.829100106249189e-06, "loss": 21.3699, "step": 150050 }, { "epoch": 0.30313069405333776, "grad_norm": 785.6224975585938, "learning_rate": 8.828875628387964e-06, "loss": 19.8302, "step": 150060 }, { "epoch": 0.3031508946860216, "grad_norm": 745.4777221679688, "learning_rate": 8.828651131865264e-06, "loss": 11.0934, "step": 150070 }, { "epoch": 0.3031710953187054, "grad_norm": 432.5574035644531, "learning_rate": 8.828426616682184e-06, "loss": 25.6614, "step": 150080 }, { "epoch": 0.3031912959513892, "grad_norm": 634.8599243164062, "learning_rate": 8.828202082839816e-06, "loss": 30.2482, "step": 150090 }, { "epoch": 0.30321149658407304, "grad_norm": 481.1977233886719, "learning_rate": 8.827977530339254e-06, "loss": 15.1406, "step": 150100 }, { "epoch": 0.3032316972167568, "grad_norm": 526.1364135742188, "learning_rate": 8.827752959181594e-06, "loss": 31.77, "step": 150110 }, { "epoch": 0.3032518978494406, "grad_norm": 54.602298736572266, "learning_rate": 8.827528369367932e-06, "loss": 20.0991, "step": 150120 }, { "epoch": 0.30327209848212444, "grad_norm": 231.0094757080078, "learning_rate": 8.82730376089936e-06, "loss": 34.4267, "step": 150130 }, { "epoch": 0.30329229911480826, "grad_norm": 710.7511596679688, "learning_rate": 8.827079133776975e-06, "loss": 43.0145, "step": 150140 }, { "epoch": 0.3033124997474921, "grad_norm": 449.840576171875, "learning_rate": 8.826854488001869e-06, "loss": 21.4409, "step": 150150 }, { "epoch": 0.3033327003801759, "grad_norm": 290.668212890625, "learning_rate": 8.826629823575138e-06, "loss": 19.9206, "step": 150160 }, { "epoch": 0.3033529010128597, "grad_norm": 330.00701904296875, "learning_rate": 8.826405140497878e-06, "loss": 19.1429, "step": 150170 }, { "epoch": 0.30337310164554354, "grad_norm": 497.4096984863281, "learning_rate": 8.826180438771184e-06, "loss": 12.3778, "step": 150180 }, { "epoch": 0.30339330227822736, "grad_norm": 344.986572265625, "learning_rate": 8.82595571839615e-06, "loss": 19.5334, "step": 150190 }, { "epoch": 0.3034135029109112, "grad_norm": 323.72918701171875, "learning_rate": 8.825730979373873e-06, "loss": 21.6462, "step": 150200 }, { "epoch": 0.303433703543595, "grad_norm": 276.95745849609375, "learning_rate": 8.825506221705445e-06, "loss": 30.4611, "step": 150210 }, { "epoch": 0.3034539041762788, "grad_norm": 318.9707336425781, "learning_rate": 8.825281445391965e-06, "loss": 29.1202, "step": 150220 }, { "epoch": 0.30347410480896264, "grad_norm": 257.0148010253906, "learning_rate": 8.825056650434528e-06, "loss": 31.8865, "step": 150230 }, { "epoch": 0.3034943054416464, "grad_norm": 279.8083801269531, "learning_rate": 8.824831836834227e-06, "loss": 41.3482, "step": 150240 }, { "epoch": 0.3035145060743302, "grad_norm": 881.94580078125, "learning_rate": 8.824607004592161e-06, "loss": 40.3344, "step": 150250 }, { "epoch": 0.30353470670701405, "grad_norm": 513.8170776367188, "learning_rate": 8.824382153709423e-06, "loss": 10.2271, "step": 150260 }, { "epoch": 0.30355490733969787, "grad_norm": 231.94570922851562, "learning_rate": 8.824157284187111e-06, "loss": 12.5738, "step": 150270 }, { "epoch": 0.3035751079723817, "grad_norm": 339.27716064453125, "learning_rate": 8.82393239602632e-06, "loss": 15.0772, "step": 150280 }, { "epoch": 0.3035953086050655, "grad_norm": 293.11962890625, "learning_rate": 8.823707489228145e-06, "loss": 27.0696, "step": 150290 }, { "epoch": 0.3036155092377493, "grad_norm": 374.0193176269531, 
"learning_rate": 8.823482563793687e-06, "loss": 22.8484, "step": 150300 }, { "epoch": 0.30363570987043315, "grad_norm": 483.96160888671875, "learning_rate": 8.823257619724036e-06, "loss": 15.1993, "step": 150310 }, { "epoch": 0.30365591050311697, "grad_norm": 179.94659423828125, "learning_rate": 8.82303265702029e-06, "loss": 11.1126, "step": 150320 }, { "epoch": 0.3036761111358008, "grad_norm": 181.8113555908203, "learning_rate": 8.82280767568355e-06, "loss": 13.8506, "step": 150330 }, { "epoch": 0.3036963117684846, "grad_norm": 278.054931640625, "learning_rate": 8.822582675714906e-06, "loss": 29.6194, "step": 150340 }, { "epoch": 0.3037165124011684, "grad_norm": 674.2128295898438, "learning_rate": 8.82235765711546e-06, "loss": 32.5813, "step": 150350 }, { "epoch": 0.30373671303385225, "grad_norm": 388.39276123046875, "learning_rate": 8.822132619886303e-06, "loss": 14.978, "step": 150360 }, { "epoch": 0.303756913666536, "grad_norm": 140.5186309814453, "learning_rate": 8.821907564028538e-06, "loss": 16.2977, "step": 150370 }, { "epoch": 0.30377711429921983, "grad_norm": 503.03759765625, "learning_rate": 8.821682489543259e-06, "loss": 42.961, "step": 150380 }, { "epoch": 0.30379731493190365, "grad_norm": 1607.3330078125, "learning_rate": 8.821457396431563e-06, "loss": 36.7542, "step": 150390 }, { "epoch": 0.30381751556458747, "grad_norm": 450.153564453125, "learning_rate": 8.821232284694545e-06, "loss": 13.293, "step": 150400 }, { "epoch": 0.3038377161972713, "grad_norm": 173.54832458496094, "learning_rate": 8.821007154333308e-06, "loss": 11.8466, "step": 150410 }, { "epoch": 0.3038579168299551, "grad_norm": 55.469947814941406, "learning_rate": 8.820782005348943e-06, "loss": 36.09, "step": 150420 }, { "epoch": 0.30387811746263893, "grad_norm": 509.9010009765625, "learning_rate": 8.82055683774255e-06, "loss": 21.5323, "step": 150430 }, { "epoch": 0.30389831809532275, "grad_norm": 361.66937255859375, "learning_rate": 8.820331651515226e-06, "loss": 24.5503, "step": 150440 }, { "epoch": 0.30391851872800657, "grad_norm": 479.13800048828125, "learning_rate": 8.820106446668071e-06, "loss": 10.7529, "step": 150450 }, { "epoch": 0.3039387193606904, "grad_norm": 371.19537353515625, "learning_rate": 8.819881223202179e-06, "loss": 24.8704, "step": 150460 }, { "epoch": 0.3039589199933742, "grad_norm": 139.8623046875, "learning_rate": 8.819655981118649e-06, "loss": 24.5305, "step": 150470 }, { "epoch": 0.30397912062605803, "grad_norm": 502.39508056640625, "learning_rate": 8.819430720418579e-06, "loss": 27.0709, "step": 150480 }, { "epoch": 0.30399932125874185, "grad_norm": 18.935550689697266, "learning_rate": 8.819205441103067e-06, "loss": 8.4231, "step": 150490 }, { "epoch": 0.3040195218914256, "grad_norm": 813.5499877929688, "learning_rate": 8.818980143173212e-06, "loss": 19.9964, "step": 150500 }, { "epoch": 0.30403972252410943, "grad_norm": 372.073486328125, "learning_rate": 8.818754826630109e-06, "loss": 16.9529, "step": 150510 }, { "epoch": 0.30405992315679325, "grad_norm": 553.2196044921875, "learning_rate": 8.81852949147486e-06, "loss": 23.6812, "step": 150520 }, { "epoch": 0.3040801237894771, "grad_norm": 510.5747375488281, "learning_rate": 8.81830413770856e-06, "loss": 16.7492, "step": 150530 }, { "epoch": 0.3041003244221609, "grad_norm": 426.5841369628906, "learning_rate": 8.818078765332309e-06, "loss": 19.8778, "step": 150540 }, { "epoch": 0.3041205250548447, "grad_norm": 267.7163391113281, "learning_rate": 8.817853374347208e-06, "loss": 14.2025, "step": 150550 }, { "epoch": 
0.30414072568752853, "grad_norm": 299.236328125, "learning_rate": 8.81762796475435e-06, "loss": 21.0008, "step": 150560 }, { "epoch": 0.30416092632021235, "grad_norm": 1006.8319702148438, "learning_rate": 8.817402536554835e-06, "loss": 25.6188, "step": 150570 }, { "epoch": 0.3041811269528962, "grad_norm": 292.3677062988281, "learning_rate": 8.817177089749766e-06, "loss": 17.9441, "step": 150580 }, { "epoch": 0.30420132758558, "grad_norm": 192.40882873535156, "learning_rate": 8.816951624340238e-06, "loss": 20.5989, "step": 150590 }, { "epoch": 0.3042215282182638, "grad_norm": 404.0854797363281, "learning_rate": 8.81672614032735e-06, "loss": 7.8247, "step": 150600 }, { "epoch": 0.30424172885094763, "grad_norm": 563.6555786132812, "learning_rate": 8.816500637712201e-06, "loss": 22.9061, "step": 150610 }, { "epoch": 0.30426192948363145, "grad_norm": 723.2141723632812, "learning_rate": 8.816275116495891e-06, "loss": 22.3131, "step": 150620 }, { "epoch": 0.3042821301163152, "grad_norm": 294.1592712402344, "learning_rate": 8.816049576679521e-06, "loss": 15.9305, "step": 150630 }, { "epoch": 0.30430233074899904, "grad_norm": 740.9974975585938, "learning_rate": 8.815824018264187e-06, "loss": 20.614, "step": 150640 }, { "epoch": 0.30432253138168286, "grad_norm": 455.1861572265625, "learning_rate": 8.815598441250987e-06, "loss": 15.316, "step": 150650 }, { "epoch": 0.3043427320143667, "grad_norm": 623.273193359375, "learning_rate": 8.815372845641027e-06, "loss": 24.1247, "step": 150660 }, { "epoch": 0.3043629326470505, "grad_norm": 423.2725830078125, "learning_rate": 8.815147231435402e-06, "loss": 19.7505, "step": 150670 }, { "epoch": 0.3043831332797343, "grad_norm": 416.5577392578125, "learning_rate": 8.81492159863521e-06, "loss": 26.7691, "step": 150680 }, { "epoch": 0.30440333391241814, "grad_norm": 410.5572509765625, "learning_rate": 8.814695947241555e-06, "loss": 20.1439, "step": 150690 }, { "epoch": 0.30442353454510196, "grad_norm": 1430.1671142578125, "learning_rate": 8.814470277255532e-06, "loss": 29.6562, "step": 150700 }, { "epoch": 0.3044437351777858, "grad_norm": 297.33447265625, "learning_rate": 8.814244588678245e-06, "loss": 24.6234, "step": 150710 }, { "epoch": 0.3044639358104696, "grad_norm": 442.55377197265625, "learning_rate": 8.814018881510793e-06, "loss": 25.6873, "step": 150720 }, { "epoch": 0.3044841364431534, "grad_norm": 284.0892639160156, "learning_rate": 8.813793155754273e-06, "loss": 21.4096, "step": 150730 }, { "epoch": 0.30450433707583724, "grad_norm": 378.8963317871094, "learning_rate": 8.81356741140979e-06, "loss": 31.793, "step": 150740 }, { "epoch": 0.304524537708521, "grad_norm": 391.635498046875, "learning_rate": 8.813341648478443e-06, "loss": 17.9829, "step": 150750 }, { "epoch": 0.3045447383412048, "grad_norm": 624.3565673828125, "learning_rate": 8.81311586696133e-06, "loss": 33.0778, "step": 150760 }, { "epoch": 0.30456493897388864, "grad_norm": 898.7164306640625, "learning_rate": 8.812890066859552e-06, "loss": 39.0056, "step": 150770 }, { "epoch": 0.30458513960657246, "grad_norm": 335.1961669921875, "learning_rate": 8.81266424817421e-06, "loss": 17.4489, "step": 150780 }, { "epoch": 0.3046053402392563, "grad_norm": 150.5221710205078, "learning_rate": 8.812438410906407e-06, "loss": 17.4007, "step": 150790 }, { "epoch": 0.3046255408719401, "grad_norm": 478.2091064453125, "learning_rate": 8.81221255505724e-06, "loss": 30.3314, "step": 150800 }, { "epoch": 0.3046457415046239, "grad_norm": 200.3534393310547, "learning_rate": 8.811986680627812e-06, "loss": 17.8803, 
"step": 150810 }, { "epoch": 0.30466594213730774, "grad_norm": 235.67787170410156, "learning_rate": 8.811760787619224e-06, "loss": 29.5374, "step": 150820 }, { "epoch": 0.30468614276999156, "grad_norm": 124.64483642578125, "learning_rate": 8.811534876032575e-06, "loss": 23.3495, "step": 150830 }, { "epoch": 0.3047063434026754, "grad_norm": 801.1688232421875, "learning_rate": 8.811308945868966e-06, "loss": 15.842, "step": 150840 }, { "epoch": 0.3047265440353592, "grad_norm": 195.32472229003906, "learning_rate": 8.811082997129501e-06, "loss": 17.8216, "step": 150850 }, { "epoch": 0.304746744668043, "grad_norm": 332.58074951171875, "learning_rate": 8.81085702981528e-06, "loss": 17.0482, "step": 150860 }, { "epoch": 0.30476694530072684, "grad_norm": 864.2814331054688, "learning_rate": 8.810631043927405e-06, "loss": 21.3788, "step": 150870 }, { "epoch": 0.3047871459334106, "grad_norm": 423.365478515625, "learning_rate": 8.810405039466973e-06, "loss": 30.1768, "step": 150880 }, { "epoch": 0.30480734656609443, "grad_norm": 341.9500732421875, "learning_rate": 8.810179016435092e-06, "loss": 20.8337, "step": 150890 }, { "epoch": 0.30482754719877825, "grad_norm": 575.5938110351562, "learning_rate": 8.80995297483286e-06, "loss": 31.6785, "step": 150900 }, { "epoch": 0.30484774783146207, "grad_norm": 519.7661743164062, "learning_rate": 8.80972691466138e-06, "loss": 21.3529, "step": 150910 }, { "epoch": 0.3048679484641459, "grad_norm": 451.3832092285156, "learning_rate": 8.809500835921751e-06, "loss": 41.5301, "step": 150920 }, { "epoch": 0.3048881490968297, "grad_norm": 420.1068420410156, "learning_rate": 8.809274738615079e-06, "loss": 11.2938, "step": 150930 }, { "epoch": 0.30490834972951353, "grad_norm": 539.9143676757812, "learning_rate": 8.809048622742463e-06, "loss": 9.6369, "step": 150940 }, { "epoch": 0.30492855036219735, "grad_norm": 185.07460021972656, "learning_rate": 8.808822488305005e-06, "loss": 16.2636, "step": 150950 }, { "epoch": 0.30494875099488117, "grad_norm": 300.67254638671875, "learning_rate": 8.80859633530381e-06, "loss": 22.1122, "step": 150960 }, { "epoch": 0.304968951627565, "grad_norm": 820.0911254882812, "learning_rate": 8.808370163739978e-06, "loss": 26.7831, "step": 150970 }, { "epoch": 0.3049891522602488, "grad_norm": 120.84183502197266, "learning_rate": 8.808143973614612e-06, "loss": 10.9375, "step": 150980 }, { "epoch": 0.30500935289293263, "grad_norm": 223.33592224121094, "learning_rate": 8.807917764928813e-06, "loss": 17.7902, "step": 150990 }, { "epoch": 0.30502955352561645, "grad_norm": 956.6483764648438, "learning_rate": 8.807691537683685e-06, "loss": 48.7926, "step": 151000 }, { "epoch": 0.3050497541583002, "grad_norm": 292.8885498046875, "learning_rate": 8.807465291880331e-06, "loss": 47.7859, "step": 151010 }, { "epoch": 0.30506995479098403, "grad_norm": 285.24810791015625, "learning_rate": 8.807239027519852e-06, "loss": 11.9052, "step": 151020 }, { "epoch": 0.30509015542366785, "grad_norm": 325.638427734375, "learning_rate": 8.807012744603352e-06, "loss": 11.6426, "step": 151030 }, { "epoch": 0.3051103560563517, "grad_norm": 344.9587707519531, "learning_rate": 8.806786443131932e-06, "loss": 25.0543, "step": 151040 }, { "epoch": 0.3051305566890355, "grad_norm": 575.2521362304688, "learning_rate": 8.8065601231067e-06, "loss": 14.5129, "step": 151050 }, { "epoch": 0.3051507573217193, "grad_norm": 277.29754638671875, "learning_rate": 8.806333784528754e-06, "loss": 27.8048, "step": 151060 }, { "epoch": 0.30517095795440313, "grad_norm": 1071.9381103515625, 
"learning_rate": 8.806107427399198e-06, "loss": 21.264, "step": 151070 }, { "epoch": 0.30519115858708695, "grad_norm": 700.8414916992188, "learning_rate": 8.805881051719137e-06, "loss": 27.9538, "step": 151080 }, { "epoch": 0.3052113592197708, "grad_norm": 1784.7879638671875, "learning_rate": 8.805654657489673e-06, "loss": 21.2077, "step": 151090 }, { "epoch": 0.3052315598524546, "grad_norm": 364.26922607421875, "learning_rate": 8.80542824471191e-06, "loss": 33.0699, "step": 151100 }, { "epoch": 0.3052517604851384, "grad_norm": 417.54669189453125, "learning_rate": 8.80520181338695e-06, "loss": 15.0805, "step": 151110 }, { "epoch": 0.30527196111782223, "grad_norm": 330.0821533203125, "learning_rate": 8.8049753635159e-06, "loss": 37.3532, "step": 151120 }, { "epoch": 0.30529216175050605, "grad_norm": 614.7396850585938, "learning_rate": 8.80474889509986e-06, "loss": 13.9208, "step": 151130 }, { "epoch": 0.3053123623831898, "grad_norm": 488.34124755859375, "learning_rate": 8.804522408139936e-06, "loss": 24.2354, "step": 151140 }, { "epoch": 0.30533256301587364, "grad_norm": 731.0759887695312, "learning_rate": 8.804295902637233e-06, "loss": 13.7202, "step": 151150 }, { "epoch": 0.30535276364855746, "grad_norm": 26.747020721435547, "learning_rate": 8.80406937859285e-06, "loss": 10.319, "step": 151160 }, { "epoch": 0.3053729642812413, "grad_norm": 245.74026489257812, "learning_rate": 8.803842836007895e-06, "loss": 13.8397, "step": 151170 }, { "epoch": 0.3053931649139251, "grad_norm": 172.47738647460938, "learning_rate": 8.803616274883473e-06, "loss": 26.6136, "step": 151180 }, { "epoch": 0.3054133655466089, "grad_norm": 51.60191345214844, "learning_rate": 8.803389695220685e-06, "loss": 18.4662, "step": 151190 }, { "epoch": 0.30543356617929274, "grad_norm": 185.0488739013672, "learning_rate": 8.803163097020637e-06, "loss": 17.8821, "step": 151200 }, { "epoch": 0.30545376681197656, "grad_norm": 311.64788818359375, "learning_rate": 8.802936480284434e-06, "loss": 16.2108, "step": 151210 }, { "epoch": 0.3054739674446604, "grad_norm": 318.5120544433594, "learning_rate": 8.80270984501318e-06, "loss": 30.7696, "step": 151220 }, { "epoch": 0.3054941680773442, "grad_norm": 144.05270385742188, "learning_rate": 8.802483191207978e-06, "loss": 19.6995, "step": 151230 }, { "epoch": 0.305514368710028, "grad_norm": 172.28659057617188, "learning_rate": 8.802256518869935e-06, "loss": 23.231, "step": 151240 }, { "epoch": 0.30553456934271184, "grad_norm": 370.2278747558594, "learning_rate": 8.802029828000157e-06, "loss": 14.2334, "step": 151250 }, { "epoch": 0.30555476997539566, "grad_norm": 341.0437927246094, "learning_rate": 8.801803118599743e-06, "loss": 12.3321, "step": 151260 }, { "epoch": 0.3055749706080794, "grad_norm": 330.0649108886719, "learning_rate": 8.801576390669803e-06, "loss": 27.4561, "step": 151270 }, { "epoch": 0.30559517124076324, "grad_norm": 393.53973388671875, "learning_rate": 8.801349644211442e-06, "loss": 33.3796, "step": 151280 }, { "epoch": 0.30561537187344706, "grad_norm": 719.0829467773438, "learning_rate": 8.801122879225762e-06, "loss": 30.0677, "step": 151290 }, { "epoch": 0.3056355725061309, "grad_norm": 5.776564121246338, "learning_rate": 8.80089609571387e-06, "loss": 23.2832, "step": 151300 }, { "epoch": 0.3056557731388147, "grad_norm": 748.0703735351562, "learning_rate": 8.800669293676876e-06, "loss": 17.6918, "step": 151310 }, { "epoch": 0.3056759737714985, "grad_norm": 338.00482177734375, "learning_rate": 8.800442473115877e-06, "loss": 24.5486, "step": 151320 }, { "epoch": 
0.30569617440418234, "grad_norm": 414.44073486328125, "learning_rate": 8.800215634031983e-06, "loss": 23.3791, "step": 151330 }, { "epoch": 0.30571637503686616, "grad_norm": 427.2399597167969, "learning_rate": 8.799988776426298e-06, "loss": 17.1218, "step": 151340 }, { "epoch": 0.30573657566955, "grad_norm": 15.36292839050293, "learning_rate": 8.799761900299929e-06, "loss": 24.8374, "step": 151350 }, { "epoch": 0.3057567763022338, "grad_norm": 346.0489196777344, "learning_rate": 8.799535005653982e-06, "loss": 17.9352, "step": 151360 }, { "epoch": 0.3057769769349176, "grad_norm": 172.96986389160156, "learning_rate": 8.799308092489561e-06, "loss": 28.0686, "step": 151370 }, { "epoch": 0.30579717756760144, "grad_norm": 473.66351318359375, "learning_rate": 8.799081160807773e-06, "loss": 19.6241, "step": 151380 }, { "epoch": 0.3058173782002852, "grad_norm": 510.2415771484375, "learning_rate": 8.798854210609727e-06, "loss": 19.7143, "step": 151390 }, { "epoch": 0.305837578832969, "grad_norm": 299.6713562011719, "learning_rate": 8.798627241896524e-06, "loss": 14.5041, "step": 151400 }, { "epoch": 0.30585777946565285, "grad_norm": 21.89015769958496, "learning_rate": 8.798400254669272e-06, "loss": 16.8085, "step": 151410 }, { "epoch": 0.30587798009833667, "grad_norm": 638.985107421875, "learning_rate": 8.798173248929081e-06, "loss": 14.6222, "step": 151420 }, { "epoch": 0.3058981807310205, "grad_norm": 0.38278064131736755, "learning_rate": 8.797946224677051e-06, "loss": 27.8758, "step": 151430 }, { "epoch": 0.3059183813637043, "grad_norm": 278.8282775878906, "learning_rate": 8.797719181914292e-06, "loss": 13.1075, "step": 151440 }, { "epoch": 0.3059385819963881, "grad_norm": 368.1073913574219, "learning_rate": 8.797492120641913e-06, "loss": 25.1111, "step": 151450 }, { "epoch": 0.30595878262907195, "grad_norm": 422.73663330078125, "learning_rate": 8.797265040861016e-06, "loss": 28.2203, "step": 151460 }, { "epoch": 0.30597898326175577, "grad_norm": 501.1971130371094, "learning_rate": 8.79703794257271e-06, "loss": 21.7, "step": 151470 }, { "epoch": 0.3059991838944396, "grad_norm": 15.982429504394531, "learning_rate": 8.796810825778101e-06, "loss": 17.1545, "step": 151480 }, { "epoch": 0.3060193845271234, "grad_norm": 247.7742462158203, "learning_rate": 8.796583690478297e-06, "loss": 25.6396, "step": 151490 }, { "epoch": 0.3060395851598072, "grad_norm": 537.6829223632812, "learning_rate": 8.796356536674404e-06, "loss": 14.4362, "step": 151500 }, { "epoch": 0.30605978579249105, "grad_norm": 227.258056640625, "learning_rate": 8.796129364367532e-06, "loss": 29.386, "step": 151510 }, { "epoch": 0.3060799864251748, "grad_norm": 475.4372253417969, "learning_rate": 8.795902173558784e-06, "loss": 24.9269, "step": 151520 }, { "epoch": 0.30610018705785863, "grad_norm": 557.894775390625, "learning_rate": 8.79567496424927e-06, "loss": 27.5515, "step": 151530 }, { "epoch": 0.30612038769054245, "grad_norm": 847.9214477539062, "learning_rate": 8.795447736440095e-06, "loss": 24.6747, "step": 151540 }, { "epoch": 0.30614058832322627, "grad_norm": 187.60552978515625, "learning_rate": 8.795220490132369e-06, "loss": 14.0796, "step": 151550 }, { "epoch": 0.3061607889559101, "grad_norm": 518.7286987304688, "learning_rate": 8.794993225327199e-06, "loss": 27.9041, "step": 151560 }, { "epoch": 0.3061809895885939, "grad_norm": 2.9284021854400635, "learning_rate": 8.794765942025692e-06, "loss": 11.9497, "step": 151570 }, { "epoch": 0.30620119022127773, "grad_norm": 610.6417846679688, "learning_rate": 8.794538640228956e-06, 
"loss": 49.2123, "step": 151580 }, { "epoch": 0.30622139085396155, "grad_norm": 422.1538391113281, "learning_rate": 8.794311319938098e-06, "loss": 31.4757, "step": 151590 }, { "epoch": 0.30624159148664537, "grad_norm": 586.0907592773438, "learning_rate": 8.794083981154229e-06, "loss": 45.8676, "step": 151600 }, { "epoch": 0.3062617921193292, "grad_norm": 522.1445922851562, "learning_rate": 8.793856623878453e-06, "loss": 28.3242, "step": 151610 }, { "epoch": 0.306281992752013, "grad_norm": 53.978363037109375, "learning_rate": 8.79362924811188e-06, "loss": 41.9327, "step": 151620 }, { "epoch": 0.30630219338469683, "grad_norm": 277.22320556640625, "learning_rate": 8.793401853855619e-06, "loss": 23.9439, "step": 151630 }, { "epoch": 0.30632239401738065, "grad_norm": 217.17184448242188, "learning_rate": 8.793174441110777e-06, "loss": 25.4111, "step": 151640 }, { "epoch": 0.3063425946500644, "grad_norm": 107.39692687988281, "learning_rate": 8.792947009878463e-06, "loss": 18.2247, "step": 151650 }, { "epoch": 0.30636279528274823, "grad_norm": 936.0779418945312, "learning_rate": 8.792719560159786e-06, "loss": 41.7242, "step": 151660 }, { "epoch": 0.30638299591543205, "grad_norm": 123.1182861328125, "learning_rate": 8.792492091955852e-06, "loss": 16.1218, "step": 151670 }, { "epoch": 0.3064031965481159, "grad_norm": 173.78582763671875, "learning_rate": 8.792264605267772e-06, "loss": 15.1619, "step": 151680 }, { "epoch": 0.3064233971807997, "grad_norm": 304.4840393066406, "learning_rate": 8.792037100096656e-06, "loss": 24.324, "step": 151690 }, { "epoch": 0.3064435978134835, "grad_norm": 816.9924926757812, "learning_rate": 8.791809576443611e-06, "loss": 19.6709, "step": 151700 }, { "epoch": 0.30646379844616733, "grad_norm": 411.3579406738281, "learning_rate": 8.791582034309745e-06, "loss": 35.6563, "step": 151710 }, { "epoch": 0.30648399907885115, "grad_norm": 760.7244262695312, "learning_rate": 8.791354473696167e-06, "loss": 27.6784, "step": 151720 }, { "epoch": 0.306504199711535, "grad_norm": 303.0555114746094, "learning_rate": 8.791126894603987e-06, "loss": 16.0399, "step": 151730 }, { "epoch": 0.3065244003442188, "grad_norm": 1149.0203857421875, "learning_rate": 8.790899297034317e-06, "loss": 18.7669, "step": 151740 }, { "epoch": 0.3065446009769026, "grad_norm": 374.638671875, "learning_rate": 8.790671680988261e-06, "loss": 10.4598, "step": 151750 }, { "epoch": 0.30656480160958643, "grad_norm": 321.0159606933594, "learning_rate": 8.790444046466933e-06, "loss": 25.1233, "step": 151760 }, { "epoch": 0.30658500224227025, "grad_norm": 115.95552062988281, "learning_rate": 8.79021639347144e-06, "loss": 31.6759, "step": 151770 }, { "epoch": 0.306605202874954, "grad_norm": 172.00579833984375, "learning_rate": 8.789988722002891e-06, "loss": 11.6271, "step": 151780 }, { "epoch": 0.30662540350763784, "grad_norm": 172.10379028320312, "learning_rate": 8.789761032062398e-06, "loss": 14.2386, "step": 151790 }, { "epoch": 0.30664560414032166, "grad_norm": 762.5750122070312, "learning_rate": 8.789533323651067e-06, "loss": 19.338, "step": 151800 }, { "epoch": 0.3066658047730055, "grad_norm": 392.90447998046875, "learning_rate": 8.789305596770013e-06, "loss": 10.5755, "step": 151810 }, { "epoch": 0.3066860054056893, "grad_norm": 707.3402099609375, "learning_rate": 8.789077851420341e-06, "loss": 13.1562, "step": 151820 }, { "epoch": 0.3067062060383731, "grad_norm": 517.05419921875, "learning_rate": 8.788850087603164e-06, "loss": 24.0082, "step": 151830 }, { "epoch": 0.30672640667105694, "grad_norm": 
438.8190612792969, "learning_rate": 8.788622305319591e-06, "loss": 26.8024, "step": 151840 }, { "epoch": 0.30674660730374076, "grad_norm": 289.7139892578125, "learning_rate": 8.788394504570732e-06, "loss": 20.7359, "step": 151850 }, { "epoch": 0.3067668079364246, "grad_norm": 459.5687561035156, "learning_rate": 8.7881666853577e-06, "loss": 18.1115, "step": 151860 }, { "epoch": 0.3067870085691084, "grad_norm": 904.3475341796875, "learning_rate": 8.7879388476816e-06, "loss": 19.3853, "step": 151870 }, { "epoch": 0.3068072092017922, "grad_norm": 365.3310241699219, "learning_rate": 8.787710991543547e-06, "loss": 28.7085, "step": 151880 }, { "epoch": 0.30682740983447604, "grad_norm": 716.1660766601562, "learning_rate": 8.78748311694465e-06, "loss": 19.3352, "step": 151890 }, { "epoch": 0.3068476104671598, "grad_norm": 482.2041931152344, "learning_rate": 8.78725522388602e-06, "loss": 37.9838, "step": 151900 }, { "epoch": 0.3068678110998436, "grad_norm": 664.7199096679688, "learning_rate": 8.787027312368766e-06, "loss": 30.4015, "step": 151910 }, { "epoch": 0.30688801173252744, "grad_norm": 329.94268798828125, "learning_rate": 8.786799382394e-06, "loss": 17.9445, "step": 151920 }, { "epoch": 0.30690821236521126, "grad_norm": 534.4207153320312, "learning_rate": 8.786571433962837e-06, "loss": 19.7566, "step": 151930 }, { "epoch": 0.3069284129978951, "grad_norm": 233.0865936279297, "learning_rate": 8.78634346707638e-06, "loss": 13.216, "step": 151940 }, { "epoch": 0.3069486136305789, "grad_norm": 622.2435913085938, "learning_rate": 8.786115481735745e-06, "loss": 15.6773, "step": 151950 }, { "epoch": 0.3069688142632627, "grad_norm": 414.759521484375, "learning_rate": 8.785887477942041e-06, "loss": 31.4934, "step": 151960 }, { "epoch": 0.30698901489594654, "grad_norm": 375.6351013183594, "learning_rate": 8.785659455696384e-06, "loss": 32.9653, "step": 151970 }, { "epoch": 0.30700921552863036, "grad_norm": 329.70428466796875, "learning_rate": 8.78543141499988e-06, "loss": 19.4679, "step": 151980 }, { "epoch": 0.3070294161613142, "grad_norm": 582.3593139648438, "learning_rate": 8.785203355853642e-06, "loss": 27.1528, "step": 151990 }, { "epoch": 0.307049616793998, "grad_norm": 520.7802734375, "learning_rate": 8.784975278258783e-06, "loss": 28.7186, "step": 152000 }, { "epoch": 0.3070698174266818, "grad_norm": 485.47332763671875, "learning_rate": 8.784747182216414e-06, "loss": 23.6981, "step": 152010 }, { "epoch": 0.30709001805936564, "grad_norm": 574.9630126953125, "learning_rate": 8.784519067727644e-06, "loss": 24.5485, "step": 152020 }, { "epoch": 0.3071102186920494, "grad_norm": 237.2230224609375, "learning_rate": 8.78429093479359e-06, "loss": 17.3559, "step": 152030 }, { "epoch": 0.30713041932473323, "grad_norm": 459.49072265625, "learning_rate": 8.78406278341536e-06, "loss": 14.4572, "step": 152040 }, { "epoch": 0.30715061995741705, "grad_norm": 412.63922119140625, "learning_rate": 8.783834613594064e-06, "loss": 16.5848, "step": 152050 }, { "epoch": 0.30717082059010087, "grad_norm": 359.7923278808594, "learning_rate": 8.78360642533082e-06, "loss": 18.5688, "step": 152060 }, { "epoch": 0.3071910212227847, "grad_norm": 273.905029296875, "learning_rate": 8.783378218626737e-06, "loss": 21.732, "step": 152070 }, { "epoch": 0.3072112218554685, "grad_norm": 462.4580383300781, "learning_rate": 8.783149993482928e-06, "loss": 19.3913, "step": 152080 }, { "epoch": 0.30723142248815233, "grad_norm": 555.3651733398438, "learning_rate": 8.782921749900502e-06, "loss": 16.1707, "step": 152090 }, { "epoch": 
0.30725162312083615, "grad_norm": 487.9097900390625, "learning_rate": 8.782693487880575e-06, "loss": 33.2753, "step": 152100 }, { "epoch": 0.30727182375351997, "grad_norm": 1059.037353515625, "learning_rate": 8.782465207424261e-06, "loss": 27.1952, "step": 152110 }, { "epoch": 0.3072920243862038, "grad_norm": 1091.01513671875, "learning_rate": 8.78223690853267e-06, "loss": 21.2001, "step": 152120 }, { "epoch": 0.3073122250188876, "grad_norm": 364.620361328125, "learning_rate": 8.782008591206914e-06, "loss": 20.7926, "step": 152130 }, { "epoch": 0.30733242565157143, "grad_norm": 443.7419738769531, "learning_rate": 8.781780255448106e-06, "loss": 27.453, "step": 152140 }, { "epoch": 0.30735262628425525, "grad_norm": 462.845458984375, "learning_rate": 8.78155190125736e-06, "loss": 21.5817, "step": 152150 }, { "epoch": 0.307372826916939, "grad_norm": 250.06491088867188, "learning_rate": 8.78132352863579e-06, "loss": 21.9177, "step": 152160 }, { "epoch": 0.30739302754962283, "grad_norm": 308.0704040527344, "learning_rate": 8.781095137584506e-06, "loss": 12.0368, "step": 152170 }, { "epoch": 0.30741322818230665, "grad_norm": 266.1795959472656, "learning_rate": 8.780866728104625e-06, "loss": 14.4003, "step": 152180 }, { "epoch": 0.3074334288149905, "grad_norm": 126.96919250488281, "learning_rate": 8.780638300197258e-06, "loss": 13.0788, "step": 152190 }, { "epoch": 0.3074536294476743, "grad_norm": 1097.9608154296875, "learning_rate": 8.780409853863517e-06, "loss": 35.0315, "step": 152200 }, { "epoch": 0.3074738300803581, "grad_norm": 452.70880126953125, "learning_rate": 8.780181389104516e-06, "loss": 20.7623, "step": 152210 }, { "epoch": 0.30749403071304193, "grad_norm": 370.7041320800781, "learning_rate": 8.779952905921372e-06, "loss": 25.4909, "step": 152220 }, { "epoch": 0.30751423134572575, "grad_norm": 190.17071533203125, "learning_rate": 8.779724404315195e-06, "loss": 18.3436, "step": 152230 }, { "epoch": 0.3075344319784096, "grad_norm": 550.6524047851562, "learning_rate": 8.779495884287099e-06, "loss": 32.8239, "step": 152240 }, { "epoch": 0.3075546326110934, "grad_norm": 292.2104187011719, "learning_rate": 8.779267345838198e-06, "loss": 38.8242, "step": 152250 }, { "epoch": 0.3075748332437772, "grad_norm": 514.6538696289062, "learning_rate": 8.779038788969607e-06, "loss": 29.647, "step": 152260 }, { "epoch": 0.30759503387646103, "grad_norm": 358.833740234375, "learning_rate": 8.77881021368244e-06, "loss": 16.6317, "step": 152270 }, { "epoch": 0.30761523450914485, "grad_norm": 269.1551818847656, "learning_rate": 8.778581619977811e-06, "loss": 23.7684, "step": 152280 }, { "epoch": 0.3076354351418286, "grad_norm": 448.9606018066406, "learning_rate": 8.778353007856832e-06, "loss": 18.1493, "step": 152290 }, { "epoch": 0.30765563577451244, "grad_norm": 321.6992492675781, "learning_rate": 8.778124377320619e-06, "loss": 21.9051, "step": 152300 }, { "epoch": 0.30767583640719626, "grad_norm": 842.9527587890625, "learning_rate": 8.777895728370285e-06, "loss": 39.2258, "step": 152310 }, { "epoch": 0.3076960370398801, "grad_norm": 870.618896484375, "learning_rate": 8.777667061006947e-06, "loss": 22.0667, "step": 152320 }, { "epoch": 0.3077162376725639, "grad_norm": 382.679931640625, "learning_rate": 8.777438375231717e-06, "loss": 22.2914, "step": 152330 }, { "epoch": 0.3077364383052477, "grad_norm": 350.8429260253906, "learning_rate": 8.77720967104571e-06, "loss": 24.1487, "step": 152340 }, { "epoch": 0.30775663893793154, "grad_norm": 450.3186950683594, "learning_rate": 8.776980948450043e-06, "loss": 
11.0823, "step": 152350 }, { "epoch": 0.30777683957061536, "grad_norm": 218.6016845703125, "learning_rate": 8.776752207445829e-06, "loss": 32.1349, "step": 152360 }, { "epoch": 0.3077970402032992, "grad_norm": 306.7082824707031, "learning_rate": 8.776523448034182e-06, "loss": 18.5718, "step": 152370 }, { "epoch": 0.307817240835983, "grad_norm": 297.98638916015625, "learning_rate": 8.776294670216217e-06, "loss": 15.9312, "step": 152380 }, { "epoch": 0.3078374414686668, "grad_norm": 477.687255859375, "learning_rate": 8.776065873993049e-06, "loss": 14.8394, "step": 152390 }, { "epoch": 0.30785764210135064, "grad_norm": 415.9744567871094, "learning_rate": 8.775837059365796e-06, "loss": 27.5502, "step": 152400 }, { "epoch": 0.30787784273403446, "grad_norm": 554.4899291992188, "learning_rate": 8.77560822633557e-06, "loss": 36.1364, "step": 152410 }, { "epoch": 0.3078980433667182, "grad_norm": 229.49571228027344, "learning_rate": 8.775379374903487e-06, "loss": 30.9903, "step": 152420 }, { "epoch": 0.30791824399940204, "grad_norm": 51.77035140991211, "learning_rate": 8.775150505070664e-06, "loss": 16.698, "step": 152430 }, { "epoch": 0.30793844463208586, "grad_norm": 301.8908386230469, "learning_rate": 8.774921616838217e-06, "loss": 21.288, "step": 152440 }, { "epoch": 0.3079586452647697, "grad_norm": 648.9600830078125, "learning_rate": 8.774692710207257e-06, "loss": 27.5579, "step": 152450 }, { "epoch": 0.3079788458974535, "grad_norm": 731.9532470703125, "learning_rate": 8.774463785178904e-06, "loss": 29.7746, "step": 152460 }, { "epoch": 0.3079990465301373, "grad_norm": 714.5652465820312, "learning_rate": 8.774234841754271e-06, "loss": 15.7488, "step": 152470 }, { "epoch": 0.30801924716282114, "grad_norm": 278.4861755371094, "learning_rate": 8.774005879934475e-06, "loss": 30.5312, "step": 152480 }, { "epoch": 0.30803944779550496, "grad_norm": 458.5387878417969, "learning_rate": 8.773776899720634e-06, "loss": 15.5067, "step": 152490 }, { "epoch": 0.3080596484281888, "grad_norm": 543.5549926757812, "learning_rate": 8.773547901113862e-06, "loss": 19.926, "step": 152500 }, { "epoch": 0.3080798490608726, "grad_norm": 504.25628662109375, "learning_rate": 8.773318884115273e-06, "loss": 39.7353, "step": 152510 }, { "epoch": 0.3081000496935564, "grad_norm": 482.6075439453125, "learning_rate": 8.773089848725986e-06, "loss": 26.9789, "step": 152520 }, { "epoch": 0.30812025032624024, "grad_norm": 178.32061767578125, "learning_rate": 8.772860794947119e-06, "loss": 44.1952, "step": 152530 }, { "epoch": 0.308140450958924, "grad_norm": 263.0315246582031, "learning_rate": 8.772631722779783e-06, "loss": 16.951, "step": 152540 }, { "epoch": 0.3081606515916078, "grad_norm": 445.8074035644531, "learning_rate": 8.772402632225098e-06, "loss": 18.989, "step": 152550 }, { "epoch": 0.30818085222429165, "grad_norm": 383.47491455078125, "learning_rate": 8.772173523284182e-06, "loss": 21.363, "step": 152560 }, { "epoch": 0.30820105285697547, "grad_norm": 259.873046875, "learning_rate": 8.77194439595815e-06, "loss": 15.2514, "step": 152570 }, { "epoch": 0.3082212534896593, "grad_norm": 301.7531433105469, "learning_rate": 8.771715250248116e-06, "loss": 42.1112, "step": 152580 }, { "epoch": 0.3082414541223431, "grad_norm": 15.518353462219238, "learning_rate": 8.771486086155201e-06, "loss": 23.7325, "step": 152590 }, { "epoch": 0.3082616547550269, "grad_norm": 714.9344482421875, "learning_rate": 8.77125690368052e-06, "loss": 29.0086, "step": 152600 }, { "epoch": 0.30828185538771075, "grad_norm": 539.0986328125, "learning_rate": 
8.77102770282519e-06, "loss": 33.3592, "step": 152610 }, { "epoch": 0.30830205602039457, "grad_norm": 708.436279296875, "learning_rate": 8.770798483590327e-06, "loss": 23.5398, "step": 152620 }, { "epoch": 0.3083222566530784, "grad_norm": 287.7116394042969, "learning_rate": 8.770569245977052e-06, "loss": 29.4807, "step": 152630 }, { "epoch": 0.3083424572857622, "grad_norm": 782.888916015625, "learning_rate": 8.770339989986479e-06, "loss": 20.0547, "step": 152640 }, { "epoch": 0.308362657918446, "grad_norm": 497.072998046875, "learning_rate": 8.770110715619726e-06, "loss": 9.7068, "step": 152650 }, { "epoch": 0.30838285855112985, "grad_norm": 568.1920166015625, "learning_rate": 8.769881422877911e-06, "loss": 18.5282, "step": 152660 }, { "epoch": 0.3084030591838136, "grad_norm": 234.79539489746094, "learning_rate": 8.76965211176215e-06, "loss": 17.9275, "step": 152670 }, { "epoch": 0.30842325981649743, "grad_norm": 579.5450439453125, "learning_rate": 8.769422782273563e-06, "loss": 30.9816, "step": 152680 }, { "epoch": 0.30844346044918125, "grad_norm": 390.92034912109375, "learning_rate": 8.769193434413266e-06, "loss": 18.3398, "step": 152690 }, { "epoch": 0.30846366108186507, "grad_norm": 295.37261962890625, "learning_rate": 8.768964068182378e-06, "loss": 12.4739, "step": 152700 }, { "epoch": 0.3084838617145489, "grad_norm": 162.08419799804688, "learning_rate": 8.768734683582017e-06, "loss": 19.1307, "step": 152710 }, { "epoch": 0.3085040623472327, "grad_norm": 79.44987487792969, "learning_rate": 8.768505280613297e-06, "loss": 16.8558, "step": 152720 }, { "epoch": 0.30852426297991653, "grad_norm": 234.40797424316406, "learning_rate": 8.768275859277342e-06, "loss": 43.9868, "step": 152730 }, { "epoch": 0.30854446361260035, "grad_norm": 627.1044311523438, "learning_rate": 8.768046419575267e-06, "loss": 22.7319, "step": 152740 }, { "epoch": 0.30856466424528417, "grad_norm": 148.47650146484375, "learning_rate": 8.767816961508191e-06, "loss": 14.009, "step": 152750 }, { "epoch": 0.308584864877968, "grad_norm": 272.833251953125, "learning_rate": 8.76758748507723e-06, "loss": 30.4871, "step": 152760 }, { "epoch": 0.3086050655106518, "grad_norm": 523.990478515625, "learning_rate": 8.767357990283507e-06, "loss": 19.5321, "step": 152770 }, { "epoch": 0.30862526614333563, "grad_norm": 558.1668090820312, "learning_rate": 8.767128477128138e-06, "loss": 22.5566, "step": 152780 }, { "epoch": 0.30864546677601945, "grad_norm": 291.9372253417969, "learning_rate": 8.766898945612241e-06, "loss": 36.3042, "step": 152790 }, { "epoch": 0.3086656674087032, "grad_norm": 474.65264892578125, "learning_rate": 8.766669395736936e-06, "loss": 23.7728, "step": 152800 }, { "epoch": 0.30868586804138703, "grad_norm": 310.5718994140625, "learning_rate": 8.766439827503339e-06, "loss": 18.2838, "step": 152810 }, { "epoch": 0.30870606867407085, "grad_norm": 158.04115295410156, "learning_rate": 8.766210240912574e-06, "loss": 18.8744, "step": 152820 }, { "epoch": 0.3087262693067547, "grad_norm": 400.5627746582031, "learning_rate": 8.765980635965755e-06, "loss": 27.1191, "step": 152830 }, { "epoch": 0.3087464699394385, "grad_norm": 519.1942749023438, "learning_rate": 8.765751012664004e-06, "loss": 29.7915, "step": 152840 }, { "epoch": 0.3087666705721223, "grad_norm": 200.17587280273438, "learning_rate": 8.765521371008439e-06, "loss": 22.471, "step": 152850 }, { "epoch": 0.30878687120480613, "grad_norm": 521.7061767578125, "learning_rate": 8.76529171100018e-06, "loss": 17.9913, "step": 152860 }, { "epoch": 0.30880707183748995, 
"grad_norm": 756.877685546875, "learning_rate": 8.765062032640346e-06, "loss": 23.4483, "step": 152870 }, { "epoch": 0.3088272724701738, "grad_norm": 189.5233612060547, "learning_rate": 8.764832335930055e-06, "loss": 19.8989, "step": 152880 }, { "epoch": 0.3088474731028576, "grad_norm": 506.61273193359375, "learning_rate": 8.764602620870429e-06, "loss": 15.1679, "step": 152890 }, { "epoch": 0.3088676737355414, "grad_norm": 669.17626953125, "learning_rate": 8.764372887462587e-06, "loss": 17.2644, "step": 152900 }, { "epoch": 0.30888787436822523, "grad_norm": 308.8310241699219, "learning_rate": 8.764143135707647e-06, "loss": 20.6132, "step": 152910 }, { "epoch": 0.30890807500090905, "grad_norm": 683.5242919921875, "learning_rate": 8.76391336560673e-06, "loss": 31.0237, "step": 152920 }, { "epoch": 0.3089282756335928, "grad_norm": 545.5559692382812, "learning_rate": 8.763683577160955e-06, "loss": 25.7465, "step": 152930 }, { "epoch": 0.30894847626627664, "grad_norm": 395.28021240234375, "learning_rate": 8.763453770371444e-06, "loss": 26.0159, "step": 152940 }, { "epoch": 0.30896867689896046, "grad_norm": 20.908044815063477, "learning_rate": 8.763223945239317e-06, "loss": 25.7089, "step": 152950 }, { "epoch": 0.3089888775316443, "grad_norm": 725.0459594726562, "learning_rate": 8.76299410176569e-06, "loss": 14.0466, "step": 152960 }, { "epoch": 0.3090090781643281, "grad_norm": 91.55359649658203, "learning_rate": 8.762764239951688e-06, "loss": 15.5929, "step": 152970 }, { "epoch": 0.3090292787970119, "grad_norm": 144.00570678710938, "learning_rate": 8.76253435979843e-06, "loss": 18.0419, "step": 152980 }, { "epoch": 0.30904947942969574, "grad_norm": 362.8430480957031, "learning_rate": 8.762304461307033e-06, "loss": 27.8007, "step": 152990 }, { "epoch": 0.30906968006237956, "grad_norm": 353.3616027832031, "learning_rate": 8.762074544478622e-06, "loss": 24.0006, "step": 153000 }, { "epoch": 0.3090898806950634, "grad_norm": 366.91448974609375, "learning_rate": 8.761844609314316e-06, "loss": 12.364, "step": 153010 }, { "epoch": 0.3091100813277472, "grad_norm": 259.1235046386719, "learning_rate": 8.761614655815237e-06, "loss": 25.123, "step": 153020 }, { "epoch": 0.309130281960431, "grad_norm": 555.457763671875, "learning_rate": 8.761384683982503e-06, "loss": 29.7559, "step": 153030 }, { "epoch": 0.30915048259311484, "grad_norm": 155.01808166503906, "learning_rate": 8.761154693817236e-06, "loss": 23.0903, "step": 153040 }, { "epoch": 0.30917068322579866, "grad_norm": 171.6526641845703, "learning_rate": 8.760924685320558e-06, "loss": 18.9855, "step": 153050 }, { "epoch": 0.3091908838584824, "grad_norm": 371.7682189941406, "learning_rate": 8.760694658493589e-06, "loss": 30.759, "step": 153060 }, { "epoch": 0.30921108449116624, "grad_norm": 199.42379760742188, "learning_rate": 8.76046461333745e-06, "loss": 21.7867, "step": 153070 }, { "epoch": 0.30923128512385006, "grad_norm": 392.96441650390625, "learning_rate": 8.760234549853263e-06, "loss": 28.6661, "step": 153080 }, { "epoch": 0.3092514857565339, "grad_norm": 567.9005737304688, "learning_rate": 8.760004468042148e-06, "loss": 37.3038, "step": 153090 }, { "epoch": 0.3092716863892177, "grad_norm": 542.5743408203125, "learning_rate": 8.759774367905228e-06, "loss": 15.6898, "step": 153100 }, { "epoch": 0.3092918870219015, "grad_norm": 357.5645446777344, "learning_rate": 8.759544249443624e-06, "loss": 19.9222, "step": 153110 }, { "epoch": 0.30931208765458534, "grad_norm": 382.986572265625, "learning_rate": 8.759314112658458e-06, "loss": 28.247, "step": 
153120 }, { "epoch": 0.30933228828726916, "grad_norm": 362.6043395996094, "learning_rate": 8.759083957550849e-06, "loss": 20.1051, "step": 153130 }, { "epoch": 0.309352488919953, "grad_norm": 335.9272155761719, "learning_rate": 8.758853784121921e-06, "loss": 21.8557, "step": 153140 }, { "epoch": 0.3093726895526368, "grad_norm": 680.1184692382812, "learning_rate": 8.758623592372797e-06, "loss": 24.2451, "step": 153150 }, { "epoch": 0.3093928901853206, "grad_norm": 490.99578857421875, "learning_rate": 8.758393382304597e-06, "loss": 19.7592, "step": 153160 }, { "epoch": 0.30941309081800444, "grad_norm": 358.3629455566406, "learning_rate": 8.758163153918442e-06, "loss": 16.8443, "step": 153170 }, { "epoch": 0.3094332914506882, "grad_norm": 110.01791381835938, "learning_rate": 8.757932907215457e-06, "loss": 19.1974, "step": 153180 }, { "epoch": 0.30945349208337203, "grad_norm": 797.3854370117188, "learning_rate": 8.757702642196763e-06, "loss": 28.4614, "step": 153190 }, { "epoch": 0.30947369271605585, "grad_norm": 447.52392578125, "learning_rate": 8.757472358863481e-06, "loss": 29.6217, "step": 153200 }, { "epoch": 0.30949389334873967, "grad_norm": 749.3699340820312, "learning_rate": 8.757242057216735e-06, "loss": 25.4117, "step": 153210 }, { "epoch": 0.3095140939814235, "grad_norm": 346.72735595703125, "learning_rate": 8.757011737257646e-06, "loss": 23.0122, "step": 153220 }, { "epoch": 0.3095342946141073, "grad_norm": 368.5466003417969, "learning_rate": 8.75678139898734e-06, "loss": 15.7692, "step": 153230 }, { "epoch": 0.30955449524679113, "grad_norm": 720.599609375, "learning_rate": 8.756551042406936e-06, "loss": 41.3858, "step": 153240 }, { "epoch": 0.30957469587947495, "grad_norm": 1139.2069091796875, "learning_rate": 8.756320667517557e-06, "loss": 41.2011, "step": 153250 }, { "epoch": 0.30959489651215877, "grad_norm": 836.5335083007812, "learning_rate": 8.756090274320326e-06, "loss": 22.5103, "step": 153260 }, { "epoch": 0.3096150971448426, "grad_norm": 724.9573974609375, "learning_rate": 8.755859862816368e-06, "loss": 38.7618, "step": 153270 }, { "epoch": 0.3096352977775264, "grad_norm": 269.16058349609375, "learning_rate": 8.755629433006804e-06, "loss": 19.8056, "step": 153280 }, { "epoch": 0.30965549841021023, "grad_norm": 79.0902099609375, "learning_rate": 8.755398984892757e-06, "loss": 32.1713, "step": 153290 }, { "epoch": 0.30967569904289405, "grad_norm": 783.4654541015625, "learning_rate": 8.755168518475351e-06, "loss": 25.255, "step": 153300 }, { "epoch": 0.3096958996755778, "grad_norm": 35.45897674560547, "learning_rate": 8.754938033755712e-06, "loss": 15.7536, "step": 153310 }, { "epoch": 0.30971610030826163, "grad_norm": 158.96868896484375, "learning_rate": 8.754707530734958e-06, "loss": 40.3681, "step": 153320 }, { "epoch": 0.30973630094094545, "grad_norm": 1015.9963989257812, "learning_rate": 8.754477009414215e-06, "loss": 13.0651, "step": 153330 }, { "epoch": 0.3097565015736293, "grad_norm": 575.8265380859375, "learning_rate": 8.754246469794606e-06, "loss": 18.4281, "step": 153340 }, { "epoch": 0.3097767022063131, "grad_norm": 611.1806030273438, "learning_rate": 8.754015911877255e-06, "loss": 30.3986, "step": 153350 }, { "epoch": 0.3097969028389969, "grad_norm": 170.62525939941406, "learning_rate": 8.753785335663287e-06, "loss": 11.3904, "step": 153360 }, { "epoch": 0.30981710347168073, "grad_norm": 284.2012023925781, "learning_rate": 8.753554741153822e-06, "loss": 14.7108, "step": 153370 }, { "epoch": 0.30983730410436455, "grad_norm": 803.2312622070312, "learning_rate": 
8.75332412834999e-06, "loss": 19.1856, "step": 153380 }, { "epoch": 0.3098575047370484, "grad_norm": 967.8285522460938, "learning_rate": 8.75309349725291e-06, "loss": 28.5167, "step": 153390 }, { "epoch": 0.3098777053697322, "grad_norm": 110.3150634765625, "learning_rate": 8.752862847863707e-06, "loss": 27.7898, "step": 153400 }, { "epoch": 0.309897906002416, "grad_norm": 478.0721740722656, "learning_rate": 8.752632180183504e-06, "loss": 28.3004, "step": 153410 }, { "epoch": 0.30991810663509983, "grad_norm": 77.1426010131836, "learning_rate": 8.75240149421343e-06, "loss": 19.5355, "step": 153420 }, { "epoch": 0.30993830726778365, "grad_norm": 322.9454650878906, "learning_rate": 8.752170789954604e-06, "loss": 20.2358, "step": 153430 }, { "epoch": 0.3099585079004674, "grad_norm": 0.0, "learning_rate": 8.751940067408155e-06, "loss": 25.7665, "step": 153440 }, { "epoch": 0.30997870853315124, "grad_norm": 872.8051147460938, "learning_rate": 8.751709326575204e-06, "loss": 14.2968, "step": 153450 }, { "epoch": 0.30999890916583506, "grad_norm": 253.02938842773438, "learning_rate": 8.751478567456874e-06, "loss": 12.1771, "step": 153460 }, { "epoch": 0.3100191097985189, "grad_norm": 740.6807861328125, "learning_rate": 8.751247790054297e-06, "loss": 11.7632, "step": 153470 }, { "epoch": 0.3100393104312027, "grad_norm": 388.167724609375, "learning_rate": 8.75101699436859e-06, "loss": 27.8447, "step": 153480 }, { "epoch": 0.3100595110638865, "grad_norm": 353.12030029296875, "learning_rate": 8.750786180400883e-06, "loss": 17.9569, "step": 153490 }, { "epoch": 0.31007971169657034, "grad_norm": 411.9723815917969, "learning_rate": 8.750555348152299e-06, "loss": 20.2977, "step": 153500 }, { "epoch": 0.31009991232925416, "grad_norm": 465.92498779296875, "learning_rate": 8.750324497623963e-06, "loss": 26.1791, "step": 153510 }, { "epoch": 0.310120112961938, "grad_norm": 1112.1820068359375, "learning_rate": 8.750093628817e-06, "loss": 38.8637, "step": 153520 }, { "epoch": 0.3101403135946218, "grad_norm": 467.22735595703125, "learning_rate": 8.749862741732534e-06, "loss": 27.2548, "step": 153530 }, { "epoch": 0.3101605142273056, "grad_norm": 391.3013000488281, "learning_rate": 8.749631836371692e-06, "loss": 9.4652, "step": 153540 }, { "epoch": 0.31018071485998944, "grad_norm": 515.9251098632812, "learning_rate": 8.749400912735602e-06, "loss": 20.3771, "step": 153550 }, { "epoch": 0.31020091549267326, "grad_norm": 1368.5169677734375, "learning_rate": 8.749169970825384e-06, "loss": 29.3309, "step": 153560 }, { "epoch": 0.310221116125357, "grad_norm": 539.8248901367188, "learning_rate": 8.748939010642168e-06, "loss": 26.0765, "step": 153570 }, { "epoch": 0.31024131675804084, "grad_norm": 494.73089599609375, "learning_rate": 8.748708032187076e-06, "loss": 29.5157, "step": 153580 }, { "epoch": 0.31026151739072466, "grad_norm": 515.84228515625, "learning_rate": 8.748477035461237e-06, "loss": 12.7383, "step": 153590 }, { "epoch": 0.3102817180234085, "grad_norm": 231.21498107910156, "learning_rate": 8.748246020465776e-06, "loss": 34.8677, "step": 153600 }, { "epoch": 0.3103019186560923, "grad_norm": 294.052734375, "learning_rate": 8.748014987201818e-06, "loss": 15.8449, "step": 153610 }, { "epoch": 0.3103221192887761, "grad_norm": 204.9749298095703, "learning_rate": 8.74778393567049e-06, "loss": 17.567, "step": 153620 }, { "epoch": 0.31034231992145994, "grad_norm": 276.3417053222656, "learning_rate": 8.747552865872918e-06, "loss": 21.6977, "step": 153630 }, { "epoch": 0.31036252055414376, "grad_norm": 
545.5340576171875, "learning_rate": 8.747321777810226e-06, "loss": 28.0072, "step": 153640 }, { "epoch": 0.3103827211868276, "grad_norm": 379.01806640625, "learning_rate": 8.747090671483542e-06, "loss": 12.8451, "step": 153650 }, { "epoch": 0.3104029218195114, "grad_norm": 162.28639221191406, "learning_rate": 8.746859546893995e-06, "loss": 23.2111, "step": 153660 }, { "epoch": 0.3104231224521952, "grad_norm": 328.7956237792969, "learning_rate": 8.746628404042707e-06, "loss": 17.2274, "step": 153670 }, { "epoch": 0.31044332308487904, "grad_norm": 695.1113891601562, "learning_rate": 8.74639724293081e-06, "loss": 31.8599, "step": 153680 }, { "epoch": 0.31046352371756286, "grad_norm": 310.2861022949219, "learning_rate": 8.746166063559423e-06, "loss": 20.8219, "step": 153690 }, { "epoch": 0.3104837243502466, "grad_norm": 123.86649322509766, "learning_rate": 8.745934865929676e-06, "loss": 16.3994, "step": 153700 }, { "epoch": 0.31050392498293045, "grad_norm": 211.2790069580078, "learning_rate": 8.745703650042701e-06, "loss": 33.4938, "step": 153710 }, { "epoch": 0.31052412561561427, "grad_norm": 80.10101318359375, "learning_rate": 8.74547241589962e-06, "loss": 33.1368, "step": 153720 }, { "epoch": 0.3105443262482981, "grad_norm": 355.1980285644531, "learning_rate": 8.74524116350156e-06, "loss": 19.4098, "step": 153730 }, { "epoch": 0.3105645268809819, "grad_norm": 374.2474670410156, "learning_rate": 8.745009892849647e-06, "loss": 13.9502, "step": 153740 }, { "epoch": 0.3105847275136657, "grad_norm": 528.13623046875, "learning_rate": 8.744778603945013e-06, "loss": 38.6225, "step": 153750 }, { "epoch": 0.31060492814634955, "grad_norm": 3.575230598449707, "learning_rate": 8.744547296788779e-06, "loss": 15.232, "step": 153760 }, { "epoch": 0.31062512877903337, "grad_norm": 270.492919921875, "learning_rate": 8.744315971382078e-06, "loss": 19.9082, "step": 153770 }, { "epoch": 0.3106453294117172, "grad_norm": 451.46881103515625, "learning_rate": 8.744084627726034e-06, "loss": 14.2561, "step": 153780 }, { "epoch": 0.310665530044401, "grad_norm": 476.95574951171875, "learning_rate": 8.743853265821776e-06, "loss": 15.3103, "step": 153790 }, { "epoch": 0.3106857306770848, "grad_norm": 447.3099670410156, "learning_rate": 8.743621885670431e-06, "loss": 17.8824, "step": 153800 }, { "epoch": 0.31070593130976865, "grad_norm": 476.6871032714844, "learning_rate": 8.743390487273127e-06, "loss": 16.4435, "step": 153810 }, { "epoch": 0.3107261319424524, "grad_norm": 405.0229187011719, "learning_rate": 8.743159070630993e-06, "loss": 34.1352, "step": 153820 }, { "epoch": 0.31074633257513623, "grad_norm": 79.20420837402344, "learning_rate": 8.742927635745155e-06, "loss": 20.4067, "step": 153830 }, { "epoch": 0.31076653320782005, "grad_norm": 275.6741027832031, "learning_rate": 8.742696182616742e-06, "loss": 19.7314, "step": 153840 }, { "epoch": 0.31078673384050387, "grad_norm": 513.9413452148438, "learning_rate": 8.74246471124688e-06, "loss": 21.0318, "step": 153850 }, { "epoch": 0.3108069344731877, "grad_norm": 304.9493408203125, "learning_rate": 8.7422332216367e-06, "loss": 13.3125, "step": 153860 }, { "epoch": 0.3108271351058715, "grad_norm": 617.163818359375, "learning_rate": 8.742001713787329e-06, "loss": 10.1792, "step": 153870 }, { "epoch": 0.31084733573855533, "grad_norm": 423.6902160644531, "learning_rate": 8.741770187699897e-06, "loss": 13.1258, "step": 153880 }, { "epoch": 0.31086753637123915, "grad_norm": 356.60888671875, "learning_rate": 8.741538643375528e-06, "loss": 37.1837, "step": 153890 }, { "epoch": 
0.31088773700392297, "grad_norm": 519.690673828125, "learning_rate": 8.741307080815357e-06, "loss": 40.6583, "step": 153900 }, { "epoch": 0.3109079376366068, "grad_norm": 1111.084228515625, "learning_rate": 8.741075500020506e-06, "loss": 18.5098, "step": 153910 }, { "epoch": 0.3109281382692906, "grad_norm": 591.6962280273438, "learning_rate": 8.74084390099211e-06, "loss": 18.7493, "step": 153920 }, { "epoch": 0.31094833890197443, "grad_norm": 285.12152099609375, "learning_rate": 8.74061228373129e-06, "loss": 17.6761, "step": 153930 }, { "epoch": 0.31096853953465825, "grad_norm": 294.6405944824219, "learning_rate": 8.740380648239182e-06, "loss": 20.9134, "step": 153940 }, { "epoch": 0.310988740167342, "grad_norm": 380.5940246582031, "learning_rate": 8.740148994516912e-06, "loss": 15.1802, "step": 153950 }, { "epoch": 0.31100894080002583, "grad_norm": 644.829345703125, "learning_rate": 8.73991732256561e-06, "loss": 21.9968, "step": 153960 }, { "epoch": 0.31102914143270965, "grad_norm": 271.3200378417969, "learning_rate": 8.739685632386405e-06, "loss": 13.6429, "step": 153970 }, { "epoch": 0.3110493420653935, "grad_norm": 644.6058959960938, "learning_rate": 8.739453923980425e-06, "loss": 21.9192, "step": 153980 }, { "epoch": 0.3110695426980773, "grad_norm": 371.74688720703125, "learning_rate": 8.7392221973488e-06, "loss": 24.7475, "step": 153990 }, { "epoch": 0.3110897433307611, "grad_norm": 1066.115234375, "learning_rate": 8.73899045249266e-06, "loss": 44.2597, "step": 154000 }, { "epoch": 0.31110994396344493, "grad_norm": 613.54833984375, "learning_rate": 8.738758689413133e-06, "loss": 22.0096, "step": 154010 }, { "epoch": 0.31113014459612875, "grad_norm": 460.6983337402344, "learning_rate": 8.738526908111352e-06, "loss": 34.8959, "step": 154020 }, { "epoch": 0.3111503452288126, "grad_norm": 283.304931640625, "learning_rate": 8.738295108588442e-06, "loss": 23.2965, "step": 154030 }, { "epoch": 0.3111705458614964, "grad_norm": 393.63427734375, "learning_rate": 8.738063290845536e-06, "loss": 19.5414, "step": 154040 }, { "epoch": 0.3111907464941802, "grad_norm": 595.6905517578125, "learning_rate": 8.737831454883762e-06, "loss": 32.4807, "step": 154050 }, { "epoch": 0.31121094712686403, "grad_norm": 630.62060546875, "learning_rate": 8.737599600704251e-06, "loss": 37.6232, "step": 154060 }, { "epoch": 0.31123114775954785, "grad_norm": 290.7593688964844, "learning_rate": 8.737367728308134e-06, "loss": 16.2704, "step": 154070 }, { "epoch": 0.3112513483922316, "grad_norm": 105.99109649658203, "learning_rate": 8.737135837696539e-06, "loss": 11.9972, "step": 154080 }, { "epoch": 0.31127154902491544, "grad_norm": 205.48757934570312, "learning_rate": 8.736903928870597e-06, "loss": 31.4111, "step": 154090 }, { "epoch": 0.31129174965759926, "grad_norm": 18.020917892456055, "learning_rate": 8.736672001831438e-06, "loss": 18.1271, "step": 154100 }, { "epoch": 0.3113119502902831, "grad_norm": 534.1895751953125, "learning_rate": 8.736440056580196e-06, "loss": 25.1162, "step": 154110 }, { "epoch": 0.3113321509229669, "grad_norm": 1209.593994140625, "learning_rate": 8.736208093117994e-06, "loss": 28.3276, "step": 154120 }, { "epoch": 0.3113523515556507, "grad_norm": 374.8323669433594, "learning_rate": 8.73597611144597e-06, "loss": 18.5329, "step": 154130 }, { "epoch": 0.31137255218833454, "grad_norm": 579.5465698242188, "learning_rate": 8.73574411156525e-06, "loss": 29.5733, "step": 154140 }, { "epoch": 0.31139275282101836, "grad_norm": 65.66193389892578, "learning_rate": 8.735512093476968e-06, "loss": 
33.4175, "step": 154150 }, { "epoch": 0.3114129534537022, "grad_norm": 162.11863708496094, "learning_rate": 8.735280057182252e-06, "loss": 13.7979, "step": 154160 }, { "epoch": 0.311433154086386, "grad_norm": 179.59805297851562, "learning_rate": 8.735048002682233e-06, "loss": 28.4929, "step": 154170 }, { "epoch": 0.3114533547190698, "grad_norm": 291.77435302734375, "learning_rate": 8.734815929978045e-06, "loss": 22.1306, "step": 154180 }, { "epoch": 0.31147355535175364, "grad_norm": 370.16790771484375, "learning_rate": 8.734583839070817e-06, "loss": 20.3002, "step": 154190 }, { "epoch": 0.31149375598443746, "grad_norm": 365.54107666015625, "learning_rate": 8.73435172996168e-06, "loss": 30.2541, "step": 154200 }, { "epoch": 0.3115139566171212, "grad_norm": 600.3662109375, "learning_rate": 8.734119602651762e-06, "loss": 19.5633, "step": 154210 }, { "epoch": 0.31153415724980504, "grad_norm": 300.4071960449219, "learning_rate": 8.733887457142202e-06, "loss": 20.3158, "step": 154220 }, { "epoch": 0.31155435788248886, "grad_norm": 463.6324462890625, "learning_rate": 8.733655293434127e-06, "loss": 28.0891, "step": 154230 }, { "epoch": 0.3115745585151727, "grad_norm": 286.9560852050781, "learning_rate": 8.733423111528667e-06, "loss": 16.2501, "step": 154240 }, { "epoch": 0.3115947591478565, "grad_norm": 395.87042236328125, "learning_rate": 8.733190911426957e-06, "loss": 16.5527, "step": 154250 }, { "epoch": 0.3116149597805403, "grad_norm": 307.3493957519531, "learning_rate": 8.732958693130128e-06, "loss": 25.7269, "step": 154260 }, { "epoch": 0.31163516041322414, "grad_norm": 596.006591796875, "learning_rate": 8.73272645663931e-06, "loss": 18.0265, "step": 154270 }, { "epoch": 0.31165536104590796, "grad_norm": 510.0229797363281, "learning_rate": 8.732494201955636e-06, "loss": 18.1334, "step": 154280 }, { "epoch": 0.3116755616785918, "grad_norm": 475.0591125488281, "learning_rate": 8.732261929080239e-06, "loss": 8.0178, "step": 154290 }, { "epoch": 0.3116957623112756, "grad_norm": 210.19642639160156, "learning_rate": 8.732029638014249e-06, "loss": 15.1319, "step": 154300 }, { "epoch": 0.3117159629439594, "grad_norm": 445.4887390136719, "learning_rate": 8.7317973287588e-06, "loss": 29.9532, "step": 154310 }, { "epoch": 0.31173616357664324, "grad_norm": 353.43048095703125, "learning_rate": 8.73156500131502e-06, "loss": 33.7921, "step": 154320 }, { "epoch": 0.31175636420932706, "grad_norm": 545.59716796875, "learning_rate": 8.73133265568405e-06, "loss": 18.2144, "step": 154330 }, { "epoch": 0.31177656484201083, "grad_norm": 366.07177734375, "learning_rate": 8.731100291867013e-06, "loss": 16.1114, "step": 154340 }, { "epoch": 0.31179676547469465, "grad_norm": 157.15353393554688, "learning_rate": 8.730867909865048e-06, "loss": 31.1077, "step": 154350 }, { "epoch": 0.31181696610737847, "grad_norm": 177.85256958007812, "learning_rate": 8.730635509679286e-06, "loss": 34.1657, "step": 154360 }, { "epoch": 0.3118371667400623, "grad_norm": 144.4081573486328, "learning_rate": 8.730403091310857e-06, "loss": 16.4859, "step": 154370 }, { "epoch": 0.3118573673727461, "grad_norm": 847.1827392578125, "learning_rate": 8.730170654760896e-06, "loss": 25.0965, "step": 154380 }, { "epoch": 0.31187756800542993, "grad_norm": 388.1310119628906, "learning_rate": 8.729938200030537e-06, "loss": 19.9993, "step": 154390 }, { "epoch": 0.31189776863811375, "grad_norm": 419.26690673828125, "learning_rate": 8.729705727120911e-06, "loss": 19.7634, "step": 154400 }, { "epoch": 0.31191796927079757, "grad_norm": 433.9167785644531, 
"learning_rate": 8.729473236033152e-06, "loss": 20.6485, "step": 154410 }, { "epoch": 0.3119381699034814, "grad_norm": 179.41015625, "learning_rate": 8.729240726768393e-06, "loss": 21.4454, "step": 154420 }, { "epoch": 0.3119583705361652, "grad_norm": 657.5485229492188, "learning_rate": 8.729008199327767e-06, "loss": 29.4549, "step": 154430 }, { "epoch": 0.31197857116884903, "grad_norm": 137.22616577148438, "learning_rate": 8.728775653712405e-06, "loss": 20.9253, "step": 154440 }, { "epoch": 0.31199877180153285, "grad_norm": 252.69107055664062, "learning_rate": 8.728543089923444e-06, "loss": 24.214, "step": 154450 }, { "epoch": 0.3120189724342166, "grad_norm": 839.6799926757812, "learning_rate": 8.728310507962016e-06, "loss": 19.3649, "step": 154460 }, { "epoch": 0.31203917306690043, "grad_norm": 434.42059326171875, "learning_rate": 8.728077907829256e-06, "loss": 42.9605, "step": 154470 }, { "epoch": 0.31205937369958425, "grad_norm": 134.92294311523438, "learning_rate": 8.727845289526296e-06, "loss": 8.4402, "step": 154480 }, { "epoch": 0.3120795743322681, "grad_norm": 30.486722946166992, "learning_rate": 8.72761265305427e-06, "loss": 20.2543, "step": 154490 }, { "epoch": 0.3120997749649519, "grad_norm": 487.7585754394531, "learning_rate": 8.727379998414311e-06, "loss": 26.7082, "step": 154500 }, { "epoch": 0.3121199755976357, "grad_norm": 501.4851379394531, "learning_rate": 8.727147325607556e-06, "loss": 25.2665, "step": 154510 }, { "epoch": 0.31214017623031953, "grad_norm": 593.8209838867188, "learning_rate": 8.726914634635136e-06, "loss": 59.8455, "step": 154520 }, { "epoch": 0.31216037686300335, "grad_norm": 109.87444305419922, "learning_rate": 8.726681925498187e-06, "loss": 19.6351, "step": 154530 }, { "epoch": 0.3121805774956872, "grad_norm": 569.3522338867188, "learning_rate": 8.72644919819784e-06, "loss": 18.8903, "step": 154540 }, { "epoch": 0.312200778128371, "grad_norm": 112.68116760253906, "learning_rate": 8.726216452735233e-06, "loss": 15.7711, "step": 154550 }, { "epoch": 0.3122209787610548, "grad_norm": 271.089599609375, "learning_rate": 8.725983689111499e-06, "loss": 14.8479, "step": 154560 }, { "epoch": 0.31224117939373863, "grad_norm": 431.1347961425781, "learning_rate": 8.725750907327772e-06, "loss": 26.109, "step": 154570 }, { "epoch": 0.31226138002642245, "grad_norm": 448.3652648925781, "learning_rate": 8.725518107385188e-06, "loss": 26.0147, "step": 154580 }, { "epoch": 0.3122815806591062, "grad_norm": 316.93524169921875, "learning_rate": 8.725285289284879e-06, "loss": 33.8821, "step": 154590 }, { "epoch": 0.31230178129179004, "grad_norm": 330.87933349609375, "learning_rate": 8.725052453027982e-06, "loss": 15.7643, "step": 154600 }, { "epoch": 0.31232198192447386, "grad_norm": 209.00180053710938, "learning_rate": 8.72481959861563e-06, "loss": 23.5942, "step": 154610 }, { "epoch": 0.3123421825571577, "grad_norm": 428.0066833496094, "learning_rate": 8.72458672604896e-06, "loss": 24.2969, "step": 154620 }, { "epoch": 0.3123623831898415, "grad_norm": 778.66552734375, "learning_rate": 8.724353835329107e-06, "loss": 40.4456, "step": 154630 }, { "epoch": 0.3123825838225253, "grad_norm": 392.69287109375, "learning_rate": 8.724120926457205e-06, "loss": 22.3423, "step": 154640 }, { "epoch": 0.31240278445520914, "grad_norm": 720.3321533203125, "learning_rate": 8.723887999434389e-06, "loss": 29.4737, "step": 154650 }, { "epoch": 0.31242298508789296, "grad_norm": 296.5901794433594, "learning_rate": 8.723655054261792e-06, "loss": 22.3836, "step": 154660 }, { "epoch": 
0.3124431857205768, "grad_norm": 352.9546203613281, "learning_rate": 8.723422090940556e-06, "loss": 24.9276, "step": 154670 }, { "epoch": 0.3124633863532606, "grad_norm": 666.4312133789062, "learning_rate": 8.72318910947181e-06, "loss": 23.2714, "step": 154680 }, { "epoch": 0.3124835869859444, "grad_norm": 682.2530517578125, "learning_rate": 8.722956109856693e-06, "loss": 25.6024, "step": 154690 }, { "epoch": 0.31250378761862824, "grad_norm": 338.8300476074219, "learning_rate": 8.722723092096337e-06, "loss": 29.3265, "step": 154700 }, { "epoch": 0.31252398825131206, "grad_norm": 135.53916931152344, "learning_rate": 8.722490056191884e-06, "loss": 17.6037, "step": 154710 }, { "epoch": 0.3125441888839958, "grad_norm": 253.51405334472656, "learning_rate": 8.722257002144462e-06, "loss": 27.952, "step": 154720 }, { "epoch": 0.31256438951667964, "grad_norm": 356.64166259765625, "learning_rate": 8.722023929955213e-06, "loss": 22.1669, "step": 154730 }, { "epoch": 0.31258459014936346, "grad_norm": 321.4786376953125, "learning_rate": 8.72179083962527e-06, "loss": 19.3403, "step": 154740 }, { "epoch": 0.3126047907820473, "grad_norm": 240.9815216064453, "learning_rate": 8.72155773115577e-06, "loss": 16.3145, "step": 154750 }, { "epoch": 0.3126249914147311, "grad_norm": 441.646728515625, "learning_rate": 8.721324604547851e-06, "loss": 31.5702, "step": 154760 }, { "epoch": 0.3126451920474149, "grad_norm": 379.2781066894531, "learning_rate": 8.721091459802646e-06, "loss": 27.9705, "step": 154770 }, { "epoch": 0.31266539268009874, "grad_norm": 1475.4915771484375, "learning_rate": 8.72085829692129e-06, "loss": 23.9081, "step": 154780 }, { "epoch": 0.31268559331278256, "grad_norm": 169.7453155517578, "learning_rate": 8.720625115904927e-06, "loss": 22.1654, "step": 154790 }, { "epoch": 0.3127057939454664, "grad_norm": 100.61199188232422, "learning_rate": 8.720391916754683e-06, "loss": 40.4935, "step": 154800 }, { "epoch": 0.3127259945781502, "grad_norm": 683.3108520507812, "learning_rate": 8.720158699471704e-06, "loss": 53.2084, "step": 154810 }, { "epoch": 0.312746195210834, "grad_norm": 250.6405792236328, "learning_rate": 8.71992546405712e-06, "loss": 23.5227, "step": 154820 }, { "epoch": 0.31276639584351784, "grad_norm": 421.2986145019531, "learning_rate": 8.719692210512072e-06, "loss": 16.4337, "step": 154830 }, { "epoch": 0.31278659647620166, "grad_norm": 145.6516876220703, "learning_rate": 8.719458938837695e-06, "loss": 20.4812, "step": 154840 }, { "epoch": 0.3128067971088854, "grad_norm": 173.38214111328125, "learning_rate": 8.719225649035126e-06, "loss": 16.6189, "step": 154850 }, { "epoch": 0.31282699774156925, "grad_norm": 566.532958984375, "learning_rate": 8.718992341105503e-06, "loss": 24.0527, "step": 154860 }, { "epoch": 0.31284719837425307, "grad_norm": 234.0479278564453, "learning_rate": 8.718759015049963e-06, "loss": 18.3841, "step": 154870 }, { "epoch": 0.3128673990069369, "grad_norm": 297.26959228515625, "learning_rate": 8.71852567086964e-06, "loss": 23.2053, "step": 154880 }, { "epoch": 0.3128875996396207, "grad_norm": 723.6534423828125, "learning_rate": 8.718292308565675e-06, "loss": 29.5211, "step": 154890 }, { "epoch": 0.3129078002723045, "grad_norm": 116.78231048583984, "learning_rate": 8.718058928139205e-06, "loss": 29.8787, "step": 154900 }, { "epoch": 0.31292800090498835, "grad_norm": 1550.7498779296875, "learning_rate": 8.717825529591367e-06, "loss": 20.1995, "step": 154910 }, { "epoch": 0.31294820153767217, "grad_norm": 245.3291015625, "learning_rate": 8.717592112923296e-06, 
"loss": 22.0477, "step": 154920 }, { "epoch": 0.312968402170356, "grad_norm": 822.2991333007812, "learning_rate": 8.717358678136133e-06, "loss": 33.4508, "step": 154930 }, { "epoch": 0.3129886028030398, "grad_norm": 551.5245361328125, "learning_rate": 8.717125225231018e-06, "loss": 18.003, "step": 154940 }, { "epoch": 0.3130088034357236, "grad_norm": 425.37152099609375, "learning_rate": 8.716891754209081e-06, "loss": 32.9085, "step": 154950 }, { "epoch": 0.31302900406840745, "grad_norm": 338.026611328125, "learning_rate": 8.716658265071467e-06, "loss": 50.6049, "step": 154960 }, { "epoch": 0.3130492047010912, "grad_norm": 283.1708984375, "learning_rate": 8.71642475781931e-06, "loss": 12.3127, "step": 154970 }, { "epoch": 0.31306940533377503, "grad_norm": 272.32489013671875, "learning_rate": 8.71619123245375e-06, "loss": 13.3999, "step": 154980 }, { "epoch": 0.31308960596645885, "grad_norm": 2465.364990234375, "learning_rate": 8.715957688975925e-06, "loss": 29.2973, "step": 154990 }, { "epoch": 0.31310980659914267, "grad_norm": 1328.699462890625, "learning_rate": 8.715724127386971e-06, "loss": 43.5636, "step": 155000 }, { "epoch": 0.3131300072318265, "grad_norm": 480.14715576171875, "learning_rate": 8.71549054768803e-06, "loss": 38.8249, "step": 155010 }, { "epoch": 0.3131502078645103, "grad_norm": 385.1468505859375, "learning_rate": 8.715256949880239e-06, "loss": 33.3817, "step": 155020 }, { "epoch": 0.31317040849719413, "grad_norm": 245.51141357421875, "learning_rate": 8.715023333964737e-06, "loss": 21.2037, "step": 155030 }, { "epoch": 0.31319060912987795, "grad_norm": 384.6320495605469, "learning_rate": 8.714789699942659e-06, "loss": 17.2748, "step": 155040 }, { "epoch": 0.31321080976256177, "grad_norm": 239.67886352539062, "learning_rate": 8.714556047815148e-06, "loss": 19.1228, "step": 155050 }, { "epoch": 0.3132310103952456, "grad_norm": 64.51019287109375, "learning_rate": 8.714322377583341e-06, "loss": 10.7204, "step": 155060 }, { "epoch": 0.3132512110279294, "grad_norm": 570.5287475585938, "learning_rate": 8.714088689248379e-06, "loss": 23.9701, "step": 155070 }, { "epoch": 0.31327141166061323, "grad_norm": 487.402099609375, "learning_rate": 8.713854982811398e-06, "loss": 24.8807, "step": 155080 }, { "epoch": 0.31329161229329705, "grad_norm": 304.18206787109375, "learning_rate": 8.713621258273539e-06, "loss": 37.0923, "step": 155090 }, { "epoch": 0.3133118129259808, "grad_norm": 369.0475158691406, "learning_rate": 8.713387515635938e-06, "loss": 14.3489, "step": 155100 }, { "epoch": 0.31333201355866463, "grad_norm": 69.50149536132812, "learning_rate": 8.713153754899738e-06, "loss": 16.3796, "step": 155110 }, { "epoch": 0.31335221419134845, "grad_norm": 404.92431640625, "learning_rate": 8.712919976066078e-06, "loss": 24.8467, "step": 155120 }, { "epoch": 0.3133724148240323, "grad_norm": 206.7754669189453, "learning_rate": 8.712686179136097e-06, "loss": 16.1075, "step": 155130 }, { "epoch": 0.3133926154567161, "grad_norm": 358.1948547363281, "learning_rate": 8.712452364110931e-06, "loss": 16.4383, "step": 155140 }, { "epoch": 0.3134128160893999, "grad_norm": 621.1553955078125, "learning_rate": 8.712218530991723e-06, "loss": 22.5428, "step": 155150 }, { "epoch": 0.31343301672208373, "grad_norm": 247.3667755126953, "learning_rate": 8.711984679779612e-06, "loss": 37.8268, "step": 155160 }, { "epoch": 0.31345321735476755, "grad_norm": 483.7298889160156, "learning_rate": 8.71175081047574e-06, "loss": 15.6308, "step": 155170 }, { "epoch": 0.3134734179874514, "grad_norm": 719.111572265625, 
"learning_rate": 8.711516923081244e-06, "loss": 23.1817, "step": 155180 }, { "epoch": 0.3134936186201352, "grad_norm": 56.7630500793457, "learning_rate": 8.711283017597265e-06, "loss": 15.7026, "step": 155190 }, { "epoch": 0.313513819252819, "grad_norm": 712.2754516601562, "learning_rate": 8.711049094024942e-06, "loss": 41.0088, "step": 155200 }, { "epoch": 0.31353401988550283, "grad_norm": 283.11627197265625, "learning_rate": 8.710815152365416e-06, "loss": 23.9812, "step": 155210 }, { "epoch": 0.31355422051818665, "grad_norm": 137.64024353027344, "learning_rate": 8.710581192619824e-06, "loss": 15.8262, "step": 155220 }, { "epoch": 0.3135744211508704, "grad_norm": 225.41720581054688, "learning_rate": 8.710347214789313e-06, "loss": 20.5877, "step": 155230 }, { "epoch": 0.31359462178355424, "grad_norm": 300.43157958984375, "learning_rate": 8.710113218875018e-06, "loss": 16.573, "step": 155240 }, { "epoch": 0.31361482241623806, "grad_norm": 427.4569091796875, "learning_rate": 8.709879204878082e-06, "loss": 25.7318, "step": 155250 }, { "epoch": 0.3136350230489219, "grad_norm": 332.05511474609375, "learning_rate": 8.709645172799646e-06, "loss": 22.4278, "step": 155260 }, { "epoch": 0.3136552236816057, "grad_norm": 748.4037475585938, "learning_rate": 8.709411122640847e-06, "loss": 32.8311, "step": 155270 }, { "epoch": 0.3136754243142895, "grad_norm": 590.763671875, "learning_rate": 8.709177054402829e-06, "loss": 26.0616, "step": 155280 }, { "epoch": 0.31369562494697334, "grad_norm": 405.1456604003906, "learning_rate": 8.708942968086733e-06, "loss": 18.7855, "step": 155290 }, { "epoch": 0.31371582557965716, "grad_norm": 733.8826293945312, "learning_rate": 8.708708863693696e-06, "loss": 24.3785, "step": 155300 }, { "epoch": 0.313736026212341, "grad_norm": 412.6300964355469, "learning_rate": 8.708474741224863e-06, "loss": 27.3805, "step": 155310 }, { "epoch": 0.3137562268450248, "grad_norm": 884.5291748046875, "learning_rate": 8.708240600681375e-06, "loss": 23.0809, "step": 155320 }, { "epoch": 0.3137764274777086, "grad_norm": 214.19802856445312, "learning_rate": 8.708006442064373e-06, "loss": 11.6219, "step": 155330 }, { "epoch": 0.31379662811039244, "grad_norm": 296.8635559082031, "learning_rate": 8.707772265374994e-06, "loss": 20.7415, "step": 155340 }, { "epoch": 0.31381682874307626, "grad_norm": 322.7183837890625, "learning_rate": 8.707538070614385e-06, "loss": 33.076, "step": 155350 }, { "epoch": 0.31383702937576, "grad_norm": 132.36392211914062, "learning_rate": 8.707303857783685e-06, "loss": 29.2217, "step": 155360 }, { "epoch": 0.31385723000844384, "grad_norm": 518.3986206054688, "learning_rate": 8.707069626884034e-06, "loss": 24.5106, "step": 155370 }, { "epoch": 0.31387743064112766, "grad_norm": 303.0432434082031, "learning_rate": 8.706835377916579e-06, "loss": 18.2479, "step": 155380 }, { "epoch": 0.3138976312738115, "grad_norm": 62.68539810180664, "learning_rate": 8.706601110882456e-06, "loss": 13.2302, "step": 155390 }, { "epoch": 0.3139178319064953, "grad_norm": 627.250244140625, "learning_rate": 8.706366825782805e-06, "loss": 20.2127, "step": 155400 }, { "epoch": 0.3139380325391791, "grad_norm": 548.8335571289062, "learning_rate": 8.706132522618777e-06, "loss": 29.7295, "step": 155410 }, { "epoch": 0.31395823317186294, "grad_norm": 392.1265869140625, "learning_rate": 8.705898201391504e-06, "loss": 23.7052, "step": 155420 }, { "epoch": 0.31397843380454676, "grad_norm": 507.8823547363281, "learning_rate": 8.705663862102137e-06, "loss": 28.8804, "step": 155430 }, { "epoch": 
0.3139986344372306, "grad_norm": 397.531982421875, "learning_rate": 8.705429504751813e-06, "loss": 33.2689, "step": 155440 }, { "epoch": 0.3140188350699144, "grad_norm": 248.19107055664062, "learning_rate": 8.705195129341672e-06, "loss": 30.2313, "step": 155450 }, { "epoch": 0.3140390357025982, "grad_norm": 411.1767883300781, "learning_rate": 8.704960735872862e-06, "loss": 29.2826, "step": 155460 }, { "epoch": 0.31405923633528204, "grad_norm": 527.8690185546875, "learning_rate": 8.704726324346521e-06, "loss": 28.9641, "step": 155470 }, { "epoch": 0.31407943696796586, "grad_norm": 317.4735107421875, "learning_rate": 8.704491894763794e-06, "loss": 17.0169, "step": 155480 }, { "epoch": 0.31409963760064963, "grad_norm": 293.9752197265625, "learning_rate": 8.704257447125823e-06, "loss": 24.3327, "step": 155490 }, { "epoch": 0.31411983823333345, "grad_norm": 558.1671142578125, "learning_rate": 8.70402298143375e-06, "loss": 21.332, "step": 155500 }, { "epoch": 0.31414003886601727, "grad_norm": 548.06103515625, "learning_rate": 8.70378849768872e-06, "loss": 17.1295, "step": 155510 }, { "epoch": 0.3141602394987011, "grad_norm": 432.9718933105469, "learning_rate": 8.703553995891873e-06, "loss": 19.9746, "step": 155520 }, { "epoch": 0.3141804401313849, "grad_norm": 476.7720031738281, "learning_rate": 8.703319476044352e-06, "loss": 21.3201, "step": 155530 }, { "epoch": 0.31420064076406873, "grad_norm": 536.4819946289062, "learning_rate": 8.703084938147302e-06, "loss": 25.0215, "step": 155540 }, { "epoch": 0.31422084139675255, "grad_norm": 423.54718017578125, "learning_rate": 8.702850382201863e-06, "loss": 19.4235, "step": 155550 }, { "epoch": 0.31424104202943637, "grad_norm": 244.44570922851562, "learning_rate": 8.702615808209185e-06, "loss": 12.5618, "step": 155560 }, { "epoch": 0.3142612426621202, "grad_norm": 630.3048095703125, "learning_rate": 8.702381216170404e-06, "loss": 24.4882, "step": 155570 }, { "epoch": 0.314281443294804, "grad_norm": 764.992919921875, "learning_rate": 8.702146606086665e-06, "loss": 32.9412, "step": 155580 }, { "epoch": 0.31430164392748783, "grad_norm": 280.8818359375, "learning_rate": 8.701911977959113e-06, "loss": 16.6502, "step": 155590 }, { "epoch": 0.31432184456017165, "grad_norm": 137.41639709472656, "learning_rate": 8.701677331788891e-06, "loss": 24.229, "step": 155600 }, { "epoch": 0.3143420451928554, "grad_norm": 241.95132446289062, "learning_rate": 8.701442667577143e-06, "loss": 9.6414, "step": 155610 }, { "epoch": 0.31436224582553923, "grad_norm": 600.4203491210938, "learning_rate": 8.701207985325013e-06, "loss": 19.5094, "step": 155620 }, { "epoch": 0.31438244645822305, "grad_norm": 502.0248107910156, "learning_rate": 8.700973285033642e-06, "loss": 25.2597, "step": 155630 }, { "epoch": 0.3144026470909069, "grad_norm": 254.17803955078125, "learning_rate": 8.700738566704178e-06, "loss": 13.8078, "step": 155640 }, { "epoch": 0.3144228477235907, "grad_norm": 402.1114501953125, "learning_rate": 8.700503830337763e-06, "loss": 19.0496, "step": 155650 }, { "epoch": 0.3144430483562745, "grad_norm": 553.7101440429688, "learning_rate": 8.700269075935542e-06, "loss": 20.5768, "step": 155660 }, { "epoch": 0.31446324898895833, "grad_norm": 256.2198486328125, "learning_rate": 8.700034303498657e-06, "loss": 12.8878, "step": 155670 }, { "epoch": 0.31448344962164215, "grad_norm": 497.68817138671875, "learning_rate": 8.699799513028252e-06, "loss": 11.7977, "step": 155680 }, { "epoch": 0.314503650254326, "grad_norm": 261.4666442871094, "learning_rate": 8.699564704525477e-06, 
"loss": 17.6982, "step": 155690 }, { "epoch": 0.3145238508870098, "grad_norm": 243.61245727539062, "learning_rate": 8.699329877991469e-06, "loss": 30.5446, "step": 155700 }, { "epoch": 0.3145440515196936, "grad_norm": 161.9713592529297, "learning_rate": 8.699095033427377e-06, "loss": 20.471, "step": 155710 }, { "epoch": 0.31456425215237743, "grad_norm": 230.16290283203125, "learning_rate": 8.698860170834343e-06, "loss": 45.0552, "step": 155720 }, { "epoch": 0.31458445278506125, "grad_norm": 517.7139282226562, "learning_rate": 8.698625290213515e-06, "loss": 15.9301, "step": 155730 }, { "epoch": 0.314604653417745, "grad_norm": 758.0924072265625, "learning_rate": 8.698390391566036e-06, "loss": 19.9534, "step": 155740 }, { "epoch": 0.31462485405042884, "grad_norm": 371.744384765625, "learning_rate": 8.69815547489305e-06, "loss": 17.0234, "step": 155750 }, { "epoch": 0.31464505468311266, "grad_norm": 658.6950073242188, "learning_rate": 8.697920540195702e-06, "loss": 24.9621, "step": 155760 }, { "epoch": 0.3146652553157965, "grad_norm": 310.3829040527344, "learning_rate": 8.697685587475139e-06, "loss": 30.1228, "step": 155770 }, { "epoch": 0.3146854559484803, "grad_norm": 27.588375091552734, "learning_rate": 8.697450616732503e-06, "loss": 27.7926, "step": 155780 }, { "epoch": 0.3147056565811641, "grad_norm": 847.927490234375, "learning_rate": 8.697215627968944e-06, "loss": 25.3589, "step": 155790 }, { "epoch": 0.31472585721384794, "grad_norm": 400.6221008300781, "learning_rate": 8.696980621185602e-06, "loss": 19.515, "step": 155800 }, { "epoch": 0.31474605784653176, "grad_norm": 246.7696075439453, "learning_rate": 8.696745596383627e-06, "loss": 15.7324, "step": 155810 }, { "epoch": 0.3147662584792156, "grad_norm": 309.1961364746094, "learning_rate": 8.696510553564162e-06, "loss": 22.45, "step": 155820 }, { "epoch": 0.3147864591118994, "grad_norm": 183.41746520996094, "learning_rate": 8.696275492728352e-06, "loss": 28.1009, "step": 155830 }, { "epoch": 0.3148066597445832, "grad_norm": 433.7793273925781, "learning_rate": 8.696040413877344e-06, "loss": 29.0775, "step": 155840 }, { "epoch": 0.31482686037726704, "grad_norm": 280.98638916015625, "learning_rate": 8.695805317012283e-06, "loss": 24.2702, "step": 155850 }, { "epoch": 0.31484706100995086, "grad_norm": 333.2210693359375, "learning_rate": 8.695570202134314e-06, "loss": 20.1262, "step": 155860 }, { "epoch": 0.3148672616426346, "grad_norm": 396.2452697753906, "learning_rate": 8.695335069244586e-06, "loss": 16.4875, "step": 155870 }, { "epoch": 0.31488746227531844, "grad_norm": 399.8049011230469, "learning_rate": 8.695099918344243e-06, "loss": 31.2423, "step": 155880 }, { "epoch": 0.31490766290800226, "grad_norm": 294.70062255859375, "learning_rate": 8.69486474943443e-06, "loss": 17.1798, "step": 155890 }, { "epoch": 0.3149278635406861, "grad_norm": 525.5172729492188, "learning_rate": 8.694629562516295e-06, "loss": 25.3958, "step": 155900 }, { "epoch": 0.3149480641733699, "grad_norm": 1121.14599609375, "learning_rate": 8.694394357590982e-06, "loss": 18.7285, "step": 155910 }, { "epoch": 0.3149682648060537, "grad_norm": 525.0504760742188, "learning_rate": 8.694159134659641e-06, "loss": 23.9852, "step": 155920 }, { "epoch": 0.31498846543873754, "grad_norm": 364.8795166015625, "learning_rate": 8.693923893723415e-06, "loss": 18.5837, "step": 155930 }, { "epoch": 0.31500866607142136, "grad_norm": 410.02691650390625, "learning_rate": 8.693688634783453e-06, "loss": 12.7264, "step": 155940 }, { "epoch": 0.3150288667041052, "grad_norm": 
133.11660766601562, "learning_rate": 8.6934533578409e-06, "loss": 9.3515, "step": 155950 }, { "epoch": 0.315049067336789, "grad_norm": 391.3555908203125, "learning_rate": 8.693218062896905e-06, "loss": 10.1853, "step": 155960 }, { "epoch": 0.3150692679694728, "grad_norm": 590.826416015625, "learning_rate": 8.692982749952613e-06, "loss": 22.8992, "step": 155970 }, { "epoch": 0.31508946860215664, "grad_norm": 335.4452819824219, "learning_rate": 8.692747419009168e-06, "loss": 20.7711, "step": 155980 }, { "epoch": 0.31510966923484046, "grad_norm": 464.6659851074219, "learning_rate": 8.692512070067722e-06, "loss": 35.9936, "step": 155990 }, { "epoch": 0.3151298698675242, "grad_norm": 859.1676635742188, "learning_rate": 8.692276703129421e-06, "loss": 18.3248, "step": 156000 }, { "epoch": 0.31515007050020805, "grad_norm": 563.6682739257812, "learning_rate": 8.692041318195409e-06, "loss": 14.7813, "step": 156010 }, { "epoch": 0.31517027113289187, "grad_norm": 423.956298828125, "learning_rate": 8.691805915266836e-06, "loss": 18.973, "step": 156020 }, { "epoch": 0.3151904717655757, "grad_norm": 572.4943237304688, "learning_rate": 8.691570494344848e-06, "loss": 20.4434, "step": 156030 }, { "epoch": 0.3152106723982595, "grad_norm": 460.4983825683594, "learning_rate": 8.691335055430595e-06, "loss": 19.7382, "step": 156040 }, { "epoch": 0.3152308730309433, "grad_norm": 364.9707336425781, "learning_rate": 8.691099598525222e-06, "loss": 17.7182, "step": 156050 }, { "epoch": 0.31525107366362715, "grad_norm": 305.00274658203125, "learning_rate": 8.690864123629876e-06, "loss": 32.178, "step": 156060 }, { "epoch": 0.31527127429631097, "grad_norm": 536.468994140625, "learning_rate": 8.690628630745708e-06, "loss": 22.6808, "step": 156070 }, { "epoch": 0.3152914749289948, "grad_norm": 636.2994384765625, "learning_rate": 8.690393119873863e-06, "loss": 19.5498, "step": 156080 }, { "epoch": 0.3153116755616786, "grad_norm": 181.8765106201172, "learning_rate": 8.690157591015488e-06, "loss": 30.41, "step": 156090 }, { "epoch": 0.3153318761943624, "grad_norm": 500.489990234375, "learning_rate": 8.689922044171735e-06, "loss": 15.6294, "step": 156100 }, { "epoch": 0.31535207682704625, "grad_norm": 278.20111083984375, "learning_rate": 8.689686479343747e-06, "loss": 18.7786, "step": 156110 }, { "epoch": 0.31537227745973007, "grad_norm": 468.1935729980469, "learning_rate": 8.689450896532675e-06, "loss": 11.8421, "step": 156120 }, { "epoch": 0.31539247809241383, "grad_norm": 126.52347564697266, "learning_rate": 8.689215295739669e-06, "loss": 20.7499, "step": 156130 }, { "epoch": 0.31541267872509765, "grad_norm": 160.05636596679688, "learning_rate": 8.688979676965872e-06, "loss": 13.2907, "step": 156140 }, { "epoch": 0.31543287935778147, "grad_norm": 570.4967041015625, "learning_rate": 8.688744040212438e-06, "loss": 12.3124, "step": 156150 }, { "epoch": 0.3154530799904653, "grad_norm": 454.1628112792969, "learning_rate": 8.688508385480513e-06, "loss": 30.6661, "step": 156160 }, { "epoch": 0.3154732806231491, "grad_norm": 20.657920837402344, "learning_rate": 8.688272712771243e-06, "loss": 11.9086, "step": 156170 }, { "epoch": 0.31549348125583293, "grad_norm": 935.6939086914062, "learning_rate": 8.688037022085783e-06, "loss": 18.0572, "step": 156180 }, { "epoch": 0.31551368188851675, "grad_norm": 279.9620666503906, "learning_rate": 8.687801313425275e-06, "loss": 34.7209, "step": 156190 }, { "epoch": 0.31553388252120057, "grad_norm": 215.29830932617188, "learning_rate": 8.68756558679087e-06, "loss": 14.8009, "step": 156200 }, { 
"epoch": 0.3155540831538844, "grad_norm": 438.32879638671875, "learning_rate": 8.68732984218372e-06, "loss": 19.9984, "step": 156210 }, { "epoch": 0.3155742837865682, "grad_norm": 1304.1524658203125, "learning_rate": 8.68709407960497e-06, "loss": 34.2655, "step": 156220 }, { "epoch": 0.31559448441925203, "grad_norm": 35.739952087402344, "learning_rate": 8.68685829905577e-06, "loss": 20.0038, "step": 156230 }, { "epoch": 0.31561468505193585, "grad_norm": 152.79037475585938, "learning_rate": 8.686622500537272e-06, "loss": 15.1975, "step": 156240 }, { "epoch": 0.3156348856846196, "grad_norm": 212.28329467773438, "learning_rate": 8.68638668405062e-06, "loss": 14.6955, "step": 156250 }, { "epoch": 0.31565508631730343, "grad_norm": 85.4367904663086, "learning_rate": 8.68615084959697e-06, "loss": 19.7354, "step": 156260 }, { "epoch": 0.31567528694998725, "grad_norm": 764.028564453125, "learning_rate": 8.685914997177465e-06, "loss": 31.5282, "step": 156270 }, { "epoch": 0.3156954875826711, "grad_norm": 414.4305114746094, "learning_rate": 8.685679126793258e-06, "loss": 23.7288, "step": 156280 }, { "epoch": 0.3157156882153549, "grad_norm": 265.472412109375, "learning_rate": 8.6854432384455e-06, "loss": 18.5333, "step": 156290 }, { "epoch": 0.3157358888480387, "grad_norm": 460.2200012207031, "learning_rate": 8.685207332135337e-06, "loss": 23.1256, "step": 156300 }, { "epoch": 0.31575608948072253, "grad_norm": 353.0555114746094, "learning_rate": 8.68497140786392e-06, "loss": 23.9941, "step": 156310 }, { "epoch": 0.31577629011340635, "grad_norm": 492.4260559082031, "learning_rate": 8.6847354656324e-06, "loss": 21.3439, "step": 156320 }, { "epoch": 0.3157964907460902, "grad_norm": 295.3174743652344, "learning_rate": 8.684499505441926e-06, "loss": 21.2145, "step": 156330 }, { "epoch": 0.315816691378774, "grad_norm": 168.670654296875, "learning_rate": 8.684263527293649e-06, "loss": 29.3661, "step": 156340 }, { "epoch": 0.3158368920114578, "grad_norm": 263.1680603027344, "learning_rate": 8.684027531188717e-06, "loss": 32.4153, "step": 156350 }, { "epoch": 0.31585709264414163, "grad_norm": 364.7283935546875, "learning_rate": 8.683791517128282e-06, "loss": 16.3072, "step": 156360 }, { "epoch": 0.31587729327682545, "grad_norm": 477.0263977050781, "learning_rate": 8.683555485113493e-06, "loss": 29.4266, "step": 156370 }, { "epoch": 0.3158974939095092, "grad_norm": 545.2871704101562, "learning_rate": 8.683319435145503e-06, "loss": 29.2442, "step": 156380 }, { "epoch": 0.31591769454219304, "grad_norm": 244.9238739013672, "learning_rate": 8.683083367225461e-06, "loss": 25.3003, "step": 156390 }, { "epoch": 0.31593789517487686, "grad_norm": 204.55955505371094, "learning_rate": 8.682847281354517e-06, "loss": 43.7771, "step": 156400 }, { "epoch": 0.3159580958075607, "grad_norm": 327.5174865722656, "learning_rate": 8.682611177533822e-06, "loss": 11.0926, "step": 156410 }, { "epoch": 0.3159782964402445, "grad_norm": 512.2208862304688, "learning_rate": 8.682375055764528e-06, "loss": 25.5094, "step": 156420 }, { "epoch": 0.3159984970729283, "grad_norm": 506.9759826660156, "learning_rate": 8.682138916047782e-06, "loss": 53.1913, "step": 156430 }, { "epoch": 0.31601869770561214, "grad_norm": 133.79246520996094, "learning_rate": 8.681902758384738e-06, "loss": 22.5905, "step": 156440 }, { "epoch": 0.31603889833829596, "grad_norm": 601.0071411132812, "learning_rate": 8.681666582776547e-06, "loss": 26.0011, "step": 156450 }, { "epoch": 0.3160590989709798, "grad_norm": 455.7332458496094, "learning_rate": 8.68143038922436e-06, 
"loss": 12.9082, "step": 156460 }, { "epoch": 0.3160792996036636, "grad_norm": 870.0426025390625, "learning_rate": 8.681194177729328e-06, "loss": 23.8988, "step": 156470 }, { "epoch": 0.3160995002363474, "grad_norm": 982.4699096679688, "learning_rate": 8.680957948292602e-06, "loss": 19.0372, "step": 156480 }, { "epoch": 0.31611970086903124, "grad_norm": 412.22589111328125, "learning_rate": 8.680721700915333e-06, "loss": 18.3669, "step": 156490 }, { "epoch": 0.31613990150171506, "grad_norm": 364.1700439453125, "learning_rate": 8.680485435598674e-06, "loss": 15.7197, "step": 156500 }, { "epoch": 0.3161601021343988, "grad_norm": 339.00054931640625, "learning_rate": 8.680249152343772e-06, "loss": 12.7125, "step": 156510 }, { "epoch": 0.31618030276708264, "grad_norm": 782.67236328125, "learning_rate": 8.680012851151785e-06, "loss": 19.3984, "step": 156520 }, { "epoch": 0.31620050339976646, "grad_norm": 572.0099487304688, "learning_rate": 8.679776532023861e-06, "loss": 23.4448, "step": 156530 }, { "epoch": 0.3162207040324503, "grad_norm": 429.39044189453125, "learning_rate": 8.679540194961153e-06, "loss": 20.0682, "step": 156540 }, { "epoch": 0.3162409046651341, "grad_norm": 282.64654541015625, "learning_rate": 8.679303839964811e-06, "loss": 31.609, "step": 156550 }, { "epoch": 0.3162611052978179, "grad_norm": 724.362060546875, "learning_rate": 8.679067467035989e-06, "loss": 29.2453, "step": 156560 }, { "epoch": 0.31628130593050174, "grad_norm": 787.4326171875, "learning_rate": 8.678831076175838e-06, "loss": 20.3108, "step": 156570 }, { "epoch": 0.31630150656318556, "grad_norm": 312.4324645996094, "learning_rate": 8.678594667385511e-06, "loss": 18.152, "step": 156580 }, { "epoch": 0.3163217071958694, "grad_norm": 498.779541015625, "learning_rate": 8.67835824066616e-06, "loss": 19.3695, "step": 156590 }, { "epoch": 0.3163419078285532, "grad_norm": 335.0408020019531, "learning_rate": 8.678121796018938e-06, "loss": 36.8111, "step": 156600 }, { "epoch": 0.316362108461237, "grad_norm": 416.4246520996094, "learning_rate": 8.677885333444995e-06, "loss": 20.3388, "step": 156610 }, { "epoch": 0.31638230909392084, "grad_norm": 197.67642211914062, "learning_rate": 8.677648852945486e-06, "loss": 11.6373, "step": 156620 }, { "epoch": 0.31640250972660466, "grad_norm": 434.5162048339844, "learning_rate": 8.677412354521561e-06, "loss": 21.7159, "step": 156630 }, { "epoch": 0.31642271035928843, "grad_norm": 223.23451232910156, "learning_rate": 8.677175838174374e-06, "loss": 13.2333, "step": 156640 }, { "epoch": 0.31644291099197225, "grad_norm": 374.6792907714844, "learning_rate": 8.67693930390508e-06, "loss": 26.7893, "step": 156650 }, { "epoch": 0.31646311162465607, "grad_norm": 740.7330932617188, "learning_rate": 8.676702751714829e-06, "loss": 17.3265, "step": 156660 }, { "epoch": 0.3164833122573399, "grad_norm": 722.9887084960938, "learning_rate": 8.676466181604775e-06, "loss": 34.9863, "step": 156670 }, { "epoch": 0.3165035128900237, "grad_norm": 618.39697265625, "learning_rate": 8.67622959357607e-06, "loss": 31.7607, "step": 156680 }, { "epoch": 0.31652371352270753, "grad_norm": 210.58859252929688, "learning_rate": 8.675992987629869e-06, "loss": 12.8285, "step": 156690 }, { "epoch": 0.31654391415539135, "grad_norm": 192.90066528320312, "learning_rate": 8.675756363767322e-06, "loss": 19.3255, "step": 156700 }, { "epoch": 0.31656411478807517, "grad_norm": 290.7137451171875, "learning_rate": 8.675519721989585e-06, "loss": 30.7223, "step": 156710 }, { "epoch": 0.316584315420759, "grad_norm": 17.167661666870117, 
"learning_rate": 8.675283062297811e-06, "loss": 18.0766, "step": 156720 }, { "epoch": 0.3166045160534428, "grad_norm": 453.1914978027344, "learning_rate": 8.675046384693154e-06, "loss": 28.4383, "step": 156730 }, { "epoch": 0.31662471668612663, "grad_norm": 374.5320129394531, "learning_rate": 8.674809689176765e-06, "loss": 36.8366, "step": 156740 }, { "epoch": 0.31664491731881045, "grad_norm": 351.17926025390625, "learning_rate": 8.6745729757498e-06, "loss": 23.2534, "step": 156750 }, { "epoch": 0.31666511795149427, "grad_norm": 454.0247497558594, "learning_rate": 8.674336244413413e-06, "loss": 21.8932, "step": 156760 }, { "epoch": 0.31668531858417803, "grad_norm": 240.80499267578125, "learning_rate": 8.674099495168755e-06, "loss": 24.6349, "step": 156770 }, { "epoch": 0.31670551921686185, "grad_norm": 392.14068603515625, "learning_rate": 8.673862728016983e-06, "loss": 14.8336, "step": 156780 }, { "epoch": 0.3167257198495457, "grad_norm": 701.1990356445312, "learning_rate": 8.67362594295925e-06, "loss": 24.7302, "step": 156790 }, { "epoch": 0.3167459204822295, "grad_norm": 513.910400390625, "learning_rate": 8.673389139996708e-06, "loss": 25.9293, "step": 156800 }, { "epoch": 0.3167661211149133, "grad_norm": 515.1926879882812, "learning_rate": 8.673152319130514e-06, "loss": 27.1508, "step": 156810 }, { "epoch": 0.31678632174759713, "grad_norm": 540.6401977539062, "learning_rate": 8.672915480361821e-06, "loss": 20.6046, "step": 156820 }, { "epoch": 0.31680652238028095, "grad_norm": 397.8475341796875, "learning_rate": 8.672678623691783e-06, "loss": 10.7563, "step": 156830 }, { "epoch": 0.3168267230129648, "grad_norm": 631.1962890625, "learning_rate": 8.672441749121555e-06, "loss": 43.2764, "step": 156840 }, { "epoch": 0.3168469236456486, "grad_norm": 31.80294418334961, "learning_rate": 8.672204856652291e-06, "loss": 15.2622, "step": 156850 }, { "epoch": 0.3168671242783324, "grad_norm": 241.8560791015625, "learning_rate": 8.671967946285147e-06, "loss": 18.81, "step": 156860 }, { "epoch": 0.31688732491101623, "grad_norm": 450.6624450683594, "learning_rate": 8.671731018021275e-06, "loss": 23.7015, "step": 156870 }, { "epoch": 0.31690752554370005, "grad_norm": 252.8155975341797, "learning_rate": 8.671494071861832e-06, "loss": 19.1985, "step": 156880 }, { "epoch": 0.3169277261763838, "grad_norm": 589.3222045898438, "learning_rate": 8.671257107807974e-06, "loss": 21.2078, "step": 156890 }, { "epoch": 0.31694792680906764, "grad_norm": 201.46461486816406, "learning_rate": 8.671020125860851e-06, "loss": 36.1632, "step": 156900 }, { "epoch": 0.31696812744175146, "grad_norm": 574.6801147460938, "learning_rate": 8.670783126021623e-06, "loss": 25.3402, "step": 156910 }, { "epoch": 0.3169883280744353, "grad_norm": 560.37060546875, "learning_rate": 8.670546108291443e-06, "loss": 29.2263, "step": 156920 }, { "epoch": 0.3170085287071191, "grad_norm": 198.41749572753906, "learning_rate": 8.670309072671468e-06, "loss": 33.0624, "step": 156930 }, { "epoch": 0.3170287293398029, "grad_norm": 349.1961975097656, "learning_rate": 8.67007201916285e-06, "loss": 19.6403, "step": 156940 }, { "epoch": 0.31704892997248674, "grad_norm": 511.9179382324219, "learning_rate": 8.669834947766746e-06, "loss": 20.1144, "step": 156950 }, { "epoch": 0.31706913060517056, "grad_norm": 577.7297973632812, "learning_rate": 8.66959785848431e-06, "loss": 23.7455, "step": 156960 }, { "epoch": 0.3170893312378544, "grad_norm": 777.7017211914062, "learning_rate": 8.669360751316702e-06, "loss": 26.5656, "step": 156970 }, { "epoch": 
0.3171095318705382, "grad_norm": 167.69903564453125, "learning_rate": 8.669123626265074e-06, "loss": 21.3804, "step": 156980 }, { "epoch": 0.317129732503222, "grad_norm": 360.7198486328125, "learning_rate": 8.668886483330584e-06, "loss": 12.1088, "step": 156990 }, { "epoch": 0.31714993313590584, "grad_norm": 371.81353759765625, "learning_rate": 8.668649322514382e-06, "loss": 26.2547, "step": 157000 }, { "epoch": 0.31717013376858966, "grad_norm": 163.5631561279297, "learning_rate": 8.66841214381763e-06, "loss": 36.1617, "step": 157010 }, { "epoch": 0.3171903344012734, "grad_norm": 279.35504150390625, "learning_rate": 8.668174947241485e-06, "loss": 35.6495, "step": 157020 }, { "epoch": 0.31721053503395724, "grad_norm": 420.391845703125, "learning_rate": 8.667937732787097e-06, "loss": 20.8908, "step": 157030 }, { "epoch": 0.31723073566664106, "grad_norm": 355.45245361328125, "learning_rate": 8.667700500455627e-06, "loss": 16.8542, "step": 157040 }, { "epoch": 0.3172509362993249, "grad_norm": 314.1698303222656, "learning_rate": 8.667463250248229e-06, "loss": 17.1736, "step": 157050 }, { "epoch": 0.3172711369320087, "grad_norm": 202.76202392578125, "learning_rate": 8.667225982166058e-06, "loss": 30.3521, "step": 157060 }, { "epoch": 0.3172913375646925, "grad_norm": 28.479190826416016, "learning_rate": 8.666988696210275e-06, "loss": 14.3018, "step": 157070 }, { "epoch": 0.31731153819737634, "grad_norm": 407.8205871582031, "learning_rate": 8.666751392382033e-06, "loss": 16.5902, "step": 157080 }, { "epoch": 0.31733173883006016, "grad_norm": 123.9936294555664, "learning_rate": 8.66651407068249e-06, "loss": 22.9009, "step": 157090 }, { "epoch": 0.317351939462744, "grad_norm": 1239.0494384765625, "learning_rate": 8.666276731112802e-06, "loss": 18.3345, "step": 157100 }, { "epoch": 0.3173721400954278, "grad_norm": 490.563720703125, "learning_rate": 8.666039373674124e-06, "loss": 11.9196, "step": 157110 }, { "epoch": 0.3173923407281116, "grad_norm": 1140.191650390625, "learning_rate": 8.665801998367616e-06, "loss": 22.4172, "step": 157120 }, { "epoch": 0.31741254136079544, "grad_norm": 311.021240234375, "learning_rate": 8.665564605194435e-06, "loss": 19.7928, "step": 157130 }, { "epoch": 0.31743274199347926, "grad_norm": 276.641845703125, "learning_rate": 8.665327194155736e-06, "loss": 15.2686, "step": 157140 }, { "epoch": 0.317452942626163, "grad_norm": 509.0832824707031, "learning_rate": 8.665089765252674e-06, "loss": 15.1643, "step": 157150 }, { "epoch": 0.31747314325884685, "grad_norm": 20.60483169555664, "learning_rate": 8.664852318486412e-06, "loss": 30.92, "step": 157160 }, { "epoch": 0.31749334389153067, "grad_norm": 438.5542907714844, "learning_rate": 8.664614853858105e-06, "loss": 19.4307, "step": 157170 }, { "epoch": 0.3175135445242145, "grad_norm": 628.7283935546875, "learning_rate": 8.664377371368907e-06, "loss": 21.6214, "step": 157180 }, { "epoch": 0.3175337451568983, "grad_norm": 182.05941772460938, "learning_rate": 8.664139871019979e-06, "loss": 15.2398, "step": 157190 }, { "epoch": 0.3175539457895821, "grad_norm": 967.3475341796875, "learning_rate": 8.66390235281248e-06, "loss": 17.6178, "step": 157200 }, { "epoch": 0.31757414642226595, "grad_norm": 373.94219970703125, "learning_rate": 8.663664816747562e-06, "loss": 16.7508, "step": 157210 }, { "epoch": 0.31759434705494977, "grad_norm": 228.75059509277344, "learning_rate": 8.663427262826386e-06, "loss": 11.1991, "step": 157220 }, { "epoch": 0.3176145476876336, "grad_norm": 676.013671875, "learning_rate": 8.663189691050114e-06, 
"loss": 24.8321, "step": 157230 }, { "epoch": 0.3176347483203174, "grad_norm": 606.6685791015625, "learning_rate": 8.662952101419895e-06, "loss": 20.2691, "step": 157240 }, { "epoch": 0.3176549489530012, "grad_norm": 191.4118194580078, "learning_rate": 8.662714493936895e-06, "loss": 25.0529, "step": 157250 }, { "epoch": 0.31767514958568505, "grad_norm": 393.2243957519531, "learning_rate": 8.662476868602268e-06, "loss": 19.0307, "step": 157260 }, { "epoch": 0.31769535021836887, "grad_norm": 40.03284454345703, "learning_rate": 8.662239225417171e-06, "loss": 20.3379, "step": 157270 }, { "epoch": 0.31771555085105263, "grad_norm": 516.3447875976562, "learning_rate": 8.662001564382768e-06, "loss": 15.767, "step": 157280 }, { "epoch": 0.31773575148373645, "grad_norm": 2002.5963134765625, "learning_rate": 8.66176388550021e-06, "loss": 31.3092, "step": 157290 }, { "epoch": 0.31775595211642027, "grad_norm": 204.0232696533203, "learning_rate": 8.66152618877066e-06, "loss": 27.5483, "step": 157300 }, { "epoch": 0.3177761527491041, "grad_norm": 268.2770080566406, "learning_rate": 8.661288474195275e-06, "loss": 18.3599, "step": 157310 }, { "epoch": 0.3177963533817879, "grad_norm": 439.6703186035156, "learning_rate": 8.661050741775215e-06, "loss": 11.543, "step": 157320 }, { "epoch": 0.31781655401447173, "grad_norm": 579.0086059570312, "learning_rate": 8.660812991511636e-06, "loss": 24.7194, "step": 157330 }, { "epoch": 0.31783675464715555, "grad_norm": 387.04150390625, "learning_rate": 8.6605752234057e-06, "loss": 26.5599, "step": 157340 }, { "epoch": 0.31785695527983937, "grad_norm": 770.0346069335938, "learning_rate": 8.660337437458565e-06, "loss": 45.8609, "step": 157350 }, { "epoch": 0.3178771559125232, "grad_norm": 734.6922607421875, "learning_rate": 8.660099633671388e-06, "loss": 15.791, "step": 157360 }, { "epoch": 0.317897356545207, "grad_norm": 1305.1317138671875, "learning_rate": 8.65986181204533e-06, "loss": 37.2538, "step": 157370 }, { "epoch": 0.31791755717789083, "grad_norm": 1059.372802734375, "learning_rate": 8.659623972581548e-06, "loss": 42.6554, "step": 157380 }, { "epoch": 0.31793775781057465, "grad_norm": 298.19818115234375, "learning_rate": 8.659386115281205e-06, "loss": 11.3735, "step": 157390 }, { "epoch": 0.31795795844325847, "grad_norm": 630.5972900390625, "learning_rate": 8.659148240145456e-06, "loss": 23.9854, "step": 157400 }, { "epoch": 0.31797815907594223, "grad_norm": 400.2347412109375, "learning_rate": 8.658910347175463e-06, "loss": 15.1035, "step": 157410 }, { "epoch": 0.31799835970862605, "grad_norm": 568.3389892578125, "learning_rate": 8.658672436372385e-06, "loss": 23.9981, "step": 157420 }, { "epoch": 0.3180185603413099, "grad_norm": 858.0379638671875, "learning_rate": 8.658434507737381e-06, "loss": 31.0256, "step": 157430 }, { "epoch": 0.3180387609739937, "grad_norm": 242.70700073242188, "learning_rate": 8.65819656127161e-06, "loss": 19.9308, "step": 157440 }, { "epoch": 0.3180589616066775, "grad_norm": 104.55670166015625, "learning_rate": 8.657958596976235e-06, "loss": 39.1577, "step": 157450 }, { "epoch": 0.31807916223936133, "grad_norm": 381.4013977050781, "learning_rate": 8.657720614852412e-06, "loss": 27.9169, "step": 157460 }, { "epoch": 0.31809936287204515, "grad_norm": 851.9417724609375, "learning_rate": 8.657482614901302e-06, "loss": 17.0864, "step": 157470 }, { "epoch": 0.318119563504729, "grad_norm": 253.64715576171875, "learning_rate": 8.657244597124066e-06, "loss": 19.8157, "step": 157480 }, { "epoch": 0.3181397641374128, "grad_norm": 
230.14015197753906, "learning_rate": 8.657006561521863e-06, "loss": 26.3671, "step": 157490 }, { "epoch": 0.3181599647700966, "grad_norm": 392.8260498046875, "learning_rate": 8.656768508095853e-06, "loss": 32.9371, "step": 157500 }, { "epoch": 0.31818016540278043, "grad_norm": 222.6374969482422, "learning_rate": 8.656530436847196e-06, "loss": 15.5303, "step": 157510 }, { "epoch": 0.31820036603546425, "grad_norm": 0.0, "learning_rate": 8.656292347777056e-06, "loss": 27.2507, "step": 157520 }, { "epoch": 0.318220566668148, "grad_norm": 327.7041015625, "learning_rate": 8.65605424088659e-06, "loss": 17.6667, "step": 157530 }, { "epoch": 0.31824076730083184, "grad_norm": 799.1824951171875, "learning_rate": 8.655816116176959e-06, "loss": 26.6767, "step": 157540 }, { "epoch": 0.31826096793351566, "grad_norm": 238.49359130859375, "learning_rate": 8.655577973649322e-06, "loss": 25.6931, "step": 157550 }, { "epoch": 0.3182811685661995, "grad_norm": 530.2203979492188, "learning_rate": 8.655339813304842e-06, "loss": 31.7046, "step": 157560 }, { "epoch": 0.3183013691988833, "grad_norm": 297.263427734375, "learning_rate": 8.655101635144678e-06, "loss": 34.6932, "step": 157570 }, { "epoch": 0.3183215698315671, "grad_norm": 230.2835693359375, "learning_rate": 8.654863439169994e-06, "loss": 25.6645, "step": 157580 }, { "epoch": 0.31834177046425094, "grad_norm": 410.8885498046875, "learning_rate": 8.654625225381947e-06, "loss": 21.8335, "step": 157590 }, { "epoch": 0.31836197109693476, "grad_norm": 201.29010009765625, "learning_rate": 8.654386993781703e-06, "loss": 26.1597, "step": 157600 }, { "epoch": 0.3183821717296186, "grad_norm": 324.5323486328125, "learning_rate": 8.654148744370416e-06, "loss": 6.3611, "step": 157610 }, { "epoch": 0.3184023723623024, "grad_norm": 611.1500244140625, "learning_rate": 8.653910477149254e-06, "loss": 27.3416, "step": 157620 }, { "epoch": 0.3184225729949862, "grad_norm": 464.58636474609375, "learning_rate": 8.653672192119376e-06, "loss": 25.9692, "step": 157630 }, { "epoch": 0.31844277362767004, "grad_norm": 382.68646240234375, "learning_rate": 8.65343388928194e-06, "loss": 16.8495, "step": 157640 }, { "epoch": 0.31846297426035386, "grad_norm": 567.3402099609375, "learning_rate": 8.653195568638114e-06, "loss": 13.9806, "step": 157650 }, { "epoch": 0.3184831748930376, "grad_norm": 670.782470703125, "learning_rate": 8.652957230189051e-06, "loss": 17.2101, "step": 157660 }, { "epoch": 0.31850337552572144, "grad_norm": 301.2229919433594, "learning_rate": 8.652718873935922e-06, "loss": 16.2374, "step": 157670 }, { "epoch": 0.31852357615840526, "grad_norm": 1011.8859252929688, "learning_rate": 8.652480499879881e-06, "loss": 49.0083, "step": 157680 }, { "epoch": 0.3185437767910891, "grad_norm": 254.2294921875, "learning_rate": 8.652242108022095e-06, "loss": 15.571, "step": 157690 }, { "epoch": 0.3185639774237729, "grad_norm": 681.1078491210938, "learning_rate": 8.652003698363724e-06, "loss": 22.2986, "step": 157700 }, { "epoch": 0.3185841780564567, "grad_norm": 573.0494384765625, "learning_rate": 8.65176527090593e-06, "loss": 42.1385, "step": 157710 }, { "epoch": 0.31860437868914054, "grad_norm": 75.19812774658203, "learning_rate": 8.651526825649874e-06, "loss": 16.02, "step": 157720 }, { "epoch": 0.31862457932182436, "grad_norm": 360.0973815917969, "learning_rate": 8.651288362596719e-06, "loss": 35.8746, "step": 157730 }, { "epoch": 0.3186447799545082, "grad_norm": 344.9049377441406, "learning_rate": 8.651049881747628e-06, "loss": 15.7555, "step": 157740 }, { "epoch": 
0.318664980587192, "grad_norm": 369.2949523925781, "learning_rate": 8.650811383103763e-06, "loss": 18.4003, "step": 157750 }, { "epoch": 0.3186851812198758, "grad_norm": 45.9865608215332, "learning_rate": 8.650572866666285e-06, "loss": 29.2727, "step": 157760 }, { "epoch": 0.31870538185255964, "grad_norm": 64.4155044555664, "learning_rate": 8.650334332436358e-06, "loss": 25.9618, "step": 157770 }, { "epoch": 0.31872558248524346, "grad_norm": 563.4820556640625, "learning_rate": 8.650095780415144e-06, "loss": 15.1927, "step": 157780 }, { "epoch": 0.31874578311792723, "grad_norm": 1031.565673828125, "learning_rate": 8.649857210603808e-06, "loss": 26.4215, "step": 157790 }, { "epoch": 0.31876598375061105, "grad_norm": 201.8151092529297, "learning_rate": 8.649618623003509e-06, "loss": 14.7141, "step": 157800 }, { "epoch": 0.31878618438329487, "grad_norm": 543.1805419921875, "learning_rate": 8.649380017615411e-06, "loss": 19.4622, "step": 157810 }, { "epoch": 0.3188063850159787, "grad_norm": 731.8388061523438, "learning_rate": 8.649141394440677e-06, "loss": 24.1656, "step": 157820 }, { "epoch": 0.3188265856486625, "grad_norm": 284.24798583984375, "learning_rate": 8.648902753480472e-06, "loss": 23.817, "step": 157830 }, { "epoch": 0.31884678628134633, "grad_norm": 494.2884216308594, "learning_rate": 8.648664094735957e-06, "loss": 15.5966, "step": 157840 }, { "epoch": 0.31886698691403015, "grad_norm": 291.4805908203125, "learning_rate": 8.648425418208294e-06, "loss": 20.8839, "step": 157850 }, { "epoch": 0.31888718754671397, "grad_norm": 646.5670166015625, "learning_rate": 8.648186723898651e-06, "loss": 12.6177, "step": 157860 }, { "epoch": 0.3189073881793978, "grad_norm": 535.337646484375, "learning_rate": 8.647948011808187e-06, "loss": 25.9195, "step": 157870 }, { "epoch": 0.3189275888120816, "grad_norm": 489.74029541015625, "learning_rate": 8.647709281938066e-06, "loss": 18.5283, "step": 157880 }, { "epoch": 0.31894778944476543, "grad_norm": 525.4874877929688, "learning_rate": 8.647470534289453e-06, "loss": 19.2942, "step": 157890 }, { "epoch": 0.31896799007744925, "grad_norm": 287.4463195800781, "learning_rate": 8.647231768863513e-06, "loss": 21.5834, "step": 157900 }, { "epoch": 0.31898819071013307, "grad_norm": 684.4493408203125, "learning_rate": 8.646992985661404e-06, "loss": 18.918, "step": 157910 }, { "epoch": 0.31900839134281683, "grad_norm": 495.8959045410156, "learning_rate": 8.646754184684297e-06, "loss": 20.9811, "step": 157920 }, { "epoch": 0.31902859197550065, "grad_norm": 252.15960693359375, "learning_rate": 8.64651536593335e-06, "loss": 22.9397, "step": 157930 }, { "epoch": 0.3190487926081845, "grad_norm": 199.3404541015625, "learning_rate": 8.646276529409729e-06, "loss": 17.9345, "step": 157940 }, { "epoch": 0.3190689932408683, "grad_norm": 423.1203308105469, "learning_rate": 8.6460376751146e-06, "loss": 32.543, "step": 157950 }, { "epoch": 0.3190891938735521, "grad_norm": 885.3731079101562, "learning_rate": 8.645798803049126e-06, "loss": 27.9252, "step": 157960 }, { "epoch": 0.31910939450623593, "grad_norm": 846.163818359375, "learning_rate": 8.64555991321447e-06, "loss": 19.8226, "step": 157970 }, { "epoch": 0.31912959513891975, "grad_norm": 1335.4134521484375, "learning_rate": 8.645321005611797e-06, "loss": 34.8784, "step": 157980 }, { "epoch": 0.3191497957716036, "grad_norm": 115.35169982910156, "learning_rate": 8.64508208024227e-06, "loss": 23.3887, "step": 157990 }, { "epoch": 0.3191699964042874, "grad_norm": 168.93600463867188, "learning_rate": 8.644843137107058e-06, 
"loss": 15.7694, "step": 158000 }, { "epoch": 0.3191901970369712, "grad_norm": 792.327880859375, "learning_rate": 8.644604176207322e-06, "loss": 13.7187, "step": 158010 }, { "epoch": 0.31921039766965503, "grad_norm": 186.10223388671875, "learning_rate": 8.644365197544227e-06, "loss": 14.0684, "step": 158020 }, { "epoch": 0.31923059830233885, "grad_norm": 443.1025085449219, "learning_rate": 8.644126201118936e-06, "loss": 21.2446, "step": 158030 }, { "epoch": 0.3192507989350226, "grad_norm": 260.9898986816406, "learning_rate": 8.643887186932617e-06, "loss": 27.4131, "step": 158040 }, { "epoch": 0.31927099956770644, "grad_norm": 589.14111328125, "learning_rate": 8.643648154986436e-06, "loss": 35.1901, "step": 158050 }, { "epoch": 0.31929120020039026, "grad_norm": 364.7909240722656, "learning_rate": 8.643409105281554e-06, "loss": 11.9995, "step": 158060 }, { "epoch": 0.3193114008330741, "grad_norm": 635.873291015625, "learning_rate": 8.643170037819137e-06, "loss": 37.2956, "step": 158070 }, { "epoch": 0.3193316014657579, "grad_norm": 604.2241821289062, "learning_rate": 8.642930952600353e-06, "loss": 26.6291, "step": 158080 }, { "epoch": 0.3193518020984417, "grad_norm": 116.81539154052734, "learning_rate": 8.642691849626364e-06, "loss": 15.4781, "step": 158090 }, { "epoch": 0.31937200273112554, "grad_norm": 331.23272705078125, "learning_rate": 8.642452728898339e-06, "loss": 30.6745, "step": 158100 }, { "epoch": 0.31939220336380936, "grad_norm": 136.81800842285156, "learning_rate": 8.642213590417439e-06, "loss": 15.4753, "step": 158110 }, { "epoch": 0.3194124039964932, "grad_norm": 306.7845764160156, "learning_rate": 8.641974434184832e-06, "loss": 12.2529, "step": 158120 }, { "epoch": 0.319432604629177, "grad_norm": 985.0399169921875, "learning_rate": 8.641735260201682e-06, "loss": 21.8359, "step": 158130 }, { "epoch": 0.3194528052618608, "grad_norm": 222.74391174316406, "learning_rate": 8.641496068469159e-06, "loss": 18.9181, "step": 158140 }, { "epoch": 0.31947300589454464, "grad_norm": 359.5334167480469, "learning_rate": 8.641256858988424e-06, "loss": 24.1814, "step": 158150 }, { "epoch": 0.31949320652722846, "grad_norm": 396.8135986328125, "learning_rate": 8.641017631760646e-06, "loss": 25.7287, "step": 158160 }, { "epoch": 0.3195134071599122, "grad_norm": 826.8873291015625, "learning_rate": 8.640778386786987e-06, "loss": 17.03, "step": 158170 }, { "epoch": 0.31953360779259604, "grad_norm": 0.36692190170288086, "learning_rate": 8.640539124068617e-06, "loss": 14.8261, "step": 158180 }, { "epoch": 0.31955380842527986, "grad_norm": 366.53369140625, "learning_rate": 8.640299843606702e-06, "loss": 19.4884, "step": 158190 }, { "epoch": 0.3195740090579637, "grad_norm": 245.4751434326172, "learning_rate": 8.640060545402407e-06, "loss": 16.6446, "step": 158200 }, { "epoch": 0.3195942096906475, "grad_norm": 424.68695068359375, "learning_rate": 8.639821229456898e-06, "loss": 29.3442, "step": 158210 }, { "epoch": 0.3196144103233313, "grad_norm": 332.3997497558594, "learning_rate": 8.63958189577134e-06, "loss": 20.0662, "step": 158220 }, { "epoch": 0.31963461095601514, "grad_norm": 370.6135559082031, "learning_rate": 8.639342544346903e-06, "loss": 11.5215, "step": 158230 }, { "epoch": 0.31965481158869896, "grad_norm": 104.91935729980469, "learning_rate": 8.639103175184753e-06, "loss": 8.5922, "step": 158240 }, { "epoch": 0.3196750122213828, "grad_norm": 222.44091796875, "learning_rate": 8.638863788286054e-06, "loss": 15.4662, "step": 158250 }, { "epoch": 0.3196952128540666, "grad_norm": 
372.41680908203125, "learning_rate": 8.638624383651974e-06, "loss": 41.0226, "step": 158260 }, { "epoch": 0.3197154134867504, "grad_norm": 319.10723876953125, "learning_rate": 8.638384961283678e-06, "loss": 16.1681, "step": 158270 }, { "epoch": 0.31973561411943424, "grad_norm": 223.899169921875, "learning_rate": 8.63814552118234e-06, "loss": 26.3587, "step": 158280 }, { "epoch": 0.31975581475211806, "grad_norm": 366.6710510253906, "learning_rate": 8.637906063349119e-06, "loss": 18.7715, "step": 158290 }, { "epoch": 0.3197760153848018, "grad_norm": 940.7827758789062, "learning_rate": 8.637666587785185e-06, "loss": 26.3993, "step": 158300 }, { "epoch": 0.31979621601748565, "grad_norm": 1038.5609130859375, "learning_rate": 8.637427094491706e-06, "loss": 26.0275, "step": 158310 }, { "epoch": 0.31981641665016947, "grad_norm": 637.1026000976562, "learning_rate": 8.637187583469849e-06, "loss": 27.5549, "step": 158320 }, { "epoch": 0.3198366172828533, "grad_norm": 1663.321044921875, "learning_rate": 8.63694805472078e-06, "loss": 39.1991, "step": 158330 }, { "epoch": 0.3198568179155371, "grad_norm": 792.3937377929688, "learning_rate": 8.636708508245666e-06, "loss": 36.0736, "step": 158340 }, { "epoch": 0.3198770185482209, "grad_norm": 220.81375122070312, "learning_rate": 8.636468944045677e-06, "loss": 11.5905, "step": 158350 }, { "epoch": 0.31989721918090475, "grad_norm": 360.908447265625, "learning_rate": 8.636229362121979e-06, "loss": 15.2297, "step": 158360 }, { "epoch": 0.31991741981358857, "grad_norm": 176.1470947265625, "learning_rate": 8.635989762475742e-06, "loss": 22.045, "step": 158370 }, { "epoch": 0.3199376204462724, "grad_norm": 344.9416198730469, "learning_rate": 8.63575014510813e-06, "loss": 17.1547, "step": 158380 }, { "epoch": 0.3199578210789562, "grad_norm": 461.4400634765625, "learning_rate": 8.635510510020313e-06, "loss": 27.6871, "step": 158390 }, { "epoch": 0.31997802171164, "grad_norm": 231.4617156982422, "learning_rate": 8.63527085721346e-06, "loss": 29.2742, "step": 158400 }, { "epoch": 0.31999822234432385, "grad_norm": 822.868408203125, "learning_rate": 8.635031186688736e-06, "loss": 19.7909, "step": 158410 }, { "epoch": 0.32001842297700767, "grad_norm": 429.7058410644531, "learning_rate": 8.634791498447313e-06, "loss": 20.7485, "step": 158420 }, { "epoch": 0.32003862360969143, "grad_norm": 483.2109375, "learning_rate": 8.634551792490356e-06, "loss": 44.433, "step": 158430 }, { "epoch": 0.32005882424237525, "grad_norm": 1219.4669189453125, "learning_rate": 8.634312068819032e-06, "loss": 15.1578, "step": 158440 }, { "epoch": 0.32007902487505907, "grad_norm": 244.9398651123047, "learning_rate": 8.634072327434515e-06, "loss": 23.0463, "step": 158450 }, { "epoch": 0.3200992255077429, "grad_norm": 402.80908203125, "learning_rate": 8.63383256833797e-06, "loss": 29.757, "step": 158460 }, { "epoch": 0.3201194261404267, "grad_norm": 767.7590942382812, "learning_rate": 8.633592791530564e-06, "loss": 17.3504, "step": 158470 }, { "epoch": 0.32013962677311053, "grad_norm": 102.67618560791016, "learning_rate": 8.63335299701347e-06, "loss": 27.6856, "step": 158480 }, { "epoch": 0.32015982740579435, "grad_norm": 11.8885498046875, "learning_rate": 8.633113184787852e-06, "loss": 24.0924, "step": 158490 }, { "epoch": 0.32018002803847817, "grad_norm": 200.66864013671875, "learning_rate": 8.632873354854881e-06, "loss": 20.7576, "step": 158500 }, { "epoch": 0.320200228671162, "grad_norm": 431.1730651855469, "learning_rate": 8.632633507215726e-06, "loss": 17.4027, "step": 158510 }, { "epoch": 
0.3202204293038458, "grad_norm": 377.1340637207031, "learning_rate": 8.632393641871555e-06, "loss": 30.595, "step": 158520 }, { "epoch": 0.32024062993652963, "grad_norm": 270.2852478027344, "learning_rate": 8.63215375882354e-06, "loss": 22.4549, "step": 158530 }, { "epoch": 0.32026083056921345, "grad_norm": 98.90607452392578, "learning_rate": 8.631913858072846e-06, "loss": 32.6666, "step": 158540 }, { "epoch": 0.32028103120189727, "grad_norm": 1097.68896484375, "learning_rate": 8.631673939620647e-06, "loss": 26.0116, "step": 158550 }, { "epoch": 0.32030123183458103, "grad_norm": 483.80401611328125, "learning_rate": 8.631434003468108e-06, "loss": 12.891, "step": 158560 }, { "epoch": 0.32032143246726485, "grad_norm": 25.915245056152344, "learning_rate": 8.6311940496164e-06, "loss": 19.0802, "step": 158570 }, { "epoch": 0.3203416330999487, "grad_norm": 348.0601806640625, "learning_rate": 8.630954078066693e-06, "loss": 18.2453, "step": 158580 }, { "epoch": 0.3203618337326325, "grad_norm": 373.0682678222656, "learning_rate": 8.630714088820158e-06, "loss": 25.1146, "step": 158590 }, { "epoch": 0.3203820343653163, "grad_norm": 5706.0205078125, "learning_rate": 8.630474081877959e-06, "loss": 48.6244, "step": 158600 }, { "epoch": 0.32040223499800013, "grad_norm": 265.8067932128906, "learning_rate": 8.630234057241274e-06, "loss": 42.9384, "step": 158610 }, { "epoch": 0.32042243563068395, "grad_norm": 269.0303039550781, "learning_rate": 8.629994014911265e-06, "loss": 29.8681, "step": 158620 }, { "epoch": 0.3204426362633678, "grad_norm": 253.42100524902344, "learning_rate": 8.629753954889108e-06, "loss": 10.7813, "step": 158630 }, { "epoch": 0.3204628368960516, "grad_norm": 863.8766479492188, "learning_rate": 8.629513877175968e-06, "loss": 17.4965, "step": 158640 }, { "epoch": 0.3204830375287354, "grad_norm": 141.02073669433594, "learning_rate": 8.62927378177302e-06, "loss": 16.2872, "step": 158650 }, { "epoch": 0.32050323816141923, "grad_norm": 413.55548095703125, "learning_rate": 8.629033668681431e-06, "loss": 23.4488, "step": 158660 }, { "epoch": 0.32052343879410305, "grad_norm": 362.0960388183594, "learning_rate": 8.62879353790237e-06, "loss": 28.441, "step": 158670 }, { "epoch": 0.3205436394267868, "grad_norm": 296.40264892578125, "learning_rate": 8.628553389437011e-06, "loss": 19.8648, "step": 158680 }, { "epoch": 0.32056384005947064, "grad_norm": 643.4523315429688, "learning_rate": 8.628313223286524e-06, "loss": 34.9976, "step": 158690 }, { "epoch": 0.32058404069215446, "grad_norm": 509.4014892578125, "learning_rate": 8.628073039452076e-06, "loss": 19.9797, "step": 158700 }, { "epoch": 0.3206042413248383, "grad_norm": 369.1424865722656, "learning_rate": 8.627832837934843e-06, "loss": 17.0493, "step": 158710 }, { "epoch": 0.3206244419575221, "grad_norm": 500.41961669921875, "learning_rate": 8.627592618735989e-06, "loss": 22.8432, "step": 158720 }, { "epoch": 0.3206446425902059, "grad_norm": 438.64874267578125, "learning_rate": 8.627352381856691e-06, "loss": 19.6252, "step": 158730 }, { "epoch": 0.32066484322288974, "grad_norm": 272.56451416015625, "learning_rate": 8.627112127298117e-06, "loss": 20.2484, "step": 158740 }, { "epoch": 0.32068504385557356, "grad_norm": 302.90557861328125, "learning_rate": 8.626871855061438e-06, "loss": 21.2945, "step": 158750 }, { "epoch": 0.3207052444882574, "grad_norm": 643.6118774414062, "learning_rate": 8.626631565147827e-06, "loss": 18.7646, "step": 158760 }, { "epoch": 0.3207254451209412, "grad_norm": 448.28302001953125, "learning_rate": 8.626391257558453e-06, 
"loss": 21.5459, "step": 158770 }, { "epoch": 0.320745645753625, "grad_norm": 689.1029663085938, "learning_rate": 8.626150932294486e-06, "loss": 28.1602, "step": 158780 }, { "epoch": 0.32076584638630884, "grad_norm": 184.48121643066406, "learning_rate": 8.625910589357102e-06, "loss": 25.9541, "step": 158790 }, { "epoch": 0.32078604701899266, "grad_norm": 216.4418487548828, "learning_rate": 8.625670228747467e-06, "loss": 53.694, "step": 158800 }, { "epoch": 0.3208062476516764, "grad_norm": 524.427001953125, "learning_rate": 8.625429850466756e-06, "loss": 25.2995, "step": 158810 }, { "epoch": 0.32082644828436024, "grad_norm": 788.6796875, "learning_rate": 8.625189454516141e-06, "loss": 16.7995, "step": 158820 }, { "epoch": 0.32084664891704406, "grad_norm": 331.1761169433594, "learning_rate": 8.62494904089679e-06, "loss": 25.5924, "step": 158830 }, { "epoch": 0.3208668495497279, "grad_norm": 555.55615234375, "learning_rate": 8.624708609609879e-06, "loss": 37.2603, "step": 158840 }, { "epoch": 0.3208870501824117, "grad_norm": 193.11349487304688, "learning_rate": 8.624468160656576e-06, "loss": 25.2725, "step": 158850 }, { "epoch": 0.3209072508150955, "grad_norm": 238.9698028564453, "learning_rate": 8.624227694038057e-06, "loss": 18.2588, "step": 158860 }, { "epoch": 0.32092745144777934, "grad_norm": 507.4835205078125, "learning_rate": 8.623987209755489e-06, "loss": 21.4299, "step": 158870 }, { "epoch": 0.32094765208046316, "grad_norm": 296.4114990234375, "learning_rate": 8.62374670781005e-06, "loss": 27.3128, "step": 158880 }, { "epoch": 0.320967852713147, "grad_norm": 814.8472900390625, "learning_rate": 8.623506188202906e-06, "loss": 34.9156, "step": 158890 }, { "epoch": 0.3209880533458308, "grad_norm": 561.2838134765625, "learning_rate": 8.623265650935233e-06, "loss": 24.7943, "step": 158900 }, { "epoch": 0.3210082539785146, "grad_norm": 629.7000122070312, "learning_rate": 8.623025096008203e-06, "loss": 29.4117, "step": 158910 }, { "epoch": 0.32102845461119844, "grad_norm": 72.86721801757812, "learning_rate": 8.62278452342299e-06, "loss": 20.8836, "step": 158920 }, { "epoch": 0.32104865524388226, "grad_norm": 66.56548309326172, "learning_rate": 8.622543933180762e-06, "loss": 15.8122, "step": 158930 }, { "epoch": 0.32106885587656603, "grad_norm": 236.08253479003906, "learning_rate": 8.622303325282697e-06, "loss": 12.6933, "step": 158940 }, { "epoch": 0.32108905650924985, "grad_norm": 456.3116455078125, "learning_rate": 8.622062699729963e-06, "loss": 36.0511, "step": 158950 }, { "epoch": 0.32110925714193367, "grad_norm": 351.74493408203125, "learning_rate": 8.621822056523735e-06, "loss": 12.0406, "step": 158960 }, { "epoch": 0.3211294577746175, "grad_norm": 116.14690399169922, "learning_rate": 8.621581395665185e-06, "loss": 21.5926, "step": 158970 }, { "epoch": 0.3211496584073013, "grad_norm": 470.22625732421875, "learning_rate": 8.621340717155487e-06, "loss": 20.1175, "step": 158980 }, { "epoch": 0.32116985903998513, "grad_norm": 192.45420837402344, "learning_rate": 8.621100020995813e-06, "loss": 15.4986, "step": 158990 }, { "epoch": 0.32119005967266895, "grad_norm": 224.54586791992188, "learning_rate": 8.620859307187339e-06, "loss": 25.7586, "step": 159000 }, { "epoch": 0.32121026030535277, "grad_norm": 173.90090942382812, "learning_rate": 8.620618575731233e-06, "loss": 19.6338, "step": 159010 }, { "epoch": 0.3212304609380366, "grad_norm": 409.9088439941406, "learning_rate": 8.620377826628672e-06, "loss": 24.1875, "step": 159020 }, { "epoch": 0.3212506615707204, "grad_norm": 
281.1211242675781, "learning_rate": 8.62013705988083e-06, "loss": 17.2278, "step": 159030 }, { "epoch": 0.32127086220340423, "grad_norm": 227.73297119140625, "learning_rate": 8.619896275488876e-06, "loss": 28.7881, "step": 159040 }, { "epoch": 0.32129106283608805, "grad_norm": 463.6237487792969, "learning_rate": 8.61965547345399e-06, "loss": 30.688, "step": 159050 }, { "epoch": 0.32131126346877187, "grad_norm": 467.28204345703125, "learning_rate": 8.619414653777341e-06, "loss": 17.0095, "step": 159060 }, { "epoch": 0.32133146410145563, "grad_norm": 106.27899169921875, "learning_rate": 8.619173816460104e-06, "loss": 23.959, "step": 159070 }, { "epoch": 0.32135166473413945, "grad_norm": 203.37060546875, "learning_rate": 8.618932961503452e-06, "loss": 10.4024, "step": 159080 }, { "epoch": 0.3213718653668233, "grad_norm": 109.97218322753906, "learning_rate": 8.618692088908562e-06, "loss": 17.3537, "step": 159090 }, { "epoch": 0.3213920659995071, "grad_norm": 427.2147521972656, "learning_rate": 8.618451198676602e-06, "loss": 27.7377, "step": 159100 }, { "epoch": 0.3214122666321909, "grad_norm": 371.28009033203125, "learning_rate": 8.618210290808753e-06, "loss": 10.7697, "step": 159110 }, { "epoch": 0.32143246726487473, "grad_norm": 709.9109497070312, "learning_rate": 8.617969365306184e-06, "loss": 17.8241, "step": 159120 }, { "epoch": 0.32145266789755855, "grad_norm": 611.5872802734375, "learning_rate": 8.617728422170071e-06, "loss": 18.3207, "step": 159130 }, { "epoch": 0.3214728685302424, "grad_norm": 256.28411865234375, "learning_rate": 8.61748746140159e-06, "loss": 18.4094, "step": 159140 }, { "epoch": 0.3214930691629262, "grad_norm": 697.2211303710938, "learning_rate": 8.617246483001914e-06, "loss": 20.5986, "step": 159150 }, { "epoch": 0.32151326979561, "grad_norm": 935.1668701171875, "learning_rate": 8.617005486972214e-06, "loss": 24.9866, "step": 159160 }, { "epoch": 0.32153347042829383, "grad_norm": 103.9272232055664, "learning_rate": 8.616764473313671e-06, "loss": 23.4761, "step": 159170 }, { "epoch": 0.32155367106097765, "grad_norm": 273.02557373046875, "learning_rate": 8.616523442027456e-06, "loss": 16.7901, "step": 159180 }, { "epoch": 0.3215738716936615, "grad_norm": 314.946533203125, "learning_rate": 8.616282393114745e-06, "loss": 27.1486, "step": 159190 }, { "epoch": 0.32159407232634524, "grad_norm": 593.523193359375, "learning_rate": 8.616041326576711e-06, "loss": 28.6153, "step": 159200 }, { "epoch": 0.32161427295902906, "grad_norm": 619.8696899414062, "learning_rate": 8.61580024241453e-06, "loss": 13.1667, "step": 159210 }, { "epoch": 0.3216344735917129, "grad_norm": 529.56298828125, "learning_rate": 8.615559140629377e-06, "loss": 23.2421, "step": 159220 }, { "epoch": 0.3216546742243967, "grad_norm": 20.517074584960938, "learning_rate": 8.61531802122243e-06, "loss": 8.4084, "step": 159230 }, { "epoch": 0.3216748748570805, "grad_norm": 288.3408508300781, "learning_rate": 8.615076884194859e-06, "loss": 26.7926, "step": 159240 }, { "epoch": 0.32169507548976434, "grad_norm": 660.5656127929688, "learning_rate": 8.614835729547841e-06, "loss": 21.9357, "step": 159250 }, { "epoch": 0.32171527612244816, "grad_norm": 425.1676330566406, "learning_rate": 8.614594557282553e-06, "loss": 17.5466, "step": 159260 }, { "epoch": 0.321735476755132, "grad_norm": 572.3814086914062, "learning_rate": 8.614353367400171e-06, "loss": 25.9064, "step": 159270 }, { "epoch": 0.3217556773878158, "grad_norm": 125.5263900756836, "learning_rate": 8.614112159901869e-06, "loss": 13.9782, "step": 159280 }, { 
"epoch": 0.3217758780204996, "grad_norm": 48.52302551269531, "learning_rate": 8.61387093478882e-06, "loss": 17.9707, "step": 159290 }, { "epoch": 0.32179607865318344, "grad_norm": 624.1820678710938, "learning_rate": 8.613629692062204e-06, "loss": 27.8861, "step": 159300 }, { "epoch": 0.32181627928586726, "grad_norm": 714.5958862304688, "learning_rate": 8.613388431723195e-06, "loss": 35.3888, "step": 159310 }, { "epoch": 0.321836479918551, "grad_norm": 3063.965576171875, "learning_rate": 8.61314715377297e-06, "loss": 19.4244, "step": 159320 }, { "epoch": 0.32185668055123484, "grad_norm": 191.87030029296875, "learning_rate": 8.612905858212702e-06, "loss": 19.0154, "step": 159330 }, { "epoch": 0.32187688118391866, "grad_norm": 271.310546875, "learning_rate": 8.612664545043572e-06, "loss": 10.0511, "step": 159340 }, { "epoch": 0.3218970818166025, "grad_norm": 486.7148132324219, "learning_rate": 8.612423214266749e-06, "loss": 15.3081, "step": 159350 }, { "epoch": 0.3219172824492863, "grad_norm": 153.84129333496094, "learning_rate": 8.612181865883416e-06, "loss": 18.8152, "step": 159360 }, { "epoch": 0.3219374830819701, "grad_norm": 518.131103515625, "learning_rate": 8.611940499894746e-06, "loss": 25.9769, "step": 159370 }, { "epoch": 0.32195768371465394, "grad_norm": 402.9221496582031, "learning_rate": 8.611699116301916e-06, "loss": 26.4168, "step": 159380 }, { "epoch": 0.32197788434733776, "grad_norm": 579.6004638671875, "learning_rate": 8.611457715106103e-06, "loss": 18.7459, "step": 159390 }, { "epoch": 0.3219980849800216, "grad_norm": 272.2318115234375, "learning_rate": 8.611216296308485e-06, "loss": 19.9651, "step": 159400 }, { "epoch": 0.3220182856127054, "grad_norm": 543.0274047851562, "learning_rate": 8.610974859910235e-06, "loss": 29.9832, "step": 159410 }, { "epoch": 0.3220384862453892, "grad_norm": 544.9154663085938, "learning_rate": 8.610733405912531e-06, "loss": 17.4575, "step": 159420 }, { "epoch": 0.32205868687807304, "grad_norm": 183.30484008789062, "learning_rate": 8.61049193431655e-06, "loss": 10.8891, "step": 159430 }, { "epoch": 0.32207888751075686, "grad_norm": 614.1286010742188, "learning_rate": 8.610250445123472e-06, "loss": 30.1711, "step": 159440 }, { "epoch": 0.3220990881434406, "grad_norm": 405.5843505859375, "learning_rate": 8.610008938334467e-06, "loss": 18.1423, "step": 159450 }, { "epoch": 0.32211928877612445, "grad_norm": 353.0569152832031, "learning_rate": 8.609767413950719e-06, "loss": 13.327, "step": 159460 }, { "epoch": 0.32213948940880827, "grad_norm": 1106.0927734375, "learning_rate": 8.609525871973402e-06, "loss": 26.8672, "step": 159470 }, { "epoch": 0.3221596900414921, "grad_norm": 678.6915283203125, "learning_rate": 8.609284312403695e-06, "loss": 17.5967, "step": 159480 }, { "epoch": 0.3221798906741759, "grad_norm": 203.57518005371094, "learning_rate": 8.60904273524277e-06, "loss": 8.7134, "step": 159490 }, { "epoch": 0.3222000913068597, "grad_norm": 877.3408813476562, "learning_rate": 8.608801140491811e-06, "loss": 22.3646, "step": 159500 }, { "epoch": 0.32222029193954355, "grad_norm": 440.02825927734375, "learning_rate": 8.608559528151994e-06, "loss": 32.0175, "step": 159510 }, { "epoch": 0.32224049257222737, "grad_norm": 284.1017150878906, "learning_rate": 8.608317898224495e-06, "loss": 22.2352, "step": 159520 }, { "epoch": 0.3222606932049112, "grad_norm": 996.2796630859375, "learning_rate": 8.608076250710491e-06, "loss": 24.237, "step": 159530 }, { "epoch": 0.322280893837595, "grad_norm": 1123.6976318359375, "learning_rate": 8.607834585611162e-06, 
"loss": 20.9938, "step": 159540 }, { "epoch": 0.3223010944702788, "grad_norm": 386.87310791015625, "learning_rate": 8.607592902927684e-06, "loss": 18.9438, "step": 159550 }, { "epoch": 0.32232129510296265, "grad_norm": 24.58682632446289, "learning_rate": 8.607351202661236e-06, "loss": 14.1573, "step": 159560 }, { "epoch": 0.32234149573564647, "grad_norm": 492.16845703125, "learning_rate": 8.607109484812996e-06, "loss": 15.2691, "step": 159570 }, { "epoch": 0.32236169636833023, "grad_norm": 160.42462158203125, "learning_rate": 8.606867749384142e-06, "loss": 20.1392, "step": 159580 }, { "epoch": 0.32238189700101405, "grad_norm": 693.1361694335938, "learning_rate": 8.60662599637585e-06, "loss": 38.6382, "step": 159590 }, { "epoch": 0.32240209763369787, "grad_norm": 316.7165222167969, "learning_rate": 8.606384225789304e-06, "loss": 13.4398, "step": 159600 }, { "epoch": 0.3224222982663817, "grad_norm": 468.33197021484375, "learning_rate": 8.606142437625676e-06, "loss": 17.6492, "step": 159610 }, { "epoch": 0.3224424988990655, "grad_norm": 207.65821838378906, "learning_rate": 8.605900631886148e-06, "loss": 18.3911, "step": 159620 }, { "epoch": 0.32246269953174933, "grad_norm": 844.1154174804688, "learning_rate": 8.605658808571898e-06, "loss": 13.3952, "step": 159630 }, { "epoch": 0.32248290016443315, "grad_norm": 258.1779479980469, "learning_rate": 8.605416967684105e-06, "loss": 12.232, "step": 159640 }, { "epoch": 0.32250310079711697, "grad_norm": 377.6234436035156, "learning_rate": 8.605175109223945e-06, "loss": 18.0037, "step": 159650 }, { "epoch": 0.3225233014298008, "grad_norm": 42.619693756103516, "learning_rate": 8.604933233192598e-06, "loss": 11.9227, "step": 159660 }, { "epoch": 0.3225435020624846, "grad_norm": 325.4930114746094, "learning_rate": 8.604691339591248e-06, "loss": 11.5, "step": 159670 }, { "epoch": 0.32256370269516843, "grad_norm": 397.5663757324219, "learning_rate": 8.604449428421065e-06, "loss": 27.9093, "step": 159680 }, { "epoch": 0.32258390332785225, "grad_norm": 103.412353515625, "learning_rate": 8.604207499683235e-06, "loss": 13.7212, "step": 159690 }, { "epoch": 0.32260410396053607, "grad_norm": 265.3908996582031, "learning_rate": 8.603965553378934e-06, "loss": 14.6327, "step": 159700 }, { "epoch": 0.32262430459321984, "grad_norm": 446.9028625488281, "learning_rate": 8.603723589509342e-06, "loss": 24.4498, "step": 159710 }, { "epoch": 0.32264450522590365, "grad_norm": 473.041015625, "learning_rate": 8.603481608075638e-06, "loss": 26.1825, "step": 159720 }, { "epoch": 0.3226647058585875, "grad_norm": 673.2293090820312, "learning_rate": 8.603239609079005e-06, "loss": 22.2739, "step": 159730 }, { "epoch": 0.3226849064912713, "grad_norm": 757.7315063476562, "learning_rate": 8.602997592520615e-06, "loss": 14.6222, "step": 159740 }, { "epoch": 0.3227051071239551, "grad_norm": 1660.0133056640625, "learning_rate": 8.602755558401653e-06, "loss": 33.4549, "step": 159750 }, { "epoch": 0.32272530775663893, "grad_norm": 1194.78125, "learning_rate": 8.602513506723298e-06, "loss": 40.946, "step": 159760 }, { "epoch": 0.32274550838932275, "grad_norm": 737.1159057617188, "learning_rate": 8.602271437486728e-06, "loss": 32.7479, "step": 159770 }, { "epoch": 0.3227657090220066, "grad_norm": 257.111328125, "learning_rate": 8.602029350693124e-06, "loss": 39.9, "step": 159780 }, { "epoch": 0.3227859096546904, "grad_norm": 249.00831604003906, "learning_rate": 8.601787246343667e-06, "loss": 9.8765, "step": 159790 }, { "epoch": 0.3228061102873742, "grad_norm": 406.1790771484375, 
"learning_rate": 8.601545124439535e-06, "loss": 17.3077, "step": 159800 }, { "epoch": 0.32282631092005803, "grad_norm": 0.0, "learning_rate": 8.60130298498191e-06, "loss": 16.5663, "step": 159810 }, { "epoch": 0.32284651155274185, "grad_norm": 348.7825012207031, "learning_rate": 8.60106082797197e-06, "loss": 15.3467, "step": 159820 }, { "epoch": 0.3228667121854257, "grad_norm": 175.0106201171875, "learning_rate": 8.600818653410895e-06, "loss": 25.3356, "step": 159830 }, { "epoch": 0.32288691281810944, "grad_norm": 206.51332092285156, "learning_rate": 8.600576461299869e-06, "loss": 37.4376, "step": 159840 }, { "epoch": 0.32290711345079326, "grad_norm": 469.91583251953125, "learning_rate": 8.60033425164007e-06, "loss": 19.0019, "step": 159850 }, { "epoch": 0.3229273140834771, "grad_norm": 347.607177734375, "learning_rate": 8.600092024432676e-06, "loss": 17.3698, "step": 159860 }, { "epoch": 0.3229475147161609, "grad_norm": 553.8758544921875, "learning_rate": 8.599849779678872e-06, "loss": 31.0398, "step": 159870 }, { "epoch": 0.3229677153488447, "grad_norm": 466.9158935546875, "learning_rate": 8.599607517379837e-06, "loss": 23.9702, "step": 159880 }, { "epoch": 0.32298791598152854, "grad_norm": 224.60279846191406, "learning_rate": 8.599365237536751e-06, "loss": 21.599, "step": 159890 }, { "epoch": 0.32300811661421236, "grad_norm": 340.7933044433594, "learning_rate": 8.599122940150795e-06, "loss": 28.8004, "step": 159900 }, { "epoch": 0.3230283172468962, "grad_norm": 138.8593292236328, "learning_rate": 8.598880625223152e-06, "loss": 15.2423, "step": 159910 }, { "epoch": 0.32304851787958, "grad_norm": 374.0597229003906, "learning_rate": 8.598638292755e-06, "loss": 23.8807, "step": 159920 }, { "epoch": 0.3230687185122638, "grad_norm": 364.0459289550781, "learning_rate": 8.59839594274752e-06, "loss": 30.2405, "step": 159930 }, { "epoch": 0.32308891914494764, "grad_norm": 546.2945556640625, "learning_rate": 8.598153575201897e-06, "loss": 24.3799, "step": 159940 }, { "epoch": 0.32310911977763146, "grad_norm": 342.47540283203125, "learning_rate": 8.597911190119308e-06, "loss": 30.9596, "step": 159950 }, { "epoch": 0.3231293204103152, "grad_norm": 0.0, "learning_rate": 8.597668787500937e-06, "loss": 6.6189, "step": 159960 }, { "epoch": 0.32314952104299904, "grad_norm": 494.8405456542969, "learning_rate": 8.597426367347965e-06, "loss": 34.9544, "step": 159970 }, { "epoch": 0.32316972167568286, "grad_norm": 348.3296813964844, "learning_rate": 8.597183929661573e-06, "loss": 23.1549, "step": 159980 }, { "epoch": 0.3231899223083667, "grad_norm": 335.0788879394531, "learning_rate": 8.596941474442943e-06, "loss": 12.827, "step": 159990 }, { "epoch": 0.3232101229410505, "grad_norm": 357.87774658203125, "learning_rate": 8.596699001693257e-06, "loss": 13.433, "step": 160000 }, { "epoch": 0.3232303235737343, "grad_norm": 868.0357666015625, "learning_rate": 8.596456511413695e-06, "loss": 35.5549, "step": 160010 }, { "epoch": 0.32325052420641814, "grad_norm": 466.7880859375, "learning_rate": 8.59621400360544e-06, "loss": 22.8039, "step": 160020 }, { "epoch": 0.32327072483910196, "grad_norm": 747.6487426757812, "learning_rate": 8.595971478269675e-06, "loss": 15.5572, "step": 160030 }, { "epoch": 0.3232909254717858, "grad_norm": 622.7278442382812, "learning_rate": 8.59572893540758e-06, "loss": 14.0454, "step": 160040 }, { "epoch": 0.3233111261044696, "grad_norm": 572.1239624023438, "learning_rate": 8.59548637502034e-06, "loss": 16.4945, "step": 160050 }, { "epoch": 0.3233313267371534, "grad_norm": 
303.442138671875, "learning_rate": 8.595243797109137e-06, "loss": 27.6703, "step": 160060 }, { "epoch": 0.32335152736983724, "grad_norm": 162.61599731445312, "learning_rate": 8.595001201675149e-06, "loss": 24.7356, "step": 160070 }, { "epoch": 0.32337172800252106, "grad_norm": 386.8169860839844, "learning_rate": 8.594758588719562e-06, "loss": 19.3219, "step": 160080 }, { "epoch": 0.32339192863520483, "grad_norm": 449.1678771972656, "learning_rate": 8.594515958243557e-06, "loss": 29.9948, "step": 160090 }, { "epoch": 0.32341212926788865, "grad_norm": 598.8031005859375, "learning_rate": 8.594273310248317e-06, "loss": 18.6745, "step": 160100 }, { "epoch": 0.32343232990057247, "grad_norm": 197.71539306640625, "learning_rate": 8.594030644735025e-06, "loss": 17.4596, "step": 160110 }, { "epoch": 0.3234525305332563, "grad_norm": 338.89697265625, "learning_rate": 8.593787961704864e-06, "loss": 17.5787, "step": 160120 }, { "epoch": 0.3234727311659401, "grad_norm": 657.6072387695312, "learning_rate": 8.593545261159017e-06, "loss": 30.781, "step": 160130 }, { "epoch": 0.32349293179862393, "grad_norm": 408.7171325683594, "learning_rate": 8.593302543098666e-06, "loss": 18.7955, "step": 160140 }, { "epoch": 0.32351313243130775, "grad_norm": 999.4275512695312, "learning_rate": 8.593059807524993e-06, "loss": 30.9812, "step": 160150 }, { "epoch": 0.32353333306399157, "grad_norm": 232.17300415039062, "learning_rate": 8.592817054439184e-06, "loss": 14.6789, "step": 160160 }, { "epoch": 0.3235535336966754, "grad_norm": 59.33561706542969, "learning_rate": 8.592574283842418e-06, "loss": 8.6261, "step": 160170 }, { "epoch": 0.3235737343293592, "grad_norm": 464.56915283203125, "learning_rate": 8.592331495735884e-06, "loss": 13.9509, "step": 160180 }, { "epoch": 0.32359393496204303, "grad_norm": 299.0964660644531, "learning_rate": 8.592088690120759e-06, "loss": 17.5647, "step": 160190 }, { "epoch": 0.32361413559472685, "grad_norm": 600.798583984375, "learning_rate": 8.591845866998231e-06, "loss": 26.1429, "step": 160200 }, { "epoch": 0.32363433622741067, "grad_norm": 1665.239501953125, "learning_rate": 8.591603026369481e-06, "loss": 30.2836, "step": 160210 }, { "epoch": 0.32365453686009443, "grad_norm": 258.0927734375, "learning_rate": 8.591360168235694e-06, "loss": 8.8683, "step": 160220 }, { "epoch": 0.32367473749277825, "grad_norm": 188.40628051757812, "learning_rate": 8.591117292598053e-06, "loss": 26.9318, "step": 160230 }, { "epoch": 0.3236949381254621, "grad_norm": 330.8376159667969, "learning_rate": 8.590874399457743e-06, "loss": 33.8913, "step": 160240 }, { "epoch": 0.3237151387581459, "grad_norm": 123.2463607788086, "learning_rate": 8.590631488815945e-06, "loss": 24.2562, "step": 160250 }, { "epoch": 0.3237353393908297, "grad_norm": 210.2475128173828, "learning_rate": 8.590388560673846e-06, "loss": 23.3481, "step": 160260 }, { "epoch": 0.32375554002351353, "grad_norm": 1193.0272216796875, "learning_rate": 8.590145615032626e-06, "loss": 18.3568, "step": 160270 }, { "epoch": 0.32377574065619735, "grad_norm": 1069.4720458984375, "learning_rate": 8.589902651893474e-06, "loss": 41.682, "step": 160280 }, { "epoch": 0.3237959412888812, "grad_norm": 1540.3475341796875, "learning_rate": 8.589659671257573e-06, "loss": 21.1678, "step": 160290 }, { "epoch": 0.323816141921565, "grad_norm": 185.64068603515625, "learning_rate": 8.589416673126104e-06, "loss": 17.0713, "step": 160300 }, { "epoch": 0.3238363425542488, "grad_norm": 284.4470520019531, "learning_rate": 8.589173657500254e-06, "loss": 19.351, "step": 160310 }, 
{ "epoch": 0.32385654318693263, "grad_norm": 267.5775146484375, "learning_rate": 8.588930624381207e-06, "loss": 10.3574, "step": 160320 }, { "epoch": 0.32387674381961645, "grad_norm": 884.9189453125, "learning_rate": 8.588687573770146e-06, "loss": 22.3544, "step": 160330 }, { "epoch": 0.3238969444523003, "grad_norm": 6.199318885803223, "learning_rate": 8.588444505668259e-06, "loss": 26.2884, "step": 160340 }, { "epoch": 0.32391714508498404, "grad_norm": 197.96551513671875, "learning_rate": 8.588201420076727e-06, "loss": 35.7713, "step": 160350 }, { "epoch": 0.32393734571766786, "grad_norm": 1003.1591796875, "learning_rate": 8.587958316996739e-06, "loss": 19.3143, "step": 160360 }, { "epoch": 0.3239575463503517, "grad_norm": 284.5838928222656, "learning_rate": 8.587715196429477e-06, "loss": 23.5706, "step": 160370 }, { "epoch": 0.3239777469830355, "grad_norm": 20.341636657714844, "learning_rate": 8.587472058376122e-06, "loss": 30.0062, "step": 160380 }, { "epoch": 0.3239979476157193, "grad_norm": 960.5979614257812, "learning_rate": 8.587228902837868e-06, "loss": 29.1643, "step": 160390 }, { "epoch": 0.32401814824840314, "grad_norm": 328.1547546386719, "learning_rate": 8.586985729815895e-06, "loss": 25.4238, "step": 160400 }, { "epoch": 0.32403834888108696, "grad_norm": 336.5776062011719, "learning_rate": 8.586742539311385e-06, "loss": 15.6985, "step": 160410 }, { "epoch": 0.3240585495137708, "grad_norm": 253.26870727539062, "learning_rate": 8.58649933132553e-06, "loss": 21.2331, "step": 160420 }, { "epoch": 0.3240787501464546, "grad_norm": 1049.53076171875, "learning_rate": 8.586256105859512e-06, "loss": 32.0964, "step": 160430 }, { "epoch": 0.3240989507791384, "grad_norm": 474.3785095214844, "learning_rate": 8.586012862914517e-06, "loss": 28.0231, "step": 160440 }, { "epoch": 0.32411915141182224, "grad_norm": 570.814208984375, "learning_rate": 8.585769602491729e-06, "loss": 29.7923, "step": 160450 }, { "epoch": 0.32413935204450606, "grad_norm": 317.1396789550781, "learning_rate": 8.585526324592335e-06, "loss": 22.6866, "step": 160460 }, { "epoch": 0.3241595526771899, "grad_norm": 509.0378723144531, "learning_rate": 8.585283029217521e-06, "loss": 24.7845, "step": 160470 }, { "epoch": 0.32417975330987364, "grad_norm": 207.631591796875, "learning_rate": 8.585039716368473e-06, "loss": 14.4742, "step": 160480 }, { "epoch": 0.32419995394255746, "grad_norm": 179.84829711914062, "learning_rate": 8.584796386046374e-06, "loss": 18.566, "step": 160490 }, { "epoch": 0.3242201545752413, "grad_norm": 511.96466064453125, "learning_rate": 8.584553038252415e-06, "loss": 20.0971, "step": 160500 }, { "epoch": 0.3242403552079251, "grad_norm": 366.3064270019531, "learning_rate": 8.584309672987778e-06, "loss": 18.8068, "step": 160510 }, { "epoch": 0.3242605558406089, "grad_norm": 839.582763671875, "learning_rate": 8.584066290253649e-06, "loss": 25.166, "step": 160520 }, { "epoch": 0.32428075647329274, "grad_norm": 516.114013671875, "learning_rate": 8.583822890051217e-06, "loss": 26.7551, "step": 160530 }, { "epoch": 0.32430095710597656, "grad_norm": 115.7437744140625, "learning_rate": 8.583579472381668e-06, "loss": 12.1438, "step": 160540 }, { "epoch": 0.3243211577386604, "grad_norm": 849.4694213867188, "learning_rate": 8.583336037246187e-06, "loss": 40.4061, "step": 160550 }, { "epoch": 0.3243413583713442, "grad_norm": 356.7700500488281, "learning_rate": 8.58309258464596e-06, "loss": 33.7424, "step": 160560 }, { "epoch": 0.324361559004028, "grad_norm": 592.692138671875, "learning_rate": 8.582849114582173e-06, 
"loss": 12.2936, "step": 160570 }, { "epoch": 0.32438175963671184, "grad_norm": 95.89431762695312, "learning_rate": 8.582605627056016e-06, "loss": 17.5404, "step": 160580 }, { "epoch": 0.32440196026939566, "grad_norm": 265.5685729980469, "learning_rate": 8.582362122068673e-06, "loss": 13.4172, "step": 160590 }, { "epoch": 0.3244221609020794, "grad_norm": 416.9441223144531, "learning_rate": 8.58211859962133e-06, "loss": 25.3335, "step": 160600 }, { "epoch": 0.32444236153476325, "grad_norm": 138.38201904296875, "learning_rate": 8.581875059715177e-06, "loss": 30.2629, "step": 160610 }, { "epoch": 0.32446256216744707, "grad_norm": 112.8659896850586, "learning_rate": 8.5816315023514e-06, "loss": 19.0405, "step": 160620 }, { "epoch": 0.3244827628001309, "grad_norm": 385.9313659667969, "learning_rate": 8.581387927531184e-06, "loss": 12.0313, "step": 160630 }, { "epoch": 0.3245029634328147, "grad_norm": 121.29768371582031, "learning_rate": 8.581144335255717e-06, "loss": 12.3851, "step": 160640 }, { "epoch": 0.3245231640654985, "grad_norm": 224.8111114501953, "learning_rate": 8.580900725526189e-06, "loss": 23.353, "step": 160650 }, { "epoch": 0.32454336469818235, "grad_norm": 308.1531982421875, "learning_rate": 8.580657098343786e-06, "loss": 17.705, "step": 160660 }, { "epoch": 0.32456356533086617, "grad_norm": 256.0937194824219, "learning_rate": 8.58041345370969e-06, "loss": 18.9161, "step": 160670 }, { "epoch": 0.32458376596355, "grad_norm": 442.410400390625, "learning_rate": 8.580169791625097e-06, "loss": 16.9792, "step": 160680 }, { "epoch": 0.3246039665962338, "grad_norm": 1028.3814697265625, "learning_rate": 8.57992611209119e-06, "loss": 34.2166, "step": 160690 }, { "epoch": 0.3246241672289176, "grad_norm": 268.5386962890625, "learning_rate": 8.579682415109156e-06, "loss": 34.551, "step": 160700 }, { "epoch": 0.32464436786160145, "grad_norm": 503.40478515625, "learning_rate": 8.579438700680184e-06, "loss": 21.9579, "step": 160710 }, { "epoch": 0.32466456849428527, "grad_norm": 468.3645935058594, "learning_rate": 8.579194968805464e-06, "loss": 22.4314, "step": 160720 }, { "epoch": 0.32468476912696903, "grad_norm": 217.84596252441406, "learning_rate": 8.57895121948618e-06, "loss": 15.8868, "step": 160730 }, { "epoch": 0.32470496975965285, "grad_norm": 164.38002014160156, "learning_rate": 8.578707452723524e-06, "loss": 23.0477, "step": 160740 }, { "epoch": 0.32472517039233667, "grad_norm": 813.5709838867188, "learning_rate": 8.57846366851868e-06, "loss": 22.8599, "step": 160750 }, { "epoch": 0.3247453710250205, "grad_norm": 672.2219848632812, "learning_rate": 8.57821986687284e-06, "loss": 12.0255, "step": 160760 }, { "epoch": 0.3247655716577043, "grad_norm": 2.353846549987793, "learning_rate": 8.577976047787187e-06, "loss": 14.1876, "step": 160770 }, { "epoch": 0.32478577229038813, "grad_norm": 265.181640625, "learning_rate": 8.577732211262914e-06, "loss": 20.4124, "step": 160780 }, { "epoch": 0.32480597292307195, "grad_norm": 476.9296569824219, "learning_rate": 8.577488357301209e-06, "loss": 20.8482, "step": 160790 }, { "epoch": 0.32482617355575577, "grad_norm": 391.3948669433594, "learning_rate": 8.57724448590326e-06, "loss": 20.8304, "step": 160800 }, { "epoch": 0.3248463741884396, "grad_norm": 629.4627685546875, "learning_rate": 8.577000597070256e-06, "loss": 15.2627, "step": 160810 }, { "epoch": 0.3248665748211234, "grad_norm": 297.3624267578125, "learning_rate": 8.576756690803382e-06, "loss": 13.3768, "step": 160820 }, { "epoch": 0.32488677545380723, "grad_norm": 481.56170654296875, 
"learning_rate": 8.576512767103831e-06, "loss": 12.0114, "step": 160830 }, { "epoch": 0.32490697608649105, "grad_norm": 341.4563903808594, "learning_rate": 8.576268825972791e-06, "loss": 39.4003, "step": 160840 }, { "epoch": 0.32492717671917487, "grad_norm": 528.8344116210938, "learning_rate": 8.576024867411452e-06, "loss": 32.0481, "step": 160850 }, { "epoch": 0.32494737735185864, "grad_norm": 534.1671142578125, "learning_rate": 8.575780891420998e-06, "loss": 29.5159, "step": 160860 }, { "epoch": 0.32496757798454246, "grad_norm": 75.37045288085938, "learning_rate": 8.575536898002623e-06, "loss": 7.5759, "step": 160870 }, { "epoch": 0.3249877786172263, "grad_norm": 289.7796630859375, "learning_rate": 8.575292887157515e-06, "loss": 17.507, "step": 160880 }, { "epoch": 0.3250079792499101, "grad_norm": 520.741455078125, "learning_rate": 8.575048858886865e-06, "loss": 17.889, "step": 160890 }, { "epoch": 0.3250281798825939, "grad_norm": 189.44778442382812, "learning_rate": 8.574804813191859e-06, "loss": 14.8586, "step": 160900 }, { "epoch": 0.32504838051527774, "grad_norm": 376.878662109375, "learning_rate": 8.574560750073687e-06, "loss": 14.024, "step": 160910 }, { "epoch": 0.32506858114796156, "grad_norm": 679.778564453125, "learning_rate": 8.57431666953354e-06, "loss": 24.7067, "step": 160920 }, { "epoch": 0.3250887817806454, "grad_norm": 486.84967041015625, "learning_rate": 8.574072571572606e-06, "loss": 14.1587, "step": 160930 }, { "epoch": 0.3251089824133292, "grad_norm": 430.3070373535156, "learning_rate": 8.57382845619208e-06, "loss": 16.6094, "step": 160940 }, { "epoch": 0.325129183046013, "grad_norm": 548.0245971679688, "learning_rate": 8.573584323393142e-06, "loss": 23.7725, "step": 160950 }, { "epoch": 0.32514938367869683, "grad_norm": 334.4255676269531, "learning_rate": 8.57334017317699e-06, "loss": 21.4106, "step": 160960 }, { "epoch": 0.32516958431138065, "grad_norm": 515.2789306640625, "learning_rate": 8.573096005544812e-06, "loss": 30.5712, "step": 160970 }, { "epoch": 0.3251897849440645, "grad_norm": 460.02093505859375, "learning_rate": 8.572851820497797e-06, "loss": 15.8288, "step": 160980 }, { "epoch": 0.32520998557674824, "grad_norm": 614.912109375, "learning_rate": 8.572607618037137e-06, "loss": 19.0982, "step": 160990 }, { "epoch": 0.32523018620943206, "grad_norm": 266.36419677734375, "learning_rate": 8.572363398164017e-06, "loss": 25.8255, "step": 161000 }, { "epoch": 0.3252503868421159, "grad_norm": 472.96392822265625, "learning_rate": 8.572119160879633e-06, "loss": 21.8668, "step": 161010 }, { "epoch": 0.3252705874747997, "grad_norm": 497.37042236328125, "learning_rate": 8.571874906185175e-06, "loss": 28.2013, "step": 161020 }, { "epoch": 0.3252907881074835, "grad_norm": 74.47261810302734, "learning_rate": 8.57163063408183e-06, "loss": 18.9175, "step": 161030 }, { "epoch": 0.32531098874016734, "grad_norm": 302.9626159667969, "learning_rate": 8.571386344570791e-06, "loss": 41.7581, "step": 161040 }, { "epoch": 0.32533118937285116, "grad_norm": 477.74627685546875, "learning_rate": 8.571142037653249e-06, "loss": 26.5132, "step": 161050 }, { "epoch": 0.325351390005535, "grad_norm": 445.6697998046875, "learning_rate": 8.570897713330392e-06, "loss": 24.8526, "step": 161060 }, { "epoch": 0.3253715906382188, "grad_norm": 445.0180358886719, "learning_rate": 8.570653371603414e-06, "loss": 20.6618, "step": 161070 }, { "epoch": 0.3253917912709026, "grad_norm": 304.4143981933594, "learning_rate": 8.570409012473503e-06, "loss": 20.7598, "step": 161080 }, { "epoch": 
0.32541199190358644, "grad_norm": 9.344650268554688, "learning_rate": 8.570164635941853e-06, "loss": 25.8202, "step": 161090 }, { "epoch": 0.32543219253627026, "grad_norm": 507.6846008300781, "learning_rate": 8.569920242009655e-06, "loss": 25.5562, "step": 161100 }, { "epoch": 0.325452393168954, "grad_norm": 291.4764404296875, "learning_rate": 8.569675830678097e-06, "loss": 13.4058, "step": 161110 }, { "epoch": 0.32547259380163784, "grad_norm": 669.7941284179688, "learning_rate": 8.569431401948371e-06, "loss": 28.1941, "step": 161120 }, { "epoch": 0.32549279443432166, "grad_norm": 108.53662872314453, "learning_rate": 8.56918695582167e-06, "loss": 21.114, "step": 161130 }, { "epoch": 0.3255129950670055, "grad_norm": 185.5521697998047, "learning_rate": 8.568942492299186e-06, "loss": 19.8685, "step": 161140 }, { "epoch": 0.3255331956996893, "grad_norm": 737.6587524414062, "learning_rate": 8.568698011382108e-06, "loss": 19.6524, "step": 161150 }, { "epoch": 0.3255533963323731, "grad_norm": 523.246337890625, "learning_rate": 8.568453513071628e-06, "loss": 30.4347, "step": 161160 }, { "epoch": 0.32557359696505694, "grad_norm": 302.4512939453125, "learning_rate": 8.568208997368938e-06, "loss": 15.7047, "step": 161170 }, { "epoch": 0.32559379759774076, "grad_norm": 406.25714111328125, "learning_rate": 8.567964464275233e-06, "loss": 13.2557, "step": 161180 }, { "epoch": 0.3256139982304246, "grad_norm": 238.0476837158203, "learning_rate": 8.5677199137917e-06, "loss": 29.0175, "step": 161190 }, { "epoch": 0.3256341988631084, "grad_norm": 375.2989501953125, "learning_rate": 8.567475345919532e-06, "loss": 29.475, "step": 161200 }, { "epoch": 0.3256543994957922, "grad_norm": 277.81304931640625, "learning_rate": 8.567230760659924e-06, "loss": 15.3525, "step": 161210 }, { "epoch": 0.32567460012847604, "grad_norm": 177.0699462890625, "learning_rate": 8.566986158014065e-06, "loss": 17.9513, "step": 161220 }, { "epoch": 0.32569480076115986, "grad_norm": 441.58746337890625, "learning_rate": 8.566741537983147e-06, "loss": 18.4316, "step": 161230 }, { "epoch": 0.32571500139384363, "grad_norm": 800.390380859375, "learning_rate": 8.566496900568364e-06, "loss": 18.4686, "step": 161240 }, { "epoch": 0.32573520202652745, "grad_norm": 692.088623046875, "learning_rate": 8.56625224577091e-06, "loss": 18.8764, "step": 161250 }, { "epoch": 0.32575540265921127, "grad_norm": 679.6253662109375, "learning_rate": 8.566007573591972e-06, "loss": 13.6277, "step": 161260 }, { "epoch": 0.3257756032918951, "grad_norm": 403.29766845703125, "learning_rate": 8.565762884032747e-06, "loss": 20.9267, "step": 161270 }, { "epoch": 0.3257958039245789, "grad_norm": 545.2127075195312, "learning_rate": 8.565518177094425e-06, "loss": 25.598, "step": 161280 }, { "epoch": 0.32581600455726273, "grad_norm": 545.4293212890625, "learning_rate": 8.5652734527782e-06, "loss": 16.6594, "step": 161290 }, { "epoch": 0.32583620518994655, "grad_norm": 578.7387084960938, "learning_rate": 8.565028711085266e-06, "loss": 29.5076, "step": 161300 }, { "epoch": 0.32585640582263037, "grad_norm": 924.8513793945312, "learning_rate": 8.564783952016813e-06, "loss": 19.9818, "step": 161310 }, { "epoch": 0.3258766064553142, "grad_norm": 59.36968231201172, "learning_rate": 8.564539175574035e-06, "loss": 16.2665, "step": 161320 }, { "epoch": 0.325896807087998, "grad_norm": 536.894775390625, "learning_rate": 8.564294381758128e-06, "loss": 12.6896, "step": 161330 }, { "epoch": 0.32591700772068183, "grad_norm": 159.65232849121094, "learning_rate": 8.56404957057028e-06, "loss": 
24.8131, "step": 161340 }, { "epoch": 0.32593720835336565, "grad_norm": 129.59710693359375, "learning_rate": 8.563804742011689e-06, "loss": 21.0008, "step": 161350 }, { "epoch": 0.32595740898604947, "grad_norm": 490.70501708984375, "learning_rate": 8.563559896083544e-06, "loss": 24.1454, "step": 161360 }, { "epoch": 0.32597760961873323, "grad_norm": 496.4228210449219, "learning_rate": 8.56331503278704e-06, "loss": 34.2492, "step": 161370 }, { "epoch": 0.32599781025141705, "grad_norm": 339.2381896972656, "learning_rate": 8.563070152123372e-06, "loss": 11.0371, "step": 161380 }, { "epoch": 0.3260180108841009, "grad_norm": 487.29449462890625, "learning_rate": 8.562825254093732e-06, "loss": 13.583, "step": 161390 }, { "epoch": 0.3260382115167847, "grad_norm": 442.0322570800781, "learning_rate": 8.562580338699313e-06, "loss": 28.2066, "step": 161400 }, { "epoch": 0.3260584121494685, "grad_norm": 523.257568359375, "learning_rate": 8.56233540594131e-06, "loss": 17.8132, "step": 161410 }, { "epoch": 0.32607861278215233, "grad_norm": 177.55528259277344, "learning_rate": 8.562090455820918e-06, "loss": 35.7883, "step": 161420 }, { "epoch": 0.32609881341483615, "grad_norm": 1586.7711181640625, "learning_rate": 8.561845488339327e-06, "loss": 30.0223, "step": 161430 }, { "epoch": 0.32611901404752, "grad_norm": 35.832515716552734, "learning_rate": 8.561600503497734e-06, "loss": 34.6228, "step": 161440 }, { "epoch": 0.3261392146802038, "grad_norm": 563.1609497070312, "learning_rate": 8.56135550129733e-06, "loss": 29.8364, "step": 161450 }, { "epoch": 0.3261594153128876, "grad_norm": 74.82799530029297, "learning_rate": 8.561110481739314e-06, "loss": 16.1139, "step": 161460 }, { "epoch": 0.32617961594557143, "grad_norm": 4.64946174621582, "learning_rate": 8.560865444824875e-06, "loss": 28.9766, "step": 161470 }, { "epoch": 0.32619981657825525, "grad_norm": 492.2882385253906, "learning_rate": 8.560620390555212e-06, "loss": 19.2768, "step": 161480 }, { "epoch": 0.3262200172109391, "grad_norm": 549.025146484375, "learning_rate": 8.560375318931517e-06, "loss": 13.379, "step": 161490 }, { "epoch": 0.32624021784362284, "grad_norm": 261.4835510253906, "learning_rate": 8.560130229954985e-06, "loss": 25.0308, "step": 161500 }, { "epoch": 0.32626041847630666, "grad_norm": 844.3115234375, "learning_rate": 8.559885123626806e-06, "loss": 22.5069, "step": 161510 }, { "epoch": 0.3262806191089905, "grad_norm": 349.45947265625, "learning_rate": 8.559639999948181e-06, "loss": 16.8482, "step": 161520 }, { "epoch": 0.3263008197416743, "grad_norm": 401.1449890136719, "learning_rate": 8.559394858920304e-06, "loss": 21.0563, "step": 161530 }, { "epoch": 0.3263210203743581, "grad_norm": 277.7603759765625, "learning_rate": 8.559149700544367e-06, "loss": 26.0529, "step": 161540 }, { "epoch": 0.32634122100704194, "grad_norm": 375.0739440917969, "learning_rate": 8.558904524821565e-06, "loss": 7.5684, "step": 161550 }, { "epoch": 0.32636142163972576, "grad_norm": 423.8526306152344, "learning_rate": 8.558659331753096e-06, "loss": 23.3725, "step": 161560 }, { "epoch": 0.3263816222724096, "grad_norm": 132.01666259765625, "learning_rate": 8.558414121340152e-06, "loss": 23.1224, "step": 161570 }, { "epoch": 0.3264018229050934, "grad_norm": 469.2595520019531, "learning_rate": 8.55816889358393e-06, "loss": 11.6602, "step": 161580 }, { "epoch": 0.3264220235377772, "grad_norm": 1435.6239013671875, "learning_rate": 8.557923648485622e-06, "loss": 32.211, "step": 161590 }, { "epoch": 0.32644222417046104, "grad_norm": 1065.9990234375, 
"learning_rate": 8.557678386046429e-06, "loss": 30.3401, "step": 161600 }, { "epoch": 0.32646242480314486, "grad_norm": 171.49520874023438, "learning_rate": 8.55743310626754e-06, "loss": 9.6927, "step": 161610 }, { "epoch": 0.3264826254358287, "grad_norm": 396.7803649902344, "learning_rate": 8.557187809150154e-06, "loss": 15.5282, "step": 161620 }, { "epoch": 0.32650282606851244, "grad_norm": 829.1217041015625, "learning_rate": 8.556942494695467e-06, "loss": 16.5586, "step": 161630 }, { "epoch": 0.32652302670119626, "grad_norm": 231.0962677001953, "learning_rate": 8.556697162904674e-06, "loss": 34.7648, "step": 161640 }, { "epoch": 0.3265432273338801, "grad_norm": 620.1940307617188, "learning_rate": 8.55645181377897e-06, "loss": 18.0748, "step": 161650 }, { "epoch": 0.3265634279665639, "grad_norm": 358.02386474609375, "learning_rate": 8.55620644731955e-06, "loss": 26.7937, "step": 161660 }, { "epoch": 0.3265836285992477, "grad_norm": 451.89056396484375, "learning_rate": 8.555961063527612e-06, "loss": 24.4252, "step": 161670 }, { "epoch": 0.32660382923193154, "grad_norm": 157.92124938964844, "learning_rate": 8.555715662404352e-06, "loss": 9.6293, "step": 161680 }, { "epoch": 0.32662402986461536, "grad_norm": 262.64215087890625, "learning_rate": 8.555470243950963e-06, "loss": 14.5127, "step": 161690 }, { "epoch": 0.3266442304972992, "grad_norm": 518.3018798828125, "learning_rate": 8.555224808168644e-06, "loss": 22.2709, "step": 161700 }, { "epoch": 0.326664431129983, "grad_norm": 341.222900390625, "learning_rate": 8.554979355058593e-06, "loss": 19.9243, "step": 161710 }, { "epoch": 0.3266846317626668, "grad_norm": 255.33712768554688, "learning_rate": 8.554733884622003e-06, "loss": 22.7051, "step": 161720 }, { "epoch": 0.32670483239535064, "grad_norm": 495.9708557128906, "learning_rate": 8.554488396860069e-06, "loss": 28.9186, "step": 161730 }, { "epoch": 0.32672503302803446, "grad_norm": 247.82550048828125, "learning_rate": 8.55424289177399e-06, "loss": 19.3723, "step": 161740 }, { "epoch": 0.3267452336607182, "grad_norm": 276.525634765625, "learning_rate": 8.553997369364964e-06, "loss": 18.7027, "step": 161750 }, { "epoch": 0.32676543429340205, "grad_norm": 4.652155876159668, "learning_rate": 8.553751829634184e-06, "loss": 13.5415, "step": 161760 }, { "epoch": 0.32678563492608587, "grad_norm": 67.46437072753906, "learning_rate": 8.55350627258285e-06, "loss": 20.1329, "step": 161770 }, { "epoch": 0.3268058355587697, "grad_norm": 367.9119873046875, "learning_rate": 8.553260698212156e-06, "loss": 13.7456, "step": 161780 }, { "epoch": 0.3268260361914535, "grad_norm": 565.5093383789062, "learning_rate": 8.5530151065233e-06, "loss": 29.1452, "step": 161790 }, { "epoch": 0.3268462368241373, "grad_norm": 883.661376953125, "learning_rate": 8.55276949751748e-06, "loss": 20.8828, "step": 161800 }, { "epoch": 0.32686643745682115, "grad_norm": 380.5515441894531, "learning_rate": 8.552523871195895e-06, "loss": 27.1904, "step": 161810 }, { "epoch": 0.32688663808950497, "grad_norm": 349.1186828613281, "learning_rate": 8.552278227559736e-06, "loss": 34.3681, "step": 161820 }, { "epoch": 0.3269068387221888, "grad_norm": 65.8606948852539, "learning_rate": 8.552032566610206e-06, "loss": 13.1881, "step": 161830 }, { "epoch": 0.3269270393548726, "grad_norm": 518.0609741210938, "learning_rate": 8.551786888348499e-06, "loss": 22.1906, "step": 161840 }, { "epoch": 0.3269472399875564, "grad_norm": 556.7715454101562, "learning_rate": 8.551541192775813e-06, "loss": 40.4598, "step": 161850 }, { "epoch": 
0.32696744062024025, "grad_norm": 127.9832534790039, "learning_rate": 8.551295479893347e-06, "loss": 22.3926, "step": 161860 }, { "epoch": 0.32698764125292407, "grad_norm": 329.7506103515625, "learning_rate": 8.551049749702298e-06, "loss": 25.5638, "step": 161870 }, { "epoch": 0.32700784188560783, "grad_norm": 387.6360168457031, "learning_rate": 8.550804002203862e-06, "loss": 16.2738, "step": 161880 }, { "epoch": 0.32702804251829165, "grad_norm": 289.0902404785156, "learning_rate": 8.550558237399238e-06, "loss": 15.9065, "step": 161890 }, { "epoch": 0.32704824315097547, "grad_norm": 365.9407958984375, "learning_rate": 8.550312455289624e-06, "loss": 19.6126, "step": 161900 }, { "epoch": 0.3270684437836593, "grad_norm": 0.0, "learning_rate": 8.550066655876219e-06, "loss": 21.1061, "step": 161910 }, { "epoch": 0.3270886444163431, "grad_norm": 775.4854125976562, "learning_rate": 8.549820839160217e-06, "loss": 20.7549, "step": 161920 }, { "epoch": 0.32710884504902693, "grad_norm": 369.57037353515625, "learning_rate": 8.54957500514282e-06, "loss": 25.828, "step": 161930 }, { "epoch": 0.32712904568171075, "grad_norm": 518.6697387695312, "learning_rate": 8.549329153825226e-06, "loss": 15.5915, "step": 161940 }, { "epoch": 0.32714924631439457, "grad_norm": 683.3680419921875, "learning_rate": 8.549083285208632e-06, "loss": 18.4541, "step": 161950 }, { "epoch": 0.3271694469470784, "grad_norm": 341.146240234375, "learning_rate": 8.548837399294235e-06, "loss": 45.243, "step": 161960 }, { "epoch": 0.3271896475797622, "grad_norm": 262.21697998046875, "learning_rate": 8.548591496083236e-06, "loss": 27.1111, "step": 161970 }, { "epoch": 0.32720984821244603, "grad_norm": 400.15399169921875, "learning_rate": 8.548345575576832e-06, "loss": 33.4267, "step": 161980 }, { "epoch": 0.32723004884512985, "grad_norm": 596.1068725585938, "learning_rate": 8.548099637776222e-06, "loss": 16.6739, "step": 161990 }, { "epoch": 0.32725024947781367, "grad_norm": 304.2925720214844, "learning_rate": 8.547853682682605e-06, "loss": 11.4405, "step": 162000 }, { "epoch": 0.32727045011049744, "grad_norm": 210.50241088867188, "learning_rate": 8.54760771029718e-06, "loss": 15.9007, "step": 162010 }, { "epoch": 0.32729065074318126, "grad_norm": 563.9810791015625, "learning_rate": 8.547361720621144e-06, "loss": 25.1085, "step": 162020 }, { "epoch": 0.3273108513758651, "grad_norm": 838.3320922851562, "learning_rate": 8.547115713655698e-06, "loss": 21.7204, "step": 162030 }, { "epoch": 0.3273310520085489, "grad_norm": 238.96920776367188, "learning_rate": 8.546869689402042e-06, "loss": 16.9194, "step": 162040 }, { "epoch": 0.3273512526412327, "grad_norm": 187.7772979736328, "learning_rate": 8.54662364786137e-06, "loss": 17.0501, "step": 162050 }, { "epoch": 0.32737145327391654, "grad_norm": 10.696183204650879, "learning_rate": 8.546377589034886e-06, "loss": 28.5379, "step": 162060 }, { "epoch": 0.32739165390660036, "grad_norm": 600.3301391601562, "learning_rate": 8.546131512923787e-06, "loss": 33.8055, "step": 162070 }, { "epoch": 0.3274118545392842, "grad_norm": 326.2858581542969, "learning_rate": 8.545885419529276e-06, "loss": 21.1821, "step": 162080 }, { "epoch": 0.327432055171968, "grad_norm": 270.0640869140625, "learning_rate": 8.545639308852546e-06, "loss": 14.3184, "step": 162090 }, { "epoch": 0.3274522558046518, "grad_norm": 376.1778259277344, "learning_rate": 8.545393180894801e-06, "loss": 24.4655, "step": 162100 }, { "epoch": 0.32747245643733564, "grad_norm": 485.3133239746094, "learning_rate": 8.54514703565724e-06, "loss": 
19.5701, "step": 162110 }, { "epoch": 0.32749265707001946, "grad_norm": 410.0458068847656, "learning_rate": 8.544900873141063e-06, "loss": 10.9461, "step": 162120 }, { "epoch": 0.3275128577027033, "grad_norm": 121.17286682128906, "learning_rate": 8.54465469334747e-06, "loss": 24.1511, "step": 162130 }, { "epoch": 0.32753305833538704, "grad_norm": 688.821044921875, "learning_rate": 8.544408496277657e-06, "loss": 16.0761, "step": 162140 }, { "epoch": 0.32755325896807086, "grad_norm": 969.4783325195312, "learning_rate": 8.544162281932829e-06, "loss": 37.9854, "step": 162150 }, { "epoch": 0.3275734596007547, "grad_norm": 184.92868041992188, "learning_rate": 8.543916050314182e-06, "loss": 21.9242, "step": 162160 }, { "epoch": 0.3275936602334385, "grad_norm": 447.2829895019531, "learning_rate": 8.54366980142292e-06, "loss": 11.3009, "step": 162170 }, { "epoch": 0.3276138608661223, "grad_norm": 169.4679412841797, "learning_rate": 8.54342353526024e-06, "loss": 17.2119, "step": 162180 }, { "epoch": 0.32763406149880614, "grad_norm": 312.2886047363281, "learning_rate": 8.543177251827344e-06, "loss": 21.3861, "step": 162190 }, { "epoch": 0.32765426213148996, "grad_norm": 499.69000244140625, "learning_rate": 8.542930951125432e-06, "loss": 18.3689, "step": 162200 }, { "epoch": 0.3276744627641738, "grad_norm": 365.1938171386719, "learning_rate": 8.542684633155703e-06, "loss": 15.5692, "step": 162210 }, { "epoch": 0.3276946633968576, "grad_norm": 348.21636962890625, "learning_rate": 8.54243829791936e-06, "loss": 26.9492, "step": 162220 }, { "epoch": 0.3277148640295414, "grad_norm": 347.29559326171875, "learning_rate": 8.5421919454176e-06, "loss": 17.5994, "step": 162230 }, { "epoch": 0.32773506466222524, "grad_norm": 616.3199462890625, "learning_rate": 8.54194557565163e-06, "loss": 11.9137, "step": 162240 }, { "epoch": 0.32775526529490906, "grad_norm": 262.7330627441406, "learning_rate": 8.541699188622645e-06, "loss": 15.68, "step": 162250 }, { "epoch": 0.3277754659275929, "grad_norm": 494.26934814453125, "learning_rate": 8.541452784331848e-06, "loss": 28.6482, "step": 162260 }, { "epoch": 0.32779566656027664, "grad_norm": 365.5330505371094, "learning_rate": 8.541206362780439e-06, "loss": 39.957, "step": 162270 }, { "epoch": 0.32781586719296046, "grad_norm": 423.7875671386719, "learning_rate": 8.54095992396962e-06, "loss": 16.2074, "step": 162280 }, { "epoch": 0.3278360678256443, "grad_norm": 551.8334350585938, "learning_rate": 8.540713467900592e-06, "loss": 21.7853, "step": 162290 }, { "epoch": 0.3278562684583281, "grad_norm": 70.32467651367188, "learning_rate": 8.540466994574556e-06, "loss": 27.0091, "step": 162300 }, { "epoch": 0.3278764690910119, "grad_norm": 479.802978515625, "learning_rate": 8.540220503992713e-06, "loss": 28.0744, "step": 162310 }, { "epoch": 0.32789666972369574, "grad_norm": 258.39971923828125, "learning_rate": 8.539973996156265e-06, "loss": 15.1333, "step": 162320 }, { "epoch": 0.32791687035637956, "grad_norm": 546.7047729492188, "learning_rate": 8.539727471066412e-06, "loss": 24.53, "step": 162330 }, { "epoch": 0.3279370709890634, "grad_norm": 489.63507080078125, "learning_rate": 8.539480928724358e-06, "loss": 19.637, "step": 162340 }, { "epoch": 0.3279572716217472, "grad_norm": 347.3206481933594, "learning_rate": 8.539234369131301e-06, "loss": 22.4533, "step": 162350 }, { "epoch": 0.327977472254431, "grad_norm": 297.5228271484375, "learning_rate": 8.538987792288447e-06, "loss": 16.6198, "step": 162360 }, { "epoch": 0.32799767288711484, "grad_norm": 430.5821228027344, 
"learning_rate": 8.538741198196996e-06, "loss": 17.2786, "step": 162370 }, { "epoch": 0.32801787351979866, "grad_norm": 663.9387817382812, "learning_rate": 8.53849458685815e-06, "loss": 19.893, "step": 162380 }, { "epoch": 0.32803807415248243, "grad_norm": 546.9205932617188, "learning_rate": 8.53824795827311e-06, "loss": 23.6222, "step": 162390 }, { "epoch": 0.32805827478516625, "grad_norm": 306.525146484375, "learning_rate": 8.538001312443078e-06, "loss": 20.6085, "step": 162400 }, { "epoch": 0.32807847541785007, "grad_norm": 255.8943328857422, "learning_rate": 8.537754649369256e-06, "loss": 18.243, "step": 162410 }, { "epoch": 0.3280986760505339, "grad_norm": 337.78399658203125, "learning_rate": 8.537507969052848e-06, "loss": 18.7949, "step": 162420 }, { "epoch": 0.3281188766832177, "grad_norm": 106.52259063720703, "learning_rate": 8.537261271495055e-06, "loss": 12.198, "step": 162430 }, { "epoch": 0.32813907731590153, "grad_norm": 191.0532684326172, "learning_rate": 8.537014556697078e-06, "loss": 17.4216, "step": 162440 }, { "epoch": 0.32815927794858535, "grad_norm": 462.94671630859375, "learning_rate": 8.536767824660124e-06, "loss": 22.73, "step": 162450 }, { "epoch": 0.32817947858126917, "grad_norm": 279.56524658203125, "learning_rate": 8.536521075385391e-06, "loss": 13.6735, "step": 162460 }, { "epoch": 0.328199679213953, "grad_norm": 579.7990112304688, "learning_rate": 8.536274308874083e-06, "loss": 19.308, "step": 162470 }, { "epoch": 0.3282198798466368, "grad_norm": 779.7994995117188, "learning_rate": 8.536027525127405e-06, "loss": 27.6808, "step": 162480 }, { "epoch": 0.32824008047932063, "grad_norm": 558.7512817382812, "learning_rate": 8.535780724146553e-06, "loss": 20.9931, "step": 162490 }, { "epoch": 0.32826028111200445, "grad_norm": 1386.6976318359375, "learning_rate": 8.535533905932739e-06, "loss": 45.5163, "step": 162500 }, { "epoch": 0.32828048174468827, "grad_norm": 331.6851501464844, "learning_rate": 8.53528707048716e-06, "loss": 13.4701, "step": 162510 }, { "epoch": 0.32830068237737203, "grad_norm": 233.7537078857422, "learning_rate": 8.535040217811019e-06, "loss": 15.4268, "step": 162520 }, { "epoch": 0.32832088301005585, "grad_norm": 261.6727294921875, "learning_rate": 8.534793347905523e-06, "loss": 17.069, "step": 162530 }, { "epoch": 0.3283410836427397, "grad_norm": 497.7231140136719, "learning_rate": 8.534546460771873e-06, "loss": 30.1122, "step": 162540 }, { "epoch": 0.3283612842754235, "grad_norm": 270.703125, "learning_rate": 8.534299556411272e-06, "loss": 17.4311, "step": 162550 }, { "epoch": 0.3283814849081073, "grad_norm": 343.0119323730469, "learning_rate": 8.534052634824923e-06, "loss": 20.4247, "step": 162560 }, { "epoch": 0.32840168554079113, "grad_norm": 327.3485107421875, "learning_rate": 8.53380569601403e-06, "loss": 12.5775, "step": 162570 }, { "epoch": 0.32842188617347495, "grad_norm": 22.53675079345703, "learning_rate": 8.533558739979796e-06, "loss": 17.3434, "step": 162580 }, { "epoch": 0.3284420868061588, "grad_norm": 609.8348999023438, "learning_rate": 8.533311766723428e-06, "loss": 38.1542, "step": 162590 }, { "epoch": 0.3284622874388426, "grad_norm": 569.4354858398438, "learning_rate": 8.533064776246126e-06, "loss": 14.9503, "step": 162600 }, { "epoch": 0.3284824880715264, "grad_norm": 344.6394958496094, "learning_rate": 8.532817768549092e-06, "loss": 17.9084, "step": 162610 }, { "epoch": 0.32850268870421023, "grad_norm": 312.6901550292969, "learning_rate": 8.532570743633535e-06, "loss": 44.5818, "step": 162620 }, { "epoch": 
0.32852288933689405, "grad_norm": 134.35845947265625, "learning_rate": 8.532323701500657e-06, "loss": 13.8301, "step": 162630 }, { "epoch": 0.3285430899695779, "grad_norm": 392.0881042480469, "learning_rate": 8.532076642151661e-06, "loss": 25.9964, "step": 162640 }, { "epoch": 0.32856329060226164, "grad_norm": 316.1405944824219, "learning_rate": 8.531829565587751e-06, "loss": 20.5279, "step": 162650 }, { "epoch": 0.32858349123494546, "grad_norm": 292.3106384277344, "learning_rate": 8.531582471810134e-06, "loss": 22.3062, "step": 162660 }, { "epoch": 0.3286036918676293, "grad_norm": 419.2328186035156, "learning_rate": 8.53133536082001e-06, "loss": 22.9919, "step": 162670 }, { "epoch": 0.3286238925003131, "grad_norm": 467.255615234375, "learning_rate": 8.531088232618587e-06, "loss": 31.9226, "step": 162680 }, { "epoch": 0.3286440931329969, "grad_norm": 180.1269989013672, "learning_rate": 8.530841087207068e-06, "loss": 9.9781, "step": 162690 }, { "epoch": 0.32866429376568074, "grad_norm": 123.73636627197266, "learning_rate": 8.530593924586659e-06, "loss": 25.4351, "step": 162700 }, { "epoch": 0.32868449439836456, "grad_norm": 342.6778259277344, "learning_rate": 8.530346744758562e-06, "loss": 26.805, "step": 162710 }, { "epoch": 0.3287046950310484, "grad_norm": 573.797607421875, "learning_rate": 8.530099547723983e-06, "loss": 30.3572, "step": 162720 }, { "epoch": 0.3287248956637322, "grad_norm": 829.430908203125, "learning_rate": 8.529852333484129e-06, "loss": 20.8336, "step": 162730 }, { "epoch": 0.328745096296416, "grad_norm": 129.11514282226562, "learning_rate": 8.5296051020402e-06, "loss": 20.287, "step": 162740 }, { "epoch": 0.32876529692909984, "grad_norm": 403.94183349609375, "learning_rate": 8.529357853393406e-06, "loss": 22.2563, "step": 162750 }, { "epoch": 0.32878549756178366, "grad_norm": 205.53599548339844, "learning_rate": 8.52911058754495e-06, "loss": 18.6037, "step": 162760 }, { "epoch": 0.3288056981944675, "grad_norm": 288.7787170410156, "learning_rate": 8.528863304496035e-06, "loss": 22.3115, "step": 162770 }, { "epoch": 0.32882589882715124, "grad_norm": 734.8102416992188, "learning_rate": 8.528616004247869e-06, "loss": 18.1356, "step": 162780 }, { "epoch": 0.32884609945983506, "grad_norm": 598.1242065429688, "learning_rate": 8.528368686801656e-06, "loss": 25.302, "step": 162790 }, { "epoch": 0.3288663000925189, "grad_norm": 713.0995483398438, "learning_rate": 8.528121352158604e-06, "loss": 10.998, "step": 162800 }, { "epoch": 0.3288865007252027, "grad_norm": 232.38246154785156, "learning_rate": 8.527874000319915e-06, "loss": 13.865, "step": 162810 }, { "epoch": 0.3289067013578865, "grad_norm": 468.7928771972656, "learning_rate": 8.527626631286797e-06, "loss": 30.414, "step": 162820 }, { "epoch": 0.32892690199057034, "grad_norm": 212.81771850585938, "learning_rate": 8.527379245060453e-06, "loss": 15.9356, "step": 162830 }, { "epoch": 0.32894710262325416, "grad_norm": 252.7122039794922, "learning_rate": 8.527131841642092e-06, "loss": 20.9118, "step": 162840 }, { "epoch": 0.328967303255938, "grad_norm": 127.44690704345703, "learning_rate": 8.526884421032916e-06, "loss": 34.5894, "step": 162850 }, { "epoch": 0.3289875038886218, "grad_norm": 426.28131103515625, "learning_rate": 8.526636983234135e-06, "loss": 14.298, "step": 162860 }, { "epoch": 0.3290077045213056, "grad_norm": 450.6679992675781, "learning_rate": 8.526389528246955e-06, "loss": 17.171, "step": 162870 }, { "epoch": 0.32902790515398944, "grad_norm": 486.2570495605469, "learning_rate": 8.526142056072578e-06, "loss": 
26.1525, "step": 162880 }, { "epoch": 0.32904810578667326, "grad_norm": 314.75201416015625, "learning_rate": 8.525894566712212e-06, "loss": 24.6402, "step": 162890 }, { "epoch": 0.3290683064193571, "grad_norm": 250.26100158691406, "learning_rate": 8.525647060167063e-06, "loss": 17.4215, "step": 162900 }, { "epoch": 0.32908850705204085, "grad_norm": 205.49232482910156, "learning_rate": 8.52539953643834e-06, "loss": 33.0014, "step": 162910 }, { "epoch": 0.32910870768472467, "grad_norm": 744.604248046875, "learning_rate": 8.525151995527244e-06, "loss": 19.6141, "step": 162920 }, { "epoch": 0.3291289083174085, "grad_norm": 805.7160034179688, "learning_rate": 8.524904437434986e-06, "loss": 34.4444, "step": 162930 }, { "epoch": 0.3291491089500923, "grad_norm": 334.0453186035156, "learning_rate": 8.524656862162773e-06, "loss": 22.3157, "step": 162940 }, { "epoch": 0.3291693095827761, "grad_norm": 1248.5230712890625, "learning_rate": 8.524409269711808e-06, "loss": 12.5765, "step": 162950 }, { "epoch": 0.32918951021545995, "grad_norm": 172.95550537109375, "learning_rate": 8.524161660083301e-06, "loss": 16.6586, "step": 162960 }, { "epoch": 0.32920971084814377, "grad_norm": 515.2111206054688, "learning_rate": 8.523914033278456e-06, "loss": 22.1315, "step": 162970 }, { "epoch": 0.3292299114808276, "grad_norm": 115.48587799072266, "learning_rate": 8.523666389298484e-06, "loss": 26.5629, "step": 162980 }, { "epoch": 0.3292501121135114, "grad_norm": 632.9249267578125, "learning_rate": 8.523418728144585e-06, "loss": 25.6933, "step": 162990 }, { "epoch": 0.3292703127461952, "grad_norm": 751.2723388671875, "learning_rate": 8.523171049817974e-06, "loss": 20.6223, "step": 163000 }, { "epoch": 0.32929051337887905, "grad_norm": 295.5430603027344, "learning_rate": 8.522923354319854e-06, "loss": 25.3509, "step": 163010 }, { "epoch": 0.32931071401156287, "grad_norm": 273.0199279785156, "learning_rate": 8.522675641651432e-06, "loss": 16.5398, "step": 163020 }, { "epoch": 0.32933091464424663, "grad_norm": 607.393310546875, "learning_rate": 8.522427911813917e-06, "loss": 20.1377, "step": 163030 }, { "epoch": 0.32935111527693045, "grad_norm": 541.272705078125, "learning_rate": 8.522180164808515e-06, "loss": 33.9776, "step": 163040 }, { "epoch": 0.32937131590961427, "grad_norm": 213.0469970703125, "learning_rate": 8.521932400636435e-06, "loss": 23.3719, "step": 163050 }, { "epoch": 0.3293915165422981, "grad_norm": 216.6061248779297, "learning_rate": 8.521684619298883e-06, "loss": 17.6476, "step": 163060 }, { "epoch": 0.3294117171749819, "grad_norm": 109.58318328857422, "learning_rate": 8.521436820797067e-06, "loss": 14.3388, "step": 163070 }, { "epoch": 0.32943191780766573, "grad_norm": 695.0380249023438, "learning_rate": 8.521189005132195e-06, "loss": 13.6592, "step": 163080 }, { "epoch": 0.32945211844034955, "grad_norm": 840.1543579101562, "learning_rate": 8.520941172305477e-06, "loss": 18.1814, "step": 163090 }, { "epoch": 0.32947231907303337, "grad_norm": 489.0300598144531, "learning_rate": 8.520693322318116e-06, "loss": 30.5703, "step": 163100 }, { "epoch": 0.3294925197057172, "grad_norm": 275.0984191894531, "learning_rate": 8.520445455171325e-06, "loss": 23.3886, "step": 163110 }, { "epoch": 0.329512720338401, "grad_norm": 702.0556640625, "learning_rate": 8.520197570866307e-06, "loss": 18.539, "step": 163120 }, { "epoch": 0.32953292097108483, "grad_norm": 23.86518096923828, "learning_rate": 8.519949669404275e-06, "loss": 24.436, "step": 163130 }, { "epoch": 0.32955312160376865, "grad_norm": 860.4483642578125, 
"learning_rate": 8.519701750786435e-06, "loss": 24.5196, "step": 163140 }, { "epoch": 0.32957332223645247, "grad_norm": 294.7730407714844, "learning_rate": 8.519453815013996e-06, "loss": 20.5409, "step": 163150 }, { "epoch": 0.32959352286913624, "grad_norm": 623.9278564453125, "learning_rate": 8.519205862088165e-06, "loss": 22.677, "step": 163160 }, { "epoch": 0.32961372350182006, "grad_norm": 552.2848510742188, "learning_rate": 8.518957892010151e-06, "loss": 18.9434, "step": 163170 }, { "epoch": 0.3296339241345039, "grad_norm": 199.6901092529297, "learning_rate": 8.518709904781163e-06, "loss": 24.925, "step": 163180 }, { "epoch": 0.3296541247671877, "grad_norm": 517.12060546875, "learning_rate": 8.518461900402411e-06, "loss": 20.4278, "step": 163190 }, { "epoch": 0.3296743253998715, "grad_norm": 1314.028076171875, "learning_rate": 8.518213878875103e-06, "loss": 27.2679, "step": 163200 }, { "epoch": 0.32969452603255534, "grad_norm": 183.17808532714844, "learning_rate": 8.517965840200445e-06, "loss": 36.9442, "step": 163210 }, { "epoch": 0.32971472666523916, "grad_norm": 2.3865418434143066, "learning_rate": 8.51771778437965e-06, "loss": 18.969, "step": 163220 }, { "epoch": 0.329734927297923, "grad_norm": 154.100341796875, "learning_rate": 8.517469711413924e-06, "loss": 17.4932, "step": 163230 }, { "epoch": 0.3297551279306068, "grad_norm": 197.871826171875, "learning_rate": 8.517221621304479e-06, "loss": 13.5748, "step": 163240 }, { "epoch": 0.3297753285632906, "grad_norm": 14.94884967803955, "learning_rate": 8.51697351405252e-06, "loss": 19.712, "step": 163250 }, { "epoch": 0.32979552919597444, "grad_norm": 412.6863098144531, "learning_rate": 8.51672538965926e-06, "loss": 22.0908, "step": 163260 }, { "epoch": 0.32981572982865826, "grad_norm": 441.4495849609375, "learning_rate": 8.516477248125907e-06, "loss": 23.1763, "step": 163270 }, { "epoch": 0.3298359304613421, "grad_norm": 216.23782348632812, "learning_rate": 8.51622908945367e-06, "loss": 21.2779, "step": 163280 }, { "epoch": 0.32985613109402584, "grad_norm": 357.8020935058594, "learning_rate": 8.515980913643759e-06, "loss": 19.6541, "step": 163290 }, { "epoch": 0.32987633172670966, "grad_norm": 312.1048889160156, "learning_rate": 8.515732720697383e-06, "loss": 23.176, "step": 163300 }, { "epoch": 0.3298965323593935, "grad_norm": 408.1529541015625, "learning_rate": 8.515484510615753e-06, "loss": 20.2293, "step": 163310 }, { "epoch": 0.3299167329920773, "grad_norm": 10.482206344604492, "learning_rate": 8.515236283400078e-06, "loss": 19.5709, "step": 163320 }, { "epoch": 0.3299369336247611, "grad_norm": 490.29095458984375, "learning_rate": 8.514988039051567e-06, "loss": 17.5755, "step": 163330 }, { "epoch": 0.32995713425744494, "grad_norm": 792.8616943359375, "learning_rate": 8.514739777571431e-06, "loss": 25.364, "step": 163340 }, { "epoch": 0.32997733489012876, "grad_norm": 519.2301635742188, "learning_rate": 8.51449149896088e-06, "loss": 30.0556, "step": 163350 }, { "epoch": 0.3299975355228126, "grad_norm": 14.800172805786133, "learning_rate": 8.514243203221124e-06, "loss": 32.3125, "step": 163360 }, { "epoch": 0.3300177361554964, "grad_norm": 486.00164794921875, "learning_rate": 8.51399489035337e-06, "loss": 29.4682, "step": 163370 }, { "epoch": 0.3300379367881802, "grad_norm": 387.1175537109375, "learning_rate": 8.513746560358833e-06, "loss": 24.6668, "step": 163380 }, { "epoch": 0.33005813742086404, "grad_norm": 583.5296630859375, "learning_rate": 8.513498213238722e-06, "loss": 24.8786, "step": 163390 }, { "epoch": 
0.33007833805354786, "grad_norm": 1140.5289306640625, "learning_rate": 8.513249848994248e-06, "loss": 33.8562, "step": 163400 }, { "epoch": 0.3300985386862317, "grad_norm": 535.426513671875, "learning_rate": 8.513001467626618e-06, "loss": 33.8145, "step": 163410 }, { "epoch": 0.33011873931891544, "grad_norm": 613.3733520507812, "learning_rate": 8.512753069137046e-06, "loss": 33.113, "step": 163420 }, { "epoch": 0.33013893995159926, "grad_norm": 167.0186309814453, "learning_rate": 8.51250465352674e-06, "loss": 12.3463, "step": 163430 }, { "epoch": 0.3301591405842831, "grad_norm": 349.2598876953125, "learning_rate": 8.512256220796915e-06, "loss": 31.2951, "step": 163440 }, { "epoch": 0.3301793412169669, "grad_norm": 495.67828369140625, "learning_rate": 8.512007770948775e-06, "loss": 24.4958, "step": 163450 }, { "epoch": 0.3301995418496507, "grad_norm": 21.19040298461914, "learning_rate": 8.51175930398354e-06, "loss": 24.4859, "step": 163460 }, { "epoch": 0.33021974248233454, "grad_norm": 748.869140625, "learning_rate": 8.511510819902413e-06, "loss": 38.5311, "step": 163470 }, { "epoch": 0.33023994311501836, "grad_norm": 410.0790710449219, "learning_rate": 8.51126231870661e-06, "loss": 15.1447, "step": 163480 }, { "epoch": 0.3302601437477022, "grad_norm": 975.2960815429688, "learning_rate": 8.511013800397338e-06, "loss": 18.7852, "step": 163490 }, { "epoch": 0.330280344380386, "grad_norm": 518.7819213867188, "learning_rate": 8.510765264975813e-06, "loss": 28.029, "step": 163500 }, { "epoch": 0.3303005450130698, "grad_norm": 546.6959228515625, "learning_rate": 8.510516712443244e-06, "loss": 13.1513, "step": 163510 }, { "epoch": 0.33032074564575364, "grad_norm": 664.11376953125, "learning_rate": 8.51026814280084e-06, "loss": 31.3736, "step": 163520 }, { "epoch": 0.33034094627843746, "grad_norm": 534.3417358398438, "learning_rate": 8.510019556049815e-06, "loss": 24.8484, "step": 163530 }, { "epoch": 0.3303611469111213, "grad_norm": 530.6990356445312, "learning_rate": 8.509770952191384e-06, "loss": 23.2029, "step": 163540 }, { "epoch": 0.33038134754380505, "grad_norm": 471.0072937011719, "learning_rate": 8.509522331226751e-06, "loss": 49.6587, "step": 163550 }, { "epoch": 0.33040154817648887, "grad_norm": 452.3913269042969, "learning_rate": 8.509273693157133e-06, "loss": 22.6263, "step": 163560 }, { "epoch": 0.3304217488091727, "grad_norm": 331.334716796875, "learning_rate": 8.509025037983742e-06, "loss": 25.4802, "step": 163570 }, { "epoch": 0.3304419494418565, "grad_norm": 250.43438720703125, "learning_rate": 8.508776365707788e-06, "loss": 23.6728, "step": 163580 }, { "epoch": 0.33046215007454033, "grad_norm": 1079.7852783203125, "learning_rate": 8.508527676330483e-06, "loss": 32.216, "step": 163590 }, { "epoch": 0.33048235070722415, "grad_norm": 1053.2109375, "learning_rate": 8.508278969853037e-06, "loss": 27.0024, "step": 163600 }, { "epoch": 0.33050255133990797, "grad_norm": 162.0265655517578, "learning_rate": 8.508030246276668e-06, "loss": 22.319, "step": 163610 }, { "epoch": 0.3305227519725918, "grad_norm": 8.137332916259766, "learning_rate": 8.507781505602585e-06, "loss": 24.311, "step": 163620 }, { "epoch": 0.3305429526052756, "grad_norm": 190.8646697998047, "learning_rate": 8.507532747832e-06, "loss": 27.7262, "step": 163630 }, { "epoch": 0.33056315323795943, "grad_norm": 118.73406219482422, "learning_rate": 8.507283972966126e-06, "loss": 17.7927, "step": 163640 }, { "epoch": 0.33058335387064325, "grad_norm": 303.9774169921875, "learning_rate": 8.507035181006175e-06, "loss": 21.1845, 
"step": 163650 }, { "epoch": 0.33060355450332707, "grad_norm": 280.1281433105469, "learning_rate": 8.50678637195336e-06, "loss": 30.4494, "step": 163660 }, { "epoch": 0.33062375513601083, "grad_norm": 925.5298461914062, "learning_rate": 8.506537545808894e-06, "loss": 30.8261, "step": 163670 }, { "epoch": 0.33064395576869465, "grad_norm": 0.0, "learning_rate": 8.506288702573988e-06, "loss": 22.4596, "step": 163680 }, { "epoch": 0.3306641564013785, "grad_norm": 592.17578125, "learning_rate": 8.506039842249855e-06, "loss": 19.7413, "step": 163690 }, { "epoch": 0.3306843570340623, "grad_norm": 941.7138671875, "learning_rate": 8.505790964837712e-06, "loss": 23.3322, "step": 163700 }, { "epoch": 0.3307045576667461, "grad_norm": 495.9679260253906, "learning_rate": 8.505542070338768e-06, "loss": 27.4565, "step": 163710 }, { "epoch": 0.33072475829942993, "grad_norm": 507.7092590332031, "learning_rate": 8.505293158754238e-06, "loss": 22.9482, "step": 163720 }, { "epoch": 0.33074495893211375, "grad_norm": 1086.2271728515625, "learning_rate": 8.505044230085332e-06, "loss": 23.6799, "step": 163730 }, { "epoch": 0.3307651595647976, "grad_norm": 212.30029296875, "learning_rate": 8.504795284333267e-06, "loss": 25.285, "step": 163740 }, { "epoch": 0.3307853601974814, "grad_norm": 239.4369659423828, "learning_rate": 8.504546321499255e-06, "loss": 35.6713, "step": 163750 }, { "epoch": 0.3308055608301652, "grad_norm": 417.925537109375, "learning_rate": 8.504297341584509e-06, "loss": 22.9194, "step": 163760 }, { "epoch": 0.33082576146284903, "grad_norm": 309.2572021484375, "learning_rate": 8.504048344590243e-06, "loss": 12.4214, "step": 163770 }, { "epoch": 0.33084596209553285, "grad_norm": 400.84893798828125, "learning_rate": 8.50379933051767e-06, "loss": 7.8366, "step": 163780 }, { "epoch": 0.3308661627282167, "grad_norm": 82.74517059326172, "learning_rate": 8.503550299368004e-06, "loss": 24.8063, "step": 163790 }, { "epoch": 0.33088636336090044, "grad_norm": 271.88836669921875, "learning_rate": 8.50330125114246e-06, "loss": 23.0029, "step": 163800 }, { "epoch": 0.33090656399358426, "grad_norm": 677.6578979492188, "learning_rate": 8.50305218584225e-06, "loss": 39.7992, "step": 163810 }, { "epoch": 0.3309267646262681, "grad_norm": 176.89434814453125, "learning_rate": 8.502803103468587e-06, "loss": 14.4343, "step": 163820 }, { "epoch": 0.3309469652589519, "grad_norm": 1424.477783203125, "learning_rate": 8.502554004022688e-06, "loss": 27.7567, "step": 163830 }, { "epoch": 0.3309671658916357, "grad_norm": 343.5373229980469, "learning_rate": 8.502304887505765e-06, "loss": 14.9989, "step": 163840 }, { "epoch": 0.33098736652431954, "grad_norm": 503.44097900390625, "learning_rate": 8.502055753919033e-06, "loss": 22.4749, "step": 163850 }, { "epoch": 0.33100756715700336, "grad_norm": 376.5154113769531, "learning_rate": 8.501806603263706e-06, "loss": 21.681, "step": 163860 }, { "epoch": 0.3310277677896872, "grad_norm": 532.5528564453125, "learning_rate": 8.501557435540996e-06, "loss": 23.0162, "step": 163870 }, { "epoch": 0.331047968422371, "grad_norm": 733.7446899414062, "learning_rate": 8.501308250752123e-06, "loss": 34.5138, "step": 163880 }, { "epoch": 0.3310681690550548, "grad_norm": 153.69967651367188, "learning_rate": 8.501059048898297e-06, "loss": 16.3836, "step": 163890 }, { "epoch": 0.33108836968773864, "grad_norm": 593.3562622070312, "learning_rate": 8.500809829980734e-06, "loss": 16.1941, "step": 163900 }, { "epoch": 0.33110857032042246, "grad_norm": 42.10019302368164, "learning_rate": 
8.50056059400065e-06, "loss": 23.1775, "step": 163910 }, { "epoch": 0.3311287709531063, "grad_norm": 150.84754943847656, "learning_rate": 8.500311340959256e-06, "loss": 23.459, "step": 163920 }, { "epoch": 0.33114897158579004, "grad_norm": 130.12181091308594, "learning_rate": 8.500062070857772e-06, "loss": 19.3963, "step": 163930 }, { "epoch": 0.33116917221847386, "grad_norm": 248.83938598632812, "learning_rate": 8.499812783697406e-06, "loss": 20.3013, "step": 163940 }, { "epoch": 0.3311893728511577, "grad_norm": 205.4557647705078, "learning_rate": 8.499563479479378e-06, "loss": 30.7546, "step": 163950 }, { "epoch": 0.3312095734838415, "grad_norm": 629.36083984375, "learning_rate": 8.499314158204904e-06, "loss": 28.0496, "step": 163960 }, { "epoch": 0.3312297741165253, "grad_norm": 510.4541320800781, "learning_rate": 8.499064819875195e-06, "loss": 18.2752, "step": 163970 }, { "epoch": 0.33124997474920914, "grad_norm": 407.316650390625, "learning_rate": 8.49881546449147e-06, "loss": 15.6284, "step": 163980 }, { "epoch": 0.33127017538189296, "grad_norm": 278.81414794921875, "learning_rate": 8.498566092054943e-06, "loss": 16.8428, "step": 163990 }, { "epoch": 0.3312903760145768, "grad_norm": 581.9839477539062, "learning_rate": 8.498316702566828e-06, "loss": 30.8934, "step": 164000 }, { "epoch": 0.3313105766472606, "grad_norm": 669.310791015625, "learning_rate": 8.498067296028343e-06, "loss": 28.5954, "step": 164010 }, { "epoch": 0.3313307772799444, "grad_norm": 514.7133178710938, "learning_rate": 8.497817872440702e-06, "loss": 19.1162, "step": 164020 }, { "epoch": 0.33135097791262824, "grad_norm": 233.59666442871094, "learning_rate": 8.497568431805118e-06, "loss": 7.1543, "step": 164030 }, { "epoch": 0.33137117854531206, "grad_norm": 208.81288146972656, "learning_rate": 8.497318974122813e-06, "loss": 13.3095, "step": 164040 }, { "epoch": 0.3313913791779959, "grad_norm": 480.57177734375, "learning_rate": 8.497069499394998e-06, "loss": 19.3028, "step": 164050 }, { "epoch": 0.33141157981067965, "grad_norm": 521.1884155273438, "learning_rate": 8.496820007622891e-06, "loss": 16.78, "step": 164060 }, { "epoch": 0.33143178044336347, "grad_norm": 520.5380859375, "learning_rate": 8.496570498807708e-06, "loss": 24.3755, "step": 164070 }, { "epoch": 0.3314519810760473, "grad_norm": 447.192138671875, "learning_rate": 8.496320972950663e-06, "loss": 19.0259, "step": 164080 }, { "epoch": 0.3314721817087311, "grad_norm": 283.7176818847656, "learning_rate": 8.496071430052975e-06, "loss": 21.2698, "step": 164090 }, { "epoch": 0.3314923823414149, "grad_norm": 1076.1070556640625, "learning_rate": 8.495821870115857e-06, "loss": 31.0239, "step": 164100 }, { "epoch": 0.33151258297409875, "grad_norm": 496.1323547363281, "learning_rate": 8.49557229314053e-06, "loss": 20.0498, "step": 164110 }, { "epoch": 0.33153278360678257, "grad_norm": 455.780517578125, "learning_rate": 8.495322699128206e-06, "loss": 19.5465, "step": 164120 }, { "epoch": 0.3315529842394664, "grad_norm": 568.5023803710938, "learning_rate": 8.495073088080102e-06, "loss": 20.3758, "step": 164130 }, { "epoch": 0.3315731848721502, "grad_norm": 200.53025817871094, "learning_rate": 8.494823459997437e-06, "loss": 25.7012, "step": 164140 }, { "epoch": 0.331593385504834, "grad_norm": 264.4395446777344, "learning_rate": 8.494573814881426e-06, "loss": 18.4623, "step": 164150 }, { "epoch": 0.33161358613751785, "grad_norm": 324.6714782714844, "learning_rate": 8.494324152733286e-06, "loss": 22.9645, "step": 164160 }, { "epoch": 0.33163378677020167, "grad_norm": 
193.10011291503906, "learning_rate": 8.494074473554235e-06, "loss": 18.749, "step": 164170 }, { "epoch": 0.33165398740288543, "grad_norm": 260.4193420410156, "learning_rate": 8.493824777345487e-06, "loss": 20.2552, "step": 164180 }, { "epoch": 0.33167418803556925, "grad_norm": 339.6500244140625, "learning_rate": 8.493575064108262e-06, "loss": 24.2752, "step": 164190 }, { "epoch": 0.33169438866825307, "grad_norm": 147.78736877441406, "learning_rate": 8.493325333843776e-06, "loss": 15.3503, "step": 164200 }, { "epoch": 0.3317145893009369, "grad_norm": 47.65892791748047, "learning_rate": 8.493075586553245e-06, "loss": 20.568, "step": 164210 }, { "epoch": 0.3317347899336207, "grad_norm": 295.9309997558594, "learning_rate": 8.492825822237888e-06, "loss": 52.0664, "step": 164220 }, { "epoch": 0.33175499056630453, "grad_norm": 492.0908508300781, "learning_rate": 8.492576040898921e-06, "loss": 16.3666, "step": 164230 }, { "epoch": 0.33177519119898835, "grad_norm": 1002.5883178710938, "learning_rate": 8.492326242537564e-06, "loss": 17.0254, "step": 164240 }, { "epoch": 0.33179539183167217, "grad_norm": 499.7936706542969, "learning_rate": 8.492076427155031e-06, "loss": 34.3492, "step": 164250 }, { "epoch": 0.331815592464356, "grad_norm": 398.0194396972656, "learning_rate": 8.49182659475254e-06, "loss": 12.2671, "step": 164260 }, { "epoch": 0.3318357930970398, "grad_norm": 602.7805786132812, "learning_rate": 8.491576745331312e-06, "loss": 22.4832, "step": 164270 }, { "epoch": 0.33185599372972363, "grad_norm": 385.6957702636719, "learning_rate": 8.49132687889256e-06, "loss": 15.002, "step": 164280 }, { "epoch": 0.33187619436240745, "grad_norm": 53.15830993652344, "learning_rate": 8.491076995437504e-06, "loss": 28.6947, "step": 164290 }, { "epoch": 0.33189639499509127, "grad_norm": 110.26107788085938, "learning_rate": 8.490827094967364e-06, "loss": 20.2564, "step": 164300 }, { "epoch": 0.33191659562777504, "grad_norm": 401.9669494628906, "learning_rate": 8.490577177483357e-06, "loss": 22.4905, "step": 164310 }, { "epoch": 0.33193679626045886, "grad_norm": 373.6098327636719, "learning_rate": 8.490327242986698e-06, "loss": 19.2917, "step": 164320 }, { "epoch": 0.3319569968931427, "grad_norm": 893.7239379882812, "learning_rate": 8.490077291478607e-06, "loss": 20.567, "step": 164330 }, { "epoch": 0.3319771975258265, "grad_norm": 160.93667602539062, "learning_rate": 8.489827322960305e-06, "loss": 11.6975, "step": 164340 }, { "epoch": 0.3319973981585103, "grad_norm": 391.2195739746094, "learning_rate": 8.489577337433006e-06, "loss": 18.3815, "step": 164350 }, { "epoch": 0.33201759879119414, "grad_norm": 228.7294921875, "learning_rate": 8.48932733489793e-06, "loss": 26.4221, "step": 164360 }, { "epoch": 0.33203779942387796, "grad_norm": 462.6561584472656, "learning_rate": 8.489077315356297e-06, "loss": 17.3625, "step": 164370 }, { "epoch": 0.3320580000565618, "grad_norm": 372.6870422363281, "learning_rate": 8.488827278809324e-06, "loss": 28.4132, "step": 164380 }, { "epoch": 0.3320782006892456, "grad_norm": 736.1632690429688, "learning_rate": 8.48857722525823e-06, "loss": 27.4231, "step": 164390 }, { "epoch": 0.3320984013219294, "grad_norm": 276.06182861328125, "learning_rate": 8.488327154704232e-06, "loss": 22.3278, "step": 164400 }, { "epoch": 0.33211860195461324, "grad_norm": 311.4897766113281, "learning_rate": 8.488077067148554e-06, "loss": 11.3768, "step": 164410 }, { "epoch": 0.33213880258729706, "grad_norm": 979.6168212890625, "learning_rate": 8.487826962592409e-06, "loss": 14.5317, "step": 164420 }, { 
"epoch": 0.3321590032199809, "grad_norm": 756.59423828125, "learning_rate": 8.487576841037019e-06, "loss": 16.5986, "step": 164430 }, { "epoch": 0.33217920385266464, "grad_norm": 124.56269073486328, "learning_rate": 8.487326702483602e-06, "loss": 53.1876, "step": 164440 }, { "epoch": 0.33219940448534846, "grad_norm": 367.2579650878906, "learning_rate": 8.487076546933378e-06, "loss": 18.0344, "step": 164450 }, { "epoch": 0.3322196051180323, "grad_norm": 414.42083740234375, "learning_rate": 8.486826374387568e-06, "loss": 22.6892, "step": 164460 }, { "epoch": 0.3322398057507161, "grad_norm": 905.2984008789062, "learning_rate": 8.486576184847386e-06, "loss": 29.1753, "step": 164470 }, { "epoch": 0.3322600063833999, "grad_norm": 559.3729248046875, "learning_rate": 8.486325978314054e-06, "loss": 20.0122, "step": 164480 }, { "epoch": 0.33228020701608374, "grad_norm": 675.530029296875, "learning_rate": 8.486075754788794e-06, "loss": 17.5078, "step": 164490 }, { "epoch": 0.33230040764876756, "grad_norm": 492.83636474609375, "learning_rate": 8.485825514272824e-06, "loss": 34.6663, "step": 164500 }, { "epoch": 0.3323206082814514, "grad_norm": 384.5464782714844, "learning_rate": 8.485575256767362e-06, "loss": 29.0318, "step": 164510 }, { "epoch": 0.3323408089141352, "grad_norm": 579.1456909179688, "learning_rate": 8.48532498227363e-06, "loss": 12.1218, "step": 164520 }, { "epoch": 0.332361009546819, "grad_norm": 186.2333526611328, "learning_rate": 8.485074690792845e-06, "loss": 14.7983, "step": 164530 }, { "epoch": 0.33238121017950284, "grad_norm": 2662.985595703125, "learning_rate": 8.484824382326232e-06, "loss": 29.8661, "step": 164540 }, { "epoch": 0.33240141081218666, "grad_norm": 384.2058410644531, "learning_rate": 8.484574056875004e-06, "loss": 27.3167, "step": 164550 }, { "epoch": 0.3324216114448705, "grad_norm": 386.95574951171875, "learning_rate": 8.484323714440386e-06, "loss": 42.5152, "step": 164560 }, { "epoch": 0.33244181207755424, "grad_norm": 973.225341796875, "learning_rate": 8.484073355023597e-06, "loss": 25.1114, "step": 164570 }, { "epoch": 0.33246201271023806, "grad_norm": 542.1936645507812, "learning_rate": 8.483822978625855e-06, "loss": 19.0829, "step": 164580 }, { "epoch": 0.3324822133429219, "grad_norm": 357.5818786621094, "learning_rate": 8.483572585248385e-06, "loss": 12.3465, "step": 164590 }, { "epoch": 0.3325024139756057, "grad_norm": 277.9449157714844, "learning_rate": 8.483322174892404e-06, "loss": 16.4891, "step": 164600 }, { "epoch": 0.3325226146082895, "grad_norm": 291.3374938964844, "learning_rate": 8.483071747559133e-06, "loss": 14.9676, "step": 164610 }, { "epoch": 0.33254281524097334, "grad_norm": 166.30682373046875, "learning_rate": 8.482821303249793e-06, "loss": 25.2423, "step": 164620 }, { "epoch": 0.33256301587365716, "grad_norm": 341.4237060546875, "learning_rate": 8.482570841965605e-06, "loss": 19.0733, "step": 164630 }, { "epoch": 0.332583216506341, "grad_norm": 285.921875, "learning_rate": 8.482320363707787e-06, "loss": 18.954, "step": 164640 }, { "epoch": 0.3326034171390248, "grad_norm": 873.6636352539062, "learning_rate": 8.482069868477565e-06, "loss": 14.2361, "step": 164650 }, { "epoch": 0.3326236177717086, "grad_norm": 671.779296875, "learning_rate": 8.481819356276155e-06, "loss": 31.5424, "step": 164660 }, { "epoch": 0.33264381840439244, "grad_norm": 615.38623046875, "learning_rate": 8.481568827104779e-06, "loss": 18.1736, "step": 164670 }, { "epoch": 0.33266401903707626, "grad_norm": 446.8658142089844, "learning_rate": 8.481318280964661e-06, "loss": 
21.8833, "step": 164680 }, { "epoch": 0.3326842196697601, "grad_norm": 521.0661010742188, "learning_rate": 8.481067717857017e-06, "loss": 20.2418, "step": 164690 }, { "epoch": 0.33270442030244385, "grad_norm": 426.0536804199219, "learning_rate": 8.480817137783073e-06, "loss": 16.9004, "step": 164700 }, { "epoch": 0.33272462093512767, "grad_norm": 625.370849609375, "learning_rate": 8.480566540744048e-06, "loss": 17.6258, "step": 164710 }, { "epoch": 0.3327448215678115, "grad_norm": 0.0, "learning_rate": 8.480315926741165e-06, "loss": 34.8818, "step": 164720 }, { "epoch": 0.3327650222004953, "grad_norm": 641.9622802734375, "learning_rate": 8.480065295775643e-06, "loss": 26.7894, "step": 164730 }, { "epoch": 0.33278522283317913, "grad_norm": 624.4006958007812, "learning_rate": 8.479814647848706e-06, "loss": 16.9639, "step": 164740 }, { "epoch": 0.33280542346586295, "grad_norm": 365.9395446777344, "learning_rate": 8.479563982961572e-06, "loss": 36.2082, "step": 164750 }, { "epoch": 0.33282562409854677, "grad_norm": 320.30950927734375, "learning_rate": 8.479313301115467e-06, "loss": 13.5921, "step": 164760 }, { "epoch": 0.3328458247312306, "grad_norm": 555.3203735351562, "learning_rate": 8.479062602311611e-06, "loss": 16.8221, "step": 164770 }, { "epoch": 0.3328660253639144, "grad_norm": 91.21572875976562, "learning_rate": 8.478811886551226e-06, "loss": 24.735, "step": 164780 }, { "epoch": 0.33288622599659823, "grad_norm": 638.2918090820312, "learning_rate": 8.478561153835532e-06, "loss": 19.5226, "step": 164790 }, { "epoch": 0.33290642662928205, "grad_norm": 511.9052734375, "learning_rate": 8.478310404165756e-06, "loss": 29.554, "step": 164800 }, { "epoch": 0.33292662726196587, "grad_norm": 398.52520751953125, "learning_rate": 8.478059637543114e-06, "loss": 43.5895, "step": 164810 }, { "epoch": 0.33294682789464963, "grad_norm": 464.9781494140625, "learning_rate": 8.477808853968831e-06, "loss": 20.1277, "step": 164820 }, { "epoch": 0.33296702852733345, "grad_norm": 225.42550659179688, "learning_rate": 8.477558053444133e-06, "loss": 17.408, "step": 164830 }, { "epoch": 0.3329872291600173, "grad_norm": 319.70770263671875, "learning_rate": 8.477307235970235e-06, "loss": 23.274, "step": 164840 }, { "epoch": 0.3330074297927011, "grad_norm": 159.54104614257812, "learning_rate": 8.477056401548364e-06, "loss": 37.0467, "step": 164850 }, { "epoch": 0.3330276304253849, "grad_norm": 602.1248168945312, "learning_rate": 8.476805550179743e-06, "loss": 15.3591, "step": 164860 }, { "epoch": 0.33304783105806873, "grad_norm": 504.2489013671875, "learning_rate": 8.476554681865594e-06, "loss": 26.6329, "step": 164870 }, { "epoch": 0.33306803169075255, "grad_norm": 635.47265625, "learning_rate": 8.476303796607138e-06, "loss": 16.9301, "step": 164880 }, { "epoch": 0.3330882323234364, "grad_norm": 525.6773071289062, "learning_rate": 8.4760528944056e-06, "loss": 19.0034, "step": 164890 }, { "epoch": 0.3331084329561202, "grad_norm": 438.91815185546875, "learning_rate": 8.4758019752622e-06, "loss": 20.7083, "step": 164900 }, { "epoch": 0.333128633588804, "grad_norm": 592.015380859375, "learning_rate": 8.475551039178164e-06, "loss": 22.1569, "step": 164910 }, { "epoch": 0.33314883422148783, "grad_norm": 538.2504272460938, "learning_rate": 8.475300086154714e-06, "loss": 41.7083, "step": 164920 }, { "epoch": 0.33316903485417165, "grad_norm": 916.0399780273438, "learning_rate": 8.475049116193071e-06, "loss": 41.4224, "step": 164930 }, { "epoch": 0.3331892354868555, "grad_norm": 221.72230529785156, "learning_rate": 
8.474798129294462e-06, "loss": 29.0907, "step": 164940 }, { "epoch": 0.33320943611953924, "grad_norm": 10.00229549407959, "learning_rate": 8.474547125460108e-06, "loss": 19.1066, "step": 164950 }, { "epoch": 0.33322963675222306, "grad_norm": 425.7928466796875, "learning_rate": 8.474296104691231e-06, "loss": 44.4712, "step": 164960 }, { "epoch": 0.3332498373849069, "grad_norm": 19.716455459594727, "learning_rate": 8.474045066989058e-06, "loss": 20.1471, "step": 164970 }, { "epoch": 0.3332700380175907, "grad_norm": 365.4695129394531, "learning_rate": 8.47379401235481e-06, "loss": 35.0631, "step": 164980 }, { "epoch": 0.3332902386502745, "grad_norm": 397.28033447265625, "learning_rate": 8.473542940789712e-06, "loss": 19.0947, "step": 164990 }, { "epoch": 0.33331043928295834, "grad_norm": 615.3440551757812, "learning_rate": 8.473291852294986e-06, "loss": 19.4807, "step": 165000 }, { "epoch": 0.33333063991564216, "grad_norm": 148.29989624023438, "learning_rate": 8.47304074687186e-06, "loss": 15.1729, "step": 165010 }, { "epoch": 0.333350840548326, "grad_norm": 535.3994140625, "learning_rate": 8.472789624521552e-06, "loss": 34.4777, "step": 165020 }, { "epoch": 0.3333710411810098, "grad_norm": 182.21981811523438, "learning_rate": 8.472538485245287e-06, "loss": 39.2378, "step": 165030 }, { "epoch": 0.3333912418136936, "grad_norm": 554.3379516601562, "learning_rate": 8.472287329044292e-06, "loss": 14.5789, "step": 165040 }, { "epoch": 0.33341144244637744, "grad_norm": 155.79673767089844, "learning_rate": 8.47203615591979e-06, "loss": 19.0935, "step": 165050 }, { "epoch": 0.33343164307906126, "grad_norm": 305.8067321777344, "learning_rate": 8.471784965873005e-06, "loss": 17.3382, "step": 165060 }, { "epoch": 0.3334518437117451, "grad_norm": 404.5699157714844, "learning_rate": 8.471533758905161e-06, "loss": 11.7331, "step": 165070 }, { "epoch": 0.33347204434442884, "grad_norm": 682.6023559570312, "learning_rate": 8.471282535017482e-06, "loss": 18.0622, "step": 165080 }, { "epoch": 0.33349224497711266, "grad_norm": 213.82958984375, "learning_rate": 8.471031294211194e-06, "loss": 32.7308, "step": 165090 }, { "epoch": 0.3335124456097965, "grad_norm": 663.1001586914062, "learning_rate": 8.47078003648752e-06, "loss": 28.8932, "step": 165100 }, { "epoch": 0.3335326462424803, "grad_norm": 243.75440979003906, "learning_rate": 8.470528761847684e-06, "loss": 15.589, "step": 165110 }, { "epoch": 0.3335528468751641, "grad_norm": 342.10357666015625, "learning_rate": 8.470277470292914e-06, "loss": 13.145, "step": 165120 }, { "epoch": 0.33357304750784794, "grad_norm": 416.9073181152344, "learning_rate": 8.47002616182443e-06, "loss": 18.5308, "step": 165130 }, { "epoch": 0.33359324814053176, "grad_norm": 254.2198486328125, "learning_rate": 8.46977483644346e-06, "loss": 20.4267, "step": 165140 }, { "epoch": 0.3336134487732156, "grad_norm": 317.7593688964844, "learning_rate": 8.469523494151229e-06, "loss": 18.6798, "step": 165150 }, { "epoch": 0.3336336494058994, "grad_norm": 385.6800842285156, "learning_rate": 8.469272134948963e-06, "loss": 13.8181, "step": 165160 }, { "epoch": 0.3336538500385832, "grad_norm": 325.71832275390625, "learning_rate": 8.469020758837882e-06, "loss": 26.1625, "step": 165170 }, { "epoch": 0.33367405067126704, "grad_norm": 508.8575439453125, "learning_rate": 8.468769365819216e-06, "loss": 37.5624, "step": 165180 }, { "epoch": 0.33369425130395086, "grad_norm": 637.180908203125, "learning_rate": 8.46851795589419e-06, "loss": 16.4045, "step": 165190 }, { "epoch": 0.3337144519366347, 
"grad_norm": 468.8171691894531, "learning_rate": 8.468266529064025e-06, "loss": 28.6397, "step": 165200 }, { "epoch": 0.33373465256931845, "grad_norm": 289.24896240234375, "learning_rate": 8.468015085329952e-06, "loss": 12.0796, "step": 165210 }, { "epoch": 0.33375485320200227, "grad_norm": 265.281005859375, "learning_rate": 8.467763624693195e-06, "loss": 13.2185, "step": 165220 }, { "epoch": 0.3337750538346861, "grad_norm": 781.6720581054688, "learning_rate": 8.467512147154977e-06, "loss": 30.3655, "step": 165230 }, { "epoch": 0.3337952544673699, "grad_norm": 192.22543334960938, "learning_rate": 8.467260652716525e-06, "loss": 13.2761, "step": 165240 }, { "epoch": 0.3338154551000537, "grad_norm": 163.2029571533203, "learning_rate": 8.467009141379065e-06, "loss": 22.6383, "step": 165250 }, { "epoch": 0.33383565573273755, "grad_norm": 218.8509979248047, "learning_rate": 8.466757613143824e-06, "loss": 27.0643, "step": 165260 }, { "epoch": 0.33385585636542137, "grad_norm": 518.0239868164062, "learning_rate": 8.466506068012025e-06, "loss": 17.2885, "step": 165270 }, { "epoch": 0.3338760569981052, "grad_norm": 392.69757080078125, "learning_rate": 8.466254505984899e-06, "loss": 24.6875, "step": 165280 }, { "epoch": 0.333896257630789, "grad_norm": 402.17510986328125, "learning_rate": 8.466002927063668e-06, "loss": 32.3207, "step": 165290 }, { "epoch": 0.3339164582634728, "grad_norm": 274.7178039550781, "learning_rate": 8.465751331249558e-06, "loss": 25.4942, "step": 165300 }, { "epoch": 0.33393665889615665, "grad_norm": 928.2224731445312, "learning_rate": 8.465499718543797e-06, "loss": 37.6747, "step": 165310 }, { "epoch": 0.33395685952884047, "grad_norm": 134.0745849609375, "learning_rate": 8.46524808894761e-06, "loss": 13.0874, "step": 165320 }, { "epoch": 0.3339770601615243, "grad_norm": 144.41650390625, "learning_rate": 8.464996442462226e-06, "loss": 15.1885, "step": 165330 }, { "epoch": 0.33399726079420805, "grad_norm": 517.2526245117188, "learning_rate": 8.464744779088868e-06, "loss": 36.2311, "step": 165340 }, { "epoch": 0.33401746142689187, "grad_norm": 398.09765625, "learning_rate": 8.464493098828763e-06, "loss": 31.7159, "step": 165350 }, { "epoch": 0.3340376620595757, "grad_norm": 112.75455474853516, "learning_rate": 8.464241401683142e-06, "loss": 15.3615, "step": 165360 }, { "epoch": 0.3340578626922595, "grad_norm": 289.33154296875, "learning_rate": 8.463989687653226e-06, "loss": 17.8703, "step": 165370 }, { "epoch": 0.33407806332494333, "grad_norm": 558.5931396484375, "learning_rate": 8.463737956740246e-06, "loss": 19.8129, "step": 165380 }, { "epoch": 0.33409826395762715, "grad_norm": 25.02311134338379, "learning_rate": 8.463486208945426e-06, "loss": 16.373, "step": 165390 }, { "epoch": 0.33411846459031097, "grad_norm": 281.6791076660156, "learning_rate": 8.463234444269994e-06, "loss": 9.0688, "step": 165400 }, { "epoch": 0.3341386652229948, "grad_norm": 444.8165588378906, "learning_rate": 8.462982662715179e-06, "loss": 20.3018, "step": 165410 }, { "epoch": 0.3341588658556786, "grad_norm": 287.7277526855469, "learning_rate": 8.462730864282206e-06, "loss": 13.8106, "step": 165420 }, { "epoch": 0.33417906648836243, "grad_norm": 281.07452392578125, "learning_rate": 8.462479048972302e-06, "loss": 26.3089, "step": 165430 }, { "epoch": 0.33419926712104625, "grad_norm": 747.5748901367188, "learning_rate": 8.462227216786696e-06, "loss": 19.8105, "step": 165440 }, { "epoch": 0.33421946775373007, "grad_norm": 345.7933044433594, "learning_rate": 8.461975367726614e-06, "loss": 17.2945, "step": 
165450 }, { "epoch": 0.33423966838641384, "grad_norm": 225.85513305664062, "learning_rate": 8.461723501793284e-06, "loss": 39.0874, "step": 165460 }, { "epoch": 0.33425986901909766, "grad_norm": 278.6300964355469, "learning_rate": 8.461471618987933e-06, "loss": 18.8434, "step": 165470 }, { "epoch": 0.3342800696517815, "grad_norm": 987.3038940429688, "learning_rate": 8.46121971931179e-06, "loss": 26.1987, "step": 165480 }, { "epoch": 0.3343002702844653, "grad_norm": 510.0689392089844, "learning_rate": 8.460967802766081e-06, "loss": 13.8596, "step": 165490 }, { "epoch": 0.3343204709171491, "grad_norm": 346.2994689941406, "learning_rate": 8.460715869352035e-06, "loss": 22.2588, "step": 165500 }, { "epoch": 0.33434067154983294, "grad_norm": 4.863061428070068, "learning_rate": 8.460463919070879e-06, "loss": 19.7094, "step": 165510 }, { "epoch": 0.33436087218251676, "grad_norm": 329.3653564453125, "learning_rate": 8.460211951923842e-06, "loss": 30.0413, "step": 165520 }, { "epoch": 0.3343810728152006, "grad_norm": 576.1764526367188, "learning_rate": 8.459959967912152e-06, "loss": 27.0201, "step": 165530 }, { "epoch": 0.3344012734478844, "grad_norm": 403.1051940917969, "learning_rate": 8.459707967037037e-06, "loss": 16.6992, "step": 165540 }, { "epoch": 0.3344214740805682, "grad_norm": 102.03054809570312, "learning_rate": 8.459455949299725e-06, "loss": 16.7472, "step": 165550 }, { "epoch": 0.33444167471325204, "grad_norm": 573.9119873046875, "learning_rate": 8.459203914701444e-06, "loss": 18.7682, "step": 165560 }, { "epoch": 0.33446187534593586, "grad_norm": 731.407470703125, "learning_rate": 8.458951863243424e-06, "loss": 26.0697, "step": 165570 }, { "epoch": 0.3344820759786197, "grad_norm": 1461.892822265625, "learning_rate": 8.45869979492689e-06, "loss": 22.1549, "step": 165580 }, { "epoch": 0.33450227661130344, "grad_norm": 853.9044799804688, "learning_rate": 8.458447709753073e-06, "loss": 26.7949, "step": 165590 }, { "epoch": 0.33452247724398726, "grad_norm": 198.04319763183594, "learning_rate": 8.458195607723201e-06, "loss": 21.1348, "step": 165600 }, { "epoch": 0.3345426778766711, "grad_norm": 159.65589904785156, "learning_rate": 8.457943488838504e-06, "loss": 21.2028, "step": 165610 }, { "epoch": 0.3345628785093549, "grad_norm": 368.6065673828125, "learning_rate": 8.45769135310021e-06, "loss": 22.417, "step": 165620 }, { "epoch": 0.3345830791420387, "grad_norm": 973.2472534179688, "learning_rate": 8.457439200509548e-06, "loss": 29.6429, "step": 165630 }, { "epoch": 0.33460327977472254, "grad_norm": 141.88278198242188, "learning_rate": 8.457187031067746e-06, "loss": 15.8237, "step": 165640 }, { "epoch": 0.33462348040740636, "grad_norm": 311.3077087402344, "learning_rate": 8.456934844776033e-06, "loss": 15.497, "step": 165650 }, { "epoch": 0.3346436810400902, "grad_norm": 597.1159057617188, "learning_rate": 8.456682641635639e-06, "loss": 28.0578, "step": 165660 }, { "epoch": 0.334663881672774, "grad_norm": 443.5572814941406, "learning_rate": 8.456430421647795e-06, "loss": 37.2504, "step": 165670 }, { "epoch": 0.3346840823054578, "grad_norm": 71.40750885009766, "learning_rate": 8.456178184813726e-06, "loss": 13.1042, "step": 165680 }, { "epoch": 0.33470428293814164, "grad_norm": 493.4185485839844, "learning_rate": 8.455925931134665e-06, "loss": 13.4274, "step": 165690 }, { "epoch": 0.33472448357082546, "grad_norm": 852.5182495117188, "learning_rate": 8.45567366061184e-06, "loss": 28.5645, "step": 165700 }, { "epoch": 0.3347446842035093, "grad_norm": 433.5321350097656, "learning_rate": 
8.455421373246479e-06, "loss": 18.3268, "step": 165710 }, { "epoch": 0.33476488483619304, "grad_norm": 472.730712890625, "learning_rate": 8.455169069039814e-06, "loss": 25.194, "step": 165720 }, { "epoch": 0.33478508546887686, "grad_norm": 543.5000610351562, "learning_rate": 8.454916747993076e-06, "loss": 20.7661, "step": 165730 }, { "epoch": 0.3348052861015607, "grad_norm": 203.8567657470703, "learning_rate": 8.454664410107492e-06, "loss": 28.9626, "step": 165740 }, { "epoch": 0.3348254867342445, "grad_norm": 255.99026489257812, "learning_rate": 8.454412055384292e-06, "loss": 17.9984, "step": 165750 }, { "epoch": 0.3348456873669283, "grad_norm": 946.3783569335938, "learning_rate": 8.454159683824707e-06, "loss": 23.2077, "step": 165760 }, { "epoch": 0.33486588799961214, "grad_norm": 364.1036376953125, "learning_rate": 8.453907295429969e-06, "loss": 18.382, "step": 165770 }, { "epoch": 0.33488608863229596, "grad_norm": 133.6348876953125, "learning_rate": 8.453654890201301e-06, "loss": 45.5667, "step": 165780 }, { "epoch": 0.3349062892649798, "grad_norm": 1.842629075050354, "learning_rate": 8.453402468139941e-06, "loss": 10.9108, "step": 165790 }, { "epoch": 0.3349264898976636, "grad_norm": 557.810302734375, "learning_rate": 8.453150029247115e-06, "loss": 23.3179, "step": 165800 }, { "epoch": 0.3349466905303474, "grad_norm": 602.2593383789062, "learning_rate": 8.452897573524055e-06, "loss": 19.134, "step": 165810 }, { "epoch": 0.33496689116303124, "grad_norm": 367.6490478515625, "learning_rate": 8.452645100971991e-06, "loss": 26.2369, "step": 165820 }, { "epoch": 0.33498709179571506, "grad_norm": 385.2917175292969, "learning_rate": 8.452392611592154e-06, "loss": 14.507, "step": 165830 }, { "epoch": 0.3350072924283989, "grad_norm": 211.99009704589844, "learning_rate": 8.452140105385774e-06, "loss": 17.8554, "step": 165840 }, { "epoch": 0.33502749306108265, "grad_norm": 287.37188720703125, "learning_rate": 8.451887582354081e-06, "loss": 24.3312, "step": 165850 }, { "epoch": 0.33504769369376647, "grad_norm": 128.64540100097656, "learning_rate": 8.451635042498307e-06, "loss": 15.5851, "step": 165860 }, { "epoch": 0.3350678943264503, "grad_norm": 488.65380859375, "learning_rate": 8.451382485819683e-06, "loss": 23.7012, "step": 165870 }, { "epoch": 0.3350880949591341, "grad_norm": 91.1632080078125, "learning_rate": 8.451129912319439e-06, "loss": 22.6898, "step": 165880 }, { "epoch": 0.33510829559181793, "grad_norm": 630.19287109375, "learning_rate": 8.450877321998805e-06, "loss": 22.226, "step": 165890 }, { "epoch": 0.33512849622450175, "grad_norm": 214.9325408935547, "learning_rate": 8.450624714859016e-06, "loss": 25.6399, "step": 165900 }, { "epoch": 0.33514869685718557, "grad_norm": 567.51611328125, "learning_rate": 8.4503720909013e-06, "loss": 25.022, "step": 165910 }, { "epoch": 0.3351688974898694, "grad_norm": 538.2396240234375, "learning_rate": 8.450119450126889e-06, "loss": 30.0168, "step": 165920 }, { "epoch": 0.3351890981225532, "grad_norm": 426.6291809082031, "learning_rate": 8.449866792537013e-06, "loss": 8.4766, "step": 165930 }, { "epoch": 0.33520929875523703, "grad_norm": 570.8070068359375, "learning_rate": 8.449614118132905e-06, "loss": 30.7451, "step": 165940 }, { "epoch": 0.33522949938792085, "grad_norm": 285.88262939453125, "learning_rate": 8.449361426915797e-06, "loss": 18.7535, "step": 165950 }, { "epoch": 0.33524970002060467, "grad_norm": 342.9992370605469, "learning_rate": 8.449108718886919e-06, "loss": 17.1622, "step": 165960 }, { "epoch": 0.3352699006532885, "grad_norm": 
501.7101745605469, "learning_rate": 8.448855994047502e-06, "loss": 41.3075, "step": 165970 }, { "epoch": 0.33529010128597225, "grad_norm": 648.7622680664062, "learning_rate": 8.448603252398782e-06, "loss": 27.9492, "step": 165980 }, { "epoch": 0.3353103019186561, "grad_norm": 765.6998291015625, "learning_rate": 8.448350493941986e-06, "loss": 19.664, "step": 165990 }, { "epoch": 0.3353305025513399, "grad_norm": 592.9990234375, "learning_rate": 8.44809771867835e-06, "loss": 27.3351, "step": 166000 }, { "epoch": 0.3353507031840237, "grad_norm": 715.3545532226562, "learning_rate": 8.447844926609103e-06, "loss": 27.305, "step": 166010 }, { "epoch": 0.33537090381670753, "grad_norm": 337.9970703125, "learning_rate": 8.447592117735477e-06, "loss": 17.8994, "step": 166020 }, { "epoch": 0.33539110444939135, "grad_norm": 338.2167663574219, "learning_rate": 8.447339292058706e-06, "loss": 37.4576, "step": 166030 }, { "epoch": 0.3354113050820752, "grad_norm": 383.3039245605469, "learning_rate": 8.44708644958002e-06, "loss": 17.3921, "step": 166040 }, { "epoch": 0.335431505714759, "grad_norm": 361.1171875, "learning_rate": 8.446833590300656e-06, "loss": 23.4858, "step": 166050 }, { "epoch": 0.3354517063474428, "grad_norm": 499.3106689453125, "learning_rate": 8.44658071422184e-06, "loss": 24.2805, "step": 166060 }, { "epoch": 0.33547190698012663, "grad_norm": 851.7272338867188, "learning_rate": 8.44632782134481e-06, "loss": 16.007, "step": 166070 }, { "epoch": 0.33549210761281045, "grad_norm": 541.3045654296875, "learning_rate": 8.446074911670795e-06, "loss": 22.5634, "step": 166080 }, { "epoch": 0.3355123082454943, "grad_norm": 232.80931091308594, "learning_rate": 8.445821985201028e-06, "loss": 12.8816, "step": 166090 }, { "epoch": 0.33553250887817804, "grad_norm": 741.358642578125, "learning_rate": 8.445569041936743e-06, "loss": 22.6484, "step": 166100 }, { "epoch": 0.33555270951086186, "grad_norm": 848.94189453125, "learning_rate": 8.445316081879174e-06, "loss": 26.0934, "step": 166110 }, { "epoch": 0.3355729101435457, "grad_norm": 859.1322021484375, "learning_rate": 8.44506310502955e-06, "loss": 19.3114, "step": 166120 }, { "epoch": 0.3355931107762295, "grad_norm": 418.796142578125, "learning_rate": 8.444810111389108e-06, "loss": 42.8017, "step": 166130 }, { "epoch": 0.3356133114089133, "grad_norm": 124.33617401123047, "learning_rate": 8.44455710095908e-06, "loss": 20.8427, "step": 166140 }, { "epoch": 0.33563351204159714, "grad_norm": 463.8589782714844, "learning_rate": 8.444304073740695e-06, "loss": 14.3268, "step": 166150 }, { "epoch": 0.33565371267428096, "grad_norm": 271.988037109375, "learning_rate": 8.444051029735192e-06, "loss": 20.3641, "step": 166160 }, { "epoch": 0.3356739133069648, "grad_norm": 392.1072082519531, "learning_rate": 8.443797968943801e-06, "loss": 31.2143, "step": 166170 }, { "epoch": 0.3356941139396486, "grad_norm": 455.8572998046875, "learning_rate": 8.443544891367758e-06, "loss": 28.3247, "step": 166180 }, { "epoch": 0.3357143145723324, "grad_norm": 379.1649169921875, "learning_rate": 8.443291797008294e-06, "loss": 19.1225, "step": 166190 }, { "epoch": 0.33573451520501624, "grad_norm": 437.4548645019531, "learning_rate": 8.443038685866643e-06, "loss": 19.6158, "step": 166200 }, { "epoch": 0.33575471583770006, "grad_norm": 73.28082275390625, "learning_rate": 8.44278555794404e-06, "loss": 19.3912, "step": 166210 }, { "epoch": 0.3357749164703839, "grad_norm": 350.60595703125, "learning_rate": 8.442532413241717e-06, "loss": 24.2174, "step": 166220 }, { "epoch": 
0.33579511710306764, "grad_norm": 38.57235336303711, "learning_rate": 8.442279251760907e-06, "loss": 18.4508, "step": 166230 }, { "epoch": 0.33581531773575146, "grad_norm": 103.85962677001953, "learning_rate": 8.442026073502849e-06, "loss": 29.9303, "step": 166240 }, { "epoch": 0.3358355183684353, "grad_norm": 294.2778015136719, "learning_rate": 8.44177287846877e-06, "loss": 15.0246, "step": 166250 }, { "epoch": 0.3358557190011191, "grad_norm": 264.279296875, "learning_rate": 8.44151966665991e-06, "loss": 25.0396, "step": 166260 }, { "epoch": 0.3358759196338029, "grad_norm": 222.1487274169922, "learning_rate": 8.4412664380775e-06, "loss": 33.7391, "step": 166270 }, { "epoch": 0.33589612026648674, "grad_norm": 668.9306030273438, "learning_rate": 8.441013192722774e-06, "loss": 21.2129, "step": 166280 }, { "epoch": 0.33591632089917056, "grad_norm": 17.128477096557617, "learning_rate": 8.440759930596967e-06, "loss": 24.8987, "step": 166290 }, { "epoch": 0.3359365215318544, "grad_norm": 411.150390625, "learning_rate": 8.440506651701315e-06, "loss": 23.1951, "step": 166300 }, { "epoch": 0.3359567221645382, "grad_norm": 302.4668884277344, "learning_rate": 8.440253356037048e-06, "loss": 19.6862, "step": 166310 }, { "epoch": 0.335976922797222, "grad_norm": 47.11547088623047, "learning_rate": 8.440000043605406e-06, "loss": 24.6051, "step": 166320 }, { "epoch": 0.33599712342990584, "grad_norm": 588.094970703125, "learning_rate": 8.439746714407619e-06, "loss": 21.6316, "step": 166330 }, { "epoch": 0.33601732406258966, "grad_norm": 353.5929870605469, "learning_rate": 8.439493368444924e-06, "loss": 11.4648, "step": 166340 }, { "epoch": 0.3360375246952735, "grad_norm": 312.5060729980469, "learning_rate": 8.439240005718556e-06, "loss": 28.425, "step": 166350 }, { "epoch": 0.33605772532795725, "grad_norm": 756.814697265625, "learning_rate": 8.43898662622975e-06, "loss": 51.6401, "step": 166360 }, { "epoch": 0.33607792596064107, "grad_norm": 406.2431640625, "learning_rate": 8.438733229979741e-06, "loss": 17.1661, "step": 166370 }, { "epoch": 0.3360981265933249, "grad_norm": 96.3930892944336, "learning_rate": 8.438479816969762e-06, "loss": 24.6942, "step": 166380 }, { "epoch": 0.3361183272260087, "grad_norm": 387.2288513183594, "learning_rate": 8.438226387201048e-06, "loss": 14.9424, "step": 166390 }, { "epoch": 0.3361385278586925, "grad_norm": 404.4085693359375, "learning_rate": 8.437972940674838e-06, "loss": 30.1257, "step": 166400 }, { "epoch": 0.33615872849137635, "grad_norm": 355.3511962890625, "learning_rate": 8.437719477392363e-06, "loss": 19.9233, "step": 166410 }, { "epoch": 0.33617892912406017, "grad_norm": 178.55581665039062, "learning_rate": 8.43746599735486e-06, "loss": 20.9385, "step": 166420 }, { "epoch": 0.336199129756744, "grad_norm": 282.252685546875, "learning_rate": 8.437212500563567e-06, "loss": 13.9381, "step": 166430 }, { "epoch": 0.3362193303894278, "grad_norm": 63.36928939819336, "learning_rate": 8.436958987019717e-06, "loss": 17.171, "step": 166440 }, { "epoch": 0.3362395310221116, "grad_norm": 125.04975891113281, "learning_rate": 8.436705456724545e-06, "loss": 17.6303, "step": 166450 }, { "epoch": 0.33625973165479545, "grad_norm": 247.55935668945312, "learning_rate": 8.436451909679286e-06, "loss": 21.9529, "step": 166460 }, { "epoch": 0.33627993228747927, "grad_norm": 481.1356201171875, "learning_rate": 8.436198345885177e-06, "loss": 14.1245, "step": 166470 }, { "epoch": 0.3363001329201631, "grad_norm": 441.5704345703125, "learning_rate": 8.435944765343457e-06, "loss": 22.2057, 
"step": 166480 }, { "epoch": 0.33632033355284685, "grad_norm": 575.5211181640625, "learning_rate": 8.435691168055358e-06, "loss": 22.0704, "step": 166490 }, { "epoch": 0.33634053418553067, "grad_norm": 615.0734252929688, "learning_rate": 8.435437554022116e-06, "loss": 27.4929, "step": 166500 }, { "epoch": 0.3363607348182145, "grad_norm": 374.204833984375, "learning_rate": 8.435183923244969e-06, "loss": 39.71, "step": 166510 }, { "epoch": 0.3363809354508983, "grad_norm": 292.698974609375, "learning_rate": 8.43493027572515e-06, "loss": 15.8342, "step": 166520 }, { "epoch": 0.33640113608358213, "grad_norm": 11.15060043334961, "learning_rate": 8.4346766114639e-06, "loss": 26.0426, "step": 166530 }, { "epoch": 0.33642133671626595, "grad_norm": 445.99713134765625, "learning_rate": 8.434422930462452e-06, "loss": 20.9144, "step": 166540 }, { "epoch": 0.33644153734894977, "grad_norm": 385.7422180175781, "learning_rate": 8.434169232722043e-06, "loss": 23.432, "step": 166550 }, { "epoch": 0.3364617379816336, "grad_norm": 505.59503173828125, "learning_rate": 8.433915518243909e-06, "loss": 18.3347, "step": 166560 }, { "epoch": 0.3364819386143174, "grad_norm": 643.0967407226562, "learning_rate": 8.433661787029288e-06, "loss": 20.145, "step": 166570 }, { "epoch": 0.33650213924700123, "grad_norm": 425.1985168457031, "learning_rate": 8.433408039079415e-06, "loss": 14.0901, "step": 166580 }, { "epoch": 0.33652233987968505, "grad_norm": 388.61944580078125, "learning_rate": 8.433154274395529e-06, "loss": 28.345, "step": 166590 }, { "epoch": 0.33654254051236887, "grad_norm": 984.209228515625, "learning_rate": 8.432900492978864e-06, "loss": 21.7307, "step": 166600 }, { "epoch": 0.3365627411450527, "grad_norm": 178.183837890625, "learning_rate": 8.43264669483066e-06, "loss": 24.6388, "step": 166610 }, { "epoch": 0.33658294177773646, "grad_norm": 231.317138671875, "learning_rate": 8.432392879952151e-06, "loss": 13.9039, "step": 166620 }, { "epoch": 0.3366031424104203, "grad_norm": 393.9949951171875, "learning_rate": 8.432139048344577e-06, "loss": 18.0523, "step": 166630 }, { "epoch": 0.3366233430431041, "grad_norm": 24.599918365478516, "learning_rate": 8.431885200009172e-06, "loss": 20.6987, "step": 166640 }, { "epoch": 0.3366435436757879, "grad_norm": 223.66964721679688, "learning_rate": 8.431631334947175e-06, "loss": 22.4234, "step": 166650 }, { "epoch": 0.33666374430847174, "grad_norm": 474.1702880859375, "learning_rate": 8.431377453159822e-06, "loss": 17.2442, "step": 166660 }, { "epoch": 0.33668394494115556, "grad_norm": 516.7149047851562, "learning_rate": 8.431123554648354e-06, "loss": 24.7318, "step": 166670 }, { "epoch": 0.3367041455738394, "grad_norm": 380.5775146484375, "learning_rate": 8.430869639414004e-06, "loss": 24.9795, "step": 166680 }, { "epoch": 0.3367243462065232, "grad_norm": 313.3843078613281, "learning_rate": 8.430615707458012e-06, "loss": 23.0944, "step": 166690 }, { "epoch": 0.336744546839207, "grad_norm": 229.4980926513672, "learning_rate": 8.430361758781616e-06, "loss": 26.4191, "step": 166700 }, { "epoch": 0.33676474747189084, "grad_norm": 192.1104278564453, "learning_rate": 8.430107793386053e-06, "loss": 20.8212, "step": 166710 }, { "epoch": 0.33678494810457466, "grad_norm": 387.45477294921875, "learning_rate": 8.42985381127256e-06, "loss": 22.7019, "step": 166720 }, { "epoch": 0.3368051487372585, "grad_norm": 519.1399536132812, "learning_rate": 8.429599812442373e-06, "loss": 26.394, "step": 166730 }, { "epoch": 0.33682534936994224, "grad_norm": 1378.7998046875, "learning_rate": 
8.429345796896736e-06, "loss": 36.4727, "step": 166740 }, { "epoch": 0.33684555000262606, "grad_norm": 334.69732666015625, "learning_rate": 8.429091764636883e-06, "loss": 22.7127, "step": 166750 }, { "epoch": 0.3368657506353099, "grad_norm": 644.4428100585938, "learning_rate": 8.42883771566405e-06, "loss": 27.6707, "step": 166760 }, { "epoch": 0.3368859512679937, "grad_norm": 1111.902587890625, "learning_rate": 8.42858364997948e-06, "loss": 22.6756, "step": 166770 }, { "epoch": 0.3369061519006775, "grad_norm": 492.6197814941406, "learning_rate": 8.428329567584411e-06, "loss": 24.5891, "step": 166780 }, { "epoch": 0.33692635253336134, "grad_norm": 77.93360900878906, "learning_rate": 8.428075468480076e-06, "loss": 16.2545, "step": 166790 }, { "epoch": 0.33694655316604516, "grad_norm": 496.0776672363281, "learning_rate": 8.427821352667719e-06, "loss": 16.1612, "step": 166800 }, { "epoch": 0.336966753798729, "grad_norm": 1901.234619140625, "learning_rate": 8.427567220148574e-06, "loss": 27.2727, "step": 166810 }, { "epoch": 0.3369869544314128, "grad_norm": 264.42022705078125, "learning_rate": 8.427313070923885e-06, "loss": 27.5479, "step": 166820 }, { "epoch": 0.3370071550640966, "grad_norm": 185.50962829589844, "learning_rate": 8.427058904994888e-06, "loss": 11.7651, "step": 166830 }, { "epoch": 0.33702735569678044, "grad_norm": 140.66539001464844, "learning_rate": 8.426804722362818e-06, "loss": 19.9623, "step": 166840 }, { "epoch": 0.33704755632946426, "grad_norm": 75.92221069335938, "learning_rate": 8.42655052302892e-06, "loss": 8.8664, "step": 166850 }, { "epoch": 0.3370677569621481, "grad_norm": 195.6241912841797, "learning_rate": 8.42629630699443e-06, "loss": 18.7624, "step": 166860 }, { "epoch": 0.33708795759483184, "grad_norm": 388.46453857421875, "learning_rate": 8.426042074260588e-06, "loss": 21.4288, "step": 166870 }, { "epoch": 0.33710815822751566, "grad_norm": 273.54736328125, "learning_rate": 8.425787824828632e-06, "loss": 18.4725, "step": 166880 }, { "epoch": 0.3371283588601995, "grad_norm": 492.0992736816406, "learning_rate": 8.425533558699801e-06, "loss": 17.9134, "step": 166890 }, { "epoch": 0.3371485594928833, "grad_norm": 233.55056762695312, "learning_rate": 8.425279275875336e-06, "loss": 14.4852, "step": 166900 }, { "epoch": 0.3371687601255671, "grad_norm": 429.4356689453125, "learning_rate": 8.425024976356474e-06, "loss": 26.6053, "step": 166910 }, { "epoch": 0.33718896075825094, "grad_norm": 406.38671875, "learning_rate": 8.424770660144457e-06, "loss": 30.5793, "step": 166920 }, { "epoch": 0.33720916139093476, "grad_norm": 380.27838134765625, "learning_rate": 8.424516327240521e-06, "loss": 12.3561, "step": 166930 }, { "epoch": 0.3372293620236186, "grad_norm": 353.7559509277344, "learning_rate": 8.424261977645909e-06, "loss": 29.661, "step": 166940 }, { "epoch": 0.3372495626563024, "grad_norm": 482.1020812988281, "learning_rate": 8.424007611361861e-06, "loss": 16.1973, "step": 166950 }, { "epoch": 0.3372697632889862, "grad_norm": 462.35589599609375, "learning_rate": 8.423753228389612e-06, "loss": 19.1275, "step": 166960 }, { "epoch": 0.33728996392167004, "grad_norm": 440.8437194824219, "learning_rate": 8.423498828730408e-06, "loss": 22.6218, "step": 166970 }, { "epoch": 0.33731016455435386, "grad_norm": 386.29315185546875, "learning_rate": 8.423244412385485e-06, "loss": 20.1392, "step": 166980 }, { "epoch": 0.3373303651870377, "grad_norm": 923.9805908203125, "learning_rate": 8.422989979356084e-06, "loss": 15.9717, "step": 166990 }, { "epoch": 0.33735056581972145, 
"grad_norm": 0.0, "learning_rate": 8.422735529643445e-06, "loss": 9.4694, "step": 167000 }, { "epoch": 0.33737076645240527, "grad_norm": 250.28387451171875, "learning_rate": 8.422481063248806e-06, "loss": 18.5734, "step": 167010 }, { "epoch": 0.3373909670850891, "grad_norm": 399.7904968261719, "learning_rate": 8.422226580173411e-06, "loss": 12.4292, "step": 167020 }, { "epoch": 0.3374111677177729, "grad_norm": 453.8663024902344, "learning_rate": 8.4219720804185e-06, "loss": 26.8315, "step": 167030 }, { "epoch": 0.33743136835045673, "grad_norm": 642.8257446289062, "learning_rate": 8.421717563985312e-06, "loss": 21.8601, "step": 167040 }, { "epoch": 0.33745156898314055, "grad_norm": 743.86669921875, "learning_rate": 8.421463030875086e-06, "loss": 37.0026, "step": 167050 }, { "epoch": 0.33747176961582437, "grad_norm": 146.4901580810547, "learning_rate": 8.421208481089064e-06, "loss": 18.3244, "step": 167060 }, { "epoch": 0.3374919702485082, "grad_norm": 168.8124237060547, "learning_rate": 8.42095391462849e-06, "loss": 16.8123, "step": 167070 }, { "epoch": 0.337512170881192, "grad_norm": 547.0553588867188, "learning_rate": 8.420699331494597e-06, "loss": 27.1035, "step": 167080 }, { "epoch": 0.33753237151387583, "grad_norm": 545.5162963867188, "learning_rate": 8.420444731688632e-06, "loss": 26.4211, "step": 167090 }, { "epoch": 0.33755257214655965, "grad_norm": 436.70703125, "learning_rate": 8.420190115211835e-06, "loss": 23.3277, "step": 167100 }, { "epoch": 0.33757277277924347, "grad_norm": 341.232421875, "learning_rate": 8.419935482065447e-06, "loss": 22.7691, "step": 167110 }, { "epoch": 0.3375929734119273, "grad_norm": 736.3862915039062, "learning_rate": 8.419680832250706e-06, "loss": 26.1081, "step": 167120 }, { "epoch": 0.33761317404461105, "grad_norm": 389.464111328125, "learning_rate": 8.419426165768856e-06, "loss": 27.3949, "step": 167130 }, { "epoch": 0.3376333746772949, "grad_norm": 816.3432006835938, "learning_rate": 8.41917148262114e-06, "loss": 25.1434, "step": 167140 }, { "epoch": 0.3376535753099787, "grad_norm": 847.6219482421875, "learning_rate": 8.418916782808795e-06, "loss": 23.2586, "step": 167150 }, { "epoch": 0.3376737759426625, "grad_norm": 1193.47216796875, "learning_rate": 8.418662066333063e-06, "loss": 44.3514, "step": 167160 }, { "epoch": 0.33769397657534633, "grad_norm": 645.29052734375, "learning_rate": 8.418407333195189e-06, "loss": 22.0493, "step": 167170 }, { "epoch": 0.33771417720803015, "grad_norm": 561.77783203125, "learning_rate": 8.418152583396411e-06, "loss": 22.6123, "step": 167180 }, { "epoch": 0.337734377840714, "grad_norm": 269.1815490722656, "learning_rate": 8.417897816937973e-06, "loss": 24.2781, "step": 167190 }, { "epoch": 0.3377545784733978, "grad_norm": 378.8291320800781, "learning_rate": 8.417643033821114e-06, "loss": 14.2875, "step": 167200 }, { "epoch": 0.3377747791060816, "grad_norm": 357.581298828125, "learning_rate": 8.417388234047078e-06, "loss": 12.0459, "step": 167210 }, { "epoch": 0.33779497973876543, "grad_norm": 520.4596557617188, "learning_rate": 8.417133417617107e-06, "loss": 26.0304, "step": 167220 }, { "epoch": 0.33781518037144925, "grad_norm": 137.63626098632812, "learning_rate": 8.416878584532442e-06, "loss": 26.1745, "step": 167230 }, { "epoch": 0.3378353810041331, "grad_norm": 124.04597473144531, "learning_rate": 8.416623734794324e-06, "loss": 26.5604, "step": 167240 }, { "epoch": 0.33785558163681684, "grad_norm": 352.2334899902344, "learning_rate": 8.416368868403997e-06, "loss": 21.9152, "step": 167250 }, { "epoch": 
0.33787578226950066, "grad_norm": 170.1546173095703, "learning_rate": 8.416113985362702e-06, "loss": 16.849, "step": 167260 }, { "epoch": 0.3378959829021845, "grad_norm": 1349.9058837890625, "learning_rate": 8.415859085671683e-06, "loss": 42.5881, "step": 167270 }, { "epoch": 0.3379161835348683, "grad_norm": 1348.8409423828125, "learning_rate": 8.41560416933218e-06, "loss": 24.9556, "step": 167280 }, { "epoch": 0.3379363841675521, "grad_norm": 321.2070007324219, "learning_rate": 8.415349236345436e-06, "loss": 19.3961, "step": 167290 }, { "epoch": 0.33795658480023594, "grad_norm": 342.5564270019531, "learning_rate": 8.415094286712694e-06, "loss": 12.1926, "step": 167300 }, { "epoch": 0.33797678543291976, "grad_norm": 356.09503173828125, "learning_rate": 8.4148393204352e-06, "loss": 14.2662, "step": 167310 }, { "epoch": 0.3379969860656036, "grad_norm": 257.2403869628906, "learning_rate": 8.41458433751419e-06, "loss": 14.5469, "step": 167320 }, { "epoch": 0.3380171866982874, "grad_norm": 479.4848327636719, "learning_rate": 8.41432933795091e-06, "loss": 25.0478, "step": 167330 }, { "epoch": 0.3380373873309712, "grad_norm": 347.92242431640625, "learning_rate": 8.414074321746605e-06, "loss": 13.6998, "step": 167340 }, { "epoch": 0.33805758796365504, "grad_norm": 1818.5078125, "learning_rate": 8.413819288902514e-06, "loss": 38.0327, "step": 167350 }, { "epoch": 0.33807778859633886, "grad_norm": 291.0877380371094, "learning_rate": 8.413564239419883e-06, "loss": 21.6719, "step": 167360 }, { "epoch": 0.3380979892290227, "grad_norm": 540.657470703125, "learning_rate": 8.413309173299954e-06, "loss": 19.6665, "step": 167370 }, { "epoch": 0.33811818986170644, "grad_norm": 268.7587585449219, "learning_rate": 8.41305409054397e-06, "loss": 20.5052, "step": 167380 }, { "epoch": 0.33813839049439026, "grad_norm": 241.68829345703125, "learning_rate": 8.412798991153172e-06, "loss": 20.1494, "step": 167390 }, { "epoch": 0.3381585911270741, "grad_norm": 0.0, "learning_rate": 8.412543875128809e-06, "loss": 13.7161, "step": 167400 }, { "epoch": 0.3381787917597579, "grad_norm": 56.017757415771484, "learning_rate": 8.412288742472118e-06, "loss": 23.5154, "step": 167410 }, { "epoch": 0.3381989923924417, "grad_norm": 938.0296020507812, "learning_rate": 8.412033593184348e-06, "loss": 22.4197, "step": 167420 }, { "epoch": 0.33821919302512554, "grad_norm": 551.4971313476562, "learning_rate": 8.411778427266739e-06, "loss": 22.8464, "step": 167430 }, { "epoch": 0.33823939365780936, "grad_norm": 286.5843505859375, "learning_rate": 8.411523244720536e-06, "loss": 21.0896, "step": 167440 }, { "epoch": 0.3382595942904932, "grad_norm": 519.6744384765625, "learning_rate": 8.411268045546984e-06, "loss": 19.7263, "step": 167450 }, { "epoch": 0.338279794923177, "grad_norm": 20.29805564880371, "learning_rate": 8.411012829747323e-06, "loss": 11.9788, "step": 167460 }, { "epoch": 0.3382999955558608, "grad_norm": 661.3635864257812, "learning_rate": 8.4107575973228e-06, "loss": 21.3432, "step": 167470 }, { "epoch": 0.33832019618854464, "grad_norm": 490.74407958984375, "learning_rate": 8.410502348274658e-06, "loss": 23.1471, "step": 167480 }, { "epoch": 0.33834039682122846, "grad_norm": 433.8485412597656, "learning_rate": 8.410247082604142e-06, "loss": 28.1603, "step": 167490 }, { "epoch": 0.3383605974539123, "grad_norm": 259.97308349609375, "learning_rate": 8.409991800312493e-06, "loss": 45.3952, "step": 167500 }, { "epoch": 0.33838079808659605, "grad_norm": 306.1461181640625, "learning_rate": 8.40973650140096e-06, "loss": 47.7216, 
"step": 167510 }, { "epoch": 0.33840099871927987, "grad_norm": 276.6924133300781, "learning_rate": 8.409481185870783e-06, "loss": 16.2209, "step": 167520 }, { "epoch": 0.3384211993519637, "grad_norm": 317.9801025390625, "learning_rate": 8.409225853723209e-06, "loss": 28.3326, "step": 167530 }, { "epoch": 0.3384413999846475, "grad_norm": 219.22189331054688, "learning_rate": 8.40897050495948e-06, "loss": 23.7641, "step": 167540 }, { "epoch": 0.3384616006173313, "grad_norm": 1.6560962200164795, "learning_rate": 8.408715139580846e-06, "loss": 14.9415, "step": 167550 }, { "epoch": 0.33848180125001515, "grad_norm": 383.08514404296875, "learning_rate": 8.408459757588547e-06, "loss": 13.7631, "step": 167560 }, { "epoch": 0.33850200188269897, "grad_norm": 307.9669494628906, "learning_rate": 8.408204358983826e-06, "loss": 18.9212, "step": 167570 }, { "epoch": 0.3385222025153828, "grad_norm": 147.46270751953125, "learning_rate": 8.407948943767933e-06, "loss": 13.1037, "step": 167580 }, { "epoch": 0.3385424031480666, "grad_norm": 751.38037109375, "learning_rate": 8.407693511942107e-06, "loss": 17.0046, "step": 167590 }, { "epoch": 0.3385626037807504, "grad_norm": 276.7906799316406, "learning_rate": 8.4074380635076e-06, "loss": 24.6428, "step": 167600 }, { "epoch": 0.33858280441343425, "grad_norm": 370.64874267578125, "learning_rate": 8.40718259846565e-06, "loss": 26.6888, "step": 167610 }, { "epoch": 0.33860300504611807, "grad_norm": 313.6131591796875, "learning_rate": 8.406927116817507e-06, "loss": 29.4524, "step": 167620 }, { "epoch": 0.3386232056788019, "grad_norm": 305.2022705078125, "learning_rate": 8.406671618564413e-06, "loss": 28.7683, "step": 167630 }, { "epoch": 0.33864340631148565, "grad_norm": 143.18399047851562, "learning_rate": 8.406416103707616e-06, "loss": 15.4658, "step": 167640 }, { "epoch": 0.33866360694416947, "grad_norm": 73.9214096069336, "learning_rate": 8.406160572248361e-06, "loss": 18.774, "step": 167650 }, { "epoch": 0.3386838075768533, "grad_norm": 483.00140380859375, "learning_rate": 8.40590502418789e-06, "loss": 25.6484, "step": 167660 }, { "epoch": 0.3387040082095371, "grad_norm": 1040.4700927734375, "learning_rate": 8.405649459527453e-06, "loss": 26.0978, "step": 167670 }, { "epoch": 0.33872420884222093, "grad_norm": 157.07838439941406, "learning_rate": 8.405393878268292e-06, "loss": 21.5523, "step": 167680 }, { "epoch": 0.33874440947490475, "grad_norm": 10.15843391418457, "learning_rate": 8.405138280411656e-06, "loss": 18.6426, "step": 167690 }, { "epoch": 0.33876461010758857, "grad_norm": 652.705810546875, "learning_rate": 8.404882665958788e-06, "loss": 18.274, "step": 167700 }, { "epoch": 0.3387848107402724, "grad_norm": 440.4273376464844, "learning_rate": 8.404627034910934e-06, "loss": 18.0438, "step": 167710 }, { "epoch": 0.3388050113729562, "grad_norm": 579.0615234375, "learning_rate": 8.404371387269341e-06, "loss": 22.8465, "step": 167720 }, { "epoch": 0.33882521200564003, "grad_norm": 311.01251220703125, "learning_rate": 8.404115723035256e-06, "loss": 19.5474, "step": 167730 }, { "epoch": 0.33884541263832385, "grad_norm": 482.511474609375, "learning_rate": 8.403860042209923e-06, "loss": 23.372, "step": 167740 }, { "epoch": 0.33886561327100767, "grad_norm": 487.533935546875, "learning_rate": 8.40360434479459e-06, "loss": 38.8385, "step": 167750 }, { "epoch": 0.3388858139036915, "grad_norm": 943.122314453125, "learning_rate": 8.4033486307905e-06, "loss": 23.2046, "step": 167760 }, { "epoch": 0.33890601453637526, "grad_norm": 27.917802810668945, "learning_rate": 
8.403092900198904e-06, "loss": 14.9173, "step": 167770 }, { "epoch": 0.3389262151690591, "grad_norm": 1000.3346557617188, "learning_rate": 8.402837153021047e-06, "loss": 18.9368, "step": 167780 }, { "epoch": 0.3389464158017429, "grad_norm": 476.365234375, "learning_rate": 8.402581389258171e-06, "loss": 19.8223, "step": 167790 }, { "epoch": 0.3389666164344267, "grad_norm": 482.34063720703125, "learning_rate": 8.402325608911527e-06, "loss": 21.4722, "step": 167800 }, { "epoch": 0.33898681706711054, "grad_norm": 1219.3975830078125, "learning_rate": 8.40206981198236e-06, "loss": 30.1687, "step": 167810 }, { "epoch": 0.33900701769979436, "grad_norm": 369.8954772949219, "learning_rate": 8.40181399847192e-06, "loss": 38.7512, "step": 167820 }, { "epoch": 0.3390272183324782, "grad_norm": 641.1861572265625, "learning_rate": 8.40155816838145e-06, "loss": 26.7039, "step": 167830 }, { "epoch": 0.339047418965162, "grad_norm": 447.89642333984375, "learning_rate": 8.401302321712198e-06, "loss": 20.2609, "step": 167840 }, { "epoch": 0.3390676195978458, "grad_norm": 528.932373046875, "learning_rate": 8.40104645846541e-06, "loss": 8.231, "step": 167850 }, { "epoch": 0.33908782023052964, "grad_norm": 164.8485870361328, "learning_rate": 8.400790578642333e-06, "loss": 11.0415, "step": 167860 }, { "epoch": 0.33910802086321346, "grad_norm": 832.0770263671875, "learning_rate": 8.400534682244217e-06, "loss": 31.5946, "step": 167870 }, { "epoch": 0.3391282214958973, "grad_norm": 646.8993530273438, "learning_rate": 8.400278769272307e-06, "loss": 36.4199, "step": 167880 }, { "epoch": 0.33914842212858104, "grad_norm": 182.03268432617188, "learning_rate": 8.400022839727853e-06, "loss": 13.3391, "step": 167890 }, { "epoch": 0.33916862276126486, "grad_norm": 550.815673828125, "learning_rate": 8.399766893612096e-06, "loss": 12.5295, "step": 167900 }, { "epoch": 0.3391888233939487, "grad_norm": 277.0923156738281, "learning_rate": 8.399510930926291e-06, "loss": 25.4, "step": 167910 }, { "epoch": 0.3392090240266325, "grad_norm": 38.229129791259766, "learning_rate": 8.399254951671681e-06, "loss": 23.5104, "step": 167920 }, { "epoch": 0.3392292246593163, "grad_norm": 428.00860595703125, "learning_rate": 8.398998955849513e-06, "loss": 50.9553, "step": 167930 }, { "epoch": 0.33924942529200014, "grad_norm": 448.3719787597656, "learning_rate": 8.398742943461038e-06, "loss": 26.2329, "step": 167940 }, { "epoch": 0.33926962592468396, "grad_norm": 747.2113037109375, "learning_rate": 8.398486914507501e-06, "loss": 23.6032, "step": 167950 }, { "epoch": 0.3392898265573678, "grad_norm": 437.9092712402344, "learning_rate": 8.398230868990151e-06, "loss": 26.8879, "step": 167960 }, { "epoch": 0.3393100271900516, "grad_norm": 383.5091247558594, "learning_rate": 8.397974806910237e-06, "loss": 26.357, "step": 167970 }, { "epoch": 0.3393302278227354, "grad_norm": 636.2250366210938, "learning_rate": 8.397718728269006e-06, "loss": 17.1569, "step": 167980 }, { "epoch": 0.33935042845541924, "grad_norm": 41.08114242553711, "learning_rate": 8.397462633067705e-06, "loss": 24.3474, "step": 167990 }, { "epoch": 0.33937062908810306, "grad_norm": 103.65257263183594, "learning_rate": 8.397206521307584e-06, "loss": 21.3358, "step": 168000 }, { "epoch": 0.3393908297207869, "grad_norm": 286.0574951171875, "learning_rate": 8.396950392989888e-06, "loss": 15.17, "step": 168010 }, { "epoch": 0.33941103035347064, "grad_norm": 19.315410614013672, "learning_rate": 8.396694248115871e-06, "loss": 20.8692, "step": 168020 }, { "epoch": 0.33943123098615446, "grad_norm": 
479.29913330078125, "learning_rate": 8.396438086686779e-06, "loss": 43.0148, "step": 168030 }, { "epoch": 0.3394514316188383, "grad_norm": 248.98126220703125, "learning_rate": 8.396181908703855e-06, "loss": 13.8845, "step": 168040 }, { "epoch": 0.3394716322515221, "grad_norm": 78.33552551269531, "learning_rate": 8.395925714168356e-06, "loss": 50.4541, "step": 168050 }, { "epoch": 0.3394918328842059, "grad_norm": 525.9219970703125, "learning_rate": 8.395669503081524e-06, "loss": 14.7674, "step": 168060 }, { "epoch": 0.33951203351688974, "grad_norm": 302.1783447265625, "learning_rate": 8.395413275444614e-06, "loss": 16.9842, "step": 168070 }, { "epoch": 0.33953223414957356, "grad_norm": 434.3306579589844, "learning_rate": 8.39515703125887e-06, "loss": 26.1344, "step": 168080 }, { "epoch": 0.3395524347822574, "grad_norm": 178.66641235351562, "learning_rate": 8.394900770525543e-06, "loss": 17.0307, "step": 168090 }, { "epoch": 0.3395726354149412, "grad_norm": 508.07611083984375, "learning_rate": 8.394644493245882e-06, "loss": 26.2574, "step": 168100 }, { "epoch": 0.339592836047625, "grad_norm": 185.32598876953125, "learning_rate": 8.394388199421133e-06, "loss": 9.7913, "step": 168110 }, { "epoch": 0.33961303668030884, "grad_norm": 689.20068359375, "learning_rate": 8.39413188905255e-06, "loss": 29.8605, "step": 168120 }, { "epoch": 0.33963323731299266, "grad_norm": 135.01458740234375, "learning_rate": 8.393875562141379e-06, "loss": 16.6367, "step": 168130 }, { "epoch": 0.3396534379456765, "grad_norm": 391.94451904296875, "learning_rate": 8.39361921868887e-06, "loss": 20.4665, "step": 168140 }, { "epoch": 0.33967363857836025, "grad_norm": 283.5951843261719, "learning_rate": 8.393362858696272e-06, "loss": 11.1001, "step": 168150 }, { "epoch": 0.33969383921104407, "grad_norm": 760.8709106445312, "learning_rate": 8.393106482164836e-06, "loss": 30.6613, "step": 168160 }, { "epoch": 0.3397140398437279, "grad_norm": 733.862060546875, "learning_rate": 8.39285008909581e-06, "loss": 26.6271, "step": 168170 }, { "epoch": 0.3397342404764117, "grad_norm": 348.95367431640625, "learning_rate": 8.392593679490444e-06, "loss": 16.9305, "step": 168180 }, { "epoch": 0.33975444110909553, "grad_norm": 204.7607421875, "learning_rate": 8.392337253349988e-06, "loss": 9.565, "step": 168190 }, { "epoch": 0.33977464174177935, "grad_norm": 272.72021484375, "learning_rate": 8.392080810675692e-06, "loss": 28.3992, "step": 168200 }, { "epoch": 0.33979484237446317, "grad_norm": 517.2645874023438, "learning_rate": 8.391824351468805e-06, "loss": 33.936, "step": 168210 }, { "epoch": 0.339815043007147, "grad_norm": 421.4093322753906, "learning_rate": 8.391567875730577e-06, "loss": 21.1351, "step": 168220 }, { "epoch": 0.3398352436398308, "grad_norm": 401.4591064453125, "learning_rate": 8.39131138346226e-06, "loss": 23.4746, "step": 168230 }, { "epoch": 0.33985544427251463, "grad_norm": 363.4436340332031, "learning_rate": 8.391054874665103e-06, "loss": 21.9265, "step": 168240 }, { "epoch": 0.33987564490519845, "grad_norm": 158.54002380371094, "learning_rate": 8.390798349340354e-06, "loss": 15.752, "step": 168250 }, { "epoch": 0.33989584553788227, "grad_norm": 442.8247375488281, "learning_rate": 8.390541807489266e-06, "loss": 27.6127, "step": 168260 }, { "epoch": 0.3399160461705661, "grad_norm": 410.08660888671875, "learning_rate": 8.390285249113088e-06, "loss": 17.5862, "step": 168270 }, { "epoch": 0.33993624680324985, "grad_norm": 485.7059631347656, "learning_rate": 8.390028674213072e-06, "loss": 17.1023, "step": 168280 }, { 
"epoch": 0.3399564474359337, "grad_norm": 242.07127380371094, "learning_rate": 8.389772082790466e-06, "loss": 15.6061, "step": 168290 }, { "epoch": 0.3399766480686175, "grad_norm": 182.19862365722656, "learning_rate": 8.389515474846522e-06, "loss": 23.0791, "step": 168300 }, { "epoch": 0.3399968487013013, "grad_norm": 96.41129302978516, "learning_rate": 8.389258850382491e-06, "loss": 16.7356, "step": 168310 }, { "epoch": 0.34001704933398513, "grad_norm": 402.6282958984375, "learning_rate": 8.389002209399625e-06, "loss": 17.2682, "step": 168320 }, { "epoch": 0.34003724996666895, "grad_norm": 400.3856506347656, "learning_rate": 8.388745551899172e-06, "loss": 14.5012, "step": 168330 }, { "epoch": 0.3400574505993528, "grad_norm": 468.0992126464844, "learning_rate": 8.388488877882383e-06, "loss": 20.7429, "step": 168340 }, { "epoch": 0.3400776512320366, "grad_norm": 342.0508117675781, "learning_rate": 8.388232187350513e-06, "loss": 21.1195, "step": 168350 }, { "epoch": 0.3400978518647204, "grad_norm": 225.8852996826172, "learning_rate": 8.387975480304808e-06, "loss": 20.6178, "step": 168360 }, { "epoch": 0.34011805249740423, "grad_norm": 150.79534912109375, "learning_rate": 8.387718756746522e-06, "loss": 25.234, "step": 168370 }, { "epoch": 0.34013825313008805, "grad_norm": 692.2120361328125, "learning_rate": 8.387462016676906e-06, "loss": 25.3535, "step": 168380 }, { "epoch": 0.3401584537627719, "grad_norm": 92.25536346435547, "learning_rate": 8.387205260097211e-06, "loss": 27.1206, "step": 168390 }, { "epoch": 0.3401786543954557, "grad_norm": 384.4954833984375, "learning_rate": 8.386948487008687e-06, "loss": 22.4686, "step": 168400 }, { "epoch": 0.34019885502813946, "grad_norm": 314.7448425292969, "learning_rate": 8.386691697412588e-06, "loss": 20.4522, "step": 168410 }, { "epoch": 0.3402190556608233, "grad_norm": 144.3899688720703, "learning_rate": 8.386434891310164e-06, "loss": 24.6264, "step": 168420 }, { "epoch": 0.3402392562935071, "grad_norm": 385.3217468261719, "learning_rate": 8.386178068702669e-06, "loss": 24.4465, "step": 168430 }, { "epoch": 0.3402594569261909, "grad_norm": 705.103759765625, "learning_rate": 8.385921229591351e-06, "loss": 17.2236, "step": 168440 }, { "epoch": 0.34027965755887474, "grad_norm": 669.4501953125, "learning_rate": 8.385664373977462e-06, "loss": 20.0496, "step": 168450 }, { "epoch": 0.34029985819155856, "grad_norm": 634.993408203125, "learning_rate": 8.385407501862258e-06, "loss": 24.514, "step": 168460 }, { "epoch": 0.3403200588242424, "grad_norm": 399.2812194824219, "learning_rate": 8.385150613246989e-06, "loss": 10.7176, "step": 168470 }, { "epoch": 0.3403402594569262, "grad_norm": 661.9140625, "learning_rate": 8.384893708132904e-06, "loss": 19.4782, "step": 168480 }, { "epoch": 0.34036046008961, "grad_norm": 349.7503967285156, "learning_rate": 8.384636786521259e-06, "loss": 18.4263, "step": 168490 }, { "epoch": 0.34038066072229384, "grad_norm": 115.83380889892578, "learning_rate": 8.384379848413304e-06, "loss": 17.163, "step": 168500 }, { "epoch": 0.34040086135497766, "grad_norm": 324.6518859863281, "learning_rate": 8.384122893810294e-06, "loss": 15.6572, "step": 168510 }, { "epoch": 0.3404210619876615, "grad_norm": 328.8683776855469, "learning_rate": 8.383865922713478e-06, "loss": 49.4089, "step": 168520 }, { "epoch": 0.34044126262034524, "grad_norm": 631.0416870117188, "learning_rate": 8.383608935124109e-06, "loss": 18.9201, "step": 168530 }, { "epoch": 0.34046146325302906, "grad_norm": 533.4650268554688, "learning_rate": 8.383351931043441e-06, 
"loss": 16.3636, "step": 168540 }, { "epoch": 0.3404816638857129, "grad_norm": 6.021905899047852, "learning_rate": 8.383094910472728e-06, "loss": 12.5463, "step": 168550 }, { "epoch": 0.3405018645183967, "grad_norm": 1174.9256591796875, "learning_rate": 8.38283787341322e-06, "loss": 25.0581, "step": 168560 }, { "epoch": 0.3405220651510805, "grad_norm": 306.4240417480469, "learning_rate": 8.382580819866168e-06, "loss": 12.3355, "step": 168570 }, { "epoch": 0.34054226578376434, "grad_norm": 356.0873107910156, "learning_rate": 8.38232374983283e-06, "loss": 35.7938, "step": 168580 }, { "epoch": 0.34056246641644816, "grad_norm": 685.3045654296875, "learning_rate": 8.382066663314455e-06, "loss": 43.9767, "step": 168590 }, { "epoch": 0.340582667049132, "grad_norm": 644.2220458984375, "learning_rate": 8.381809560312298e-06, "loss": 29.4125, "step": 168600 }, { "epoch": 0.3406028676818158, "grad_norm": 187.02745056152344, "learning_rate": 8.381552440827611e-06, "loss": 31.9015, "step": 168610 }, { "epoch": 0.3406230683144996, "grad_norm": 68.95345306396484, "learning_rate": 8.381295304861647e-06, "loss": 33.2846, "step": 168620 }, { "epoch": 0.34064326894718344, "grad_norm": 502.47271728515625, "learning_rate": 8.381038152415661e-06, "loss": 29.9597, "step": 168630 }, { "epoch": 0.34066346957986726, "grad_norm": 481.96142578125, "learning_rate": 8.380780983490903e-06, "loss": 24.2839, "step": 168640 }, { "epoch": 0.3406836702125511, "grad_norm": 258.9666748046875, "learning_rate": 8.38052379808863e-06, "loss": 32.6194, "step": 168650 }, { "epoch": 0.34070387084523485, "grad_norm": 320.35662841796875, "learning_rate": 8.380266596210095e-06, "loss": 32.8204, "step": 168660 }, { "epoch": 0.34072407147791867, "grad_norm": 619.9447631835938, "learning_rate": 8.380009377856548e-06, "loss": 21.3143, "step": 168670 }, { "epoch": 0.3407442721106025, "grad_norm": 626.5851440429688, "learning_rate": 8.379752143029248e-06, "loss": 21.0067, "step": 168680 }, { "epoch": 0.3407644727432863, "grad_norm": 4.577370643615723, "learning_rate": 8.379494891729445e-06, "loss": 26.3092, "step": 168690 }, { "epoch": 0.3407846733759701, "grad_norm": 627.0779418945312, "learning_rate": 8.379237623958393e-06, "loss": 19.199, "step": 168700 }, { "epoch": 0.34080487400865395, "grad_norm": 334.8100891113281, "learning_rate": 8.378980339717348e-06, "loss": 19.6408, "step": 168710 }, { "epoch": 0.34082507464133777, "grad_norm": 706.650634765625, "learning_rate": 8.37872303900756e-06, "loss": 31.1062, "step": 168720 }, { "epoch": 0.3408452752740216, "grad_norm": 265.525390625, "learning_rate": 8.378465721830289e-06, "loss": 16.0674, "step": 168730 }, { "epoch": 0.3408654759067054, "grad_norm": 197.72540283203125, "learning_rate": 8.378208388186784e-06, "loss": 50.1339, "step": 168740 }, { "epoch": 0.3408856765393892, "grad_norm": 260.6598815917969, "learning_rate": 8.377951038078303e-06, "loss": 34.7109, "step": 168750 }, { "epoch": 0.34090587717207305, "grad_norm": 24.389583587646484, "learning_rate": 8.377693671506094e-06, "loss": 14.9343, "step": 168760 }, { "epoch": 0.34092607780475687, "grad_norm": 325.55029296875, "learning_rate": 8.37743628847142e-06, "loss": 22.3212, "step": 168770 }, { "epoch": 0.3409462784374407, "grad_norm": 335.56439208984375, "learning_rate": 8.37717888897553e-06, "loss": 16.7277, "step": 168780 }, { "epoch": 0.34096647907012445, "grad_norm": 328.0765075683594, "learning_rate": 8.37692147301968e-06, "loss": 15.7982, "step": 168790 }, { "epoch": 0.34098667970280827, "grad_norm": 614.3564453125, 
"learning_rate": 8.376664040605122e-06, "loss": 21.111, "step": 168800 }, { "epoch": 0.3410068803354921, "grad_norm": 224.3957061767578, "learning_rate": 8.376406591733115e-06, "loss": 18.3861, "step": 168810 }, { "epoch": 0.3410270809681759, "grad_norm": 324.8829650878906, "learning_rate": 8.37614912640491e-06, "loss": 32.3365, "step": 168820 }, { "epoch": 0.34104728160085973, "grad_norm": 411.1203918457031, "learning_rate": 8.375891644621767e-06, "loss": 11.3225, "step": 168830 }, { "epoch": 0.34106748223354355, "grad_norm": 236.78245544433594, "learning_rate": 8.375634146384937e-06, "loss": 15.5481, "step": 168840 }, { "epoch": 0.34108768286622737, "grad_norm": 6.189517498016357, "learning_rate": 8.375376631695673e-06, "loss": 17.8311, "step": 168850 }, { "epoch": 0.3411078834989112, "grad_norm": 467.2071228027344, "learning_rate": 8.375119100555234e-06, "loss": 17.9199, "step": 168860 }, { "epoch": 0.341128084131595, "grad_norm": 391.2217712402344, "learning_rate": 8.374861552964875e-06, "loss": 16.9528, "step": 168870 }, { "epoch": 0.34114828476427883, "grad_norm": 188.17681884765625, "learning_rate": 8.374603988925848e-06, "loss": 15.361, "step": 168880 }, { "epoch": 0.34116848539696265, "grad_norm": 599.74365234375, "learning_rate": 8.37434640843941e-06, "loss": 15.0419, "step": 168890 }, { "epoch": 0.34118868602964647, "grad_norm": 324.143798828125, "learning_rate": 8.374088811506819e-06, "loss": 23.422, "step": 168900 }, { "epoch": 0.3412088866623303, "grad_norm": 772.922607421875, "learning_rate": 8.373831198129327e-06, "loss": 15.1421, "step": 168910 }, { "epoch": 0.34122908729501406, "grad_norm": 435.1819152832031, "learning_rate": 8.373573568308193e-06, "loss": 24.3665, "step": 168920 }, { "epoch": 0.3412492879276979, "grad_norm": 1819.421630859375, "learning_rate": 8.37331592204467e-06, "loss": 31.2344, "step": 168930 }, { "epoch": 0.3412694885603817, "grad_norm": 21.15869140625, "learning_rate": 8.373058259340012e-06, "loss": 27.9389, "step": 168940 }, { "epoch": 0.3412896891930655, "grad_norm": 523.2466430664062, "learning_rate": 8.372800580195479e-06, "loss": 23.8264, "step": 168950 }, { "epoch": 0.34130988982574934, "grad_norm": 307.0376892089844, "learning_rate": 8.372542884612324e-06, "loss": 17.7892, "step": 168960 }, { "epoch": 0.34133009045843316, "grad_norm": 209.58401489257812, "learning_rate": 8.372285172591806e-06, "loss": 24.8336, "step": 168970 }, { "epoch": 0.341350291091117, "grad_norm": 416.9378662109375, "learning_rate": 8.372027444135176e-06, "loss": 25.3283, "step": 168980 }, { "epoch": 0.3413704917238008, "grad_norm": 66.7175064086914, "learning_rate": 8.371769699243694e-06, "loss": 14.9726, "step": 168990 }, { "epoch": 0.3413906923564846, "grad_norm": 1017.1961059570312, "learning_rate": 8.371511937918616e-06, "loss": 26.7788, "step": 169000 }, { "epoch": 0.34141089298916844, "grad_norm": 16.84259033203125, "learning_rate": 8.3712541601612e-06, "loss": 9.0483, "step": 169010 }, { "epoch": 0.34143109362185226, "grad_norm": 278.5416259765625, "learning_rate": 8.370996365972698e-06, "loss": 19.8899, "step": 169020 }, { "epoch": 0.3414512942545361, "grad_norm": 206.6166534423828, "learning_rate": 8.37073855535437e-06, "loss": 19.9386, "step": 169030 }, { "epoch": 0.3414714948872199, "grad_norm": 321.22039794921875, "learning_rate": 8.370480728307469e-06, "loss": 21.6634, "step": 169040 }, { "epoch": 0.34149169551990366, "grad_norm": 167.9938201904297, "learning_rate": 8.370222884833255e-06, "loss": 13.9485, "step": 169050 }, { "epoch": 0.3415118961525875, 
"grad_norm": 753.5398559570312, "learning_rate": 8.369965024932983e-06, "loss": 33.6936, "step": 169060 }, { "epoch": 0.3415320967852713, "grad_norm": 22.527313232421875, "learning_rate": 8.36970714860791e-06, "loss": 20.0193, "step": 169070 }, { "epoch": 0.3415522974179551, "grad_norm": 152.59750366210938, "learning_rate": 8.369449255859294e-06, "loss": 26.9117, "step": 169080 }, { "epoch": 0.34157249805063894, "grad_norm": 450.9139099121094, "learning_rate": 8.36919134668839e-06, "loss": 9.5488, "step": 169090 }, { "epoch": 0.34159269868332276, "grad_norm": 136.21226501464844, "learning_rate": 8.368933421096454e-06, "loss": 27.497, "step": 169100 }, { "epoch": 0.3416128993160066, "grad_norm": 655.6229858398438, "learning_rate": 8.368675479084749e-06, "loss": 37.529, "step": 169110 }, { "epoch": 0.3416330999486904, "grad_norm": 111.01313018798828, "learning_rate": 8.368417520654526e-06, "loss": 15.1597, "step": 169120 }, { "epoch": 0.3416533005813742, "grad_norm": 258.2264404296875, "learning_rate": 8.368159545807047e-06, "loss": 21.8688, "step": 169130 }, { "epoch": 0.34167350121405804, "grad_norm": 248.80821228027344, "learning_rate": 8.367901554543563e-06, "loss": 14.1267, "step": 169140 }, { "epoch": 0.34169370184674186, "grad_norm": 0.0, "learning_rate": 8.367643546865339e-06, "loss": 16.2312, "step": 169150 }, { "epoch": 0.3417139024794257, "grad_norm": 121.87797546386719, "learning_rate": 8.367385522773625e-06, "loss": 14.3055, "step": 169160 }, { "epoch": 0.34173410311210944, "grad_norm": 494.537841796875, "learning_rate": 8.367127482269686e-06, "loss": 20.4748, "step": 169170 }, { "epoch": 0.34175430374479326, "grad_norm": 254.8951873779297, "learning_rate": 8.366869425354774e-06, "loss": 12.5494, "step": 169180 }, { "epoch": 0.3417745043774771, "grad_norm": 348.20233154296875, "learning_rate": 8.36661135203015e-06, "loss": 12.4797, "step": 169190 }, { "epoch": 0.3417947050101609, "grad_norm": 871.5491333007812, "learning_rate": 8.366353262297069e-06, "loss": 29.6144, "step": 169200 }, { "epoch": 0.3418149056428447, "grad_norm": 497.8019104003906, "learning_rate": 8.366095156156793e-06, "loss": 22.8318, "step": 169210 }, { "epoch": 0.34183510627552854, "grad_norm": 623.549072265625, "learning_rate": 8.365837033610576e-06, "loss": 38.8452, "step": 169220 }, { "epoch": 0.34185530690821236, "grad_norm": 638.8836669921875, "learning_rate": 8.365578894659677e-06, "loss": 14.1556, "step": 169230 }, { "epoch": 0.3418755075408962, "grad_norm": 375.0995178222656, "learning_rate": 8.365320739305355e-06, "loss": 19.7537, "step": 169240 }, { "epoch": 0.34189570817358, "grad_norm": 932.085205078125, "learning_rate": 8.365062567548868e-06, "loss": 26.9566, "step": 169250 }, { "epoch": 0.3419159088062638, "grad_norm": 45.37674331665039, "learning_rate": 8.364804379391474e-06, "loss": 33.6483, "step": 169260 }, { "epoch": 0.34193610943894764, "grad_norm": 479.5264892578125, "learning_rate": 8.364546174834431e-06, "loss": 36.6652, "step": 169270 }, { "epoch": 0.34195631007163146, "grad_norm": 696.8583374023438, "learning_rate": 8.364287953879e-06, "loss": 18.2568, "step": 169280 }, { "epoch": 0.3419765107043153, "grad_norm": 393.2134094238281, "learning_rate": 8.364029716526437e-06, "loss": 13.7931, "step": 169290 }, { "epoch": 0.34199671133699905, "grad_norm": 101.37198638916016, "learning_rate": 8.363771462778e-06, "loss": 29.9042, "step": 169300 }, { "epoch": 0.34201691196968287, "grad_norm": 689.8870239257812, "learning_rate": 8.36351319263495e-06, "loss": 22.3282, "step": 169310 }, { 
"epoch": 0.3420371126023667, "grad_norm": 436.01104736328125, "learning_rate": 8.363254906098543e-06, "loss": 24.0667, "step": 169320 }, { "epoch": 0.3420573132350505, "grad_norm": 552.0741577148438, "learning_rate": 8.36299660317004e-06, "loss": 20.0332, "step": 169330 }, { "epoch": 0.34207751386773433, "grad_norm": 387.7598876953125, "learning_rate": 8.3627382838507e-06, "loss": 28.4696, "step": 169340 }, { "epoch": 0.34209771450041815, "grad_norm": 443.9676818847656, "learning_rate": 8.36247994814178e-06, "loss": 14.1799, "step": 169350 }, { "epoch": 0.34211791513310197, "grad_norm": 539.8017578125, "learning_rate": 8.362221596044542e-06, "loss": 24.8048, "step": 169360 }, { "epoch": 0.3421381157657858, "grad_norm": 235.52764892578125, "learning_rate": 8.361963227560244e-06, "loss": 23.0377, "step": 169370 }, { "epoch": 0.3421583163984696, "grad_norm": 80.1401596069336, "learning_rate": 8.361704842690144e-06, "loss": 16.5207, "step": 169380 }, { "epoch": 0.34217851703115343, "grad_norm": 561.2789916992188, "learning_rate": 8.361446441435503e-06, "loss": 26.294, "step": 169390 }, { "epoch": 0.34219871766383725, "grad_norm": 302.1050720214844, "learning_rate": 8.361188023797581e-06, "loss": 19.0365, "step": 169400 }, { "epoch": 0.34221891829652107, "grad_norm": 692.5919799804688, "learning_rate": 8.360929589777634e-06, "loss": 29.2574, "step": 169410 }, { "epoch": 0.3422391189292049, "grad_norm": 690.12646484375, "learning_rate": 8.360671139376925e-06, "loss": 13.6711, "step": 169420 }, { "epoch": 0.34225931956188865, "grad_norm": 266.81707763671875, "learning_rate": 8.360412672596712e-06, "loss": 20.4914, "step": 169430 }, { "epoch": 0.3422795201945725, "grad_norm": 605.1412353515625, "learning_rate": 8.360154189438257e-06, "loss": 18.8189, "step": 169440 }, { "epoch": 0.3422997208272563, "grad_norm": 182.8216094970703, "learning_rate": 8.359895689902815e-06, "loss": 22.1485, "step": 169450 }, { "epoch": 0.3423199214599401, "grad_norm": 551.4183959960938, "learning_rate": 8.35963717399165e-06, "loss": 15.8574, "step": 169460 }, { "epoch": 0.34234012209262393, "grad_norm": 714.9756469726562, "learning_rate": 8.359378641706021e-06, "loss": 23.5323, "step": 169470 }, { "epoch": 0.34236032272530775, "grad_norm": 460.0231018066406, "learning_rate": 8.35912009304719e-06, "loss": 19.5928, "step": 169480 }, { "epoch": 0.3423805233579916, "grad_norm": 262.87603759765625, "learning_rate": 8.358861528016413e-06, "loss": 35.9306, "step": 169490 }, { "epoch": 0.3424007239906754, "grad_norm": 171.7864990234375, "learning_rate": 8.358602946614952e-06, "loss": 24.3828, "step": 169500 }, { "epoch": 0.3424209246233592, "grad_norm": 451.3740234375, "learning_rate": 8.358344348844068e-06, "loss": 18.9796, "step": 169510 }, { "epoch": 0.34244112525604303, "grad_norm": 72.87115478515625, "learning_rate": 8.358085734705021e-06, "loss": 18.0904, "step": 169520 }, { "epoch": 0.34246132588872685, "grad_norm": 197.48690795898438, "learning_rate": 8.357827104199073e-06, "loss": 18.9026, "step": 169530 }, { "epoch": 0.3424815265214107, "grad_norm": 776.4761962890625, "learning_rate": 8.357568457327481e-06, "loss": 26.4053, "step": 169540 }, { "epoch": 0.3425017271540945, "grad_norm": 186.80355834960938, "learning_rate": 8.357309794091508e-06, "loss": 21.6301, "step": 169550 }, { "epoch": 0.34252192778677826, "grad_norm": 293.9378662109375, "learning_rate": 8.357051114492414e-06, "loss": 10.8797, "step": 169560 }, { "epoch": 0.3425421284194621, "grad_norm": 304.8777160644531, "learning_rate": 8.35679241853146e-06, 
"loss": 15.5547, "step": 169570 }, { "epoch": 0.3425623290521459, "grad_norm": 469.260498046875, "learning_rate": 8.356533706209907e-06, "loss": 18.2517, "step": 169580 }, { "epoch": 0.3425825296848297, "grad_norm": 459.9218444824219, "learning_rate": 8.356274977529015e-06, "loss": 21.3798, "step": 169590 }, { "epoch": 0.34260273031751354, "grad_norm": 429.17529296875, "learning_rate": 8.356016232490047e-06, "loss": 27.0815, "step": 169600 }, { "epoch": 0.34262293095019736, "grad_norm": 239.51206970214844, "learning_rate": 8.355757471094263e-06, "loss": 13.7177, "step": 169610 }, { "epoch": 0.3426431315828812, "grad_norm": 382.5251159667969, "learning_rate": 8.355498693342925e-06, "loss": 25.9149, "step": 169620 }, { "epoch": 0.342663332215565, "grad_norm": 422.1537780761719, "learning_rate": 8.355239899237291e-06, "loss": 24.0822, "step": 169630 }, { "epoch": 0.3426835328482488, "grad_norm": 0.0, "learning_rate": 8.354981088778626e-06, "loss": 15.7907, "step": 169640 }, { "epoch": 0.34270373348093264, "grad_norm": 763.3910522460938, "learning_rate": 8.35472226196819e-06, "loss": 22.5954, "step": 169650 }, { "epoch": 0.34272393411361646, "grad_norm": 506.7947998046875, "learning_rate": 8.354463418807245e-06, "loss": 17.1152, "step": 169660 }, { "epoch": 0.3427441347463003, "grad_norm": 408.3777770996094, "learning_rate": 8.35420455929705e-06, "loss": 14.6228, "step": 169670 }, { "epoch": 0.3427643353789841, "grad_norm": 161.3364715576172, "learning_rate": 8.35394568343887e-06, "loss": 24.6751, "step": 169680 }, { "epoch": 0.34278453601166786, "grad_norm": 251.61575317382812, "learning_rate": 8.353686791233969e-06, "loss": 15.8552, "step": 169690 }, { "epoch": 0.3428047366443517, "grad_norm": 247.20155334472656, "learning_rate": 8.353427882683601e-06, "loss": 20.0482, "step": 169700 }, { "epoch": 0.3428249372770355, "grad_norm": 528.7928466796875, "learning_rate": 8.353168957789033e-06, "loss": 27.7193, "step": 169710 }, { "epoch": 0.3428451379097193, "grad_norm": 566.47705078125, "learning_rate": 8.352910016551527e-06, "loss": 28.6182, "step": 169720 }, { "epoch": 0.34286533854240314, "grad_norm": 401.88116455078125, "learning_rate": 8.352651058972344e-06, "loss": 25.6401, "step": 169730 }, { "epoch": 0.34288553917508696, "grad_norm": 815.401611328125, "learning_rate": 8.352392085052748e-06, "loss": 23.015, "step": 169740 }, { "epoch": 0.3429057398077708, "grad_norm": 188.4127197265625, "learning_rate": 8.352133094793996e-06, "loss": 25.489, "step": 169750 }, { "epoch": 0.3429259404404546, "grad_norm": 434.46868896484375, "learning_rate": 8.351874088197356e-06, "loss": 14.7717, "step": 169760 }, { "epoch": 0.3429461410731384, "grad_norm": 288.19000244140625, "learning_rate": 8.351615065264088e-06, "loss": 28.5178, "step": 169770 }, { "epoch": 0.34296634170582224, "grad_norm": 353.88775634765625, "learning_rate": 8.351356025995454e-06, "loss": 21.3087, "step": 169780 }, { "epoch": 0.34298654233850606, "grad_norm": 359.5602111816406, "learning_rate": 8.351096970392718e-06, "loss": 20.6816, "step": 169790 }, { "epoch": 0.3430067429711899, "grad_norm": 336.394775390625, "learning_rate": 8.350837898457142e-06, "loss": 32.5338, "step": 169800 }, { "epoch": 0.34302694360387365, "grad_norm": 65.51998901367188, "learning_rate": 8.350578810189988e-06, "loss": 18.3302, "step": 169810 }, { "epoch": 0.34304714423655747, "grad_norm": 410.8455505371094, "learning_rate": 8.35031970559252e-06, "loss": 14.3117, "step": 169820 }, { "epoch": 0.3430673448692413, "grad_norm": 801.1245727539062, "learning_rate": 
8.350060584666e-06, "loss": 27.8058, "step": 169830 }, { "epoch": 0.3430875455019251, "grad_norm": 637.9276123046875, "learning_rate": 8.34980144741169e-06, "loss": 26.9451, "step": 169840 }, { "epoch": 0.3431077461346089, "grad_norm": 320.0799560546875, "learning_rate": 8.349542293830854e-06, "loss": 20.0242, "step": 169850 }, { "epoch": 0.34312794676729275, "grad_norm": 358.5277099609375, "learning_rate": 8.349283123924756e-06, "loss": 22.6337, "step": 169860 }, { "epoch": 0.34314814739997657, "grad_norm": 101.61077117919922, "learning_rate": 8.349023937694658e-06, "loss": 23.6921, "step": 169870 }, { "epoch": 0.3431683480326604, "grad_norm": 416.40826416015625, "learning_rate": 8.348764735141823e-06, "loss": 22.7895, "step": 169880 }, { "epoch": 0.3431885486653442, "grad_norm": 31.98366928100586, "learning_rate": 8.348505516267515e-06, "loss": 39.1289, "step": 169890 }, { "epoch": 0.343208749298028, "grad_norm": 885.4635620117188, "learning_rate": 8.348246281072998e-06, "loss": 34.4323, "step": 169900 }, { "epoch": 0.34322894993071185, "grad_norm": 2524.4736328125, "learning_rate": 8.347987029559534e-06, "loss": 46.8328, "step": 169910 }, { "epoch": 0.34324915056339567, "grad_norm": 428.1924743652344, "learning_rate": 8.347727761728388e-06, "loss": 16.0055, "step": 169920 }, { "epoch": 0.3432693511960795, "grad_norm": 1.726176381111145, "learning_rate": 8.347468477580822e-06, "loss": 13.6668, "step": 169930 }, { "epoch": 0.34328955182876325, "grad_norm": 56.078792572021484, "learning_rate": 8.347209177118101e-06, "loss": 16.26, "step": 169940 }, { "epoch": 0.34330975246144707, "grad_norm": 474.4909362792969, "learning_rate": 8.346949860341489e-06, "loss": 22.5801, "step": 169950 }, { "epoch": 0.3433299530941309, "grad_norm": 316.09130859375, "learning_rate": 8.34669052725225e-06, "loss": 14.1333, "step": 169960 }, { "epoch": 0.3433501537268147, "grad_norm": 513.1192626953125, "learning_rate": 8.346431177851645e-06, "loss": 15.1452, "step": 169970 }, { "epoch": 0.34337035435949853, "grad_norm": 257.24407958984375, "learning_rate": 8.346171812140942e-06, "loss": 11.9185, "step": 169980 }, { "epoch": 0.34339055499218235, "grad_norm": 700.6121215820312, "learning_rate": 8.345912430121403e-06, "loss": 35.5041, "step": 169990 }, { "epoch": 0.34341075562486617, "grad_norm": 217.2509002685547, "learning_rate": 8.345653031794292e-06, "loss": 15.1854, "step": 170000 }, { "epoch": 0.34343095625755, "grad_norm": 188.48707580566406, "learning_rate": 8.345393617160876e-06, "loss": 17.3443, "step": 170010 }, { "epoch": 0.3434511568902338, "grad_norm": 315.58544921875, "learning_rate": 8.345134186222415e-06, "loss": 30.7932, "step": 170020 }, { "epoch": 0.34347135752291763, "grad_norm": 203.83631896972656, "learning_rate": 8.344874738980175e-06, "loss": 21.2119, "step": 170030 }, { "epoch": 0.34349155815560145, "grad_norm": 211.39915466308594, "learning_rate": 8.344615275435423e-06, "loss": 31.5429, "step": 170040 }, { "epoch": 0.34351175878828527, "grad_norm": 938.8517456054688, "learning_rate": 8.34435579558942e-06, "loss": 30.4447, "step": 170050 }, { "epoch": 0.3435319594209691, "grad_norm": 530.2730712890625, "learning_rate": 8.344096299443434e-06, "loss": 19.6267, "step": 170060 }, { "epoch": 0.34355216005365286, "grad_norm": 242.96951293945312, "learning_rate": 8.34383678699873e-06, "loss": 28.5493, "step": 170070 }, { "epoch": 0.3435723606863367, "grad_norm": 283.67205810546875, "learning_rate": 8.343577258256567e-06, "loss": 22.3562, "step": 170080 }, { "epoch": 0.3435925613190205, 
"grad_norm": 599.8084106445312, "learning_rate": 8.343317713218218e-06, "loss": 18.2178, "step": 170090 }, { "epoch": 0.3436127619517043, "grad_norm": 257.37677001953125, "learning_rate": 8.343058151884942e-06, "loss": 28.3873, "step": 170100 }, { "epoch": 0.34363296258438814, "grad_norm": 1036.1143798828125, "learning_rate": 8.342798574258005e-06, "loss": 40.1173, "step": 170110 }, { "epoch": 0.34365316321707196, "grad_norm": 321.11163330078125, "learning_rate": 8.342538980338675e-06, "loss": 24.2756, "step": 170120 }, { "epoch": 0.3436733638497558, "grad_norm": 289.0916748046875, "learning_rate": 8.342279370128215e-06, "loss": 14.9477, "step": 170130 }, { "epoch": 0.3436935644824396, "grad_norm": 1119.5335693359375, "learning_rate": 8.34201974362789e-06, "loss": 30.5628, "step": 170140 }, { "epoch": 0.3437137651151234, "grad_norm": 419.0737609863281, "learning_rate": 8.341760100838967e-06, "loss": 12.6939, "step": 170150 }, { "epoch": 0.34373396574780724, "grad_norm": 301.77398681640625, "learning_rate": 8.341500441762708e-06, "loss": 37.2242, "step": 170160 }, { "epoch": 0.34375416638049106, "grad_norm": 295.69183349609375, "learning_rate": 8.341240766400385e-06, "loss": 19.4963, "step": 170170 }, { "epoch": 0.3437743670131749, "grad_norm": 610.6740112304688, "learning_rate": 8.340981074753258e-06, "loss": 24.2911, "step": 170180 }, { "epoch": 0.3437945676458587, "grad_norm": 407.9315185546875, "learning_rate": 8.340721366822594e-06, "loss": 14.1512, "step": 170190 }, { "epoch": 0.34381476827854246, "grad_norm": 1188.9407958984375, "learning_rate": 8.34046164260966e-06, "loss": 24.054, "step": 170200 }, { "epoch": 0.3438349689112263, "grad_norm": 221.2847900390625, "learning_rate": 8.34020190211572e-06, "loss": 26.5996, "step": 170210 }, { "epoch": 0.3438551695439101, "grad_norm": 406.8226013183594, "learning_rate": 8.33994214534204e-06, "loss": 19.9345, "step": 170220 }, { "epoch": 0.3438753701765939, "grad_norm": 574.2039794921875, "learning_rate": 8.33968237228989e-06, "loss": 18.2827, "step": 170230 }, { "epoch": 0.34389557080927774, "grad_norm": 428.4683532714844, "learning_rate": 8.339422582960533e-06, "loss": 14.6638, "step": 170240 }, { "epoch": 0.34391577144196156, "grad_norm": 1262.0887451171875, "learning_rate": 8.339162777355233e-06, "loss": 20.0347, "step": 170250 }, { "epoch": 0.3439359720746454, "grad_norm": 535.7229614257812, "learning_rate": 8.338902955475261e-06, "loss": 18.6566, "step": 170260 }, { "epoch": 0.3439561727073292, "grad_norm": 298.4888610839844, "learning_rate": 8.33864311732188e-06, "loss": 22.3802, "step": 170270 }, { "epoch": 0.343976373340013, "grad_norm": 172.71926879882812, "learning_rate": 8.338383262896357e-06, "loss": 36.8636, "step": 170280 }, { "epoch": 0.34399657397269684, "grad_norm": 452.70599365234375, "learning_rate": 8.33812339219996e-06, "loss": 9.8566, "step": 170290 }, { "epoch": 0.34401677460538066, "grad_norm": 188.11831665039062, "learning_rate": 8.337863505233954e-06, "loss": 16.6795, "step": 170300 }, { "epoch": 0.3440369752380645, "grad_norm": 320.6652526855469, "learning_rate": 8.337603601999605e-06, "loss": 19.1898, "step": 170310 }, { "epoch": 0.34405717587074824, "grad_norm": 454.8415222167969, "learning_rate": 8.337343682498181e-06, "loss": 20.4676, "step": 170320 }, { "epoch": 0.34407737650343206, "grad_norm": 394.4899597167969, "learning_rate": 8.33708374673095e-06, "loss": 16.1026, "step": 170330 }, { "epoch": 0.3440975771361159, "grad_norm": 160.0926055908203, "learning_rate": 8.336823794699177e-06, "loss": 37.1404, 
"step": 170340 }, { "epoch": 0.3441177777687997, "grad_norm": 891.3941650390625, "learning_rate": 8.336563826404129e-06, "loss": 15.6676, "step": 170350 }, { "epoch": 0.3441379784014835, "grad_norm": 397.9073486328125, "learning_rate": 8.336303841847073e-06, "loss": 22.0974, "step": 170360 }, { "epoch": 0.34415817903416734, "grad_norm": 361.884033203125, "learning_rate": 8.336043841029278e-06, "loss": 21.7622, "step": 170370 }, { "epoch": 0.34417837966685116, "grad_norm": 673.8883056640625, "learning_rate": 8.33578382395201e-06, "loss": 40.7657, "step": 170380 }, { "epoch": 0.344198580299535, "grad_norm": 436.1302490234375, "learning_rate": 8.335523790616536e-06, "loss": 19.4727, "step": 170390 }, { "epoch": 0.3442187809322188, "grad_norm": 349.3412780761719, "learning_rate": 8.335263741024123e-06, "loss": 20.2046, "step": 170400 }, { "epoch": 0.3442389815649026, "grad_norm": 921.329345703125, "learning_rate": 8.33500367517604e-06, "loss": 32.5087, "step": 170410 }, { "epoch": 0.34425918219758644, "grad_norm": 444.14471435546875, "learning_rate": 8.334743593073553e-06, "loss": 32.0047, "step": 170420 }, { "epoch": 0.34427938283027026, "grad_norm": 563.543212890625, "learning_rate": 8.33448349471793e-06, "loss": 10.8298, "step": 170430 }, { "epoch": 0.3442995834629541, "grad_norm": 953.4566650390625, "learning_rate": 8.334223380110438e-06, "loss": 28.8473, "step": 170440 }, { "epoch": 0.34431978409563785, "grad_norm": 500.12701416015625, "learning_rate": 8.333963249252347e-06, "loss": 27.48, "step": 170450 }, { "epoch": 0.34433998472832167, "grad_norm": 282.26861572265625, "learning_rate": 8.333703102144924e-06, "loss": 15.609, "step": 170460 }, { "epoch": 0.3443601853610055, "grad_norm": 769.2520751953125, "learning_rate": 8.333442938789435e-06, "loss": 19.0537, "step": 170470 }, { "epoch": 0.3443803859936893, "grad_norm": 1127.9569091796875, "learning_rate": 8.333182759187151e-06, "loss": 24.0776, "step": 170480 }, { "epoch": 0.34440058662637313, "grad_norm": 518.5499877929688, "learning_rate": 8.332922563339336e-06, "loss": 20.4763, "step": 170490 }, { "epoch": 0.34442078725905695, "grad_norm": 1037.2139892578125, "learning_rate": 8.332662351247262e-06, "loss": 22.9443, "step": 170500 }, { "epoch": 0.34444098789174077, "grad_norm": 362.7684326171875, "learning_rate": 8.332402122912198e-06, "loss": 21.2911, "step": 170510 }, { "epoch": 0.3444611885244246, "grad_norm": 114.61538696289062, "learning_rate": 8.332141878335407e-06, "loss": 39.4521, "step": 170520 }, { "epoch": 0.3444813891571084, "grad_norm": 157.37367248535156, "learning_rate": 8.331881617518163e-06, "loss": 13.0495, "step": 170530 }, { "epoch": 0.34450158978979223, "grad_norm": 719.7300415039062, "learning_rate": 8.331621340461731e-06, "loss": 24.5822, "step": 170540 }, { "epoch": 0.34452179042247605, "grad_norm": 991.0283813476562, "learning_rate": 8.33136104716738e-06, "loss": 35.9608, "step": 170550 }, { "epoch": 0.34454199105515987, "grad_norm": 425.6492004394531, "learning_rate": 8.331100737636381e-06, "loss": 22.3316, "step": 170560 }, { "epoch": 0.3445621916878437, "grad_norm": 193.59051513671875, "learning_rate": 8.330840411869999e-06, "loss": 21.6656, "step": 170570 }, { "epoch": 0.34458239232052745, "grad_norm": 160.44122314453125, "learning_rate": 8.330580069869506e-06, "loss": 12.0201, "step": 170580 }, { "epoch": 0.3446025929532113, "grad_norm": 287.67901611328125, "learning_rate": 8.33031971163617e-06, "loss": 20.8663, "step": 170590 }, { "epoch": 0.3446227935858951, "grad_norm": 404.4389343261719, 
"learning_rate": 8.33005933717126e-06, "loss": 17.7059, "step": 170600 }, { "epoch": 0.3446429942185789, "grad_norm": 782.0043334960938, "learning_rate": 8.329798946476042e-06, "loss": 24.3673, "step": 170610 }, { "epoch": 0.34466319485126273, "grad_norm": 375.68548583984375, "learning_rate": 8.329538539551791e-06, "loss": 37.0092, "step": 170620 }, { "epoch": 0.34468339548394655, "grad_norm": 1607.6585693359375, "learning_rate": 8.32927811639977e-06, "loss": 26.7318, "step": 170630 }, { "epoch": 0.3447035961166304, "grad_norm": 743.4594116210938, "learning_rate": 8.329017677021254e-06, "loss": 14.8951, "step": 170640 }, { "epoch": 0.3447237967493142, "grad_norm": 220.3754119873047, "learning_rate": 8.328757221417507e-06, "loss": 13.5273, "step": 170650 }, { "epoch": 0.344743997381998, "grad_norm": 1711.363525390625, "learning_rate": 8.328496749589803e-06, "loss": 44.6478, "step": 170660 }, { "epoch": 0.34476419801468183, "grad_norm": 283.867919921875, "learning_rate": 8.328236261539411e-06, "loss": 21.2721, "step": 170670 }, { "epoch": 0.34478439864736565, "grad_norm": 352.4759826660156, "learning_rate": 8.327975757267596e-06, "loss": 18.2324, "step": 170680 }, { "epoch": 0.3448045992800495, "grad_norm": 143.2894287109375, "learning_rate": 8.327715236775634e-06, "loss": 11.3667, "step": 170690 }, { "epoch": 0.3448247999127333, "grad_norm": 104.88064575195312, "learning_rate": 8.327454700064788e-06, "loss": 13.2799, "step": 170700 }, { "epoch": 0.34484500054541706, "grad_norm": 194.11032104492188, "learning_rate": 8.327194147136332e-06, "loss": 34.7384, "step": 170710 }, { "epoch": 0.3448652011781009, "grad_norm": 1422.937744140625, "learning_rate": 8.326933577991536e-06, "loss": 31.012, "step": 170720 }, { "epoch": 0.3448854018107847, "grad_norm": 322.70208740234375, "learning_rate": 8.326672992631671e-06, "loss": 28.5852, "step": 170730 }, { "epoch": 0.3449056024434685, "grad_norm": 255.77743530273438, "learning_rate": 8.326412391058003e-06, "loss": 16.4344, "step": 170740 }, { "epoch": 0.34492580307615234, "grad_norm": 208.72271728515625, "learning_rate": 8.326151773271805e-06, "loss": 21.4215, "step": 170750 }, { "epoch": 0.34494600370883616, "grad_norm": 688.43896484375, "learning_rate": 8.325891139274348e-06, "loss": 25.3622, "step": 170760 }, { "epoch": 0.34496620434152, "grad_norm": 878.5773315429688, "learning_rate": 8.325630489066899e-06, "loss": 29.5908, "step": 170770 }, { "epoch": 0.3449864049742038, "grad_norm": 315.8194580078125, "learning_rate": 8.325369822650731e-06, "loss": 18.6706, "step": 170780 }, { "epoch": 0.3450066056068876, "grad_norm": 331.92791748046875, "learning_rate": 8.325109140027115e-06, "loss": 21.8603, "step": 170790 }, { "epoch": 0.34502680623957144, "grad_norm": 233.86302185058594, "learning_rate": 8.324848441197317e-06, "loss": 24.5369, "step": 170800 }, { "epoch": 0.34504700687225526, "grad_norm": 94.36721801757812, "learning_rate": 8.324587726162614e-06, "loss": 17.6402, "step": 170810 }, { "epoch": 0.3450672075049391, "grad_norm": 102.23753356933594, "learning_rate": 8.324326994924272e-06, "loss": 18.1454, "step": 170820 }, { "epoch": 0.3450874081376229, "grad_norm": 831.04296875, "learning_rate": 8.324066247483565e-06, "loss": 14.6294, "step": 170830 }, { "epoch": 0.34510760877030666, "grad_norm": 124.45799255371094, "learning_rate": 8.323805483841762e-06, "loss": 28.6171, "step": 170840 }, { "epoch": 0.3451278094029905, "grad_norm": 201.7651824951172, "learning_rate": 8.323544704000134e-06, "loss": 20.1691, "step": 170850 }, { "epoch": 
0.3451480100356743, "grad_norm": 556.4908447265625, "learning_rate": 8.323283907959952e-06, "loss": 12.2531, "step": 170860 }, { "epoch": 0.3451682106683581, "grad_norm": 426.6864929199219, "learning_rate": 8.323023095722486e-06, "loss": 14.0918, "step": 170870 }, { "epoch": 0.34518841130104194, "grad_norm": 429.3362121582031, "learning_rate": 8.32276226728901e-06, "loss": 25.1534, "step": 170880 }, { "epoch": 0.34520861193372576, "grad_norm": 325.4744567871094, "learning_rate": 8.322501422660794e-06, "loss": 19.3162, "step": 170890 }, { "epoch": 0.3452288125664096, "grad_norm": 2206.974853515625, "learning_rate": 8.32224056183911e-06, "loss": 53.0366, "step": 170900 }, { "epoch": 0.3452490131990934, "grad_norm": 367.0641784667969, "learning_rate": 8.321979684825225e-06, "loss": 21.5768, "step": 170910 }, { "epoch": 0.3452692138317772, "grad_norm": 653.0476684570312, "learning_rate": 8.321718791620417e-06, "loss": 22.0793, "step": 170920 }, { "epoch": 0.34528941446446104, "grad_norm": 646.60888671875, "learning_rate": 8.321457882225952e-06, "loss": 9.5773, "step": 170930 }, { "epoch": 0.34530961509714486, "grad_norm": 224.6949462890625, "learning_rate": 8.321196956643107e-06, "loss": 29.1476, "step": 170940 }, { "epoch": 0.3453298157298287, "grad_norm": 324.59033203125, "learning_rate": 8.320936014873148e-06, "loss": 32.6603, "step": 170950 }, { "epoch": 0.34535001636251245, "grad_norm": 179.57569885253906, "learning_rate": 8.320675056917353e-06, "loss": 20.4256, "step": 170960 }, { "epoch": 0.34537021699519627, "grad_norm": 334.9712219238281, "learning_rate": 8.320414082776987e-06, "loss": 24.2263, "step": 170970 }, { "epoch": 0.3453904176278801, "grad_norm": 157.3404998779297, "learning_rate": 8.320153092453326e-06, "loss": 22.3718, "step": 170980 }, { "epoch": 0.3454106182605639, "grad_norm": 392.4839782714844, "learning_rate": 8.319892085947643e-06, "loss": 19.561, "step": 170990 }, { "epoch": 0.3454308188932477, "grad_norm": 309.3358154296875, "learning_rate": 8.319631063261209e-06, "loss": 27.3685, "step": 171000 }, { "epoch": 0.34545101952593155, "grad_norm": 568.41552734375, "learning_rate": 8.319370024395294e-06, "loss": 20.216, "step": 171010 }, { "epoch": 0.34547122015861537, "grad_norm": 484.2926025390625, "learning_rate": 8.319108969351173e-06, "loss": 27.3912, "step": 171020 }, { "epoch": 0.3454914207912992, "grad_norm": 308.9615783691406, "learning_rate": 8.318847898130118e-06, "loss": 34.1446, "step": 171030 }, { "epoch": 0.345511621423983, "grad_norm": 527.2250366210938, "learning_rate": 8.318586810733401e-06, "loss": 16.5062, "step": 171040 }, { "epoch": 0.3455318220566668, "grad_norm": 219.6500244140625, "learning_rate": 8.318325707162293e-06, "loss": 10.3177, "step": 171050 }, { "epoch": 0.34555202268935065, "grad_norm": 161.37271118164062, "learning_rate": 8.318064587418068e-06, "loss": 25.4439, "step": 171060 }, { "epoch": 0.34557222332203447, "grad_norm": 203.05517578125, "learning_rate": 8.317803451502e-06, "loss": 12.2113, "step": 171070 }, { "epoch": 0.3455924239547183, "grad_norm": 692.4461669921875, "learning_rate": 8.31754229941536e-06, "loss": 30.0388, "step": 171080 }, { "epoch": 0.34561262458740205, "grad_norm": 449.1273193359375, "learning_rate": 8.31728113115942e-06, "loss": 21.1349, "step": 171090 }, { "epoch": 0.34563282522008587, "grad_norm": 533.189453125, "learning_rate": 8.317019946735456e-06, "loss": 36.1625, "step": 171100 }, { "epoch": 0.3456530258527697, "grad_norm": 79.7922134399414, "learning_rate": 8.316758746144738e-06, "loss": 17.4875, 
"step": 171110 }, { "epoch": 0.3456732264854535, "grad_norm": 1937.9715576171875, "learning_rate": 8.31649752938854e-06, "loss": 34.8884, "step": 171120 }, { "epoch": 0.34569342711813733, "grad_norm": 181.68194580078125, "learning_rate": 8.316236296468135e-06, "loss": 24.8936, "step": 171130 }, { "epoch": 0.34571362775082115, "grad_norm": 318.2300720214844, "learning_rate": 8.315975047384798e-06, "loss": 16.2185, "step": 171140 }, { "epoch": 0.34573382838350497, "grad_norm": 716.7720947265625, "learning_rate": 8.315713782139801e-06, "loss": 17.966, "step": 171150 }, { "epoch": 0.3457540290161888, "grad_norm": 359.93780517578125, "learning_rate": 8.315452500734415e-06, "loss": 21.9639, "step": 171160 }, { "epoch": 0.3457742296488726, "grad_norm": 214.55775451660156, "learning_rate": 8.315191203169917e-06, "loss": 26.7262, "step": 171170 }, { "epoch": 0.34579443028155643, "grad_norm": 478.1524353027344, "learning_rate": 8.314929889447578e-06, "loss": 18.5229, "step": 171180 }, { "epoch": 0.34581463091424025, "grad_norm": 901.6751098632812, "learning_rate": 8.314668559568674e-06, "loss": 14.8478, "step": 171190 }, { "epoch": 0.34583483154692407, "grad_norm": 648.0640869140625, "learning_rate": 8.314407213534477e-06, "loss": 34.5751, "step": 171200 }, { "epoch": 0.3458550321796079, "grad_norm": 72.91156005859375, "learning_rate": 8.31414585134626e-06, "loss": 27.2991, "step": 171210 }, { "epoch": 0.34587523281229166, "grad_norm": 562.7615356445312, "learning_rate": 8.3138844730053e-06, "loss": 12.0175, "step": 171220 }, { "epoch": 0.3458954334449755, "grad_norm": 354.0174865722656, "learning_rate": 8.313623078512869e-06, "loss": 21.4302, "step": 171230 }, { "epoch": 0.3459156340776593, "grad_norm": 894.1097412109375, "learning_rate": 8.313361667870238e-06, "loss": 30.4599, "step": 171240 }, { "epoch": 0.3459358347103431, "grad_norm": 409.5519714355469, "learning_rate": 8.313100241078689e-06, "loss": 32.2646, "step": 171250 }, { "epoch": 0.34595603534302694, "grad_norm": 815.2952880859375, "learning_rate": 8.312838798139488e-06, "loss": 25.4976, "step": 171260 }, { "epoch": 0.34597623597571076, "grad_norm": 390.8360900878906, "learning_rate": 8.312577339053914e-06, "loss": 15.6623, "step": 171270 }, { "epoch": 0.3459964366083946, "grad_norm": 341.1806945800781, "learning_rate": 8.312315863823239e-06, "loss": 11.5922, "step": 171280 }, { "epoch": 0.3460166372410784, "grad_norm": 459.4844665527344, "learning_rate": 8.312054372448737e-06, "loss": 15.2012, "step": 171290 }, { "epoch": 0.3460368378737622, "grad_norm": 334.7923583984375, "learning_rate": 8.311792864931686e-06, "loss": 31.4539, "step": 171300 }, { "epoch": 0.34605703850644604, "grad_norm": 638.260986328125, "learning_rate": 8.311531341273355e-06, "loss": 14.77, "step": 171310 }, { "epoch": 0.34607723913912986, "grad_norm": 464.9892578125, "learning_rate": 8.311269801475026e-06, "loss": 24.8364, "step": 171320 }, { "epoch": 0.3460974397718137, "grad_norm": 618.2025756835938, "learning_rate": 8.311008245537967e-06, "loss": 18.2905, "step": 171330 }, { "epoch": 0.3461176404044975, "grad_norm": 301.9693908691406, "learning_rate": 8.310746673463456e-06, "loss": 15.1808, "step": 171340 }, { "epoch": 0.34613784103718126, "grad_norm": 899.296875, "learning_rate": 8.310485085252767e-06, "loss": 22.5531, "step": 171350 }, { "epoch": 0.3461580416698651, "grad_norm": 747.182861328125, "learning_rate": 8.310223480907176e-06, "loss": 21.0325, "step": 171360 }, { "epoch": 0.3461782423025489, "grad_norm": 68.04808807373047, "learning_rate": 
8.309961860427957e-06, "loss": 17.6851, "step": 171370 }, { "epoch": 0.3461984429352327, "grad_norm": 361.1148986816406, "learning_rate": 8.309700223816385e-06, "loss": 20.9249, "step": 171380 }, { "epoch": 0.34621864356791654, "grad_norm": 475.031494140625, "learning_rate": 8.309438571073734e-06, "loss": 38.1675, "step": 171390 }, { "epoch": 0.34623884420060036, "grad_norm": 575.501220703125, "learning_rate": 8.309176902201283e-06, "loss": 22.632, "step": 171400 }, { "epoch": 0.3462590448332842, "grad_norm": 58.37808609008789, "learning_rate": 8.308915217200305e-06, "loss": 19.541, "step": 171410 }, { "epoch": 0.346279245465968, "grad_norm": 258.83099365234375, "learning_rate": 8.308653516072074e-06, "loss": 14.9517, "step": 171420 }, { "epoch": 0.3462994460986518, "grad_norm": 327.9186096191406, "learning_rate": 8.30839179881787e-06, "loss": 27.1265, "step": 171430 }, { "epoch": 0.34631964673133564, "grad_norm": 489.5204162597656, "learning_rate": 8.308130065438963e-06, "loss": 20.5175, "step": 171440 }, { "epoch": 0.34633984736401946, "grad_norm": 185.77662658691406, "learning_rate": 8.307868315936631e-06, "loss": 18.2377, "step": 171450 }, { "epoch": 0.3463600479967033, "grad_norm": 529.444580078125, "learning_rate": 8.307606550312152e-06, "loss": 14.2224, "step": 171460 }, { "epoch": 0.3463802486293871, "grad_norm": 798.6822509765625, "learning_rate": 8.307344768566798e-06, "loss": 32.2639, "step": 171470 }, { "epoch": 0.34640044926207086, "grad_norm": 1000.7474975585938, "learning_rate": 8.30708297070185e-06, "loss": 34.3624, "step": 171480 }, { "epoch": 0.3464206498947547, "grad_norm": 283.7931213378906, "learning_rate": 8.306821156718577e-06, "loss": 15.6855, "step": 171490 }, { "epoch": 0.3464408505274385, "grad_norm": 435.1275939941406, "learning_rate": 8.30655932661826e-06, "loss": 41.0409, "step": 171500 }, { "epoch": 0.3464610511601223, "grad_norm": 154.36695861816406, "learning_rate": 8.306297480402175e-06, "loss": 17.1858, "step": 171510 }, { "epoch": 0.34648125179280614, "grad_norm": 392.240478515625, "learning_rate": 8.306035618071595e-06, "loss": 23.5365, "step": 171520 }, { "epoch": 0.34650145242548996, "grad_norm": 331.3149108886719, "learning_rate": 8.305773739627801e-06, "loss": 9.4739, "step": 171530 }, { "epoch": 0.3465216530581738, "grad_norm": 263.245849609375, "learning_rate": 8.305511845072065e-06, "loss": 20.0296, "step": 171540 }, { "epoch": 0.3465418536908576, "grad_norm": 252.88711547851562, "learning_rate": 8.305249934405663e-06, "loss": 21.1912, "step": 171550 }, { "epoch": 0.3465620543235414, "grad_norm": 213.32379150390625, "learning_rate": 8.304988007629878e-06, "loss": 15.5472, "step": 171560 }, { "epoch": 0.34658225495622524, "grad_norm": 964.141357421875, "learning_rate": 8.30472606474598e-06, "loss": 41.3969, "step": 171570 }, { "epoch": 0.34660245558890906, "grad_norm": 837.5604248046875, "learning_rate": 8.304464105755248e-06, "loss": 33.7858, "step": 171580 }, { "epoch": 0.3466226562215929, "grad_norm": 223.40711975097656, "learning_rate": 8.304202130658959e-06, "loss": 23.8464, "step": 171590 }, { "epoch": 0.34664285685427665, "grad_norm": 756.6446533203125, "learning_rate": 8.303940139458389e-06, "loss": 17.5319, "step": 171600 }, { "epoch": 0.34666305748696047, "grad_norm": 1434.41455078125, "learning_rate": 8.303678132154817e-06, "loss": 44.3605, "step": 171610 }, { "epoch": 0.3466832581196443, "grad_norm": 33.394493103027344, "learning_rate": 8.303416108749516e-06, "loss": 13.2621, "step": 171620 }, { "epoch": 0.3467034587523281, 
"grad_norm": 435.3074645996094, "learning_rate": 8.303154069243769e-06, "loss": 27.7944, "step": 171630 }, { "epoch": 0.34672365938501193, "grad_norm": 315.0975036621094, "learning_rate": 8.302892013638846e-06, "loss": 16.8905, "step": 171640 }, { "epoch": 0.34674386001769575, "grad_norm": 1057.85693359375, "learning_rate": 8.302629941936032e-06, "loss": 18.137, "step": 171650 }, { "epoch": 0.34676406065037957, "grad_norm": 426.70343017578125, "learning_rate": 8.302367854136598e-06, "loss": 18.8805, "step": 171660 }, { "epoch": 0.3467842612830634, "grad_norm": 328.9032287597656, "learning_rate": 8.302105750241822e-06, "loss": 18.166, "step": 171670 }, { "epoch": 0.3468044619157472, "grad_norm": 883.0125122070312, "learning_rate": 8.301843630252986e-06, "loss": 36.7881, "step": 171680 }, { "epoch": 0.34682466254843103, "grad_norm": 266.7763671875, "learning_rate": 8.301581494171363e-06, "loss": 25.3635, "step": 171690 }, { "epoch": 0.34684486318111485, "grad_norm": 963.8483276367188, "learning_rate": 8.301319341998231e-06, "loss": 12.9876, "step": 171700 }, { "epoch": 0.34686506381379867, "grad_norm": 364.8996887207031, "learning_rate": 8.301057173734872e-06, "loss": 34.6909, "step": 171710 }, { "epoch": 0.3468852644464825, "grad_norm": 408.3586730957031, "learning_rate": 8.300794989382559e-06, "loss": 15.5905, "step": 171720 }, { "epoch": 0.34690546507916625, "grad_norm": 330.177978515625, "learning_rate": 8.300532788942571e-06, "loss": 31.1517, "step": 171730 }, { "epoch": 0.3469256657118501, "grad_norm": 621.7115478515625, "learning_rate": 8.300270572416187e-06, "loss": 23.2462, "step": 171740 }, { "epoch": 0.3469458663445339, "grad_norm": 13.08733081817627, "learning_rate": 8.300008339804686e-06, "loss": 10.6224, "step": 171750 }, { "epoch": 0.3469660669772177, "grad_norm": 24.804424285888672, "learning_rate": 8.299746091109343e-06, "loss": 20.0629, "step": 171760 }, { "epoch": 0.34698626760990153, "grad_norm": 22.87811851501465, "learning_rate": 8.299483826331436e-06, "loss": 9.8308, "step": 171770 }, { "epoch": 0.34700646824258535, "grad_norm": 201.6088409423828, "learning_rate": 8.299221545472248e-06, "loss": 24.3398, "step": 171780 }, { "epoch": 0.3470266688752692, "grad_norm": 737.10400390625, "learning_rate": 8.298959248533054e-06, "loss": 24.2523, "step": 171790 }, { "epoch": 0.347046869507953, "grad_norm": 690.1240844726562, "learning_rate": 8.298696935515132e-06, "loss": 36.7034, "step": 171800 }, { "epoch": 0.3470670701406368, "grad_norm": 233.17715454101562, "learning_rate": 8.29843460641976e-06, "loss": 38.8903, "step": 171810 }, { "epoch": 0.34708727077332063, "grad_norm": 299.7923278808594, "learning_rate": 8.29817226124822e-06, "loss": 15.2415, "step": 171820 }, { "epoch": 0.34710747140600445, "grad_norm": 190.00265502929688, "learning_rate": 8.297909900001787e-06, "loss": 17.8699, "step": 171830 }, { "epoch": 0.3471276720386883, "grad_norm": 581.3510131835938, "learning_rate": 8.297647522681741e-06, "loss": 29.8289, "step": 171840 }, { "epoch": 0.3471478726713721, "grad_norm": 384.6495361328125, "learning_rate": 8.297385129289361e-06, "loss": 8.751, "step": 171850 }, { "epoch": 0.34716807330405586, "grad_norm": 87.20552825927734, "learning_rate": 8.297122719825928e-06, "loss": 20.6711, "step": 171860 }, { "epoch": 0.3471882739367397, "grad_norm": 578.283203125, "learning_rate": 8.296860294292716e-06, "loss": 19.5475, "step": 171870 }, { "epoch": 0.3472084745694235, "grad_norm": 744.7264404296875, "learning_rate": 8.296597852691008e-06, "loss": 28.1937, "step": 171880 }, 
{ "epoch": 0.3472286752021073, "grad_norm": 40.18510818481445, "learning_rate": 8.296335395022083e-06, "loss": 22.0612, "step": 171890 }, { "epoch": 0.34724887583479114, "grad_norm": 421.2962951660156, "learning_rate": 8.296072921287217e-06, "loss": 20.1413, "step": 171900 }, { "epoch": 0.34726907646747496, "grad_norm": 715.844482421875, "learning_rate": 8.295810431487692e-06, "loss": 51.607, "step": 171910 }, { "epoch": 0.3472892771001588, "grad_norm": 182.30955505371094, "learning_rate": 8.295547925624786e-06, "loss": 22.5666, "step": 171920 }, { "epoch": 0.3473094777328426, "grad_norm": 459.4350891113281, "learning_rate": 8.295285403699783e-06, "loss": 21.2349, "step": 171930 }, { "epoch": 0.3473296783655264, "grad_norm": 326.0625305175781, "learning_rate": 8.295022865713955e-06, "loss": 19.5352, "step": 171940 }, { "epoch": 0.34734987899821024, "grad_norm": 505.67962646484375, "learning_rate": 8.294760311668586e-06, "loss": 21.1203, "step": 171950 }, { "epoch": 0.34737007963089406, "grad_norm": 626.6988525390625, "learning_rate": 8.294497741564953e-06, "loss": 14.3073, "step": 171960 }, { "epoch": 0.3473902802635779, "grad_norm": 62.619449615478516, "learning_rate": 8.29423515540434e-06, "loss": 20.2754, "step": 171970 }, { "epoch": 0.3474104808962617, "grad_norm": 892.3823852539062, "learning_rate": 8.293972553188023e-06, "loss": 34.2372, "step": 171980 }, { "epoch": 0.34743068152894546, "grad_norm": 332.57415771484375, "learning_rate": 8.293709934917284e-06, "loss": 19.2847, "step": 171990 }, { "epoch": 0.3474508821616293, "grad_norm": 206.59849548339844, "learning_rate": 8.293447300593402e-06, "loss": 17.6362, "step": 172000 }, { "epoch": 0.3474710827943131, "grad_norm": 433.46435546875, "learning_rate": 8.293184650217657e-06, "loss": 15.8899, "step": 172010 }, { "epoch": 0.3474912834269969, "grad_norm": 529.408935546875, "learning_rate": 8.292921983791332e-06, "loss": 23.3935, "step": 172020 }, { "epoch": 0.34751148405968074, "grad_norm": 312.68878173828125, "learning_rate": 8.292659301315702e-06, "loss": 21.1025, "step": 172030 }, { "epoch": 0.34753168469236456, "grad_norm": 486.9510803222656, "learning_rate": 8.29239660279205e-06, "loss": 11.4331, "step": 172040 }, { "epoch": 0.3475518853250484, "grad_norm": 630.8102416992188, "learning_rate": 8.292133888221659e-06, "loss": 24.9631, "step": 172050 }, { "epoch": 0.3475720859577322, "grad_norm": 322.6904296875, "learning_rate": 8.291871157605803e-06, "loss": 22.8038, "step": 172060 }, { "epoch": 0.347592286590416, "grad_norm": 263.4180603027344, "learning_rate": 8.291608410945768e-06, "loss": 31.7861, "step": 172070 }, { "epoch": 0.34761248722309984, "grad_norm": 276.12310791015625, "learning_rate": 8.291345648242832e-06, "loss": 21.8643, "step": 172080 }, { "epoch": 0.34763268785578366, "grad_norm": 558.2141723632812, "learning_rate": 8.291082869498277e-06, "loss": 22.0106, "step": 172090 }, { "epoch": 0.3476528884884675, "grad_norm": 965.6648559570312, "learning_rate": 8.290820074713383e-06, "loss": 18.0912, "step": 172100 }, { "epoch": 0.3476730891211513, "grad_norm": 154.92520141601562, "learning_rate": 8.290557263889432e-06, "loss": 3.3864, "step": 172110 }, { "epoch": 0.34769328975383507, "grad_norm": 184.02687072753906, "learning_rate": 8.290294437027704e-06, "loss": 22.1731, "step": 172120 }, { "epoch": 0.3477134903865189, "grad_norm": 547.7509765625, "learning_rate": 8.29003159412948e-06, "loss": 18.3219, "step": 172130 }, { "epoch": 0.3477336910192027, "grad_norm": 92.7471923828125, "learning_rate": 8.28976873519604e-06, 
"loss": 10.7081, "step": 172140 }, { "epoch": 0.3477538916518865, "grad_norm": 347.52655029296875, "learning_rate": 8.289505860228666e-06, "loss": 43.5869, "step": 172150 }, { "epoch": 0.34777409228457035, "grad_norm": 875.1144409179688, "learning_rate": 8.28924296922864e-06, "loss": 29.2674, "step": 172160 }, { "epoch": 0.34779429291725417, "grad_norm": 629.087646484375, "learning_rate": 8.288980062197243e-06, "loss": 27.1192, "step": 172170 }, { "epoch": 0.347814493549938, "grad_norm": 170.34738159179688, "learning_rate": 8.288717139135755e-06, "loss": 7.0775, "step": 172180 }, { "epoch": 0.3478346941826218, "grad_norm": 381.4020690917969, "learning_rate": 8.28845420004546e-06, "loss": 21.7472, "step": 172190 }, { "epoch": 0.3478548948153056, "grad_norm": 423.1697082519531, "learning_rate": 8.288191244927637e-06, "loss": 13.2788, "step": 172200 }, { "epoch": 0.34787509544798945, "grad_norm": 616.6538696289062, "learning_rate": 8.28792827378357e-06, "loss": 12.0924, "step": 172210 }, { "epoch": 0.34789529608067327, "grad_norm": 971.9646606445312, "learning_rate": 8.287665286614538e-06, "loss": 23.8575, "step": 172220 }, { "epoch": 0.3479154967133571, "grad_norm": 748.2013549804688, "learning_rate": 8.287402283421825e-06, "loss": 20.1542, "step": 172230 }, { "epoch": 0.34793569734604085, "grad_norm": 222.02059936523438, "learning_rate": 8.287139264206712e-06, "loss": 34.7346, "step": 172240 }, { "epoch": 0.34795589797872467, "grad_norm": 258.45654296875, "learning_rate": 8.28687622897048e-06, "loss": 20.7514, "step": 172250 }, { "epoch": 0.3479760986114085, "grad_norm": 440.2414245605469, "learning_rate": 8.286613177714412e-06, "loss": 23.3005, "step": 172260 }, { "epoch": 0.3479962992440923, "grad_norm": 23.830753326416016, "learning_rate": 8.28635011043979e-06, "loss": 19.8647, "step": 172270 }, { "epoch": 0.34801649987677613, "grad_norm": 302.4520263671875, "learning_rate": 8.286087027147899e-06, "loss": 18.095, "step": 172280 }, { "epoch": 0.34803670050945995, "grad_norm": 561.2001342773438, "learning_rate": 8.285823927840015e-06, "loss": 16.7233, "step": 172290 }, { "epoch": 0.34805690114214377, "grad_norm": 197.0220184326172, "learning_rate": 8.285560812517423e-06, "loss": 36.6479, "step": 172300 }, { "epoch": 0.3480771017748276, "grad_norm": 525.0717163085938, "learning_rate": 8.285297681181408e-06, "loss": 20.3847, "step": 172310 }, { "epoch": 0.3480973024075114, "grad_norm": 345.0289001464844, "learning_rate": 8.285034533833251e-06, "loss": 26.9689, "step": 172320 }, { "epoch": 0.34811750304019523, "grad_norm": 576.8797607421875, "learning_rate": 8.284771370474233e-06, "loss": 25.3182, "step": 172330 }, { "epoch": 0.34813770367287905, "grad_norm": 8.572928428649902, "learning_rate": 8.284508191105638e-06, "loss": 17.7019, "step": 172340 }, { "epoch": 0.34815790430556287, "grad_norm": 217.74179077148438, "learning_rate": 8.284244995728749e-06, "loss": 23.8376, "step": 172350 }, { "epoch": 0.3481781049382467, "grad_norm": 599.9877319335938, "learning_rate": 8.283981784344847e-06, "loss": 25.6374, "step": 172360 }, { "epoch": 0.34819830557093046, "grad_norm": 356.48748779296875, "learning_rate": 8.283718556955216e-06, "loss": 19.1465, "step": 172370 }, { "epoch": 0.3482185062036143, "grad_norm": 312.2649841308594, "learning_rate": 8.283455313561141e-06, "loss": 38.9632, "step": 172380 }, { "epoch": 0.3482387068362981, "grad_norm": 724.2706909179688, "learning_rate": 8.283192054163902e-06, "loss": 20.6261, "step": 172390 }, { "epoch": 0.3482589074689819, "grad_norm": 331.345458984375, 
"learning_rate": 8.282928778764783e-06, "loss": 17.2537, "step": 172400 }, { "epoch": 0.34827910810166574, "grad_norm": 688.9057006835938, "learning_rate": 8.282665487365067e-06, "loss": 10.3169, "step": 172410 }, { "epoch": 0.34829930873434956, "grad_norm": 391.465087890625, "learning_rate": 8.282402179966039e-06, "loss": 28.3406, "step": 172420 }, { "epoch": 0.3483195093670334, "grad_norm": 126.01844024658203, "learning_rate": 8.282138856568978e-06, "loss": 25.5566, "step": 172430 }, { "epoch": 0.3483397099997172, "grad_norm": 267.91357421875, "learning_rate": 8.281875517175173e-06, "loss": 13.3885, "step": 172440 }, { "epoch": 0.348359910632401, "grad_norm": 1057.038818359375, "learning_rate": 8.281612161785903e-06, "loss": 21.8697, "step": 172450 }, { "epoch": 0.34838011126508484, "grad_norm": 983.1893310546875, "learning_rate": 8.281348790402455e-06, "loss": 33.7804, "step": 172460 }, { "epoch": 0.34840031189776866, "grad_norm": 671.6742553710938, "learning_rate": 8.28108540302611e-06, "loss": 28.8767, "step": 172470 }, { "epoch": 0.3484205125304525, "grad_norm": 2117.237548828125, "learning_rate": 8.280821999658153e-06, "loss": 47.7445, "step": 172480 }, { "epoch": 0.3484407131631363, "grad_norm": 10.951056480407715, "learning_rate": 8.280558580299868e-06, "loss": 23.4332, "step": 172490 }, { "epoch": 0.34846091379582006, "grad_norm": 597.7149047851562, "learning_rate": 8.280295144952537e-06, "loss": 29.9681, "step": 172500 }, { "epoch": 0.3484811144285039, "grad_norm": 523.89501953125, "learning_rate": 8.280031693617446e-06, "loss": 27.9063, "step": 172510 }, { "epoch": 0.3485013150611877, "grad_norm": 392.02764892578125, "learning_rate": 8.27976822629588e-06, "loss": 11.0087, "step": 172520 }, { "epoch": 0.3485215156938715, "grad_norm": 211.89456176757812, "learning_rate": 8.279504742989117e-06, "loss": 26.7032, "step": 172530 }, { "epoch": 0.34854171632655534, "grad_norm": 42.395484924316406, "learning_rate": 8.27924124369845e-06, "loss": 15.2733, "step": 172540 }, { "epoch": 0.34856191695923916, "grad_norm": 602.2659912109375, "learning_rate": 8.278977728425157e-06, "loss": 31.1341, "step": 172550 }, { "epoch": 0.348582117591923, "grad_norm": 629.5859375, "learning_rate": 8.278714197170526e-06, "loss": 24.7875, "step": 172560 }, { "epoch": 0.3486023182246068, "grad_norm": 374.5086669921875, "learning_rate": 8.278450649935838e-06, "loss": 33.6697, "step": 172570 }, { "epoch": 0.3486225188572906, "grad_norm": 175.51300048828125, "learning_rate": 8.278187086722378e-06, "loss": 17.7969, "step": 172580 }, { "epoch": 0.34864271948997444, "grad_norm": 426.8929748535156, "learning_rate": 8.277923507531434e-06, "loss": 19.0821, "step": 172590 }, { "epoch": 0.34866292012265826, "grad_norm": 427.6558837890625, "learning_rate": 8.277659912364288e-06, "loss": 18.8651, "step": 172600 }, { "epoch": 0.3486831207553421, "grad_norm": 512.8453369140625, "learning_rate": 8.277396301222223e-06, "loss": 25.9066, "step": 172610 }, { "epoch": 0.3487033213880259, "grad_norm": 471.2196044921875, "learning_rate": 8.277132674106528e-06, "loss": 14.5615, "step": 172620 }, { "epoch": 0.34872352202070966, "grad_norm": 377.5659484863281, "learning_rate": 8.276869031018486e-06, "loss": 27.5667, "step": 172630 }, { "epoch": 0.3487437226533935, "grad_norm": 327.4805908203125, "learning_rate": 8.27660537195938e-06, "loss": 25.7655, "step": 172640 }, { "epoch": 0.3487639232860773, "grad_norm": 332.9518127441406, "learning_rate": 8.276341696930499e-06, "loss": 14.511, "step": 172650 }, { "epoch": 0.3487841239187611, 
"grad_norm": 221.7066650390625, "learning_rate": 8.276078005933125e-06, "loss": 21.6163, "step": 172660 }, { "epoch": 0.34880432455144494, "grad_norm": 447.6441345214844, "learning_rate": 8.275814298968544e-06, "loss": 15.6934, "step": 172670 }, { "epoch": 0.34882452518412876, "grad_norm": 374.6302490234375, "learning_rate": 8.275550576038043e-06, "loss": 18.794, "step": 172680 }, { "epoch": 0.3488447258168126, "grad_norm": 1010.4691162109375, "learning_rate": 8.275286837142903e-06, "loss": 24.7621, "step": 172690 }, { "epoch": 0.3488649264494964, "grad_norm": 228.8806610107422, "learning_rate": 8.275023082284413e-06, "loss": 16.0672, "step": 172700 }, { "epoch": 0.3488851270821802, "grad_norm": 494.7354431152344, "learning_rate": 8.27475931146386e-06, "loss": 13.6087, "step": 172710 }, { "epoch": 0.34890532771486404, "grad_norm": 1394.575439453125, "learning_rate": 8.274495524682524e-06, "loss": 28.2938, "step": 172720 }, { "epoch": 0.34892552834754786, "grad_norm": 315.2424011230469, "learning_rate": 8.274231721941696e-06, "loss": 25.0352, "step": 172730 }, { "epoch": 0.3489457289802317, "grad_norm": 262.1417541503906, "learning_rate": 8.273967903242659e-06, "loss": 12.0646, "step": 172740 }, { "epoch": 0.3489659296129155, "grad_norm": 698.4266967773438, "learning_rate": 8.273704068586698e-06, "loss": 22.4679, "step": 172750 }, { "epoch": 0.34898613024559927, "grad_norm": 476.93621826171875, "learning_rate": 8.273440217975103e-06, "loss": 19.2444, "step": 172760 }, { "epoch": 0.3490063308782831, "grad_norm": 266.60009765625, "learning_rate": 8.273176351409157e-06, "loss": 34.1924, "step": 172770 }, { "epoch": 0.3490265315109669, "grad_norm": 159.0330047607422, "learning_rate": 8.272912468890146e-06, "loss": 17.7355, "step": 172780 }, { "epoch": 0.34904673214365073, "grad_norm": 391.28729248046875, "learning_rate": 8.272648570419357e-06, "loss": 27.3779, "step": 172790 }, { "epoch": 0.34906693277633455, "grad_norm": 66.09520721435547, "learning_rate": 8.272384655998075e-06, "loss": 19.4427, "step": 172800 }, { "epoch": 0.34908713340901837, "grad_norm": 606.9044799804688, "learning_rate": 8.272120725627588e-06, "loss": 31.7733, "step": 172810 }, { "epoch": 0.3491073340417022, "grad_norm": 778.39013671875, "learning_rate": 8.27185677930918e-06, "loss": 24.3503, "step": 172820 }, { "epoch": 0.349127534674386, "grad_norm": 674.9252319335938, "learning_rate": 8.27159281704414e-06, "loss": 15.9073, "step": 172830 }, { "epoch": 0.34914773530706983, "grad_norm": 793.4208984375, "learning_rate": 8.271328838833753e-06, "loss": 25.1093, "step": 172840 }, { "epoch": 0.34916793593975365, "grad_norm": 695.896484375, "learning_rate": 8.271064844679306e-06, "loss": 30.2209, "step": 172850 }, { "epoch": 0.34918813657243747, "grad_norm": 71.12458801269531, "learning_rate": 8.270800834582087e-06, "loss": 25.3217, "step": 172860 }, { "epoch": 0.3492083372051213, "grad_norm": 674.998779296875, "learning_rate": 8.270536808543379e-06, "loss": 25.0619, "step": 172870 }, { "epoch": 0.34922853783780505, "grad_norm": 566.4926147460938, "learning_rate": 8.270272766564473e-06, "loss": 30.4275, "step": 172880 }, { "epoch": 0.3492487384704889, "grad_norm": 1088.8902587890625, "learning_rate": 8.270008708646653e-06, "loss": 28.3126, "step": 172890 }, { "epoch": 0.3492689391031727, "grad_norm": 306.4773864746094, "learning_rate": 8.269744634791207e-06, "loss": 22.0916, "step": 172900 }, { "epoch": 0.3492891397358565, "grad_norm": 357.73980712890625, "learning_rate": 8.269480544999424e-06, "loss": 9.0816, "step": 172910 
}, { "epoch": 0.34930934036854033, "grad_norm": 341.8287048339844, "learning_rate": 8.26921643927259e-06, "loss": 31.488, "step": 172920 }, { "epoch": 0.34932954100122415, "grad_norm": 378.9211730957031, "learning_rate": 8.268952317611989e-06, "loss": 19.5464, "step": 172930 }, { "epoch": 0.349349741633908, "grad_norm": 423.0436096191406, "learning_rate": 8.268688180018911e-06, "loss": 20.9778, "step": 172940 }, { "epoch": 0.3493699422665918, "grad_norm": 500.8439636230469, "learning_rate": 8.268424026494646e-06, "loss": 16.0925, "step": 172950 }, { "epoch": 0.3493901428992756, "grad_norm": 640.2675170898438, "learning_rate": 8.268159857040475e-06, "loss": 19.7851, "step": 172960 }, { "epoch": 0.34941034353195943, "grad_norm": 375.4637451171875, "learning_rate": 8.267895671657692e-06, "loss": 22.5761, "step": 172970 }, { "epoch": 0.34943054416464325, "grad_norm": 303.6288757324219, "learning_rate": 8.26763147034758e-06, "loss": 27.9879, "step": 172980 }, { "epoch": 0.3494507447973271, "grad_norm": 216.30111694335938, "learning_rate": 8.26736725311143e-06, "loss": 19.6532, "step": 172990 }, { "epoch": 0.3494709454300109, "grad_norm": 531.7112426757812, "learning_rate": 8.267103019950529e-06, "loss": 10.862, "step": 173000 }, { "epoch": 0.34949114606269466, "grad_norm": 86.75955963134766, "learning_rate": 8.266838770866162e-06, "loss": 14.3546, "step": 173010 }, { "epoch": 0.3495113466953785, "grad_norm": 170.6025848388672, "learning_rate": 8.26657450585962e-06, "loss": 16.5934, "step": 173020 }, { "epoch": 0.3495315473280623, "grad_norm": 1053.049560546875, "learning_rate": 8.266310224932191e-06, "loss": 31.3053, "step": 173030 }, { "epoch": 0.3495517479607461, "grad_norm": 134.47523498535156, "learning_rate": 8.26604592808516e-06, "loss": 87.7414, "step": 173040 }, { "epoch": 0.34957194859342994, "grad_norm": 693.0878295898438, "learning_rate": 8.26578161531982e-06, "loss": 29.6518, "step": 173050 }, { "epoch": 0.34959214922611376, "grad_norm": 654.5897216796875, "learning_rate": 8.265517286637453e-06, "loss": 23.5505, "step": 173060 }, { "epoch": 0.3496123498587976, "grad_norm": 668.3735961914062, "learning_rate": 8.265252942039352e-06, "loss": 21.3319, "step": 173070 }, { "epoch": 0.3496325504914814, "grad_norm": 212.28268432617188, "learning_rate": 8.264988581526806e-06, "loss": 14.6275, "step": 173080 }, { "epoch": 0.3496527511241652, "grad_norm": 192.173583984375, "learning_rate": 8.2647242051011e-06, "loss": 17.1246, "step": 173090 }, { "epoch": 0.34967295175684904, "grad_norm": 985.883544921875, "learning_rate": 8.264459812763525e-06, "loss": 23.9225, "step": 173100 }, { "epoch": 0.34969315238953286, "grad_norm": 1110.9190673828125, "learning_rate": 8.264195404515369e-06, "loss": 43.0745, "step": 173110 }, { "epoch": 0.3497133530222167, "grad_norm": 514.7064208984375, "learning_rate": 8.26393098035792e-06, "loss": 21.6134, "step": 173120 }, { "epoch": 0.3497335536549005, "grad_norm": 338.3360290527344, "learning_rate": 8.263666540292468e-06, "loss": 25.7399, "step": 173130 }, { "epoch": 0.34975375428758426, "grad_norm": 155.49681091308594, "learning_rate": 8.263402084320299e-06, "loss": 13.0151, "step": 173140 }, { "epoch": 0.3497739549202681, "grad_norm": 485.2061767578125, "learning_rate": 8.263137612442705e-06, "loss": 29.5074, "step": 173150 }, { "epoch": 0.3497941555529519, "grad_norm": 543.310302734375, "learning_rate": 8.262873124660976e-06, "loss": 28.8361, "step": 173160 }, { "epoch": 0.3498143561856357, "grad_norm": 250.26800537109375, "learning_rate": 
8.262608620976396e-06, "loss": 25.3538, "step": 173170 }, { "epoch": 0.34983455681831954, "grad_norm": 0.0, "learning_rate": 8.262344101390261e-06, "loss": 30.8818, "step": 173180 }, { "epoch": 0.34985475745100336, "grad_norm": 148.61407470703125, "learning_rate": 8.262079565903853e-06, "loss": 35.5837, "step": 173190 }, { "epoch": 0.3498749580836872, "grad_norm": 1020.1525268554688, "learning_rate": 8.261815014518465e-06, "loss": 19.6972, "step": 173200 }, { "epoch": 0.349895158716371, "grad_norm": 517.3057250976562, "learning_rate": 8.261550447235389e-06, "loss": 16.0533, "step": 173210 }, { "epoch": 0.3499153593490548, "grad_norm": 372.4304504394531, "learning_rate": 8.26128586405591e-06, "loss": 14.5533, "step": 173220 }, { "epoch": 0.34993555998173864, "grad_norm": 0.0, "learning_rate": 8.26102126498132e-06, "loss": 22.9425, "step": 173230 }, { "epoch": 0.34995576061442246, "grad_norm": 264.6286926269531, "learning_rate": 8.260756650012906e-06, "loss": 22.0927, "step": 173240 }, { "epoch": 0.3499759612471063, "grad_norm": 1.9577926397323608, "learning_rate": 8.260492019151962e-06, "loss": 22.1662, "step": 173250 }, { "epoch": 0.3499961618797901, "grad_norm": 451.12725830078125, "learning_rate": 8.260227372399773e-06, "loss": 13.3866, "step": 173260 }, { "epoch": 0.35001636251247387, "grad_norm": 585.6008911132812, "learning_rate": 8.259962709757634e-06, "loss": 37.5836, "step": 173270 }, { "epoch": 0.3500365631451577, "grad_norm": 266.95361328125, "learning_rate": 8.259698031226831e-06, "loss": 19.4592, "step": 173280 }, { "epoch": 0.3500567637778415, "grad_norm": 401.6283264160156, "learning_rate": 8.259433336808653e-06, "loss": 28.0473, "step": 173290 }, { "epoch": 0.3500769644105253, "grad_norm": 1045.560546875, "learning_rate": 8.259168626504395e-06, "loss": 18.9615, "step": 173300 }, { "epoch": 0.35009716504320915, "grad_norm": 498.0918884277344, "learning_rate": 8.258903900315343e-06, "loss": 19.7967, "step": 173310 }, { "epoch": 0.35011736567589297, "grad_norm": 369.1413269042969, "learning_rate": 8.25863915824279e-06, "loss": 26.5213, "step": 173320 }, { "epoch": 0.3501375663085768, "grad_norm": 144.392822265625, "learning_rate": 8.258374400288022e-06, "loss": 34.1343, "step": 173330 }, { "epoch": 0.3501577669412606, "grad_norm": 487.2896728515625, "learning_rate": 8.258109626452335e-06, "loss": 20.6067, "step": 173340 }, { "epoch": 0.3501779675739444, "grad_norm": 62.003700256347656, "learning_rate": 8.257844836737017e-06, "loss": 15.0474, "step": 173350 }, { "epoch": 0.35019816820662825, "grad_norm": 216.18478393554688, "learning_rate": 8.257580031143357e-06, "loss": 16.0873, "step": 173360 }, { "epoch": 0.35021836883931207, "grad_norm": 295.86688232421875, "learning_rate": 8.257315209672648e-06, "loss": 20.4352, "step": 173370 }, { "epoch": 0.3502385694719959, "grad_norm": 590.26953125, "learning_rate": 8.257050372326179e-06, "loss": 29.161, "step": 173380 }, { "epoch": 0.35025877010467965, "grad_norm": 397.9071044921875, "learning_rate": 8.256785519105242e-06, "loss": 26.4877, "step": 173390 }, { "epoch": 0.35027897073736347, "grad_norm": 365.5968933105469, "learning_rate": 8.256520650011126e-06, "loss": 25.2415, "step": 173400 }, { "epoch": 0.3502991713700473, "grad_norm": 804.7587890625, "learning_rate": 8.256255765045124e-06, "loss": 33.8894, "step": 173410 }, { "epoch": 0.3503193720027311, "grad_norm": 534.3924560546875, "learning_rate": 8.255990864208529e-06, "loss": 36.9681, "step": 173420 }, { "epoch": 0.35033957263541493, "grad_norm": 283.88006591796875, 
"learning_rate": 8.255725947502627e-06, "loss": 29.9513, "step": 173430 }, { "epoch": 0.35035977326809875, "grad_norm": 313.4808044433594, "learning_rate": 8.255461014928713e-06, "loss": 30.8336, "step": 173440 }, { "epoch": 0.35037997390078257, "grad_norm": 634.5245361328125, "learning_rate": 8.255196066488074e-06, "loss": 44.8829, "step": 173450 }, { "epoch": 0.3504001745334664, "grad_norm": 455.39520263671875, "learning_rate": 8.254931102182007e-06, "loss": 35.0474, "step": 173460 }, { "epoch": 0.3504203751661502, "grad_norm": 136.81488037109375, "learning_rate": 8.254666122011799e-06, "loss": 14.8398, "step": 173470 }, { "epoch": 0.35044057579883403, "grad_norm": 58.212120056152344, "learning_rate": 8.254401125978744e-06, "loss": 16.4767, "step": 173480 }, { "epoch": 0.35046077643151785, "grad_norm": 218.75843811035156, "learning_rate": 8.254136114084133e-06, "loss": 10.1186, "step": 173490 }, { "epoch": 0.35048097706420167, "grad_norm": 123.15180206298828, "learning_rate": 8.253871086329255e-06, "loss": 22.3891, "step": 173500 }, { "epoch": 0.3505011776968855, "grad_norm": 397.4679260253906, "learning_rate": 8.253606042715405e-06, "loss": 39.2791, "step": 173510 }, { "epoch": 0.35052137832956926, "grad_norm": 549.019287109375, "learning_rate": 8.253340983243876e-06, "loss": 22.0367, "step": 173520 }, { "epoch": 0.3505415789622531, "grad_norm": 463.5042419433594, "learning_rate": 8.253075907915955e-06, "loss": 19.4606, "step": 173530 }, { "epoch": 0.3505617795949369, "grad_norm": 47.17810821533203, "learning_rate": 8.252810816732936e-06, "loss": 18.1637, "step": 173540 }, { "epoch": 0.3505819802276207, "grad_norm": 699.627685546875, "learning_rate": 8.252545709696115e-06, "loss": 33.326, "step": 173550 }, { "epoch": 0.35060218086030454, "grad_norm": 786.27001953125, "learning_rate": 8.252280586806778e-06, "loss": 25.532, "step": 173560 }, { "epoch": 0.35062238149298836, "grad_norm": 590.9194946289062, "learning_rate": 8.25201544806622e-06, "loss": 29.8604, "step": 173570 }, { "epoch": 0.3506425821256722, "grad_norm": 482.8412780761719, "learning_rate": 8.251750293475735e-06, "loss": 34.1533, "step": 173580 }, { "epoch": 0.350662782758356, "grad_norm": 500.3822326660156, "learning_rate": 8.25148512303661e-06, "loss": 14.332, "step": 173590 }, { "epoch": 0.3506829833910398, "grad_norm": 428.2358093261719, "learning_rate": 8.251219936750145e-06, "loss": 13.9377, "step": 173600 }, { "epoch": 0.35070318402372364, "grad_norm": 180.1342010498047, "learning_rate": 8.250954734617627e-06, "loss": 23.4336, "step": 173610 }, { "epoch": 0.35072338465640746, "grad_norm": 627.1963500976562, "learning_rate": 8.250689516640349e-06, "loss": 10.7365, "step": 173620 }, { "epoch": 0.3507435852890913, "grad_norm": 533.58837890625, "learning_rate": 8.250424282819604e-06, "loss": 17.2078, "step": 173630 }, { "epoch": 0.3507637859217751, "grad_norm": 643.8233642578125, "learning_rate": 8.250159033156687e-06, "loss": 27.9006, "step": 173640 }, { "epoch": 0.35078398655445886, "grad_norm": 22.857276916503906, "learning_rate": 8.249893767652888e-06, "loss": 15.8945, "step": 173650 }, { "epoch": 0.3508041871871427, "grad_norm": 132.8467254638672, "learning_rate": 8.2496284863095e-06, "loss": 21.7937, "step": 173660 }, { "epoch": 0.3508243878198265, "grad_norm": 135.77993774414062, "learning_rate": 8.249363189127818e-06, "loss": 23.7345, "step": 173670 }, { "epoch": 0.3508445884525103, "grad_norm": 657.5906372070312, "learning_rate": 8.249097876109136e-06, "loss": 13.2515, "step": 173680 }, { "epoch": 
0.35086478908519414, "grad_norm": 1219.5201416015625, "learning_rate": 8.248832547254742e-06, "loss": 28.1558, "step": 173690 }, { "epoch": 0.35088498971787796, "grad_norm": 390.09503173828125, "learning_rate": 8.248567202565934e-06, "loss": 17.8971, "step": 173700 }, { "epoch": 0.3509051903505618, "grad_norm": 468.7591552734375, "learning_rate": 8.248301842044003e-06, "loss": 25.077, "step": 173710 }, { "epoch": 0.3509253909832456, "grad_norm": 320.3813781738281, "learning_rate": 8.24803646569024e-06, "loss": 25.8523, "step": 173720 }, { "epoch": 0.3509455916159294, "grad_norm": 498.8067321777344, "learning_rate": 8.247771073505946e-06, "loss": 29.7495, "step": 173730 }, { "epoch": 0.35096579224861324, "grad_norm": 1407.796142578125, "learning_rate": 8.247505665492406e-06, "loss": 25.3236, "step": 173740 }, { "epoch": 0.35098599288129706, "grad_norm": 417.5876159667969, "learning_rate": 8.247240241650918e-06, "loss": 22.4682, "step": 173750 }, { "epoch": 0.3510061935139809, "grad_norm": 289.0838623046875, "learning_rate": 8.246974801982776e-06, "loss": 20.5483, "step": 173760 }, { "epoch": 0.3510263941466647, "grad_norm": 155.3359832763672, "learning_rate": 8.246709346489272e-06, "loss": 26.6778, "step": 173770 }, { "epoch": 0.35104659477934846, "grad_norm": 440.09454345703125, "learning_rate": 8.2464438751717e-06, "loss": 28.5211, "step": 173780 }, { "epoch": 0.3510667954120323, "grad_norm": 311.7047119140625, "learning_rate": 8.246178388031355e-06, "loss": 18.4436, "step": 173790 }, { "epoch": 0.3510869960447161, "grad_norm": 30.6214656829834, "learning_rate": 8.24591288506953e-06, "loss": 10.6946, "step": 173800 }, { "epoch": 0.3511071966773999, "grad_norm": 614.3140258789062, "learning_rate": 8.24564736628752e-06, "loss": 14.8239, "step": 173810 }, { "epoch": 0.35112739731008374, "grad_norm": 182.3624267578125, "learning_rate": 8.245381831686618e-06, "loss": 16.5028, "step": 173820 }, { "epoch": 0.35114759794276756, "grad_norm": 676.9864501953125, "learning_rate": 8.245116281268119e-06, "loss": 20.3585, "step": 173830 }, { "epoch": 0.3511677985754514, "grad_norm": 1057.8885498046875, "learning_rate": 8.244850715033316e-06, "loss": 25.8648, "step": 173840 }, { "epoch": 0.3511879992081352, "grad_norm": 497.7960510253906, "learning_rate": 8.244585132983505e-06, "loss": 17.4738, "step": 173850 }, { "epoch": 0.351208199840819, "grad_norm": 179.4878387451172, "learning_rate": 8.244319535119978e-06, "loss": 20.1675, "step": 173860 }, { "epoch": 0.35122840047350284, "grad_norm": 388.04949951171875, "learning_rate": 8.244053921444034e-06, "loss": 21.7078, "step": 173870 }, { "epoch": 0.35124860110618666, "grad_norm": 362.1933898925781, "learning_rate": 8.24378829195696e-06, "loss": 13.1197, "step": 173880 }, { "epoch": 0.3512688017388705, "grad_norm": 365.29107666015625, "learning_rate": 8.243522646660058e-06, "loss": 27.2235, "step": 173890 }, { "epoch": 0.3512890023715543, "grad_norm": 743.2318115234375, "learning_rate": 8.243256985554622e-06, "loss": 22.8129, "step": 173900 }, { "epoch": 0.35130920300423807, "grad_norm": 221.1050567626953, "learning_rate": 8.24299130864194e-06, "loss": 44.6234, "step": 173910 }, { "epoch": 0.3513294036369219, "grad_norm": 380.4090576171875, "learning_rate": 8.242725615923316e-06, "loss": 19.9493, "step": 173920 }, { "epoch": 0.3513496042696057, "grad_norm": 393.19842529296875, "learning_rate": 8.24245990740004e-06, "loss": 20.9912, "step": 173930 }, { "epoch": 0.35136980490228953, "grad_norm": 153.64451599121094, "learning_rate": 8.242194183073406e-06, 
"loss": 30.5434, "step": 173940 }, { "epoch": 0.35139000553497335, "grad_norm": 397.6885986328125, "learning_rate": 8.24192844294471e-06, "loss": 31.8051, "step": 173950 }, { "epoch": 0.35141020616765717, "grad_norm": 1085.44189453125, "learning_rate": 8.241662687015251e-06, "loss": 18.7905, "step": 173960 }, { "epoch": 0.351430406800341, "grad_norm": 169.8010711669922, "learning_rate": 8.24139691528632e-06, "loss": 19.0973, "step": 173970 }, { "epoch": 0.3514506074330248, "grad_norm": 123.90522003173828, "learning_rate": 8.241131127759214e-06, "loss": 9.796, "step": 173980 }, { "epoch": 0.35147080806570863, "grad_norm": 408.1794128417969, "learning_rate": 8.240865324435225e-06, "loss": 18.8127, "step": 173990 }, { "epoch": 0.35149100869839245, "grad_norm": 47.10768508911133, "learning_rate": 8.240599505315656e-06, "loss": 21.2711, "step": 174000 }, { "epoch": 0.35151120933107627, "grad_norm": 275.2547302246094, "learning_rate": 8.240333670401795e-06, "loss": 26.2684, "step": 174010 }, { "epoch": 0.3515314099637601, "grad_norm": 513.5037841796875, "learning_rate": 8.240067819694941e-06, "loss": 23.2252, "step": 174020 }, { "epoch": 0.35155161059644385, "grad_norm": 34.5965576171875, "learning_rate": 8.23980195319639e-06, "loss": 21.4563, "step": 174030 }, { "epoch": 0.3515718112291277, "grad_norm": 365.20196533203125, "learning_rate": 8.239536070907437e-06, "loss": 22.2469, "step": 174040 }, { "epoch": 0.3515920118618115, "grad_norm": 288.6556701660156, "learning_rate": 8.239270172829379e-06, "loss": 13.4052, "step": 174050 }, { "epoch": 0.3516122124944953, "grad_norm": 651.83837890625, "learning_rate": 8.23900425896351e-06, "loss": 16.056, "step": 174060 }, { "epoch": 0.35163241312717913, "grad_norm": 306.2084045410156, "learning_rate": 8.238738329311126e-06, "loss": 17.5342, "step": 174070 }, { "epoch": 0.35165261375986295, "grad_norm": 162.78701782226562, "learning_rate": 8.238472383873528e-06, "loss": 16.8793, "step": 174080 }, { "epoch": 0.3516728143925468, "grad_norm": 1043.649658203125, "learning_rate": 8.238206422652007e-06, "loss": 17.1426, "step": 174090 }, { "epoch": 0.3516930150252306, "grad_norm": 507.8601989746094, "learning_rate": 8.237940445647858e-06, "loss": 29.4015, "step": 174100 }, { "epoch": 0.3517132156579144, "grad_norm": 371.531494140625, "learning_rate": 8.23767445286238e-06, "loss": 24.1237, "step": 174110 }, { "epoch": 0.35173341629059823, "grad_norm": 502.83709716796875, "learning_rate": 8.237408444296872e-06, "loss": 21.8385, "step": 174120 }, { "epoch": 0.35175361692328205, "grad_norm": 127.65411376953125, "learning_rate": 8.237142419952628e-06, "loss": 36.3535, "step": 174130 }, { "epoch": 0.3517738175559659, "grad_norm": 155.60055541992188, "learning_rate": 8.236876379830943e-06, "loss": 27.9313, "step": 174140 }, { "epoch": 0.3517940181886497, "grad_norm": 464.2092590332031, "learning_rate": 8.236610323933115e-06, "loss": 13.191, "step": 174150 }, { "epoch": 0.35181421882133346, "grad_norm": 1012.05810546875, "learning_rate": 8.236344252260442e-06, "loss": 19.8992, "step": 174160 }, { "epoch": 0.3518344194540173, "grad_norm": 28.210956573486328, "learning_rate": 8.236078164814218e-06, "loss": 32.3238, "step": 174170 }, { "epoch": 0.3518546200867011, "grad_norm": 137.7019805908203, "learning_rate": 8.235812061595742e-06, "loss": 25.4323, "step": 174180 }, { "epoch": 0.3518748207193849, "grad_norm": 491.35687255859375, "learning_rate": 8.235545942606311e-06, "loss": 20.8667, "step": 174190 }, { "epoch": 0.35189502135206874, "grad_norm": 532.808349609375, 
"learning_rate": 8.235279807847223e-06, "loss": 20.3499, "step": 174200 }, { "epoch": 0.35191522198475256, "grad_norm": 236.93276977539062, "learning_rate": 8.235013657319772e-06, "loss": 37.5176, "step": 174210 }, { "epoch": 0.3519354226174364, "grad_norm": 674.0003051757812, "learning_rate": 8.234747491025257e-06, "loss": 24.63, "step": 174220 }, { "epoch": 0.3519556232501202, "grad_norm": 282.430419921875, "learning_rate": 8.234481308964975e-06, "loss": 19.8445, "step": 174230 }, { "epoch": 0.351975823882804, "grad_norm": 854.5878295898438, "learning_rate": 8.234215111140222e-06, "loss": 35.0627, "step": 174240 }, { "epoch": 0.35199602451548784, "grad_norm": 476.0958557128906, "learning_rate": 8.2339488975523e-06, "loss": 14.2766, "step": 174250 }, { "epoch": 0.35201622514817166, "grad_norm": 394.2736511230469, "learning_rate": 8.2336826682025e-06, "loss": 19.2981, "step": 174260 }, { "epoch": 0.3520364257808555, "grad_norm": 125.087646484375, "learning_rate": 8.233416423092124e-06, "loss": 19.7714, "step": 174270 }, { "epoch": 0.3520566264135393, "grad_norm": 252.89723205566406, "learning_rate": 8.23315016222247e-06, "loss": 28.8608, "step": 174280 }, { "epoch": 0.35207682704622306, "grad_norm": 167.74928283691406, "learning_rate": 8.232883885594831e-06, "loss": 14.6707, "step": 174290 }, { "epoch": 0.3520970276789069, "grad_norm": 134.2716064453125, "learning_rate": 8.232617593210512e-06, "loss": 12.7259, "step": 174300 }, { "epoch": 0.3521172283115907, "grad_norm": 160.16358947753906, "learning_rate": 8.232351285070804e-06, "loss": 15.1264, "step": 174310 }, { "epoch": 0.3521374289442745, "grad_norm": 701.306640625, "learning_rate": 8.23208496117701e-06, "loss": 34.7873, "step": 174320 }, { "epoch": 0.35215762957695834, "grad_norm": 222.68003845214844, "learning_rate": 8.231818621530424e-06, "loss": 34.4222, "step": 174330 }, { "epoch": 0.35217783020964216, "grad_norm": 393.4009094238281, "learning_rate": 8.231552266132346e-06, "loss": 49.5541, "step": 174340 }, { "epoch": 0.352198030842326, "grad_norm": 224.24929809570312, "learning_rate": 8.231285894984076e-06, "loss": 27.1999, "step": 174350 }, { "epoch": 0.3522182314750098, "grad_norm": 215.3976593017578, "learning_rate": 8.231019508086908e-06, "loss": 18.1587, "step": 174360 }, { "epoch": 0.3522384321076936, "grad_norm": 86.14173889160156, "learning_rate": 8.230753105442145e-06, "loss": 25.2225, "step": 174370 }, { "epoch": 0.35225863274037744, "grad_norm": 467.911865234375, "learning_rate": 8.230486687051082e-06, "loss": 19.3863, "step": 174380 }, { "epoch": 0.35227883337306126, "grad_norm": 515.731201171875, "learning_rate": 8.23022025291502e-06, "loss": 20.7277, "step": 174390 }, { "epoch": 0.3522990340057451, "grad_norm": 491.38543701171875, "learning_rate": 8.229953803035256e-06, "loss": 20.9303, "step": 174400 }, { "epoch": 0.3523192346384289, "grad_norm": 251.4536590576172, "learning_rate": 8.229687337413087e-06, "loss": 38.819, "step": 174410 }, { "epoch": 0.35233943527111267, "grad_norm": 396.22802734375, "learning_rate": 8.229420856049814e-06, "loss": 24.1538, "step": 174420 }, { "epoch": 0.3523596359037965, "grad_norm": 379.91949462890625, "learning_rate": 8.229154358946739e-06, "loss": 29.9421, "step": 174430 }, { "epoch": 0.3523798365364803, "grad_norm": 520.71484375, "learning_rate": 8.228887846105154e-06, "loss": 17.0296, "step": 174440 }, { "epoch": 0.3524000371691641, "grad_norm": 289.1027526855469, "learning_rate": 8.228621317526362e-06, "loss": 20.0793, "step": 174450 }, { "epoch": 0.35242023780184795, 
"grad_norm": 453.0242919921875, "learning_rate": 8.22835477321166e-06, "loss": 20.1559, "step": 174460 }, { "epoch": 0.35244043843453177, "grad_norm": 866.8163452148438, "learning_rate": 8.22808821316235e-06, "loss": 27.4743, "step": 174470 }, { "epoch": 0.3524606390672156, "grad_norm": 399.63018798828125, "learning_rate": 8.22782163737973e-06, "loss": 15.5161, "step": 174480 }, { "epoch": 0.3524808396998994, "grad_norm": 542.755859375, "learning_rate": 8.227555045865097e-06, "loss": 27.222, "step": 174490 }, { "epoch": 0.3525010403325832, "grad_norm": 813.2581787109375, "learning_rate": 8.227288438619754e-06, "loss": 19.779, "step": 174500 }, { "epoch": 0.35252124096526705, "grad_norm": 313.12493896484375, "learning_rate": 8.227021815644998e-06, "loss": 19.1409, "step": 174510 }, { "epoch": 0.35254144159795087, "grad_norm": 606.32421875, "learning_rate": 8.226755176942127e-06, "loss": 18.7381, "step": 174520 }, { "epoch": 0.3525616422306347, "grad_norm": 721.5530395507812, "learning_rate": 8.226488522512445e-06, "loss": 18.6823, "step": 174530 }, { "epoch": 0.3525818428633185, "grad_norm": 443.0743713378906, "learning_rate": 8.22622185235725e-06, "loss": 16.3067, "step": 174540 }, { "epoch": 0.35260204349600227, "grad_norm": 309.57989501953125, "learning_rate": 8.22595516647784e-06, "loss": 16.5351, "step": 174550 }, { "epoch": 0.3526222441286861, "grad_norm": 266.3574523925781, "learning_rate": 8.225688464875514e-06, "loss": 10.3782, "step": 174560 }, { "epoch": 0.3526424447613699, "grad_norm": 455.272216796875, "learning_rate": 8.225421747551575e-06, "loss": 13.2519, "step": 174570 }, { "epoch": 0.35266264539405373, "grad_norm": 424.2519226074219, "learning_rate": 8.225155014507322e-06, "loss": 42.6528, "step": 174580 }, { "epoch": 0.35268284602673755, "grad_norm": 1382.4771728515625, "learning_rate": 8.224888265744055e-06, "loss": 20.4178, "step": 174590 }, { "epoch": 0.35270304665942137, "grad_norm": 114.04889678955078, "learning_rate": 8.224621501263073e-06, "loss": 12.4362, "step": 174600 }, { "epoch": 0.3527232472921052, "grad_norm": 410.6907958984375, "learning_rate": 8.224354721065676e-06, "loss": 29.2127, "step": 174610 }, { "epoch": 0.352743447924789, "grad_norm": 375.0404968261719, "learning_rate": 8.224087925153167e-06, "loss": 15.1587, "step": 174620 }, { "epoch": 0.35276364855747283, "grad_norm": 661.4111938476562, "learning_rate": 8.223821113526843e-06, "loss": 31.2368, "step": 174630 }, { "epoch": 0.35278384919015665, "grad_norm": 159.78857421875, "learning_rate": 8.223554286188007e-06, "loss": 17.7349, "step": 174640 }, { "epoch": 0.35280404982284047, "grad_norm": 897.872314453125, "learning_rate": 8.223287443137957e-06, "loss": 20.3204, "step": 174650 }, { "epoch": 0.3528242504555243, "grad_norm": 699.4429321289062, "learning_rate": 8.223020584377997e-06, "loss": 23.8734, "step": 174660 }, { "epoch": 0.35284445108820806, "grad_norm": 632.2472534179688, "learning_rate": 8.222753709909423e-06, "loss": 19.7888, "step": 174670 }, { "epoch": 0.3528646517208919, "grad_norm": 725.69189453125, "learning_rate": 8.22248681973354e-06, "loss": 40.8493, "step": 174680 }, { "epoch": 0.3528848523535757, "grad_norm": 453.00372314453125, "learning_rate": 8.222219913851647e-06, "loss": 14.9936, "step": 174690 }, { "epoch": 0.3529050529862595, "grad_norm": 1012.4638061523438, "learning_rate": 8.221952992265046e-06, "loss": 15.8687, "step": 174700 }, { "epoch": 0.35292525361894334, "grad_norm": 430.6357727050781, "learning_rate": 8.221686054975035e-06, "loss": 29.4404, "step": 174710 }, { 
"epoch": 0.35294545425162716, "grad_norm": 78.58820343017578, "learning_rate": 8.221419101982917e-06, "loss": 52.1942, "step": 174720 }, { "epoch": 0.352965654884311, "grad_norm": 643.046142578125, "learning_rate": 8.221152133289993e-06, "loss": 27.5479, "step": 174730 }, { "epoch": 0.3529858555169948, "grad_norm": 840.59521484375, "learning_rate": 8.220885148897566e-06, "loss": 17.2693, "step": 174740 }, { "epoch": 0.3530060561496786, "grad_norm": 498.2879638671875, "learning_rate": 8.220618148806934e-06, "loss": 19.2516, "step": 174750 }, { "epoch": 0.35302625678236244, "grad_norm": 94.1357650756836, "learning_rate": 8.2203511330194e-06, "loss": 13.9296, "step": 174760 }, { "epoch": 0.35304645741504626, "grad_norm": 252.35238647460938, "learning_rate": 8.220084101536264e-06, "loss": 26.2823, "step": 174770 }, { "epoch": 0.3530666580477301, "grad_norm": 392.3772888183594, "learning_rate": 8.21981705435883e-06, "loss": 15.2514, "step": 174780 }, { "epoch": 0.3530868586804139, "grad_norm": 431.6820068359375, "learning_rate": 8.219549991488398e-06, "loss": 21.3616, "step": 174790 }, { "epoch": 0.35310705931309766, "grad_norm": 511.37030029296875, "learning_rate": 8.21928291292627e-06, "loss": 23.538, "step": 174800 }, { "epoch": 0.3531272599457815, "grad_norm": 491.3835144042969, "learning_rate": 8.219015818673747e-06, "loss": 37.6915, "step": 174810 }, { "epoch": 0.3531474605784653, "grad_norm": 568.794921875, "learning_rate": 8.218748708732131e-06, "loss": 11.7215, "step": 174820 }, { "epoch": 0.3531676612111491, "grad_norm": 198.10693359375, "learning_rate": 8.218481583102725e-06, "loss": 22.8137, "step": 174830 }, { "epoch": 0.35318786184383294, "grad_norm": 369.39703369140625, "learning_rate": 8.218214441786829e-06, "loss": 21.6082, "step": 174840 }, { "epoch": 0.35320806247651676, "grad_norm": 108.52118682861328, "learning_rate": 8.217947284785748e-06, "loss": 28.672, "step": 174850 }, { "epoch": 0.3532282631092006, "grad_norm": 319.4595642089844, "learning_rate": 8.217680112100782e-06, "loss": 14.6211, "step": 174860 }, { "epoch": 0.3532484637418844, "grad_norm": 550.907470703125, "learning_rate": 8.217412923733232e-06, "loss": 26.826, "step": 174870 }, { "epoch": 0.3532686643745682, "grad_norm": 77.43846130371094, "learning_rate": 8.217145719684403e-06, "loss": 36.5522, "step": 174880 }, { "epoch": 0.35328886500725204, "grad_norm": 162.79888916015625, "learning_rate": 8.216878499955594e-06, "loss": 12.6612, "step": 174890 }, { "epoch": 0.35330906563993586, "grad_norm": 570.7067260742188, "learning_rate": 8.21661126454811e-06, "loss": 13.0498, "step": 174900 }, { "epoch": 0.3533292662726197, "grad_norm": 539.6907348632812, "learning_rate": 8.216344013463255e-06, "loss": 16.5399, "step": 174910 }, { "epoch": 0.3533494669053035, "grad_norm": 180.33628845214844, "learning_rate": 8.216076746702327e-06, "loss": 20.4061, "step": 174920 }, { "epoch": 0.35336966753798726, "grad_norm": 309.32513427734375, "learning_rate": 8.215809464266632e-06, "loss": 14.4209, "step": 174930 }, { "epoch": 0.3533898681706711, "grad_norm": 535.1676025390625, "learning_rate": 8.21554216615747e-06, "loss": 24.7506, "step": 174940 }, { "epoch": 0.3534100688033549, "grad_norm": 377.8276062011719, "learning_rate": 8.215274852376148e-06, "loss": 18.0881, "step": 174950 }, { "epoch": 0.3534302694360387, "grad_norm": 223.22373962402344, "learning_rate": 8.215007522923965e-06, "loss": 8.4666, "step": 174960 }, { "epoch": 0.35345047006872254, "grad_norm": 621.6992797851562, "learning_rate": 8.214740177802225e-06, "loss": 
33.3335, "step": 174970 }, { "epoch": 0.35347067070140636, "grad_norm": 183.6868896484375, "learning_rate": 8.214472817012232e-06, "loss": 28.1381, "step": 174980 }, { "epoch": 0.3534908713340902, "grad_norm": 445.21795654296875, "learning_rate": 8.214205440555289e-06, "loss": 11.1762, "step": 174990 }, { "epoch": 0.353511071966774, "grad_norm": 191.78660583496094, "learning_rate": 8.213938048432697e-06, "loss": 13.0107, "step": 175000 }, { "epoch": 0.3535312725994578, "grad_norm": 495.6348876953125, "learning_rate": 8.213670640645762e-06, "loss": 33.909, "step": 175010 }, { "epoch": 0.35355147323214164, "grad_norm": 526.88916015625, "learning_rate": 8.213403217195785e-06, "loss": 21.3817, "step": 175020 }, { "epoch": 0.35357167386482546, "grad_norm": 487.1705322265625, "learning_rate": 8.21313577808407e-06, "loss": 27.8439, "step": 175030 }, { "epoch": 0.3535918744975093, "grad_norm": 373.7630310058594, "learning_rate": 8.212868323311923e-06, "loss": 9.8656, "step": 175040 }, { "epoch": 0.3536120751301931, "grad_norm": 310.5169372558594, "learning_rate": 8.212600852880644e-06, "loss": 33.0556, "step": 175050 }, { "epoch": 0.35363227576287687, "grad_norm": 214.21728515625, "learning_rate": 8.212333366791539e-06, "loss": 29.317, "step": 175060 }, { "epoch": 0.3536524763955607, "grad_norm": 1133.0125732421875, "learning_rate": 8.21206586504591e-06, "loss": 19.4452, "step": 175070 }, { "epoch": 0.3536726770282445, "grad_norm": 477.560302734375, "learning_rate": 8.211798347645062e-06, "loss": 15.9271, "step": 175080 }, { "epoch": 0.35369287766092833, "grad_norm": 448.5748596191406, "learning_rate": 8.211530814590298e-06, "loss": 23.897, "step": 175090 }, { "epoch": 0.35371307829361215, "grad_norm": 321.6637268066406, "learning_rate": 8.211263265882923e-06, "loss": 17.1155, "step": 175100 }, { "epoch": 0.35373327892629597, "grad_norm": 157.0853271484375, "learning_rate": 8.21099570152424e-06, "loss": 31.9269, "step": 175110 }, { "epoch": 0.3537534795589798, "grad_norm": 366.2945556640625, "learning_rate": 8.210728121515552e-06, "loss": 23.7279, "step": 175120 }, { "epoch": 0.3537736801916636, "grad_norm": 212.07386779785156, "learning_rate": 8.210460525858167e-06, "loss": 25.1346, "step": 175130 }, { "epoch": 0.35379388082434743, "grad_norm": 187.92138671875, "learning_rate": 8.210192914553385e-06, "loss": 20.5662, "step": 175140 }, { "epoch": 0.35381408145703125, "grad_norm": 335.2757568359375, "learning_rate": 8.209925287602513e-06, "loss": 19.1828, "step": 175150 }, { "epoch": 0.35383428208971507, "grad_norm": 333.5650634765625, "learning_rate": 8.209657645006854e-06, "loss": 11.0527, "step": 175160 }, { "epoch": 0.3538544827223989, "grad_norm": 375.4122619628906, "learning_rate": 8.209389986767713e-06, "loss": 14.096, "step": 175170 }, { "epoch": 0.3538746833550827, "grad_norm": 599.1802368164062, "learning_rate": 8.209122312886394e-06, "loss": 15.0332, "step": 175180 }, { "epoch": 0.3538948839877665, "grad_norm": 830.013427734375, "learning_rate": 8.208854623364202e-06, "loss": 24.5616, "step": 175190 }, { "epoch": 0.3539150846204503, "grad_norm": 2.863065719604492, "learning_rate": 8.208586918202444e-06, "loss": 21.1935, "step": 175200 }, { "epoch": 0.3539352852531341, "grad_norm": 860.7329711914062, "learning_rate": 8.208319197402418e-06, "loss": 23.9066, "step": 175210 }, { "epoch": 0.35395548588581793, "grad_norm": 295.81427001953125, "learning_rate": 8.208051460965438e-06, "loss": 21.0558, "step": 175220 }, { "epoch": 0.35397568651850175, "grad_norm": 357.52789306640625, 
"learning_rate": 8.207783708892802e-06, "loss": 12.3761, "step": 175230 }, { "epoch": 0.3539958871511856, "grad_norm": 414.4925231933594, "learning_rate": 8.207515941185818e-06, "loss": 27.3326, "step": 175240 }, { "epoch": 0.3540160877838694, "grad_norm": 600.637451171875, "learning_rate": 8.20724815784579e-06, "loss": 16.0749, "step": 175250 }, { "epoch": 0.3540362884165532, "grad_norm": 128.98521423339844, "learning_rate": 8.206980358874024e-06, "loss": 20.8088, "step": 175260 }, { "epoch": 0.35405648904923703, "grad_norm": 301.27325439453125, "learning_rate": 8.206712544271825e-06, "loss": 29.2206, "step": 175270 }, { "epoch": 0.35407668968192085, "grad_norm": 520.8452758789062, "learning_rate": 8.206444714040496e-06, "loss": 17.5237, "step": 175280 }, { "epoch": 0.3540968903146047, "grad_norm": 122.30708312988281, "learning_rate": 8.206176868181346e-06, "loss": 11.8924, "step": 175290 }, { "epoch": 0.3541170909472885, "grad_norm": 161.1448516845703, "learning_rate": 8.205909006695679e-06, "loss": 16.651, "step": 175300 }, { "epoch": 0.35413729157997226, "grad_norm": 214.54786682128906, "learning_rate": 8.205641129584798e-06, "loss": 11.0476, "step": 175310 }, { "epoch": 0.3541574922126561, "grad_norm": 580.5718383789062, "learning_rate": 8.205373236850013e-06, "loss": 20.9196, "step": 175320 }, { "epoch": 0.3541776928453399, "grad_norm": 736.0718383789062, "learning_rate": 8.205105328492627e-06, "loss": 23.4889, "step": 175330 }, { "epoch": 0.3541978934780237, "grad_norm": 637.3290405273438, "learning_rate": 8.204837404513946e-06, "loss": 21.8905, "step": 175340 }, { "epoch": 0.35421809411070754, "grad_norm": 1373.241943359375, "learning_rate": 8.204569464915278e-06, "loss": 22.8951, "step": 175350 }, { "epoch": 0.35423829474339136, "grad_norm": 755.3607177734375, "learning_rate": 8.204301509697925e-06, "loss": 16.576, "step": 175360 }, { "epoch": 0.3542584953760752, "grad_norm": 643.6798095703125, "learning_rate": 8.204033538863196e-06, "loss": 28.1972, "step": 175370 }, { "epoch": 0.354278696008759, "grad_norm": 452.77093505859375, "learning_rate": 8.203765552412396e-06, "loss": 25.9113, "step": 175380 }, { "epoch": 0.3542988966414428, "grad_norm": 437.998779296875, "learning_rate": 8.203497550346832e-06, "loss": 50.7171, "step": 175390 }, { "epoch": 0.35431909727412664, "grad_norm": 688.136474609375, "learning_rate": 8.203229532667808e-06, "loss": 23.6534, "step": 175400 }, { "epoch": 0.35433929790681046, "grad_norm": 570.303955078125, "learning_rate": 8.202961499376633e-06, "loss": 28.7329, "step": 175410 }, { "epoch": 0.3543594985394943, "grad_norm": 364.986572265625, "learning_rate": 8.202693450474611e-06, "loss": 14.5151, "step": 175420 }, { "epoch": 0.3543796991721781, "grad_norm": 765.8523559570312, "learning_rate": 8.20242538596305e-06, "loss": 23.3007, "step": 175430 }, { "epoch": 0.35439989980486186, "grad_norm": 165.0301513671875, "learning_rate": 8.202157305843256e-06, "loss": 18.9969, "step": 175440 }, { "epoch": 0.3544201004375457, "grad_norm": 945.1859130859375, "learning_rate": 8.201889210116536e-06, "loss": 20.2072, "step": 175450 }, { "epoch": 0.3544403010702295, "grad_norm": 311.2787780761719, "learning_rate": 8.201621098784198e-06, "loss": 27.8193, "step": 175460 }, { "epoch": 0.3544605017029133, "grad_norm": 199.78689575195312, "learning_rate": 8.201352971847544e-06, "loss": 19.0399, "step": 175470 }, { "epoch": 0.35448070233559714, "grad_norm": 453.7472229003906, "learning_rate": 8.201084829307886e-06, "loss": 15.9557, "step": 175480 }, { "epoch": 
0.35450090296828096, "grad_norm": 32.777862548828125, "learning_rate": 8.200816671166529e-06, "loss": 19.7851, "step": 175490 }, { "epoch": 0.3545211036009648, "grad_norm": 177.6970672607422, "learning_rate": 8.200548497424779e-06, "loss": 36.1829, "step": 175500 }, { "epoch": 0.3545413042336486, "grad_norm": 297.3965148925781, "learning_rate": 8.200280308083944e-06, "loss": 17.4511, "step": 175510 }, { "epoch": 0.3545615048663324, "grad_norm": 346.8007507324219, "learning_rate": 8.200012103145329e-06, "loss": 13.4345, "step": 175520 }, { "epoch": 0.35458170549901624, "grad_norm": 377.2947692871094, "learning_rate": 8.199743882610245e-06, "loss": 19.6327, "step": 175530 }, { "epoch": 0.35460190613170006, "grad_norm": 135.6499481201172, "learning_rate": 8.199475646479997e-06, "loss": 17.9815, "step": 175540 }, { "epoch": 0.3546221067643839, "grad_norm": 373.2859191894531, "learning_rate": 8.199207394755892e-06, "loss": 31.1193, "step": 175550 }, { "epoch": 0.3546423073970677, "grad_norm": 282.8166198730469, "learning_rate": 8.19893912743924e-06, "loss": 14.4851, "step": 175560 }, { "epoch": 0.35466250802975147, "grad_norm": 254.1454315185547, "learning_rate": 8.198670844531345e-06, "loss": 11.7371, "step": 175570 }, { "epoch": 0.3546827086624353, "grad_norm": 293.01776123046875, "learning_rate": 8.198402546033518e-06, "loss": 27.6861, "step": 175580 }, { "epoch": 0.3547029092951191, "grad_norm": 394.53094482421875, "learning_rate": 8.198134231947064e-06, "loss": 14.5366, "step": 175590 }, { "epoch": 0.3547231099278029, "grad_norm": 313.37158203125, "learning_rate": 8.197865902273291e-06, "loss": 32.7562, "step": 175600 }, { "epoch": 0.35474331056048675, "grad_norm": 265.4231872558594, "learning_rate": 8.197597557013507e-06, "loss": 23.1748, "step": 175610 }, { "epoch": 0.35476351119317057, "grad_norm": 204.347412109375, "learning_rate": 8.197329196169022e-06, "loss": 28.9186, "step": 175620 }, { "epoch": 0.3547837118258544, "grad_norm": 164.70802307128906, "learning_rate": 8.197060819741141e-06, "loss": 20.8136, "step": 175630 }, { "epoch": 0.3548039124585382, "grad_norm": 434.8452453613281, "learning_rate": 8.196792427731175e-06, "loss": 21.0687, "step": 175640 }, { "epoch": 0.354824113091222, "grad_norm": 770.203369140625, "learning_rate": 8.196524020140428e-06, "loss": 14.9363, "step": 175650 }, { "epoch": 0.35484431372390585, "grad_norm": 111.00702667236328, "learning_rate": 8.196255596970214e-06, "loss": 26.4642, "step": 175660 }, { "epoch": 0.35486451435658967, "grad_norm": 210.8970947265625, "learning_rate": 8.195987158221835e-06, "loss": 21.014, "step": 175670 }, { "epoch": 0.3548847149892735, "grad_norm": 503.451171875, "learning_rate": 8.195718703896603e-06, "loss": 28.6416, "step": 175680 }, { "epoch": 0.3549049156219573, "grad_norm": 220.35812377929688, "learning_rate": 8.195450233995826e-06, "loss": 15.3415, "step": 175690 }, { "epoch": 0.35492511625464107, "grad_norm": 499.7003173828125, "learning_rate": 8.19518174852081e-06, "loss": 26.7256, "step": 175700 }, { "epoch": 0.3549453168873249, "grad_norm": 136.46705627441406, "learning_rate": 8.194913247472868e-06, "loss": 12.9298, "step": 175710 }, { "epoch": 0.3549655175200087, "grad_norm": 883.2164916992188, "learning_rate": 8.194644730853307e-06, "loss": 21.4403, "step": 175720 }, { "epoch": 0.35498571815269253, "grad_norm": 638.2017822265625, "learning_rate": 8.194376198663434e-06, "loss": 30.9745, "step": 175730 }, { "epoch": 0.35500591878537635, "grad_norm": 534.1513671875, "learning_rate": 8.194107650904556e-06, "loss": 
20.47, "step": 175740 }, { "epoch": 0.35502611941806017, "grad_norm": 417.97369384765625, "learning_rate": 8.19383908757799e-06, "loss": 16.4925, "step": 175750 }, { "epoch": 0.355046320050744, "grad_norm": 704.442138671875, "learning_rate": 8.193570508685035e-06, "loss": 21.7582, "step": 175760 }, { "epoch": 0.3550665206834278, "grad_norm": 309.06768798828125, "learning_rate": 8.193301914227008e-06, "loss": 13.6277, "step": 175770 }, { "epoch": 0.35508672131611163, "grad_norm": 169.63116455078125, "learning_rate": 8.193033304205213e-06, "loss": 10.3619, "step": 175780 }, { "epoch": 0.35510692194879545, "grad_norm": 461.3911437988281, "learning_rate": 8.192764678620961e-06, "loss": 27.736, "step": 175790 }, { "epoch": 0.35512712258147927, "grad_norm": 453.21435546875, "learning_rate": 8.192496037475562e-06, "loss": 25.6383, "step": 175800 }, { "epoch": 0.3551473232141631, "grad_norm": 696.3877563476562, "learning_rate": 8.192227380770326e-06, "loss": 32.5503, "step": 175810 }, { "epoch": 0.3551675238468469, "grad_norm": 359.2447204589844, "learning_rate": 8.191958708506557e-06, "loss": 24.0632, "step": 175820 }, { "epoch": 0.3551877244795307, "grad_norm": 448.9452819824219, "learning_rate": 8.19169002068557e-06, "loss": 16.0923, "step": 175830 }, { "epoch": 0.3552079251122145, "grad_norm": 302.58843994140625, "learning_rate": 8.191421317308674e-06, "loss": 7.7536, "step": 175840 }, { "epoch": 0.3552281257448983, "grad_norm": 317.3415222167969, "learning_rate": 8.191152598377179e-06, "loss": 26.0583, "step": 175850 }, { "epoch": 0.35524832637758214, "grad_norm": 113.57054138183594, "learning_rate": 8.19088386389239e-06, "loss": 22.8452, "step": 175860 }, { "epoch": 0.35526852701026596, "grad_norm": 515.0502319335938, "learning_rate": 8.19061511385562e-06, "loss": 17.7838, "step": 175870 }, { "epoch": 0.3552887276429498, "grad_norm": 625.891357421875, "learning_rate": 8.190346348268182e-06, "loss": 23.5269, "step": 175880 }, { "epoch": 0.3553089282756336, "grad_norm": 329.49493408203125, "learning_rate": 8.190077567131381e-06, "loss": 34.8531, "step": 175890 }, { "epoch": 0.3553291289083174, "grad_norm": 644.8638305664062, "learning_rate": 8.189808770446528e-06, "loss": 33.901, "step": 175900 }, { "epoch": 0.35534932954100124, "grad_norm": 254.66087341308594, "learning_rate": 8.189539958214934e-06, "loss": 38.3538, "step": 175910 }, { "epoch": 0.35536953017368506, "grad_norm": 767.3721923828125, "learning_rate": 8.18927113043791e-06, "loss": 20.6208, "step": 175920 }, { "epoch": 0.3553897308063689, "grad_norm": 124.93978881835938, "learning_rate": 8.189002287116765e-06, "loss": 23.6206, "step": 175930 }, { "epoch": 0.3554099314390527, "grad_norm": 368.7055969238281, "learning_rate": 8.188733428252811e-06, "loss": 18.4986, "step": 175940 }, { "epoch": 0.35543013207173646, "grad_norm": 328.1405029296875, "learning_rate": 8.188464553847356e-06, "loss": 16.6194, "step": 175950 }, { "epoch": 0.3554503327044203, "grad_norm": 617.9406127929688, "learning_rate": 8.18819566390171e-06, "loss": 17.6561, "step": 175960 }, { "epoch": 0.3554705333371041, "grad_norm": 984.331787109375, "learning_rate": 8.187926758417188e-06, "loss": 23.4561, "step": 175970 }, { "epoch": 0.3554907339697879, "grad_norm": 368.822021484375, "learning_rate": 8.187657837395095e-06, "loss": 14.7196, "step": 175980 }, { "epoch": 0.35551093460247174, "grad_norm": 350.0384216308594, "learning_rate": 8.187388900836745e-06, "loss": 26.0618, "step": 175990 }, { "epoch": 0.35553113523515556, "grad_norm": 405.1641540527344, 
"learning_rate": 8.18711994874345e-06, "loss": 14.5582, "step": 176000 }, { "epoch": 0.3555513358678394, "grad_norm": 178.0616455078125, "learning_rate": 8.186850981116516e-06, "loss": 14.8914, "step": 176010 }, { "epoch": 0.3555715365005232, "grad_norm": 223.12640380859375, "learning_rate": 8.18658199795726e-06, "loss": 14.7733, "step": 176020 }, { "epoch": 0.355591737133207, "grad_norm": 642.9396362304688, "learning_rate": 8.186312999266987e-06, "loss": 19.4961, "step": 176030 }, { "epoch": 0.35561193776589084, "grad_norm": 97.81071472167969, "learning_rate": 8.186043985047012e-06, "loss": 15.9184, "step": 176040 }, { "epoch": 0.35563213839857466, "grad_norm": 331.9412841796875, "learning_rate": 8.185774955298645e-06, "loss": 21.9188, "step": 176050 }, { "epoch": 0.3556523390312585, "grad_norm": 1437.18603515625, "learning_rate": 8.185505910023196e-06, "loss": 28.9128, "step": 176060 }, { "epoch": 0.3556725396639423, "grad_norm": 440.3446350097656, "learning_rate": 8.18523684922198e-06, "loss": 34.4394, "step": 176070 }, { "epoch": 0.35569274029662606, "grad_norm": 1181.7449951171875, "learning_rate": 8.184967772896304e-06, "loss": 21.1126, "step": 176080 }, { "epoch": 0.3557129409293099, "grad_norm": 466.76519775390625, "learning_rate": 8.184698681047482e-06, "loss": 14.2977, "step": 176090 }, { "epoch": 0.3557331415619937, "grad_norm": 668.67431640625, "learning_rate": 8.184429573676825e-06, "loss": 23.9406, "step": 176100 }, { "epoch": 0.3557533421946775, "grad_norm": 235.19480895996094, "learning_rate": 8.184160450785645e-06, "loss": 29.0543, "step": 176110 }, { "epoch": 0.35577354282736134, "grad_norm": 366.2788391113281, "learning_rate": 8.183891312375251e-06, "loss": 20.1827, "step": 176120 }, { "epoch": 0.35579374346004516, "grad_norm": 100.78903198242188, "learning_rate": 8.18362215844696e-06, "loss": 22.2481, "step": 176130 }, { "epoch": 0.355813944092729, "grad_norm": 290.5372314453125, "learning_rate": 8.183352989002079e-06, "loss": 17.5751, "step": 176140 }, { "epoch": 0.3558341447254128, "grad_norm": 265.5997619628906, "learning_rate": 8.183083804041922e-06, "loss": 19.2007, "step": 176150 }, { "epoch": 0.3558543453580966, "grad_norm": 817.3016357421875, "learning_rate": 8.1828146035678e-06, "loss": 35.5227, "step": 176160 }, { "epoch": 0.35587454599078044, "grad_norm": 647.1814575195312, "learning_rate": 8.182545387581026e-06, "loss": 20.4804, "step": 176170 }, { "epoch": 0.35589474662346426, "grad_norm": 240.89344787597656, "learning_rate": 8.182276156082911e-06, "loss": 26.3593, "step": 176180 }, { "epoch": 0.3559149472561481, "grad_norm": 830.0279541015625, "learning_rate": 8.182006909074769e-06, "loss": 38.3872, "step": 176190 }, { "epoch": 0.3559351478888319, "grad_norm": 69.38955688476562, "learning_rate": 8.181737646557912e-06, "loss": 14.6388, "step": 176200 }, { "epoch": 0.35595534852151567, "grad_norm": 361.1736145019531, "learning_rate": 8.181468368533651e-06, "loss": 28.375, "step": 176210 }, { "epoch": 0.3559755491541995, "grad_norm": 322.6838684082031, "learning_rate": 8.181199075003298e-06, "loss": 20.5138, "step": 176220 }, { "epoch": 0.3559957497868833, "grad_norm": 219.79002380371094, "learning_rate": 8.180929765968168e-06, "loss": 25.9195, "step": 176230 }, { "epoch": 0.35601595041956713, "grad_norm": 498.863525390625, "learning_rate": 8.18066044142957e-06, "loss": 18.7417, "step": 176240 }, { "epoch": 0.35603615105225095, "grad_norm": 692.880615234375, "learning_rate": 8.18039110138882e-06, "loss": 47.3885, "step": 176250 }, { "epoch": 
0.35605635168493477, "grad_norm": 502.8414001464844, "learning_rate": 8.18012174584723e-06, "loss": 20.0152, "step": 176260 }, { "epoch": 0.3560765523176186, "grad_norm": 347.7989501953125, "learning_rate": 8.179852374806112e-06, "loss": 19.1384, "step": 176270 }, { "epoch": 0.3560967529503024, "grad_norm": 599.8389892578125, "learning_rate": 8.179582988266778e-06, "loss": 15.0186, "step": 176280 }, { "epoch": 0.35611695358298623, "grad_norm": 831.802001953125, "learning_rate": 8.179313586230544e-06, "loss": 27.5229, "step": 176290 }, { "epoch": 0.35613715421567005, "grad_norm": 877.4141845703125, "learning_rate": 8.179044168698722e-06, "loss": 41.1221, "step": 176300 }, { "epoch": 0.35615735484835387, "grad_norm": 1.0121227502822876, "learning_rate": 8.178774735672622e-06, "loss": 29.6882, "step": 176310 }, { "epoch": 0.3561775554810377, "grad_norm": 466.5857849121094, "learning_rate": 8.17850528715356e-06, "loss": 29.7614, "step": 176320 }, { "epoch": 0.3561977561137215, "grad_norm": 1201.7891845703125, "learning_rate": 8.178235823142849e-06, "loss": 20.21, "step": 176330 }, { "epoch": 0.3562179567464053, "grad_norm": 674.99609375, "learning_rate": 8.177966343641803e-06, "loss": 22.0448, "step": 176340 }, { "epoch": 0.3562381573790891, "grad_norm": 144.88600158691406, "learning_rate": 8.177696848651733e-06, "loss": 14.6154, "step": 176350 }, { "epoch": 0.3562583580117729, "grad_norm": 424.9156799316406, "learning_rate": 8.177427338173955e-06, "loss": 17.7365, "step": 176360 }, { "epoch": 0.35627855864445673, "grad_norm": 140.97987365722656, "learning_rate": 8.17715781220978e-06, "loss": 10.5915, "step": 176370 }, { "epoch": 0.35629875927714055, "grad_norm": 670.3990478515625, "learning_rate": 8.176888270760524e-06, "loss": 37.206, "step": 176380 }, { "epoch": 0.3563189599098244, "grad_norm": 335.6086730957031, "learning_rate": 8.1766187138275e-06, "loss": 16.479, "step": 176390 }, { "epoch": 0.3563391605425082, "grad_norm": 543.1423950195312, "learning_rate": 8.176349141412022e-06, "loss": 27.8851, "step": 176400 }, { "epoch": 0.356359361175192, "grad_norm": 407.7586975097656, "learning_rate": 8.176079553515403e-06, "loss": 21.3756, "step": 176410 }, { "epoch": 0.35637956180787583, "grad_norm": 487.2771911621094, "learning_rate": 8.175809950138958e-06, "loss": 19.7585, "step": 176420 }, { "epoch": 0.35639976244055965, "grad_norm": 1027.403076171875, "learning_rate": 8.175540331284e-06, "loss": 41.2479, "step": 176430 }, { "epoch": 0.3564199630732435, "grad_norm": 2425.6884765625, "learning_rate": 8.175270696951846e-06, "loss": 27.228, "step": 176440 }, { "epoch": 0.3564401637059273, "grad_norm": 364.4439392089844, "learning_rate": 8.175001047143804e-06, "loss": 14.7915, "step": 176450 }, { "epoch": 0.35646036433861106, "grad_norm": 255.93695068359375, "learning_rate": 8.174731381861194e-06, "loss": 25.3969, "step": 176460 }, { "epoch": 0.3564805649712949, "grad_norm": 1018.808349609375, "learning_rate": 8.174461701105328e-06, "loss": 60.0915, "step": 176470 }, { "epoch": 0.3565007656039787, "grad_norm": 239.02467346191406, "learning_rate": 8.17419200487752e-06, "loss": 18.13, "step": 176480 }, { "epoch": 0.3565209662366625, "grad_norm": 135.59262084960938, "learning_rate": 8.173922293179086e-06, "loss": 15.985, "step": 176490 }, { "epoch": 0.35654116686934634, "grad_norm": 263.75164794921875, "learning_rate": 8.173652566011339e-06, "loss": 29.7916, "step": 176500 }, { "epoch": 0.35656136750203016, "grad_norm": 424.6169738769531, "learning_rate": 8.173382823375594e-06, "loss": 22.1316, 
"step": 176510 }, { "epoch": 0.356581568134714, "grad_norm": 487.9251708984375, "learning_rate": 8.173113065273167e-06, "loss": 12.1967, "step": 176520 }, { "epoch": 0.3566017687673978, "grad_norm": 316.0293884277344, "learning_rate": 8.17284329170537e-06, "loss": 19.9682, "step": 176530 }, { "epoch": 0.3566219694000816, "grad_norm": 613.0550537109375, "learning_rate": 8.172573502673522e-06, "loss": 12.1763, "step": 176540 }, { "epoch": 0.35664217003276544, "grad_norm": 183.81101989746094, "learning_rate": 8.172303698178934e-06, "loss": 13.2585, "step": 176550 }, { "epoch": 0.35666237066544926, "grad_norm": 183.7642364501953, "learning_rate": 8.172033878222924e-06, "loss": 21.5417, "step": 176560 }, { "epoch": 0.3566825712981331, "grad_norm": 508.7245178222656, "learning_rate": 8.171764042806804e-06, "loss": 20.4488, "step": 176570 }, { "epoch": 0.3567027719308169, "grad_norm": 192.7313232421875, "learning_rate": 8.171494191931892e-06, "loss": 22.8201, "step": 176580 }, { "epoch": 0.35672297256350066, "grad_norm": 275.0957336425781, "learning_rate": 8.171224325599502e-06, "loss": 20.6334, "step": 176590 }, { "epoch": 0.3567431731961845, "grad_norm": 459.2030029296875, "learning_rate": 8.170954443810947e-06, "loss": 19.7246, "step": 176600 }, { "epoch": 0.3567633738288683, "grad_norm": 724.6907348632812, "learning_rate": 8.170684546567546e-06, "loss": 48.7054, "step": 176610 }, { "epoch": 0.3567835744615521, "grad_norm": 320.8407287597656, "learning_rate": 8.170414633870617e-06, "loss": 36.5331, "step": 176620 }, { "epoch": 0.35680377509423594, "grad_norm": 379.14349365234375, "learning_rate": 8.170144705721465e-06, "loss": 34.0888, "step": 176630 }, { "epoch": 0.35682397572691976, "grad_norm": 463.0989074707031, "learning_rate": 8.169874762121416e-06, "loss": 16.4816, "step": 176640 }, { "epoch": 0.3568441763596036, "grad_norm": 297.63055419921875, "learning_rate": 8.169604803071783e-06, "loss": 15.3545, "step": 176650 }, { "epoch": 0.3568643769922874, "grad_norm": 551.3389282226562, "learning_rate": 8.169334828573878e-06, "loss": 17.3687, "step": 176660 }, { "epoch": 0.3568845776249712, "grad_norm": 367.9317932128906, "learning_rate": 8.16906483862902e-06, "loss": 18.374, "step": 176670 }, { "epoch": 0.35690477825765504, "grad_norm": 951.7230834960938, "learning_rate": 8.168794833238523e-06, "loss": 36.017, "step": 176680 }, { "epoch": 0.35692497889033886, "grad_norm": 297.3556823730469, "learning_rate": 8.168524812403707e-06, "loss": 22.7016, "step": 176690 }, { "epoch": 0.3569451795230227, "grad_norm": 207.41456604003906, "learning_rate": 8.168254776125883e-06, "loss": 24.2357, "step": 176700 }, { "epoch": 0.3569653801557065, "grad_norm": 428.276123046875, "learning_rate": 8.167984724406371e-06, "loss": 13.914, "step": 176710 }, { "epoch": 0.35698558078839027, "grad_norm": 234.00286865234375, "learning_rate": 8.167714657246486e-06, "loss": 24.5092, "step": 176720 }, { "epoch": 0.3570057814210741, "grad_norm": 7.838464736938477, "learning_rate": 8.167444574647542e-06, "loss": 17.7004, "step": 176730 }, { "epoch": 0.3570259820537579, "grad_norm": 384.3555603027344, "learning_rate": 8.16717447661086e-06, "loss": 22.2544, "step": 176740 }, { "epoch": 0.3570461826864417, "grad_norm": 132.55084228515625, "learning_rate": 8.166904363137752e-06, "loss": 19.6422, "step": 176750 }, { "epoch": 0.35706638331912555, "grad_norm": 231.16519165039062, "learning_rate": 8.166634234229535e-06, "loss": 23.2205, "step": 176760 }, { "epoch": 0.35708658395180937, "grad_norm": 488.4976501464844, 
"learning_rate": 8.166364089887528e-06, "loss": 26.6965, "step": 176770 }, { "epoch": 0.3571067845844932, "grad_norm": 471.4288330078125, "learning_rate": 8.166093930113048e-06, "loss": 16.4289, "step": 176780 }, { "epoch": 0.357126985217177, "grad_norm": 189.90719604492188, "learning_rate": 8.16582375490741e-06, "loss": 17.9405, "step": 176790 }, { "epoch": 0.3571471858498608, "grad_norm": 333.07122802734375, "learning_rate": 8.165553564271928e-06, "loss": 20.4349, "step": 176800 }, { "epoch": 0.35716738648254465, "grad_norm": 306.79095458984375, "learning_rate": 8.165283358207924e-06, "loss": 23.715, "step": 176810 }, { "epoch": 0.35718758711522847, "grad_norm": 202.62399291992188, "learning_rate": 8.165013136716714e-06, "loss": 15.7137, "step": 176820 }, { "epoch": 0.3572077877479123, "grad_norm": 1108.8873291015625, "learning_rate": 8.164742899799612e-06, "loss": 27.1154, "step": 176830 }, { "epoch": 0.3572279883805961, "grad_norm": 481.61407470703125, "learning_rate": 8.164472647457937e-06, "loss": 9.8774, "step": 176840 }, { "epoch": 0.35724818901327987, "grad_norm": 924.6329956054688, "learning_rate": 8.164202379693008e-06, "loss": 19.521, "step": 176850 }, { "epoch": 0.3572683896459637, "grad_norm": 148.47378540039062, "learning_rate": 8.163932096506137e-06, "loss": 22.9844, "step": 176860 }, { "epoch": 0.3572885902786475, "grad_norm": 513.387939453125, "learning_rate": 8.163661797898647e-06, "loss": 22.9582, "step": 176870 }, { "epoch": 0.35730879091133133, "grad_norm": 0.0, "learning_rate": 8.163391483871853e-06, "loss": 11.2833, "step": 176880 }, { "epoch": 0.35732899154401515, "grad_norm": 411.4132385253906, "learning_rate": 8.163121154427073e-06, "loss": 16.2289, "step": 176890 }, { "epoch": 0.35734919217669897, "grad_norm": 213.57513427734375, "learning_rate": 8.162850809565623e-06, "loss": 22.0561, "step": 176900 }, { "epoch": 0.3573693928093828, "grad_norm": 391.9679870605469, "learning_rate": 8.162580449288822e-06, "loss": 19.1955, "step": 176910 }, { "epoch": 0.3573895934420666, "grad_norm": 525.6038818359375, "learning_rate": 8.162310073597987e-06, "loss": 38.1771, "step": 176920 }, { "epoch": 0.35740979407475043, "grad_norm": 267.0684814453125, "learning_rate": 8.162039682494438e-06, "loss": 16.3052, "step": 176930 }, { "epoch": 0.35742999470743425, "grad_norm": 350.752685546875, "learning_rate": 8.16176927597949e-06, "loss": 23.3383, "step": 176940 }, { "epoch": 0.35745019534011807, "grad_norm": 502.6983642578125, "learning_rate": 8.161498854054462e-06, "loss": 11.7765, "step": 176950 }, { "epoch": 0.3574703959728019, "grad_norm": 161.5146026611328, "learning_rate": 8.161228416720673e-06, "loss": 23.4157, "step": 176960 }, { "epoch": 0.3574905966054857, "grad_norm": 789.0687866210938, "learning_rate": 8.160957963979438e-06, "loss": 20.3483, "step": 176970 }, { "epoch": 0.3575107972381695, "grad_norm": 1065.0162353515625, "learning_rate": 8.160687495832078e-06, "loss": 25.1569, "step": 176980 }, { "epoch": 0.3575309978708533, "grad_norm": 365.522705078125, "learning_rate": 8.160417012279911e-06, "loss": 19.2211, "step": 176990 }, { "epoch": 0.3575511985035371, "grad_norm": 785.7221069335938, "learning_rate": 8.160146513324256e-06, "loss": 18.4823, "step": 177000 }, { "epoch": 0.35757139913622094, "grad_norm": 177.9609375, "learning_rate": 8.159875998966427e-06, "loss": 15.5257, "step": 177010 }, { "epoch": 0.35759159976890476, "grad_norm": 261.8163757324219, "learning_rate": 8.15960546920775e-06, "loss": 19.7163, "step": 177020 }, { "epoch": 0.3576118004015886, 
"grad_norm": 622.7557373046875, "learning_rate": 8.159334924049536e-06, "loss": 24.4171, "step": 177030 }, { "epoch": 0.3576320010342724, "grad_norm": 230.59262084960938, "learning_rate": 8.159064363493106e-06, "loss": 20.035, "step": 177040 }, { "epoch": 0.3576522016669562, "grad_norm": 252.6420440673828, "learning_rate": 8.158793787539782e-06, "loss": 32.239, "step": 177050 }, { "epoch": 0.35767240229964004, "grad_norm": 678.0930786132812, "learning_rate": 8.158523196190879e-06, "loss": 25.604, "step": 177060 }, { "epoch": 0.35769260293232386, "grad_norm": 320.4878234863281, "learning_rate": 8.158252589447717e-06, "loss": 31.6477, "step": 177070 }, { "epoch": 0.3577128035650077, "grad_norm": 1682.7012939453125, "learning_rate": 8.157981967311614e-06, "loss": 19.9565, "step": 177080 }, { "epoch": 0.3577330041976915, "grad_norm": 429.1732177734375, "learning_rate": 8.15771132978389e-06, "loss": 24.1278, "step": 177090 }, { "epoch": 0.35775320483037526, "grad_norm": 230.71633911132812, "learning_rate": 8.157440676865866e-06, "loss": 12.2427, "step": 177100 }, { "epoch": 0.3577734054630591, "grad_norm": 682.2756958007812, "learning_rate": 8.15717000855886e-06, "loss": 22.3429, "step": 177110 }, { "epoch": 0.3577936060957429, "grad_norm": 234.9181671142578, "learning_rate": 8.156899324864187e-06, "loss": 22.0557, "step": 177120 }, { "epoch": 0.3578138067284267, "grad_norm": 83.15522766113281, "learning_rate": 8.15662862578317e-06, "loss": 29.9858, "step": 177130 }, { "epoch": 0.35783400736111054, "grad_norm": 206.202392578125, "learning_rate": 8.15635791131713e-06, "loss": 14.7609, "step": 177140 }, { "epoch": 0.35785420799379436, "grad_norm": 492.3747863769531, "learning_rate": 8.156087181467382e-06, "loss": 25.9009, "step": 177150 }, { "epoch": 0.3578744086264782, "grad_norm": 181.47689819335938, "learning_rate": 8.15581643623525e-06, "loss": 27.0679, "step": 177160 }, { "epoch": 0.357894609259162, "grad_norm": 315.83404541015625, "learning_rate": 8.155545675622049e-06, "loss": 19.0138, "step": 177170 }, { "epoch": 0.3579148098918458, "grad_norm": 234.9701385498047, "learning_rate": 8.155274899629104e-06, "loss": 12.2873, "step": 177180 }, { "epoch": 0.35793501052452964, "grad_norm": 77.23339080810547, "learning_rate": 8.155004108257731e-06, "loss": 11.3722, "step": 177190 }, { "epoch": 0.35795521115721346, "grad_norm": 954.743896484375, "learning_rate": 8.154733301509249e-06, "loss": 36.6601, "step": 177200 }, { "epoch": 0.3579754117898973, "grad_norm": 144.52452087402344, "learning_rate": 8.154462479384982e-06, "loss": 23.6636, "step": 177210 }, { "epoch": 0.3579956124225811, "grad_norm": 0.0, "learning_rate": 8.154191641886244e-06, "loss": 20.1353, "step": 177220 }, { "epoch": 0.35801581305526486, "grad_norm": 503.2443542480469, "learning_rate": 8.15392078901436e-06, "loss": 21.068, "step": 177230 }, { "epoch": 0.3580360136879487, "grad_norm": 317.6268310546875, "learning_rate": 8.15364992077065e-06, "loss": 13.95, "step": 177240 }, { "epoch": 0.3580562143206325, "grad_norm": 106.98784637451172, "learning_rate": 8.153379037156433e-06, "loss": 16.8383, "step": 177250 }, { "epoch": 0.3580764149533163, "grad_norm": 453.4849853515625, "learning_rate": 8.153108138173027e-06, "loss": 21.0666, "step": 177260 }, { "epoch": 0.35809661558600014, "grad_norm": 1080.802001953125, "learning_rate": 8.152837223821755e-06, "loss": 27.8877, "step": 177270 }, { "epoch": 0.35811681621868396, "grad_norm": 730.4622802734375, "learning_rate": 8.152566294103936e-06, "loss": 19.6041, "step": 177280 }, { "epoch": 
0.3581370168513678, "grad_norm": 451.93182373046875, "learning_rate": 8.152295349020893e-06, "loss": 18.5048, "step": 177290 }, { "epoch": 0.3581572174840516, "grad_norm": 427.9884948730469, "learning_rate": 8.152024388573945e-06, "loss": 23.6541, "step": 177300 }, { "epoch": 0.3581774181167354, "grad_norm": 3.375775098800659, "learning_rate": 8.15175341276441e-06, "loss": 28.5684, "step": 177310 }, { "epoch": 0.35819761874941924, "grad_norm": 328.9010314941406, "learning_rate": 8.151482421593613e-06, "loss": 31.7084, "step": 177320 }, { "epoch": 0.35821781938210306, "grad_norm": 21.159927368164062, "learning_rate": 8.151211415062872e-06, "loss": 24.8793, "step": 177330 }, { "epoch": 0.3582380200147869, "grad_norm": 321.2985534667969, "learning_rate": 8.15094039317351e-06, "loss": 26.8257, "step": 177340 }, { "epoch": 0.3582582206474707, "grad_norm": 401.93707275390625, "learning_rate": 8.150669355926848e-06, "loss": 18.7087, "step": 177350 }, { "epoch": 0.35827842128015447, "grad_norm": 57.58470916748047, "learning_rate": 8.150398303324201e-06, "loss": 13.7024, "step": 177360 }, { "epoch": 0.3582986219128383, "grad_norm": 227.62020874023438, "learning_rate": 8.150127235366897e-06, "loss": 13.9598, "step": 177370 }, { "epoch": 0.3583188225455221, "grad_norm": 197.00538635253906, "learning_rate": 8.149856152056257e-06, "loss": 27.529, "step": 177380 }, { "epoch": 0.35833902317820593, "grad_norm": 167.59326171875, "learning_rate": 8.149585053393599e-06, "loss": 15.2473, "step": 177390 }, { "epoch": 0.35835922381088975, "grad_norm": 160.30685424804688, "learning_rate": 8.149313939380244e-06, "loss": 22.6754, "step": 177400 }, { "epoch": 0.35837942444357357, "grad_norm": 209.0167999267578, "learning_rate": 8.149042810017515e-06, "loss": 15.2747, "step": 177410 }, { "epoch": 0.3583996250762574, "grad_norm": 112.47742462158203, "learning_rate": 8.148771665306736e-06, "loss": 20.4897, "step": 177420 }, { "epoch": 0.3584198257089412, "grad_norm": 470.9197692871094, "learning_rate": 8.148500505249224e-06, "loss": 20.8236, "step": 177430 }, { "epoch": 0.35844002634162503, "grad_norm": 538.292724609375, "learning_rate": 8.1482293298463e-06, "loss": 27.6507, "step": 177440 }, { "epoch": 0.35846022697430885, "grad_norm": 415.1450500488281, "learning_rate": 8.147958139099292e-06, "loss": 28.701, "step": 177450 }, { "epoch": 0.35848042760699267, "grad_norm": 379.1101989746094, "learning_rate": 8.147686933009515e-06, "loss": 16.573, "step": 177460 }, { "epoch": 0.3585006282396765, "grad_norm": 2266.6806640625, "learning_rate": 8.147415711578295e-06, "loss": 44.919, "step": 177470 }, { "epoch": 0.3585208288723603, "grad_norm": 393.7117614746094, "learning_rate": 8.147144474806954e-06, "loss": 16.6654, "step": 177480 }, { "epoch": 0.3585410295050441, "grad_norm": 874.4924926757812, "learning_rate": 8.14687322269681e-06, "loss": 25.838, "step": 177490 }, { "epoch": 0.3585612301377279, "grad_norm": 288.9904479980469, "learning_rate": 8.146601955249187e-06, "loss": 11.5666, "step": 177500 }, { "epoch": 0.3585814307704117, "grad_norm": 838.0281372070312, "learning_rate": 8.14633067246541e-06, "loss": 30.3927, "step": 177510 }, { "epoch": 0.35860163140309553, "grad_norm": 547.0402221679688, "learning_rate": 8.146059374346798e-06, "loss": 19.2305, "step": 177520 }, { "epoch": 0.35862183203577935, "grad_norm": 509.0076904296875, "learning_rate": 8.145788060894675e-06, "loss": 14.9584, "step": 177530 }, { "epoch": 0.3586420326684632, "grad_norm": 354.7601013183594, "learning_rate": 8.145516732110362e-06, "loss": 
21.724, "step": 177540 }, { "epoch": 0.358662233301147, "grad_norm": 309.3062438964844, "learning_rate": 8.14524538799518e-06, "loss": 17.2265, "step": 177550 }, { "epoch": 0.3586824339338308, "grad_norm": 715.89990234375, "learning_rate": 8.144974028550456e-06, "loss": 40.1115, "step": 177560 }, { "epoch": 0.35870263456651463, "grad_norm": 586.0116577148438, "learning_rate": 8.14470265377751e-06, "loss": 33.4867, "step": 177570 }, { "epoch": 0.35872283519919845, "grad_norm": 120.88922882080078, "learning_rate": 8.144431263677663e-06, "loss": 12.1822, "step": 177580 }, { "epoch": 0.3587430358318823, "grad_norm": 298.3090515136719, "learning_rate": 8.144159858252241e-06, "loss": 14.616, "step": 177590 }, { "epoch": 0.3587632364645661, "grad_norm": 692.1654052734375, "learning_rate": 8.143888437502565e-06, "loss": 29.6946, "step": 177600 }, { "epoch": 0.3587834370972499, "grad_norm": 417.9906311035156, "learning_rate": 8.143617001429957e-06, "loss": 28.8346, "step": 177610 }, { "epoch": 0.3588036377299337, "grad_norm": 487.2813415527344, "learning_rate": 8.143345550035742e-06, "loss": 16.8323, "step": 177620 }, { "epoch": 0.3588238383626175, "grad_norm": 268.7579040527344, "learning_rate": 8.14307408332124e-06, "loss": 22.2511, "step": 177630 }, { "epoch": 0.3588440389953013, "grad_norm": 4335.0478515625, "learning_rate": 8.14280260128778e-06, "loss": 49.9402, "step": 177640 }, { "epoch": 0.35886423962798514, "grad_norm": 284.3321228027344, "learning_rate": 8.142531103936677e-06, "loss": 18.8025, "step": 177650 }, { "epoch": 0.35888444026066896, "grad_norm": 593.8846435546875, "learning_rate": 8.142259591269261e-06, "loss": 15.7247, "step": 177660 }, { "epoch": 0.3589046408933528, "grad_norm": 239.56578063964844, "learning_rate": 8.141988063286853e-06, "loss": 24.4761, "step": 177670 }, { "epoch": 0.3589248415260366, "grad_norm": 29.926515579223633, "learning_rate": 8.141716519990776e-06, "loss": 31.3717, "step": 177680 }, { "epoch": 0.3589450421587204, "grad_norm": 1262.7120361328125, "learning_rate": 8.141444961382353e-06, "loss": 33.6779, "step": 177690 }, { "epoch": 0.35896524279140424, "grad_norm": 622.3956909179688, "learning_rate": 8.141173387462908e-06, "loss": 30.4117, "step": 177700 }, { "epoch": 0.35898544342408806, "grad_norm": 427.6423645019531, "learning_rate": 8.140901798233766e-06, "loss": 21.7507, "step": 177710 }, { "epoch": 0.3590056440567719, "grad_norm": 701.2109375, "learning_rate": 8.14063019369625e-06, "loss": 30.8782, "step": 177720 }, { "epoch": 0.3590258446894557, "grad_norm": 27.665742874145508, "learning_rate": 8.140358573851682e-06, "loss": 27.4379, "step": 177730 }, { "epoch": 0.35904604532213946, "grad_norm": 201.99661254882812, "learning_rate": 8.140086938701387e-06, "loss": 12.4247, "step": 177740 }, { "epoch": 0.3590662459548233, "grad_norm": 178.28997802734375, "learning_rate": 8.139815288246692e-06, "loss": 17.5456, "step": 177750 }, { "epoch": 0.3590864465875071, "grad_norm": 569.2922973632812, "learning_rate": 8.139543622488914e-06, "loss": 34.9385, "step": 177760 }, { "epoch": 0.3591066472201909, "grad_norm": 293.24383544921875, "learning_rate": 8.139271941429383e-06, "loss": 17.1171, "step": 177770 }, { "epoch": 0.35912684785287474, "grad_norm": 75.68390655517578, "learning_rate": 8.139000245069421e-06, "loss": 17.4726, "step": 177780 }, { "epoch": 0.35914704848555856, "grad_norm": 401.88262939453125, "learning_rate": 8.138728533410354e-06, "loss": 22.3922, "step": 177790 }, { "epoch": 0.3591672491182424, "grad_norm": 574.680419921875, 
"learning_rate": 8.138456806453503e-06, "loss": 13.5508, "step": 177800 }, { "epoch": 0.3591874497509262, "grad_norm": 515.5734252929688, "learning_rate": 8.138185064200195e-06, "loss": 29.5561, "step": 177810 }, { "epoch": 0.35920765038361, "grad_norm": 722.4268188476562, "learning_rate": 8.137913306651754e-06, "loss": 28.8704, "step": 177820 }, { "epoch": 0.35922785101629384, "grad_norm": 499.84326171875, "learning_rate": 8.137641533809503e-06, "loss": 21.1517, "step": 177830 }, { "epoch": 0.35924805164897766, "grad_norm": 451.6605224609375, "learning_rate": 8.137369745674769e-06, "loss": 13.4682, "step": 177840 }, { "epoch": 0.3592682522816615, "grad_norm": 372.6636657714844, "learning_rate": 8.137097942248875e-06, "loss": 27.0819, "step": 177850 }, { "epoch": 0.3592884529143453, "grad_norm": 22.09795379638672, "learning_rate": 8.136826123533144e-06, "loss": 22.3577, "step": 177860 }, { "epoch": 0.35930865354702907, "grad_norm": 78.34536743164062, "learning_rate": 8.136554289528906e-06, "loss": 18.4124, "step": 177870 }, { "epoch": 0.3593288541797129, "grad_norm": 683.2900390625, "learning_rate": 8.136282440237481e-06, "loss": 22.9591, "step": 177880 }, { "epoch": 0.3593490548123967, "grad_norm": 596.79541015625, "learning_rate": 8.136010575660197e-06, "loss": 14.4441, "step": 177890 }, { "epoch": 0.3593692554450805, "grad_norm": 287.7501220703125, "learning_rate": 8.135738695798377e-06, "loss": 19.8037, "step": 177900 }, { "epoch": 0.35938945607776435, "grad_norm": 34.817054748535156, "learning_rate": 8.135466800653347e-06, "loss": 9.004, "step": 177910 }, { "epoch": 0.35940965671044817, "grad_norm": 544.5115356445312, "learning_rate": 8.135194890226432e-06, "loss": 23.7276, "step": 177920 }, { "epoch": 0.359429857343132, "grad_norm": 754.536865234375, "learning_rate": 8.134922964518957e-06, "loss": 34.7466, "step": 177930 }, { "epoch": 0.3594500579758158, "grad_norm": 302.80645751953125, "learning_rate": 8.134651023532249e-06, "loss": 24.1984, "step": 177940 }, { "epoch": 0.3594702586084996, "grad_norm": 314.097412109375, "learning_rate": 8.13437906726763e-06, "loss": 21.9839, "step": 177950 }, { "epoch": 0.35949045924118345, "grad_norm": 627.1465454101562, "learning_rate": 8.13410709572643e-06, "loss": 42.2638, "step": 177960 }, { "epoch": 0.35951065987386727, "grad_norm": 199.92535400390625, "learning_rate": 8.13383510890997e-06, "loss": 10.2202, "step": 177970 }, { "epoch": 0.3595308605065511, "grad_norm": 678.3382568359375, "learning_rate": 8.133563106819579e-06, "loss": 20.0439, "step": 177980 }, { "epoch": 0.3595510611392349, "grad_norm": 436.7383117675781, "learning_rate": 8.13329108945658e-06, "loss": 17.698, "step": 177990 }, { "epoch": 0.35957126177191867, "grad_norm": 559.6670532226562, "learning_rate": 8.133019056822303e-06, "loss": 22.6511, "step": 178000 }, { "epoch": 0.3595914624046025, "grad_norm": 306.81243896484375, "learning_rate": 8.132747008918069e-06, "loss": 31.3838, "step": 178010 }, { "epoch": 0.3596116630372863, "grad_norm": 190.06283569335938, "learning_rate": 8.132474945745207e-06, "loss": 21.2258, "step": 178020 }, { "epoch": 0.35963186366997013, "grad_norm": 36.86237716674805, "learning_rate": 8.132202867305043e-06, "loss": 11.6965, "step": 178030 }, { "epoch": 0.35965206430265395, "grad_norm": 521.4072265625, "learning_rate": 8.1319307735989e-06, "loss": 28.9941, "step": 178040 }, { "epoch": 0.35967226493533777, "grad_norm": 1079.01318359375, "learning_rate": 8.131658664628108e-06, "loss": 21.3401, "step": 178050 }, { "epoch": 0.3596924655680216, 
"grad_norm": 300.1889953613281, "learning_rate": 8.131386540393991e-06, "loss": 24.1562, "step": 178060 }, { "epoch": 0.3597126662007054, "grad_norm": 5.549616813659668, "learning_rate": 8.131114400897874e-06, "loss": 16.3086, "step": 178070 }, { "epoch": 0.35973286683338923, "grad_norm": 557.4177856445312, "learning_rate": 8.130842246141086e-06, "loss": 25.6069, "step": 178080 }, { "epoch": 0.35975306746607305, "grad_norm": 246.23886108398438, "learning_rate": 8.130570076124954e-06, "loss": 17.3673, "step": 178090 }, { "epoch": 0.35977326809875687, "grad_norm": 338.0411376953125, "learning_rate": 8.130297890850803e-06, "loss": 24.9579, "step": 178100 }, { "epoch": 0.3597934687314407, "grad_norm": 146.607421875, "learning_rate": 8.130025690319958e-06, "loss": 36.285, "step": 178110 }, { "epoch": 0.3598136693641245, "grad_norm": 228.3867645263672, "learning_rate": 8.129753474533749e-06, "loss": 20.9564, "step": 178120 }, { "epoch": 0.3598338699968083, "grad_norm": 380.2311096191406, "learning_rate": 8.1294812434935e-06, "loss": 20.4764, "step": 178130 }, { "epoch": 0.3598540706294921, "grad_norm": 360.1344299316406, "learning_rate": 8.129208997200539e-06, "loss": 18.9098, "step": 178140 }, { "epoch": 0.3598742712621759, "grad_norm": 486.223388671875, "learning_rate": 8.128936735656195e-06, "loss": 19.3291, "step": 178150 }, { "epoch": 0.35989447189485974, "grad_norm": 258.1197204589844, "learning_rate": 8.12866445886179e-06, "loss": 17.2444, "step": 178160 }, { "epoch": 0.35991467252754356, "grad_norm": 306.47991943359375, "learning_rate": 8.128392166818655e-06, "loss": 14.1862, "step": 178170 }, { "epoch": 0.3599348731602274, "grad_norm": 343.256591796875, "learning_rate": 8.128119859528116e-06, "loss": 14.6788, "step": 178180 }, { "epoch": 0.3599550737929112, "grad_norm": 157.1279296875, "learning_rate": 8.127847536991501e-06, "loss": 28.506, "step": 178190 }, { "epoch": 0.359975274425595, "grad_norm": 647.3510131835938, "learning_rate": 8.127575199210136e-06, "loss": 24.6872, "step": 178200 }, { "epoch": 0.35999547505827884, "grad_norm": 430.32830810546875, "learning_rate": 8.127302846185348e-06, "loss": 18.3237, "step": 178210 }, { "epoch": 0.36001567569096266, "grad_norm": 307.2442932128906, "learning_rate": 8.127030477918466e-06, "loss": 28.0633, "step": 178220 }, { "epoch": 0.3600358763236465, "grad_norm": 328.6007080078125, "learning_rate": 8.126758094410816e-06, "loss": 20.8106, "step": 178230 }, { "epoch": 0.3600560769563303, "grad_norm": 205.92359924316406, "learning_rate": 8.126485695663728e-06, "loss": 18.6058, "step": 178240 }, { "epoch": 0.3600762775890141, "grad_norm": 419.4658508300781, "learning_rate": 8.126213281678527e-06, "loss": 30.8753, "step": 178250 }, { "epoch": 0.3600964782216979, "grad_norm": 575.1708374023438, "learning_rate": 8.12594085245654e-06, "loss": 14.9826, "step": 178260 }, { "epoch": 0.3601166788543817, "grad_norm": 415.85113525390625, "learning_rate": 8.1256684079991e-06, "loss": 13.5627, "step": 178270 }, { "epoch": 0.3601368794870655, "grad_norm": 374.0655212402344, "learning_rate": 8.125395948307528e-06, "loss": 13.7356, "step": 178280 }, { "epoch": 0.36015708011974934, "grad_norm": 447.27825927734375, "learning_rate": 8.125123473383156e-06, "loss": 17.7901, "step": 178290 }, { "epoch": 0.36017728075243316, "grad_norm": 611.2520751953125, "learning_rate": 8.124850983227313e-06, "loss": 30.2359, "step": 178300 }, { "epoch": 0.360197481385117, "grad_norm": 398.5036926269531, "learning_rate": 8.124578477841323e-06, "loss": 17.1272, "step": 178310 }, { 
"epoch": 0.3602176820178008, "grad_norm": 208.12890625, "learning_rate": 8.124305957226518e-06, "loss": 18.1814, "step": 178320 }, { "epoch": 0.3602378826504846, "grad_norm": 405.2329406738281, "learning_rate": 8.124033421384223e-06, "loss": 13.769, "step": 178330 }, { "epoch": 0.36025808328316844, "grad_norm": 451.02703857421875, "learning_rate": 8.12376087031577e-06, "loss": 26.2292, "step": 178340 }, { "epoch": 0.36027828391585226, "grad_norm": 146.75135803222656, "learning_rate": 8.123488304022485e-06, "loss": 5.5689, "step": 178350 }, { "epoch": 0.3602984845485361, "grad_norm": 279.85107421875, "learning_rate": 8.123215722505695e-06, "loss": 29.3162, "step": 178360 }, { "epoch": 0.3603186851812199, "grad_norm": 279.0551452636719, "learning_rate": 8.122943125766731e-06, "loss": 24.3572, "step": 178370 }, { "epoch": 0.36033888581390366, "grad_norm": 280.7283020019531, "learning_rate": 8.122670513806924e-06, "loss": 37.2789, "step": 178380 }, { "epoch": 0.3603590864465875, "grad_norm": 534.52587890625, "learning_rate": 8.122397886627596e-06, "loss": 29.5953, "step": 178390 }, { "epoch": 0.3603792870792713, "grad_norm": 439.2769775390625, "learning_rate": 8.12212524423008e-06, "loss": 12.9245, "step": 178400 }, { "epoch": 0.3603994877119551, "grad_norm": 801.3646850585938, "learning_rate": 8.121852586615705e-06, "loss": 29.8669, "step": 178410 }, { "epoch": 0.36041968834463894, "grad_norm": 461.25018310546875, "learning_rate": 8.121579913785799e-06, "loss": 23.2916, "step": 178420 }, { "epoch": 0.36043988897732276, "grad_norm": 255.2156982421875, "learning_rate": 8.12130722574169e-06, "loss": 11.5908, "step": 178430 }, { "epoch": 0.3604600896100066, "grad_norm": 20.481443405151367, "learning_rate": 8.12103452248471e-06, "loss": 22.8622, "step": 178440 }, { "epoch": 0.3604802902426904, "grad_norm": 42.72632598876953, "learning_rate": 8.120761804016186e-06, "loss": 10.2893, "step": 178450 }, { "epoch": 0.3605004908753742, "grad_norm": 182.3892822265625, "learning_rate": 8.120489070337448e-06, "loss": 30.1117, "step": 178460 }, { "epoch": 0.36052069150805804, "grad_norm": 350.43414306640625, "learning_rate": 8.120216321449823e-06, "loss": 14.188, "step": 178470 }, { "epoch": 0.36054089214074186, "grad_norm": 435.0140686035156, "learning_rate": 8.119943557354641e-06, "loss": 12.9704, "step": 178480 }, { "epoch": 0.3605610927734257, "grad_norm": 172.6223602294922, "learning_rate": 8.119670778053236e-06, "loss": 23.8365, "step": 178490 }, { "epoch": 0.3605812934061095, "grad_norm": 447.8199157714844, "learning_rate": 8.119397983546932e-06, "loss": 13.7439, "step": 178500 }, { "epoch": 0.36060149403879327, "grad_norm": 254.7931365966797, "learning_rate": 8.119125173837061e-06, "loss": 15.6657, "step": 178510 }, { "epoch": 0.3606216946714771, "grad_norm": 245.6374969482422, "learning_rate": 8.118852348924951e-06, "loss": 25.515, "step": 178520 }, { "epoch": 0.3606418953041609, "grad_norm": 205.4235382080078, "learning_rate": 8.118579508811934e-06, "loss": 30.7655, "step": 178530 }, { "epoch": 0.36066209593684473, "grad_norm": 137.30201721191406, "learning_rate": 8.118306653499339e-06, "loss": 30.7389, "step": 178540 }, { "epoch": 0.36068229656952855, "grad_norm": 428.897705078125, "learning_rate": 8.118033782988496e-06, "loss": 28.0444, "step": 178550 }, { "epoch": 0.36070249720221237, "grad_norm": 592.7665405273438, "learning_rate": 8.117760897280733e-06, "loss": 43.9568, "step": 178560 }, { "epoch": 0.3607226978348962, "grad_norm": 0.0, "learning_rate": 8.117487996377383e-06, "loss": 34.4643, 
"step": 178570 }, { "epoch": 0.36074289846758, "grad_norm": 530.3194580078125, "learning_rate": 8.117215080279774e-06, "loss": 24.9329, "step": 178580 }, { "epoch": 0.36076309910026383, "grad_norm": 512.5408325195312, "learning_rate": 8.116942148989238e-06, "loss": 29.8084, "step": 178590 }, { "epoch": 0.36078329973294765, "grad_norm": 326.04730224609375, "learning_rate": 8.116669202507102e-06, "loss": 39.6674, "step": 178600 }, { "epoch": 0.36080350036563147, "grad_norm": 1247.66943359375, "learning_rate": 8.116396240834699e-06, "loss": 23.1247, "step": 178610 }, { "epoch": 0.3608237009983153, "grad_norm": 67.5570068359375, "learning_rate": 8.116123263973359e-06, "loss": 26.048, "step": 178620 }, { "epoch": 0.3608439016309991, "grad_norm": 419.583984375, "learning_rate": 8.11585027192441e-06, "loss": 22.5238, "step": 178630 }, { "epoch": 0.3608641022636829, "grad_norm": 253.14178466796875, "learning_rate": 8.115577264689188e-06, "loss": 17.6159, "step": 178640 }, { "epoch": 0.3608843028963667, "grad_norm": 208.3504638671875, "learning_rate": 8.115304242269017e-06, "loss": 27.7183, "step": 178650 }, { "epoch": 0.3609045035290505, "grad_norm": 266.0090026855469, "learning_rate": 8.115031204665233e-06, "loss": 15.1854, "step": 178660 }, { "epoch": 0.36092470416173433, "grad_norm": 594.6963500976562, "learning_rate": 8.114758151879164e-06, "loss": 18.6671, "step": 178670 }, { "epoch": 0.36094490479441815, "grad_norm": 677.4618530273438, "learning_rate": 8.114485083912143e-06, "loss": 16.8773, "step": 178680 }, { "epoch": 0.360965105427102, "grad_norm": 788.9229125976562, "learning_rate": 8.1142120007655e-06, "loss": 27.072, "step": 178690 }, { "epoch": 0.3609853060597858, "grad_norm": 360.16583251953125, "learning_rate": 8.113938902440563e-06, "loss": 29.9497, "step": 178700 }, { "epoch": 0.3610055066924696, "grad_norm": 475.398193359375, "learning_rate": 8.113665788938667e-06, "loss": 14.3001, "step": 178710 }, { "epoch": 0.36102570732515343, "grad_norm": 192.18301391601562, "learning_rate": 8.113392660261141e-06, "loss": 18.8154, "step": 178720 }, { "epoch": 0.36104590795783725, "grad_norm": 462.969482421875, "learning_rate": 8.113119516409315e-06, "loss": 15.4176, "step": 178730 }, { "epoch": 0.3610661085905211, "grad_norm": 345.4085693359375, "learning_rate": 8.112846357384526e-06, "loss": 10.2465, "step": 178740 }, { "epoch": 0.3610863092232049, "grad_norm": 318.5648498535156, "learning_rate": 8.112573183188099e-06, "loss": 27.8682, "step": 178750 }, { "epoch": 0.3611065098558887, "grad_norm": 376.4531555175781, "learning_rate": 8.112299993821366e-06, "loss": 23.3852, "step": 178760 }, { "epoch": 0.3611267104885725, "grad_norm": 650.11279296875, "learning_rate": 8.112026789285664e-06, "loss": 17.0906, "step": 178770 }, { "epoch": 0.3611469111212563, "grad_norm": 423.9063720703125, "learning_rate": 8.111753569582319e-06, "loss": 27.5014, "step": 178780 }, { "epoch": 0.3611671117539401, "grad_norm": 793.5993041992188, "learning_rate": 8.111480334712664e-06, "loss": 18.9096, "step": 178790 }, { "epoch": 0.36118731238662394, "grad_norm": 205.39283752441406, "learning_rate": 8.111207084678033e-06, "loss": 17.618, "step": 178800 }, { "epoch": 0.36120751301930776, "grad_norm": 164.9736785888672, "learning_rate": 8.110933819479754e-06, "loss": 13.0748, "step": 178810 }, { "epoch": 0.3612277136519916, "grad_norm": 243.2801513671875, "learning_rate": 8.110660539119163e-06, "loss": 5.6099, "step": 178820 }, { "epoch": 0.3612479142846754, "grad_norm": 600.1412353515625, "learning_rate": 
8.110387243597588e-06, "loss": 25.9991, "step": 178830 }, { "epoch": 0.3612681149173592, "grad_norm": 203.08787536621094, "learning_rate": 8.110113932916363e-06, "loss": 18.2952, "step": 178840 }, { "epoch": 0.36128831555004304, "grad_norm": 15.868556022644043, "learning_rate": 8.10984060707682e-06, "loss": 10.9692, "step": 178850 }, { "epoch": 0.36130851618272686, "grad_norm": 365.302734375, "learning_rate": 8.109567266080292e-06, "loss": 32.2658, "step": 178860 }, { "epoch": 0.3613287168154107, "grad_norm": 508.83551025390625, "learning_rate": 8.109293909928111e-06, "loss": 26.9549, "step": 178870 }, { "epoch": 0.3613489174480945, "grad_norm": 230.3358154296875, "learning_rate": 8.109020538621607e-06, "loss": 29.653, "step": 178880 }, { "epoch": 0.3613691180807783, "grad_norm": 532.472412109375, "learning_rate": 8.108747152162113e-06, "loss": 11.6277, "step": 178890 }, { "epoch": 0.3613893187134621, "grad_norm": 239.40606689453125, "learning_rate": 8.108473750550965e-06, "loss": 25.3715, "step": 178900 }, { "epoch": 0.3614095193461459, "grad_norm": 459.02783203125, "learning_rate": 8.10820033378949e-06, "loss": 26.4688, "step": 178910 }, { "epoch": 0.3614297199788297, "grad_norm": 1088.55615234375, "learning_rate": 8.107926901879027e-06, "loss": 25.8571, "step": 178920 }, { "epoch": 0.36144992061151354, "grad_norm": 209.74513244628906, "learning_rate": 8.107653454820902e-06, "loss": 8.8448, "step": 178930 }, { "epoch": 0.36147012124419736, "grad_norm": 376.676513671875, "learning_rate": 8.107379992616453e-06, "loss": 37.7792, "step": 178940 }, { "epoch": 0.3614903218768812, "grad_norm": 506.7843017578125, "learning_rate": 8.10710651526701e-06, "loss": 13.5302, "step": 178950 }, { "epoch": 0.361510522509565, "grad_norm": 483.62109375, "learning_rate": 8.106833022773908e-06, "loss": 18.1689, "step": 178960 }, { "epoch": 0.3615307231422488, "grad_norm": 304.67926025390625, "learning_rate": 8.106559515138477e-06, "loss": 15.037, "step": 178970 }, { "epoch": 0.36155092377493264, "grad_norm": 716.6126708984375, "learning_rate": 8.106285992362052e-06, "loss": 17.5214, "step": 178980 }, { "epoch": 0.36157112440761646, "grad_norm": 160.1989288330078, "learning_rate": 8.106012454445966e-06, "loss": 27.9466, "step": 178990 }, { "epoch": 0.3615913250403003, "grad_norm": 349.72882080078125, "learning_rate": 8.105738901391553e-06, "loss": 25.9572, "step": 179000 }, { "epoch": 0.3616115256729841, "grad_norm": 366.52716064453125, "learning_rate": 8.105465333200144e-06, "loss": 20.5821, "step": 179010 }, { "epoch": 0.36163172630566787, "grad_norm": 268.8068542480469, "learning_rate": 8.105191749873075e-06, "loss": 27.8748, "step": 179020 }, { "epoch": 0.3616519269383517, "grad_norm": 67.10267639160156, "learning_rate": 8.104918151411676e-06, "loss": 12.8232, "step": 179030 }, { "epoch": 0.3616721275710355, "grad_norm": 665.4845581054688, "learning_rate": 8.104644537817284e-06, "loss": 49.0935, "step": 179040 }, { "epoch": 0.3616923282037193, "grad_norm": 449.352294921875, "learning_rate": 8.10437090909123e-06, "loss": 29.9721, "step": 179050 }, { "epoch": 0.36171252883640315, "grad_norm": 290.5399475097656, "learning_rate": 8.10409726523485e-06, "loss": 13.8746, "step": 179060 }, { "epoch": 0.36173272946908697, "grad_norm": 498.26446533203125, "learning_rate": 8.103823606249476e-06, "loss": 18.2015, "step": 179070 }, { "epoch": 0.3617529301017708, "grad_norm": 556.02587890625, "learning_rate": 8.103549932136442e-06, "loss": 13.341, "step": 179080 }, { "epoch": 0.3617731307344546, "grad_norm": 
474.5405578613281, "learning_rate": 8.10327624289708e-06, "loss": 19.0943, "step": 179090 }, { "epoch": 0.3617933313671384, "grad_norm": 139.08218383789062, "learning_rate": 8.103002538532729e-06, "loss": 15.2622, "step": 179100 }, { "epoch": 0.36181353199982225, "grad_norm": 291.2727355957031, "learning_rate": 8.102728819044718e-06, "loss": 30.471, "step": 179110 }, { "epoch": 0.36183373263250607, "grad_norm": 631.0053100585938, "learning_rate": 8.102455084434385e-06, "loss": 35.978, "step": 179120 }, { "epoch": 0.3618539332651899, "grad_norm": 453.00311279296875, "learning_rate": 8.102181334703061e-06, "loss": 13.041, "step": 179130 }, { "epoch": 0.3618741338978737, "grad_norm": 106.15189361572266, "learning_rate": 8.101907569852081e-06, "loss": 8.384, "step": 179140 }, { "epoch": 0.36189433453055747, "grad_norm": 333.9896240234375, "learning_rate": 8.101633789882781e-06, "loss": 18.6485, "step": 179150 }, { "epoch": 0.3619145351632413, "grad_norm": 400.0981140136719, "learning_rate": 8.101359994796494e-06, "loss": 20.3175, "step": 179160 }, { "epoch": 0.3619347357959251, "grad_norm": 257.89410400390625, "learning_rate": 8.101086184594554e-06, "loss": 15.6535, "step": 179170 }, { "epoch": 0.36195493642860893, "grad_norm": 28.253049850463867, "learning_rate": 8.100812359278294e-06, "loss": 24.1828, "step": 179180 }, { "epoch": 0.36197513706129275, "grad_norm": 553.4799194335938, "learning_rate": 8.100538518849053e-06, "loss": 13.5885, "step": 179190 }, { "epoch": 0.36199533769397657, "grad_norm": 210.08636474609375, "learning_rate": 8.100264663308165e-06, "loss": 11.83, "step": 179200 }, { "epoch": 0.3620155383266604, "grad_norm": 519.5848999023438, "learning_rate": 8.09999079265696e-06, "loss": 16.0559, "step": 179210 }, { "epoch": 0.3620357389593442, "grad_norm": 285.53985595703125, "learning_rate": 8.099716906896776e-06, "loss": 42.4347, "step": 179220 }, { "epoch": 0.36205593959202803, "grad_norm": 325.1741027832031, "learning_rate": 8.099443006028948e-06, "loss": 24.6796, "step": 179230 }, { "epoch": 0.36207614022471185, "grad_norm": 671.6212158203125, "learning_rate": 8.099169090054812e-06, "loss": 23.3181, "step": 179240 }, { "epoch": 0.36209634085739567, "grad_norm": 683.6408081054688, "learning_rate": 8.098895158975702e-06, "loss": 23.4439, "step": 179250 }, { "epoch": 0.3621165414900795, "grad_norm": 384.7148132324219, "learning_rate": 8.098621212792952e-06, "loss": 23.5365, "step": 179260 }, { "epoch": 0.3621367421227633, "grad_norm": 4892.22119140625, "learning_rate": 8.098347251507896e-06, "loss": 53.4412, "step": 179270 }, { "epoch": 0.3621569427554471, "grad_norm": 223.54103088378906, "learning_rate": 8.098073275121876e-06, "loss": 17.9581, "step": 179280 }, { "epoch": 0.3621771433881309, "grad_norm": 448.0371398925781, "learning_rate": 8.097799283636219e-06, "loss": 19.9835, "step": 179290 }, { "epoch": 0.3621973440208147, "grad_norm": 550.0449829101562, "learning_rate": 8.097525277052265e-06, "loss": 16.1264, "step": 179300 }, { "epoch": 0.36221754465349854, "grad_norm": 496.9193115234375, "learning_rate": 8.097251255371347e-06, "loss": 24.7837, "step": 179310 }, { "epoch": 0.36223774528618236, "grad_norm": 415.5220031738281, "learning_rate": 8.096977218594803e-06, "loss": 20.3981, "step": 179320 }, { "epoch": 0.3622579459188662, "grad_norm": 279.1328125, "learning_rate": 8.09670316672397e-06, "loss": 29.0992, "step": 179330 }, { "epoch": 0.36227814655155, "grad_norm": 472.2081604003906, "learning_rate": 8.096429099760176e-06, "loss": 19.3455, "step": 179340 }, { "epoch": 
0.3622983471842338, "grad_norm": 279.03106689453125, "learning_rate": 8.096155017704768e-06, "loss": 22.1065, "step": 179350 }, { "epoch": 0.36231854781691764, "grad_norm": 679.9822387695312, "learning_rate": 8.095880920559073e-06, "loss": 26.3839, "step": 179360 }, { "epoch": 0.36233874844960146, "grad_norm": 461.66571044921875, "learning_rate": 8.09560680832443e-06, "loss": 30.5445, "step": 179370 }, { "epoch": 0.3623589490822853, "grad_norm": 556.361083984375, "learning_rate": 8.095332681002175e-06, "loss": 33.7321, "step": 179380 }, { "epoch": 0.3623791497149691, "grad_norm": 143.40692138671875, "learning_rate": 8.095058538593645e-06, "loss": 13.6774, "step": 179390 }, { "epoch": 0.3623993503476529, "grad_norm": 733.951904296875, "learning_rate": 8.094784381100174e-06, "loss": 18.1463, "step": 179400 }, { "epoch": 0.3624195509803367, "grad_norm": 611.850341796875, "learning_rate": 8.094510208523099e-06, "loss": 33.0224, "step": 179410 }, { "epoch": 0.3624397516130205, "grad_norm": 436.51654052734375, "learning_rate": 8.094236020863758e-06, "loss": 29.0462, "step": 179420 }, { "epoch": 0.3624599522457043, "grad_norm": 880.206787109375, "learning_rate": 8.093961818123483e-06, "loss": 22.809, "step": 179430 }, { "epoch": 0.36248015287838814, "grad_norm": 314.6743469238281, "learning_rate": 8.093687600303616e-06, "loss": 13.1, "step": 179440 }, { "epoch": 0.36250035351107196, "grad_norm": 513.7949829101562, "learning_rate": 8.09341336740549e-06, "loss": 12.9643, "step": 179450 }, { "epoch": 0.3625205541437558, "grad_norm": 377.70062255859375, "learning_rate": 8.093139119430442e-06, "loss": 20.9338, "step": 179460 }, { "epoch": 0.3625407547764396, "grad_norm": 191.74209594726562, "learning_rate": 8.09286485637981e-06, "loss": 20.7988, "step": 179470 }, { "epoch": 0.3625609554091234, "grad_norm": 546.7145385742188, "learning_rate": 8.092590578254931e-06, "loss": 25.0234, "step": 179480 }, { "epoch": 0.36258115604180724, "grad_norm": 781.304931640625, "learning_rate": 8.092316285057138e-06, "loss": 24.1487, "step": 179490 }, { "epoch": 0.36260135667449106, "grad_norm": 784.0794067382812, "learning_rate": 8.092041976787772e-06, "loss": 19.9087, "step": 179500 }, { "epoch": 0.3626215573071749, "grad_norm": 461.7140197753906, "learning_rate": 8.091767653448169e-06, "loss": 21.3678, "step": 179510 }, { "epoch": 0.3626417579398587, "grad_norm": 847.074951171875, "learning_rate": 8.091493315039662e-06, "loss": 27.9731, "step": 179520 }, { "epoch": 0.36266195857254246, "grad_norm": 352.7601623535156, "learning_rate": 8.091218961563593e-06, "loss": 14.7476, "step": 179530 }, { "epoch": 0.3626821592052263, "grad_norm": 465.680908203125, "learning_rate": 8.0909445930213e-06, "loss": 24.597, "step": 179540 }, { "epoch": 0.3627023598379101, "grad_norm": 3518.7138671875, "learning_rate": 8.090670209414117e-06, "loss": 23.9385, "step": 179550 }, { "epoch": 0.3627225604705939, "grad_norm": 509.2041931152344, "learning_rate": 8.090395810743382e-06, "loss": 21.9164, "step": 179560 }, { "epoch": 0.36274276110327774, "grad_norm": 428.3517150878906, "learning_rate": 8.090121397010432e-06, "loss": 16.0016, "step": 179570 }, { "epoch": 0.36276296173596156, "grad_norm": 173.6181640625, "learning_rate": 8.089846968216605e-06, "loss": 22.2588, "step": 179580 }, { "epoch": 0.3627831623686454, "grad_norm": 205.15170288085938, "learning_rate": 8.08957252436324e-06, "loss": 10.1585, "step": 179590 }, { "epoch": 0.3628033630013292, "grad_norm": 601.03564453125, "learning_rate": 8.089298065451673e-06, "loss": 28.1236, 
"step": 179600 }, { "epoch": 0.362823563634013, "grad_norm": 50.00336456298828, "learning_rate": 8.08902359148324e-06, "loss": 25.6974, "step": 179610 }, { "epoch": 0.36284376426669684, "grad_norm": 598.7530517578125, "learning_rate": 8.088749102459284e-06, "loss": 28.9201, "step": 179620 }, { "epoch": 0.36286396489938066, "grad_norm": 235.47975158691406, "learning_rate": 8.088474598381134e-06, "loss": 16.8251, "step": 179630 }, { "epoch": 0.3628841655320645, "grad_norm": 356.95037841796875, "learning_rate": 8.088200079250139e-06, "loss": 23.515, "step": 179640 }, { "epoch": 0.3629043661647483, "grad_norm": 318.74761962890625, "learning_rate": 8.08792554506763e-06, "loss": 22.7573, "step": 179650 }, { "epoch": 0.36292456679743207, "grad_norm": 356.8153991699219, "learning_rate": 8.087650995834946e-06, "loss": 11.2592, "step": 179660 }, { "epoch": 0.3629447674301159, "grad_norm": 218.93699645996094, "learning_rate": 8.087376431553425e-06, "loss": 23.3865, "step": 179670 }, { "epoch": 0.3629649680627997, "grad_norm": 1522.609375, "learning_rate": 8.087101852224406e-06, "loss": 18.9979, "step": 179680 }, { "epoch": 0.36298516869548353, "grad_norm": 0.0, "learning_rate": 8.086827257849225e-06, "loss": 15.1281, "step": 179690 }, { "epoch": 0.36300536932816735, "grad_norm": 311.6251525878906, "learning_rate": 8.086552648429225e-06, "loss": 22.289, "step": 179700 }, { "epoch": 0.36302556996085117, "grad_norm": 191.47767639160156, "learning_rate": 8.08627802396574e-06, "loss": 22.3104, "step": 179710 }, { "epoch": 0.363045770593535, "grad_norm": 458.1260070800781, "learning_rate": 8.086003384460113e-06, "loss": 23.9723, "step": 179720 }, { "epoch": 0.3630659712262188, "grad_norm": 478.980712890625, "learning_rate": 8.085728729913677e-06, "loss": 17.0848, "step": 179730 }, { "epoch": 0.36308617185890263, "grad_norm": 138.81344604492188, "learning_rate": 8.085454060327775e-06, "loss": 19.4853, "step": 179740 }, { "epoch": 0.36310637249158645, "grad_norm": 374.0357666015625, "learning_rate": 8.085179375703745e-06, "loss": 31.6013, "step": 179750 }, { "epoch": 0.36312657312427027, "grad_norm": 495.9537658691406, "learning_rate": 8.084904676042923e-06, "loss": 21.5826, "step": 179760 }, { "epoch": 0.3631467737569541, "grad_norm": 540.7644653320312, "learning_rate": 8.08462996134665e-06, "loss": 16.5344, "step": 179770 }, { "epoch": 0.3631669743896379, "grad_norm": 346.47137451171875, "learning_rate": 8.084355231616266e-06, "loss": 12.8951, "step": 179780 }, { "epoch": 0.3631871750223217, "grad_norm": 294.4875793457031, "learning_rate": 8.084080486853106e-06, "loss": 24.5862, "step": 179790 }, { "epoch": 0.3632073756550055, "grad_norm": 48.79007339477539, "learning_rate": 8.083805727058514e-06, "loss": 44.3199, "step": 179800 }, { "epoch": 0.3632275762876893, "grad_norm": 1104.3382568359375, "learning_rate": 8.083530952233826e-06, "loss": 32.3153, "step": 179810 }, { "epoch": 0.36324777692037313, "grad_norm": 501.9084167480469, "learning_rate": 8.083256162380382e-06, "loss": 15.4729, "step": 179820 }, { "epoch": 0.36326797755305695, "grad_norm": 443.52587890625, "learning_rate": 8.082981357499522e-06, "loss": 14.8998, "step": 179830 }, { "epoch": 0.3632881781857408, "grad_norm": 70.27091217041016, "learning_rate": 8.082706537592585e-06, "loss": 33.6036, "step": 179840 }, { "epoch": 0.3633083788184246, "grad_norm": 348.7554626464844, "learning_rate": 8.08243170266091e-06, "loss": 19.2431, "step": 179850 }, { "epoch": 0.3633285794511084, "grad_norm": 248.2569122314453, "learning_rate": 
8.082156852705837e-06, "loss": 11.5737, "step": 179860 }, { "epoch": 0.36334878008379223, "grad_norm": 190.74111938476562, "learning_rate": 8.081881987728704e-06, "loss": 10.9432, "step": 179870 }, { "epoch": 0.36336898071647605, "grad_norm": 204.70008850097656, "learning_rate": 8.081607107730853e-06, "loss": 24.379, "step": 179880 }, { "epoch": 0.3633891813491599, "grad_norm": 538.6603393554688, "learning_rate": 8.081332212713625e-06, "loss": 32.2789, "step": 179890 }, { "epoch": 0.3634093819818437, "grad_norm": 662.2504272460938, "learning_rate": 8.081057302678352e-06, "loss": 21.0323, "step": 179900 }, { "epoch": 0.3634295826145275, "grad_norm": 397.1094665527344, "learning_rate": 8.080782377626383e-06, "loss": 15.8753, "step": 179910 }, { "epoch": 0.3634497832472113, "grad_norm": 79.99320983886719, "learning_rate": 8.080507437559056e-06, "loss": 23.1658, "step": 179920 }, { "epoch": 0.3634699838798951, "grad_norm": 710.0244140625, "learning_rate": 8.080232482477705e-06, "loss": 27.1346, "step": 179930 }, { "epoch": 0.3634901845125789, "grad_norm": 476.2806091308594, "learning_rate": 8.079957512383679e-06, "loss": 18.4552, "step": 179940 }, { "epoch": 0.36351038514526274, "grad_norm": 657.8384399414062, "learning_rate": 8.07968252727831e-06, "loss": 23.6445, "step": 179950 }, { "epoch": 0.36353058577794656, "grad_norm": 268.9906005859375, "learning_rate": 8.079407527162944e-06, "loss": 31.5085, "step": 179960 }, { "epoch": 0.3635507864106304, "grad_norm": 479.5155334472656, "learning_rate": 8.079132512038917e-06, "loss": 28.7466, "step": 179970 }, { "epoch": 0.3635709870433142, "grad_norm": 266.0442199707031, "learning_rate": 8.078857481907573e-06, "loss": 11.4897, "step": 179980 }, { "epoch": 0.363591187675998, "grad_norm": 248.67543029785156, "learning_rate": 8.078582436770252e-06, "loss": 31.0825, "step": 179990 }, { "epoch": 0.36361138830868184, "grad_norm": 1310.8602294921875, "learning_rate": 8.078307376628292e-06, "loss": 26.8525, "step": 180000 }, { "epoch": 0.36363158894136566, "grad_norm": 243.6558837890625, "learning_rate": 8.078032301483036e-06, "loss": 23.1223, "step": 180010 }, { "epoch": 0.3636517895740495, "grad_norm": 484.58319091796875, "learning_rate": 8.077757211335823e-06, "loss": 29.5466, "step": 180020 }, { "epoch": 0.3636719902067333, "grad_norm": 837.57177734375, "learning_rate": 8.077482106187997e-06, "loss": 26.9726, "step": 180030 }, { "epoch": 0.3636921908394171, "grad_norm": 731.7139282226562, "learning_rate": 8.077206986040894e-06, "loss": 34.0762, "step": 180040 }, { "epoch": 0.3637123914721009, "grad_norm": 653.1241455078125, "learning_rate": 8.076931850895858e-06, "loss": 34.7637, "step": 180050 }, { "epoch": 0.3637325921047847, "grad_norm": 304.6058349609375, "learning_rate": 8.07665670075423e-06, "loss": 34.1574, "step": 180060 }, { "epoch": 0.3637527927374685, "grad_norm": 318.70135498046875, "learning_rate": 8.07638153561735e-06, "loss": 21.1969, "step": 180070 }, { "epoch": 0.36377299337015234, "grad_norm": 100.94208526611328, "learning_rate": 8.076106355486559e-06, "loss": 20.3431, "step": 180080 }, { "epoch": 0.36379319400283616, "grad_norm": 540.5836791992188, "learning_rate": 8.0758311603632e-06, "loss": 28.3074, "step": 180090 }, { "epoch": 0.36381339463552, "grad_norm": 741.1316528320312, "learning_rate": 8.075555950248613e-06, "loss": 24.8105, "step": 180100 }, { "epoch": 0.3638335952682038, "grad_norm": 878.4921264648438, "learning_rate": 8.075280725144136e-06, "loss": 24.5495, "step": 180110 }, { "epoch": 0.3638537959008876, "grad_norm": 
423.2711486816406, "learning_rate": 8.075005485051117e-06, "loss": 27.7036, "step": 180120 }, { "epoch": 0.36387399653357144, "grad_norm": 74.36318969726562, "learning_rate": 8.074730229970895e-06, "loss": 23.0773, "step": 180130 }, { "epoch": 0.36389419716625526, "grad_norm": 713.6236572265625, "learning_rate": 8.074454959904807e-06, "loss": 25.2313, "step": 180140 }, { "epoch": 0.3639143977989391, "grad_norm": 2.3272178173065186, "learning_rate": 8.0741796748542e-06, "loss": 11.2923, "step": 180150 }, { "epoch": 0.3639345984316229, "grad_norm": 399.4425048828125, "learning_rate": 8.073904374820416e-06, "loss": 29.8339, "step": 180160 }, { "epoch": 0.36395479906430667, "grad_norm": 551.170166015625, "learning_rate": 8.073629059804795e-06, "loss": 23.4163, "step": 180170 }, { "epoch": 0.3639749996969905, "grad_norm": 61.851165771484375, "learning_rate": 8.073353729808676e-06, "loss": 13.2651, "step": 180180 }, { "epoch": 0.3639952003296743, "grad_norm": 423.3778381347656, "learning_rate": 8.073078384833406e-06, "loss": 20.787, "step": 180190 }, { "epoch": 0.3640154009623581, "grad_norm": 442.4151306152344, "learning_rate": 8.072803024880322e-06, "loss": 16.1326, "step": 180200 }, { "epoch": 0.36403560159504195, "grad_norm": 663.5044555664062, "learning_rate": 8.072527649950772e-06, "loss": 16.5667, "step": 180210 }, { "epoch": 0.36405580222772577, "grad_norm": 328.0487365722656, "learning_rate": 8.072252260046094e-06, "loss": 27.5722, "step": 180220 }, { "epoch": 0.3640760028604096, "grad_norm": 474.4621276855469, "learning_rate": 8.071976855167629e-06, "loss": 27.4091, "step": 180230 }, { "epoch": 0.3640962034930934, "grad_norm": 123.0985336303711, "learning_rate": 8.071701435316724e-06, "loss": 41.9142, "step": 180240 }, { "epoch": 0.3641164041257772, "grad_norm": 86.01426696777344, "learning_rate": 8.071426000494716e-06, "loss": 19.347, "step": 180250 }, { "epoch": 0.36413660475846105, "grad_norm": 239.38197326660156, "learning_rate": 8.071150550702953e-06, "loss": 34.2087, "step": 180260 }, { "epoch": 0.36415680539114487, "grad_norm": 432.0234069824219, "learning_rate": 8.070875085942772e-06, "loss": 18.4752, "step": 180270 }, { "epoch": 0.3641770060238287, "grad_norm": 36.60873794555664, "learning_rate": 8.070599606215522e-06, "loss": 23.4685, "step": 180280 }, { "epoch": 0.3641972066565125, "grad_norm": 147.41677856445312, "learning_rate": 8.070324111522538e-06, "loss": 24.2534, "step": 180290 }, { "epoch": 0.36421740728919627, "grad_norm": 460.8913269042969, "learning_rate": 8.07004860186517e-06, "loss": 31.2957, "step": 180300 }, { "epoch": 0.3642376079218801, "grad_norm": 519.7053833007812, "learning_rate": 8.069773077244756e-06, "loss": 17.9148, "step": 180310 }, { "epoch": 0.3642578085545639, "grad_norm": 198.05221557617188, "learning_rate": 8.069497537662638e-06, "loss": 20.4212, "step": 180320 }, { "epoch": 0.36427800918724773, "grad_norm": 151.0083770751953, "learning_rate": 8.069221983120164e-06, "loss": 19.3584, "step": 180330 }, { "epoch": 0.36429820981993155, "grad_norm": 270.20404052734375, "learning_rate": 8.068946413618674e-06, "loss": 18.1931, "step": 180340 }, { "epoch": 0.36431841045261537, "grad_norm": 282.3987121582031, "learning_rate": 8.068670829159511e-06, "loss": 18.7832, "step": 180350 }, { "epoch": 0.3643386110852992, "grad_norm": 449.9928894042969, "learning_rate": 8.06839522974402e-06, "loss": 43.0912, "step": 180360 }, { "epoch": 0.364358811717983, "grad_norm": 634.7880859375, "learning_rate": 8.068119615373541e-06, "loss": 16.687, "step": 180370 }, { 
"epoch": 0.36437901235066683, "grad_norm": 661.2182006835938, "learning_rate": 8.06784398604942e-06, "loss": 18.101, "step": 180380 }, { "epoch": 0.36439921298335065, "grad_norm": 98.8208236694336, "learning_rate": 8.067568341773e-06, "loss": 22.6252, "step": 180390 }, { "epoch": 0.36441941361603447, "grad_norm": 593.2794799804688, "learning_rate": 8.067292682545622e-06, "loss": 16.2175, "step": 180400 }, { "epoch": 0.3644396142487183, "grad_norm": 192.91513061523438, "learning_rate": 8.067017008368632e-06, "loss": 9.7837, "step": 180410 }, { "epoch": 0.3644598148814021, "grad_norm": 847.2479248046875, "learning_rate": 8.066741319243374e-06, "loss": 18.9218, "step": 180420 }, { "epoch": 0.3644800155140859, "grad_norm": 501.9024963378906, "learning_rate": 8.06646561517119e-06, "loss": 27.7609, "step": 180430 }, { "epoch": 0.3645002161467697, "grad_norm": 248.13670349121094, "learning_rate": 8.066189896153425e-06, "loss": 28.4785, "step": 180440 }, { "epoch": 0.3645204167794535, "grad_norm": 249.80906677246094, "learning_rate": 8.065914162191424e-06, "loss": 13.8947, "step": 180450 }, { "epoch": 0.36454061741213734, "grad_norm": 107.41999816894531, "learning_rate": 8.065638413286526e-06, "loss": 13.3479, "step": 180460 }, { "epoch": 0.36456081804482116, "grad_norm": 236.6258087158203, "learning_rate": 8.065362649440081e-06, "loss": 19.4068, "step": 180470 }, { "epoch": 0.364581018677505, "grad_norm": 50.60342788696289, "learning_rate": 8.065086870653428e-06, "loss": 17.5798, "step": 180480 }, { "epoch": 0.3646012193101888, "grad_norm": 227.71481323242188, "learning_rate": 8.064811076927915e-06, "loss": 37.9688, "step": 180490 }, { "epoch": 0.3646214199428726, "grad_norm": 542.9400634765625, "learning_rate": 8.064535268264883e-06, "loss": 22.8471, "step": 180500 }, { "epoch": 0.36464162057555644, "grad_norm": 650.06640625, "learning_rate": 8.064259444665678e-06, "loss": 14.5285, "step": 180510 }, { "epoch": 0.36466182120824026, "grad_norm": 169.16299438476562, "learning_rate": 8.063983606131645e-06, "loss": 30.553, "step": 180520 }, { "epoch": 0.3646820218409241, "grad_norm": 260.5411682128906, "learning_rate": 8.063707752664127e-06, "loss": 16.6584, "step": 180530 }, { "epoch": 0.3647022224736079, "grad_norm": 948.4075927734375, "learning_rate": 8.06343188426447e-06, "loss": 31.9307, "step": 180540 }, { "epoch": 0.3647224231062917, "grad_norm": 178.24842834472656, "learning_rate": 8.063156000934017e-06, "loss": 35.5165, "step": 180550 }, { "epoch": 0.3647426237389755, "grad_norm": 545.8712158203125, "learning_rate": 8.062880102674112e-06, "loss": 29.6667, "step": 180560 }, { "epoch": 0.3647628243716593, "grad_norm": 259.62982177734375, "learning_rate": 8.062604189486102e-06, "loss": 35.8326, "step": 180570 }, { "epoch": 0.3647830250043431, "grad_norm": 451.8420104980469, "learning_rate": 8.06232826137133e-06, "loss": 14.4411, "step": 180580 }, { "epoch": 0.36480322563702694, "grad_norm": 222.64039611816406, "learning_rate": 8.062052318331142e-06, "loss": 17.8508, "step": 180590 }, { "epoch": 0.36482342626971076, "grad_norm": 1076.473388671875, "learning_rate": 8.061776360366883e-06, "loss": 28.3678, "step": 180600 }, { "epoch": 0.3648436269023946, "grad_norm": 338.62493896484375, "learning_rate": 8.061500387479896e-06, "loss": 25.0373, "step": 180610 }, { "epoch": 0.3648638275350784, "grad_norm": 491.73956298828125, "learning_rate": 8.06122439967153e-06, "loss": 8.7945, "step": 180620 }, { "epoch": 0.3648840281677622, "grad_norm": 250.841552734375, "learning_rate": 8.060948396943125e-06, 
"loss": 21.4569, "step": 180630 }, { "epoch": 0.36490422880044604, "grad_norm": 600.4892578125, "learning_rate": 8.060672379296028e-06, "loss": 32.9581, "step": 180640 }, { "epoch": 0.36492442943312986, "grad_norm": 579.3340454101562, "learning_rate": 8.060396346731587e-06, "loss": 23.5043, "step": 180650 }, { "epoch": 0.3649446300658137, "grad_norm": 1081.14013671875, "learning_rate": 8.060120299251145e-06, "loss": 41.1831, "step": 180660 }, { "epoch": 0.3649648306984975, "grad_norm": 260.2357177734375, "learning_rate": 8.059844236856047e-06, "loss": 22.1888, "step": 180670 }, { "epoch": 0.3649850313311813, "grad_norm": 665.8071899414062, "learning_rate": 8.05956815954764e-06, "loss": 32.5861, "step": 180680 }, { "epoch": 0.3650052319638651, "grad_norm": 539.4544677734375, "learning_rate": 8.059292067327268e-06, "loss": 23.4349, "step": 180690 }, { "epoch": 0.3650254325965489, "grad_norm": 222.7199249267578, "learning_rate": 8.05901596019628e-06, "loss": 32.5975, "step": 180700 }, { "epoch": 0.3650456332292327, "grad_norm": 653.107666015625, "learning_rate": 8.058739838156018e-06, "loss": 26.6703, "step": 180710 }, { "epoch": 0.36506583386191654, "grad_norm": 131.5776824951172, "learning_rate": 8.058463701207828e-06, "loss": 26.4162, "step": 180720 }, { "epoch": 0.36508603449460036, "grad_norm": 159.77001953125, "learning_rate": 8.058187549353058e-06, "loss": 17.4183, "step": 180730 }, { "epoch": 0.3651062351272842, "grad_norm": 389.9031677246094, "learning_rate": 8.057911382593052e-06, "loss": 13.0312, "step": 180740 }, { "epoch": 0.365126435759968, "grad_norm": 271.0975036621094, "learning_rate": 8.057635200929157e-06, "loss": 21.7841, "step": 180750 }, { "epoch": 0.3651466363926518, "grad_norm": 256.2905578613281, "learning_rate": 8.057359004362719e-06, "loss": 18.0252, "step": 180760 }, { "epoch": 0.36516683702533564, "grad_norm": 393.3341979980469, "learning_rate": 8.057082792895084e-06, "loss": 15.9911, "step": 180770 }, { "epoch": 0.36518703765801946, "grad_norm": 1234.55810546875, "learning_rate": 8.056806566527597e-06, "loss": 50.5567, "step": 180780 }, { "epoch": 0.3652072382907033, "grad_norm": 167.49339294433594, "learning_rate": 8.056530325261607e-06, "loss": 23.2267, "step": 180790 }, { "epoch": 0.3652274389233871, "grad_norm": 1536.591796875, "learning_rate": 8.05625406909846e-06, "loss": 32.0553, "step": 180800 }, { "epoch": 0.36524763955607087, "grad_norm": 460.4673767089844, "learning_rate": 8.055977798039499e-06, "loss": 18.0047, "step": 180810 }, { "epoch": 0.3652678401887547, "grad_norm": 286.6156921386719, "learning_rate": 8.055701512086073e-06, "loss": 18.4002, "step": 180820 }, { "epoch": 0.3652880408214385, "grad_norm": 507.64703369140625, "learning_rate": 8.05542521123953e-06, "loss": 34.6961, "step": 180830 }, { "epoch": 0.36530824145412233, "grad_norm": 197.2182159423828, "learning_rate": 8.055148895501214e-06, "loss": 25.0815, "step": 180840 }, { "epoch": 0.36532844208680615, "grad_norm": 241.7445068359375, "learning_rate": 8.054872564872474e-06, "loss": 19.066, "step": 180850 }, { "epoch": 0.36534864271948997, "grad_norm": 405.707763671875, "learning_rate": 8.054596219354655e-06, "loss": 16.7721, "step": 180860 }, { "epoch": 0.3653688433521738, "grad_norm": 652.0069580078125, "learning_rate": 8.054319858949104e-06, "loss": 16.9904, "step": 180870 }, { "epoch": 0.3653890439848576, "grad_norm": 437.1751708984375, "learning_rate": 8.054043483657169e-06, "loss": 20.3146, "step": 180880 }, { "epoch": 0.36540924461754143, "grad_norm": 276.588134765625, 
"learning_rate": 8.053767093480196e-06, "loss": 18.7465, "step": 180890 }, { "epoch": 0.36542944525022525, "grad_norm": 67.01463317871094, "learning_rate": 8.053490688419532e-06, "loss": 27.196, "step": 180900 }, { "epoch": 0.36544964588290907, "grad_norm": 532.4506225585938, "learning_rate": 8.053214268476526e-06, "loss": 35.5491, "step": 180910 }, { "epoch": 0.3654698465155929, "grad_norm": 255.35911560058594, "learning_rate": 8.052937833652524e-06, "loss": 11.376, "step": 180920 }, { "epoch": 0.3654900471482767, "grad_norm": 78.1842041015625, "learning_rate": 8.052661383948873e-06, "loss": 27.2627, "step": 180930 }, { "epoch": 0.3655102477809605, "grad_norm": 225.21063232421875, "learning_rate": 8.05238491936692e-06, "loss": 19.6099, "step": 180940 }, { "epoch": 0.3655304484136443, "grad_norm": 316.75054931640625, "learning_rate": 8.052108439908014e-06, "loss": 32.9721, "step": 180950 }, { "epoch": 0.3655506490463281, "grad_norm": 331.910888671875, "learning_rate": 8.0518319455735e-06, "loss": 16.345, "step": 180960 }, { "epoch": 0.36557084967901193, "grad_norm": 550.0551147460938, "learning_rate": 8.051555436364728e-06, "loss": 24.1847, "step": 180970 }, { "epoch": 0.36559105031169575, "grad_norm": 383.29962158203125, "learning_rate": 8.051278912283046e-06, "loss": 15.6813, "step": 180980 }, { "epoch": 0.3656112509443796, "grad_norm": 700.6581420898438, "learning_rate": 8.0510023733298e-06, "loss": 18.2657, "step": 180990 }, { "epoch": 0.3656314515770634, "grad_norm": 1364.6787109375, "learning_rate": 8.05072581950634e-06, "loss": 24.5909, "step": 181000 }, { "epoch": 0.3656516522097472, "grad_norm": 357.5721130371094, "learning_rate": 8.05044925081401e-06, "loss": 24.2228, "step": 181010 }, { "epoch": 0.36567185284243103, "grad_norm": 374.67138671875, "learning_rate": 8.050172667254161e-06, "loss": 18.6345, "step": 181020 }, { "epoch": 0.36569205347511485, "grad_norm": 587.245361328125, "learning_rate": 8.04989606882814e-06, "loss": 12.8158, "step": 181030 }, { "epoch": 0.3657122541077987, "grad_norm": 69.31981658935547, "learning_rate": 8.049619455537296e-06, "loss": 15.0433, "step": 181040 }, { "epoch": 0.3657324547404825, "grad_norm": 149.47796630859375, "learning_rate": 8.049342827382978e-06, "loss": 17.1523, "step": 181050 }, { "epoch": 0.3657526553731663, "grad_norm": 233.05543518066406, "learning_rate": 8.049066184366532e-06, "loss": 39.2642, "step": 181060 }, { "epoch": 0.3657728560058501, "grad_norm": 609.6397094726562, "learning_rate": 8.048789526489305e-06, "loss": 21.6553, "step": 181070 }, { "epoch": 0.3657930566385339, "grad_norm": 234.69485473632812, "learning_rate": 8.04851285375265e-06, "loss": 10.3634, "step": 181080 }, { "epoch": 0.3658132572712177, "grad_norm": 1949.347900390625, "learning_rate": 8.048236166157912e-06, "loss": 23.8406, "step": 181090 }, { "epoch": 0.36583345790390154, "grad_norm": 913.1204833984375, "learning_rate": 8.047959463706441e-06, "loss": 32.6902, "step": 181100 }, { "epoch": 0.36585365853658536, "grad_norm": 311.9753723144531, "learning_rate": 8.047682746399585e-06, "loss": 17.295, "step": 181110 }, { "epoch": 0.3658738591692692, "grad_norm": 548.8505249023438, "learning_rate": 8.047406014238695e-06, "loss": 20.3511, "step": 181120 }, { "epoch": 0.365894059801953, "grad_norm": 177.647705078125, "learning_rate": 8.047129267225116e-06, "loss": 19.28, "step": 181130 }, { "epoch": 0.3659142604346368, "grad_norm": 131.7987518310547, "learning_rate": 8.046852505360196e-06, "loss": 23.0578, "step": 181140 }, { "epoch": 0.36593446106732064, 
"grad_norm": 0.0, "learning_rate": 8.04657572864529e-06, "loss": 22.0129, "step": 181150 }, { "epoch": 0.36595466170000446, "grad_norm": 601.8718872070312, "learning_rate": 8.046298937081742e-06, "loss": 15.4517, "step": 181160 }, { "epoch": 0.3659748623326883, "grad_norm": 386.86602783203125, "learning_rate": 8.046022130670903e-06, "loss": 19.1586, "step": 181170 }, { "epoch": 0.3659950629653721, "grad_norm": 537.0209350585938, "learning_rate": 8.045745309414122e-06, "loss": 25.2806, "step": 181180 }, { "epoch": 0.3660152635980559, "grad_norm": 49.718807220458984, "learning_rate": 8.045468473312748e-06, "loss": 8.4768, "step": 181190 }, { "epoch": 0.3660354642307397, "grad_norm": 17.38374137878418, "learning_rate": 8.045191622368128e-06, "loss": 17.5699, "step": 181200 }, { "epoch": 0.3660556648634235, "grad_norm": 759.65380859375, "learning_rate": 8.044914756581614e-06, "loss": 32.4582, "step": 181210 }, { "epoch": 0.3660758654961073, "grad_norm": 363.1199035644531, "learning_rate": 8.044637875954556e-06, "loss": 13.3467, "step": 181220 }, { "epoch": 0.36609606612879114, "grad_norm": 676.3450317382812, "learning_rate": 8.044360980488302e-06, "loss": 16.8875, "step": 181230 }, { "epoch": 0.36611626676147496, "grad_norm": 570.9820556640625, "learning_rate": 8.044084070184202e-06, "loss": 24.6783, "step": 181240 }, { "epoch": 0.3661364673941588, "grad_norm": 388.772216796875, "learning_rate": 8.043807145043604e-06, "loss": 16.8156, "step": 181250 }, { "epoch": 0.3661566680268426, "grad_norm": 172.58050537109375, "learning_rate": 8.04353020506786e-06, "loss": 25.0926, "step": 181260 }, { "epoch": 0.3661768686595264, "grad_norm": 1798.0380859375, "learning_rate": 8.04325325025832e-06, "loss": 36.0175, "step": 181270 }, { "epoch": 0.36619706929221024, "grad_norm": 444.1122741699219, "learning_rate": 8.04297628061633e-06, "loss": 20.0588, "step": 181280 }, { "epoch": 0.36621726992489406, "grad_norm": 432.8059387207031, "learning_rate": 8.042699296143244e-06, "loss": 28.8735, "step": 181290 }, { "epoch": 0.3662374705575779, "grad_norm": 179.3892059326172, "learning_rate": 8.04242229684041e-06, "loss": 28.0737, "step": 181300 }, { "epoch": 0.3662576711902617, "grad_norm": 382.18927001953125, "learning_rate": 8.042145282709181e-06, "loss": 11.817, "step": 181310 }, { "epoch": 0.3662778718229455, "grad_norm": 231.79049682617188, "learning_rate": 8.041868253750904e-06, "loss": 14.6898, "step": 181320 }, { "epoch": 0.3662980724556293, "grad_norm": 265.1534729003906, "learning_rate": 8.04159120996693e-06, "loss": 19.368, "step": 181330 }, { "epoch": 0.3663182730883131, "grad_norm": 338.3877258300781, "learning_rate": 8.04131415135861e-06, "loss": 20.6414, "step": 181340 }, { "epoch": 0.3663384737209969, "grad_norm": 543.4014282226562, "learning_rate": 8.041037077927291e-06, "loss": 23.0925, "step": 181350 }, { "epoch": 0.36635867435368075, "grad_norm": 155.44252014160156, "learning_rate": 8.040759989674328e-06, "loss": 27.2844, "step": 181360 }, { "epoch": 0.36637887498636457, "grad_norm": 349.7950744628906, "learning_rate": 8.04048288660107e-06, "loss": 30.5285, "step": 181370 }, { "epoch": 0.3663990756190484, "grad_norm": 289.8055114746094, "learning_rate": 8.040205768708866e-06, "loss": 14.5676, "step": 181380 }, { "epoch": 0.3664192762517322, "grad_norm": 568.9749145507812, "learning_rate": 8.03992863599907e-06, "loss": 22.2054, "step": 181390 }, { "epoch": 0.366439476884416, "grad_norm": 610.5119018554688, "learning_rate": 8.039651488473028e-06, "loss": 22.5269, "step": 181400 }, { "epoch": 
0.36645967751709985, "grad_norm": 58.552066802978516, "learning_rate": 8.039374326132095e-06, "loss": 19.4534, "step": 181410 }, { "epoch": 0.36647987814978367, "grad_norm": 413.19134521484375, "learning_rate": 8.03909714897762e-06, "loss": 44.6754, "step": 181420 }, { "epoch": 0.3665000787824675, "grad_norm": 345.2781677246094, "learning_rate": 8.038819957010953e-06, "loss": 25.9225, "step": 181430 }, { "epoch": 0.3665202794151513, "grad_norm": 76.79401397705078, "learning_rate": 8.038542750233445e-06, "loss": 21.2028, "step": 181440 }, { "epoch": 0.36654048004783507, "grad_norm": 467.41796875, "learning_rate": 8.03826552864645e-06, "loss": 28.2208, "step": 181450 }, { "epoch": 0.3665606806805189, "grad_norm": 118.24577331542969, "learning_rate": 8.037988292251317e-06, "loss": 29.4202, "step": 181460 }, { "epoch": 0.3665808813132027, "grad_norm": 422.4919128417969, "learning_rate": 8.037711041049398e-06, "loss": 16.9169, "step": 181470 }, { "epoch": 0.36660108194588653, "grad_norm": 159.02706909179688, "learning_rate": 8.037433775042044e-06, "loss": 21.7535, "step": 181480 }, { "epoch": 0.36662128257857035, "grad_norm": 401.123779296875, "learning_rate": 8.037156494230605e-06, "loss": 22.9303, "step": 181490 }, { "epoch": 0.36664148321125417, "grad_norm": 278.9327087402344, "learning_rate": 8.036879198616434e-06, "loss": 35.944, "step": 181500 }, { "epoch": 0.366661683843938, "grad_norm": 426.00067138671875, "learning_rate": 8.036601888200883e-06, "loss": 25.5249, "step": 181510 }, { "epoch": 0.3666818844766218, "grad_norm": 1824.5927734375, "learning_rate": 8.036324562985302e-06, "loss": 42.9552, "step": 181520 }, { "epoch": 0.36670208510930563, "grad_norm": 556.7666625976562, "learning_rate": 8.036047222971043e-06, "loss": 27.7041, "step": 181530 }, { "epoch": 0.36672228574198945, "grad_norm": 337.087890625, "learning_rate": 8.035769868159457e-06, "loss": 14.2904, "step": 181540 }, { "epoch": 0.36674248637467327, "grad_norm": 480.3008728027344, "learning_rate": 8.0354924985519e-06, "loss": 21.9382, "step": 181550 }, { "epoch": 0.3667626870073571, "grad_norm": 701.7417602539062, "learning_rate": 8.035215114149719e-06, "loss": 14.3989, "step": 181560 }, { "epoch": 0.3667828876400409, "grad_norm": 478.9831237792969, "learning_rate": 8.034937714954267e-06, "loss": 12.8167, "step": 181570 }, { "epoch": 0.3668030882727247, "grad_norm": 2242.091796875, "learning_rate": 8.034660300966898e-06, "loss": 24.1358, "step": 181580 }, { "epoch": 0.3668232889054085, "grad_norm": 383.171875, "learning_rate": 8.034382872188961e-06, "loss": 27.349, "step": 181590 }, { "epoch": 0.3668434895380923, "grad_norm": 59.53066635131836, "learning_rate": 8.034105428621812e-06, "loss": 10.8993, "step": 181600 }, { "epoch": 0.36686369017077614, "grad_norm": 516.87939453125, "learning_rate": 8.0338279702668e-06, "loss": 19.6811, "step": 181610 }, { "epoch": 0.36688389080345996, "grad_norm": 535.0899658203125, "learning_rate": 8.033550497125277e-06, "loss": 14.4022, "step": 181620 }, { "epoch": 0.3669040914361438, "grad_norm": 249.20155334472656, "learning_rate": 8.0332730091986e-06, "loss": 17.234, "step": 181630 }, { "epoch": 0.3669242920688276, "grad_norm": 345.7061767578125, "learning_rate": 8.032995506488117e-06, "loss": 17.3267, "step": 181640 }, { "epoch": 0.3669444927015114, "grad_norm": 341.51251220703125, "learning_rate": 8.03271798899518e-06, "loss": 13.1889, "step": 181650 }, { "epoch": 0.36696469333419524, "grad_norm": 622.7710571289062, "learning_rate": 8.032440456721145e-06, "loss": 23.3273, "step": 
181660 }, { "epoch": 0.36698489396687906, "grad_norm": 1223.9803466796875, "learning_rate": 8.032162909667363e-06, "loss": 26.7826, "step": 181670 }, { "epoch": 0.3670050945995629, "grad_norm": 697.6757202148438, "learning_rate": 8.031885347835187e-06, "loss": 26.0568, "step": 181680 }, { "epoch": 0.3670252952322467, "grad_norm": 227.46725463867188, "learning_rate": 8.031607771225969e-06, "loss": 20.1208, "step": 181690 }, { "epoch": 0.3670454958649305, "grad_norm": 624.7969970703125, "learning_rate": 8.031330179841062e-06, "loss": 22.6029, "step": 181700 }, { "epoch": 0.3670656964976143, "grad_norm": 474.216552734375, "learning_rate": 8.031052573681819e-06, "loss": 18.4208, "step": 181710 }, { "epoch": 0.3670858971302981, "grad_norm": 419.1664733886719, "learning_rate": 8.030774952749596e-06, "loss": 25.571, "step": 181720 }, { "epoch": 0.3671060977629819, "grad_norm": 605.2910766601562, "learning_rate": 8.03049731704574e-06, "loss": 26.0323, "step": 181730 }, { "epoch": 0.36712629839566574, "grad_norm": 268.3743896484375, "learning_rate": 8.03021966657161e-06, "loss": 20.9834, "step": 181740 }, { "epoch": 0.36714649902834956, "grad_norm": 135.5460662841797, "learning_rate": 8.029942001328555e-06, "loss": 28.5735, "step": 181750 }, { "epoch": 0.3671666996610334, "grad_norm": 386.54852294921875, "learning_rate": 8.029664321317932e-06, "loss": 12.4357, "step": 181760 }, { "epoch": 0.3671869002937172, "grad_norm": 591.2630004882812, "learning_rate": 8.029386626541092e-06, "loss": 14.311, "step": 181770 }, { "epoch": 0.367207100926401, "grad_norm": 236.5461883544922, "learning_rate": 8.02910891699939e-06, "loss": 20.2664, "step": 181780 }, { "epoch": 0.36722730155908484, "grad_norm": 310.11029052734375, "learning_rate": 8.028831192694176e-06, "loss": 6.558, "step": 181790 }, { "epoch": 0.36724750219176866, "grad_norm": 530.2633666992188, "learning_rate": 8.028553453626809e-06, "loss": 11.7515, "step": 181800 }, { "epoch": 0.3672677028244525, "grad_norm": 660.1512451171875, "learning_rate": 8.028275699798638e-06, "loss": 30.4409, "step": 181810 }, { "epoch": 0.3672879034571363, "grad_norm": 528.1337280273438, "learning_rate": 8.027997931211017e-06, "loss": 24.7369, "step": 181820 }, { "epoch": 0.3673081040898201, "grad_norm": 185.4175567626953, "learning_rate": 8.027720147865304e-06, "loss": 12.895, "step": 181830 }, { "epoch": 0.3673283047225039, "grad_norm": 333.89111328125, "learning_rate": 8.02744234976285e-06, "loss": 17.903, "step": 181840 }, { "epoch": 0.3673485053551877, "grad_norm": 6.933417320251465, "learning_rate": 8.027164536905008e-06, "loss": 20.3719, "step": 181850 }, { "epoch": 0.3673687059878715, "grad_norm": 941.4908447265625, "learning_rate": 8.026886709293133e-06, "loss": 31.5246, "step": 181860 }, { "epoch": 0.36738890662055534, "grad_norm": 345.4415283203125, "learning_rate": 8.02660886692858e-06, "loss": 12.1413, "step": 181870 }, { "epoch": 0.36740910725323916, "grad_norm": 1225.314453125, "learning_rate": 8.026331009812703e-06, "loss": 26.2431, "step": 181880 }, { "epoch": 0.367429307885923, "grad_norm": 773.1281127929688, "learning_rate": 8.026053137946855e-06, "loss": 30.959, "step": 181890 }, { "epoch": 0.3674495085186068, "grad_norm": 127.11233520507812, "learning_rate": 8.02577525133239e-06, "loss": 24.5994, "step": 181900 }, { "epoch": 0.3674697091512906, "grad_norm": 442.1996765136719, "learning_rate": 8.025497349970666e-06, "loss": 18.6258, "step": 181910 }, { "epoch": 0.36748990978397444, "grad_norm": 191.32305908203125, "learning_rate": 
8.025219433863035e-06, "loss": 16.1111, "step": 181920 }, { "epoch": 0.36751011041665826, "grad_norm": 265.1795349121094, "learning_rate": 8.024941503010848e-06, "loss": 15.1225, "step": 181930 }, { "epoch": 0.3675303110493421, "grad_norm": 273.599365234375, "learning_rate": 8.024663557415466e-06, "loss": 23.978, "step": 181940 }, { "epoch": 0.3675505116820259, "grad_norm": 696.9441528320312, "learning_rate": 8.024385597078239e-06, "loss": 21.3629, "step": 181950 }, { "epoch": 0.3675707123147097, "grad_norm": 429.6669921875, "learning_rate": 8.024107622000524e-06, "loss": 14.9189, "step": 181960 }, { "epoch": 0.3675909129473935, "grad_norm": 283.3707580566406, "learning_rate": 8.023829632183676e-06, "loss": 30.5343, "step": 181970 }, { "epoch": 0.3676111135800773, "grad_norm": 380.0705871582031, "learning_rate": 8.023551627629047e-06, "loss": 22.6781, "step": 181980 }, { "epoch": 0.36763131421276113, "grad_norm": 620.158203125, "learning_rate": 8.023273608337997e-06, "loss": 17.7174, "step": 181990 }, { "epoch": 0.36765151484544495, "grad_norm": 26.114177703857422, "learning_rate": 8.022995574311876e-06, "loss": 15.6572, "step": 182000 }, { "epoch": 0.36767171547812877, "grad_norm": 745.3638305664062, "learning_rate": 8.022717525552041e-06, "loss": 22.326, "step": 182010 }, { "epoch": 0.3676919161108126, "grad_norm": 901.6583862304688, "learning_rate": 8.022439462059849e-06, "loss": 24.4014, "step": 182020 }, { "epoch": 0.3677121167434964, "grad_norm": 221.56088256835938, "learning_rate": 8.022161383836652e-06, "loss": 22.557, "step": 182030 }, { "epoch": 0.36773231737618023, "grad_norm": 532.9639892578125, "learning_rate": 8.021883290883808e-06, "loss": 24.7744, "step": 182040 }, { "epoch": 0.36775251800886405, "grad_norm": 524.7433471679688, "learning_rate": 8.021605183202669e-06, "loss": 28.2642, "step": 182050 }, { "epoch": 0.36777271864154787, "grad_norm": 306.8576965332031, "learning_rate": 8.021327060794597e-06, "loss": 12.5226, "step": 182060 }, { "epoch": 0.3677929192742317, "grad_norm": 289.05828857421875, "learning_rate": 8.02104892366094e-06, "loss": 36.4176, "step": 182070 }, { "epoch": 0.3678131199069155, "grad_norm": 103.75401306152344, "learning_rate": 8.02077077180306e-06, "loss": 31.7933, "step": 182080 }, { "epoch": 0.3678333205395993, "grad_norm": 198.9027557373047, "learning_rate": 8.020492605222307e-06, "loss": 26.2181, "step": 182090 }, { "epoch": 0.3678535211722831, "grad_norm": 159.69471740722656, "learning_rate": 8.020214423920039e-06, "loss": 42.431, "step": 182100 }, { "epoch": 0.3678737218049669, "grad_norm": 133.8673553466797, "learning_rate": 8.019936227897614e-06, "loss": 29.5133, "step": 182110 }, { "epoch": 0.36789392243765073, "grad_norm": 344.2096252441406, "learning_rate": 8.019658017156384e-06, "loss": 30.8522, "step": 182120 }, { "epoch": 0.36791412307033455, "grad_norm": 394.213623046875, "learning_rate": 8.01937979169771e-06, "loss": 23.3407, "step": 182130 }, { "epoch": 0.3679343237030184, "grad_norm": 174.77699279785156, "learning_rate": 8.019101551522942e-06, "loss": 13.8389, "step": 182140 }, { "epoch": 0.3679545243357022, "grad_norm": 699.2435913085938, "learning_rate": 8.018823296633442e-06, "loss": 20.0749, "step": 182150 }, { "epoch": 0.367974724968386, "grad_norm": 355.6721496582031, "learning_rate": 8.018545027030564e-06, "loss": 18.3917, "step": 182160 }, { "epoch": 0.36799492560106983, "grad_norm": 820.6582641601562, "learning_rate": 8.01826674271566e-06, "loss": 16.2338, "step": 182170 }, { "epoch": 0.36801512623375365, "grad_norm": 
326.598388671875, "learning_rate": 8.017988443690092e-06, "loss": 33.0122, "step": 182180 }, { "epoch": 0.3680353268664375, "grad_norm": 461.7607727050781, "learning_rate": 8.017710129955215e-06, "loss": 12.0651, "step": 182190 }, { "epoch": 0.3680555274991213, "grad_norm": 504.27825927734375, "learning_rate": 8.017431801512384e-06, "loss": 14.9889, "step": 182200 }, { "epoch": 0.3680757281318051, "grad_norm": 12.012791633605957, "learning_rate": 8.017153458362957e-06, "loss": 23.0942, "step": 182210 }, { "epoch": 0.3680959287644889, "grad_norm": 171.19497680664062, "learning_rate": 8.016875100508289e-06, "loss": 20.3272, "step": 182220 }, { "epoch": 0.3681161293971727, "grad_norm": 657.1697998046875, "learning_rate": 8.016596727949737e-06, "loss": 40.3708, "step": 182230 }, { "epoch": 0.3681363300298565, "grad_norm": 25.94593620300293, "learning_rate": 8.01631834068866e-06, "loss": 40.1797, "step": 182240 }, { "epoch": 0.36815653066254034, "grad_norm": 441.80059814453125, "learning_rate": 8.016039938726413e-06, "loss": 12.376, "step": 182250 }, { "epoch": 0.36817673129522416, "grad_norm": 265.05938720703125, "learning_rate": 8.015761522064353e-06, "loss": 28.1768, "step": 182260 }, { "epoch": 0.368196931927908, "grad_norm": 165.88687133789062, "learning_rate": 8.015483090703837e-06, "loss": 19.9978, "step": 182270 }, { "epoch": 0.3682171325605918, "grad_norm": 318.32977294921875, "learning_rate": 8.015204644646222e-06, "loss": 28.3211, "step": 182280 }, { "epoch": 0.3682373331932756, "grad_norm": 230.91534423828125, "learning_rate": 8.014926183892867e-06, "loss": 5.3446, "step": 182290 }, { "epoch": 0.36825753382595944, "grad_norm": 254.33131408691406, "learning_rate": 8.014647708445124e-06, "loss": 26.3181, "step": 182300 }, { "epoch": 0.36827773445864326, "grad_norm": 780.2587890625, "learning_rate": 8.014369218304356e-06, "loss": 14.7973, "step": 182310 }, { "epoch": 0.3682979350913271, "grad_norm": 58.186317443847656, "learning_rate": 8.014090713471917e-06, "loss": 21.2951, "step": 182320 }, { "epoch": 0.3683181357240109, "grad_norm": 1826.95654296875, "learning_rate": 8.013812193949166e-06, "loss": 29.8689, "step": 182330 }, { "epoch": 0.3683383363566947, "grad_norm": 210.12147521972656, "learning_rate": 8.01353365973746e-06, "loss": 24.4536, "step": 182340 }, { "epoch": 0.3683585369893785, "grad_norm": 297.1112976074219, "learning_rate": 8.013255110838156e-06, "loss": 33.6899, "step": 182350 }, { "epoch": 0.3683787376220623, "grad_norm": 196.2924346923828, "learning_rate": 8.012976547252614e-06, "loss": 25.512, "step": 182360 }, { "epoch": 0.3683989382547461, "grad_norm": 334.15185546875, "learning_rate": 8.012697968982187e-06, "loss": 13.9515, "step": 182370 }, { "epoch": 0.36841913888742994, "grad_norm": 336.0948791503906, "learning_rate": 8.012419376028237e-06, "loss": 18.3184, "step": 182380 }, { "epoch": 0.36843933952011376, "grad_norm": 238.00503540039062, "learning_rate": 8.01214076839212e-06, "loss": 24.894, "step": 182390 }, { "epoch": 0.3684595401527976, "grad_norm": 240.62161254882812, "learning_rate": 8.011862146075194e-06, "loss": 13.059, "step": 182400 }, { "epoch": 0.3684797407854814, "grad_norm": 417.91510009765625, "learning_rate": 8.011583509078817e-06, "loss": 17.8153, "step": 182410 }, { "epoch": 0.3684999414181652, "grad_norm": 242.91551208496094, "learning_rate": 8.011304857404347e-06, "loss": 9.9211, "step": 182420 }, { "epoch": 0.36852014205084904, "grad_norm": 291.0343017578125, "learning_rate": 8.011026191053144e-06, "loss": 14.2289, "step": 182430 }, { 
"epoch": 0.36854034268353286, "grad_norm": 649.30908203125, "learning_rate": 8.010747510026564e-06, "loss": 27.8836, "step": 182440 }, { "epoch": 0.3685605433162167, "grad_norm": 524.3955078125, "learning_rate": 8.010468814325964e-06, "loss": 32.7936, "step": 182450 }, { "epoch": 0.3685807439489005, "grad_norm": 389.1418151855469, "learning_rate": 8.010190103952706e-06, "loss": 23.8763, "step": 182460 }, { "epoch": 0.3686009445815843, "grad_norm": 329.4264831542969, "learning_rate": 8.009911378908147e-06, "loss": 22.2178, "step": 182470 }, { "epoch": 0.3686211452142681, "grad_norm": 319.6559753417969, "learning_rate": 8.009632639193643e-06, "loss": 22.7205, "step": 182480 }, { "epoch": 0.3686413458469519, "grad_norm": 75.72455596923828, "learning_rate": 8.009353884810555e-06, "loss": 36.0206, "step": 182490 }, { "epoch": 0.3686615464796357, "grad_norm": 481.7266845703125, "learning_rate": 8.009075115760243e-06, "loss": 23.0295, "step": 182500 }, { "epoch": 0.36868174711231955, "grad_norm": 350.93804931640625, "learning_rate": 8.008796332044062e-06, "loss": 27.9491, "step": 182510 }, { "epoch": 0.36870194774500337, "grad_norm": 80.4354476928711, "learning_rate": 8.008517533663372e-06, "loss": 20.2024, "step": 182520 }, { "epoch": 0.3687221483776872, "grad_norm": 594.6436157226562, "learning_rate": 8.008238720619534e-06, "loss": 15.693, "step": 182530 }, { "epoch": 0.368742349010371, "grad_norm": 584.0489501953125, "learning_rate": 8.007959892913906e-06, "loss": 38.8679, "step": 182540 }, { "epoch": 0.3687625496430548, "grad_norm": 404.3018493652344, "learning_rate": 8.007681050547844e-06, "loss": 22.9341, "step": 182550 }, { "epoch": 0.36878275027573865, "grad_norm": 431.6450500488281, "learning_rate": 8.007402193522711e-06, "loss": 24.3709, "step": 182560 }, { "epoch": 0.36880295090842247, "grad_norm": 330.33197021484375, "learning_rate": 8.007123321839865e-06, "loss": 30.0981, "step": 182570 }, { "epoch": 0.3688231515411063, "grad_norm": 485.0052795410156, "learning_rate": 8.006844435500663e-06, "loss": 27.0596, "step": 182580 }, { "epoch": 0.3688433521737901, "grad_norm": 253.75457763671875, "learning_rate": 8.006565534506465e-06, "loss": 16.497, "step": 182590 }, { "epoch": 0.36886355280647387, "grad_norm": 260.1213073730469, "learning_rate": 8.006286618858634e-06, "loss": 18.9027, "step": 182600 }, { "epoch": 0.3688837534391577, "grad_norm": 185.5214080810547, "learning_rate": 8.006007688558526e-06, "loss": 18.1766, "step": 182610 }, { "epoch": 0.3689039540718415, "grad_norm": 362.86444091796875, "learning_rate": 8.005728743607499e-06, "loss": 25.2606, "step": 182620 }, { "epoch": 0.36892415470452533, "grad_norm": 444.9572448730469, "learning_rate": 8.005449784006917e-06, "loss": 20.8313, "step": 182630 }, { "epoch": 0.36894435533720915, "grad_norm": 296.40045166015625, "learning_rate": 8.005170809758136e-06, "loss": 15.4983, "step": 182640 }, { "epoch": 0.36896455596989297, "grad_norm": 306.0716552734375, "learning_rate": 8.004891820862516e-06, "loss": 18.7421, "step": 182650 }, { "epoch": 0.3689847566025768, "grad_norm": 522.7783813476562, "learning_rate": 8.004612817321419e-06, "loss": 24.4774, "step": 182660 }, { "epoch": 0.3690049572352606, "grad_norm": 141.30459594726562, "learning_rate": 8.004333799136206e-06, "loss": 12.4619, "step": 182670 }, { "epoch": 0.36902515786794443, "grad_norm": 36.370323181152344, "learning_rate": 8.004054766308232e-06, "loss": 7.8107, "step": 182680 }, { "epoch": 0.36904535850062825, "grad_norm": 982.9057006835938, "learning_rate": 
8.003775718838859e-06, "loss": 33.2967, "step": 182690 }, { "epoch": 0.36906555913331207, "grad_norm": 378.8469543457031, "learning_rate": 8.003496656729448e-06, "loss": 23.5365, "step": 182700 }, { "epoch": 0.3690857597659959, "grad_norm": 165.53233337402344, "learning_rate": 8.003217579981358e-06, "loss": 17.2538, "step": 182710 }, { "epoch": 0.3691059603986797, "grad_norm": 318.7522888183594, "learning_rate": 8.002938488595951e-06, "loss": 31.0203, "step": 182720 }, { "epoch": 0.3691261610313635, "grad_norm": 189.60289001464844, "learning_rate": 8.002659382574584e-06, "loss": 17.6615, "step": 182730 }, { "epoch": 0.3691463616640473, "grad_norm": 206.4030303955078, "learning_rate": 8.00238026191862e-06, "loss": 13.3053, "step": 182740 }, { "epoch": 0.3691665622967311, "grad_norm": 985.864013671875, "learning_rate": 8.002101126629422e-06, "loss": 36.234, "step": 182750 }, { "epoch": 0.36918676292941494, "grad_norm": 334.8808288574219, "learning_rate": 8.001821976708344e-06, "loss": 14.8257, "step": 182760 }, { "epoch": 0.36920696356209876, "grad_norm": 606.4573974609375, "learning_rate": 8.001542812156751e-06, "loss": 17.9043, "step": 182770 }, { "epoch": 0.3692271641947826, "grad_norm": 377.3143615722656, "learning_rate": 8.001263632976001e-06, "loss": 49.0505, "step": 182780 }, { "epoch": 0.3692473648274664, "grad_norm": 819.2080078125, "learning_rate": 8.000984439167457e-06, "loss": 28.7495, "step": 182790 }, { "epoch": 0.3692675654601502, "grad_norm": 192.60838317871094, "learning_rate": 8.000705230732478e-06, "loss": 17.4605, "step": 182800 }, { "epoch": 0.36928776609283404, "grad_norm": 375.5916442871094, "learning_rate": 8.000426007672426e-06, "loss": 12.4707, "step": 182810 }, { "epoch": 0.36930796672551786, "grad_norm": 1288.135498046875, "learning_rate": 8.000146769988662e-06, "loss": 29.7924, "step": 182820 }, { "epoch": 0.3693281673582017, "grad_norm": 432.34814453125, "learning_rate": 7.999867517682547e-06, "loss": 15.0319, "step": 182830 }, { "epoch": 0.3693483679908855, "grad_norm": 353.95367431640625, "learning_rate": 7.999588250755442e-06, "loss": 7.9291, "step": 182840 }, { "epoch": 0.3693685686235693, "grad_norm": 593.1589965820312, "learning_rate": 7.999308969208705e-06, "loss": 38.9186, "step": 182850 }, { "epoch": 0.3693887692562531, "grad_norm": 573.551025390625, "learning_rate": 7.999029673043703e-06, "loss": 14.8998, "step": 182860 }, { "epoch": 0.3694089698889369, "grad_norm": 268.9027404785156, "learning_rate": 7.99875036226179e-06, "loss": 21.6615, "step": 182870 }, { "epoch": 0.3694291705216207, "grad_norm": 223.42030334472656, "learning_rate": 7.998471036864336e-06, "loss": 14.4676, "step": 182880 }, { "epoch": 0.36944937115430454, "grad_norm": 374.5646667480469, "learning_rate": 7.998191696852696e-06, "loss": 15.6709, "step": 182890 }, { "epoch": 0.36946957178698836, "grad_norm": 469.9173889160156, "learning_rate": 7.997912342228232e-06, "loss": 46.8033, "step": 182900 }, { "epoch": 0.3694897724196722, "grad_norm": 630.7611694335938, "learning_rate": 7.997632972992308e-06, "loss": 20.4953, "step": 182910 }, { "epoch": 0.369509973052356, "grad_norm": 295.6542663574219, "learning_rate": 7.997353589146284e-06, "loss": 19.6812, "step": 182920 }, { "epoch": 0.3695301736850398, "grad_norm": 312.4400939941406, "learning_rate": 7.997074190691523e-06, "loss": 19.4398, "step": 182930 }, { "epoch": 0.36955037431772364, "grad_norm": 495.8638000488281, "learning_rate": 7.996794777629386e-06, "loss": 19.1701, "step": 182940 }, { "epoch": 0.36957057495040746, "grad_norm": 
415.1043395996094, "learning_rate": 7.996515349961233e-06, "loss": 25.876, "step": 182950 }, { "epoch": 0.3695907755830913, "grad_norm": 257.7971496582031, "learning_rate": 7.99623590768843e-06, "loss": 24.0562, "step": 182960 }, { "epoch": 0.3696109762157751, "grad_norm": 269.9927978515625, "learning_rate": 7.995956450812335e-06, "loss": 20.5977, "step": 182970 }, { "epoch": 0.3696311768484589, "grad_norm": 272.4700622558594, "learning_rate": 7.995676979334313e-06, "loss": 16.3563, "step": 182980 }, { "epoch": 0.3696513774811427, "grad_norm": 343.4119873046875, "learning_rate": 7.995397493255723e-06, "loss": 18.765, "step": 182990 }, { "epoch": 0.3696715781138265, "grad_norm": 348.2850646972656, "learning_rate": 7.99511799257793e-06, "loss": 19.3499, "step": 183000 }, { "epoch": 0.3696917787465103, "grad_norm": 963.1719970703125, "learning_rate": 7.994838477302294e-06, "loss": 33.3418, "step": 183010 }, { "epoch": 0.36971197937919414, "grad_norm": 0.0, "learning_rate": 7.99455894743018e-06, "loss": 15.1736, "step": 183020 }, { "epoch": 0.36973218001187796, "grad_norm": 772.2694091796875, "learning_rate": 7.994279402962948e-06, "loss": 31.3075, "step": 183030 }, { "epoch": 0.3697523806445618, "grad_norm": 381.2769775390625, "learning_rate": 7.993999843901963e-06, "loss": 17.7624, "step": 183040 }, { "epoch": 0.3697725812772456, "grad_norm": 168.1398162841797, "learning_rate": 7.993720270248583e-06, "loss": 27.4773, "step": 183050 }, { "epoch": 0.3697927819099294, "grad_norm": 681.6910400390625, "learning_rate": 7.993440682004176e-06, "loss": 18.7589, "step": 183060 }, { "epoch": 0.36981298254261324, "grad_norm": 389.6490783691406, "learning_rate": 7.993161079170101e-06, "loss": 11.7036, "step": 183070 }, { "epoch": 0.36983318317529706, "grad_norm": 721.0125122070312, "learning_rate": 7.992881461747721e-06, "loss": 50.7051, "step": 183080 }, { "epoch": 0.3698533838079809, "grad_norm": 90.48036193847656, "learning_rate": 7.992601829738403e-06, "loss": 31.4077, "step": 183090 }, { "epoch": 0.3698735844406647, "grad_norm": 321.55548095703125, "learning_rate": 7.992322183143504e-06, "loss": 13.8314, "step": 183100 }, { "epoch": 0.3698937850733485, "grad_norm": 6.5746660232543945, "learning_rate": 7.99204252196439e-06, "loss": 11.8207, "step": 183110 }, { "epoch": 0.3699139857060323, "grad_norm": 368.2030334472656, "learning_rate": 7.991762846202423e-06, "loss": 21.1813, "step": 183120 }, { "epoch": 0.3699341863387161, "grad_norm": 415.379638671875, "learning_rate": 7.991483155858968e-06, "loss": 16.6932, "step": 183130 }, { "epoch": 0.36995438697139993, "grad_norm": 889.5968017578125, "learning_rate": 7.991203450935385e-06, "loss": 29.9055, "step": 183140 }, { "epoch": 0.36997458760408375, "grad_norm": 579.6605224609375, "learning_rate": 7.990923731433043e-06, "loss": 18.9249, "step": 183150 }, { "epoch": 0.36999478823676757, "grad_norm": 456.553955078125, "learning_rate": 7.990643997353296e-06, "loss": 30.3161, "step": 183160 }, { "epoch": 0.3700149888694514, "grad_norm": 326.1436767578125, "learning_rate": 7.990364248697517e-06, "loss": 16.7434, "step": 183170 }, { "epoch": 0.3700351895021352, "grad_norm": 422.5854187011719, "learning_rate": 7.990084485467065e-06, "loss": 14.8333, "step": 183180 }, { "epoch": 0.37005539013481903, "grad_norm": 81.252197265625, "learning_rate": 7.989804707663302e-06, "loss": 11.8127, "step": 183190 }, { "epoch": 0.37007559076750285, "grad_norm": 585.47802734375, "learning_rate": 7.989524915287595e-06, "loss": 27.9933, "step": 183200 }, { "epoch": 
0.37009579140018667, "grad_norm": 44.53159713745117, "learning_rate": 7.989245108341305e-06, "loss": 11.5468, "step": 183210 }, { "epoch": 0.3701159920328705, "grad_norm": 1733.242919921875, "learning_rate": 7.988965286825798e-06, "loss": 20.0665, "step": 183220 }, { "epoch": 0.3701361926655543, "grad_norm": 17.573762893676758, "learning_rate": 7.988685450742438e-06, "loss": 19.0419, "step": 183230 }, { "epoch": 0.3701563932982381, "grad_norm": 299.3620300292969, "learning_rate": 7.988405600092585e-06, "loss": 19.412, "step": 183240 }, { "epoch": 0.3701765939309219, "grad_norm": 182.89193725585938, "learning_rate": 7.988125734877607e-06, "loss": 19.0907, "step": 183250 }, { "epoch": 0.3701967945636057, "grad_norm": 687.41455078125, "learning_rate": 7.987845855098864e-06, "loss": 19.0766, "step": 183260 }, { "epoch": 0.37021699519628953, "grad_norm": 303.67315673828125, "learning_rate": 7.987565960757726e-06, "loss": 26.404, "step": 183270 }, { "epoch": 0.37023719582897335, "grad_norm": 322.6246337890625, "learning_rate": 7.987286051855552e-06, "loss": 17.0765, "step": 183280 }, { "epoch": 0.3702573964616572, "grad_norm": 255.3560791015625, "learning_rate": 7.98700612839371e-06, "loss": 12.1922, "step": 183290 }, { "epoch": 0.370277597094341, "grad_norm": 520.8690185546875, "learning_rate": 7.986726190373562e-06, "loss": 13.4479, "step": 183300 }, { "epoch": 0.3702977977270248, "grad_norm": 293.56982421875, "learning_rate": 7.986446237796471e-06, "loss": 18.1435, "step": 183310 }, { "epoch": 0.37031799835970863, "grad_norm": 238.17291259765625, "learning_rate": 7.986166270663805e-06, "loss": 25.508, "step": 183320 }, { "epoch": 0.37033819899239245, "grad_norm": 571.9578247070312, "learning_rate": 7.985886288976926e-06, "loss": 18.8088, "step": 183330 }, { "epoch": 0.3703583996250763, "grad_norm": 94.60584259033203, "learning_rate": 7.985606292737199e-06, "loss": 15.2967, "step": 183340 }, { "epoch": 0.3703786002577601, "grad_norm": 14.213637351989746, "learning_rate": 7.985326281945988e-06, "loss": 14.4738, "step": 183350 }, { "epoch": 0.3703988008904439, "grad_norm": 331.3981018066406, "learning_rate": 7.98504625660466e-06, "loss": 15.4799, "step": 183360 }, { "epoch": 0.3704190015231277, "grad_norm": 543.9615478515625, "learning_rate": 7.98476621671458e-06, "loss": 14.1034, "step": 183370 }, { "epoch": 0.3704392021558115, "grad_norm": 718.8535766601562, "learning_rate": 7.98448616227711e-06, "loss": 23.0639, "step": 183380 }, { "epoch": 0.3704594027884953, "grad_norm": 515.5036010742188, "learning_rate": 7.984206093293617e-06, "loss": 13.5569, "step": 183390 }, { "epoch": 0.37047960342117914, "grad_norm": 317.13751220703125, "learning_rate": 7.983926009765464e-06, "loss": 28.3156, "step": 183400 }, { "epoch": 0.37049980405386296, "grad_norm": 455.3040466308594, "learning_rate": 7.983645911694018e-06, "loss": 25.6995, "step": 183410 }, { "epoch": 0.3705200046865468, "grad_norm": 321.3277587890625, "learning_rate": 7.983365799080645e-06, "loss": 19.8978, "step": 183420 }, { "epoch": 0.3705402053192306, "grad_norm": 262.7257385253906, "learning_rate": 7.983085671926707e-06, "loss": 16.4916, "step": 183430 }, { "epoch": 0.3705604059519144, "grad_norm": 149.5018768310547, "learning_rate": 7.982805530233573e-06, "loss": 32.7754, "step": 183440 }, { "epoch": 0.37058060658459824, "grad_norm": 77.14970397949219, "learning_rate": 7.982525374002607e-06, "loss": 26.9118, "step": 183450 }, { "epoch": 0.37060080721728206, "grad_norm": 15.213008880615234, "learning_rate": 7.982245203235172e-06, "loss": 
21.7054, "step": 183460 }, { "epoch": 0.3706210078499659, "grad_norm": 399.4362487792969, "learning_rate": 7.981965017932638e-06, "loss": 18.1264, "step": 183470 }, { "epoch": 0.3706412084826497, "grad_norm": 205.84544372558594, "learning_rate": 7.981684818096367e-06, "loss": 23.8543, "step": 183480 }, { "epoch": 0.3706614091153335, "grad_norm": 661.1862182617188, "learning_rate": 7.981404603727726e-06, "loss": 21.7212, "step": 183490 }, { "epoch": 0.3706816097480173, "grad_norm": 284.22589111328125, "learning_rate": 7.981124374828079e-06, "loss": 27.9908, "step": 183500 }, { "epoch": 0.3707018103807011, "grad_norm": 636.8121337890625, "learning_rate": 7.980844131398795e-06, "loss": 21.8741, "step": 183510 }, { "epoch": 0.3707220110133849, "grad_norm": 235.91986083984375, "learning_rate": 7.980563873441239e-06, "loss": 18.6829, "step": 183520 }, { "epoch": 0.37074221164606874, "grad_norm": 34.92259979248047, "learning_rate": 7.980283600956775e-06, "loss": 21.6579, "step": 183530 }, { "epoch": 0.37076241227875256, "grad_norm": 531.4364624023438, "learning_rate": 7.98000331394677e-06, "loss": 16.9734, "step": 183540 }, { "epoch": 0.3707826129114364, "grad_norm": 314.3226013183594, "learning_rate": 7.97972301241259e-06, "loss": 20.7448, "step": 183550 }, { "epoch": 0.3708028135441202, "grad_norm": 459.74737548828125, "learning_rate": 7.979442696355601e-06, "loss": 14.8336, "step": 183560 }, { "epoch": 0.370823014176804, "grad_norm": 701.6031494140625, "learning_rate": 7.979162365777173e-06, "loss": 26.2635, "step": 183570 }, { "epoch": 0.37084321480948784, "grad_norm": 99.23480987548828, "learning_rate": 7.978882020678666e-06, "loss": 14.496, "step": 183580 }, { "epoch": 0.37086341544217166, "grad_norm": 511.1658630371094, "learning_rate": 7.978601661061449e-06, "loss": 26.4086, "step": 183590 }, { "epoch": 0.3708836160748555, "grad_norm": 363.9020080566406, "learning_rate": 7.978321286926892e-06, "loss": 16.8137, "step": 183600 }, { "epoch": 0.3709038167075393, "grad_norm": 549.7080078125, "learning_rate": 7.978040898276353e-06, "loss": 39.1137, "step": 183610 }, { "epoch": 0.3709240173402231, "grad_norm": 875.7857666015625, "learning_rate": 7.977760495111209e-06, "loss": 29.2739, "step": 183620 }, { "epoch": 0.3709442179729069, "grad_norm": 273.4394836425781, "learning_rate": 7.97748007743282e-06, "loss": 11.0776, "step": 183630 }, { "epoch": 0.3709644186055907, "grad_norm": 290.8146667480469, "learning_rate": 7.977199645242553e-06, "loss": 22.1111, "step": 183640 }, { "epoch": 0.3709846192382745, "grad_norm": 750.2342529296875, "learning_rate": 7.976919198541775e-06, "loss": 38.7514, "step": 183650 }, { "epoch": 0.37100481987095835, "grad_norm": 199.48460388183594, "learning_rate": 7.976638737331855e-06, "loss": 21.5552, "step": 183660 }, { "epoch": 0.37102502050364217, "grad_norm": 3.6414854526519775, "learning_rate": 7.97635826161416e-06, "loss": 12.262, "step": 183670 }, { "epoch": 0.371045221136326, "grad_norm": 324.2091979980469, "learning_rate": 7.976077771390056e-06, "loss": 18.6805, "step": 183680 }, { "epoch": 0.3710654217690098, "grad_norm": 409.9293518066406, "learning_rate": 7.975797266660908e-06, "loss": 13.9024, "step": 183690 }, { "epoch": 0.3710856224016936, "grad_norm": 556.5054321289062, "learning_rate": 7.975516747428087e-06, "loss": 19.1498, "step": 183700 }, { "epoch": 0.37110582303437745, "grad_norm": 1997.013427734375, "learning_rate": 7.975236213692956e-06, "loss": 11.7474, "step": 183710 }, { "epoch": 0.37112602366706127, "grad_norm": 1051.9697265625, 
"learning_rate": 7.974955665456887e-06, "loss": 15.8293, "step": 183720 }, { "epoch": 0.3711462242997451, "grad_norm": 827.4751586914062, "learning_rate": 7.974675102721244e-06, "loss": 26.5937, "step": 183730 }, { "epoch": 0.3711664249324289, "grad_norm": 525.66357421875, "learning_rate": 7.974394525487395e-06, "loss": 19.3954, "step": 183740 }, { "epoch": 0.3711866255651127, "grad_norm": 850.7464599609375, "learning_rate": 7.974113933756708e-06, "loss": 14.5438, "step": 183750 }, { "epoch": 0.3712068261977965, "grad_norm": 945.1376953125, "learning_rate": 7.97383332753055e-06, "loss": 23.5675, "step": 183760 }, { "epoch": 0.3712270268304803, "grad_norm": 308.317626953125, "learning_rate": 7.973552706810288e-06, "loss": 18.4932, "step": 183770 }, { "epoch": 0.37124722746316413, "grad_norm": 316.6867980957031, "learning_rate": 7.973272071597293e-06, "loss": 24.5264, "step": 183780 }, { "epoch": 0.37126742809584795, "grad_norm": 395.24273681640625, "learning_rate": 7.97299142189293e-06, "loss": 9.765, "step": 183790 }, { "epoch": 0.37128762872853177, "grad_norm": 195.8678436279297, "learning_rate": 7.972710757698567e-06, "loss": 15.1407, "step": 183800 }, { "epoch": 0.3713078293612156, "grad_norm": 1529.356689453125, "learning_rate": 7.972430079015572e-06, "loss": 31.2506, "step": 183810 }, { "epoch": 0.3713280299938994, "grad_norm": 325.48455810546875, "learning_rate": 7.972149385845314e-06, "loss": 18.5395, "step": 183820 }, { "epoch": 0.37134823062658323, "grad_norm": 593.8530883789062, "learning_rate": 7.97186867818916e-06, "loss": 9.7227, "step": 183830 }, { "epoch": 0.37136843125926705, "grad_norm": 644.62451171875, "learning_rate": 7.971587956048479e-06, "loss": 17.8562, "step": 183840 }, { "epoch": 0.37138863189195087, "grad_norm": 591.8629760742188, "learning_rate": 7.971307219424637e-06, "loss": 18.2705, "step": 183850 }, { "epoch": 0.3714088325246347, "grad_norm": 545.7822875976562, "learning_rate": 7.971026468319006e-06, "loss": 24.0909, "step": 183860 }, { "epoch": 0.3714290331573185, "grad_norm": 1182.5684814453125, "learning_rate": 7.970745702732951e-06, "loss": 27.9939, "step": 183870 }, { "epoch": 0.3714492337900023, "grad_norm": 427.9289855957031, "learning_rate": 7.970464922667842e-06, "loss": 15.6524, "step": 183880 }, { "epoch": 0.3714694344226861, "grad_norm": 50.75448226928711, "learning_rate": 7.97018412812505e-06, "loss": 23.8423, "step": 183890 }, { "epoch": 0.3714896350553699, "grad_norm": 464.32373046875, "learning_rate": 7.969903319105935e-06, "loss": 19.8493, "step": 183900 }, { "epoch": 0.37150983568805374, "grad_norm": 304.79669189453125, "learning_rate": 7.969622495611877e-06, "loss": 16.9334, "step": 183910 }, { "epoch": 0.37153003632073756, "grad_norm": 206.56349182128906, "learning_rate": 7.969341657644236e-06, "loss": 15.2697, "step": 183920 }, { "epoch": 0.3715502369534214, "grad_norm": 236.22669982910156, "learning_rate": 7.969060805204385e-06, "loss": 33.679, "step": 183930 }, { "epoch": 0.3715704375861052, "grad_norm": 254.5858917236328, "learning_rate": 7.968779938293691e-06, "loss": 21.397, "step": 183940 }, { "epoch": 0.371590638218789, "grad_norm": 440.5389404296875, "learning_rate": 7.968499056913525e-06, "loss": 21.9985, "step": 183950 }, { "epoch": 0.37161083885147284, "grad_norm": 398.2488708496094, "learning_rate": 7.968218161065253e-06, "loss": 41.6348, "step": 183960 }, { "epoch": 0.37163103948415666, "grad_norm": 54.56743240356445, "learning_rate": 7.967937250750248e-06, "loss": 12.1987, "step": 183970 }, { "epoch": 0.3716512401168405, 
"grad_norm": 234.84764099121094, "learning_rate": 7.967656325969875e-06, "loss": 14.9494, "step": 183980 }, { "epoch": 0.3716714407495243, "grad_norm": 228.147216796875, "learning_rate": 7.967375386725505e-06, "loss": 25.4243, "step": 183990 }, { "epoch": 0.3716916413822081, "grad_norm": 123.51333618164062, "learning_rate": 7.967094433018508e-06, "loss": 13.0959, "step": 184000 }, { "epoch": 0.3717118420148919, "grad_norm": 469.3757019042969, "learning_rate": 7.966813464850252e-06, "loss": 22.9168, "step": 184010 }, { "epoch": 0.3717320426475757, "grad_norm": 250.6117401123047, "learning_rate": 7.966532482222106e-06, "loss": 23.3032, "step": 184020 }, { "epoch": 0.3717522432802595, "grad_norm": 565.9239501953125, "learning_rate": 7.966251485135443e-06, "loss": 17.2938, "step": 184030 }, { "epoch": 0.37177244391294334, "grad_norm": 226.63751220703125, "learning_rate": 7.96597047359163e-06, "loss": 15.2441, "step": 184040 }, { "epoch": 0.37179264454562716, "grad_norm": 234.7274169921875, "learning_rate": 7.965689447592034e-06, "loss": 24.1701, "step": 184050 }, { "epoch": 0.371812845178311, "grad_norm": 191.4552764892578, "learning_rate": 7.96540840713803e-06, "loss": 16.944, "step": 184060 }, { "epoch": 0.3718330458109948, "grad_norm": 364.73040771484375, "learning_rate": 7.965127352230984e-06, "loss": 22.2877, "step": 184070 }, { "epoch": 0.3718532464436786, "grad_norm": 229.94163513183594, "learning_rate": 7.964846282872265e-06, "loss": 21.0565, "step": 184080 }, { "epoch": 0.37187344707636244, "grad_norm": 437.13079833984375, "learning_rate": 7.964565199063247e-06, "loss": 20.9628, "step": 184090 }, { "epoch": 0.37189364770904626, "grad_norm": 303.15606689453125, "learning_rate": 7.964284100805297e-06, "loss": 17.737, "step": 184100 }, { "epoch": 0.3719138483417301, "grad_norm": 599.64990234375, "learning_rate": 7.964002988099785e-06, "loss": 31.0308, "step": 184110 }, { "epoch": 0.3719340489744139, "grad_norm": 447.39349365234375, "learning_rate": 7.963721860948085e-06, "loss": 18.8614, "step": 184120 }, { "epoch": 0.3719542496070977, "grad_norm": 253.20962524414062, "learning_rate": 7.96344071935156e-06, "loss": 23.9809, "step": 184130 }, { "epoch": 0.3719744502397815, "grad_norm": 263.40814208984375, "learning_rate": 7.963159563311587e-06, "loss": 24.8219, "step": 184140 }, { "epoch": 0.3719946508724653, "grad_norm": 187.50816345214844, "learning_rate": 7.962878392829533e-06, "loss": 27.1222, "step": 184150 }, { "epoch": 0.3720148515051491, "grad_norm": 787.1674194335938, "learning_rate": 7.96259720790677e-06, "loss": 36.3851, "step": 184160 }, { "epoch": 0.37203505213783294, "grad_norm": 309.73291015625, "learning_rate": 7.962316008544666e-06, "loss": 15.5864, "step": 184170 }, { "epoch": 0.37205525277051676, "grad_norm": 2.7626118659973145, "learning_rate": 7.962034794744594e-06, "loss": 25.9718, "step": 184180 }, { "epoch": 0.3720754534032006, "grad_norm": 233.19387817382812, "learning_rate": 7.961753566507924e-06, "loss": 13.7824, "step": 184190 }, { "epoch": 0.3720956540358844, "grad_norm": 233.17050170898438, "learning_rate": 7.961472323836025e-06, "loss": 22.2851, "step": 184200 }, { "epoch": 0.3721158546685682, "grad_norm": 683.0888061523438, "learning_rate": 7.961191066730272e-06, "loss": 31.4705, "step": 184210 }, { "epoch": 0.37213605530125204, "grad_norm": 204.0174102783203, "learning_rate": 7.960909795192029e-06, "loss": 16.8607, "step": 184220 }, { "epoch": 0.37215625593393586, "grad_norm": 126.80240631103516, "learning_rate": 7.960628509222674e-06, "loss": 12.3081, 
"step": 184230 }, { "epoch": 0.3721764565666197, "grad_norm": 155.76065063476562, "learning_rate": 7.960347208823572e-06, "loss": 29.5864, "step": 184240 }, { "epoch": 0.3721966571993035, "grad_norm": 575.4931030273438, "learning_rate": 7.960065893996099e-06, "loss": 24.3775, "step": 184250 }, { "epoch": 0.3722168578319873, "grad_norm": 427.9136047363281, "learning_rate": 7.959784564741622e-06, "loss": 29.6747, "step": 184260 }, { "epoch": 0.3722370584646711, "grad_norm": 384.1243591308594, "learning_rate": 7.959503221061515e-06, "loss": 27.1969, "step": 184270 }, { "epoch": 0.3722572590973549, "grad_norm": 370.0421447753906, "learning_rate": 7.959221862957149e-06, "loss": 28.6106, "step": 184280 }, { "epoch": 0.37227745973003873, "grad_norm": 414.8506774902344, "learning_rate": 7.958940490429893e-06, "loss": 34.214, "step": 184290 }, { "epoch": 0.37229766036272255, "grad_norm": 85.80133819580078, "learning_rate": 7.95865910348112e-06, "loss": 17.7264, "step": 184300 }, { "epoch": 0.37231786099540637, "grad_norm": 876.0701293945312, "learning_rate": 7.958377702112204e-06, "loss": 27.1869, "step": 184310 }, { "epoch": 0.3723380616280902, "grad_norm": 346.3531494140625, "learning_rate": 7.95809628632451e-06, "loss": 15.2381, "step": 184320 }, { "epoch": 0.372358262260774, "grad_norm": 331.1015319824219, "learning_rate": 7.957814856119416e-06, "loss": 16.0605, "step": 184330 }, { "epoch": 0.37237846289345783, "grad_norm": 305.2842102050781, "learning_rate": 7.95753341149829e-06, "loss": 21.837, "step": 184340 }, { "epoch": 0.37239866352614165, "grad_norm": 529.4951171875, "learning_rate": 7.957251952462506e-06, "loss": 22.0057, "step": 184350 }, { "epoch": 0.37241886415882547, "grad_norm": 549.4971923828125, "learning_rate": 7.956970479013433e-06, "loss": 20.2053, "step": 184360 }, { "epoch": 0.3724390647915093, "grad_norm": 384.4616394042969, "learning_rate": 7.956688991152446e-06, "loss": 21.9653, "step": 184370 }, { "epoch": 0.3724592654241931, "grad_norm": 411.3118591308594, "learning_rate": 7.956407488880915e-06, "loss": 27.5025, "step": 184380 }, { "epoch": 0.37247946605687693, "grad_norm": 182.23385620117188, "learning_rate": 7.956125972200212e-06, "loss": 25.5676, "step": 184390 }, { "epoch": 0.3724996666895607, "grad_norm": 218.42762756347656, "learning_rate": 7.95584444111171e-06, "loss": 14.4574, "step": 184400 }, { "epoch": 0.3725198673222445, "grad_norm": 294.8897705078125, "learning_rate": 7.955562895616782e-06, "loss": 21.5955, "step": 184410 }, { "epoch": 0.37254006795492833, "grad_norm": 634.1650390625, "learning_rate": 7.955281335716797e-06, "loss": 20.6862, "step": 184420 }, { "epoch": 0.37256026858761215, "grad_norm": 450.59613037109375, "learning_rate": 7.954999761413129e-06, "loss": 39.3258, "step": 184430 }, { "epoch": 0.372580469220296, "grad_norm": 412.40533447265625, "learning_rate": 7.954718172707153e-06, "loss": 16.5582, "step": 184440 }, { "epoch": 0.3726006698529798, "grad_norm": 1051.6776123046875, "learning_rate": 7.954436569600238e-06, "loss": 18.6176, "step": 184450 }, { "epoch": 0.3726208704856636, "grad_norm": 656.8740234375, "learning_rate": 7.954154952093754e-06, "loss": 26.7486, "step": 184460 }, { "epoch": 0.37264107111834743, "grad_norm": 130.8815460205078, "learning_rate": 7.95387332018908e-06, "loss": 12.8114, "step": 184470 }, { "epoch": 0.37266127175103125, "grad_norm": 92.77995300292969, "learning_rate": 7.953591673887586e-06, "loss": 30.7558, "step": 184480 }, { "epoch": 0.3726814723837151, "grad_norm": 363.2783203125, "learning_rate": 
7.953310013190645e-06, "loss": 19.3335, "step": 184490 }, { "epoch": 0.3727016730163989, "grad_norm": 1399.116455078125, "learning_rate": 7.953028338099628e-06, "loss": 33.2493, "step": 184500 }, { "epoch": 0.3727218736490827, "grad_norm": 227.3540802001953, "learning_rate": 7.952746648615908e-06, "loss": 18.106, "step": 184510 }, { "epoch": 0.3727420742817665, "grad_norm": 507.50384521484375, "learning_rate": 7.952464944740861e-06, "loss": 21.1331, "step": 184520 }, { "epoch": 0.3727622749144503, "grad_norm": 143.76055908203125, "learning_rate": 7.952183226475858e-06, "loss": 14.0552, "step": 184530 }, { "epoch": 0.3727824755471341, "grad_norm": 132.0059051513672, "learning_rate": 7.95190149382227e-06, "loss": 25.0118, "step": 184540 }, { "epoch": 0.37280267617981794, "grad_norm": 117.39488983154297, "learning_rate": 7.951619746781474e-06, "loss": 29.0449, "step": 184550 }, { "epoch": 0.37282287681250176, "grad_norm": 579.1425170898438, "learning_rate": 7.95133798535484e-06, "loss": 22.7947, "step": 184560 }, { "epoch": 0.3728430774451856, "grad_norm": 458.9516906738281, "learning_rate": 7.951056209543744e-06, "loss": 29.5082, "step": 184570 }, { "epoch": 0.3728632780778694, "grad_norm": 534.38623046875, "learning_rate": 7.950774419349557e-06, "loss": 22.181, "step": 184580 }, { "epoch": 0.3728834787105532, "grad_norm": 342.5497131347656, "learning_rate": 7.950492614773653e-06, "loss": 25.2034, "step": 184590 }, { "epoch": 0.37290367934323704, "grad_norm": 0.0, "learning_rate": 7.950210795817406e-06, "loss": 17.5624, "step": 184600 }, { "epoch": 0.37292387997592086, "grad_norm": 975.2837524414062, "learning_rate": 7.949928962482191e-06, "loss": 28.2033, "step": 184610 }, { "epoch": 0.3729440806086047, "grad_norm": 490.6043395996094, "learning_rate": 7.94964711476938e-06, "loss": 18.7533, "step": 184620 }, { "epoch": 0.3729642812412885, "grad_norm": 281.4523620605469, "learning_rate": 7.949365252680343e-06, "loss": 29.3018, "step": 184630 }, { "epoch": 0.3729844818739723, "grad_norm": 476.4045715332031, "learning_rate": 7.94908337621646e-06, "loss": 25.1157, "step": 184640 }, { "epoch": 0.3730046825066561, "grad_norm": 468.6783142089844, "learning_rate": 7.948801485379103e-06, "loss": 27.7701, "step": 184650 }, { "epoch": 0.3730248831393399, "grad_norm": 316.97369384765625, "learning_rate": 7.948519580169644e-06, "loss": 30.6865, "step": 184660 }, { "epoch": 0.3730450837720237, "grad_norm": 599.8587646484375, "learning_rate": 7.94823766058946e-06, "loss": 36.0465, "step": 184670 }, { "epoch": 0.37306528440470754, "grad_norm": 415.52838134765625, "learning_rate": 7.947955726639922e-06, "loss": 20.1033, "step": 184680 }, { "epoch": 0.37308548503739136, "grad_norm": 308.10943603515625, "learning_rate": 7.947673778322405e-06, "loss": 19.1519, "step": 184690 }, { "epoch": 0.3731056856700752, "grad_norm": 364.94189453125, "learning_rate": 7.947391815638284e-06, "loss": 13.5899, "step": 184700 }, { "epoch": 0.373125886302759, "grad_norm": 363.39337158203125, "learning_rate": 7.947109838588932e-06, "loss": 23.8721, "step": 184710 }, { "epoch": 0.3731460869354428, "grad_norm": 872.3594970703125, "learning_rate": 7.946827847175724e-06, "loss": 14.1021, "step": 184720 }, { "epoch": 0.37316628756812664, "grad_norm": 495.41619873046875, "learning_rate": 7.946545841400035e-06, "loss": 25.4238, "step": 184730 }, { "epoch": 0.37318648820081046, "grad_norm": 903.4898681640625, "learning_rate": 7.94626382126324e-06, "loss": 20.8995, "step": 184740 }, { "epoch": 0.3732066888334943, "grad_norm": 
420.36553955078125, "learning_rate": 7.945981786766712e-06, "loss": 42.4123, "step": 184750 }, { "epoch": 0.3732268894661781, "grad_norm": 45.977108001708984, "learning_rate": 7.945699737911825e-06, "loss": 20.0298, "step": 184760 }, { "epoch": 0.3732470900988619, "grad_norm": 463.26324462890625, "learning_rate": 7.945417674699954e-06, "loss": 29.0649, "step": 184770 }, { "epoch": 0.3732672907315457, "grad_norm": 300.8017272949219, "learning_rate": 7.945135597132477e-06, "loss": 21.3997, "step": 184780 }, { "epoch": 0.3732874913642295, "grad_norm": 373.9449462890625, "learning_rate": 7.944853505210766e-06, "loss": 23.3273, "step": 184790 }, { "epoch": 0.3733076919969133, "grad_norm": 226.5374755859375, "learning_rate": 7.944571398936193e-06, "loss": 19.7992, "step": 184800 }, { "epoch": 0.37332789262959715, "grad_norm": 325.9032287597656, "learning_rate": 7.94428927831014e-06, "loss": 23.8617, "step": 184810 }, { "epoch": 0.37334809326228097, "grad_norm": 264.9305725097656, "learning_rate": 7.944007143333976e-06, "loss": 16.1416, "step": 184820 }, { "epoch": 0.3733682938949648, "grad_norm": 396.746337890625, "learning_rate": 7.943724994009078e-06, "loss": 20.2477, "step": 184830 }, { "epoch": 0.3733884945276486, "grad_norm": 514.2771606445312, "learning_rate": 7.943442830336822e-06, "loss": 28.5255, "step": 184840 }, { "epoch": 0.3734086951603324, "grad_norm": 406.6208190917969, "learning_rate": 7.943160652318585e-06, "loss": 29.833, "step": 184850 }, { "epoch": 0.37342889579301625, "grad_norm": 339.4564514160156, "learning_rate": 7.942878459955737e-06, "loss": 14.0227, "step": 184860 }, { "epoch": 0.37344909642570007, "grad_norm": 1454.90673828125, "learning_rate": 7.942596253249658e-06, "loss": 25.755, "step": 184870 }, { "epoch": 0.3734692970583839, "grad_norm": 338.5536804199219, "learning_rate": 7.94231403220172e-06, "loss": 12.4781, "step": 184880 }, { "epoch": 0.3734894976910677, "grad_norm": 338.1551513671875, "learning_rate": 7.942031796813302e-06, "loss": 23.3433, "step": 184890 }, { "epoch": 0.3735096983237515, "grad_norm": 311.31689453125, "learning_rate": 7.941749547085778e-06, "loss": 19.965, "step": 184900 }, { "epoch": 0.3735298989564353, "grad_norm": 404.0726013183594, "learning_rate": 7.941467283020521e-06, "loss": 34.9207, "step": 184910 }, { "epoch": 0.3735500995891191, "grad_norm": 159.56248474121094, "learning_rate": 7.941185004618911e-06, "loss": 20.8413, "step": 184920 }, { "epoch": 0.37357030022180293, "grad_norm": 195.2562255859375, "learning_rate": 7.940902711882321e-06, "loss": 10.9093, "step": 184930 }, { "epoch": 0.37359050085448675, "grad_norm": 180.9736328125, "learning_rate": 7.940620404812129e-06, "loss": 19.7429, "step": 184940 }, { "epoch": 0.37361070148717057, "grad_norm": 115.31999206542969, "learning_rate": 7.94033808340971e-06, "loss": 16.3377, "step": 184950 }, { "epoch": 0.3736309021198544, "grad_norm": 665.5597534179688, "learning_rate": 7.940055747676439e-06, "loss": 43.3205, "step": 184960 }, { "epoch": 0.3736511027525382, "grad_norm": 128.6699676513672, "learning_rate": 7.939773397613692e-06, "loss": 23.3228, "step": 184970 }, { "epoch": 0.37367130338522203, "grad_norm": 502.3322448730469, "learning_rate": 7.939491033222848e-06, "loss": 19.8535, "step": 184980 }, { "epoch": 0.37369150401790585, "grad_norm": 211.36598205566406, "learning_rate": 7.939208654505281e-06, "loss": 11.8954, "step": 184990 }, { "epoch": 0.37371170465058967, "grad_norm": 351.20111083984375, "learning_rate": 7.938926261462366e-06, "loss": 28.067, "step": 185000 }, { 
"epoch": 0.3737319052832735, "grad_norm": 792.7965698242188, "learning_rate": 7.938643854095482e-06, "loss": 56.3682, "step": 185010 }, { "epoch": 0.3737521059159573, "grad_norm": 714.4829711914062, "learning_rate": 7.938361432406005e-06, "loss": 18.4576, "step": 185020 }, { "epoch": 0.37377230654864113, "grad_norm": 186.06739807128906, "learning_rate": 7.93807899639531e-06, "loss": 23.9017, "step": 185030 }, { "epoch": 0.3737925071813249, "grad_norm": 246.42047119140625, "learning_rate": 7.937796546064773e-06, "loss": 25.0836, "step": 185040 }, { "epoch": 0.3738127078140087, "grad_norm": 913.75439453125, "learning_rate": 7.937514081415773e-06, "loss": 39.7503, "step": 185050 }, { "epoch": 0.37383290844669254, "grad_norm": 215.49111938476562, "learning_rate": 7.937231602449687e-06, "loss": 21.732, "step": 185060 }, { "epoch": 0.37385310907937636, "grad_norm": 629.5859985351562, "learning_rate": 7.936949109167887e-06, "loss": 42.6736, "step": 185070 }, { "epoch": 0.3738733097120602, "grad_norm": 203.20004272460938, "learning_rate": 7.936666601571756e-06, "loss": 18.076, "step": 185080 }, { "epoch": 0.373893510344744, "grad_norm": 25.225662231445312, "learning_rate": 7.936384079662666e-06, "loss": 20.4229, "step": 185090 }, { "epoch": 0.3739137109774278, "grad_norm": 276.54180908203125, "learning_rate": 7.936101543441998e-06, "loss": 16.5489, "step": 185100 }, { "epoch": 0.37393391161011164, "grad_norm": 256.1173095703125, "learning_rate": 7.935818992911129e-06, "loss": 34.7231, "step": 185110 }, { "epoch": 0.37395411224279546, "grad_norm": 265.9543762207031, "learning_rate": 7.935536428071431e-06, "loss": 14.2963, "step": 185120 }, { "epoch": 0.3739743128754793, "grad_norm": 230.64804077148438, "learning_rate": 7.935253848924285e-06, "loss": 16.0491, "step": 185130 }, { "epoch": 0.3739945135081631, "grad_norm": 334.02545166015625, "learning_rate": 7.93497125547107e-06, "loss": 28.3974, "step": 185140 }, { "epoch": 0.3740147141408469, "grad_norm": 468.404052734375, "learning_rate": 7.934688647713158e-06, "loss": 20.2318, "step": 185150 }, { "epoch": 0.3740349147735307, "grad_norm": 645.5770874023438, "learning_rate": 7.93440602565193e-06, "loss": 18.7274, "step": 185160 }, { "epoch": 0.3740551154062145, "grad_norm": 257.2210998535156, "learning_rate": 7.934123389288765e-06, "loss": 18.5803, "step": 185170 }, { "epoch": 0.3740753160388983, "grad_norm": 185.73089599609375, "learning_rate": 7.933840738625035e-06, "loss": 10.0916, "step": 185180 }, { "epoch": 0.37409551667158214, "grad_norm": 257.7461242675781, "learning_rate": 7.933558073662125e-06, "loss": 16.1209, "step": 185190 }, { "epoch": 0.37411571730426596, "grad_norm": 42.35219955444336, "learning_rate": 7.933275394401407e-06, "loss": 13.1954, "step": 185200 }, { "epoch": 0.3741359179369498, "grad_norm": 409.8907775878906, "learning_rate": 7.932992700844261e-06, "loss": 16.9948, "step": 185210 }, { "epoch": 0.3741561185696336, "grad_norm": 494.62371826171875, "learning_rate": 7.932709992992063e-06, "loss": 27.6559, "step": 185220 }, { "epoch": 0.3741763192023174, "grad_norm": 357.8368835449219, "learning_rate": 7.932427270846194e-06, "loss": 10.1764, "step": 185230 }, { "epoch": 0.37419651983500124, "grad_norm": 504.85205078125, "learning_rate": 7.932144534408028e-06, "loss": 12.3351, "step": 185240 }, { "epoch": 0.37421672046768506, "grad_norm": 248.2692108154297, "learning_rate": 7.931861783678946e-06, "loss": 21.6287, "step": 185250 }, { "epoch": 0.3742369211003689, "grad_norm": 102.75997161865234, "learning_rate": 
7.931579018660327e-06, "loss": 31.1875, "step": 185260 }, { "epoch": 0.3742571217330527, "grad_norm": 194.78038024902344, "learning_rate": 7.931296239353546e-06, "loss": 9.991, "step": 185270 }, { "epoch": 0.3742773223657365, "grad_norm": 449.73455810546875, "learning_rate": 7.931013445759984e-06, "loss": 21.2125, "step": 185280 }, { "epoch": 0.3742975229984203, "grad_norm": 282.6627502441406, "learning_rate": 7.930730637881016e-06, "loss": 25.5474, "step": 185290 }, { "epoch": 0.3743177236311041, "grad_norm": 27.483171463012695, "learning_rate": 7.930447815718022e-06, "loss": 18.2717, "step": 185300 }, { "epoch": 0.3743379242637879, "grad_norm": 310.1260070800781, "learning_rate": 7.93016497927238e-06, "loss": 18.9379, "step": 185310 }, { "epoch": 0.37435812489647174, "grad_norm": 296.8552551269531, "learning_rate": 7.929882128545474e-06, "loss": 31.4366, "step": 185320 }, { "epoch": 0.37437832552915556, "grad_norm": 416.75634765625, "learning_rate": 7.929599263538674e-06, "loss": 12.4571, "step": 185330 }, { "epoch": 0.3743985261618394, "grad_norm": 207.06988525390625, "learning_rate": 7.929316384253363e-06, "loss": 37.065, "step": 185340 }, { "epoch": 0.3744187267945232, "grad_norm": 252.4696502685547, "learning_rate": 7.929033490690921e-06, "loss": 13.6604, "step": 185350 }, { "epoch": 0.374438927427207, "grad_norm": 674.5070190429688, "learning_rate": 7.928750582852722e-06, "loss": 24.2581, "step": 185360 }, { "epoch": 0.37445912805989084, "grad_norm": 218.89048767089844, "learning_rate": 7.92846766074015e-06, "loss": 12.1517, "step": 185370 }, { "epoch": 0.37447932869257466, "grad_norm": 116.3596420288086, "learning_rate": 7.928184724354581e-06, "loss": 15.6534, "step": 185380 }, { "epoch": 0.3744995293252585, "grad_norm": 236.49302673339844, "learning_rate": 7.927901773697396e-06, "loss": 19.4553, "step": 185390 }, { "epoch": 0.3745197299579423, "grad_norm": 307.58331298828125, "learning_rate": 7.927618808769971e-06, "loss": 13.7072, "step": 185400 }, { "epoch": 0.3745399305906261, "grad_norm": 116.84063720703125, "learning_rate": 7.927335829573688e-06, "loss": 27.0985, "step": 185410 }, { "epoch": 0.3745601312233099, "grad_norm": 451.7219543457031, "learning_rate": 7.927052836109925e-06, "loss": 23.0556, "step": 185420 }, { "epoch": 0.3745803318559937, "grad_norm": 488.555908203125, "learning_rate": 7.926769828380062e-06, "loss": 24.1628, "step": 185430 }, { "epoch": 0.37460053248867753, "grad_norm": 421.19952392578125, "learning_rate": 7.926486806385479e-06, "loss": 13.3053, "step": 185440 }, { "epoch": 0.37462073312136135, "grad_norm": 579.669189453125, "learning_rate": 7.926203770127552e-06, "loss": 16.6306, "step": 185450 }, { "epoch": 0.37464093375404517, "grad_norm": 571.9569091796875, "learning_rate": 7.925920719607663e-06, "loss": 25.5336, "step": 185460 }, { "epoch": 0.374661134386729, "grad_norm": 557.4139404296875, "learning_rate": 7.925637654827192e-06, "loss": 16.2948, "step": 185470 }, { "epoch": 0.3746813350194128, "grad_norm": 279.9828186035156, "learning_rate": 7.925354575787517e-06, "loss": 28.4328, "step": 185480 }, { "epoch": 0.37470153565209663, "grad_norm": 1.3079050779342651, "learning_rate": 7.925071482490018e-06, "loss": 23.791, "step": 185490 }, { "epoch": 0.37472173628478045, "grad_norm": 19.147375106811523, "learning_rate": 7.92478837493608e-06, "loss": 28.4207, "step": 185500 }, { "epoch": 0.37474193691746427, "grad_norm": 133.83213806152344, "learning_rate": 7.924505253127072e-06, "loss": 18.1067, "step": 185510 }, { "epoch": 0.3747621375501481, 
"grad_norm": 170.837890625, "learning_rate": 7.924222117064385e-06, "loss": 10.1431, "step": 185520 }, { "epoch": 0.3747823381828319, "grad_norm": 694.3130493164062, "learning_rate": 7.92393896674939e-06, "loss": 12.2083, "step": 185530 }, { "epoch": 0.37480253881551573, "grad_norm": 365.13720703125, "learning_rate": 7.923655802183475e-06, "loss": 25.7444, "step": 185540 }, { "epoch": 0.3748227394481995, "grad_norm": 478.3114013671875, "learning_rate": 7.923372623368014e-06, "loss": 22.0276, "step": 185550 }, { "epoch": 0.3748429400808833, "grad_norm": 540.748046875, "learning_rate": 7.92308943030439e-06, "loss": 26.7105, "step": 185560 }, { "epoch": 0.37486314071356713, "grad_norm": 431.2481384277344, "learning_rate": 7.922806222993981e-06, "loss": 15.4215, "step": 185570 }, { "epoch": 0.37488334134625095, "grad_norm": 311.3813781738281, "learning_rate": 7.92252300143817e-06, "loss": 23.4757, "step": 185580 }, { "epoch": 0.3749035419789348, "grad_norm": 240.32777404785156, "learning_rate": 7.922239765638338e-06, "loss": 21.5707, "step": 185590 }, { "epoch": 0.3749237426116186, "grad_norm": 359.6653747558594, "learning_rate": 7.921956515595861e-06, "loss": 22.6223, "step": 185600 }, { "epoch": 0.3749439432443024, "grad_norm": 899.4860229492188, "learning_rate": 7.921673251312124e-06, "loss": 35.97, "step": 185610 }, { "epoch": 0.37496414387698623, "grad_norm": 29.04227066040039, "learning_rate": 7.921389972788505e-06, "loss": 12.8657, "step": 185620 }, { "epoch": 0.37498434450967005, "grad_norm": 263.94097900390625, "learning_rate": 7.921106680026388e-06, "loss": 18.596, "step": 185630 }, { "epoch": 0.3750045451423539, "grad_norm": 283.1744384765625, "learning_rate": 7.920823373027149e-06, "loss": 14.0105, "step": 185640 }, { "epoch": 0.3750247457750377, "grad_norm": 454.0027770996094, "learning_rate": 7.920540051792171e-06, "loss": 22.3368, "step": 185650 }, { "epoch": 0.3750449464077215, "grad_norm": 525.3453979492188, "learning_rate": 7.920256716322837e-06, "loss": 10.4326, "step": 185660 }, { "epoch": 0.3750651470404053, "grad_norm": 365.10662841796875, "learning_rate": 7.919973366620525e-06, "loss": 23.8483, "step": 185670 }, { "epoch": 0.3750853476730891, "grad_norm": 431.7030334472656, "learning_rate": 7.919690002686615e-06, "loss": 20.7615, "step": 185680 }, { "epoch": 0.3751055483057729, "grad_norm": 366.341552734375, "learning_rate": 7.919406624522492e-06, "loss": 32.8188, "step": 185690 }, { "epoch": 0.37512574893845674, "grad_norm": 436.1279602050781, "learning_rate": 7.919123232129535e-06, "loss": 16.5177, "step": 185700 }, { "epoch": 0.37514594957114056, "grad_norm": 488.3873291015625, "learning_rate": 7.918839825509126e-06, "loss": 19.1131, "step": 185710 }, { "epoch": 0.3751661502038244, "grad_norm": 3.707166910171509, "learning_rate": 7.918556404662645e-06, "loss": 11.3262, "step": 185720 }, { "epoch": 0.3751863508365082, "grad_norm": 423.8901062011719, "learning_rate": 7.918272969591474e-06, "loss": 32.1798, "step": 185730 }, { "epoch": 0.375206551469192, "grad_norm": 550.1453857421875, "learning_rate": 7.917989520296996e-06, "loss": 18.4277, "step": 185740 }, { "epoch": 0.37522675210187584, "grad_norm": 283.56903076171875, "learning_rate": 7.917706056780588e-06, "loss": 27.2015, "step": 185750 }, { "epoch": 0.37524695273455966, "grad_norm": 608.2725219726562, "learning_rate": 7.917422579043637e-06, "loss": 11.4259, "step": 185760 }, { "epoch": 0.3752671533672435, "grad_norm": 490.80792236328125, "learning_rate": 7.91713908708752e-06, "loss": 21.0511, "step": 185770 }, { 
"epoch": 0.3752873539999273, "grad_norm": 448.4722900390625, "learning_rate": 7.916855580913622e-06, "loss": 24.288, "step": 185780 }, { "epoch": 0.3753075546326111, "grad_norm": 232.2841796875, "learning_rate": 7.916572060523326e-06, "loss": 26.2219, "step": 185790 }, { "epoch": 0.3753277552652949, "grad_norm": 77.63753509521484, "learning_rate": 7.916288525918008e-06, "loss": 20.1102, "step": 185800 }, { "epoch": 0.3753479558979787, "grad_norm": 405.3289489746094, "learning_rate": 7.916004977099054e-06, "loss": 31.0604, "step": 185810 }, { "epoch": 0.3753681565306625, "grad_norm": 761.98583984375, "learning_rate": 7.915721414067847e-06, "loss": 21.6656, "step": 185820 }, { "epoch": 0.37538835716334634, "grad_norm": 342.01947021484375, "learning_rate": 7.915437836825767e-06, "loss": 27.4627, "step": 185830 }, { "epoch": 0.37540855779603016, "grad_norm": 149.497314453125, "learning_rate": 7.915154245374197e-06, "loss": 12.2475, "step": 185840 }, { "epoch": 0.375428758428714, "grad_norm": 264.66595458984375, "learning_rate": 7.914870639714517e-06, "loss": 17.7935, "step": 185850 }, { "epoch": 0.3754489590613978, "grad_norm": 260.6634826660156, "learning_rate": 7.914587019848113e-06, "loss": 24.8694, "step": 185860 }, { "epoch": 0.3754691596940816, "grad_norm": 575.6069946289062, "learning_rate": 7.914303385776365e-06, "loss": 14.7805, "step": 185870 }, { "epoch": 0.37548936032676544, "grad_norm": 121.77664184570312, "learning_rate": 7.914019737500655e-06, "loss": 21.3474, "step": 185880 }, { "epoch": 0.37550956095944926, "grad_norm": 368.7645263671875, "learning_rate": 7.913736075022366e-06, "loss": 18.4518, "step": 185890 }, { "epoch": 0.3755297615921331, "grad_norm": 530.3685913085938, "learning_rate": 7.913452398342882e-06, "loss": 22.438, "step": 185900 }, { "epoch": 0.3755499622248169, "grad_norm": 460.9221496582031, "learning_rate": 7.913168707463583e-06, "loss": 15.6313, "step": 185910 }, { "epoch": 0.3755701628575007, "grad_norm": 575.2742309570312, "learning_rate": 7.912885002385852e-06, "loss": 27.6675, "step": 185920 }, { "epoch": 0.3755903634901845, "grad_norm": 1062.7508544921875, "learning_rate": 7.912601283111076e-06, "loss": 25.1376, "step": 185930 }, { "epoch": 0.3756105641228683, "grad_norm": 249.41693115234375, "learning_rate": 7.912317549640632e-06, "loss": 13.3535, "step": 185940 }, { "epoch": 0.3756307647555521, "grad_norm": 436.9518737792969, "learning_rate": 7.912033801975907e-06, "loss": 31.6522, "step": 185950 }, { "epoch": 0.37565096538823595, "grad_norm": 534.3568115234375, "learning_rate": 7.911750040118282e-06, "loss": 25.9282, "step": 185960 }, { "epoch": 0.37567116602091977, "grad_norm": 501.3494873046875, "learning_rate": 7.91146626406914e-06, "loss": 14.9841, "step": 185970 }, { "epoch": 0.3756913666536036, "grad_norm": 855.99853515625, "learning_rate": 7.911182473829865e-06, "loss": 15.4411, "step": 185980 }, { "epoch": 0.3757115672862874, "grad_norm": 342.9635925292969, "learning_rate": 7.91089866940184e-06, "loss": 26.5816, "step": 185990 }, { "epoch": 0.3757317679189712, "grad_norm": 877.3455200195312, "learning_rate": 7.910614850786448e-06, "loss": 28.9508, "step": 186000 }, { "epoch": 0.37575196855165505, "grad_norm": 675.5576171875, "learning_rate": 7.910331017985072e-06, "loss": 15.8867, "step": 186010 }, { "epoch": 0.37577216918433887, "grad_norm": 598.6139526367188, "learning_rate": 7.910047170999095e-06, "loss": 30.0019, "step": 186020 }, { "epoch": 0.3757923698170227, "grad_norm": 59.5485725402832, "learning_rate": 7.9097633098299e-06, "loss": 
46.0081, "step": 186030 }, { "epoch": 0.3758125704497065, "grad_norm": 273.9651794433594, "learning_rate": 7.909479434478874e-06, "loss": 16.2739, "step": 186040 }, { "epoch": 0.3758327710823903, "grad_norm": 455.9841003417969, "learning_rate": 7.909195544947398e-06, "loss": 28.814, "step": 186050 }, { "epoch": 0.3758529717150741, "grad_norm": 252.17733764648438, "learning_rate": 7.908911641236855e-06, "loss": 21.4245, "step": 186060 }, { "epoch": 0.3758731723477579, "grad_norm": 173.3985137939453, "learning_rate": 7.908627723348628e-06, "loss": 23.1291, "step": 186070 }, { "epoch": 0.37589337298044173, "grad_norm": 385.39447021484375, "learning_rate": 7.908343791284104e-06, "loss": 29.9944, "step": 186080 }, { "epoch": 0.37591357361312555, "grad_norm": 375.59173583984375, "learning_rate": 7.908059845044665e-06, "loss": 13.6519, "step": 186090 }, { "epoch": 0.37593377424580937, "grad_norm": 460.693603515625, "learning_rate": 7.907775884631694e-06, "loss": 17.0569, "step": 186100 }, { "epoch": 0.3759539748784932, "grad_norm": 265.4934387207031, "learning_rate": 7.907491910046578e-06, "loss": 37.8087, "step": 186110 }, { "epoch": 0.375974175511177, "grad_norm": 124.66250610351562, "learning_rate": 7.907207921290698e-06, "loss": 24.2959, "step": 186120 }, { "epoch": 0.37599437614386083, "grad_norm": 80.29561614990234, "learning_rate": 7.906923918365439e-06, "loss": 26.6489, "step": 186130 }, { "epoch": 0.37601457677654465, "grad_norm": 146.98678588867188, "learning_rate": 7.906639901272183e-06, "loss": 40.2186, "step": 186140 }, { "epoch": 0.37603477740922847, "grad_norm": 333.65533447265625, "learning_rate": 7.90635587001232e-06, "loss": 25.5619, "step": 186150 }, { "epoch": 0.3760549780419123, "grad_norm": 158.60435485839844, "learning_rate": 7.906071824587231e-06, "loss": 20.7498, "step": 186160 }, { "epoch": 0.3760751786745961, "grad_norm": 178.82809448242188, "learning_rate": 7.9057877649983e-06, "loss": 25.6808, "step": 186170 }, { "epoch": 0.37609537930727993, "grad_norm": 361.49884033203125, "learning_rate": 7.905503691246909e-06, "loss": 17.2704, "step": 186180 }, { "epoch": 0.3761155799399637, "grad_norm": 311.1093444824219, "learning_rate": 7.905219603334449e-06, "loss": 14.6666, "step": 186190 }, { "epoch": 0.3761357805726475, "grad_norm": 72.5036849975586, "learning_rate": 7.904935501262301e-06, "loss": 17.7347, "step": 186200 }, { "epoch": 0.37615598120533134, "grad_norm": 183.01370239257812, "learning_rate": 7.904651385031847e-06, "loss": 36.2247, "step": 186210 }, { "epoch": 0.37617618183801516, "grad_norm": 903.3137817382812, "learning_rate": 7.904367254644475e-06, "loss": 20.9463, "step": 186220 }, { "epoch": 0.376196382470699, "grad_norm": 272.78167724609375, "learning_rate": 7.90408311010157e-06, "loss": 21.3787, "step": 186230 }, { "epoch": 0.3762165831033828, "grad_norm": 486.9576721191406, "learning_rate": 7.903798951404518e-06, "loss": 17.8778, "step": 186240 }, { "epoch": 0.3762367837360666, "grad_norm": 321.8508605957031, "learning_rate": 7.903514778554699e-06, "loss": 26.1256, "step": 186250 }, { "epoch": 0.37625698436875044, "grad_norm": 455.9219970703125, "learning_rate": 7.903230591553504e-06, "loss": 53.3584, "step": 186260 }, { "epoch": 0.37627718500143426, "grad_norm": 829.6468505859375, "learning_rate": 7.902946390402313e-06, "loss": 32.9458, "step": 186270 }, { "epoch": 0.3762973856341181, "grad_norm": 248.43630981445312, "learning_rate": 7.902662175102514e-06, "loss": 13.599, "step": 186280 }, { "epoch": 0.3763175862668019, "grad_norm": 236.6037139892578, 
"learning_rate": 7.90237794565549e-06, "loss": 32.8911, "step": 186290 }, { "epoch": 0.3763377868994857, "grad_norm": 403.4139404296875, "learning_rate": 7.90209370206263e-06, "loss": 16.999, "step": 186300 }, { "epoch": 0.3763579875321695, "grad_norm": 303.2038269042969, "learning_rate": 7.901809444325318e-06, "loss": 13.2335, "step": 186310 }, { "epoch": 0.3763781881648533, "grad_norm": 201.00413513183594, "learning_rate": 7.901525172444938e-06, "loss": 6.8179, "step": 186320 }, { "epoch": 0.3763983887975371, "grad_norm": 1031.934814453125, "learning_rate": 7.901240886422875e-06, "loss": 37.6776, "step": 186330 }, { "epoch": 0.37641858943022094, "grad_norm": 217.69149780273438, "learning_rate": 7.900956586260516e-06, "loss": 14.5887, "step": 186340 }, { "epoch": 0.37643879006290476, "grad_norm": 391.96905517578125, "learning_rate": 7.900672271959247e-06, "loss": 14.6044, "step": 186350 }, { "epoch": 0.3764589906955886, "grad_norm": 532.9293823242188, "learning_rate": 7.900387943520453e-06, "loss": 21.1594, "step": 186360 }, { "epoch": 0.3764791913282724, "grad_norm": 449.4956970214844, "learning_rate": 7.900103600945521e-06, "loss": 26.3718, "step": 186370 }, { "epoch": 0.3764993919609562, "grad_norm": 946.3658447265625, "learning_rate": 7.899819244235835e-06, "loss": 19.1424, "step": 186380 }, { "epoch": 0.37651959259364004, "grad_norm": 440.26019287109375, "learning_rate": 7.899534873392781e-06, "loss": 12.1415, "step": 186390 }, { "epoch": 0.37653979322632386, "grad_norm": 562.1298217773438, "learning_rate": 7.899250488417746e-06, "loss": 17.3849, "step": 186400 }, { "epoch": 0.3765599938590077, "grad_norm": 213.060791015625, "learning_rate": 7.898966089312117e-06, "loss": 23.1838, "step": 186410 }, { "epoch": 0.3765801944916915, "grad_norm": 203.11265563964844, "learning_rate": 7.898681676077278e-06, "loss": 24.2536, "step": 186420 }, { "epoch": 0.3766003951243753, "grad_norm": 50.743221282958984, "learning_rate": 7.898397248714615e-06, "loss": 28.5922, "step": 186430 }, { "epoch": 0.3766205957570591, "grad_norm": 318.5630187988281, "learning_rate": 7.898112807225517e-06, "loss": 9.3861, "step": 186440 }, { "epoch": 0.3766407963897429, "grad_norm": 493.37646484375, "learning_rate": 7.897828351611368e-06, "loss": 18.3657, "step": 186450 }, { "epoch": 0.3766609970224267, "grad_norm": 669.4931640625, "learning_rate": 7.897543881873555e-06, "loss": 29.6392, "step": 186460 }, { "epoch": 0.37668119765511054, "grad_norm": 449.8827819824219, "learning_rate": 7.897259398013465e-06, "loss": 19.5032, "step": 186470 }, { "epoch": 0.37670139828779436, "grad_norm": 400.0908203125, "learning_rate": 7.896974900032483e-06, "loss": 54.8798, "step": 186480 }, { "epoch": 0.3767215989204782, "grad_norm": 40.167388916015625, "learning_rate": 7.896690387931997e-06, "loss": 33.8369, "step": 186490 }, { "epoch": 0.376741799553162, "grad_norm": 226.82191467285156, "learning_rate": 7.896405861713393e-06, "loss": 24.2288, "step": 186500 }, { "epoch": 0.3767620001858458, "grad_norm": 322.2902526855469, "learning_rate": 7.89612132137806e-06, "loss": 20.8587, "step": 186510 }, { "epoch": 0.37678220081852964, "grad_norm": 66.47044372558594, "learning_rate": 7.895836766927383e-06, "loss": 17.9935, "step": 186520 }, { "epoch": 0.37680240145121346, "grad_norm": 228.1887969970703, "learning_rate": 7.895552198362748e-06, "loss": 6.859, "step": 186530 }, { "epoch": 0.3768226020838973, "grad_norm": 336.7323303222656, "learning_rate": 7.895267615685542e-06, "loss": 16.1734, "step": 186540 }, { "epoch": 0.3768428027165811, 
"grad_norm": 603.1956787109375, "learning_rate": 7.894983018897153e-06, "loss": 24.2695, "step": 186550 }, { "epoch": 0.3768630033492649, "grad_norm": 325.160888671875, "learning_rate": 7.89469840799897e-06, "loss": 19.9261, "step": 186560 }, { "epoch": 0.3768832039819487, "grad_norm": 351.0687255859375, "learning_rate": 7.894413782992375e-06, "loss": 20.6885, "step": 186570 }, { "epoch": 0.3769034046146325, "grad_norm": 216.7996063232422, "learning_rate": 7.894129143878758e-06, "loss": 16.5278, "step": 186580 }, { "epoch": 0.37692360524731633, "grad_norm": 539.2125854492188, "learning_rate": 7.89384449065951e-06, "loss": 16.7036, "step": 186590 }, { "epoch": 0.37694380588000015, "grad_norm": 596.241943359375, "learning_rate": 7.893559823336013e-06, "loss": 25.1195, "step": 186600 }, { "epoch": 0.37696400651268397, "grad_norm": 527.4559326171875, "learning_rate": 7.893275141909655e-06, "loss": 16.728, "step": 186610 }, { "epoch": 0.3769842071453678, "grad_norm": 25.770774841308594, "learning_rate": 7.892990446381828e-06, "loss": 21.4469, "step": 186620 }, { "epoch": 0.3770044077780516, "grad_norm": 642.983642578125, "learning_rate": 7.892705736753913e-06, "loss": 14.4212, "step": 186630 }, { "epoch": 0.37702460841073543, "grad_norm": 278.6478271484375, "learning_rate": 7.892421013027302e-06, "loss": 22.8077, "step": 186640 }, { "epoch": 0.37704480904341925, "grad_norm": 351.9095764160156, "learning_rate": 7.892136275203383e-06, "loss": 15.5565, "step": 186650 }, { "epoch": 0.37706500967610307, "grad_norm": 694.929931640625, "learning_rate": 7.891851523283542e-06, "loss": 23.1556, "step": 186660 }, { "epoch": 0.3770852103087869, "grad_norm": 393.3650817871094, "learning_rate": 7.891566757269169e-06, "loss": 33.6424, "step": 186670 }, { "epoch": 0.3771054109414707, "grad_norm": 568.8635864257812, "learning_rate": 7.891281977161648e-06, "loss": 20.8657, "step": 186680 }, { "epoch": 0.37712561157415453, "grad_norm": 675.2408447265625, "learning_rate": 7.89099718296237e-06, "loss": 23.8988, "step": 186690 }, { "epoch": 0.3771458122068383, "grad_norm": 681.5302734375, "learning_rate": 7.890712374672724e-06, "loss": 14.7216, "step": 186700 }, { "epoch": 0.3771660128395221, "grad_norm": 177.2668914794922, "learning_rate": 7.890427552294093e-06, "loss": 16.5428, "step": 186710 }, { "epoch": 0.37718621347220593, "grad_norm": 616.7086181640625, "learning_rate": 7.890142715827871e-06, "loss": 19.9336, "step": 186720 }, { "epoch": 0.37720641410488975, "grad_norm": 148.07113647460938, "learning_rate": 7.889857865275445e-06, "loss": 19.689, "step": 186730 }, { "epoch": 0.3772266147375736, "grad_norm": 561.8738403320312, "learning_rate": 7.8895730006382e-06, "loss": 21.0662, "step": 186740 }, { "epoch": 0.3772468153702574, "grad_norm": 583.3108520507812, "learning_rate": 7.889288121917528e-06, "loss": 30.6697, "step": 186750 }, { "epoch": 0.3772670160029412, "grad_norm": 135.86241149902344, "learning_rate": 7.889003229114816e-06, "loss": 23.6817, "step": 186760 }, { "epoch": 0.37728721663562503, "grad_norm": 152.9756317138672, "learning_rate": 7.888718322231452e-06, "loss": 11.7561, "step": 186770 }, { "epoch": 0.37730741726830885, "grad_norm": 271.7252197265625, "learning_rate": 7.888433401268825e-06, "loss": 20.0215, "step": 186780 }, { "epoch": 0.3773276179009927, "grad_norm": 18.141109466552734, "learning_rate": 7.888148466228325e-06, "loss": 20.4894, "step": 186790 }, { "epoch": 0.3773478185336765, "grad_norm": 678.092529296875, "learning_rate": 7.887863517111337e-06, "loss": 27.0805, "step": 186800 
}, { "epoch": 0.3773680191663603, "grad_norm": 328.9731750488281, "learning_rate": 7.887578553919256e-06, "loss": 16.3972, "step": 186810 }, { "epoch": 0.37738821979904413, "grad_norm": 484.07122802734375, "learning_rate": 7.887293576653467e-06, "loss": 19.7992, "step": 186820 }, { "epoch": 0.3774084204317279, "grad_norm": 101.78085327148438, "learning_rate": 7.887008585315358e-06, "loss": 11.8989, "step": 186830 }, { "epoch": 0.3774286210644117, "grad_norm": 602.8466796875, "learning_rate": 7.88672357990632e-06, "loss": 33.1497, "step": 186840 }, { "epoch": 0.37744882169709554, "grad_norm": 647.81005859375, "learning_rate": 7.88643856042774e-06, "loss": 19.9207, "step": 186850 }, { "epoch": 0.37746902232977936, "grad_norm": 705.634033203125, "learning_rate": 7.886153526881011e-06, "loss": 29.5627, "step": 186860 }, { "epoch": 0.3774892229624632, "grad_norm": 383.0318298339844, "learning_rate": 7.885868479267517e-06, "loss": 9.3724, "step": 186870 }, { "epoch": 0.377509423595147, "grad_norm": 65.01758575439453, "learning_rate": 7.885583417588652e-06, "loss": 20.5341, "step": 186880 }, { "epoch": 0.3775296242278308, "grad_norm": 398.67120361328125, "learning_rate": 7.885298341845803e-06, "loss": 19.8146, "step": 186890 }, { "epoch": 0.37754982486051464, "grad_norm": 483.8502197265625, "learning_rate": 7.88501325204036e-06, "loss": 21.7544, "step": 186900 }, { "epoch": 0.37757002549319846, "grad_norm": 431.016357421875, "learning_rate": 7.88472814817371e-06, "loss": 18.4124, "step": 186910 }, { "epoch": 0.3775902261258823, "grad_norm": 407.1339111328125, "learning_rate": 7.884443030247248e-06, "loss": 8.0406, "step": 186920 }, { "epoch": 0.3776104267585661, "grad_norm": 169.00169372558594, "learning_rate": 7.88415789826236e-06, "loss": 10.5729, "step": 186930 }, { "epoch": 0.3776306273912499, "grad_norm": 445.7185363769531, "learning_rate": 7.883872752220434e-06, "loss": 25.9141, "step": 186940 }, { "epoch": 0.3776508280239337, "grad_norm": 80.28812408447266, "learning_rate": 7.883587592122864e-06, "loss": 20.7481, "step": 186950 }, { "epoch": 0.3776710286566175, "grad_norm": 347.7830505371094, "learning_rate": 7.883302417971037e-06, "loss": 15.599, "step": 186960 }, { "epoch": 0.3776912292893013, "grad_norm": 65.90535736083984, "learning_rate": 7.883017229766344e-06, "loss": 14.5699, "step": 186970 }, { "epoch": 0.37771142992198514, "grad_norm": 297.8687744140625, "learning_rate": 7.882732027510174e-06, "loss": 18.6648, "step": 186980 }, { "epoch": 0.37773163055466896, "grad_norm": 289.4709167480469, "learning_rate": 7.88244681120392e-06, "loss": 14.5055, "step": 186990 }, { "epoch": 0.3777518311873528, "grad_norm": 745.4989013671875, "learning_rate": 7.882161580848966e-06, "loss": 23.7828, "step": 187000 }, { "epoch": 0.3777720318200366, "grad_norm": 388.3495178222656, "learning_rate": 7.88187633644671e-06, "loss": 23.081, "step": 187010 }, { "epoch": 0.3777922324527204, "grad_norm": 296.5321960449219, "learning_rate": 7.881591077998536e-06, "loss": 29.1688, "step": 187020 }, { "epoch": 0.37781243308540424, "grad_norm": 619.4912109375, "learning_rate": 7.881305805505836e-06, "loss": 26.354, "step": 187030 }, { "epoch": 0.37783263371808806, "grad_norm": 281.350830078125, "learning_rate": 7.881020518970003e-06, "loss": 10.417, "step": 187040 }, { "epoch": 0.3778528343507719, "grad_norm": 431.0904846191406, "learning_rate": 7.880735218392424e-06, "loss": 25.9254, "step": 187050 }, { "epoch": 0.3778730349834557, "grad_norm": 740.3833618164062, "learning_rate": 7.880449903774492e-06, "loss": 
52.8058, "step": 187060 }, { "epoch": 0.3778932356161395, "grad_norm": 615.2548217773438, "learning_rate": 7.880164575117596e-06, "loss": 15.4073, "step": 187070 }, { "epoch": 0.3779134362488233, "grad_norm": 236.98509216308594, "learning_rate": 7.879879232423127e-06, "loss": 15.5054, "step": 187080 }, { "epoch": 0.3779336368815071, "grad_norm": 472.54852294921875, "learning_rate": 7.879593875692476e-06, "loss": 36.7834, "step": 187090 }, { "epoch": 0.3779538375141909, "grad_norm": 390.18182373046875, "learning_rate": 7.879308504927034e-06, "loss": 17.5821, "step": 187100 }, { "epoch": 0.37797403814687475, "grad_norm": 549.0300903320312, "learning_rate": 7.879023120128191e-06, "loss": 17.6275, "step": 187110 }, { "epoch": 0.37799423877955857, "grad_norm": 570.148681640625, "learning_rate": 7.87873772129734e-06, "loss": 19.3192, "step": 187120 }, { "epoch": 0.3780144394122424, "grad_norm": 462.4393005371094, "learning_rate": 7.878452308435868e-06, "loss": 20.0012, "step": 187130 }, { "epoch": 0.3780346400449262, "grad_norm": 876.2901611328125, "learning_rate": 7.878166881545171e-06, "loss": 19.8765, "step": 187140 }, { "epoch": 0.37805484067761, "grad_norm": 21940.1875, "learning_rate": 7.877881440626635e-06, "loss": 35.6072, "step": 187150 }, { "epoch": 0.37807504131029385, "grad_norm": 544.189697265625, "learning_rate": 7.877595985681656e-06, "loss": 25.7395, "step": 187160 }, { "epoch": 0.37809524194297767, "grad_norm": 674.6602783203125, "learning_rate": 7.877310516711623e-06, "loss": 27.2366, "step": 187170 }, { "epoch": 0.3781154425756615, "grad_norm": 373.325439453125, "learning_rate": 7.877025033717926e-06, "loss": 25.4264, "step": 187180 }, { "epoch": 0.3781356432083453, "grad_norm": 429.7815246582031, "learning_rate": 7.876739536701961e-06, "loss": 10.7018, "step": 187190 }, { "epoch": 0.3781558438410291, "grad_norm": 79.63567352294922, "learning_rate": 7.876454025665114e-06, "loss": 18.9043, "step": 187200 }, { "epoch": 0.3781760444737129, "grad_norm": 11.09257698059082, "learning_rate": 7.87616850060878e-06, "loss": 19.4218, "step": 187210 }, { "epoch": 0.3781962451063967, "grad_norm": 999.5443725585938, "learning_rate": 7.875882961534347e-06, "loss": 42.3215, "step": 187220 }, { "epoch": 0.37821644573908053, "grad_norm": 222.81358337402344, "learning_rate": 7.875597408443212e-06, "loss": 8.4141, "step": 187230 }, { "epoch": 0.37823664637176435, "grad_norm": 252.15322875976562, "learning_rate": 7.875311841336763e-06, "loss": 20.9764, "step": 187240 }, { "epoch": 0.37825684700444817, "grad_norm": 270.7761535644531, "learning_rate": 7.875026260216395e-06, "loss": 14.9763, "step": 187250 }, { "epoch": 0.378277047637132, "grad_norm": 38126.50390625, "learning_rate": 7.874740665083494e-06, "loss": 58.4065, "step": 187260 }, { "epoch": 0.3782972482698158, "grad_norm": 737.64892578125, "learning_rate": 7.874455055939458e-06, "loss": 35.3872, "step": 187270 }, { "epoch": 0.37831744890249963, "grad_norm": 72.3812026977539, "learning_rate": 7.874169432785677e-06, "loss": 24.2148, "step": 187280 }, { "epoch": 0.37833764953518345, "grad_norm": 80.65542602539062, "learning_rate": 7.87388379562354e-06, "loss": 26.749, "step": 187290 }, { "epoch": 0.37835785016786727, "grad_norm": 792.4655151367188, "learning_rate": 7.873598144454444e-06, "loss": 20.2782, "step": 187300 }, { "epoch": 0.3783780508005511, "grad_norm": 325.38720703125, "learning_rate": 7.87331247927978e-06, "loss": 13.6539, "step": 187310 }, { "epoch": 0.3783982514332349, "grad_norm": 160.98269653320312, "learning_rate": 
7.873026800100937e-06, "loss": 16.7994, "step": 187320 }, { "epoch": 0.37841845206591873, "grad_norm": 297.0093994140625, "learning_rate": 7.872741106919313e-06, "loss": 24.4319, "step": 187330 }, { "epoch": 0.3784386526986025, "grad_norm": 330.3622741699219, "learning_rate": 7.872455399736295e-06, "loss": 21.5576, "step": 187340 }, { "epoch": 0.3784588533312863, "grad_norm": 475.5498046875, "learning_rate": 7.872169678553279e-06, "loss": 23.0462, "step": 187350 }, { "epoch": 0.37847905396397014, "grad_norm": 209.27825927734375, "learning_rate": 7.871883943371656e-06, "loss": 15.8601, "step": 187360 }, { "epoch": 0.37849925459665396, "grad_norm": 427.2062072753906, "learning_rate": 7.871598194192817e-06, "loss": 14.7591, "step": 187370 }, { "epoch": 0.3785194552293378, "grad_norm": 235.0536651611328, "learning_rate": 7.871312431018158e-06, "loss": 9.4029, "step": 187380 }, { "epoch": 0.3785396558620216, "grad_norm": 306.8526916503906, "learning_rate": 7.871026653849071e-06, "loss": 25.201, "step": 187390 }, { "epoch": 0.3785598564947054, "grad_norm": 409.6279602050781, "learning_rate": 7.87074086268695e-06, "loss": 28.2371, "step": 187400 }, { "epoch": 0.37858005712738924, "grad_norm": 281.5046081542969, "learning_rate": 7.870455057533184e-06, "loss": 16.8539, "step": 187410 }, { "epoch": 0.37860025776007306, "grad_norm": 1179.369140625, "learning_rate": 7.870169238389168e-06, "loss": 38.7465, "step": 187420 }, { "epoch": 0.3786204583927569, "grad_norm": 392.25579833984375, "learning_rate": 7.869883405256296e-06, "loss": 24.1973, "step": 187430 }, { "epoch": 0.3786406590254407, "grad_norm": 377.63763427734375, "learning_rate": 7.869597558135959e-06, "loss": 20.7806, "step": 187440 }, { "epoch": 0.3786608596581245, "grad_norm": 739.8941040039062, "learning_rate": 7.869311697029553e-06, "loss": 23.3404, "step": 187450 }, { "epoch": 0.37868106029080834, "grad_norm": 282.66680908203125, "learning_rate": 7.86902582193847e-06, "loss": 25.5504, "step": 187460 }, { "epoch": 0.3787012609234921, "grad_norm": 373.229736328125, "learning_rate": 7.868739932864102e-06, "loss": 31.0513, "step": 187470 }, { "epoch": 0.3787214615561759, "grad_norm": 299.4689636230469, "learning_rate": 7.868454029807843e-06, "loss": 16.7641, "step": 187480 }, { "epoch": 0.37874166218885974, "grad_norm": 183.34661865234375, "learning_rate": 7.86816811277109e-06, "loss": 12.0302, "step": 187490 }, { "epoch": 0.37876186282154356, "grad_norm": 377.3712158203125, "learning_rate": 7.86788218175523e-06, "loss": 26.0073, "step": 187500 }, { "epoch": 0.3787820634542274, "grad_norm": 522.1876831054688, "learning_rate": 7.867596236761663e-06, "loss": 28.2889, "step": 187510 }, { "epoch": 0.3788022640869112, "grad_norm": 253.38307189941406, "learning_rate": 7.867310277791778e-06, "loss": 13.8486, "step": 187520 }, { "epoch": 0.378822464719595, "grad_norm": 843.4441528320312, "learning_rate": 7.867024304846971e-06, "loss": 18.8832, "step": 187530 }, { "epoch": 0.37884266535227884, "grad_norm": 611.51171875, "learning_rate": 7.866738317928636e-06, "loss": 21.2437, "step": 187540 }, { "epoch": 0.37886286598496266, "grad_norm": 191.75840759277344, "learning_rate": 7.866452317038164e-06, "loss": 30.4904, "step": 187550 }, { "epoch": 0.3788830666176465, "grad_norm": 491.4467468261719, "learning_rate": 7.866166302176952e-06, "loss": 20.1586, "step": 187560 }, { "epoch": 0.3789032672503303, "grad_norm": 470.33642578125, "learning_rate": 7.865880273346393e-06, "loss": 20.3936, "step": 187570 }, { "epoch": 0.3789234678830141, "grad_norm": 
204.30572509765625, "learning_rate": 7.865594230547882e-06, "loss": 48.1156, "step": 187580 }, { "epoch": 0.3789436685156979, "grad_norm": 312.6099548339844, "learning_rate": 7.865308173782812e-06, "loss": 33.8019, "step": 187590 }, { "epoch": 0.3789638691483817, "grad_norm": 699.95703125, "learning_rate": 7.865022103052578e-06, "loss": 31.6593, "step": 187600 }, { "epoch": 0.3789840697810655, "grad_norm": 188.8434600830078, "learning_rate": 7.864736018358571e-06, "loss": 22.8804, "step": 187610 }, { "epoch": 0.37900427041374934, "grad_norm": 341.70623779296875, "learning_rate": 7.864449919702192e-06, "loss": 27.6174, "step": 187620 }, { "epoch": 0.37902447104643316, "grad_norm": 656.0889282226562, "learning_rate": 7.864163807084831e-06, "loss": 22.6183, "step": 187630 }, { "epoch": 0.379044671679117, "grad_norm": 748.5330200195312, "learning_rate": 7.863877680507879e-06, "loss": 36.9047, "step": 187640 }, { "epoch": 0.3790648723118008, "grad_norm": 476.5628662109375, "learning_rate": 7.863591539972739e-06, "loss": 20.3583, "step": 187650 }, { "epoch": 0.3790850729444846, "grad_norm": 31.000638961791992, "learning_rate": 7.863305385480798e-06, "loss": 31.3927, "step": 187660 }, { "epoch": 0.37910527357716844, "grad_norm": 685.523193359375, "learning_rate": 7.863019217033456e-06, "loss": 22.1542, "step": 187670 }, { "epoch": 0.37912547420985226, "grad_norm": 569.0435791015625, "learning_rate": 7.862733034632105e-06, "loss": 23.0225, "step": 187680 }, { "epoch": 0.3791456748425361, "grad_norm": 2024.16259765625, "learning_rate": 7.862446838278139e-06, "loss": 64.9608, "step": 187690 }, { "epoch": 0.3791658754752199, "grad_norm": 159.88900756835938, "learning_rate": 7.862160627972956e-06, "loss": 23.9056, "step": 187700 }, { "epoch": 0.3791860761079037, "grad_norm": 371.289306640625, "learning_rate": 7.861874403717948e-06, "loss": 12.7317, "step": 187710 }, { "epoch": 0.3792062767405875, "grad_norm": 213.77850341796875, "learning_rate": 7.86158816551451e-06, "loss": 23.5957, "step": 187720 }, { "epoch": 0.3792264773732713, "grad_norm": 389.7715759277344, "learning_rate": 7.861301913364043e-06, "loss": 31.0234, "step": 187730 }, { "epoch": 0.37924667800595513, "grad_norm": 251.98199462890625, "learning_rate": 7.861015647267934e-06, "loss": 11.2755, "step": 187740 }, { "epoch": 0.37926687863863895, "grad_norm": 139.1150360107422, "learning_rate": 7.860729367227582e-06, "loss": 23.1484, "step": 187750 }, { "epoch": 0.37928707927132277, "grad_norm": 426.55010986328125, "learning_rate": 7.860443073244383e-06, "loss": 20.0323, "step": 187760 }, { "epoch": 0.3793072799040066, "grad_norm": 647.5835571289062, "learning_rate": 7.86015676531973e-06, "loss": 20.186, "step": 187770 }, { "epoch": 0.3793274805366904, "grad_norm": 164.36106872558594, "learning_rate": 7.859870443455021e-06, "loss": 27.9901, "step": 187780 }, { "epoch": 0.37934768116937423, "grad_norm": 460.61383056640625, "learning_rate": 7.85958410765165e-06, "loss": 16.9639, "step": 187790 }, { "epoch": 0.37936788180205805, "grad_norm": 141.99009704589844, "learning_rate": 7.859297757911013e-06, "loss": 13.2798, "step": 187800 }, { "epoch": 0.37938808243474187, "grad_norm": 229.5868377685547, "learning_rate": 7.859011394234506e-06, "loss": 42.6979, "step": 187810 }, { "epoch": 0.3794082830674257, "grad_norm": 331.86865234375, "learning_rate": 7.858725016623523e-06, "loss": 9.1058, "step": 187820 }, { "epoch": 0.3794284837001095, "grad_norm": 1173.534423828125, "learning_rate": 7.85843862507946e-06, "loss": 15.9746, "step": 187830 }, { 
"epoch": 0.37944868433279333, "grad_norm": 498.7364196777344, "learning_rate": 7.858152219603718e-06, "loss": 21.3509, "step": 187840 }, { "epoch": 0.3794688849654771, "grad_norm": 258.2376403808594, "learning_rate": 7.857865800197684e-06, "loss": 20.0005, "step": 187850 }, { "epoch": 0.3794890855981609, "grad_norm": 440.5531005859375, "learning_rate": 7.857579366862761e-06, "loss": 17.516, "step": 187860 }, { "epoch": 0.37950928623084473, "grad_norm": 453.6659240722656, "learning_rate": 7.857292919600343e-06, "loss": 18.621, "step": 187870 }, { "epoch": 0.37952948686352855, "grad_norm": 253.365234375, "learning_rate": 7.857006458411826e-06, "loss": 22.758, "step": 187880 }, { "epoch": 0.3795496874962124, "grad_norm": 468.333251953125, "learning_rate": 7.856719983298606e-06, "loss": 32.0136, "step": 187890 }, { "epoch": 0.3795698881288962, "grad_norm": 334.6289367675781, "learning_rate": 7.856433494262078e-06, "loss": 15.2881, "step": 187900 }, { "epoch": 0.37959008876158, "grad_norm": 213.6356658935547, "learning_rate": 7.856146991303641e-06, "loss": 24.8394, "step": 187910 }, { "epoch": 0.37961028939426383, "grad_norm": 389.71392822265625, "learning_rate": 7.85586047442469e-06, "loss": 27.6617, "step": 187920 }, { "epoch": 0.37963049002694765, "grad_norm": 776.3466796875, "learning_rate": 7.85557394362662e-06, "loss": 25.2853, "step": 187930 }, { "epoch": 0.3796506906596315, "grad_norm": 360.78558349609375, "learning_rate": 7.85528739891083e-06, "loss": 15.2057, "step": 187940 }, { "epoch": 0.3796708912923153, "grad_norm": 553.1041259765625, "learning_rate": 7.855000840278715e-06, "loss": 15.2743, "step": 187950 }, { "epoch": 0.3796910919249991, "grad_norm": 150.0745849609375, "learning_rate": 7.854714267731673e-06, "loss": 14.223, "step": 187960 }, { "epoch": 0.37971129255768293, "grad_norm": 296.0186462402344, "learning_rate": 7.8544276812711e-06, "loss": 33.1519, "step": 187970 }, { "epoch": 0.3797314931903667, "grad_norm": 34.12297439575195, "learning_rate": 7.85414108089839e-06, "loss": 12.3409, "step": 187980 }, { "epoch": 0.3797516938230505, "grad_norm": 495.8981018066406, "learning_rate": 7.853854466614945e-06, "loss": 26.7397, "step": 187990 }, { "epoch": 0.37977189445573434, "grad_norm": 47.39965057373047, "learning_rate": 7.85356783842216e-06, "loss": 25.0049, "step": 188000 }, { "epoch": 0.37979209508841816, "grad_norm": 214.90065002441406, "learning_rate": 7.85328119632143e-06, "loss": 10.1669, "step": 188010 }, { "epoch": 0.379812295721102, "grad_norm": 449.6014099121094, "learning_rate": 7.852994540314154e-06, "loss": 18.693, "step": 188020 }, { "epoch": 0.3798324963537858, "grad_norm": 692.6038208007812, "learning_rate": 7.852707870401728e-06, "loss": 18.3583, "step": 188030 }, { "epoch": 0.3798526969864696, "grad_norm": 385.4586486816406, "learning_rate": 7.85242118658555e-06, "loss": 14.5829, "step": 188040 }, { "epoch": 0.37987289761915344, "grad_norm": 284.35723876953125, "learning_rate": 7.852134488867017e-06, "loss": 6.5827, "step": 188050 }, { "epoch": 0.37989309825183726, "grad_norm": 339.4539794921875, "learning_rate": 7.851847777247528e-06, "loss": 23.9021, "step": 188060 }, { "epoch": 0.3799132988845211, "grad_norm": 331.1771545410156, "learning_rate": 7.851561051728478e-06, "loss": 24.8811, "step": 188070 }, { "epoch": 0.3799334995172049, "grad_norm": 773.5575561523438, "learning_rate": 7.851274312311266e-06, "loss": 28.8373, "step": 188080 }, { "epoch": 0.3799537001498887, "grad_norm": 521.5496826171875, "learning_rate": 7.850987558997287e-06, "loss": 
30.2774, "step": 188090 }, { "epoch": 0.37997390078257254, "grad_norm": 436.01220703125, "learning_rate": 7.850700791787941e-06, "loss": 27.6853, "step": 188100 }, { "epoch": 0.3799941014152563, "grad_norm": 329.2882385253906, "learning_rate": 7.850414010684626e-06, "loss": 12.8973, "step": 188110 }, { "epoch": 0.3800143020479401, "grad_norm": 623.2317504882812, "learning_rate": 7.85012721568874e-06, "loss": 25.3543, "step": 188120 }, { "epoch": 0.38003450268062394, "grad_norm": 219.14224243164062, "learning_rate": 7.849840406801676e-06, "loss": 22.3661, "step": 188130 }, { "epoch": 0.38005470331330776, "grad_norm": 538.2972412109375, "learning_rate": 7.849553584024836e-06, "loss": 26.4047, "step": 188140 }, { "epoch": 0.3800749039459916, "grad_norm": 560.73388671875, "learning_rate": 7.849266747359619e-06, "loss": 35.3035, "step": 188150 }, { "epoch": 0.3800951045786754, "grad_norm": 171.05557250976562, "learning_rate": 7.848979896807422e-06, "loss": 10.2947, "step": 188160 }, { "epoch": 0.3801153052113592, "grad_norm": 250.48715209960938, "learning_rate": 7.848693032369641e-06, "loss": 11.1912, "step": 188170 }, { "epoch": 0.38013550584404304, "grad_norm": 7.347401142120361, "learning_rate": 7.848406154047677e-06, "loss": 7.639, "step": 188180 }, { "epoch": 0.38015570647672686, "grad_norm": 639.7820434570312, "learning_rate": 7.848119261842926e-06, "loss": 21.6782, "step": 188190 }, { "epoch": 0.3801759071094107, "grad_norm": 395.41497802734375, "learning_rate": 7.847832355756788e-06, "loss": 16.0722, "step": 188200 }, { "epoch": 0.3801961077420945, "grad_norm": 200.6482696533203, "learning_rate": 7.84754543579066e-06, "loss": 20.1189, "step": 188210 }, { "epoch": 0.3802163083747783, "grad_norm": 895.510986328125, "learning_rate": 7.84725850194594e-06, "loss": 27.9294, "step": 188220 }, { "epoch": 0.3802365090074621, "grad_norm": 23.38909149169922, "learning_rate": 7.84697155422403e-06, "loss": 14.721, "step": 188230 }, { "epoch": 0.3802567096401459, "grad_norm": 57.09502029418945, "learning_rate": 7.846684592626324e-06, "loss": 17.3742, "step": 188240 }, { "epoch": 0.3802769102728297, "grad_norm": 953.9886474609375, "learning_rate": 7.846397617154223e-06, "loss": 50.8312, "step": 188250 }, { "epoch": 0.38029711090551355, "grad_norm": 270.4121398925781, "learning_rate": 7.846110627809123e-06, "loss": 22.7044, "step": 188260 }, { "epoch": 0.38031731153819737, "grad_norm": 826.6783447265625, "learning_rate": 7.845823624592427e-06, "loss": 24.6503, "step": 188270 }, { "epoch": 0.3803375121708812, "grad_norm": 792.712890625, "learning_rate": 7.845536607505533e-06, "loss": 23.5779, "step": 188280 }, { "epoch": 0.380357712803565, "grad_norm": 543.3534545898438, "learning_rate": 7.845249576549836e-06, "loss": 17.7022, "step": 188290 }, { "epoch": 0.3803779134362488, "grad_norm": 524.1748657226562, "learning_rate": 7.844962531726742e-06, "loss": 15.8757, "step": 188300 }, { "epoch": 0.38039811406893265, "grad_norm": 361.21148681640625, "learning_rate": 7.844675473037641e-06, "loss": 9.4886, "step": 188310 }, { "epoch": 0.38041831470161647, "grad_norm": 777.5284423828125, "learning_rate": 7.844388400483938e-06, "loss": 27.2019, "step": 188320 }, { "epoch": 0.3804385153343003, "grad_norm": 198.74984741210938, "learning_rate": 7.844101314067031e-06, "loss": 13.2818, "step": 188330 }, { "epoch": 0.3804587159669841, "grad_norm": 793.528564453125, "learning_rate": 7.843814213788322e-06, "loss": 21.0352, "step": 188340 }, { "epoch": 0.3804789165996679, "grad_norm": 568.158935546875, "learning_rate": 
7.843527099649204e-06, "loss": 37.7767, "step": 188350 }, { "epoch": 0.3804991172323517, "grad_norm": 539.360595703125, "learning_rate": 7.84323997165108e-06, "loss": 21.5503, "step": 188360 }, { "epoch": 0.3805193178650355, "grad_norm": 513.9918823242188, "learning_rate": 7.842952829795352e-06, "loss": 32.8909, "step": 188370 }, { "epoch": 0.38053951849771933, "grad_norm": 437.0247497558594, "learning_rate": 7.842665674083413e-06, "loss": 26.4054, "step": 188380 }, { "epoch": 0.38055971913040315, "grad_norm": 344.40313720703125, "learning_rate": 7.842378504516669e-06, "loss": 15.8336, "step": 188390 }, { "epoch": 0.38057991976308697, "grad_norm": 185.64108276367188, "learning_rate": 7.842091321096515e-06, "loss": 13.0182, "step": 188400 }, { "epoch": 0.3806001203957708, "grad_norm": 197.11865234375, "learning_rate": 7.841804123824354e-06, "loss": 18.4705, "step": 188410 }, { "epoch": 0.3806203210284546, "grad_norm": 168.9591522216797, "learning_rate": 7.841516912701585e-06, "loss": 33.6644, "step": 188420 }, { "epoch": 0.38064052166113843, "grad_norm": 539.0736083984375, "learning_rate": 7.841229687729606e-06, "loss": 20.0488, "step": 188430 }, { "epoch": 0.38066072229382225, "grad_norm": 7.903078556060791, "learning_rate": 7.840942448909818e-06, "loss": 22.0559, "step": 188440 }, { "epoch": 0.38068092292650607, "grad_norm": 370.5745849609375, "learning_rate": 7.84065519624362e-06, "loss": 18.5808, "step": 188450 }, { "epoch": 0.3807011235591899, "grad_norm": 0.0, "learning_rate": 7.840367929732415e-06, "loss": 9.9115, "step": 188460 }, { "epoch": 0.3807213241918737, "grad_norm": 14.133622169494629, "learning_rate": 7.840080649377602e-06, "loss": 19.3093, "step": 188470 }, { "epoch": 0.38074152482455753, "grad_norm": 271.5661315917969, "learning_rate": 7.839793355180578e-06, "loss": 22.0619, "step": 188480 }, { "epoch": 0.3807617254572413, "grad_norm": 927.3042602539062, "learning_rate": 7.839506047142747e-06, "loss": 24.626, "step": 188490 }, { "epoch": 0.3807819260899251, "grad_norm": 314.3900451660156, "learning_rate": 7.839218725265507e-06, "loss": 51.4573, "step": 188500 }, { "epoch": 0.38080212672260894, "grad_norm": 221.56214904785156, "learning_rate": 7.83893138955026e-06, "loss": 13.1621, "step": 188510 }, { "epoch": 0.38082232735529276, "grad_norm": 651.3807983398438, "learning_rate": 7.838644039998405e-06, "loss": 17.0011, "step": 188520 }, { "epoch": 0.3808425279879766, "grad_norm": 400.63250732421875, "learning_rate": 7.838356676611345e-06, "loss": 22.624, "step": 188530 }, { "epoch": 0.3808627286206604, "grad_norm": 308.29071044921875, "learning_rate": 7.838069299390476e-06, "loss": 13.878, "step": 188540 }, { "epoch": 0.3808829292533442, "grad_norm": 62.3780517578125, "learning_rate": 7.837781908337204e-06, "loss": 11.383, "step": 188550 }, { "epoch": 0.38090312988602804, "grad_norm": 837.8966064453125, "learning_rate": 7.837494503452925e-06, "loss": 25.1983, "step": 188560 }, { "epoch": 0.38092333051871186, "grad_norm": 241.6901397705078, "learning_rate": 7.837207084739044e-06, "loss": 30.5917, "step": 188570 }, { "epoch": 0.3809435311513957, "grad_norm": 537.393798828125, "learning_rate": 7.83691965219696e-06, "loss": 14.8654, "step": 188580 }, { "epoch": 0.3809637317840795, "grad_norm": 0.5184242725372314, "learning_rate": 7.836632205828072e-06, "loss": 23.3382, "step": 188590 }, { "epoch": 0.3809839324167633, "grad_norm": 377.3197937011719, "learning_rate": 7.836344745633785e-06, "loss": 26.2711, "step": 188600 }, { "epoch": 0.38100413304944714, "grad_norm": 
266.58966064453125, "learning_rate": 7.836057271615496e-06, "loss": 16.5561, "step": 188610 }, { "epoch": 0.3810243336821309, "grad_norm": 604.7203369140625, "learning_rate": 7.835769783774606e-06, "loss": 29.1615, "step": 188620 }, { "epoch": 0.3810445343148147, "grad_norm": 451.97906494140625, "learning_rate": 7.83548228211252e-06, "loss": 10.576, "step": 188630 }, { "epoch": 0.38106473494749854, "grad_norm": 465.3996276855469, "learning_rate": 7.835194766630638e-06, "loss": 22.2135, "step": 188640 }, { "epoch": 0.38108493558018236, "grad_norm": 342.02655029296875, "learning_rate": 7.834907237330359e-06, "loss": 13.0836, "step": 188650 }, { "epoch": 0.3811051362128662, "grad_norm": 619.200927734375, "learning_rate": 7.834619694213087e-06, "loss": 17.6876, "step": 188660 }, { "epoch": 0.38112533684555, "grad_norm": 657.8128662109375, "learning_rate": 7.83433213728022e-06, "loss": 18.6648, "step": 188670 }, { "epoch": 0.3811455374782338, "grad_norm": 333.6239318847656, "learning_rate": 7.834044566533166e-06, "loss": 26.969, "step": 188680 }, { "epoch": 0.38116573811091764, "grad_norm": 335.3335876464844, "learning_rate": 7.833756981973321e-06, "loss": 46.9242, "step": 188690 }, { "epoch": 0.38118593874360146, "grad_norm": 618.9785766601562, "learning_rate": 7.833469383602086e-06, "loss": 20.7087, "step": 188700 }, { "epoch": 0.3812061393762853, "grad_norm": 325.0849304199219, "learning_rate": 7.833181771420869e-06, "loss": 34.5737, "step": 188710 }, { "epoch": 0.3812263400089691, "grad_norm": 586.0824584960938, "learning_rate": 7.832894145431062e-06, "loss": 14.6962, "step": 188720 }, { "epoch": 0.3812465406416529, "grad_norm": 134.80641174316406, "learning_rate": 7.832606505634077e-06, "loss": 14.8707, "step": 188730 }, { "epoch": 0.3812667412743367, "grad_norm": 677.0626831054688, "learning_rate": 7.832318852031311e-06, "loss": 39.5213, "step": 188740 }, { "epoch": 0.3812869419070205, "grad_norm": 378.0164794921875, "learning_rate": 7.832031184624165e-06, "loss": 11.0117, "step": 188750 }, { "epoch": 0.3813071425397043, "grad_norm": 555.2911987304688, "learning_rate": 7.831743503414043e-06, "loss": 24.0668, "step": 188760 }, { "epoch": 0.38132734317238814, "grad_norm": 252.79132080078125, "learning_rate": 7.831455808402348e-06, "loss": 18.3286, "step": 188770 }, { "epoch": 0.38134754380507196, "grad_norm": 151.18746948242188, "learning_rate": 7.831168099590478e-06, "loss": 12.5021, "step": 188780 }, { "epoch": 0.3813677444377558, "grad_norm": 309.6161193847656, "learning_rate": 7.83088037697984e-06, "loss": 17.4055, "step": 188790 }, { "epoch": 0.3813879450704396, "grad_norm": 1953.6256103515625, "learning_rate": 7.830592640571833e-06, "loss": 22.8445, "step": 188800 }, { "epoch": 0.3814081457031234, "grad_norm": 371.9723205566406, "learning_rate": 7.830304890367862e-06, "loss": 12.5413, "step": 188810 }, { "epoch": 0.38142834633580724, "grad_norm": 329.8029479980469, "learning_rate": 7.83001712636933e-06, "loss": 25.8127, "step": 188820 }, { "epoch": 0.38144854696849106, "grad_norm": 619.4336547851562, "learning_rate": 7.829729348577636e-06, "loss": 27.3079, "step": 188830 }, { "epoch": 0.3814687476011749, "grad_norm": 45.39575958251953, "learning_rate": 7.829441556994182e-06, "loss": 16.6073, "step": 188840 }, { "epoch": 0.3814889482338587, "grad_norm": 319.5869140625, "learning_rate": 7.829153751620375e-06, "loss": 11.1007, "step": 188850 }, { "epoch": 0.3815091488665425, "grad_norm": 255.23594665527344, "learning_rate": 7.828865932457617e-06, "loss": 34.4464, "step": 188860 }, { 
"epoch": 0.3815293494992263, "grad_norm": 678.3028564453125, "learning_rate": 7.828578099507308e-06, "loss": 19.8113, "step": 188870 }, { "epoch": 0.3815495501319101, "grad_norm": 205.38990783691406, "learning_rate": 7.828290252770852e-06, "loss": 25.729, "step": 188880 }, { "epoch": 0.38156975076459393, "grad_norm": 359.9259338378906, "learning_rate": 7.828002392249654e-06, "loss": 19.4046, "step": 188890 }, { "epoch": 0.38158995139727775, "grad_norm": 429.4687805175781, "learning_rate": 7.827714517945116e-06, "loss": 20.4328, "step": 188900 }, { "epoch": 0.38161015202996157, "grad_norm": 564.55322265625, "learning_rate": 7.827426629858636e-06, "loss": 9.6021, "step": 188910 }, { "epoch": 0.3816303526626454, "grad_norm": 803.4877319335938, "learning_rate": 7.827138727991625e-06, "loss": 20.2551, "step": 188920 }, { "epoch": 0.3816505532953292, "grad_norm": 921.0742797851562, "learning_rate": 7.826850812345484e-06, "loss": 18.397, "step": 188930 }, { "epoch": 0.38167075392801303, "grad_norm": 560.7493896484375, "learning_rate": 7.826562882921613e-06, "loss": 40.604, "step": 188940 }, { "epoch": 0.38169095456069685, "grad_norm": 286.7036437988281, "learning_rate": 7.826274939721417e-06, "loss": 16.4252, "step": 188950 }, { "epoch": 0.38171115519338067, "grad_norm": 304.8981628417969, "learning_rate": 7.8259869827463e-06, "loss": 24.3084, "step": 188960 }, { "epoch": 0.3817313558260645, "grad_norm": 233.96810913085938, "learning_rate": 7.825699011997665e-06, "loss": 19.5671, "step": 188970 }, { "epoch": 0.3817515564587483, "grad_norm": 263.20111083984375, "learning_rate": 7.825411027476917e-06, "loss": 19.3988, "step": 188980 }, { "epoch": 0.38177175709143213, "grad_norm": 293.4358825683594, "learning_rate": 7.825123029185457e-06, "loss": 29.9489, "step": 188990 }, { "epoch": 0.3817919577241159, "grad_norm": 225.1694793701172, "learning_rate": 7.82483501712469e-06, "loss": 18.0091, "step": 189000 }, { "epoch": 0.3818121583567997, "grad_norm": 581.0350952148438, "learning_rate": 7.824546991296021e-06, "loss": 26.3481, "step": 189010 }, { "epoch": 0.38183235898948353, "grad_norm": 126.13812255859375, "learning_rate": 7.824258951700852e-06, "loss": 15.3107, "step": 189020 }, { "epoch": 0.38185255962216735, "grad_norm": 272.8997497558594, "learning_rate": 7.823970898340587e-06, "loss": 24.682, "step": 189030 }, { "epoch": 0.3818727602548512, "grad_norm": 316.7326354980469, "learning_rate": 7.82368283121663e-06, "loss": 24.832, "step": 189040 }, { "epoch": 0.381892960887535, "grad_norm": 269.50006103515625, "learning_rate": 7.823394750330386e-06, "loss": 10.9026, "step": 189050 }, { "epoch": 0.3819131615202188, "grad_norm": 538.9169921875, "learning_rate": 7.823106655683259e-06, "loss": 18.1382, "step": 189060 }, { "epoch": 0.38193336215290263, "grad_norm": 234.98548889160156, "learning_rate": 7.822818547276652e-06, "loss": 12.2784, "step": 189070 }, { "epoch": 0.38195356278558645, "grad_norm": 485.49481201171875, "learning_rate": 7.822530425111969e-06, "loss": 8.7811, "step": 189080 }, { "epoch": 0.3819737634182703, "grad_norm": 375.4350891113281, "learning_rate": 7.822242289190615e-06, "loss": 20.5474, "step": 189090 }, { "epoch": 0.3819939640509541, "grad_norm": 387.18035888671875, "learning_rate": 7.821954139513997e-06, "loss": 20.0211, "step": 189100 }, { "epoch": 0.3820141646836379, "grad_norm": 20.593969345092773, "learning_rate": 7.821665976083515e-06, "loss": 19.7231, "step": 189110 }, { "epoch": 0.38203436531632173, "grad_norm": 537.4613647460938, "learning_rate": 7.821377798900574e-06, 
"loss": 14.9787, "step": 189120 }, { "epoch": 0.3820545659490055, "grad_norm": 420.259521484375, "learning_rate": 7.82108960796658e-06, "loss": 36.6597, "step": 189130 }, { "epoch": 0.3820747665816893, "grad_norm": 981.8267211914062, "learning_rate": 7.82080140328294e-06, "loss": 20.7919, "step": 189140 }, { "epoch": 0.38209496721437314, "grad_norm": 56.50301742553711, "learning_rate": 7.820513184851052e-06, "loss": 35.5867, "step": 189150 }, { "epoch": 0.38211516784705696, "grad_norm": 23.353208541870117, "learning_rate": 7.820224952672329e-06, "loss": 19.9272, "step": 189160 }, { "epoch": 0.3821353684797408, "grad_norm": 14.043118476867676, "learning_rate": 7.819936706748168e-06, "loss": 28.0067, "step": 189170 }, { "epoch": 0.3821555691124246, "grad_norm": 138.26553344726562, "learning_rate": 7.81964844707998e-06, "loss": 28.2015, "step": 189180 }, { "epoch": 0.3821757697451084, "grad_norm": 266.5179748535156, "learning_rate": 7.819360173669168e-06, "loss": 19.5376, "step": 189190 }, { "epoch": 0.38219597037779224, "grad_norm": 461.5394287109375, "learning_rate": 7.819071886517134e-06, "loss": 16.4602, "step": 189200 }, { "epoch": 0.38221617101047606, "grad_norm": 522.180908203125, "learning_rate": 7.818783585625287e-06, "loss": 40.6513, "step": 189210 }, { "epoch": 0.3822363716431599, "grad_norm": 253.56768798828125, "learning_rate": 7.818495270995031e-06, "loss": 17.4649, "step": 189220 }, { "epoch": 0.3822565722758437, "grad_norm": 177.39678955078125, "learning_rate": 7.81820694262777e-06, "loss": 25.4444, "step": 189230 }, { "epoch": 0.3822767729085275, "grad_norm": 196.9581298828125, "learning_rate": 7.81791860052491e-06, "loss": 11.0922, "step": 189240 }, { "epoch": 0.38229697354121134, "grad_norm": 421.8256530761719, "learning_rate": 7.817630244687857e-06, "loss": 22.3353, "step": 189250 }, { "epoch": 0.3823171741738951, "grad_norm": 413.8868103027344, "learning_rate": 7.817341875118016e-06, "loss": 22.3017, "step": 189260 }, { "epoch": 0.3823373748065789, "grad_norm": 295.10400390625, "learning_rate": 7.817053491816794e-06, "loss": 15.9826, "step": 189270 }, { "epoch": 0.38235757543926274, "grad_norm": 304.6611022949219, "learning_rate": 7.816765094785593e-06, "loss": 13.6321, "step": 189280 }, { "epoch": 0.38237777607194656, "grad_norm": 149.24656677246094, "learning_rate": 7.81647668402582e-06, "loss": 15.778, "step": 189290 }, { "epoch": 0.3823979767046304, "grad_norm": 195.9346160888672, "learning_rate": 7.816188259538885e-06, "loss": 18.8319, "step": 189300 }, { "epoch": 0.3824181773373142, "grad_norm": 85.01852416992188, "learning_rate": 7.815899821326185e-06, "loss": 33.3886, "step": 189310 }, { "epoch": 0.382438377969998, "grad_norm": 1904.7947998046875, "learning_rate": 7.815611369389134e-06, "loss": 20.0872, "step": 189320 }, { "epoch": 0.38245857860268184, "grad_norm": 350.1895446777344, "learning_rate": 7.815322903729133e-06, "loss": 16.7494, "step": 189330 }, { "epoch": 0.38247877923536566, "grad_norm": 396.7063293457031, "learning_rate": 7.81503442434759e-06, "loss": 13.4726, "step": 189340 }, { "epoch": 0.3824989798680495, "grad_norm": 592.1580200195312, "learning_rate": 7.814745931245911e-06, "loss": 31.8094, "step": 189350 }, { "epoch": 0.3825191805007333, "grad_norm": 358.4219665527344, "learning_rate": 7.814457424425501e-06, "loss": 22.111, "step": 189360 }, { "epoch": 0.3825393811334171, "grad_norm": 7.479894161224365, "learning_rate": 7.814168903887768e-06, "loss": 16.0946, "step": 189370 }, { "epoch": 0.3825595817661009, "grad_norm": 439.0794982910156, 
"learning_rate": 7.813880369634114e-06, "loss": 18.5301, "step": 189380 }, { "epoch": 0.3825797823987847, "grad_norm": 388.0850524902344, "learning_rate": 7.813591821665953e-06, "loss": 23.1441, "step": 189390 }, { "epoch": 0.3825999830314685, "grad_norm": 117.37612915039062, "learning_rate": 7.813303259984685e-06, "loss": 12.417, "step": 189400 }, { "epoch": 0.38262018366415235, "grad_norm": 505.7078857421875, "learning_rate": 7.813014684591718e-06, "loss": 67.48, "step": 189410 }, { "epoch": 0.38264038429683617, "grad_norm": 416.0635986328125, "learning_rate": 7.812726095488457e-06, "loss": 41.8872, "step": 189420 }, { "epoch": 0.38266058492952, "grad_norm": 606.1459350585938, "learning_rate": 7.812437492676312e-06, "loss": 19.2614, "step": 189430 }, { "epoch": 0.3826807855622038, "grad_norm": 346.3567199707031, "learning_rate": 7.812148876156687e-06, "loss": 17.6643, "step": 189440 }, { "epoch": 0.3827009861948876, "grad_norm": 43.39950942993164, "learning_rate": 7.81186024593099e-06, "loss": 34.3869, "step": 189450 }, { "epoch": 0.38272118682757145, "grad_norm": 2429.51220703125, "learning_rate": 7.811571602000628e-06, "loss": 42.3883, "step": 189460 }, { "epoch": 0.38274138746025527, "grad_norm": 26.305356979370117, "learning_rate": 7.811282944367004e-06, "loss": 14.7009, "step": 189470 }, { "epoch": 0.3827615880929391, "grad_norm": 343.4811706542969, "learning_rate": 7.810994273031532e-06, "loss": 22.4447, "step": 189480 }, { "epoch": 0.3827817887256229, "grad_norm": 330.87255859375, "learning_rate": 7.81070558799561e-06, "loss": 6.6134, "step": 189490 }, { "epoch": 0.3828019893583067, "grad_norm": 556.8169555664062, "learning_rate": 7.810416889260653e-06, "loss": 27.3821, "step": 189500 }, { "epoch": 0.3828221899909905, "grad_norm": 748.4137573242188, "learning_rate": 7.810128176828065e-06, "loss": 26.3715, "step": 189510 }, { "epoch": 0.3828423906236743, "grad_norm": 293.07574462890625, "learning_rate": 7.809839450699253e-06, "loss": 16.7798, "step": 189520 }, { "epoch": 0.38286259125635813, "grad_norm": 874.266357421875, "learning_rate": 7.809550710875624e-06, "loss": 16.807, "step": 189530 }, { "epoch": 0.38288279188904195, "grad_norm": 379.8854675292969, "learning_rate": 7.809261957358585e-06, "loss": 20.7475, "step": 189540 }, { "epoch": 0.38290299252172577, "grad_norm": 809.4370727539062, "learning_rate": 7.808973190149544e-06, "loss": 23.3438, "step": 189550 }, { "epoch": 0.3829231931544096, "grad_norm": 867.1205444335938, "learning_rate": 7.80868440924991e-06, "loss": 21.947, "step": 189560 }, { "epoch": 0.3829433937870934, "grad_norm": 23.575105667114258, "learning_rate": 7.808395614661086e-06, "loss": 4.3154, "step": 189570 }, { "epoch": 0.38296359441977723, "grad_norm": 314.0118103027344, "learning_rate": 7.808106806384484e-06, "loss": 22.6902, "step": 189580 }, { "epoch": 0.38298379505246105, "grad_norm": 475.39581298828125, "learning_rate": 7.80781798442151e-06, "loss": 31.9625, "step": 189590 }, { "epoch": 0.38300399568514487, "grad_norm": 474.6445617675781, "learning_rate": 7.807529148773572e-06, "loss": 20.4564, "step": 189600 }, { "epoch": 0.3830241963178287, "grad_norm": 422.025390625, "learning_rate": 7.807240299442078e-06, "loss": 16.3724, "step": 189610 }, { "epoch": 0.3830443969505125, "grad_norm": 716.3912963867188, "learning_rate": 7.806951436428433e-06, "loss": 21.4261, "step": 189620 }, { "epoch": 0.38306459758319633, "grad_norm": 161.43482971191406, "learning_rate": 7.80666255973405e-06, "loss": 34.0578, "step": 189630 }, { "epoch": 0.3830847982158801, 
"grad_norm": 570.9780883789062, "learning_rate": 7.806373669360332e-06, "loss": 25.1385, "step": 189640 }, { "epoch": 0.3831049988485639, "grad_norm": 509.7627868652344, "learning_rate": 7.80608476530869e-06, "loss": 14.3529, "step": 189650 }, { "epoch": 0.38312519948124774, "grad_norm": 294.26800537109375, "learning_rate": 7.80579584758053e-06, "loss": 27.4212, "step": 189660 }, { "epoch": 0.38314540011393156, "grad_norm": 307.59808349609375, "learning_rate": 7.805506916177263e-06, "loss": 8.6388, "step": 189670 }, { "epoch": 0.3831656007466154, "grad_norm": 793.291259765625, "learning_rate": 7.805217971100295e-06, "loss": 36.2088, "step": 189680 }, { "epoch": 0.3831858013792992, "grad_norm": 297.2881164550781, "learning_rate": 7.804929012351034e-06, "loss": 12.2604, "step": 189690 }, { "epoch": 0.383206002011983, "grad_norm": 621.73876953125, "learning_rate": 7.80464003993089e-06, "loss": 21.661, "step": 189700 }, { "epoch": 0.38322620264466684, "grad_norm": 266.7144775390625, "learning_rate": 7.80435105384127e-06, "loss": 14.7892, "step": 189710 }, { "epoch": 0.38324640327735066, "grad_norm": 389.62469482421875, "learning_rate": 7.804062054083585e-06, "loss": 15.0259, "step": 189720 }, { "epoch": 0.3832666039100345, "grad_norm": 1625.658935546875, "learning_rate": 7.803773040659239e-06, "loss": 29.3019, "step": 189730 }, { "epoch": 0.3832868045427183, "grad_norm": 314.7716064453125, "learning_rate": 7.803484013569644e-06, "loss": 22.3522, "step": 189740 }, { "epoch": 0.3833070051754021, "grad_norm": 456.04412841796875, "learning_rate": 7.80319497281621e-06, "loss": 34.2024, "step": 189750 }, { "epoch": 0.38332720580808594, "grad_norm": 0.025324681773781776, "learning_rate": 7.802905918400342e-06, "loss": 18.5097, "step": 189760 }, { "epoch": 0.3833474064407697, "grad_norm": 363.8247375488281, "learning_rate": 7.80261685032345e-06, "loss": 18.735, "step": 189770 }, { "epoch": 0.3833676070734535, "grad_norm": 132.87318420410156, "learning_rate": 7.802327768586944e-06, "loss": 10.1872, "step": 189780 }, { "epoch": 0.38338780770613734, "grad_norm": 319.84515380859375, "learning_rate": 7.802038673192233e-06, "loss": 13.8941, "step": 189790 }, { "epoch": 0.38340800833882116, "grad_norm": 166.3119659423828, "learning_rate": 7.801749564140724e-06, "loss": 38.4708, "step": 189800 }, { "epoch": 0.383428208971505, "grad_norm": 551.60205078125, "learning_rate": 7.801460441433828e-06, "loss": 11.4967, "step": 189810 }, { "epoch": 0.3834484096041888, "grad_norm": 486.2964782714844, "learning_rate": 7.801171305072954e-06, "loss": 54.4579, "step": 189820 }, { "epoch": 0.3834686102368726, "grad_norm": 522.7513427734375, "learning_rate": 7.80088215505951e-06, "loss": 29.501, "step": 189830 }, { "epoch": 0.38348881086955644, "grad_norm": 342.9999694824219, "learning_rate": 7.800592991394906e-06, "loss": 20.4978, "step": 189840 }, { "epoch": 0.38350901150224026, "grad_norm": 182.85626220703125, "learning_rate": 7.800303814080552e-06, "loss": 15.3206, "step": 189850 }, { "epoch": 0.3835292121349241, "grad_norm": 917.25830078125, "learning_rate": 7.800014623117858e-06, "loss": 39.3917, "step": 189860 }, { "epoch": 0.3835494127676079, "grad_norm": 296.2469177246094, "learning_rate": 7.799725418508231e-06, "loss": 42.9546, "step": 189870 }, { "epoch": 0.3835696134002917, "grad_norm": 525.8936767578125, "learning_rate": 7.799436200253082e-06, "loss": 17.0192, "step": 189880 }, { "epoch": 0.38358981403297554, "grad_norm": 544.18359375, "learning_rate": 7.79914696835382e-06, "loss": 27.5664, "step": 189890 }, { 
"epoch": 0.3836100146656593, "grad_norm": 309.77520751953125, "learning_rate": 7.798857722811857e-06, "loss": 33.1132, "step": 189900 }, { "epoch": 0.3836302152983431, "grad_norm": 788.8636474609375, "learning_rate": 7.798568463628597e-06, "loss": 16.2416, "step": 189910 }, { "epoch": 0.38365041593102694, "grad_norm": 363.8883972167969, "learning_rate": 7.798279190805458e-06, "loss": 16.0867, "step": 189920 }, { "epoch": 0.38367061656371076, "grad_norm": 475.0231018066406, "learning_rate": 7.797989904343844e-06, "loss": 21.7322, "step": 189930 }, { "epoch": 0.3836908171963946, "grad_norm": 297.6060485839844, "learning_rate": 7.797700604245166e-06, "loss": 21.8392, "step": 189940 }, { "epoch": 0.3837110178290784, "grad_norm": 308.3179016113281, "learning_rate": 7.797411290510836e-06, "loss": 24.0743, "step": 189950 }, { "epoch": 0.3837312184617622, "grad_norm": 385.8530578613281, "learning_rate": 7.797121963142263e-06, "loss": 16.2976, "step": 189960 }, { "epoch": 0.38375141909444604, "grad_norm": 850.6321411132812, "learning_rate": 7.796832622140854e-06, "loss": 17.7315, "step": 189970 }, { "epoch": 0.38377161972712986, "grad_norm": 74.19915771484375, "learning_rate": 7.796543267508023e-06, "loss": 26.2938, "step": 189980 }, { "epoch": 0.3837918203598137, "grad_norm": 147.29254150390625, "learning_rate": 7.79625389924518e-06, "loss": 19.7365, "step": 189990 }, { "epoch": 0.3838120209924975, "grad_norm": 649.164306640625, "learning_rate": 7.795964517353734e-06, "loss": 26.2491, "step": 190000 }, { "epoch": 0.3838322216251813, "grad_norm": 12.49026107788086, "learning_rate": 7.795675121835099e-06, "loss": 12.423, "step": 190010 }, { "epoch": 0.3838524222578651, "grad_norm": 535.0386962890625, "learning_rate": 7.795385712690678e-06, "loss": 19.0475, "step": 190020 }, { "epoch": 0.3838726228905489, "grad_norm": 294.6451721191406, "learning_rate": 7.795096289921888e-06, "loss": 40.8549, "step": 190030 }, { "epoch": 0.38389282352323273, "grad_norm": 517.9992065429688, "learning_rate": 7.794806853530139e-06, "loss": 28.8375, "step": 190040 }, { "epoch": 0.38391302415591655, "grad_norm": 400.79620361328125, "learning_rate": 7.79451740351684e-06, "loss": 26.9044, "step": 190050 }, { "epoch": 0.38393322478860037, "grad_norm": 626.6098022460938, "learning_rate": 7.7942279398834e-06, "loss": 31.716, "step": 190060 }, { "epoch": 0.3839534254212842, "grad_norm": 429.52264404296875, "learning_rate": 7.793938462631233e-06, "loss": 23.1419, "step": 190070 }, { "epoch": 0.383973626053968, "grad_norm": 595.3565063476562, "learning_rate": 7.79364897176175e-06, "loss": 25.5712, "step": 190080 }, { "epoch": 0.38399382668665183, "grad_norm": 801.8369750976562, "learning_rate": 7.79335946727636e-06, "loss": 26.2808, "step": 190090 }, { "epoch": 0.38401402731933565, "grad_norm": 566.8032836914062, "learning_rate": 7.793069949176474e-06, "loss": 19.8818, "step": 190100 }, { "epoch": 0.38403422795201947, "grad_norm": 537.2616577148438, "learning_rate": 7.792780417463505e-06, "loss": 33.2724, "step": 190110 }, { "epoch": 0.3840544285847033, "grad_norm": 444.0425109863281, "learning_rate": 7.792490872138861e-06, "loss": 30.7525, "step": 190120 }, { "epoch": 0.3840746292173871, "grad_norm": 251.57086181640625, "learning_rate": 7.792201313203957e-06, "loss": 24.0304, "step": 190130 }, { "epoch": 0.38409482985007093, "grad_norm": 126.93512725830078, "learning_rate": 7.791911740660203e-06, "loss": 17.5722, "step": 190140 }, { "epoch": 0.3841150304827547, "grad_norm": 23.59386444091797, "learning_rate": 
7.791622154509008e-06, "loss": 25.2037, "step": 190150 }, { "epoch": 0.3841352311154385, "grad_norm": 170.85472106933594, "learning_rate": 7.791332554751784e-06, "loss": 12.2092, "step": 190160 }, { "epoch": 0.38415543174812233, "grad_norm": 418.3758239746094, "learning_rate": 7.791042941389948e-06, "loss": 15.9761, "step": 190170 }, { "epoch": 0.38417563238080615, "grad_norm": 331.4839172363281, "learning_rate": 7.790753314424903e-06, "loss": 30.2002, "step": 190180 }, { "epoch": 0.38419583301349, "grad_norm": 347.7175598144531, "learning_rate": 7.790463673858069e-06, "loss": 31.4372, "step": 190190 }, { "epoch": 0.3842160336461738, "grad_norm": 256.3693542480469, "learning_rate": 7.79017401969085e-06, "loss": 16.3942, "step": 190200 }, { "epoch": 0.3842362342788576, "grad_norm": 512.9190063476562, "learning_rate": 7.789884351924662e-06, "loss": 23.2703, "step": 190210 }, { "epoch": 0.38425643491154143, "grad_norm": 211.8291778564453, "learning_rate": 7.789594670560917e-06, "loss": 26.066, "step": 190220 }, { "epoch": 0.38427663554422525, "grad_norm": 1608.3006591796875, "learning_rate": 7.789304975601025e-06, "loss": 23.6601, "step": 190230 }, { "epoch": 0.3842968361769091, "grad_norm": 259.23809814453125, "learning_rate": 7.789015267046399e-06, "loss": 9.3303, "step": 190240 }, { "epoch": 0.3843170368095929, "grad_norm": 403.8951110839844, "learning_rate": 7.788725544898452e-06, "loss": 22.6142, "step": 190250 }, { "epoch": 0.3843372374422767, "grad_norm": 600.2481689453125, "learning_rate": 7.788435809158593e-06, "loss": 31.1368, "step": 190260 }, { "epoch": 0.38435743807496053, "grad_norm": 284.01580810546875, "learning_rate": 7.788146059828238e-06, "loss": 26.3911, "step": 190270 }, { "epoch": 0.3843776387076443, "grad_norm": 484.2990417480469, "learning_rate": 7.787856296908795e-06, "loss": 25.2612, "step": 190280 }, { "epoch": 0.3843978393403281, "grad_norm": 481.3173522949219, "learning_rate": 7.787566520401681e-06, "loss": 53.1752, "step": 190290 }, { "epoch": 0.38441803997301194, "grad_norm": 470.0443115234375, "learning_rate": 7.787276730308304e-06, "loss": 32.446, "step": 190300 }, { "epoch": 0.38443824060569576, "grad_norm": 302.90643310546875, "learning_rate": 7.786986926630079e-06, "loss": 26.6058, "step": 190310 }, { "epoch": 0.3844584412383796, "grad_norm": 445.5045166015625, "learning_rate": 7.786697109368418e-06, "loss": 18.3555, "step": 190320 }, { "epoch": 0.3844786418710634, "grad_norm": 165.96913146972656, "learning_rate": 7.786407278524733e-06, "loss": 15.2357, "step": 190330 }, { "epoch": 0.3844988425037472, "grad_norm": 256.4248046875, "learning_rate": 7.786117434100438e-06, "loss": 13.3223, "step": 190340 }, { "epoch": 0.38451904313643104, "grad_norm": 491.4432067871094, "learning_rate": 7.785827576096943e-06, "loss": 16.5863, "step": 190350 }, { "epoch": 0.38453924376911486, "grad_norm": 549.3370971679688, "learning_rate": 7.785537704515662e-06, "loss": 11.8801, "step": 190360 }, { "epoch": 0.3845594444017987, "grad_norm": 1140.859619140625, "learning_rate": 7.785247819358009e-06, "loss": 26.7579, "step": 190370 }, { "epoch": 0.3845796450344825, "grad_norm": 249.52108764648438, "learning_rate": 7.784957920625396e-06, "loss": 40.0003, "step": 190380 }, { "epoch": 0.3845998456671663, "grad_norm": 897.1212768554688, "learning_rate": 7.784668008319235e-06, "loss": 20.8859, "step": 190390 }, { "epoch": 0.38462004629985014, "grad_norm": 297.2471618652344, "learning_rate": 7.78437808244094e-06, "loss": 24.5372, "step": 190400 }, { "epoch": 0.3846402469325339, 
"grad_norm": 182.60113525390625, "learning_rate": 7.784088142991926e-06, "loss": 32.806, "step": 190410 }, { "epoch": 0.3846604475652177, "grad_norm": 286.5628967285156, "learning_rate": 7.783798189973601e-06, "loss": 15.1996, "step": 190420 }, { "epoch": 0.38468064819790154, "grad_norm": 292.6827392578125, "learning_rate": 7.783508223387384e-06, "loss": 8.7446, "step": 190430 }, { "epoch": 0.38470084883058536, "grad_norm": 1819.3795166015625, "learning_rate": 7.783218243234684e-06, "loss": 17.3953, "step": 190440 }, { "epoch": 0.3847210494632692, "grad_norm": 210.9979248046875, "learning_rate": 7.782928249516915e-06, "loss": 15.8199, "step": 190450 }, { "epoch": 0.384741250095953, "grad_norm": 94.75699615478516, "learning_rate": 7.782638242235493e-06, "loss": 18.1524, "step": 190460 }, { "epoch": 0.3847614507286368, "grad_norm": 125.23542022705078, "learning_rate": 7.782348221391828e-06, "loss": 18.0945, "step": 190470 }, { "epoch": 0.38478165136132064, "grad_norm": 614.4030151367188, "learning_rate": 7.782058186987337e-06, "loss": 16.2525, "step": 190480 }, { "epoch": 0.38480185199400446, "grad_norm": 588.0127563476562, "learning_rate": 7.781768139023431e-06, "loss": 18.1065, "step": 190490 }, { "epoch": 0.3848220526266883, "grad_norm": 94.29621124267578, "learning_rate": 7.781478077501526e-06, "loss": 25.8049, "step": 190500 }, { "epoch": 0.3848422532593721, "grad_norm": 663.5479736328125, "learning_rate": 7.78118800242303e-06, "loss": 17.9266, "step": 190510 }, { "epoch": 0.3848624538920559, "grad_norm": 362.5411071777344, "learning_rate": 7.780897913789364e-06, "loss": 15.5323, "step": 190520 }, { "epoch": 0.38488265452473974, "grad_norm": 860.7693481445312, "learning_rate": 7.780607811601939e-06, "loss": 31.4978, "step": 190530 }, { "epoch": 0.3849028551574235, "grad_norm": 536.4878540039062, "learning_rate": 7.78031769586217e-06, "loss": 33.0594, "step": 190540 }, { "epoch": 0.3849230557901073, "grad_norm": 372.718994140625, "learning_rate": 7.780027566571467e-06, "loss": 18.7467, "step": 190550 }, { "epoch": 0.38494325642279115, "grad_norm": 238.12620544433594, "learning_rate": 7.779737423731248e-06, "loss": 14.4867, "step": 190560 }, { "epoch": 0.38496345705547497, "grad_norm": 751.9949340820312, "learning_rate": 7.779447267342926e-06, "loss": 27.8818, "step": 190570 }, { "epoch": 0.3849836576881588, "grad_norm": 244.15658569335938, "learning_rate": 7.779157097407914e-06, "loss": 22.7166, "step": 190580 }, { "epoch": 0.3850038583208426, "grad_norm": 751.5225219726562, "learning_rate": 7.77886691392763e-06, "loss": 17.3041, "step": 190590 }, { "epoch": 0.3850240589535264, "grad_norm": 578.5184936523438, "learning_rate": 7.778576716903484e-06, "loss": 20.246, "step": 190600 }, { "epoch": 0.38504425958621025, "grad_norm": 264.69122314453125, "learning_rate": 7.778286506336892e-06, "loss": 12.2246, "step": 190610 }, { "epoch": 0.38506446021889407, "grad_norm": 356.569091796875, "learning_rate": 7.777996282229267e-06, "loss": 13.9804, "step": 190620 }, { "epoch": 0.3850846608515779, "grad_norm": 850.6675415039062, "learning_rate": 7.777706044582027e-06, "loss": 24.7217, "step": 190630 }, { "epoch": 0.3851048614842617, "grad_norm": 2920.63818359375, "learning_rate": 7.777415793396585e-06, "loss": 35.0636, "step": 190640 }, { "epoch": 0.3851250621169455, "grad_norm": 1809.05517578125, "learning_rate": 7.777125528674356e-06, "loss": 19.7704, "step": 190650 }, { "epoch": 0.3851452627496293, "grad_norm": 762.5745239257812, "learning_rate": 7.776835250416752e-06, "loss": 36.9752, "step": 
190660 }, { "epoch": 0.3851654633823131, "grad_norm": 122.67737579345703, "learning_rate": 7.776544958625189e-06, "loss": 21.3244, "step": 190670 }, { "epoch": 0.38518566401499693, "grad_norm": 805.9771728515625, "learning_rate": 7.776254653301086e-06, "loss": 23.1274, "step": 190680 }, { "epoch": 0.38520586464768075, "grad_norm": 739.9912719726562, "learning_rate": 7.775964334445851e-06, "loss": 28.9868, "step": 190690 }, { "epoch": 0.38522606528036457, "grad_norm": 421.4423828125, "learning_rate": 7.775674002060905e-06, "loss": 27.7118, "step": 190700 }, { "epoch": 0.3852462659130484, "grad_norm": 266.5092468261719, "learning_rate": 7.775383656147659e-06, "loss": 12.4261, "step": 190710 }, { "epoch": 0.3852664665457322, "grad_norm": 389.5838928222656, "learning_rate": 7.77509329670753e-06, "loss": 17.9006, "step": 190720 }, { "epoch": 0.38528666717841603, "grad_norm": 388.9902648925781, "learning_rate": 7.774802923741936e-06, "loss": 21.794, "step": 190730 }, { "epoch": 0.38530686781109985, "grad_norm": 165.98974609375, "learning_rate": 7.774512537252284e-06, "loss": 25.8932, "step": 190740 }, { "epoch": 0.38532706844378367, "grad_norm": 566.0869140625, "learning_rate": 7.774222137239998e-06, "loss": 13.8304, "step": 190750 }, { "epoch": 0.3853472690764675, "grad_norm": 616.65185546875, "learning_rate": 7.773931723706487e-06, "loss": 17.8071, "step": 190760 }, { "epoch": 0.3853674697091513, "grad_norm": 462.8968811035156, "learning_rate": 7.773641296653171e-06, "loss": 24.443, "step": 190770 }, { "epoch": 0.38538767034183513, "grad_norm": 488.7454833984375, "learning_rate": 7.773350856081464e-06, "loss": 22.0167, "step": 190780 }, { "epoch": 0.3854078709745189, "grad_norm": 218.26710510253906, "learning_rate": 7.773060401992781e-06, "loss": 15.0263, "step": 190790 }, { "epoch": 0.3854280716072027, "grad_norm": 437.1186828613281, "learning_rate": 7.772769934388537e-06, "loss": 15.2703, "step": 190800 }, { "epoch": 0.38544827223988654, "grad_norm": 188.64244079589844, "learning_rate": 7.772479453270149e-06, "loss": 16.9342, "step": 190810 }, { "epoch": 0.38546847287257036, "grad_norm": 256.7730712890625, "learning_rate": 7.772188958639034e-06, "loss": 16.2013, "step": 190820 }, { "epoch": 0.3854886735052542, "grad_norm": 299.796142578125, "learning_rate": 7.771898450496605e-06, "loss": 23.0266, "step": 190830 }, { "epoch": 0.385508874137938, "grad_norm": 746.550048828125, "learning_rate": 7.771607928844278e-06, "loss": 17.1555, "step": 190840 }, { "epoch": 0.3855290747706218, "grad_norm": 209.331298828125, "learning_rate": 7.771317393683471e-06, "loss": 18.055, "step": 190850 }, { "epoch": 0.38554927540330564, "grad_norm": 918.1318969726562, "learning_rate": 7.7710268450156e-06, "loss": 22.2411, "step": 190860 }, { "epoch": 0.38556947603598946, "grad_norm": 396.0271911621094, "learning_rate": 7.770736282842079e-06, "loss": 29.7558, "step": 190870 }, { "epoch": 0.3855896766686733, "grad_norm": 543.81494140625, "learning_rate": 7.770445707164325e-06, "loss": 22.1566, "step": 190880 }, { "epoch": 0.3856098773013571, "grad_norm": 231.0266876220703, "learning_rate": 7.770155117983757e-06, "loss": 15.992, "step": 190890 }, { "epoch": 0.3856300779340409, "grad_norm": 236.5028076171875, "learning_rate": 7.769864515301787e-06, "loss": 18.8548, "step": 190900 }, { "epoch": 0.38565027856672474, "grad_norm": 553.9424438476562, "learning_rate": 7.769573899119834e-06, "loss": 32.5234, "step": 190910 }, { "epoch": 0.3856704791994085, "grad_norm": 217.63796997070312, "learning_rate": 
7.769283269439314e-06, "loss": 17.9023, "step": 190920 }, { "epoch": 0.3856906798320923, "grad_norm": 265.1778564453125, "learning_rate": 7.768992626261642e-06, "loss": 20.8228, "step": 190930 }, { "epoch": 0.38571088046477614, "grad_norm": 330.60870361328125, "learning_rate": 7.768701969588237e-06, "loss": 23.952, "step": 190940 }, { "epoch": 0.38573108109745996, "grad_norm": 64.96464538574219, "learning_rate": 7.768411299420513e-06, "loss": 14.4169, "step": 190950 }, { "epoch": 0.3857512817301438, "grad_norm": 468.0285339355469, "learning_rate": 7.76812061575989e-06, "loss": 20.3455, "step": 190960 }, { "epoch": 0.3857714823628276, "grad_norm": 251.83633422851562, "learning_rate": 7.767829918607782e-06, "loss": 15.6292, "step": 190970 }, { "epoch": 0.3857916829955114, "grad_norm": 352.02020263671875, "learning_rate": 7.767539207965606e-06, "loss": 20.8917, "step": 190980 }, { "epoch": 0.38581188362819524, "grad_norm": 202.3304443359375, "learning_rate": 7.767248483834781e-06, "loss": 7.3944, "step": 190990 }, { "epoch": 0.38583208426087906, "grad_norm": 782.37939453125, "learning_rate": 7.76695774621672e-06, "loss": 21.0491, "step": 191000 }, { "epoch": 0.3858522848935629, "grad_norm": 323.6791076660156, "learning_rate": 7.766666995112846e-06, "loss": 22.5071, "step": 191010 }, { "epoch": 0.3858724855262467, "grad_norm": 381.05926513671875, "learning_rate": 7.76637623052457e-06, "loss": 20.2027, "step": 191020 }, { "epoch": 0.3858926861589305, "grad_norm": 833.758056640625, "learning_rate": 7.766085452453312e-06, "loss": 16.7291, "step": 191030 }, { "epoch": 0.38591288679161434, "grad_norm": 652.1970825195312, "learning_rate": 7.765794660900489e-06, "loss": 25.2256, "step": 191040 }, { "epoch": 0.3859330874242981, "grad_norm": 328.4167175292969, "learning_rate": 7.76550385586752e-06, "loss": 11.8474, "step": 191050 }, { "epoch": 0.3859532880569819, "grad_norm": 767.3618774414062, "learning_rate": 7.76521303735582e-06, "loss": 26.2798, "step": 191060 }, { "epoch": 0.38597348868966574, "grad_norm": 994.3623657226562, "learning_rate": 7.764922205366807e-06, "loss": 26.8572, "step": 191070 }, { "epoch": 0.38599368932234956, "grad_norm": 280.192626953125, "learning_rate": 7.764631359901897e-06, "loss": 24.9527, "step": 191080 }, { "epoch": 0.3860138899550334, "grad_norm": 405.3322448730469, "learning_rate": 7.764340500962511e-06, "loss": 29.9454, "step": 191090 }, { "epoch": 0.3860340905877172, "grad_norm": 217.43472290039062, "learning_rate": 7.764049628550063e-06, "loss": 24.0357, "step": 191100 }, { "epoch": 0.386054291220401, "grad_norm": 278.47412109375, "learning_rate": 7.763758742665973e-06, "loss": 32.7479, "step": 191110 }, { "epoch": 0.38607449185308484, "grad_norm": 332.62255859375, "learning_rate": 7.763467843311658e-06, "loss": 26.9294, "step": 191120 }, { "epoch": 0.38609469248576866, "grad_norm": 353.11480712890625, "learning_rate": 7.763176930488537e-06, "loss": 25.5999, "step": 191130 }, { "epoch": 0.3861148931184525, "grad_norm": 623.3184204101562, "learning_rate": 7.762886004198024e-06, "loss": 17.434, "step": 191140 }, { "epoch": 0.3861350937511363, "grad_norm": 525.96923828125, "learning_rate": 7.762595064441542e-06, "loss": 16.3577, "step": 191150 }, { "epoch": 0.3861552943838201, "grad_norm": 447.5262145996094, "learning_rate": 7.762304111220506e-06, "loss": 15.5832, "step": 191160 }, { "epoch": 0.38617549501650394, "grad_norm": 373.88299560546875, "learning_rate": 7.762013144536337e-06, "loss": 30.258, "step": 191170 }, { "epoch": 0.3861956956491877, "grad_norm": 
342.3337707519531, "learning_rate": 7.761722164390448e-06, "loss": 16.9355, "step": 191180 }, { "epoch": 0.38621589628187153, "grad_norm": 916.4569091796875, "learning_rate": 7.761431170784261e-06, "loss": 46.6152, "step": 191190 }, { "epoch": 0.38623609691455535, "grad_norm": 630.3865356445312, "learning_rate": 7.761140163719194e-06, "loss": 19.4969, "step": 191200 }, { "epoch": 0.38625629754723917, "grad_norm": 522.5431518554688, "learning_rate": 7.760849143196664e-06, "loss": 15.8899, "step": 191210 }, { "epoch": 0.386276498179923, "grad_norm": 128.63572692871094, "learning_rate": 7.76055810921809e-06, "loss": 17.217, "step": 191220 }, { "epoch": 0.3862966988126068, "grad_norm": 1020.7322387695312, "learning_rate": 7.760267061784891e-06, "loss": 25.6774, "step": 191230 }, { "epoch": 0.38631689944529063, "grad_norm": 330.7348937988281, "learning_rate": 7.759976000898486e-06, "loss": 19.0391, "step": 191240 }, { "epoch": 0.38633710007797445, "grad_norm": 156.26284790039062, "learning_rate": 7.759684926560292e-06, "loss": 14.8975, "step": 191250 }, { "epoch": 0.38635730071065827, "grad_norm": 406.6961975097656, "learning_rate": 7.759393838771728e-06, "loss": 18.1821, "step": 191260 }, { "epoch": 0.3863775013433421, "grad_norm": 175.15341186523438, "learning_rate": 7.759102737534214e-06, "loss": 27.0331, "step": 191270 }, { "epoch": 0.3863977019760259, "grad_norm": 535.4419555664062, "learning_rate": 7.758811622849167e-06, "loss": 20.4541, "step": 191280 }, { "epoch": 0.38641790260870973, "grad_norm": 560.9713134765625, "learning_rate": 7.758520494718006e-06, "loss": 27.5345, "step": 191290 }, { "epoch": 0.3864381032413935, "grad_norm": 104.56470489501953, "learning_rate": 7.758229353142153e-06, "loss": 18.1173, "step": 191300 }, { "epoch": 0.3864583038740773, "grad_norm": 896.4780883789062, "learning_rate": 7.757938198123024e-06, "loss": 17.6291, "step": 191310 }, { "epoch": 0.38647850450676113, "grad_norm": 774.9434204101562, "learning_rate": 7.757647029662037e-06, "loss": 16.2023, "step": 191320 }, { "epoch": 0.38649870513944495, "grad_norm": 441.3625793457031, "learning_rate": 7.757355847760614e-06, "loss": 13.2716, "step": 191330 }, { "epoch": 0.3865189057721288, "grad_norm": 192.32289123535156, "learning_rate": 7.757064652420172e-06, "loss": 37.5933, "step": 191340 }, { "epoch": 0.3865391064048126, "grad_norm": 221.5293426513672, "learning_rate": 7.756773443642132e-06, "loss": 58.5677, "step": 191350 }, { "epoch": 0.3865593070374964, "grad_norm": 188.2999267578125, "learning_rate": 7.756482221427914e-06, "loss": 42.004, "step": 191360 }, { "epoch": 0.38657950767018023, "grad_norm": 148.3327178955078, "learning_rate": 7.756190985778933e-06, "loss": 23.1077, "step": 191370 }, { "epoch": 0.38659970830286405, "grad_norm": 871.6217041015625, "learning_rate": 7.755899736696613e-06, "loss": 24.2541, "step": 191380 }, { "epoch": 0.3866199089355479, "grad_norm": 551.8317260742188, "learning_rate": 7.755608474182372e-06, "loss": 28.5641, "step": 191390 }, { "epoch": 0.3866401095682317, "grad_norm": 79.60022735595703, "learning_rate": 7.755317198237631e-06, "loss": 23.9328, "step": 191400 }, { "epoch": 0.3866603102009155, "grad_norm": 776.9974365234375, "learning_rate": 7.755025908863807e-06, "loss": 33.3793, "step": 191410 }, { "epoch": 0.38668051083359933, "grad_norm": 207.62042236328125, "learning_rate": 7.75473460606232e-06, "loss": 11.9068, "step": 191420 }, { "epoch": 0.3867007114662831, "grad_norm": 1.0078130960464478, "learning_rate": 7.75444328983459e-06, "loss": 18.8599, "step": 191430 
}, { "epoch": 0.3867209120989669, "grad_norm": 428.4453430175781, "learning_rate": 7.75415196018204e-06, "loss": 37.7331, "step": 191440 }, { "epoch": 0.38674111273165074, "grad_norm": 509.4866638183594, "learning_rate": 7.753860617106085e-06, "loss": 19.8049, "step": 191450 }, { "epoch": 0.38676131336433456, "grad_norm": 248.05801391601562, "learning_rate": 7.75356926060815e-06, "loss": 23.2241, "step": 191460 }, { "epoch": 0.3867815139970184, "grad_norm": 185.79217529296875, "learning_rate": 7.75327789068965e-06, "loss": 23.705, "step": 191470 }, { "epoch": 0.3868017146297022, "grad_norm": 459.7008972167969, "learning_rate": 7.752986507352009e-06, "loss": 21.4752, "step": 191480 }, { "epoch": 0.386821915262386, "grad_norm": 524.0685424804688, "learning_rate": 7.752695110596644e-06, "loss": 18.9021, "step": 191490 }, { "epoch": 0.38684211589506984, "grad_norm": 545.9414672851562, "learning_rate": 7.752403700424978e-06, "loss": 22.1293, "step": 191500 }, { "epoch": 0.38686231652775366, "grad_norm": 491.1232604980469, "learning_rate": 7.75211227683843e-06, "loss": 33.53, "step": 191510 }, { "epoch": 0.3868825171604375, "grad_norm": 428.9862976074219, "learning_rate": 7.751820839838423e-06, "loss": 18.6401, "step": 191520 }, { "epoch": 0.3869027177931213, "grad_norm": 39.37624740600586, "learning_rate": 7.751529389426372e-06, "loss": 12.3774, "step": 191530 }, { "epoch": 0.3869229184258051, "grad_norm": 361.1414794921875, "learning_rate": 7.7512379256037e-06, "loss": 29.8175, "step": 191540 }, { "epoch": 0.38694311905848894, "grad_norm": 444.081787109375, "learning_rate": 7.75094644837183e-06, "loss": 10.0405, "step": 191550 }, { "epoch": 0.3869633196911727, "grad_norm": 337.3037414550781, "learning_rate": 7.750654957732179e-06, "loss": 15.4009, "step": 191560 }, { "epoch": 0.3869835203238565, "grad_norm": 593.4365844726562, "learning_rate": 7.75036345368617e-06, "loss": 18.3733, "step": 191570 }, { "epoch": 0.38700372095654034, "grad_norm": 437.93780517578125, "learning_rate": 7.750071936235223e-06, "loss": 23.0736, "step": 191580 }, { "epoch": 0.38702392158922416, "grad_norm": 179.80548095703125, "learning_rate": 7.74978040538076e-06, "loss": 8.6473, "step": 191590 }, { "epoch": 0.387044122221908, "grad_norm": 180.412353515625, "learning_rate": 7.7494888611242e-06, "loss": 21.2373, "step": 191600 }, { "epoch": 0.3870643228545918, "grad_norm": 150.6357421875, "learning_rate": 7.749197303466964e-06, "loss": 23.9392, "step": 191610 }, { "epoch": 0.3870845234872756, "grad_norm": 316.60296630859375, "learning_rate": 7.748905732410475e-06, "loss": 29.0779, "step": 191620 }, { "epoch": 0.38710472411995944, "grad_norm": 552.203857421875, "learning_rate": 7.748614147956153e-06, "loss": 21.3639, "step": 191630 }, { "epoch": 0.38712492475264326, "grad_norm": 399.39093017578125, "learning_rate": 7.748322550105419e-06, "loss": 16.7633, "step": 191640 }, { "epoch": 0.3871451253853271, "grad_norm": 317.05401611328125, "learning_rate": 7.748030938859692e-06, "loss": 23.4956, "step": 191650 }, { "epoch": 0.3871653260180109, "grad_norm": 502.89508056640625, "learning_rate": 7.747739314220398e-06, "loss": 14.9821, "step": 191660 }, { "epoch": 0.3871855266506947, "grad_norm": 153.30264282226562, "learning_rate": 7.747447676188955e-06, "loss": 26.0995, "step": 191670 }, { "epoch": 0.38720572728337854, "grad_norm": 213.2479248046875, "learning_rate": 7.747156024766785e-06, "loss": 33.2892, "step": 191680 }, { "epoch": 0.3872259279160623, "grad_norm": 83.84458923339844, "learning_rate": 7.74686435995531e-06, 
"loss": 17.1863, "step": 191690 }, { "epoch": 0.3872461285487461, "grad_norm": 280.7131042480469, "learning_rate": 7.74657268175595e-06, "loss": 18.4512, "step": 191700 }, { "epoch": 0.38726632918142995, "grad_norm": 203.6442108154297, "learning_rate": 7.74628099017013e-06, "loss": 24.3327, "step": 191710 }, { "epoch": 0.38728652981411377, "grad_norm": 260.60430908203125, "learning_rate": 7.74598928519927e-06, "loss": 25.9397, "step": 191720 }, { "epoch": 0.3873067304467976, "grad_norm": 357.5342102050781, "learning_rate": 7.745697566844788e-06, "loss": 20.6397, "step": 191730 }, { "epoch": 0.3873269310794814, "grad_norm": 65.1893539428711, "learning_rate": 7.745405835108112e-06, "loss": 22.6363, "step": 191740 }, { "epoch": 0.3873471317121652, "grad_norm": 114.95198822021484, "learning_rate": 7.74511408999066e-06, "loss": 23.7369, "step": 191750 }, { "epoch": 0.38736733234484905, "grad_norm": 304.2437438964844, "learning_rate": 7.744822331493855e-06, "loss": 11.8269, "step": 191760 }, { "epoch": 0.38738753297753287, "grad_norm": 680.9169921875, "learning_rate": 7.744530559619117e-06, "loss": 29.2421, "step": 191770 }, { "epoch": 0.3874077336102167, "grad_norm": 389.0510559082031, "learning_rate": 7.744238774367873e-06, "loss": 20.0267, "step": 191780 }, { "epoch": 0.3874279342429005, "grad_norm": 356.3277587890625, "learning_rate": 7.743946975741541e-06, "loss": 16.5415, "step": 191790 }, { "epoch": 0.3874481348755843, "grad_norm": 185.0585479736328, "learning_rate": 7.743655163741544e-06, "loss": 26.0596, "step": 191800 }, { "epoch": 0.3874683355082681, "grad_norm": 188.16104125976562, "learning_rate": 7.743363338369303e-06, "loss": 28.3094, "step": 191810 }, { "epoch": 0.3874885361409519, "grad_norm": 147.9081573486328, "learning_rate": 7.743071499626244e-06, "loss": 16.0463, "step": 191820 }, { "epoch": 0.38750873677363573, "grad_norm": 448.7315673828125, "learning_rate": 7.742779647513785e-06, "loss": 20.7494, "step": 191830 }, { "epoch": 0.38752893740631955, "grad_norm": 196.2622833251953, "learning_rate": 7.742487782033352e-06, "loss": 16.3189, "step": 191840 }, { "epoch": 0.38754913803900337, "grad_norm": 307.7751159667969, "learning_rate": 7.742195903186366e-06, "loss": 13.5797, "step": 191850 }, { "epoch": 0.3875693386716872, "grad_norm": 263.0933837890625, "learning_rate": 7.74190401097425e-06, "loss": 21.196, "step": 191860 }, { "epoch": 0.387589539304371, "grad_norm": 455.3670654296875, "learning_rate": 7.741612105398429e-06, "loss": 24.6287, "step": 191870 }, { "epoch": 0.38760973993705483, "grad_norm": 136.99728393554688, "learning_rate": 7.74132018646032e-06, "loss": 14.6892, "step": 191880 }, { "epoch": 0.38762994056973865, "grad_norm": 242.41091918945312, "learning_rate": 7.741028254161349e-06, "loss": 13.3496, "step": 191890 }, { "epoch": 0.38765014120242247, "grad_norm": 267.5481262207031, "learning_rate": 7.740736308502939e-06, "loss": 20.801, "step": 191900 }, { "epoch": 0.3876703418351063, "grad_norm": 184.3376007080078, "learning_rate": 7.740444349486512e-06, "loss": 14.7279, "step": 191910 }, { "epoch": 0.3876905424677901, "grad_norm": 638.7349243164062, "learning_rate": 7.740152377113493e-06, "loss": 20.4639, "step": 191920 }, { "epoch": 0.38771074310047393, "grad_norm": 517.0067749023438, "learning_rate": 7.739860391385303e-06, "loss": 26.5182, "step": 191930 }, { "epoch": 0.3877309437331577, "grad_norm": 703.1930541992188, "learning_rate": 7.739568392303364e-06, "loss": 38.8584, "step": 191940 }, { "epoch": 0.3877511443658415, "grad_norm": 519.8761596679688, 
"learning_rate": 7.739276379869105e-06, "loss": 23.3888, "step": 191950 }, { "epoch": 0.38777134499852534, "grad_norm": 296.1531677246094, "learning_rate": 7.738984354083942e-06, "loss": 41.9171, "step": 191960 }, { "epoch": 0.38779154563120916, "grad_norm": 523.1737060546875, "learning_rate": 7.738692314949304e-06, "loss": 22.122, "step": 191970 }, { "epoch": 0.387811746263893, "grad_norm": 373.5809326171875, "learning_rate": 7.738400262466608e-06, "loss": 15.7975, "step": 191980 }, { "epoch": 0.3878319468965768, "grad_norm": 316.2525939941406, "learning_rate": 7.738108196637284e-06, "loss": 24.286, "step": 191990 }, { "epoch": 0.3878521475292606, "grad_norm": 323.22698974609375, "learning_rate": 7.737816117462752e-06, "loss": 32.5908, "step": 192000 }, { "epoch": 0.38787234816194444, "grad_norm": 59.11886215209961, "learning_rate": 7.737524024944437e-06, "loss": 26.2432, "step": 192010 }, { "epoch": 0.38789254879462826, "grad_norm": 574.417724609375, "learning_rate": 7.737231919083761e-06, "loss": 37.8521, "step": 192020 }, { "epoch": 0.3879127494273121, "grad_norm": 552.07568359375, "learning_rate": 7.736939799882149e-06, "loss": 17.1835, "step": 192030 }, { "epoch": 0.3879329500599959, "grad_norm": 455.1221618652344, "learning_rate": 7.736647667341025e-06, "loss": 36.4382, "step": 192040 }, { "epoch": 0.3879531506926797, "grad_norm": 910.0355834960938, "learning_rate": 7.736355521461812e-06, "loss": 34.3768, "step": 192050 }, { "epoch": 0.38797335132536354, "grad_norm": 832.4783325195312, "learning_rate": 7.736063362245931e-06, "loss": 30.0665, "step": 192060 }, { "epoch": 0.3879935519580473, "grad_norm": 690.0657348632812, "learning_rate": 7.735771189694813e-06, "loss": 19.8979, "step": 192070 }, { "epoch": 0.3880137525907311, "grad_norm": 220.23095703125, "learning_rate": 7.735479003809876e-06, "loss": 10.865, "step": 192080 }, { "epoch": 0.38803395322341494, "grad_norm": 172.29656982421875, "learning_rate": 7.735186804592548e-06, "loss": 9.8104, "step": 192090 }, { "epoch": 0.38805415385609876, "grad_norm": 558.6802368164062, "learning_rate": 7.734894592044249e-06, "loss": 26.7397, "step": 192100 }, { "epoch": 0.3880743544887826, "grad_norm": 519.433349609375, "learning_rate": 7.734602366166406e-06, "loss": 16.3344, "step": 192110 }, { "epoch": 0.3880945551214664, "grad_norm": 250.96353149414062, "learning_rate": 7.734310126960444e-06, "loss": 26.8956, "step": 192120 }, { "epoch": 0.3881147557541502, "grad_norm": 271.8106384277344, "learning_rate": 7.734017874427786e-06, "loss": 20.7124, "step": 192130 }, { "epoch": 0.38813495638683404, "grad_norm": 8.314308166503906, "learning_rate": 7.733725608569856e-06, "loss": 14.2017, "step": 192140 }, { "epoch": 0.38815515701951786, "grad_norm": 426.02459716796875, "learning_rate": 7.733433329388079e-06, "loss": 14.6203, "step": 192150 }, { "epoch": 0.3881753576522017, "grad_norm": 328.6076965332031, "learning_rate": 7.733141036883878e-06, "loss": 14.6803, "step": 192160 }, { "epoch": 0.3881955582848855, "grad_norm": 632.5792236328125, "learning_rate": 7.73284873105868e-06, "loss": 27.803, "step": 192170 }, { "epoch": 0.3882157589175693, "grad_norm": 295.1548767089844, "learning_rate": 7.73255641191391e-06, "loss": 23.968, "step": 192180 }, { "epoch": 0.38823595955025314, "grad_norm": 366.90863037109375, "learning_rate": 7.73226407945099e-06, "loss": 12.7493, "step": 192190 }, { "epoch": 0.3882561601829369, "grad_norm": 515.6712646484375, "learning_rate": 7.731971733671347e-06, "loss": 24.4788, "step": 192200 }, { "epoch": 0.3882763608156207, 
"grad_norm": 517.9892578125, "learning_rate": 7.731679374576404e-06, "loss": 14.1814, "step": 192210 }, { "epoch": 0.38829656144830454, "grad_norm": 318.1728210449219, "learning_rate": 7.731387002167587e-06, "loss": 15.2578, "step": 192220 }, { "epoch": 0.38831676208098836, "grad_norm": 997.310302734375, "learning_rate": 7.731094616446323e-06, "loss": 26.4868, "step": 192230 }, { "epoch": 0.3883369627136722, "grad_norm": 754.043212890625, "learning_rate": 7.730802217414034e-06, "loss": 21.1662, "step": 192240 }, { "epoch": 0.388357163346356, "grad_norm": 342.50286865234375, "learning_rate": 7.730509805072146e-06, "loss": 24.6172, "step": 192250 }, { "epoch": 0.3883773639790398, "grad_norm": 380.1715393066406, "learning_rate": 7.730217379422084e-06, "loss": 36.2436, "step": 192260 }, { "epoch": 0.38839756461172364, "grad_norm": 315.2763671875, "learning_rate": 7.729924940465275e-06, "loss": 17.2449, "step": 192270 }, { "epoch": 0.38841776524440746, "grad_norm": 1045.7430419921875, "learning_rate": 7.729632488203142e-06, "loss": 40.8509, "step": 192280 }, { "epoch": 0.3884379658770913, "grad_norm": 376.32586669921875, "learning_rate": 7.729340022637111e-06, "loss": 9.346, "step": 192290 }, { "epoch": 0.3884581665097751, "grad_norm": 407.0836486816406, "learning_rate": 7.729047543768608e-06, "loss": 26.9397, "step": 192300 }, { "epoch": 0.3884783671424589, "grad_norm": 1427.2691650390625, "learning_rate": 7.72875505159906e-06, "loss": 28.058, "step": 192310 }, { "epoch": 0.38849856777514274, "grad_norm": 107.99658966064453, "learning_rate": 7.728462546129888e-06, "loss": 29.4344, "step": 192320 }, { "epoch": 0.3885187684078265, "grad_norm": 236.956298828125, "learning_rate": 7.728170027362523e-06, "loss": 21.1595, "step": 192330 }, { "epoch": 0.38853896904051033, "grad_norm": 213.91917419433594, "learning_rate": 7.727877495298386e-06, "loss": 36.8074, "step": 192340 }, { "epoch": 0.38855916967319415, "grad_norm": 533.0302734375, "learning_rate": 7.727584949938907e-06, "loss": 22.6468, "step": 192350 }, { "epoch": 0.38857937030587797, "grad_norm": 520.6922607421875, "learning_rate": 7.727292391285507e-06, "loss": 46.2784, "step": 192360 }, { "epoch": 0.3885995709385618, "grad_norm": 232.45205688476562, "learning_rate": 7.726999819339618e-06, "loss": 40.3498, "step": 192370 }, { "epoch": 0.3886197715712456, "grad_norm": 445.8552551269531, "learning_rate": 7.72670723410266e-06, "loss": 12.3434, "step": 192380 }, { "epoch": 0.38863997220392943, "grad_norm": 425.1505126953125, "learning_rate": 7.726414635576062e-06, "loss": 22.3091, "step": 192390 }, { "epoch": 0.38866017283661325, "grad_norm": 256.747802734375, "learning_rate": 7.726122023761252e-06, "loss": 18.3736, "step": 192400 }, { "epoch": 0.38868037346929707, "grad_norm": 401.13604736328125, "learning_rate": 7.72582939865965e-06, "loss": 24.4971, "step": 192410 }, { "epoch": 0.3887005741019809, "grad_norm": 253.91697692871094, "learning_rate": 7.72553676027269e-06, "loss": 17.021, "step": 192420 }, { "epoch": 0.3887207747346647, "grad_norm": 109.07809448242188, "learning_rate": 7.725244108601793e-06, "loss": 34.0692, "step": 192430 }, { "epoch": 0.38874097536734853, "grad_norm": 335.65625, "learning_rate": 7.724951443648386e-06, "loss": 17.4623, "step": 192440 }, { "epoch": 0.3887611760000323, "grad_norm": 333.1289367675781, "learning_rate": 7.724658765413897e-06, "loss": 14.9412, "step": 192450 }, { "epoch": 0.3887813766327161, "grad_norm": 62.65464782714844, "learning_rate": 7.72436607389975e-06, "loss": 28.4945, "step": 192460 }, { 
"epoch": 0.38880157726539993, "grad_norm": 262.5960693359375, "learning_rate": 7.724073369107376e-06, "loss": 13.1065, "step": 192470 }, { "epoch": 0.38882177789808375, "grad_norm": 345.6170959472656, "learning_rate": 7.723780651038196e-06, "loss": 31.3918, "step": 192480 }, { "epoch": 0.3888419785307676, "grad_norm": 329.9227600097656, "learning_rate": 7.723487919693642e-06, "loss": 15.6708, "step": 192490 }, { "epoch": 0.3888621791634514, "grad_norm": 561.7655639648438, "learning_rate": 7.723195175075136e-06, "loss": 21.4188, "step": 192500 }, { "epoch": 0.3888823797961352, "grad_norm": 0.0, "learning_rate": 7.722902417184109e-06, "loss": 14.1593, "step": 192510 }, { "epoch": 0.38890258042881903, "grad_norm": 409.5954284667969, "learning_rate": 7.722609646021984e-06, "loss": 22.6441, "step": 192520 }, { "epoch": 0.38892278106150285, "grad_norm": 517.1251831054688, "learning_rate": 7.72231686159019e-06, "loss": 15.9057, "step": 192530 }, { "epoch": 0.3889429816941867, "grad_norm": 144.67063903808594, "learning_rate": 7.722024063890154e-06, "loss": 14.5635, "step": 192540 }, { "epoch": 0.3889631823268705, "grad_norm": 103.00260162353516, "learning_rate": 7.721731252923305e-06, "loss": 14.1731, "step": 192550 }, { "epoch": 0.3889833829595543, "grad_norm": 426.76556396484375, "learning_rate": 7.721438428691065e-06, "loss": 27.5293, "step": 192560 }, { "epoch": 0.38900358359223813, "grad_norm": 193.24110412597656, "learning_rate": 7.721145591194865e-06, "loss": 14.3015, "step": 192570 }, { "epoch": 0.3890237842249219, "grad_norm": 259.59454345703125, "learning_rate": 7.720852740436134e-06, "loss": 16.3027, "step": 192580 }, { "epoch": 0.3890439848576057, "grad_norm": 293.9356384277344, "learning_rate": 7.720559876416293e-06, "loss": 24.0709, "step": 192590 }, { "epoch": 0.38906418549028954, "grad_norm": 293.2192687988281, "learning_rate": 7.720266999136774e-06, "loss": 25.922, "step": 192600 }, { "epoch": 0.38908438612297336, "grad_norm": 728.6549072265625, "learning_rate": 7.719974108599005e-06, "loss": 24.7767, "step": 192610 }, { "epoch": 0.3891045867556572, "grad_norm": 579.4147338867188, "learning_rate": 7.719681204804413e-06, "loss": 51.0215, "step": 192620 }, { "epoch": 0.389124787388341, "grad_norm": 544.8616333007812, "learning_rate": 7.71938828775442e-06, "loss": 14.4531, "step": 192630 }, { "epoch": 0.3891449880210248, "grad_norm": 413.74578857421875, "learning_rate": 7.719095357450462e-06, "loss": 20.1423, "step": 192640 }, { "epoch": 0.38916518865370864, "grad_norm": 470.0211486816406, "learning_rate": 7.718802413893963e-06, "loss": 20.2963, "step": 192650 }, { "epoch": 0.38918538928639246, "grad_norm": 65.1580810546875, "learning_rate": 7.718509457086351e-06, "loss": 22.7796, "step": 192660 }, { "epoch": 0.3892055899190763, "grad_norm": 468.04290771484375, "learning_rate": 7.718216487029051e-06, "loss": 15.223, "step": 192670 }, { "epoch": 0.3892257905517601, "grad_norm": 691.6110229492188, "learning_rate": 7.717923503723496e-06, "loss": 31.8396, "step": 192680 }, { "epoch": 0.3892459911844439, "grad_norm": 285.8040771484375, "learning_rate": 7.71763050717111e-06, "loss": 20.0454, "step": 192690 }, { "epoch": 0.38926619181712774, "grad_norm": 98.07317352294922, "learning_rate": 7.717337497373324e-06, "loss": 13.061, "step": 192700 }, { "epoch": 0.3892863924498115, "grad_norm": 278.7805480957031, "learning_rate": 7.717044474331565e-06, "loss": 33.9224, "step": 192710 }, { "epoch": 0.3893065930824953, "grad_norm": 951.0253295898438, "learning_rate": 7.716751438047259e-06, "loss": 
18.3914, "step": 192720 }, { "epoch": 0.38932679371517914, "grad_norm": 635.8330078125, "learning_rate": 7.716458388521837e-06, "loss": 40.7988, "step": 192730 }, { "epoch": 0.38934699434786296, "grad_norm": 391.28814697265625, "learning_rate": 7.716165325756727e-06, "loss": 17.1659, "step": 192740 }, { "epoch": 0.3893671949805468, "grad_norm": 255.3839569091797, "learning_rate": 7.715872249753353e-06, "loss": 13.6852, "step": 192750 }, { "epoch": 0.3893873956132306, "grad_norm": 405.2212219238281, "learning_rate": 7.715579160513152e-06, "loss": 27.0415, "step": 192760 }, { "epoch": 0.3894075962459144, "grad_norm": 38.79237747192383, "learning_rate": 7.715286058037544e-06, "loss": 20.1129, "step": 192770 }, { "epoch": 0.38942779687859824, "grad_norm": 281.222412109375, "learning_rate": 7.714992942327962e-06, "loss": 17.8623, "step": 192780 }, { "epoch": 0.38944799751128206, "grad_norm": 329.6753234863281, "learning_rate": 7.714699813385834e-06, "loss": 23.4179, "step": 192790 }, { "epoch": 0.3894681981439659, "grad_norm": 270.1520080566406, "learning_rate": 7.714406671212589e-06, "loss": 39.082, "step": 192800 }, { "epoch": 0.3894883987766497, "grad_norm": 484.9346923828125, "learning_rate": 7.714113515809653e-06, "loss": 15.3479, "step": 192810 }, { "epoch": 0.3895085994093335, "grad_norm": 405.59246826171875, "learning_rate": 7.71382034717846e-06, "loss": 18.2914, "step": 192820 }, { "epoch": 0.38952880004201734, "grad_norm": 92.56423950195312, "learning_rate": 7.713527165320432e-06, "loss": 26.7206, "step": 192830 }, { "epoch": 0.3895490006747011, "grad_norm": 258.7493896484375, "learning_rate": 7.713233970237004e-06, "loss": 9.1603, "step": 192840 }, { "epoch": 0.3895692013073849, "grad_norm": 52.760894775390625, "learning_rate": 7.712940761929604e-06, "loss": 15.5544, "step": 192850 }, { "epoch": 0.38958940194006875, "grad_norm": 360.18975830078125, "learning_rate": 7.712647540399658e-06, "loss": 14.6412, "step": 192860 }, { "epoch": 0.38960960257275257, "grad_norm": 476.0581359863281, "learning_rate": 7.712354305648597e-06, "loss": 14.0446, "step": 192870 }, { "epoch": 0.3896298032054364, "grad_norm": 431.9054870605469, "learning_rate": 7.71206105767785e-06, "loss": 20.7469, "step": 192880 }, { "epoch": 0.3896500038381202, "grad_norm": 445.0193786621094, "learning_rate": 7.711767796488847e-06, "loss": 13.5873, "step": 192890 }, { "epoch": 0.389670204470804, "grad_norm": 973.5383911132812, "learning_rate": 7.711474522083015e-06, "loss": 25.6614, "step": 192900 }, { "epoch": 0.38969040510348785, "grad_norm": 590.4380493164062, "learning_rate": 7.711181234461786e-06, "loss": 12.0582, "step": 192910 }, { "epoch": 0.38971060573617167, "grad_norm": 314.5450744628906, "learning_rate": 7.71088793362659e-06, "loss": 19.7088, "step": 192920 }, { "epoch": 0.3897308063688555, "grad_norm": 346.41595458984375, "learning_rate": 7.710594619578853e-06, "loss": 15.1289, "step": 192930 }, { "epoch": 0.3897510070015393, "grad_norm": 643.9741821289062, "learning_rate": 7.710301292320007e-06, "loss": 18.48, "step": 192940 }, { "epoch": 0.3897712076342231, "grad_norm": 338.6170654296875, "learning_rate": 7.710007951851482e-06, "loss": 13.5522, "step": 192950 }, { "epoch": 0.38979140826690695, "grad_norm": 231.61537170410156, "learning_rate": 7.709714598174706e-06, "loss": 10.4692, "step": 192960 }, { "epoch": 0.3898116088995907, "grad_norm": 497.3166809082031, "learning_rate": 7.709421231291112e-06, "loss": 28.2675, "step": 192970 }, { "epoch": 0.38983180953227453, "grad_norm": 1395.8343505859375, 
"learning_rate": 7.709127851202126e-06, "loss": 25.3676, "step": 192980 }, { "epoch": 0.38985201016495835, "grad_norm": 246.81260681152344, "learning_rate": 7.708834457909179e-06, "loss": 15.7524, "step": 192990 }, { "epoch": 0.38987221079764217, "grad_norm": 610.1190185546875, "learning_rate": 7.7085410514137e-06, "loss": 17.1402, "step": 193000 }, { "epoch": 0.389892411430326, "grad_norm": 19.861980438232422, "learning_rate": 7.708247631717122e-06, "loss": 19.2926, "step": 193010 }, { "epoch": 0.3899126120630098, "grad_norm": 415.28173828125, "learning_rate": 7.707954198820873e-06, "loss": 15.6091, "step": 193020 }, { "epoch": 0.38993281269569363, "grad_norm": 84.7052001953125, "learning_rate": 7.707660752726384e-06, "loss": 16.5228, "step": 193030 }, { "epoch": 0.38995301332837745, "grad_norm": 52.818817138671875, "learning_rate": 7.707367293435086e-06, "loss": 26.4588, "step": 193040 }, { "epoch": 0.38997321396106127, "grad_norm": 332.837890625, "learning_rate": 7.707073820948407e-06, "loss": 16.7823, "step": 193050 }, { "epoch": 0.3899934145937451, "grad_norm": 505.16143798828125, "learning_rate": 7.706780335267778e-06, "loss": 28.5368, "step": 193060 }, { "epoch": 0.3900136152264289, "grad_norm": 519.2085571289062, "learning_rate": 7.706486836394632e-06, "loss": 29.9999, "step": 193070 }, { "epoch": 0.39003381585911273, "grad_norm": 812.468017578125, "learning_rate": 7.706193324330396e-06, "loss": 32.646, "step": 193080 }, { "epoch": 0.3900540164917965, "grad_norm": 126.26093292236328, "learning_rate": 7.705899799076502e-06, "loss": 16.4912, "step": 193090 }, { "epoch": 0.3900742171244803, "grad_norm": 15.94903564453125, "learning_rate": 7.70560626063438e-06, "loss": 13.8317, "step": 193100 }, { "epoch": 0.39009441775716414, "grad_norm": 410.4071960449219, "learning_rate": 7.70531270900546e-06, "loss": 22.1461, "step": 193110 }, { "epoch": 0.39011461838984796, "grad_norm": 280.9446105957031, "learning_rate": 7.705019144191178e-06, "loss": 15.9059, "step": 193120 }, { "epoch": 0.3901348190225318, "grad_norm": 121.53821563720703, "learning_rate": 7.704725566192959e-06, "loss": 9.9436, "step": 193130 }, { "epoch": 0.3901550196552156, "grad_norm": 393.3848876953125, "learning_rate": 7.704431975012234e-06, "loss": 24.0284, "step": 193140 }, { "epoch": 0.3901752202878994, "grad_norm": 205.39065551757812, "learning_rate": 7.704138370650437e-06, "loss": 20.3011, "step": 193150 }, { "epoch": 0.39019542092058324, "grad_norm": 569.0725708007812, "learning_rate": 7.703844753108997e-06, "loss": 26.1612, "step": 193160 }, { "epoch": 0.39021562155326706, "grad_norm": 1265.81396484375, "learning_rate": 7.703551122389345e-06, "loss": 21.4275, "step": 193170 }, { "epoch": 0.3902358221859509, "grad_norm": 639.0967407226562, "learning_rate": 7.703257478492915e-06, "loss": 17.0219, "step": 193180 }, { "epoch": 0.3902560228186347, "grad_norm": 619.8237915039062, "learning_rate": 7.702963821421135e-06, "loss": 19.2808, "step": 193190 }, { "epoch": 0.3902762234513185, "grad_norm": 208.57553100585938, "learning_rate": 7.702670151175435e-06, "loss": 20.6654, "step": 193200 }, { "epoch": 0.39029642408400234, "grad_norm": 26.127004623413086, "learning_rate": 7.70237646775725e-06, "loss": 38.2808, "step": 193210 }, { "epoch": 0.3903166247166861, "grad_norm": 897.0552368164062, "learning_rate": 7.70208277116801e-06, "loss": 24.5054, "step": 193220 }, { "epoch": 0.3903368253493699, "grad_norm": 0.0, "learning_rate": 7.701789061409148e-06, "loss": 25.8468, "step": 193230 }, { "epoch": 0.39035702598205374, 
"grad_norm": 482.36175537109375, "learning_rate": 7.701495338482093e-06, "loss": 24.4538, "step": 193240 }, { "epoch": 0.39037722661473756, "grad_norm": 197.87158203125, "learning_rate": 7.701201602388276e-06, "loss": 28.4927, "step": 193250 }, { "epoch": 0.3903974272474214, "grad_norm": 449.4877014160156, "learning_rate": 7.70090785312913e-06, "loss": 25.7932, "step": 193260 }, { "epoch": 0.3904176278801052, "grad_norm": 384.9527893066406, "learning_rate": 7.700614090706087e-06, "loss": 54.9316, "step": 193270 }, { "epoch": 0.390437828512789, "grad_norm": 582.7141723632812, "learning_rate": 7.70032031512058e-06, "loss": 16.4824, "step": 193280 }, { "epoch": 0.39045802914547284, "grad_norm": 291.9700622558594, "learning_rate": 7.700026526374038e-06, "loss": 18.14, "step": 193290 }, { "epoch": 0.39047822977815666, "grad_norm": 342.890380859375, "learning_rate": 7.699732724467894e-06, "loss": 34.7699, "step": 193300 }, { "epoch": 0.3904984304108405, "grad_norm": 469.6289367675781, "learning_rate": 7.69943890940358e-06, "loss": 19.6911, "step": 193310 }, { "epoch": 0.3905186310435243, "grad_norm": 337.8787841796875, "learning_rate": 7.699145081182528e-06, "loss": 8.6771, "step": 193320 }, { "epoch": 0.3905388316762081, "grad_norm": 711.6795043945312, "learning_rate": 7.69885123980617e-06, "loss": 25.2997, "step": 193330 }, { "epoch": 0.39055903230889194, "grad_norm": 521.74169921875, "learning_rate": 7.69855738527594e-06, "loss": 25.3729, "step": 193340 }, { "epoch": 0.3905792329415757, "grad_norm": 870.8306884765625, "learning_rate": 7.698263517593268e-06, "loss": 35.401, "step": 193350 }, { "epoch": 0.3905994335742595, "grad_norm": 312.2770080566406, "learning_rate": 7.697969636759586e-06, "loss": 36.4694, "step": 193360 }, { "epoch": 0.39061963420694334, "grad_norm": 318.42279052734375, "learning_rate": 7.69767574277633e-06, "loss": 20.2815, "step": 193370 }, { "epoch": 0.39063983483962716, "grad_norm": 171.6785125732422, "learning_rate": 7.697381835644926e-06, "loss": 19.937, "step": 193380 }, { "epoch": 0.390660035472311, "grad_norm": 838.3765258789062, "learning_rate": 7.697087915366811e-06, "loss": 27.686, "step": 193390 }, { "epoch": 0.3906802361049948, "grad_norm": 327.0580139160156, "learning_rate": 7.696793981943418e-06, "loss": 19.0435, "step": 193400 }, { "epoch": 0.3907004367376786, "grad_norm": 377.91900634765625, "learning_rate": 7.696500035376177e-06, "loss": 25.9461, "step": 193410 }, { "epoch": 0.39072063737036244, "grad_norm": 189.01881408691406, "learning_rate": 7.696206075666523e-06, "loss": 23.5916, "step": 193420 }, { "epoch": 0.39074083800304626, "grad_norm": 552.4807739257812, "learning_rate": 7.695912102815886e-06, "loss": 17.8163, "step": 193430 }, { "epoch": 0.3907610386357301, "grad_norm": 327.3682861328125, "learning_rate": 7.6956181168257e-06, "loss": 22.9008, "step": 193440 }, { "epoch": 0.3907812392684139, "grad_norm": 214.7379608154297, "learning_rate": 7.695324117697401e-06, "loss": 40.267, "step": 193450 }, { "epoch": 0.3908014399010977, "grad_norm": 296.865234375, "learning_rate": 7.695030105432417e-06, "loss": 18.1039, "step": 193460 }, { "epoch": 0.39082164053378154, "grad_norm": 265.7020263671875, "learning_rate": 7.694736080032185e-06, "loss": 14.8433, "step": 193470 }, { "epoch": 0.3908418411664653, "grad_norm": 197.13731384277344, "learning_rate": 7.694442041498133e-06, "loss": 14.579, "step": 193480 }, { "epoch": 0.39086204179914913, "grad_norm": 557.2015991210938, "learning_rate": 7.6941479898317e-06, "loss": 24.7952, "step": 193490 }, { "epoch": 
0.39088224243183295, "grad_norm": 260.6639099121094, "learning_rate": 7.693853925034316e-06, "loss": 29.1451, "step": 193500 }, { "epoch": 0.39090244306451677, "grad_norm": 322.4444274902344, "learning_rate": 7.693559847107415e-06, "loss": 9.7132, "step": 193510 }, { "epoch": 0.3909226436972006, "grad_norm": 284.7377624511719, "learning_rate": 7.693265756052427e-06, "loss": 23.6692, "step": 193520 }, { "epoch": 0.3909428443298844, "grad_norm": 315.2529602050781, "learning_rate": 7.692971651870793e-06, "loss": 12.345, "step": 193530 }, { "epoch": 0.39096304496256823, "grad_norm": 373.9523620605469, "learning_rate": 7.692677534563939e-06, "loss": 24.3102, "step": 193540 }, { "epoch": 0.39098324559525205, "grad_norm": 697.5858154296875, "learning_rate": 7.692383404133302e-06, "loss": 25.1512, "step": 193550 }, { "epoch": 0.39100344622793587, "grad_norm": 700.3854370117188, "learning_rate": 7.692089260580315e-06, "loss": 13.5812, "step": 193560 }, { "epoch": 0.3910236468606197, "grad_norm": 254.58399963378906, "learning_rate": 7.69179510390641e-06, "loss": 17.3985, "step": 193570 }, { "epoch": 0.3910438474933035, "grad_norm": 45.8987922668457, "learning_rate": 7.691500934113022e-06, "loss": 25.2974, "step": 193580 }, { "epoch": 0.39106404812598733, "grad_norm": 662.2791748046875, "learning_rate": 7.691206751201588e-06, "loss": 23.1583, "step": 193590 }, { "epoch": 0.39108424875867115, "grad_norm": 540.3480224609375, "learning_rate": 7.690912555173536e-06, "loss": 36.1515, "step": 193600 }, { "epoch": 0.3911044493913549, "grad_norm": 897.8367919921875, "learning_rate": 7.690618346030303e-06, "loss": 27.4172, "step": 193610 }, { "epoch": 0.39112465002403873, "grad_norm": 32.94099044799805, "learning_rate": 7.690324123773324e-06, "loss": 22.4372, "step": 193620 }, { "epoch": 0.39114485065672255, "grad_norm": 360.4986877441406, "learning_rate": 7.69002988840403e-06, "loss": 14.5806, "step": 193630 }, { "epoch": 0.3911650512894064, "grad_norm": 89.0041275024414, "learning_rate": 7.689735639923857e-06, "loss": 15.5969, "step": 193640 }, { "epoch": 0.3911852519220902, "grad_norm": 516.5864868164062, "learning_rate": 7.689441378334239e-06, "loss": 15.9018, "step": 193650 }, { "epoch": 0.391205452554774, "grad_norm": 640.2452392578125, "learning_rate": 7.68914710363661e-06, "loss": 20.0243, "step": 193660 }, { "epoch": 0.39122565318745783, "grad_norm": 348.3353271484375, "learning_rate": 7.688852815832405e-06, "loss": 9.4373, "step": 193670 }, { "epoch": 0.39124585382014165, "grad_norm": 556.3218383789062, "learning_rate": 7.688558514923055e-06, "loss": 26.3589, "step": 193680 }, { "epoch": 0.3912660544528255, "grad_norm": 357.5398864746094, "learning_rate": 7.688264200909998e-06, "loss": 21.5004, "step": 193690 }, { "epoch": 0.3912862550855093, "grad_norm": 262.3053894042969, "learning_rate": 7.687969873794667e-06, "loss": 23.7308, "step": 193700 }, { "epoch": 0.3913064557181931, "grad_norm": 1059.08154296875, "learning_rate": 7.687675533578497e-06, "loss": 20.6328, "step": 193710 }, { "epoch": 0.39132665635087693, "grad_norm": 896.2767333984375, "learning_rate": 7.687381180262924e-06, "loss": 37.2704, "step": 193720 }, { "epoch": 0.3913468569835607, "grad_norm": 287.42926025390625, "learning_rate": 7.687086813849378e-06, "loss": 18.2111, "step": 193730 }, { "epoch": 0.3913670576162445, "grad_norm": 247.09051513671875, "learning_rate": 7.6867924343393e-06, "loss": 23.5803, "step": 193740 }, { "epoch": 0.39138725824892834, "grad_norm": 257.3861083984375, "learning_rate": 7.686498041734121e-06, "loss": 
30.9001, "step": 193750 }, { "epoch": 0.39140745888161216, "grad_norm": 68.65151977539062, "learning_rate": 7.686203636035274e-06, "loss": 15.7408, "step": 193760 }, { "epoch": 0.391427659514296, "grad_norm": 1279.1070556640625, "learning_rate": 7.685909217244198e-06, "loss": 45.0109, "step": 193770 }, { "epoch": 0.3914478601469798, "grad_norm": 137.1002960205078, "learning_rate": 7.685614785362325e-06, "loss": 19.9498, "step": 193780 }, { "epoch": 0.3914680607796636, "grad_norm": 151.82278442382812, "learning_rate": 7.685320340391093e-06, "loss": 11.8919, "step": 193790 }, { "epoch": 0.39148826141234744, "grad_norm": 265.51702880859375, "learning_rate": 7.685025882331936e-06, "loss": 32.6317, "step": 193800 }, { "epoch": 0.39150846204503126, "grad_norm": 589.0602416992188, "learning_rate": 7.684731411186285e-06, "loss": 21.0638, "step": 193810 }, { "epoch": 0.3915286626777151, "grad_norm": 2400.545166015625, "learning_rate": 7.684436926955584e-06, "loss": 50.3162, "step": 193820 }, { "epoch": 0.3915488633103989, "grad_norm": 286.2929992675781, "learning_rate": 7.684142429641258e-06, "loss": 17.3957, "step": 193830 }, { "epoch": 0.3915690639430827, "grad_norm": 363.6485595703125, "learning_rate": 7.683847919244748e-06, "loss": 19.5178, "step": 193840 }, { "epoch": 0.39158926457576654, "grad_norm": 141.14840698242188, "learning_rate": 7.683553395767492e-06, "loss": 16.3106, "step": 193850 }, { "epoch": 0.3916094652084503, "grad_norm": 747.1369018554688, "learning_rate": 7.683258859210921e-06, "loss": 38.7589, "step": 193860 }, { "epoch": 0.3916296658411341, "grad_norm": 57.289024353027344, "learning_rate": 7.68296430957647e-06, "loss": 20.7131, "step": 193870 }, { "epoch": 0.39164986647381794, "grad_norm": 173.58657836914062, "learning_rate": 7.682669746865577e-06, "loss": 28.5628, "step": 193880 }, { "epoch": 0.39167006710650176, "grad_norm": 152.33546447753906, "learning_rate": 7.682375171079677e-06, "loss": 13.3818, "step": 193890 }, { "epoch": 0.3916902677391856, "grad_norm": 517.7659912109375, "learning_rate": 7.682080582220206e-06, "loss": 25.9379, "step": 193900 }, { "epoch": 0.3917104683718694, "grad_norm": 99.8304443359375, "learning_rate": 7.681785980288601e-06, "loss": 24.2838, "step": 193910 }, { "epoch": 0.3917306690045532, "grad_norm": 317.5882873535156, "learning_rate": 7.681491365286294e-06, "loss": 15.5712, "step": 193920 }, { "epoch": 0.39175086963723704, "grad_norm": 599.1548461914062, "learning_rate": 7.681196737214725e-06, "loss": 27.534, "step": 193930 }, { "epoch": 0.39177107026992086, "grad_norm": 275.6092224121094, "learning_rate": 7.680902096075327e-06, "loss": 10.7184, "step": 193940 }, { "epoch": 0.3917912709026047, "grad_norm": 411.04400634765625, "learning_rate": 7.680607441869538e-06, "loss": 14.6416, "step": 193950 }, { "epoch": 0.3918114715352885, "grad_norm": 498.05950927734375, "learning_rate": 7.680312774598794e-06, "loss": 25.6096, "step": 193960 }, { "epoch": 0.3918316721679723, "grad_norm": 96.87602996826172, "learning_rate": 7.68001809426453e-06, "loss": 18.0629, "step": 193970 }, { "epoch": 0.39185187280065614, "grad_norm": 346.5243225097656, "learning_rate": 7.679723400868181e-06, "loss": 19.9727, "step": 193980 }, { "epoch": 0.3918720734333399, "grad_norm": 462.3692321777344, "learning_rate": 7.679428694411188e-06, "loss": 17.1533, "step": 193990 }, { "epoch": 0.3918922740660237, "grad_norm": 357.5788879394531, "learning_rate": 7.679133974894984e-06, "loss": 18.8631, "step": 194000 }, { "epoch": 0.39191247469870755, "grad_norm": 872.04248046875, 
"learning_rate": 7.678839242321005e-06, "loss": 21.2325, "step": 194010 }, { "epoch": 0.39193267533139137, "grad_norm": 219.5369415283203, "learning_rate": 7.67854449669069e-06, "loss": 17.9927, "step": 194020 }, { "epoch": 0.3919528759640752, "grad_norm": 701.3224487304688, "learning_rate": 7.678249738005473e-06, "loss": 25.9722, "step": 194030 }, { "epoch": 0.391973076596759, "grad_norm": 110.27085876464844, "learning_rate": 7.677954966266791e-06, "loss": 17.4507, "step": 194040 }, { "epoch": 0.3919932772294428, "grad_norm": 402.5440673828125, "learning_rate": 7.67766018147608e-06, "loss": 19.4773, "step": 194050 }, { "epoch": 0.39201347786212665, "grad_norm": 224.45831298828125, "learning_rate": 7.677365383634782e-06, "loss": 15.217, "step": 194060 }, { "epoch": 0.39203367849481047, "grad_norm": 18.64542579650879, "learning_rate": 7.677070572744327e-06, "loss": 14.8555, "step": 194070 }, { "epoch": 0.3920538791274943, "grad_norm": 448.8019104003906, "learning_rate": 7.676775748806156e-06, "loss": 16.4689, "step": 194080 }, { "epoch": 0.3920740797601781, "grad_norm": 680.8139038085938, "learning_rate": 7.676480911821705e-06, "loss": 19.097, "step": 194090 }, { "epoch": 0.3920942803928619, "grad_norm": 333.9926452636719, "learning_rate": 7.676186061792408e-06, "loss": 28.193, "step": 194100 }, { "epoch": 0.39211448102554575, "grad_norm": 250.51133728027344, "learning_rate": 7.675891198719707e-06, "loss": 44.4848, "step": 194110 }, { "epoch": 0.3921346816582295, "grad_norm": 350.780517578125, "learning_rate": 7.675596322605036e-06, "loss": 10.5025, "step": 194120 }, { "epoch": 0.39215488229091333, "grad_norm": 682.9677124023438, "learning_rate": 7.675301433449833e-06, "loss": 23.318, "step": 194130 }, { "epoch": 0.39217508292359715, "grad_norm": 266.6212158203125, "learning_rate": 7.675006531255537e-06, "loss": 19.9699, "step": 194140 }, { "epoch": 0.39219528355628097, "grad_norm": 493.7453308105469, "learning_rate": 7.67471161602358e-06, "loss": 23.1714, "step": 194150 }, { "epoch": 0.3922154841889648, "grad_norm": 237.18321228027344, "learning_rate": 7.674416687755406e-06, "loss": 11.0524, "step": 194160 }, { "epoch": 0.3922356848216486, "grad_norm": 233.67233276367188, "learning_rate": 7.67412174645245e-06, "loss": 26.9773, "step": 194170 }, { "epoch": 0.39225588545433243, "grad_norm": 503.0744934082031, "learning_rate": 7.673826792116146e-06, "loss": 19.6552, "step": 194180 }, { "epoch": 0.39227608608701625, "grad_norm": 1173.34814453125, "learning_rate": 7.673531824747937e-06, "loss": 33.5286, "step": 194190 }, { "epoch": 0.39229628671970007, "grad_norm": 446.452880859375, "learning_rate": 7.673236844349257e-06, "loss": 18.7192, "step": 194200 }, { "epoch": 0.3923164873523839, "grad_norm": 192.9727783203125, "learning_rate": 7.672941850921545e-06, "loss": 17.7934, "step": 194210 }, { "epoch": 0.3923366879850677, "grad_norm": 660.9296264648438, "learning_rate": 7.67264684446624e-06, "loss": 16.9177, "step": 194220 }, { "epoch": 0.39235688861775153, "grad_norm": 330.9119567871094, "learning_rate": 7.672351824984777e-06, "loss": 29.6802, "step": 194230 }, { "epoch": 0.39237708925043535, "grad_norm": 202.8923797607422, "learning_rate": 7.672056792478595e-06, "loss": 11.5582, "step": 194240 }, { "epoch": 0.3923972898831191, "grad_norm": 226.911865234375, "learning_rate": 7.671761746949133e-06, "loss": 27.6348, "step": 194250 }, { "epoch": 0.39241749051580294, "grad_norm": 316.6273498535156, "learning_rate": 7.671466688397828e-06, "loss": 20.0882, "step": 194260 }, { "epoch": 
0.39243769114848676, "grad_norm": 129.1420135498047, "learning_rate": 7.671171616826117e-06, "loss": 19.2911, "step": 194270 }, { "epoch": 0.3924578917811706, "grad_norm": 636.3347778320312, "learning_rate": 7.670876532235444e-06, "loss": 16.3546, "step": 194280 }, { "epoch": 0.3924780924138544, "grad_norm": 334.3447265625, "learning_rate": 7.670581434627237e-06, "loss": 45.9473, "step": 194290 }, { "epoch": 0.3924982930465382, "grad_norm": 63.227874755859375, "learning_rate": 7.670286324002943e-06, "loss": 8.8525, "step": 194300 }, { "epoch": 0.39251849367922204, "grad_norm": 351.48858642578125, "learning_rate": 7.669991200363997e-06, "loss": 16.564, "step": 194310 }, { "epoch": 0.39253869431190586, "grad_norm": 22.39614486694336, "learning_rate": 7.669696063711837e-06, "loss": 19.5439, "step": 194320 }, { "epoch": 0.3925588949445897, "grad_norm": 349.4576110839844, "learning_rate": 7.669400914047903e-06, "loss": 17.9836, "step": 194330 }, { "epoch": 0.3925790955772735, "grad_norm": 331.0430603027344, "learning_rate": 7.669105751373633e-06, "loss": 16.4443, "step": 194340 }, { "epoch": 0.3925992962099573, "grad_norm": 414.0130920410156, "learning_rate": 7.668810575690465e-06, "loss": 21.053, "step": 194350 }, { "epoch": 0.39261949684264114, "grad_norm": 350.8014831542969, "learning_rate": 7.668515386999837e-06, "loss": 22.02, "step": 194360 }, { "epoch": 0.3926396974753249, "grad_norm": 351.5557556152344, "learning_rate": 7.66822018530319e-06, "loss": 24.537, "step": 194370 }, { "epoch": 0.3926598981080087, "grad_norm": 233.8751220703125, "learning_rate": 7.667924970601961e-06, "loss": 29.8172, "step": 194380 }, { "epoch": 0.39268009874069254, "grad_norm": 894.9293823242188, "learning_rate": 7.667629742897589e-06, "loss": 30.2181, "step": 194390 }, { "epoch": 0.39270029937337636, "grad_norm": 247.10067749023438, "learning_rate": 7.667334502191514e-06, "loss": 12.0517, "step": 194400 }, { "epoch": 0.3927205000060602, "grad_norm": 409.8434143066406, "learning_rate": 7.667039248485173e-06, "loss": 15.6907, "step": 194410 }, { "epoch": 0.392740700638744, "grad_norm": 364.4486389160156, "learning_rate": 7.666743981780007e-06, "loss": 20.5752, "step": 194420 }, { "epoch": 0.3927609012714278, "grad_norm": 112.05078125, "learning_rate": 7.666448702077454e-06, "loss": 14.4298, "step": 194430 }, { "epoch": 0.39278110190411164, "grad_norm": 246.4239501953125, "learning_rate": 7.666153409378954e-06, "loss": 13.8918, "step": 194440 }, { "epoch": 0.39280130253679546, "grad_norm": 729.3032836914062, "learning_rate": 7.665858103685944e-06, "loss": 22.3902, "step": 194450 }, { "epoch": 0.3928215031694793, "grad_norm": 496.1769714355469, "learning_rate": 7.665562784999865e-06, "loss": 28.3222, "step": 194460 }, { "epoch": 0.3928417038021631, "grad_norm": 784.860595703125, "learning_rate": 7.665267453322158e-06, "loss": 18.3699, "step": 194470 }, { "epoch": 0.3928619044348469, "grad_norm": 225.1392059326172, "learning_rate": 7.664972108654261e-06, "loss": 21.1566, "step": 194480 }, { "epoch": 0.39288210506753074, "grad_norm": 371.5826721191406, "learning_rate": 7.664676750997611e-06, "loss": 18.9349, "step": 194490 }, { "epoch": 0.3929023057002145, "grad_norm": 220.47027587890625, "learning_rate": 7.66438138035365e-06, "loss": 11.1904, "step": 194500 }, { "epoch": 0.3929225063328983, "grad_norm": 239.3990478515625, "learning_rate": 7.664085996723819e-06, "loss": 16.0179, "step": 194510 }, { "epoch": 0.39294270696558214, "grad_norm": 57.55672836303711, "learning_rate": 7.663790600109554e-06, "loss": 32.0592, 
"step": 194520 }, { "epoch": 0.39296290759826596, "grad_norm": 682.9308471679688, "learning_rate": 7.663495190512297e-06, "loss": 27.1631, "step": 194530 }, { "epoch": 0.3929831082309498, "grad_norm": 966.5193481445312, "learning_rate": 7.663199767933489e-06, "loss": 23.8877, "step": 194540 }, { "epoch": 0.3930033088636336, "grad_norm": 568.3528442382812, "learning_rate": 7.662904332374568e-06, "loss": 32.1116, "step": 194550 }, { "epoch": 0.3930235094963174, "grad_norm": 1156.96875, "learning_rate": 7.662608883836975e-06, "loss": 27.4185, "step": 194560 }, { "epoch": 0.39304371012900124, "grad_norm": 294.32171630859375, "learning_rate": 7.662313422322147e-06, "loss": 19.2916, "step": 194570 }, { "epoch": 0.39306391076168506, "grad_norm": 581.0121459960938, "learning_rate": 7.662017947831528e-06, "loss": 19.6493, "step": 194580 }, { "epoch": 0.3930841113943689, "grad_norm": 762.0816650390625, "learning_rate": 7.661722460366556e-06, "loss": 36.2527, "step": 194590 }, { "epoch": 0.3931043120270527, "grad_norm": 326.9516906738281, "learning_rate": 7.66142695992867e-06, "loss": 15.3531, "step": 194600 }, { "epoch": 0.3931245126597365, "grad_norm": 295.166259765625, "learning_rate": 7.661131446519314e-06, "loss": 24.2526, "step": 194610 }, { "epoch": 0.39314471329242034, "grad_norm": 699.7713623046875, "learning_rate": 7.660835920139926e-06, "loss": 30.8892, "step": 194620 }, { "epoch": 0.3931649139251041, "grad_norm": 365.5238952636719, "learning_rate": 7.660540380791944e-06, "loss": 13.6505, "step": 194630 }, { "epoch": 0.39318511455778793, "grad_norm": 188.6708221435547, "learning_rate": 7.660244828476812e-06, "loss": 16.7058, "step": 194640 }, { "epoch": 0.39320531519047175, "grad_norm": 623.6669311523438, "learning_rate": 7.659949263195971e-06, "loss": 31.2965, "step": 194650 }, { "epoch": 0.39322551582315557, "grad_norm": 505.28240966796875, "learning_rate": 7.659653684950859e-06, "loss": 13.8678, "step": 194660 }, { "epoch": 0.3932457164558394, "grad_norm": 212.126953125, "learning_rate": 7.659358093742917e-06, "loss": 13.8969, "step": 194670 }, { "epoch": 0.3932659170885232, "grad_norm": 942.80322265625, "learning_rate": 7.659062489573585e-06, "loss": 29.1217, "step": 194680 }, { "epoch": 0.39328611772120703, "grad_norm": 313.01116943359375, "learning_rate": 7.658766872444307e-06, "loss": 12.3671, "step": 194690 }, { "epoch": 0.39330631835389085, "grad_norm": 293.50726318359375, "learning_rate": 7.658471242356521e-06, "loss": 15.6545, "step": 194700 }, { "epoch": 0.39332651898657467, "grad_norm": 198.49395751953125, "learning_rate": 7.658175599311667e-06, "loss": 22.4372, "step": 194710 }, { "epoch": 0.3933467196192585, "grad_norm": 486.50640869140625, "learning_rate": 7.65787994331119e-06, "loss": 34.3354, "step": 194720 }, { "epoch": 0.3933669202519423, "grad_norm": 602.3167114257812, "learning_rate": 7.657584274356529e-06, "loss": 26.0346, "step": 194730 }, { "epoch": 0.39338712088462613, "grad_norm": 655.1782836914062, "learning_rate": 7.657288592449124e-06, "loss": 23.3669, "step": 194740 }, { "epoch": 0.39340732151730995, "grad_norm": 334.3829345703125, "learning_rate": 7.656992897590416e-06, "loss": 21.9736, "step": 194750 }, { "epoch": 0.3934275221499937, "grad_norm": 420.82806396484375, "learning_rate": 7.656697189781846e-06, "loss": 14.5847, "step": 194760 }, { "epoch": 0.39344772278267753, "grad_norm": 351.6455078125, "learning_rate": 7.656401469024856e-06, "loss": 17.5412, "step": 194770 }, { "epoch": 0.39346792341536135, "grad_norm": 207.95065307617188, "learning_rate": 
7.65610573532089e-06, "loss": 28.8794, "step": 194780 }, { "epoch": 0.3934881240480452, "grad_norm": 291.03179931640625, "learning_rate": 7.655809988671383e-06, "loss": 50.9971, "step": 194790 }, { "epoch": 0.393508324680729, "grad_norm": 432.8161926269531, "learning_rate": 7.655514229077784e-06, "loss": 17.4149, "step": 194800 }, { "epoch": 0.3935285253134128, "grad_norm": 554.6521606445312, "learning_rate": 7.65521845654153e-06, "loss": 12.0385, "step": 194810 }, { "epoch": 0.39354872594609663, "grad_norm": 356.1087341308594, "learning_rate": 7.654922671064062e-06, "loss": 21.1111, "step": 194820 }, { "epoch": 0.39356892657878045, "grad_norm": 248.7077178955078, "learning_rate": 7.654626872646824e-06, "loss": 26.6326, "step": 194830 }, { "epoch": 0.3935891272114643, "grad_norm": 127.09835815429688, "learning_rate": 7.654331061291254e-06, "loss": 40.9527, "step": 194840 }, { "epoch": 0.3936093278441481, "grad_norm": 168.29798889160156, "learning_rate": 7.6540352369988e-06, "loss": 30.0293, "step": 194850 }, { "epoch": 0.3936295284768319, "grad_norm": 252.59275817871094, "learning_rate": 7.653739399770897e-06, "loss": 15.8375, "step": 194860 }, { "epoch": 0.39364972910951573, "grad_norm": 602.8658447265625, "learning_rate": 7.653443549608993e-06, "loss": 37.0763, "step": 194870 }, { "epoch": 0.3936699297421995, "grad_norm": 411.3060302734375, "learning_rate": 7.653147686514523e-06, "loss": 32.9975, "step": 194880 }, { "epoch": 0.3936901303748833, "grad_norm": 624.1795043945312, "learning_rate": 7.652851810488937e-06, "loss": 33.5942, "step": 194890 }, { "epoch": 0.39371033100756714, "grad_norm": 113.71038055419922, "learning_rate": 7.652555921533671e-06, "loss": 9.8372, "step": 194900 }, { "epoch": 0.39373053164025096, "grad_norm": 820.1229858398438, "learning_rate": 7.65226001965017e-06, "loss": 25.7756, "step": 194910 }, { "epoch": 0.3937507322729348, "grad_norm": 383.6437683105469, "learning_rate": 7.651964104839876e-06, "loss": 19.5417, "step": 194920 }, { "epoch": 0.3937709329056186, "grad_norm": 464.3129577636719, "learning_rate": 7.651668177104227e-06, "loss": 15.4653, "step": 194930 }, { "epoch": 0.3937911335383024, "grad_norm": 97.2220230102539, "learning_rate": 7.651372236444673e-06, "loss": 22.6917, "step": 194940 }, { "epoch": 0.39381133417098624, "grad_norm": 459.4883117675781, "learning_rate": 7.65107628286265e-06, "loss": 21.9122, "step": 194950 }, { "epoch": 0.39383153480367006, "grad_norm": 702.5147094726562, "learning_rate": 7.650780316359604e-06, "loss": 19.4244, "step": 194960 }, { "epoch": 0.3938517354363539, "grad_norm": 127.46588897705078, "learning_rate": 7.650484336936976e-06, "loss": 14.7059, "step": 194970 }, { "epoch": 0.3938719360690377, "grad_norm": 164.09481811523438, "learning_rate": 7.650188344596207e-06, "loss": 13.868, "step": 194980 }, { "epoch": 0.3938921367017215, "grad_norm": 575.3648071289062, "learning_rate": 7.649892339338743e-06, "loss": 27.2575, "step": 194990 }, { "epoch": 0.39391233733440534, "grad_norm": 111.28274536132812, "learning_rate": 7.649596321166024e-06, "loss": 15.7311, "step": 195000 }, { "epoch": 0.3939325379670891, "grad_norm": 645.0296630859375, "learning_rate": 7.649300290079497e-06, "loss": 23.9902, "step": 195010 }, { "epoch": 0.3939527385997729, "grad_norm": 1080.9793701171875, "learning_rate": 7.6490042460806e-06, "loss": 31.0383, "step": 195020 }, { "epoch": 0.39397293923245674, "grad_norm": 611.94921875, "learning_rate": 7.648708189170777e-06, "loss": 30.8021, "step": 195030 }, { "epoch": 0.39399313986514056, "grad_norm": 
380.3797607421875, "learning_rate": 7.648412119351471e-06, "loss": 21.9114, "step": 195040 }, { "epoch": 0.3940133404978244, "grad_norm": 234.54901123046875, "learning_rate": 7.648116036624125e-06, "loss": 36.2674, "step": 195050 }, { "epoch": 0.3940335411305082, "grad_norm": 327.9686279296875, "learning_rate": 7.647819940990184e-06, "loss": 15.7611, "step": 195060 }, { "epoch": 0.394053741763192, "grad_norm": 187.89361572265625, "learning_rate": 7.647523832451091e-06, "loss": 28.8206, "step": 195070 }, { "epoch": 0.39407394239587584, "grad_norm": 289.9301452636719, "learning_rate": 7.647227711008288e-06, "loss": 24.5769, "step": 195080 }, { "epoch": 0.39409414302855966, "grad_norm": 3.1522254943847656, "learning_rate": 7.646931576663215e-06, "loss": 24.1421, "step": 195090 }, { "epoch": 0.3941143436612435, "grad_norm": 4.271010398864746, "learning_rate": 7.646635429417322e-06, "loss": 12.4696, "step": 195100 }, { "epoch": 0.3941345442939273, "grad_norm": 267.3883056640625, "learning_rate": 7.646339269272045e-06, "loss": 23.5687, "step": 195110 }, { "epoch": 0.3941547449266111, "grad_norm": 338.36083984375, "learning_rate": 7.646043096228835e-06, "loss": 18.5252, "step": 195120 }, { "epoch": 0.39417494555929494, "grad_norm": 473.6835632324219, "learning_rate": 7.645746910289128e-06, "loss": 19.1282, "step": 195130 }, { "epoch": 0.3941951461919787, "grad_norm": 211.53900146484375, "learning_rate": 7.645450711454377e-06, "loss": 15.0999, "step": 195140 }, { "epoch": 0.3942153468246625, "grad_norm": 588.5606079101562, "learning_rate": 7.645154499726017e-06, "loss": 23.0524, "step": 195150 }, { "epoch": 0.39423554745734635, "grad_norm": 350.8065490722656, "learning_rate": 7.644858275105494e-06, "loss": 12.7395, "step": 195160 }, { "epoch": 0.39425574809003017, "grad_norm": 261.5005798339844, "learning_rate": 7.644562037594254e-06, "loss": 16.985, "step": 195170 }, { "epoch": 0.394275948722714, "grad_norm": 599.6815185546875, "learning_rate": 7.644265787193739e-06, "loss": 20.9377, "step": 195180 }, { "epoch": 0.3942961493553978, "grad_norm": 790.2697143554688, "learning_rate": 7.643969523905392e-06, "loss": 23.1945, "step": 195190 }, { "epoch": 0.3943163499880816, "grad_norm": 480.89324951171875, "learning_rate": 7.64367324773066e-06, "loss": 37.3124, "step": 195200 }, { "epoch": 0.39433655062076545, "grad_norm": 286.4555358886719, "learning_rate": 7.643376958670983e-06, "loss": 22.1148, "step": 195210 }, { "epoch": 0.39435675125344927, "grad_norm": 358.4879455566406, "learning_rate": 7.643080656727809e-06, "loss": 19.6391, "step": 195220 }, { "epoch": 0.3943769518861331, "grad_norm": 455.17266845703125, "learning_rate": 7.642784341902581e-06, "loss": 13.485, "step": 195230 }, { "epoch": 0.3943971525188169, "grad_norm": 251.19456481933594, "learning_rate": 7.642488014196742e-06, "loss": 25.8569, "step": 195240 }, { "epoch": 0.3944173531515007, "grad_norm": 478.7985534667969, "learning_rate": 7.642191673611737e-06, "loss": 28.5158, "step": 195250 }, { "epoch": 0.39443755378418455, "grad_norm": 328.29254150390625, "learning_rate": 7.641895320149008e-06, "loss": 21.5013, "step": 195260 }, { "epoch": 0.3944577544168683, "grad_norm": 197.05569458007812, "learning_rate": 7.641598953810006e-06, "loss": 12.8383, "step": 195270 }, { "epoch": 0.39447795504955213, "grad_norm": 427.639892578125, "learning_rate": 7.641302574596168e-06, "loss": 24.9733, "step": 195280 }, { "epoch": 0.39449815568223595, "grad_norm": 310.03375244140625, "learning_rate": 7.64100618250894e-06, "loss": 29.2077, "step": 195290 }, 
{ "epoch": 0.39451835631491977, "grad_norm": 226.11190795898438, "learning_rate": 7.640709777549773e-06, "loss": 40.6232, "step": 195300 }, { "epoch": 0.3945385569476036, "grad_norm": 1317.2608642578125, "learning_rate": 7.640413359720105e-06, "loss": 21.724, "step": 195310 }, { "epoch": 0.3945587575802874, "grad_norm": 172.037841796875, "learning_rate": 7.64011692902138e-06, "loss": 11.937, "step": 195320 }, { "epoch": 0.39457895821297123, "grad_norm": 579.522216796875, "learning_rate": 7.639820485455047e-06, "loss": 20.1632, "step": 195330 }, { "epoch": 0.39459915884565505, "grad_norm": 1046.946533203125, "learning_rate": 7.639524029022552e-06, "loss": 30.2525, "step": 195340 }, { "epoch": 0.39461935947833887, "grad_norm": 661.1083374023438, "learning_rate": 7.639227559725333e-06, "loss": 20.4884, "step": 195350 }, { "epoch": 0.3946395601110227, "grad_norm": 864.996826171875, "learning_rate": 7.63893107756484e-06, "loss": 18.8866, "step": 195360 }, { "epoch": 0.3946597607437065, "grad_norm": 448.4447021484375, "learning_rate": 7.638634582542516e-06, "loss": 16.8178, "step": 195370 }, { "epoch": 0.39467996137639033, "grad_norm": 669.9586791992188, "learning_rate": 7.63833807465981e-06, "loss": 18.6679, "step": 195380 }, { "epoch": 0.39470016200907415, "grad_norm": 263.8163146972656, "learning_rate": 7.638041553918162e-06, "loss": 17.5448, "step": 195390 }, { "epoch": 0.3947203626417579, "grad_norm": 543.4058837890625, "learning_rate": 7.637745020319019e-06, "loss": 10.2975, "step": 195400 }, { "epoch": 0.39474056327444174, "grad_norm": 430.3114318847656, "learning_rate": 7.63744847386383e-06, "loss": 18.436, "step": 195410 }, { "epoch": 0.39476076390712556, "grad_norm": 474.70654296875, "learning_rate": 7.637151914554033e-06, "loss": 24.9709, "step": 195420 }, { "epoch": 0.3947809645398094, "grad_norm": 733.6818237304688, "learning_rate": 7.63685534239108e-06, "loss": 19.4783, "step": 195430 }, { "epoch": 0.3948011651724932, "grad_norm": 208.99607849121094, "learning_rate": 7.636558757376413e-06, "loss": 17.9064, "step": 195440 }, { "epoch": 0.394821365805177, "grad_norm": 785.7476806640625, "learning_rate": 7.636262159511479e-06, "loss": 25.3541, "step": 195450 }, { "epoch": 0.39484156643786084, "grad_norm": 7.1770548820495605, "learning_rate": 7.63596554879772e-06, "loss": 39.068, "step": 195460 }, { "epoch": 0.39486176707054466, "grad_norm": 293.75311279296875, "learning_rate": 7.635668925236588e-06, "loss": 16.6436, "step": 195470 }, { "epoch": 0.3948819677032285, "grad_norm": 455.05364990234375, "learning_rate": 7.635372288829524e-06, "loss": 29.1417, "step": 195480 }, { "epoch": 0.3949021683359123, "grad_norm": 664.2986450195312, "learning_rate": 7.635075639577976e-06, "loss": 17.6787, "step": 195490 }, { "epoch": 0.3949223689685961, "grad_norm": 1943.1253662109375, "learning_rate": 7.634778977483389e-06, "loss": 43.1422, "step": 195500 }, { "epoch": 0.39494256960127994, "grad_norm": 351.3155212402344, "learning_rate": 7.634482302547208e-06, "loss": 28.7181, "step": 195510 }, { "epoch": 0.3949627702339637, "grad_norm": 181.8381805419922, "learning_rate": 7.63418561477088e-06, "loss": 20.9259, "step": 195520 }, { "epoch": 0.3949829708666475, "grad_norm": 194.98898315429688, "learning_rate": 7.63388891415585e-06, "loss": 17.6536, "step": 195530 }, { "epoch": 0.39500317149933134, "grad_norm": 308.9329833984375, "learning_rate": 7.633592200703566e-06, "loss": 25.9925, "step": 195540 }, { "epoch": 0.39502337213201516, "grad_norm": 702.1701049804688, "learning_rate": 
7.633295474415473e-06, "loss": 22.3518, "step": 195550 }, { "epoch": 0.395043572764699, "grad_norm": 698.7434692382812, "learning_rate": 7.632998735293016e-06, "loss": 20.4989, "step": 195560 }, { "epoch": 0.3950637733973828, "grad_norm": 549.2517700195312, "learning_rate": 7.632701983337645e-06, "loss": 47.9753, "step": 195570 }, { "epoch": 0.3950839740300666, "grad_norm": 686.4854125976562, "learning_rate": 7.632405218550801e-06, "loss": 21.3223, "step": 195580 }, { "epoch": 0.39510417466275044, "grad_norm": 73.8600082397461, "learning_rate": 7.632108440933934e-06, "loss": 14.1037, "step": 195590 }, { "epoch": 0.39512437529543426, "grad_norm": 224.45777893066406, "learning_rate": 7.63181165048849e-06, "loss": 13.8222, "step": 195600 }, { "epoch": 0.3951445759281181, "grad_norm": 287.5701904296875, "learning_rate": 7.631514847215914e-06, "loss": 33.526, "step": 195610 }, { "epoch": 0.3951647765608019, "grad_norm": 0.0, "learning_rate": 7.631218031117658e-06, "loss": 19.2528, "step": 195620 }, { "epoch": 0.3951849771934857, "grad_norm": 795.55029296875, "learning_rate": 7.630921202195161e-06, "loss": 24.2244, "step": 195630 }, { "epoch": 0.39520517782616954, "grad_norm": 187.6152801513672, "learning_rate": 7.630624360449875e-06, "loss": 13.1685, "step": 195640 }, { "epoch": 0.3952253784588533, "grad_norm": 337.955078125, "learning_rate": 7.630327505883243e-06, "loss": 12.6366, "step": 195650 }, { "epoch": 0.3952455790915371, "grad_norm": 153.82737731933594, "learning_rate": 7.630030638496714e-06, "loss": 17.0972, "step": 195660 }, { "epoch": 0.39526577972422094, "grad_norm": 423.9061584472656, "learning_rate": 7.629733758291736e-06, "loss": 24.7043, "step": 195670 }, { "epoch": 0.39528598035690476, "grad_norm": 843.265869140625, "learning_rate": 7.629436865269753e-06, "loss": 32.2621, "step": 195680 }, { "epoch": 0.3953061809895886, "grad_norm": 294.4344482421875, "learning_rate": 7.629139959432215e-06, "loss": 16.6348, "step": 195690 }, { "epoch": 0.3953263816222724, "grad_norm": 165.56494140625, "learning_rate": 7.628843040780567e-06, "loss": 20.1531, "step": 195700 }, { "epoch": 0.3953465822549562, "grad_norm": 106.0355453491211, "learning_rate": 7.628546109316257e-06, "loss": 17.0529, "step": 195710 }, { "epoch": 0.39536678288764004, "grad_norm": 368.1607666015625, "learning_rate": 7.628249165040731e-06, "loss": 19.9909, "step": 195720 }, { "epoch": 0.39538698352032386, "grad_norm": 425.4073486328125, "learning_rate": 7.627952207955439e-06, "loss": 24.6675, "step": 195730 }, { "epoch": 0.3954071841530077, "grad_norm": 375.9248962402344, "learning_rate": 7.627655238061825e-06, "loss": 34.5446, "step": 195740 }, { "epoch": 0.3954273847856915, "grad_norm": 900.6353759765625, "learning_rate": 7.627358255361339e-06, "loss": 18.2016, "step": 195750 }, { "epoch": 0.3954475854183753, "grad_norm": 351.25244140625, "learning_rate": 7.627061259855428e-06, "loss": 8.3365, "step": 195760 }, { "epoch": 0.39546778605105914, "grad_norm": 367.19952392578125, "learning_rate": 7.626764251545539e-06, "loss": 21.8847, "step": 195770 }, { "epoch": 0.3954879866837429, "grad_norm": 552.2587890625, "learning_rate": 7.62646723043312e-06, "loss": 23.0697, "step": 195780 }, { "epoch": 0.39550818731642673, "grad_norm": 345.9349060058594, "learning_rate": 7.626170196519618e-06, "loss": 29.2348, "step": 195790 }, { "epoch": 0.39552838794911055, "grad_norm": 613.0227661132812, "learning_rate": 7.6258731498064796e-06, "loss": 44.9344, "step": 195800 }, { "epoch": 0.39554858858179437, "grad_norm": 109.75715637207031, 
"learning_rate": 7.625576090295155e-06, "loss": 27.0129, "step": 195810 }, { "epoch": 0.3955687892144782, "grad_norm": 366.6320495605469, "learning_rate": 7.625279017987091e-06, "loss": 17.0775, "step": 195820 }, { "epoch": 0.395588989847162, "grad_norm": 280.7791748046875, "learning_rate": 7.624981932883735e-06, "loss": 15.4656, "step": 195830 }, { "epoch": 0.39560919047984583, "grad_norm": 61.66830062866211, "learning_rate": 7.624684834986536e-06, "loss": 15.0344, "step": 195840 }, { "epoch": 0.39562939111252965, "grad_norm": 108.14071655273438, "learning_rate": 7.624387724296941e-06, "loss": 16.5607, "step": 195850 }, { "epoch": 0.39564959174521347, "grad_norm": 224.29344177246094, "learning_rate": 7.6240906008163985e-06, "loss": 25.5896, "step": 195860 }, { "epoch": 0.3956697923778973, "grad_norm": 128.72021484375, "learning_rate": 7.623793464546359e-06, "loss": 40.8091, "step": 195870 }, { "epoch": 0.3956899930105811, "grad_norm": 780.8737182617188, "learning_rate": 7.623496315488264e-06, "loss": 35.4666, "step": 195880 }, { "epoch": 0.39571019364326493, "grad_norm": 361.8275146484375, "learning_rate": 7.623199153643569e-06, "loss": 12.9063, "step": 195890 }, { "epoch": 0.39573039427594875, "grad_norm": 228.5116729736328, "learning_rate": 7.622901979013717e-06, "loss": 15.9548, "step": 195900 }, { "epoch": 0.3957505949086325, "grad_norm": 378.7085266113281, "learning_rate": 7.6226047916001624e-06, "loss": 18.3263, "step": 195910 }, { "epoch": 0.39577079554131633, "grad_norm": 566.7664794921875, "learning_rate": 7.622307591404347e-06, "loss": 25.1457, "step": 195920 }, { "epoch": 0.39579099617400015, "grad_norm": 454.21746826171875, "learning_rate": 7.622010378427725e-06, "loss": 28.153, "step": 195930 }, { "epoch": 0.395811196806684, "grad_norm": 268.2760009765625, "learning_rate": 7.621713152671742e-06, "loss": 13.4463, "step": 195940 }, { "epoch": 0.3958313974393678, "grad_norm": 234.8856964111328, "learning_rate": 7.6214159141378465e-06, "loss": 26.7978, "step": 195950 }, { "epoch": 0.3958515980720516, "grad_norm": 562.821044921875, "learning_rate": 7.621118662827487e-06, "loss": 13.562, "step": 195960 }, { "epoch": 0.39587179870473543, "grad_norm": 353.9815368652344, "learning_rate": 7.620821398742114e-06, "loss": 10.7441, "step": 195970 }, { "epoch": 0.39589199933741925, "grad_norm": 1172.15966796875, "learning_rate": 7.620524121883175e-06, "loss": 16.3424, "step": 195980 }, { "epoch": 0.3959121999701031, "grad_norm": 326.10137939453125, "learning_rate": 7.62022683225212e-06, "loss": 20.9486, "step": 195990 }, { "epoch": 0.3959324006027869, "grad_norm": 10.76241397857666, "learning_rate": 7.619929529850397e-06, "loss": 26.3895, "step": 196000 }, { "epoch": 0.3959526012354707, "grad_norm": 661.6845703125, "learning_rate": 7.6196322146794534e-06, "loss": 11.5033, "step": 196010 }, { "epoch": 0.39597280186815453, "grad_norm": 655.6265258789062, "learning_rate": 7.619334886740744e-06, "loss": 14.49, "step": 196020 }, { "epoch": 0.39599300250083835, "grad_norm": 820.6245727539062, "learning_rate": 7.61903754603571e-06, "loss": 25.1901, "step": 196030 }, { "epoch": 0.3960132031335221, "grad_norm": 353.4188537597656, "learning_rate": 7.618740192565806e-06, "loss": 23.5844, "step": 196040 }, { "epoch": 0.39603340376620594, "grad_norm": 731.4053344726562, "learning_rate": 7.6184428263324815e-06, "loss": 38.6164, "step": 196050 }, { "epoch": 0.39605360439888976, "grad_norm": 323.4394226074219, "learning_rate": 7.618145447337182e-06, "loss": 16.8984, "step": 196060 }, { "epoch": 
0.3960738050315736, "grad_norm": 418.0279846191406, "learning_rate": 7.617848055581361e-06, "loss": 19.5996, "step": 196070 }, { "epoch": 0.3960940056642574, "grad_norm": 267.29449462890625, "learning_rate": 7.6175506510664645e-06, "loss": 12.705, "step": 196080 }, { "epoch": 0.3961142062969412, "grad_norm": 327.3512878417969, "learning_rate": 7.617253233793944e-06, "loss": 12.123, "step": 196090 }, { "epoch": 0.39613440692962504, "grad_norm": 512.3671875, "learning_rate": 7.616955803765249e-06, "loss": 23.7502, "step": 196100 }, { "epoch": 0.39615460756230886, "grad_norm": 817.8502807617188, "learning_rate": 7.616658360981828e-06, "loss": 19.767, "step": 196110 }, { "epoch": 0.3961748081949927, "grad_norm": 197.24513244628906, "learning_rate": 7.616360905445132e-06, "loss": 36.9933, "step": 196120 }, { "epoch": 0.3961950088276765, "grad_norm": 875.3771362304688, "learning_rate": 7.616063437156611e-06, "loss": 39.9314, "step": 196130 }, { "epoch": 0.3962152094603603, "grad_norm": 288.16058349609375, "learning_rate": 7.615765956117714e-06, "loss": 10.6367, "step": 196140 }, { "epoch": 0.39623541009304414, "grad_norm": 545.18603515625, "learning_rate": 7.61546846232989e-06, "loss": 21.7529, "step": 196150 }, { "epoch": 0.3962556107257279, "grad_norm": 80.21540069580078, "learning_rate": 7.615170955794592e-06, "loss": 38.5269, "step": 196160 }, { "epoch": 0.3962758113584117, "grad_norm": 321.4330749511719, "learning_rate": 7.614873436513265e-06, "loss": 15.3385, "step": 196170 }, { "epoch": 0.39629601199109554, "grad_norm": 392.32110595703125, "learning_rate": 7.614575904487365e-06, "loss": 22.9912, "step": 196180 }, { "epoch": 0.39631621262377936, "grad_norm": 468.4725646972656, "learning_rate": 7.6142783597183365e-06, "loss": 21.5545, "step": 196190 }, { "epoch": 0.3963364132564632, "grad_norm": 502.376708984375, "learning_rate": 7.613980802207633e-06, "loss": 18.922, "step": 196200 }, { "epoch": 0.396356613889147, "grad_norm": 578.26123046875, "learning_rate": 7.613683231956705e-06, "loss": 25.8956, "step": 196210 }, { "epoch": 0.3963768145218308, "grad_norm": 328.1247863769531, "learning_rate": 7.613385648967002e-06, "loss": 18.3482, "step": 196220 }, { "epoch": 0.39639701515451464, "grad_norm": 365.4832763671875, "learning_rate": 7.613088053239974e-06, "loss": 12.2532, "step": 196230 }, { "epoch": 0.39641721578719846, "grad_norm": 146.03553771972656, "learning_rate": 7.612790444777072e-06, "loss": 26.2427, "step": 196240 }, { "epoch": 0.3964374164198823, "grad_norm": 614.1575927734375, "learning_rate": 7.612492823579744e-06, "loss": 19.1647, "step": 196250 }, { "epoch": 0.3964576170525661, "grad_norm": 662.9046630859375, "learning_rate": 7.612195189649445e-06, "loss": 36.0849, "step": 196260 }, { "epoch": 0.3964778176852499, "grad_norm": 194.89370727539062, "learning_rate": 7.611897542987623e-06, "loss": 15.8646, "step": 196270 }, { "epoch": 0.39649801831793374, "grad_norm": 401.27874755859375, "learning_rate": 7.611599883595731e-06, "loss": 11.8066, "step": 196280 }, { "epoch": 0.3965182189506175, "grad_norm": 207.72634887695312, "learning_rate": 7.611302211475216e-06, "loss": 19.1647, "step": 196290 }, { "epoch": 0.3965384195833013, "grad_norm": 471.66790771484375, "learning_rate": 7.6110045266275305e-06, "loss": 15.0866, "step": 196300 }, { "epoch": 0.39655862021598515, "grad_norm": 246.96693420410156, "learning_rate": 7.610706829054126e-06, "loss": 27.5003, "step": 196310 }, { "epoch": 0.39657882084866897, "grad_norm": 401.9541320800781, "learning_rate": 7.610409118756454e-06, 
"loss": 14.7462, "step": 196320 }, { "epoch": 0.3965990214813528, "grad_norm": 593.8477783203125, "learning_rate": 7.610111395735962e-06, "loss": 37.6239, "step": 196330 }, { "epoch": 0.3966192221140366, "grad_norm": 453.9722900390625, "learning_rate": 7.609813659994107e-06, "loss": 14.7958, "step": 196340 }, { "epoch": 0.3966394227467204, "grad_norm": 587.7510986328125, "learning_rate": 7.6095159115323335e-06, "loss": 20.1343, "step": 196350 }, { "epoch": 0.39665962337940425, "grad_norm": 504.5757751464844, "learning_rate": 7.609218150352098e-06, "loss": 27.4714, "step": 196360 }, { "epoch": 0.39667982401208807, "grad_norm": 404.4772644042969, "learning_rate": 7.608920376454849e-06, "loss": 20.5916, "step": 196370 }, { "epoch": 0.3967000246447719, "grad_norm": 323.6693115234375, "learning_rate": 7.608622589842039e-06, "loss": 16.1455, "step": 196380 }, { "epoch": 0.3967202252774557, "grad_norm": 316.3536682128906, "learning_rate": 7.608324790515119e-06, "loss": 18.9734, "step": 196390 }, { "epoch": 0.3967404259101395, "grad_norm": 565.381591796875, "learning_rate": 7.6080269784755405e-06, "loss": 29.0074, "step": 196400 }, { "epoch": 0.39676062654282335, "grad_norm": 323.6951599121094, "learning_rate": 7.607729153724755e-06, "loss": 17.4683, "step": 196410 }, { "epoch": 0.3967808271755071, "grad_norm": 386.5226135253906, "learning_rate": 7.607431316264211e-06, "loss": 13.6444, "step": 196420 }, { "epoch": 0.39680102780819093, "grad_norm": 136.9131317138672, "learning_rate": 7.607133466095365e-06, "loss": 11.536, "step": 196430 }, { "epoch": 0.39682122844087475, "grad_norm": 366.0284423828125, "learning_rate": 7.606835603219666e-06, "loss": 17.8898, "step": 196440 }, { "epoch": 0.39684142907355857, "grad_norm": 434.31982421875, "learning_rate": 7.60653772763857e-06, "loss": 16.1737, "step": 196450 }, { "epoch": 0.3968616297062424, "grad_norm": 632.747802734375, "learning_rate": 7.606239839353522e-06, "loss": 18.743, "step": 196460 }, { "epoch": 0.3968818303389262, "grad_norm": 513.1290283203125, "learning_rate": 7.605941938365977e-06, "loss": 14.2984, "step": 196470 }, { "epoch": 0.39690203097161003, "grad_norm": 1058.4290771484375, "learning_rate": 7.6056440246773884e-06, "loss": 25.4774, "step": 196480 }, { "epoch": 0.39692223160429385, "grad_norm": 674.5564575195312, "learning_rate": 7.605346098289206e-06, "loss": 17.9885, "step": 196490 }, { "epoch": 0.39694243223697767, "grad_norm": 570.1978149414062, "learning_rate": 7.605048159202884e-06, "loss": 24.6575, "step": 196500 }, { "epoch": 0.3969626328696615, "grad_norm": 621.2515258789062, "learning_rate": 7.60475020741987e-06, "loss": 31.1932, "step": 196510 }, { "epoch": 0.3969828335023453, "grad_norm": 641.5540161132812, "learning_rate": 7.604452242941622e-06, "loss": 21.09, "step": 196520 }, { "epoch": 0.39700303413502913, "grad_norm": 514.0419921875, "learning_rate": 7.60415426576959e-06, "loss": 21.2872, "step": 196530 }, { "epoch": 0.39702323476771295, "grad_norm": 267.4769287109375, "learning_rate": 7.603856275905223e-06, "loss": 16.4014, "step": 196540 }, { "epoch": 0.3970434354003967, "grad_norm": 793.6900634765625, "learning_rate": 7.6035582733499805e-06, "loss": 27.5892, "step": 196550 }, { "epoch": 0.39706363603308054, "grad_norm": 972.1981811523438, "learning_rate": 7.6032602581053075e-06, "loss": 17.3954, "step": 196560 }, { "epoch": 0.39708383666576436, "grad_norm": 312.2232360839844, "learning_rate": 7.602962230172661e-06, "loss": 13.5148, "step": 196570 }, { "epoch": 0.3971040372984482, "grad_norm": 254.5087127685547, 
"learning_rate": 7.6026641895534925e-06, "loss": 22.2328, "step": 196580 }, { "epoch": 0.397124237931132, "grad_norm": 351.739501953125, "learning_rate": 7.602366136249254e-06, "loss": 13.8995, "step": 196590 }, { "epoch": 0.3971444385638158, "grad_norm": 587.6464233398438, "learning_rate": 7.6020680702613995e-06, "loss": 21.4347, "step": 196600 }, { "epoch": 0.39716463919649964, "grad_norm": 348.7734680175781, "learning_rate": 7.60176999159138e-06, "loss": 21.451, "step": 196610 }, { "epoch": 0.39718483982918346, "grad_norm": 483.9173583984375, "learning_rate": 7.601471900240648e-06, "loss": 21.8984, "step": 196620 }, { "epoch": 0.3972050404618673, "grad_norm": 347.1903076171875, "learning_rate": 7.601173796210659e-06, "loss": 21.8069, "step": 196630 }, { "epoch": 0.3972252410945511, "grad_norm": 169.56793212890625, "learning_rate": 7.600875679502864e-06, "loss": 17.1872, "step": 196640 }, { "epoch": 0.3972454417272349, "grad_norm": 252.4035186767578, "learning_rate": 7.6005775501187165e-06, "loss": 19.6626, "step": 196650 }, { "epoch": 0.39726564235991874, "grad_norm": 198.06736755371094, "learning_rate": 7.60027940805967e-06, "loss": 13.199, "step": 196660 }, { "epoch": 0.39728584299260256, "grad_norm": 1069.942138671875, "learning_rate": 7.5999812533271755e-06, "loss": 15.3515, "step": 196670 }, { "epoch": 0.3973060436252863, "grad_norm": 282.4885559082031, "learning_rate": 7.599683085922689e-06, "loss": 21.1991, "step": 196680 }, { "epoch": 0.39732624425797014, "grad_norm": 131.6488037109375, "learning_rate": 7.599384905847662e-06, "loss": 23.1902, "step": 196690 }, { "epoch": 0.39734644489065396, "grad_norm": 367.5249938964844, "learning_rate": 7.5990867131035474e-06, "loss": 12.6256, "step": 196700 }, { "epoch": 0.3973666455233378, "grad_norm": 1070.554931640625, "learning_rate": 7.598788507691801e-06, "loss": 22.2413, "step": 196710 }, { "epoch": 0.3973868461560216, "grad_norm": 386.4486999511719, "learning_rate": 7.5984902896138736e-06, "loss": 32.7206, "step": 196720 }, { "epoch": 0.3974070467887054, "grad_norm": 75.12928009033203, "learning_rate": 7.598192058871221e-06, "loss": 25.8482, "step": 196730 }, { "epoch": 0.39742724742138924, "grad_norm": 198.99949645996094, "learning_rate": 7.597893815465294e-06, "loss": 9.4323, "step": 196740 }, { "epoch": 0.39744744805407306, "grad_norm": 417.2445983886719, "learning_rate": 7.597595559397548e-06, "loss": 18.515, "step": 196750 }, { "epoch": 0.3974676486867569, "grad_norm": 304.7272644042969, "learning_rate": 7.597297290669437e-06, "loss": 13.3798, "step": 196760 }, { "epoch": 0.3974878493194407, "grad_norm": 576.5830688476562, "learning_rate": 7.596999009282413e-06, "loss": 20.7862, "step": 196770 }, { "epoch": 0.3975080499521245, "grad_norm": 580.6087036132812, "learning_rate": 7.5967007152379305e-06, "loss": 29.0417, "step": 196780 }, { "epoch": 0.39752825058480834, "grad_norm": 351.1151428222656, "learning_rate": 7.596402408537444e-06, "loss": 23.2411, "step": 196790 }, { "epoch": 0.3975484512174921, "grad_norm": 889.0579833984375, "learning_rate": 7.596104089182408e-06, "loss": 26.9643, "step": 196800 }, { "epoch": 0.3975686518501759, "grad_norm": 710.7026977539062, "learning_rate": 7.595805757174275e-06, "loss": 22.4164, "step": 196810 }, { "epoch": 0.39758885248285974, "grad_norm": 351.8357238769531, "learning_rate": 7.5955074125145e-06, "loss": 20.6835, "step": 196820 }, { "epoch": 0.39760905311554356, "grad_norm": 509.480224609375, "learning_rate": 7.595209055204534e-06, "loss": 13.3233, "step": 196830 }, { "epoch": 
0.3976292537482274, "grad_norm": 145.74423217773438, "learning_rate": 7.594910685245837e-06, "loss": 25.3805, "step": 196840 }, { "epoch": 0.3976494543809112, "grad_norm": 207.77796936035156, "learning_rate": 7.594612302639859e-06, "loss": 23.9481, "step": 196850 }, { "epoch": 0.397669655013595, "grad_norm": 375.04638671875, "learning_rate": 7.5943139073880555e-06, "loss": 19.9612, "step": 196860 }, { "epoch": 0.39768985564627884, "grad_norm": 433.2248229980469, "learning_rate": 7.5940154994918806e-06, "loss": 15.8358, "step": 196870 }, { "epoch": 0.39771005627896266, "grad_norm": 187.95147705078125, "learning_rate": 7.593717078952788e-06, "loss": 24.1666, "step": 196880 }, { "epoch": 0.3977302569116465, "grad_norm": 376.43994140625, "learning_rate": 7.593418645772235e-06, "loss": 28.1238, "step": 196890 }, { "epoch": 0.3977504575443303, "grad_norm": 634.16455078125, "learning_rate": 7.5931201999516715e-06, "loss": 25.7747, "step": 196900 }, { "epoch": 0.3977706581770141, "grad_norm": 931.7929077148438, "learning_rate": 7.592821741492555e-06, "loss": 22.2204, "step": 196910 }, { "epoch": 0.39779085880969794, "grad_norm": 567.7821044921875, "learning_rate": 7.592523270396342e-06, "loss": 60.1065, "step": 196920 }, { "epoch": 0.3978110594423817, "grad_norm": 189.20652770996094, "learning_rate": 7.592224786664484e-06, "loss": 19.5442, "step": 196930 }, { "epoch": 0.39783126007506553, "grad_norm": 277.2565002441406, "learning_rate": 7.591926290298435e-06, "loss": 23.0767, "step": 196940 }, { "epoch": 0.39785146070774935, "grad_norm": 260.0182800292969, "learning_rate": 7.591627781299654e-06, "loss": 17.1038, "step": 196950 }, { "epoch": 0.39787166134043317, "grad_norm": 338.0267333984375, "learning_rate": 7.5913292596695906e-06, "loss": 17.5577, "step": 196960 }, { "epoch": 0.397891861973117, "grad_norm": 584.3551635742188, "learning_rate": 7.5910307254097075e-06, "loss": 10.1535, "step": 196970 }, { "epoch": 0.3979120626058008, "grad_norm": 676.46630859375, "learning_rate": 7.590732178521451e-06, "loss": 18.7005, "step": 196980 }, { "epoch": 0.39793226323848463, "grad_norm": 457.1989440917969, "learning_rate": 7.590433619006281e-06, "loss": 21.0176, "step": 196990 }, { "epoch": 0.39795246387116845, "grad_norm": 198.6430206298828, "learning_rate": 7.590135046865652e-06, "loss": 18.1502, "step": 197000 }, { "epoch": 0.39797266450385227, "grad_norm": 607.3787841796875, "learning_rate": 7.589836462101019e-06, "loss": 18.3786, "step": 197010 }, { "epoch": 0.3979928651365361, "grad_norm": 383.4787292480469, "learning_rate": 7.589537864713836e-06, "loss": 21.7777, "step": 197020 }, { "epoch": 0.3980130657692199, "grad_norm": 258.12158203125, "learning_rate": 7.58923925470556e-06, "loss": 12.7004, "step": 197030 }, { "epoch": 0.39803326640190373, "grad_norm": 149.9590606689453, "learning_rate": 7.588940632077647e-06, "loss": 16.3806, "step": 197040 }, { "epoch": 0.39805346703458755, "grad_norm": 42.28246307373047, "learning_rate": 7.588641996831551e-06, "loss": 16.3697, "step": 197050 }, { "epoch": 0.3980736676672713, "grad_norm": 605.8641357421875, "learning_rate": 7.588343348968728e-06, "loss": 22.4684, "step": 197060 }, { "epoch": 0.39809386829995513, "grad_norm": 631.8109130859375, "learning_rate": 7.588044688490633e-06, "loss": 19.5397, "step": 197070 }, { "epoch": 0.39811406893263895, "grad_norm": 980.2217407226562, "learning_rate": 7.587746015398723e-06, "loss": 42.9501, "step": 197080 }, { "epoch": 0.3981342695653228, "grad_norm": 256.7890319824219, "learning_rate": 7.587447329694451e-06, 
"loss": 12.6726, "step": 197090 }, { "epoch": 0.3981544701980066, "grad_norm": 273.36187744140625, "learning_rate": 7.587148631379276e-06, "loss": 29.7439, "step": 197100 }, { "epoch": 0.3981746708306904, "grad_norm": 142.3683319091797, "learning_rate": 7.586849920454652e-06, "loss": 18.1001, "step": 197110 }, { "epoch": 0.39819487146337423, "grad_norm": 90.94284057617188, "learning_rate": 7.586551196922034e-06, "loss": 21.5349, "step": 197120 }, { "epoch": 0.39821507209605805, "grad_norm": 1.5760222673416138, "learning_rate": 7.586252460782882e-06, "loss": 14.9537, "step": 197130 }, { "epoch": 0.3982352727287419, "grad_norm": 346.44256591796875, "learning_rate": 7.585953712038646e-06, "loss": 9.2722, "step": 197140 }, { "epoch": 0.3982554733614257, "grad_norm": 1218.5108642578125, "learning_rate": 7.585654950690786e-06, "loss": 35.6418, "step": 197150 }, { "epoch": 0.3982756739941095, "grad_norm": 427.25762939453125, "learning_rate": 7.585356176740759e-06, "loss": 17.8915, "step": 197160 }, { "epoch": 0.39829587462679333, "grad_norm": 426.3423156738281, "learning_rate": 7.5850573901900185e-06, "loss": 19.4384, "step": 197170 }, { "epoch": 0.39831607525947715, "grad_norm": 478.8867492675781, "learning_rate": 7.584758591040022e-06, "loss": 26.0589, "step": 197180 }, { "epoch": 0.3983362758921609, "grad_norm": 616.3558959960938, "learning_rate": 7.584459779292226e-06, "loss": 22.0291, "step": 197190 }, { "epoch": 0.39835647652484474, "grad_norm": 205.8076934814453, "learning_rate": 7.5841609549480854e-06, "loss": 29.4691, "step": 197200 }, { "epoch": 0.39837667715752856, "grad_norm": 312.9051513671875, "learning_rate": 7.583862118009058e-06, "loss": 28.2658, "step": 197210 }, { "epoch": 0.3983968777902124, "grad_norm": 771.70703125, "learning_rate": 7.583563268476602e-06, "loss": 24.9648, "step": 197220 }, { "epoch": 0.3984170784228962, "grad_norm": 231.82586669921875, "learning_rate": 7.583264406352169e-06, "loss": 21.6845, "step": 197230 }, { "epoch": 0.39843727905558, "grad_norm": 693.255615234375, "learning_rate": 7.582965531637221e-06, "loss": 16.0977, "step": 197240 }, { "epoch": 0.39845747968826384, "grad_norm": 1050.954345703125, "learning_rate": 7.58266664433321e-06, "loss": 31.7357, "step": 197250 }, { "epoch": 0.39847768032094766, "grad_norm": 264.2238464355469, "learning_rate": 7.582367744441597e-06, "loss": 17.3054, "step": 197260 }, { "epoch": 0.3984978809536315, "grad_norm": 29.328338623046875, "learning_rate": 7.582068831963836e-06, "loss": 16.1974, "step": 197270 }, { "epoch": 0.3985180815863153, "grad_norm": 90.00679016113281, "learning_rate": 7.5817699069013835e-06, "loss": 18.1018, "step": 197280 }, { "epoch": 0.3985382822189991, "grad_norm": 230.550537109375, "learning_rate": 7.5814709692557e-06, "loss": 23.5122, "step": 197290 }, { "epoch": 0.39855848285168294, "grad_norm": 338.776123046875, "learning_rate": 7.581172019028238e-06, "loss": 10.5504, "step": 197300 }, { "epoch": 0.39857868348436676, "grad_norm": 313.533447265625, "learning_rate": 7.580873056220458e-06, "loss": 27.3461, "step": 197310 }, { "epoch": 0.3985988841170505, "grad_norm": 162.95481872558594, "learning_rate": 7.580574080833816e-06, "loss": 14.9247, "step": 197320 }, { "epoch": 0.39861908474973434, "grad_norm": 59.26035690307617, "learning_rate": 7.580275092869766e-06, "loss": 8.7449, "step": 197330 }, { "epoch": 0.39863928538241816, "grad_norm": 303.17474365234375, "learning_rate": 7.579976092329772e-06, "loss": 19.3581, "step": 197340 }, { "epoch": 0.398659486015102, "grad_norm": 45.15639114379883, 
"learning_rate": 7.579677079215286e-06, "loss": 21.2257, "step": 197350 }, { "epoch": 0.3986796866477858, "grad_norm": 364.3130798339844, "learning_rate": 7.5793780535277665e-06, "loss": 16.8157, "step": 197360 }, { "epoch": 0.3986998872804696, "grad_norm": 113.99015045166016, "learning_rate": 7.579079015268671e-06, "loss": 16.8319, "step": 197370 }, { "epoch": 0.39872008791315344, "grad_norm": 816.494384765625, "learning_rate": 7.5787799644394576e-06, "loss": 32.5022, "step": 197380 }, { "epoch": 0.39874028854583726, "grad_norm": 11.595226287841797, "learning_rate": 7.578480901041583e-06, "loss": 15.6218, "step": 197390 }, { "epoch": 0.3987604891785211, "grad_norm": 329.2650146484375, "learning_rate": 7.578181825076506e-06, "loss": 17.355, "step": 197400 }, { "epoch": 0.3987806898112049, "grad_norm": 415.5538330078125, "learning_rate": 7.577882736545683e-06, "loss": 16.8613, "step": 197410 }, { "epoch": 0.3988008904438887, "grad_norm": 156.80242919921875, "learning_rate": 7.577583635450572e-06, "loss": 39.1839, "step": 197420 }, { "epoch": 0.39882109107657254, "grad_norm": 392.8519592285156, "learning_rate": 7.577284521792632e-06, "loss": 39.5167, "step": 197430 }, { "epoch": 0.3988412917092563, "grad_norm": 160.69647216796875, "learning_rate": 7.576985395573318e-06, "loss": 18.4188, "step": 197440 }, { "epoch": 0.3988614923419401, "grad_norm": 66.4518814086914, "learning_rate": 7.576686256794092e-06, "loss": 20.8966, "step": 197450 }, { "epoch": 0.39888169297462395, "grad_norm": 1258.77783203125, "learning_rate": 7.576387105456408e-06, "loss": 22.6945, "step": 197460 }, { "epoch": 0.39890189360730777, "grad_norm": 210.94589233398438, "learning_rate": 7.576087941561725e-06, "loss": 14.4726, "step": 197470 }, { "epoch": 0.3989220942399916, "grad_norm": 321.79595947265625, "learning_rate": 7.575788765111504e-06, "loss": 20.7766, "step": 197480 }, { "epoch": 0.3989422948726754, "grad_norm": 377.4902038574219, "learning_rate": 7.5754895761072e-06, "loss": 20.7367, "step": 197490 }, { "epoch": 0.3989624955053592, "grad_norm": 3.9381821155548096, "learning_rate": 7.575190374550272e-06, "loss": 20.1107, "step": 197500 }, { "epoch": 0.39898269613804305, "grad_norm": 1592.7799072265625, "learning_rate": 7.574891160442179e-06, "loss": 36.0749, "step": 197510 }, { "epoch": 0.39900289677072687, "grad_norm": 616.9158325195312, "learning_rate": 7.574591933784378e-06, "loss": 27.1391, "step": 197520 }, { "epoch": 0.3990230974034107, "grad_norm": 507.11236572265625, "learning_rate": 7.574292694578329e-06, "loss": 15.9596, "step": 197530 }, { "epoch": 0.3990432980360945, "grad_norm": 16.916385650634766, "learning_rate": 7.573993442825489e-06, "loss": 15.5859, "step": 197540 }, { "epoch": 0.3990634986687783, "grad_norm": 271.2107849121094, "learning_rate": 7.573694178527317e-06, "loss": 12.2203, "step": 197550 }, { "epoch": 0.39908369930146215, "grad_norm": 187.93568420410156, "learning_rate": 7.573394901685271e-06, "loss": 10.1335, "step": 197560 }, { "epoch": 0.3991038999341459, "grad_norm": 298.5325012207031, "learning_rate": 7.573095612300813e-06, "loss": 30.5151, "step": 197570 }, { "epoch": 0.39912410056682973, "grad_norm": 109.72039031982422, "learning_rate": 7.572796310375397e-06, "loss": 27.6701, "step": 197580 }, { "epoch": 0.39914430119951355, "grad_norm": 23.563430786132812, "learning_rate": 7.5724969959104835e-06, "loss": 13.946, "step": 197590 }, { "epoch": 0.39916450183219737, "grad_norm": 168.8724365234375, "learning_rate": 7.572197668907533e-06, "loss": 35.5947, "step": 197600 }, { 
"epoch": 0.3991847024648812, "grad_norm": 246.98825073242188, "learning_rate": 7.571898329368004e-06, "loss": 16.9728, "step": 197610 }, { "epoch": 0.399204903097565, "grad_norm": 180.78948974609375, "learning_rate": 7.571598977293351e-06, "loss": 22.5512, "step": 197620 }, { "epoch": 0.39922510373024883, "grad_norm": 328.37451171875, "learning_rate": 7.571299612685039e-06, "loss": 19.8352, "step": 197630 }, { "epoch": 0.39924530436293265, "grad_norm": 1071.57421875, "learning_rate": 7.571000235544524e-06, "loss": 23.538, "step": 197640 }, { "epoch": 0.39926550499561647, "grad_norm": 11091.6884765625, "learning_rate": 7.570700845873265e-06, "loss": 38.0649, "step": 197650 }, { "epoch": 0.3992857056283003, "grad_norm": 177.7987823486328, "learning_rate": 7.570401443672723e-06, "loss": 21.3488, "step": 197660 }, { "epoch": 0.3993059062609841, "grad_norm": 206.8262939453125, "learning_rate": 7.570102028944356e-06, "loss": 31.9598, "step": 197670 }, { "epoch": 0.39932610689366793, "grad_norm": 237.55679321289062, "learning_rate": 7.569802601689623e-06, "loss": 33.1679, "step": 197680 }, { "epoch": 0.39934630752635175, "grad_norm": 552.0098266601562, "learning_rate": 7.569503161909984e-06, "loss": 12.7157, "step": 197690 }, { "epoch": 0.3993665081590355, "grad_norm": 419.996337890625, "learning_rate": 7.569203709606898e-06, "loss": 31.8053, "step": 197700 }, { "epoch": 0.39938670879171934, "grad_norm": 506.3651123046875, "learning_rate": 7.568904244781825e-06, "loss": 21.68, "step": 197710 }, { "epoch": 0.39940690942440316, "grad_norm": 86.0506591796875, "learning_rate": 7.568604767436225e-06, "loss": 16.1703, "step": 197720 }, { "epoch": 0.399427110057087, "grad_norm": 430.3340759277344, "learning_rate": 7.5683052775715545e-06, "loss": 29.1856, "step": 197730 }, { "epoch": 0.3994473106897708, "grad_norm": 181.5095977783203, "learning_rate": 7.568005775189278e-06, "loss": 20.1455, "step": 197740 }, { "epoch": 0.3994675113224546, "grad_norm": 352.21929931640625, "learning_rate": 7.5677062602908515e-06, "loss": 20.4361, "step": 197750 }, { "epoch": 0.39948771195513844, "grad_norm": 361.66436767578125, "learning_rate": 7.567406732877735e-06, "loss": 14.4292, "step": 197760 }, { "epoch": 0.39950791258782226, "grad_norm": 148.053955078125, "learning_rate": 7.567107192951393e-06, "loss": 16.3337, "step": 197770 }, { "epoch": 0.3995281132205061, "grad_norm": 253.73312377929688, "learning_rate": 7.566807640513278e-06, "loss": 9.7145, "step": 197780 }, { "epoch": 0.3995483138531899, "grad_norm": 0.0, "learning_rate": 7.5665080755648575e-06, "loss": 18.1261, "step": 197790 }, { "epoch": 0.3995685144858737, "grad_norm": 211.78326416015625, "learning_rate": 7.566208498107586e-06, "loss": 20.9847, "step": 197800 }, { "epoch": 0.39958871511855754, "grad_norm": 290.6148986816406, "learning_rate": 7.5659089081429245e-06, "loss": 24.0462, "step": 197810 }, { "epoch": 0.39960891575124136, "grad_norm": 406.0163269042969, "learning_rate": 7.565609305672336e-06, "loss": 20.732, "step": 197820 }, { "epoch": 0.3996291163839251, "grad_norm": 785.3375244140625, "learning_rate": 7.565309690697279e-06, "loss": 29.2806, "step": 197830 }, { "epoch": 0.39964931701660894, "grad_norm": 258.56170654296875, "learning_rate": 7.565010063219214e-06, "loss": 16.6117, "step": 197840 }, { "epoch": 0.39966951764929276, "grad_norm": 117.95482635498047, "learning_rate": 7.5647104232395985e-06, "loss": 20.0101, "step": 197850 }, { "epoch": 0.3996897182819766, "grad_norm": 344.67987060546875, "learning_rate": 7.564410770759897e-06, 
"loss": 32.3461, "step": 197860 }, { "epoch": 0.3997099189146604, "grad_norm": 283.5302429199219, "learning_rate": 7.564111105781568e-06, "loss": 26.5465, "step": 197870 }, { "epoch": 0.3997301195473442, "grad_norm": 483.49658203125, "learning_rate": 7.5638114283060735e-06, "loss": 19.7947, "step": 197880 }, { "epoch": 0.39975032018002804, "grad_norm": 43.05898666381836, "learning_rate": 7.5635117383348725e-06, "loss": 16.5961, "step": 197890 }, { "epoch": 0.39977052081271186, "grad_norm": 311.8305969238281, "learning_rate": 7.563212035869426e-06, "loss": 14.6824, "step": 197900 }, { "epoch": 0.3997907214453957, "grad_norm": 369.6077880859375, "learning_rate": 7.5629123209111955e-06, "loss": 21.0569, "step": 197910 }, { "epoch": 0.3998109220780795, "grad_norm": 571.8080444335938, "learning_rate": 7.56261259346164e-06, "loss": 28.5138, "step": 197920 }, { "epoch": 0.3998311227107633, "grad_norm": 815.8748168945312, "learning_rate": 7.5623128535222224e-06, "loss": 20.3018, "step": 197930 }, { "epoch": 0.39985132334344714, "grad_norm": 466.9893493652344, "learning_rate": 7.562013101094403e-06, "loss": 11.5698, "step": 197940 }, { "epoch": 0.3998715239761309, "grad_norm": 503.07440185546875, "learning_rate": 7.561713336179642e-06, "loss": 40.4079, "step": 197950 }, { "epoch": 0.3998917246088147, "grad_norm": 1044.09619140625, "learning_rate": 7.561413558779401e-06, "loss": 39.9123, "step": 197960 }, { "epoch": 0.39991192524149854, "grad_norm": 720.7599487304688, "learning_rate": 7.5611137688951405e-06, "loss": 27.5598, "step": 197970 }, { "epoch": 0.39993212587418236, "grad_norm": 314.8182373046875, "learning_rate": 7.560813966528323e-06, "loss": 13.6287, "step": 197980 }, { "epoch": 0.3999523265068662, "grad_norm": 356.9963073730469, "learning_rate": 7.560514151680409e-06, "loss": 16.692, "step": 197990 }, { "epoch": 0.39997252713955, "grad_norm": 1151.5570068359375, "learning_rate": 7.560214324352858e-06, "loss": 32.3764, "step": 198000 }, { "epoch": 0.3999927277722338, "grad_norm": 686.989013671875, "learning_rate": 7.559914484547135e-06, "loss": 26.3395, "step": 198010 }, { "epoch": 0.40001292840491764, "grad_norm": 447.1755676269531, "learning_rate": 7.559614632264698e-06, "loss": 20.3976, "step": 198020 }, { "epoch": 0.40003312903760146, "grad_norm": 280.5345764160156, "learning_rate": 7.559314767507009e-06, "loss": 30.6482, "step": 198030 }, { "epoch": 0.4000533296702853, "grad_norm": 871.556396484375, "learning_rate": 7.559014890275533e-06, "loss": 19.0827, "step": 198040 }, { "epoch": 0.4000735303029691, "grad_norm": 585.48828125, "learning_rate": 7.5587150005717256e-06, "loss": 32.5156, "step": 198050 }, { "epoch": 0.4000937309356529, "grad_norm": 284.8545227050781, "learning_rate": 7.558415098397054e-06, "loss": 21.1683, "step": 198060 }, { "epoch": 0.40011393156833674, "grad_norm": 694.2283325195312, "learning_rate": 7.558115183752975e-06, "loss": 16.6071, "step": 198070 }, { "epoch": 0.4001341322010205, "grad_norm": 667.6007690429688, "learning_rate": 7.557815256640954e-06, "loss": 20.7687, "step": 198080 }, { "epoch": 0.40015433283370433, "grad_norm": 318.7449645996094, "learning_rate": 7.557515317062451e-06, "loss": 23.4089, "step": 198090 }, { "epoch": 0.40017453346638815, "grad_norm": 1349.544921875, "learning_rate": 7.55721536501893e-06, "loss": 21.347, "step": 198100 }, { "epoch": 0.40019473409907197, "grad_norm": 256.22198486328125, "learning_rate": 7.556915400511853e-06, "loss": 8.9897, "step": 198110 }, { "epoch": 0.4002149347317558, "grad_norm": 86.92630767822266, 
"learning_rate": 7.556615423542677e-06, "loss": 28.3522, "step": 198120 }, { "epoch": 0.4002351353644396, "grad_norm": 918.8314208984375, "learning_rate": 7.5563154341128695e-06, "loss": 21.2617, "step": 198130 }, { "epoch": 0.40025533599712343, "grad_norm": 78.71343994140625, "learning_rate": 7.55601543222389e-06, "loss": 23.4041, "step": 198140 }, { "epoch": 0.40027553662980725, "grad_norm": 143.3230743408203, "learning_rate": 7.555715417877201e-06, "loss": 16.1013, "step": 198150 }, { "epoch": 0.40029573726249107, "grad_norm": 369.9261474609375, "learning_rate": 7.5554153910742655e-06, "loss": 19.2886, "step": 198160 }, { "epoch": 0.4003159378951749, "grad_norm": 148.36329650878906, "learning_rate": 7.555115351816545e-06, "loss": 17.6967, "step": 198170 }, { "epoch": 0.4003361385278587, "grad_norm": 618.6707153320312, "learning_rate": 7.554815300105502e-06, "loss": 23.1028, "step": 198180 }, { "epoch": 0.40035633916054253, "grad_norm": 19.182809829711914, "learning_rate": 7.5545152359426e-06, "loss": 23.1774, "step": 198190 }, { "epoch": 0.40037653979322635, "grad_norm": 145.69302368164062, "learning_rate": 7.5542151593293e-06, "loss": 14.8161, "step": 198200 }, { "epoch": 0.4003967404259101, "grad_norm": 346.6979675292969, "learning_rate": 7.553915070267065e-06, "loss": 26.5464, "step": 198210 }, { "epoch": 0.40041694105859393, "grad_norm": 475.8887939453125, "learning_rate": 7.553614968757359e-06, "loss": 18.5249, "step": 198220 }, { "epoch": 0.40043714169127775, "grad_norm": 420.0718994140625, "learning_rate": 7.553314854801641e-06, "loss": 27.0639, "step": 198230 }, { "epoch": 0.4004573423239616, "grad_norm": 45.04717254638672, "learning_rate": 7.553014728401378e-06, "loss": 21.8668, "step": 198240 }, { "epoch": 0.4004775429566454, "grad_norm": 547.7006225585938, "learning_rate": 7.55271458955803e-06, "loss": 27.888, "step": 198250 }, { "epoch": 0.4004977435893292, "grad_norm": 388.16845703125, "learning_rate": 7.5524144382730605e-06, "loss": 16.2497, "step": 198260 }, { "epoch": 0.40051794422201303, "grad_norm": 276.40576171875, "learning_rate": 7.552114274547933e-06, "loss": 9.4322, "step": 198270 }, { "epoch": 0.40053814485469685, "grad_norm": 989.9608154296875, "learning_rate": 7.5518140983841095e-06, "loss": 35.7655, "step": 198280 }, { "epoch": 0.4005583454873807, "grad_norm": 530.4025268554688, "learning_rate": 7.551513909783055e-06, "loss": 20.6342, "step": 198290 }, { "epoch": 0.4005785461200645, "grad_norm": 573.7498779296875, "learning_rate": 7.55121370874623e-06, "loss": 15.7636, "step": 198300 }, { "epoch": 0.4005987467527483, "grad_norm": 514.5701293945312, "learning_rate": 7.550913495275098e-06, "loss": 22.8541, "step": 198310 }, { "epoch": 0.40061894738543213, "grad_norm": 284.38702392578125, "learning_rate": 7.550613269371124e-06, "loss": 19.1488, "step": 198320 }, { "epoch": 0.40063914801811595, "grad_norm": 540.36767578125, "learning_rate": 7.550313031035772e-06, "loss": 14.1672, "step": 198330 }, { "epoch": 0.4006593486507997, "grad_norm": 233.47023010253906, "learning_rate": 7.550012780270499e-06, "loss": 29.5194, "step": 198340 }, { "epoch": 0.40067954928348354, "grad_norm": 634.5081787109375, "learning_rate": 7.549712517076777e-06, "loss": 20.172, "step": 198350 }, { "epoch": 0.40069974991616736, "grad_norm": 816.7568359375, "learning_rate": 7.5494122414560645e-06, "loss": 20.3773, "step": 198360 }, { "epoch": 0.4007199505488512, "grad_norm": 570.5978393554688, "learning_rate": 7.549111953409827e-06, "loss": 17.73, "step": 198370 }, { "epoch": 
0.400740151181535, "grad_norm": 329.0459289550781, "learning_rate": 7.548811652939525e-06, "loss": 18.9579, "step": 198380 }, { "epoch": 0.4007603518142188, "grad_norm": 289.1005554199219, "learning_rate": 7.548511340046625e-06, "loss": 28.7145, "step": 198390 }, { "epoch": 0.40078055244690264, "grad_norm": 455.5233154296875, "learning_rate": 7.548211014732589e-06, "loss": 15.8559, "step": 198400 }, { "epoch": 0.40080075307958646, "grad_norm": 300.59869384765625, "learning_rate": 7.547910676998883e-06, "loss": 27.1662, "step": 198410 }, { "epoch": 0.4008209537122703, "grad_norm": 676.0629272460938, "learning_rate": 7.547610326846968e-06, "loss": 20.6297, "step": 198420 }, { "epoch": 0.4008411543449541, "grad_norm": 242.51876831054688, "learning_rate": 7.547309964278311e-06, "loss": 17.7566, "step": 198430 }, { "epoch": 0.4008613549776379, "grad_norm": 141.97592163085938, "learning_rate": 7.547009589294374e-06, "loss": 16.042, "step": 198440 }, { "epoch": 0.40088155561032174, "grad_norm": 27.422712326049805, "learning_rate": 7.546709201896619e-06, "loss": 10.796, "step": 198450 }, { "epoch": 0.40090175624300556, "grad_norm": 127.43767547607422, "learning_rate": 7.546408802086513e-06, "loss": 16.8642, "step": 198460 }, { "epoch": 0.4009219568756893, "grad_norm": 83.21980285644531, "learning_rate": 7.546108389865521e-06, "loss": 18.779, "step": 198470 }, { "epoch": 0.40094215750837314, "grad_norm": 403.13580322265625, "learning_rate": 7.545807965235103e-06, "loss": 23.6962, "step": 198480 }, { "epoch": 0.40096235814105696, "grad_norm": 24.013486862182617, "learning_rate": 7.5455075281967295e-06, "loss": 13.1861, "step": 198490 }, { "epoch": 0.4009825587737408, "grad_norm": 0.0, "learning_rate": 7.545207078751858e-06, "loss": 21.689, "step": 198500 }, { "epoch": 0.4010027594064246, "grad_norm": 506.35382080078125, "learning_rate": 7.544906616901957e-06, "loss": 16.2279, "step": 198510 }, { "epoch": 0.4010229600391084, "grad_norm": 663.2824096679688, "learning_rate": 7.544606142648489e-06, "loss": 25.5661, "step": 198520 }, { "epoch": 0.40104316067179224, "grad_norm": 71.92509460449219, "learning_rate": 7.544305655992918e-06, "loss": 30.7218, "step": 198530 }, { "epoch": 0.40106336130447606, "grad_norm": 245.72608947753906, "learning_rate": 7.544005156936713e-06, "loss": 39.2497, "step": 198540 }, { "epoch": 0.4010835619371599, "grad_norm": 454.83160400390625, "learning_rate": 7.543704645481333e-06, "loss": 22.8334, "step": 198550 }, { "epoch": 0.4011037625698437, "grad_norm": 230.10459899902344, "learning_rate": 7.543404121628246e-06, "loss": 42.689, "step": 198560 }, { "epoch": 0.4011239632025275, "grad_norm": 1047.18994140625, "learning_rate": 7.543103585378917e-06, "loss": 18.5602, "step": 198570 }, { "epoch": 0.40114416383521134, "grad_norm": 450.0099792480469, "learning_rate": 7.5428030367348075e-06, "loss": 35.403, "step": 198580 }, { "epoch": 0.4011643644678951, "grad_norm": 381.8701477050781, "learning_rate": 7.542502475697385e-06, "loss": 20.5552, "step": 198590 }, { "epoch": 0.4011845651005789, "grad_norm": 155.0900421142578, "learning_rate": 7.542201902268115e-06, "loss": 17.0598, "step": 198600 }, { "epoch": 0.40120476573326275, "grad_norm": 183.87039184570312, "learning_rate": 7.541901316448459e-06, "loss": 14.9127, "step": 198610 }, { "epoch": 0.40122496636594657, "grad_norm": 467.0203552246094, "learning_rate": 7.541600718239887e-06, "loss": 13.6777, "step": 198620 }, { "epoch": 0.4012451669986304, "grad_norm": 288.0802001953125, "learning_rate": 7.541300107643858e-06, "loss": 
30.3727, "step": 198630 }, { "epoch": 0.4012653676313142, "grad_norm": 256.5554504394531, "learning_rate": 7.540999484661844e-06, "loss": 17.2276, "step": 198640 }, { "epoch": 0.401285568263998, "grad_norm": 321.2003173828125, "learning_rate": 7.5406988492953045e-06, "loss": 24.4281, "step": 198650 }, { "epoch": 0.40130576889668185, "grad_norm": 98.70697021484375, "learning_rate": 7.5403982015457065e-06, "loss": 18.2068, "step": 198660 }, { "epoch": 0.40132596952936567, "grad_norm": 342.7528991699219, "learning_rate": 7.540097541414518e-06, "loss": 26.4316, "step": 198670 }, { "epoch": 0.4013461701620495, "grad_norm": 485.7228088378906, "learning_rate": 7.5397968689032e-06, "loss": 20.552, "step": 198680 }, { "epoch": 0.4013663707947333, "grad_norm": 332.6778869628906, "learning_rate": 7.539496184013222e-06, "loss": 12.969, "step": 198690 }, { "epoch": 0.4013865714274171, "grad_norm": 458.6863098144531, "learning_rate": 7.539195486746047e-06, "loss": 20.9644, "step": 198700 }, { "epoch": 0.40140677206010095, "grad_norm": 325.9056091308594, "learning_rate": 7.538894777103141e-06, "loss": 14.1785, "step": 198710 }, { "epoch": 0.4014269726927847, "grad_norm": 166.2456817626953, "learning_rate": 7.538594055085971e-06, "loss": 19.2282, "step": 198720 }, { "epoch": 0.40144717332546853, "grad_norm": 163.84922790527344, "learning_rate": 7.538293320696e-06, "loss": 17.1688, "step": 198730 }, { "epoch": 0.40146737395815235, "grad_norm": 269.4591369628906, "learning_rate": 7.537992573934694e-06, "loss": 17.306, "step": 198740 }, { "epoch": 0.40148757459083617, "grad_norm": 124.59857177734375, "learning_rate": 7.537691814803522e-06, "loss": 13.5246, "step": 198750 }, { "epoch": 0.40150777522352, "grad_norm": 431.02301025390625, "learning_rate": 7.537391043303947e-06, "loss": 20.7026, "step": 198760 }, { "epoch": 0.4015279758562038, "grad_norm": 401.8933410644531, "learning_rate": 7.537090259437436e-06, "loss": 21.808, "step": 198770 }, { "epoch": 0.40154817648888763, "grad_norm": 383.54058837890625, "learning_rate": 7.536789463205455e-06, "loss": 16.0117, "step": 198780 }, { "epoch": 0.40156837712157145, "grad_norm": 258.8091735839844, "learning_rate": 7.536488654609469e-06, "loss": 13.9646, "step": 198790 }, { "epoch": 0.40158857775425527, "grad_norm": 128.9613037109375, "learning_rate": 7.536187833650947e-06, "loss": 25.6044, "step": 198800 }, { "epoch": 0.4016087783869391, "grad_norm": 1039.2686767578125, "learning_rate": 7.535887000331352e-06, "loss": 39.5227, "step": 198810 }, { "epoch": 0.4016289790196229, "grad_norm": 92.21083068847656, "learning_rate": 7.53558615465215e-06, "loss": 21.3274, "step": 198820 }, { "epoch": 0.40164917965230673, "grad_norm": 507.3398742675781, "learning_rate": 7.535285296614809e-06, "loss": 22.0745, "step": 198830 }, { "epoch": 0.40166938028499055, "grad_norm": 408.8777770996094, "learning_rate": 7.534984426220795e-06, "loss": 14.6964, "step": 198840 }, { "epoch": 0.4016895809176743, "grad_norm": 532.9547729492188, "learning_rate": 7.534683543471577e-06, "loss": 27.8968, "step": 198850 }, { "epoch": 0.40170978155035814, "grad_norm": 315.3031921386719, "learning_rate": 7.534382648368617e-06, "loss": 12.7511, "step": 198860 }, { "epoch": 0.40172998218304196, "grad_norm": 578.9645385742188, "learning_rate": 7.534081740913382e-06, "loss": 29.401, "step": 198870 }, { "epoch": 0.4017501828157258, "grad_norm": 537.0770874023438, "learning_rate": 7.5337808211073415e-06, "loss": 18.7072, "step": 198880 }, { "epoch": 0.4017703834484096, "grad_norm": 600.1065673828125, 
"learning_rate": 7.53347988895196e-06, "loss": 13.651, "step": 198890 }, { "epoch": 0.4017905840810934, "grad_norm": 227.85195922851562, "learning_rate": 7.533178944448705e-06, "loss": 24.1425, "step": 198900 }, { "epoch": 0.40181078471377724, "grad_norm": 477.8645935058594, "learning_rate": 7.532877987599043e-06, "loss": 23.5492, "step": 198910 }, { "epoch": 0.40183098534646106, "grad_norm": 130.61325073242188, "learning_rate": 7.532577018404441e-06, "loss": 21.4196, "step": 198920 }, { "epoch": 0.4018511859791449, "grad_norm": 399.0851135253906, "learning_rate": 7.532276036866366e-06, "loss": 32.2595, "step": 198930 }, { "epoch": 0.4018713866118287, "grad_norm": 641.951904296875, "learning_rate": 7.531975042986285e-06, "loss": 15.4523, "step": 198940 }, { "epoch": 0.4018915872445125, "grad_norm": 821.94873046875, "learning_rate": 7.531674036765662e-06, "loss": 13.3444, "step": 198950 }, { "epoch": 0.40191178787719634, "grad_norm": 337.8610534667969, "learning_rate": 7.531373018205971e-06, "loss": 7.4867, "step": 198960 }, { "epoch": 0.40193198850988016, "grad_norm": 164.3924102783203, "learning_rate": 7.531071987308672e-06, "loss": 21.7905, "step": 198970 }, { "epoch": 0.4019521891425639, "grad_norm": 172.9576873779297, "learning_rate": 7.5307709440752364e-06, "loss": 16.716, "step": 198980 }, { "epoch": 0.40197238977524774, "grad_norm": 155.1348876953125, "learning_rate": 7.530469888507131e-06, "loss": 14.815, "step": 198990 }, { "epoch": 0.40199259040793156, "grad_norm": 260.4828186035156, "learning_rate": 7.530168820605819e-06, "loss": 35.3219, "step": 199000 }, { "epoch": 0.4020127910406154, "grad_norm": 258.833984375, "learning_rate": 7.529867740372774e-06, "loss": 20.1072, "step": 199010 }, { "epoch": 0.4020329916732992, "grad_norm": 695.7155151367188, "learning_rate": 7.52956664780946e-06, "loss": 9.5076, "step": 199020 }, { "epoch": 0.402053192305983, "grad_norm": 399.386474609375, "learning_rate": 7.529265542917343e-06, "loss": 22.2962, "step": 199030 }, { "epoch": 0.40207339293866684, "grad_norm": 462.0411376953125, "learning_rate": 7.528964425697895e-06, "loss": 24.6419, "step": 199040 }, { "epoch": 0.40209359357135066, "grad_norm": 347.6090087890625, "learning_rate": 7.5286632961525806e-06, "loss": 30.8706, "step": 199050 }, { "epoch": 0.4021137942040345, "grad_norm": 380.2854919433594, "learning_rate": 7.528362154282868e-06, "loss": 16.8862, "step": 199060 }, { "epoch": 0.4021339948367183, "grad_norm": 551.378662109375, "learning_rate": 7.528061000090223e-06, "loss": 19.1845, "step": 199070 }, { "epoch": 0.4021541954694021, "grad_norm": 264.61962890625, "learning_rate": 7.527759833576118e-06, "loss": 29.0762, "step": 199080 }, { "epoch": 0.40217439610208594, "grad_norm": 918.442626953125, "learning_rate": 7.527458654742017e-06, "loss": 23.0314, "step": 199090 }, { "epoch": 0.40219459673476976, "grad_norm": 550.7683715820312, "learning_rate": 7.527157463589389e-06, "loss": 26.4817, "step": 199100 }, { "epoch": 0.4022147973674535, "grad_norm": 247.42665100097656, "learning_rate": 7.526856260119702e-06, "loss": 9.8326, "step": 199110 }, { "epoch": 0.40223499800013734, "grad_norm": 532.6323852539062, "learning_rate": 7.526555044334424e-06, "loss": 21.7748, "step": 199120 }, { "epoch": 0.40225519863282116, "grad_norm": 1574.6651611328125, "learning_rate": 7.526253816235023e-06, "loss": 20.4568, "step": 199130 }, { "epoch": 0.402275399265505, "grad_norm": 65.26931762695312, "learning_rate": 7.525952575822967e-06, "loss": 18.6802, "step": 199140 }, { "epoch": 0.4022955998981888, 
"grad_norm": 211.4417266845703, "learning_rate": 7.525651323099725e-06, "loss": 21.3643, "step": 199150 }, { "epoch": 0.4023158005308726, "grad_norm": 315.5894775390625, "learning_rate": 7.525350058066765e-06, "loss": 14.3725, "step": 199160 }, { "epoch": 0.40233600116355644, "grad_norm": 291.6868591308594, "learning_rate": 7.525048780725554e-06, "loss": 10.4822, "step": 199170 }, { "epoch": 0.40235620179624026, "grad_norm": 387.8648986816406, "learning_rate": 7.524747491077563e-06, "loss": 24.4473, "step": 199180 }, { "epoch": 0.4023764024289241, "grad_norm": 533.245361328125, "learning_rate": 7.524446189124259e-06, "loss": 19.3918, "step": 199190 }, { "epoch": 0.4023966030616079, "grad_norm": 908.2100219726562, "learning_rate": 7.52414487486711e-06, "loss": 19.0767, "step": 199200 }, { "epoch": 0.4024168036942917, "grad_norm": 717.2238159179688, "learning_rate": 7.523843548307583e-06, "loss": 16.1978, "step": 199210 }, { "epoch": 0.40243700432697554, "grad_norm": 149.34889221191406, "learning_rate": 7.523542209447152e-06, "loss": 9.9471, "step": 199220 }, { "epoch": 0.4024572049596593, "grad_norm": 330.5276794433594, "learning_rate": 7.5232408582872805e-06, "loss": 7.4752, "step": 199230 }, { "epoch": 0.40247740559234313, "grad_norm": 464.214599609375, "learning_rate": 7.52293949482944e-06, "loss": 32.6366, "step": 199240 }, { "epoch": 0.40249760622502695, "grad_norm": 396.9613952636719, "learning_rate": 7.522638119075097e-06, "loss": 22.1046, "step": 199250 }, { "epoch": 0.40251780685771077, "grad_norm": 266.44549560546875, "learning_rate": 7.522336731025723e-06, "loss": 28.5606, "step": 199260 }, { "epoch": 0.4025380074903946, "grad_norm": 360.39642333984375, "learning_rate": 7.522035330682784e-06, "loss": 21.5936, "step": 199270 }, { "epoch": 0.4025582081230784, "grad_norm": 866.492919921875, "learning_rate": 7.521733918047753e-06, "loss": 16.0734, "step": 199280 }, { "epoch": 0.40257840875576223, "grad_norm": 750.3490600585938, "learning_rate": 7.5214324931220936e-06, "loss": 22.1805, "step": 199290 }, { "epoch": 0.40259860938844605, "grad_norm": 543.060791015625, "learning_rate": 7.521131055907283e-06, "loss": 13.1011, "step": 199300 }, { "epoch": 0.40261881002112987, "grad_norm": 199.49288940429688, "learning_rate": 7.520829606404781e-06, "loss": 20.3131, "step": 199310 }, { "epoch": 0.4026390106538137, "grad_norm": 691.2821655273438, "learning_rate": 7.520528144616063e-06, "loss": 17.9429, "step": 199320 }, { "epoch": 0.4026592112864975, "grad_norm": 686.4976806640625, "learning_rate": 7.520226670542597e-06, "loss": 19.5697, "step": 199330 }, { "epoch": 0.40267941191918133, "grad_norm": 581.8713989257812, "learning_rate": 7.519925184185851e-06, "loss": 20.2, "step": 199340 }, { "epoch": 0.40269961255186515, "grad_norm": 351.9085388183594, "learning_rate": 7.5196236855472945e-06, "loss": 11.4018, "step": 199350 }, { "epoch": 0.4027198131845489, "grad_norm": 242.6192169189453, "learning_rate": 7.519322174628399e-06, "loss": 30.2951, "step": 199360 }, { "epoch": 0.40274001381723273, "grad_norm": 330.2269287109375, "learning_rate": 7.519020651430633e-06, "loss": 14.6469, "step": 199370 }, { "epoch": 0.40276021444991655, "grad_norm": 130.55140686035156, "learning_rate": 7.518719115955464e-06, "loss": 13.2948, "step": 199380 }, { "epoch": 0.4027804150826004, "grad_norm": 163.21871948242188, "learning_rate": 7.518417568204366e-06, "loss": 21.737, "step": 199390 }, { "epoch": 0.4028006157152842, "grad_norm": 394.7507019042969, "learning_rate": 7.518116008178805e-06, "loss": 22.8714, 
"step": 199400 }, { "epoch": 0.402820816347968, "grad_norm": 305.2882080078125, "learning_rate": 7.517814435880252e-06, "loss": 23.5342, "step": 199410 }, { "epoch": 0.40284101698065183, "grad_norm": 523.1074829101562, "learning_rate": 7.517512851310178e-06, "loss": 18.615, "step": 199420 }, { "epoch": 0.40286121761333565, "grad_norm": 75.26525115966797, "learning_rate": 7.517211254470049e-06, "loss": 7.9642, "step": 199430 }, { "epoch": 0.4028814182460195, "grad_norm": 309.88348388671875, "learning_rate": 7.51690964536134e-06, "loss": 23.2928, "step": 199440 }, { "epoch": 0.4029016188787033, "grad_norm": 556.8353271484375, "learning_rate": 7.516608023985516e-06, "loss": 34.7362, "step": 199450 }, { "epoch": 0.4029218195113871, "grad_norm": 327.7669982910156, "learning_rate": 7.516306390344052e-06, "loss": 24.0189, "step": 199460 }, { "epoch": 0.40294202014407093, "grad_norm": 654.4342041015625, "learning_rate": 7.516004744438417e-06, "loss": 23.8425, "step": 199470 }, { "epoch": 0.40296222077675475, "grad_norm": 313.38922119140625, "learning_rate": 7.515703086270078e-06, "loss": 21.5069, "step": 199480 }, { "epoch": 0.4029824214094385, "grad_norm": 588.447509765625, "learning_rate": 7.515401415840509e-06, "loss": 22.1496, "step": 199490 }, { "epoch": 0.40300262204212234, "grad_norm": 579.3743286132812, "learning_rate": 7.515099733151177e-06, "loss": 16.3075, "step": 199500 }, { "epoch": 0.40302282267480616, "grad_norm": 661.6896362304688, "learning_rate": 7.514798038203553e-06, "loss": 34.3176, "step": 199510 }, { "epoch": 0.40304302330749, "grad_norm": 94.56706237792969, "learning_rate": 7.514496330999111e-06, "loss": 22.348, "step": 199520 }, { "epoch": 0.4030632239401738, "grad_norm": 840.5774536132812, "learning_rate": 7.514194611539316e-06, "loss": 15.5795, "step": 199530 }, { "epoch": 0.4030834245728576, "grad_norm": 887.9461669921875, "learning_rate": 7.513892879825643e-06, "loss": 26.4058, "step": 199540 }, { "epoch": 0.40310362520554144, "grad_norm": 363.53839111328125, "learning_rate": 7.5135911358595615e-06, "loss": 22.4984, "step": 199550 }, { "epoch": 0.40312382583822526, "grad_norm": 180.75489807128906, "learning_rate": 7.513289379642541e-06, "loss": 16.9116, "step": 199560 }, { "epoch": 0.4031440264709091, "grad_norm": 2551.956298828125, "learning_rate": 7.512987611176052e-06, "loss": 28.4475, "step": 199570 }, { "epoch": 0.4031642271035929, "grad_norm": 702.0039672851562, "learning_rate": 7.512685830461568e-06, "loss": 20.512, "step": 199580 }, { "epoch": 0.4031844277362767, "grad_norm": 497.1060791015625, "learning_rate": 7.5123840375005565e-06, "loss": 23.2045, "step": 199590 }, { "epoch": 0.40320462836896054, "grad_norm": 1770.20166015625, "learning_rate": 7.512082232294491e-06, "loss": 37.5429, "step": 199600 }, { "epoch": 0.40322482900164436, "grad_norm": 0.0, "learning_rate": 7.51178041484484e-06, "loss": 31.8172, "step": 199610 }, { "epoch": 0.4032450296343281, "grad_norm": 513.242919921875, "learning_rate": 7.511478585153078e-06, "loss": 21.8842, "step": 199620 }, { "epoch": 0.40326523026701194, "grad_norm": 404.1490478515625, "learning_rate": 7.511176743220672e-06, "loss": 24.6893, "step": 199630 }, { "epoch": 0.40328543089969576, "grad_norm": 112.2563705444336, "learning_rate": 7.510874889049096e-06, "loss": 16.5327, "step": 199640 }, { "epoch": 0.4033056315323796, "grad_norm": 346.9835510253906, "learning_rate": 7.51057302263982e-06, "loss": 18.7282, "step": 199650 }, { "epoch": 0.4033258321650634, "grad_norm": 1143.9215087890625, "learning_rate": 
7.510271143994317e-06, "loss": 23.472, "step": 199660 }, { "epoch": 0.4033460327977472, "grad_norm": 0.3586396276950836, "learning_rate": 7.509969253114056e-06, "loss": 16.7748, "step": 199670 }, { "epoch": 0.40336623343043104, "grad_norm": 607.0986938476562, "learning_rate": 7.509667350000509e-06, "loss": 16.4035, "step": 199680 }, { "epoch": 0.40338643406311486, "grad_norm": 347.8395690917969, "learning_rate": 7.509365434655147e-06, "loss": 17.4465, "step": 199690 }, { "epoch": 0.4034066346957987, "grad_norm": 252.93080139160156, "learning_rate": 7.509063507079443e-06, "loss": 17.7776, "step": 199700 }, { "epoch": 0.4034268353284825, "grad_norm": 140.82369995117188, "learning_rate": 7.508761567274868e-06, "loss": 26.0802, "step": 199710 }, { "epoch": 0.4034470359611663, "grad_norm": 346.9831237792969, "learning_rate": 7.5084596152428925e-06, "loss": 17.0384, "step": 199720 }, { "epoch": 0.40346723659385014, "grad_norm": 828.2061767578125, "learning_rate": 7.508157650984991e-06, "loss": 25.8712, "step": 199730 }, { "epoch": 0.40348743722653396, "grad_norm": 204.78436279296875, "learning_rate": 7.507855674502631e-06, "loss": 19.5967, "step": 199740 }, { "epoch": 0.4035076378592177, "grad_norm": 159.3317413330078, "learning_rate": 7.507553685797288e-06, "loss": 17.1598, "step": 199750 }, { "epoch": 0.40352783849190155, "grad_norm": 1737.2420654296875, "learning_rate": 7.507251684870433e-06, "loss": 18.5982, "step": 199760 }, { "epoch": 0.40354803912458537, "grad_norm": 303.4064636230469, "learning_rate": 7.506949671723534e-06, "loss": 21.3745, "step": 199770 }, { "epoch": 0.4035682397572692, "grad_norm": 1069.7166748046875, "learning_rate": 7.506647646358069e-06, "loss": 22.2233, "step": 199780 }, { "epoch": 0.403588440389953, "grad_norm": 639.1734008789062, "learning_rate": 7.5063456087755085e-06, "loss": 28.4787, "step": 199790 }, { "epoch": 0.4036086410226368, "grad_norm": 151.21701049804688, "learning_rate": 7.5060435589773215e-06, "loss": 21.6127, "step": 199800 }, { "epoch": 0.40362884165532065, "grad_norm": 479.32763671875, "learning_rate": 7.505741496964984e-06, "loss": 23.1587, "step": 199810 }, { "epoch": 0.40364904228800447, "grad_norm": 754.65234375, "learning_rate": 7.505439422739964e-06, "loss": 18.3213, "step": 199820 }, { "epoch": 0.4036692429206883, "grad_norm": 440.26763916015625, "learning_rate": 7.505137336303739e-06, "loss": 42.7359, "step": 199830 }, { "epoch": 0.4036894435533721, "grad_norm": 416.4516906738281, "learning_rate": 7.504835237657776e-06, "loss": 25.8692, "step": 199840 }, { "epoch": 0.4037096441860559, "grad_norm": 772.91552734375, "learning_rate": 7.5045331268035505e-06, "loss": 26.3638, "step": 199850 }, { "epoch": 0.40372984481873975, "grad_norm": 1250.84765625, "learning_rate": 7.5042310037425345e-06, "loss": 34.9652, "step": 199860 }, { "epoch": 0.4037500454514235, "grad_norm": 749.7967529296875, "learning_rate": 7.5039288684762e-06, "loss": 24.5819, "step": 199870 }, { "epoch": 0.40377024608410733, "grad_norm": 0.0, "learning_rate": 7.50362672100602e-06, "loss": 19.5698, "step": 199880 }, { "epoch": 0.40379044671679115, "grad_norm": 914.7378540039062, "learning_rate": 7.503324561333467e-06, "loss": 18.7178, "step": 199890 }, { "epoch": 0.40381064734947497, "grad_norm": 216.0771026611328, "learning_rate": 7.503022389460014e-06, "loss": 20.7132, "step": 199900 }, { "epoch": 0.4038308479821588, "grad_norm": 357.29656982421875, "learning_rate": 7.502720205387133e-06, "loss": 19.8191, "step": 199910 }, { "epoch": 0.4038510486148426, "grad_norm": 
609.9511108398438, "learning_rate": 7.5024180091162976e-06, "loss": 20.7704, "step": 199920 }, { "epoch": 0.40387124924752643, "grad_norm": 3.1290600299835205, "learning_rate": 7.50211580064898e-06, "loss": 19.2354, "step": 199930 }, { "epoch": 0.40389144988021025, "grad_norm": 367.80419921875, "learning_rate": 7.501813579986655e-06, "loss": 20.0358, "step": 199940 }, { "epoch": 0.40391165051289407, "grad_norm": 107.23719787597656, "learning_rate": 7.501511347130792e-06, "loss": 19.2733, "step": 199950 }, { "epoch": 0.4039318511455779, "grad_norm": 326.3555908203125, "learning_rate": 7.501209102082867e-06, "loss": 10.1754, "step": 199960 }, { "epoch": 0.4039520517782617, "grad_norm": 425.93548583984375, "learning_rate": 7.500906844844352e-06, "loss": 24.8486, "step": 199970 }, { "epoch": 0.40397225241094553, "grad_norm": 371.8782653808594, "learning_rate": 7.5006045754167216e-06, "loss": 17.8385, "step": 199980 }, { "epoch": 0.40399245304362935, "grad_norm": 564.3142700195312, "learning_rate": 7.5003022938014445e-06, "loss": 23.0844, "step": 199990 }, { "epoch": 0.4040126536763131, "grad_norm": 365.4569396972656, "learning_rate": 7.500000000000001e-06, "loss": 27.8551, "step": 200000 }, { "epoch": 0.40403285430899694, "grad_norm": 907.5307006835938, "learning_rate": 7.499697694013859e-06, "loss": 19.9156, "step": 200010 }, { "epoch": 0.40405305494168076, "grad_norm": 483.8946228027344, "learning_rate": 7.499395375844494e-06, "loss": 12.2011, "step": 200020 }, { "epoch": 0.4040732555743646, "grad_norm": 534.7166748046875, "learning_rate": 7.49909304549338e-06, "loss": 25.33, "step": 200030 }, { "epoch": 0.4040934562070484, "grad_norm": 531.9784545898438, "learning_rate": 7.498790702961987e-06, "loss": 9.2343, "step": 200040 }, { "epoch": 0.4041136568397322, "grad_norm": 289.97991943359375, "learning_rate": 7.498488348251793e-06, "loss": 26.1014, "step": 200050 }, { "epoch": 0.40413385747241604, "grad_norm": 354.9217834472656, "learning_rate": 7.498185981364268e-06, "loss": 18.9162, "step": 200060 }, { "epoch": 0.40415405810509986, "grad_norm": 0.0, "learning_rate": 7.497883602300891e-06, "loss": 23.4203, "step": 200070 }, { "epoch": 0.4041742587377837, "grad_norm": 511.7737731933594, "learning_rate": 7.49758121106313e-06, "loss": 19.8826, "step": 200080 }, { "epoch": 0.4041944593704675, "grad_norm": 74.34274291992188, "learning_rate": 7.49727880765246e-06, "loss": 44.2725, "step": 200090 }, { "epoch": 0.4042146600031513, "grad_norm": 709.9552612304688, "learning_rate": 7.496976392070358e-06, "loss": 46.4911, "step": 200100 }, { "epoch": 0.40423486063583514, "grad_norm": 334.0511169433594, "learning_rate": 7.496673964318295e-06, "loss": 8.786, "step": 200110 }, { "epoch": 0.40425506126851896, "grad_norm": 404.2134704589844, "learning_rate": 7.496371524397747e-06, "loss": 48.092, "step": 200120 }, { "epoch": 0.4042752619012027, "grad_norm": 151.0152130126953, "learning_rate": 7.496069072310185e-06, "loss": 13.3238, "step": 200130 }, { "epoch": 0.40429546253388654, "grad_norm": 386.9094543457031, "learning_rate": 7.495766608057087e-06, "loss": 19.2344, "step": 200140 }, { "epoch": 0.40431566316657036, "grad_norm": 292.6839294433594, "learning_rate": 7.495464131639923e-06, "loss": 12.1242, "step": 200150 }, { "epoch": 0.4043358637992542, "grad_norm": 356.8404235839844, "learning_rate": 7.495161643060171e-06, "loss": 26.1107, "step": 200160 }, { "epoch": 0.404356064431938, "grad_norm": 375.3106994628906, "learning_rate": 7.494859142319304e-06, "loss": 30.0069, "step": 200170 }, { "epoch": 
0.4043762650646218, "grad_norm": 203.5823516845703, "learning_rate": 7.494556629418796e-06, "loss": 29.5559, "step": 200180 }, { "epoch": 0.40439646569730564, "grad_norm": 428.19873046875, "learning_rate": 7.494254104360121e-06, "loss": 25.7056, "step": 200190 }, { "epoch": 0.40441666632998946, "grad_norm": 584.8751220703125, "learning_rate": 7.493951567144755e-06, "loss": 16.5882, "step": 200200 }, { "epoch": 0.4044368669626733, "grad_norm": 160.39398193359375, "learning_rate": 7.493649017774172e-06, "loss": 15.0911, "step": 200210 }, { "epoch": 0.4044570675953571, "grad_norm": 42.888275146484375, "learning_rate": 7.493346456249844e-06, "loss": 16.9504, "step": 200220 }, { "epoch": 0.4044772682280409, "grad_norm": 650.8150634765625, "learning_rate": 7.493043882573249e-06, "loss": 14.835, "step": 200230 }, { "epoch": 0.40449746886072474, "grad_norm": 686.9885864257812, "learning_rate": 7.49274129674586e-06, "loss": 30.2144, "step": 200240 }, { "epoch": 0.40451766949340856, "grad_norm": 366.8212585449219, "learning_rate": 7.4924386987691525e-06, "loss": 17.2971, "step": 200250 }, { "epoch": 0.4045378701260923, "grad_norm": 526.153564453125, "learning_rate": 7.492136088644601e-06, "loss": 14.0622, "step": 200260 }, { "epoch": 0.40455807075877614, "grad_norm": 352.774658203125, "learning_rate": 7.4918334663736805e-06, "loss": 26.5112, "step": 200270 }, { "epoch": 0.40457827139145996, "grad_norm": 319.0962829589844, "learning_rate": 7.491530831957866e-06, "loss": 26.2128, "step": 200280 }, { "epoch": 0.4045984720241438, "grad_norm": 406.3224182128906, "learning_rate": 7.491228185398633e-06, "loss": 26.2267, "step": 200290 }, { "epoch": 0.4046186726568276, "grad_norm": 596.3237915039062, "learning_rate": 7.490925526697455e-06, "loss": 33.5153, "step": 200300 }, { "epoch": 0.4046388732895114, "grad_norm": 728.4494018554688, "learning_rate": 7.490622855855808e-06, "loss": 34.1726, "step": 200310 }, { "epoch": 0.40465907392219524, "grad_norm": 289.8499755859375, "learning_rate": 7.490320172875169e-06, "loss": 24.2974, "step": 200320 }, { "epoch": 0.40467927455487906, "grad_norm": 2.1218676567077637, "learning_rate": 7.490017477757009e-06, "loss": 15.4083, "step": 200330 }, { "epoch": 0.4046994751875629, "grad_norm": 231.6231689453125, "learning_rate": 7.489714770502807e-06, "loss": 20.2343, "step": 200340 }, { "epoch": 0.4047196758202467, "grad_norm": 241.0740966796875, "learning_rate": 7.489412051114038e-06, "loss": 21.3217, "step": 200350 }, { "epoch": 0.4047398764529305, "grad_norm": 409.89190673828125, "learning_rate": 7.4891093195921764e-06, "loss": 12.4392, "step": 200360 }, { "epoch": 0.40476007708561434, "grad_norm": 415.6175537109375, "learning_rate": 7.488806575938697e-06, "loss": 13.2343, "step": 200370 }, { "epoch": 0.40478027771829816, "grad_norm": 519.8626708984375, "learning_rate": 7.488503820155075e-06, "loss": 20.2115, "step": 200380 }, { "epoch": 0.40480047835098193, "grad_norm": 227.9258270263672, "learning_rate": 7.488201052242791e-06, "loss": 28.8321, "step": 200390 }, { "epoch": 0.40482067898366575, "grad_norm": 391.5074768066406, "learning_rate": 7.487898272203314e-06, "loss": 19.1745, "step": 200400 }, { "epoch": 0.40484087961634957, "grad_norm": 165.18077087402344, "learning_rate": 7.487595480038122e-06, "loss": 23.5488, "step": 200410 }, { "epoch": 0.4048610802490334, "grad_norm": 390.2011413574219, "learning_rate": 7.487292675748693e-06, "loss": 22.4712, "step": 200420 }, { "epoch": 0.4048812808817172, "grad_norm": 182.7901153564453, "learning_rate": 7.4869898593365e-06, 
"loss": 15.2531, "step": 200430 }, { "epoch": 0.40490148151440103, "grad_norm": 410.7278747558594, "learning_rate": 7.4866870308030215e-06, "loss": 26.6576, "step": 200440 }, { "epoch": 0.40492168214708485, "grad_norm": 160.73777770996094, "learning_rate": 7.486384190149731e-06, "loss": 29.0196, "step": 200450 }, { "epoch": 0.40494188277976867, "grad_norm": 514.1658325195312, "learning_rate": 7.486081337378106e-06, "loss": 15.2621, "step": 200460 }, { "epoch": 0.4049620834124525, "grad_norm": 843.35400390625, "learning_rate": 7.485778472489622e-06, "loss": 16.1729, "step": 200470 }, { "epoch": 0.4049822840451363, "grad_norm": 721.1657104492188, "learning_rate": 7.485475595485756e-06, "loss": 17.5695, "step": 200480 }, { "epoch": 0.40500248467782013, "grad_norm": 520.6153564453125, "learning_rate": 7.4851727063679806e-06, "loss": 9.6996, "step": 200490 }, { "epoch": 0.40502268531050395, "grad_norm": 641.8265991210938, "learning_rate": 7.484869805137778e-06, "loss": 26.9172, "step": 200500 }, { "epoch": 0.4050428859431877, "grad_norm": 266.42669677734375, "learning_rate": 7.48456689179662e-06, "loss": 18.7096, "step": 200510 }, { "epoch": 0.40506308657587153, "grad_norm": 246.48928833007812, "learning_rate": 7.484263966345984e-06, "loss": 13.7408, "step": 200520 }, { "epoch": 0.40508328720855535, "grad_norm": 632.6735229492188, "learning_rate": 7.483961028787346e-06, "loss": 16.9427, "step": 200530 }, { "epoch": 0.4051034878412392, "grad_norm": 308.1974182128906, "learning_rate": 7.483658079122185e-06, "loss": 16.8318, "step": 200540 }, { "epoch": 0.405123688473923, "grad_norm": 579.98291015625, "learning_rate": 7.483355117351976e-06, "loss": 19.9662, "step": 200550 }, { "epoch": 0.4051438891066068, "grad_norm": 698.7310791015625, "learning_rate": 7.483052143478193e-06, "loss": 17.7118, "step": 200560 }, { "epoch": 0.40516408973929063, "grad_norm": 295.27142333984375, "learning_rate": 7.482749157502317e-06, "loss": 29.2573, "step": 200570 }, { "epoch": 0.40518429037197445, "grad_norm": 530.3827514648438, "learning_rate": 7.482446159425822e-06, "loss": 10.686, "step": 200580 }, { "epoch": 0.4052044910046583, "grad_norm": 868.5879516601562, "learning_rate": 7.482143149250185e-06, "loss": 24.0939, "step": 200590 }, { "epoch": 0.4052246916373421, "grad_norm": 235.33192443847656, "learning_rate": 7.481840126976885e-06, "loss": 11.6707, "step": 200600 }, { "epoch": 0.4052448922700259, "grad_norm": 474.0995788574219, "learning_rate": 7.481537092607396e-06, "loss": 15.2922, "step": 200610 }, { "epoch": 0.40526509290270973, "grad_norm": 6.325074672698975, "learning_rate": 7.4812340461431965e-06, "loss": 24.3909, "step": 200620 }, { "epoch": 0.40528529353539355, "grad_norm": 270.37689208984375, "learning_rate": 7.480930987585763e-06, "loss": 17.6095, "step": 200630 }, { "epoch": 0.4053054941680773, "grad_norm": 1066.9154052734375, "learning_rate": 7.480627916936574e-06, "loss": 26.306, "step": 200640 }, { "epoch": 0.40532569480076114, "grad_norm": 179.23202514648438, "learning_rate": 7.480324834197103e-06, "loss": 21.1797, "step": 200650 }, { "epoch": 0.40534589543344496, "grad_norm": 63.762760162353516, "learning_rate": 7.480021739368831e-06, "loss": 17.4896, "step": 200660 }, { "epoch": 0.4053660960661288, "grad_norm": 713.467041015625, "learning_rate": 7.479718632453233e-06, "loss": 21.3761, "step": 200670 }, { "epoch": 0.4053862966988126, "grad_norm": 127.81704711914062, "learning_rate": 7.4794155134517885e-06, "loss": 17.1732, "step": 200680 }, { "epoch": 0.4054064973314964, "grad_norm": 
322.3304138183594, "learning_rate": 7.479112382365973e-06, "loss": 24.931, "step": 200690 }, { "epoch": 0.40542669796418024, "grad_norm": 431.6565246582031, "learning_rate": 7.478809239197264e-06, "loss": 30.5781, "step": 200700 }, { "epoch": 0.40544689859686406, "grad_norm": 237.55812072753906, "learning_rate": 7.47850608394714e-06, "loss": 14.3789, "step": 200710 }, { "epoch": 0.4054670992295479, "grad_norm": 520.4130859375, "learning_rate": 7.478202916617077e-06, "loss": 12.677, "step": 200720 }, { "epoch": 0.4054872998622317, "grad_norm": 1962.9849853515625, "learning_rate": 7.477899737208555e-06, "loss": 30.0419, "step": 200730 }, { "epoch": 0.4055075004949155, "grad_norm": 549.2138061523438, "learning_rate": 7.477596545723049e-06, "loss": 23.2541, "step": 200740 }, { "epoch": 0.40552770112759934, "grad_norm": 294.5437316894531, "learning_rate": 7.477293342162038e-06, "loss": 20.2328, "step": 200750 }, { "epoch": 0.40554790176028316, "grad_norm": 220.3087615966797, "learning_rate": 7.476990126527e-06, "loss": 17.5602, "step": 200760 }, { "epoch": 0.4055681023929669, "grad_norm": 121.65559387207031, "learning_rate": 7.476686898819414e-06, "loss": 8.0995, "step": 200770 }, { "epoch": 0.40558830302565074, "grad_norm": 59.63343048095703, "learning_rate": 7.476383659040754e-06, "loss": 20.0355, "step": 200780 }, { "epoch": 0.40560850365833456, "grad_norm": 244.6937255859375, "learning_rate": 7.476080407192502e-06, "loss": 16.1754, "step": 200790 }, { "epoch": 0.4056287042910184, "grad_norm": 117.62286376953125, "learning_rate": 7.475777143276133e-06, "loss": 14.7477, "step": 200800 }, { "epoch": 0.4056489049237022, "grad_norm": 641.1858520507812, "learning_rate": 7.475473867293127e-06, "loss": 23.3399, "step": 200810 }, { "epoch": 0.405669105556386, "grad_norm": 204.1864471435547, "learning_rate": 7.475170579244964e-06, "loss": 17.1633, "step": 200820 }, { "epoch": 0.40568930618906984, "grad_norm": 1005.5781860351562, "learning_rate": 7.474867279133115e-06, "loss": 33.7334, "step": 200830 }, { "epoch": 0.40570950682175366, "grad_norm": 489.997802734375, "learning_rate": 7.474563966959068e-06, "loss": 15.5835, "step": 200840 }, { "epoch": 0.4057297074544375, "grad_norm": 38.442405700683594, "learning_rate": 7.4742606427242935e-06, "loss": 24.9425, "step": 200850 }, { "epoch": 0.4057499080871213, "grad_norm": 481.94927978515625, "learning_rate": 7.473957306430273e-06, "loss": 32.2975, "step": 200860 }, { "epoch": 0.4057701087198051, "grad_norm": 259.0964660644531, "learning_rate": 7.473653958078484e-06, "loss": 21.2508, "step": 200870 }, { "epoch": 0.40579030935248894, "grad_norm": 425.4049072265625, "learning_rate": 7.473350597670407e-06, "loss": 37.0715, "step": 200880 }, { "epoch": 0.40581050998517276, "grad_norm": 83.00106811523438, "learning_rate": 7.47304722520752e-06, "loss": 14.0333, "step": 200890 }, { "epoch": 0.4058307106178565, "grad_norm": 356.96600341796875, "learning_rate": 7.4727438406912986e-06, "loss": 21.6185, "step": 200900 }, { "epoch": 0.40585091125054035, "grad_norm": 457.668212890625, "learning_rate": 7.472440444123224e-06, "loss": 19.1185, "step": 200910 }, { "epoch": 0.40587111188322417, "grad_norm": 234.08029174804688, "learning_rate": 7.472137035504776e-06, "loss": 12.4808, "step": 200920 }, { "epoch": 0.405891312515908, "grad_norm": 446.756103515625, "learning_rate": 7.471833614837431e-06, "loss": 22.8839, "step": 200930 }, { "epoch": 0.4059115131485918, "grad_norm": 197.4498748779297, "learning_rate": 7.471530182122668e-06, "loss": 26.2008, "step": 200940 }, { 
"epoch": 0.4059317137812756, "grad_norm": 658.5452880859375, "learning_rate": 7.471226737361968e-06, "loss": 21.0591, "step": 200950 }, { "epoch": 0.40595191441395945, "grad_norm": 351.7759094238281, "learning_rate": 7.470923280556808e-06, "loss": 24.4132, "step": 200960 }, { "epoch": 0.40597211504664327, "grad_norm": 521.5507202148438, "learning_rate": 7.4706198117086685e-06, "loss": 11.5193, "step": 200970 }, { "epoch": 0.4059923156793271, "grad_norm": 410.0379333496094, "learning_rate": 7.4703163308190275e-06, "loss": 13.6378, "step": 200980 }, { "epoch": 0.4060125163120109, "grad_norm": 816.7725219726562, "learning_rate": 7.470012837889362e-06, "loss": 33.3381, "step": 200990 }, { "epoch": 0.4060327169446947, "grad_norm": 483.8327941894531, "learning_rate": 7.469709332921155e-06, "loss": 15.425, "step": 201000 }, { "epoch": 0.40605291757737855, "grad_norm": 437.8172912597656, "learning_rate": 7.469405815915885e-06, "loss": 22.8705, "step": 201010 }, { "epoch": 0.4060731182100623, "grad_norm": 654.0513916015625, "learning_rate": 7.469102286875029e-06, "loss": 22.8105, "step": 201020 }, { "epoch": 0.40609331884274613, "grad_norm": 539.8277587890625, "learning_rate": 7.46879874580007e-06, "loss": 15.6053, "step": 201030 }, { "epoch": 0.40611351947542995, "grad_norm": 568.4539184570312, "learning_rate": 7.468495192692484e-06, "loss": 28.7137, "step": 201040 }, { "epoch": 0.40613372010811377, "grad_norm": 343.5415344238281, "learning_rate": 7.468191627553752e-06, "loss": 15.7525, "step": 201050 }, { "epoch": 0.4061539207407976, "grad_norm": 756.5459594726562, "learning_rate": 7.467888050385355e-06, "loss": 31.3906, "step": 201060 }, { "epoch": 0.4061741213734814, "grad_norm": 61.31733703613281, "learning_rate": 7.467584461188769e-06, "loss": 15.9251, "step": 201070 }, { "epoch": 0.40619432200616523, "grad_norm": 246.74839782714844, "learning_rate": 7.467280859965476e-06, "loss": 21.4076, "step": 201080 }, { "epoch": 0.40621452263884905, "grad_norm": 276.4493103027344, "learning_rate": 7.4669772467169555e-06, "loss": 19.7777, "step": 201090 }, { "epoch": 0.40623472327153287, "grad_norm": 397.8076171875, "learning_rate": 7.4666736214446855e-06, "loss": 14.9288, "step": 201100 }, { "epoch": 0.4062549239042167, "grad_norm": 230.67694091796875, "learning_rate": 7.46636998415015e-06, "loss": 31.7875, "step": 201110 }, { "epoch": 0.4062751245369005, "grad_norm": 400.58160400390625, "learning_rate": 7.466066334834825e-06, "loss": 13.756, "step": 201120 }, { "epoch": 0.40629532516958433, "grad_norm": 223.71376037597656, "learning_rate": 7.465762673500192e-06, "loss": 23.8489, "step": 201130 }, { "epoch": 0.40631552580226815, "grad_norm": 425.3001708984375, "learning_rate": 7.465459000147731e-06, "loss": 16.3558, "step": 201140 }, { "epoch": 0.4063357264349519, "grad_norm": 302.7783203125, "learning_rate": 7.46515531477892e-06, "loss": 15.9492, "step": 201150 }, { "epoch": 0.40635592706763574, "grad_norm": 142.73504638671875, "learning_rate": 7.464851617395244e-06, "loss": 16.9348, "step": 201160 }, { "epoch": 0.40637612770031956, "grad_norm": 261.61956787109375, "learning_rate": 7.464547907998179e-06, "loss": 22.927, "step": 201170 }, { "epoch": 0.4063963283330034, "grad_norm": 456.87176513671875, "learning_rate": 7.464244186589206e-06, "loss": 19.6105, "step": 201180 }, { "epoch": 0.4064165289656872, "grad_norm": 995.794921875, "learning_rate": 7.4639404531698054e-06, "loss": 20.3693, "step": 201190 }, { "epoch": 0.406436729598371, "grad_norm": 491.23028564453125, "learning_rate": 
7.463636707741458e-06, "loss": 24.6229, "step": 201200 }, { "epoch": 0.40645693023105484, "grad_norm": 263.67669677734375, "learning_rate": 7.463332950305646e-06, "loss": 23.7628, "step": 201210 }, { "epoch": 0.40647713086373866, "grad_norm": 564.0216674804688, "learning_rate": 7.463029180863846e-06, "loss": 20.9312, "step": 201220 }, { "epoch": 0.4064973314964225, "grad_norm": 1087.8101806640625, "learning_rate": 7.462725399417541e-06, "loss": 34.9737, "step": 201230 }, { "epoch": 0.4065175321291063, "grad_norm": 827.571044921875, "learning_rate": 7.4624216059682106e-06, "loss": 38.8245, "step": 201240 }, { "epoch": 0.4065377327617901, "grad_norm": 457.0788269042969, "learning_rate": 7.462117800517337e-06, "loss": 26.9808, "step": 201250 }, { "epoch": 0.40655793339447394, "grad_norm": 381.0020446777344, "learning_rate": 7.461813983066398e-06, "loss": 27.545, "step": 201260 }, { "epoch": 0.40657813402715776, "grad_norm": 159.75901794433594, "learning_rate": 7.461510153616879e-06, "loss": 12.4726, "step": 201270 }, { "epoch": 0.4065983346598415, "grad_norm": 2.2260327339172363, "learning_rate": 7.461206312170255e-06, "loss": 14.3518, "step": 201280 }, { "epoch": 0.40661853529252534, "grad_norm": 371.715576171875, "learning_rate": 7.460902458728012e-06, "loss": 22.9897, "step": 201290 }, { "epoch": 0.40663873592520916, "grad_norm": 188.25341796875, "learning_rate": 7.460598593291628e-06, "loss": 30.0347, "step": 201300 }, { "epoch": 0.406658936557893, "grad_norm": 311.2943420410156, "learning_rate": 7.460294715862586e-06, "loss": 24.9204, "step": 201310 }, { "epoch": 0.4066791371905768, "grad_norm": 603.1354370117188, "learning_rate": 7.459990826442366e-06, "loss": 27.4273, "step": 201320 }, { "epoch": 0.4066993378232606, "grad_norm": 315.0934143066406, "learning_rate": 7.459686925032446e-06, "loss": 23.9535, "step": 201330 }, { "epoch": 0.40671953845594444, "grad_norm": 188.21473693847656, "learning_rate": 7.459383011634314e-06, "loss": 15.5692, "step": 201340 }, { "epoch": 0.40673973908862826, "grad_norm": 724.6349487304688, "learning_rate": 7.459079086249445e-06, "loss": 27.3631, "step": 201350 }, { "epoch": 0.4067599397213121, "grad_norm": 1370.4459228515625, "learning_rate": 7.458775148879325e-06, "loss": 28.1437, "step": 201360 }, { "epoch": 0.4067801403539959, "grad_norm": 494.84234619140625, "learning_rate": 7.458471199525431e-06, "loss": 24.9262, "step": 201370 }, { "epoch": 0.4068003409866797, "grad_norm": 374.4007263183594, "learning_rate": 7.458167238189249e-06, "loss": 30.2492, "step": 201380 }, { "epoch": 0.40682054161936354, "grad_norm": 439.54937744140625, "learning_rate": 7.457863264872256e-06, "loss": 14.7385, "step": 201390 }, { "epoch": 0.40684074225204736, "grad_norm": 522.9287719726562, "learning_rate": 7.4575592795759356e-06, "loss": 12.2373, "step": 201400 }, { "epoch": 0.4068609428847311, "grad_norm": 175.83682250976562, "learning_rate": 7.45725528230177e-06, "loss": 14.9058, "step": 201410 }, { "epoch": 0.40688114351741494, "grad_norm": 491.727294921875, "learning_rate": 7.456951273051239e-06, "loss": 15.4886, "step": 201420 }, { "epoch": 0.40690134415009876, "grad_norm": 448.6680908203125, "learning_rate": 7.456647251825828e-06, "loss": 31.4303, "step": 201430 }, { "epoch": 0.4069215447827826, "grad_norm": 98.84780883789062, "learning_rate": 7.4563432186270135e-06, "loss": 11.6186, "step": 201440 }, { "epoch": 0.4069417454154664, "grad_norm": 104.43572235107422, "learning_rate": 7.456039173456282e-06, "loss": 13.9035, "step": 201450 }, { "epoch": 
0.4069619460481502, "grad_norm": 390.1668395996094, "learning_rate": 7.455735116315113e-06, "loss": 20.1905, "step": 201460 }, { "epoch": 0.40698214668083404, "grad_norm": 138.4781036376953, "learning_rate": 7.455431047204988e-06, "loss": 15.5138, "step": 201470 }, { "epoch": 0.40700234731351786, "grad_norm": 214.82301330566406, "learning_rate": 7.455126966127392e-06, "loss": 15.5275, "step": 201480 }, { "epoch": 0.4070225479462017, "grad_norm": 151.06898498535156, "learning_rate": 7.4548228730838025e-06, "loss": 12.9611, "step": 201490 }, { "epoch": 0.4070427485788855, "grad_norm": 435.325927734375, "learning_rate": 7.454518768075705e-06, "loss": 17.9772, "step": 201500 }, { "epoch": 0.4070629492115693, "grad_norm": 255.72364807128906, "learning_rate": 7.454214651104581e-06, "loss": 11.7055, "step": 201510 }, { "epoch": 0.40708314984425314, "grad_norm": 502.916748046875, "learning_rate": 7.453910522171912e-06, "loss": 17.7385, "step": 201520 }, { "epoch": 0.40710335047693696, "grad_norm": 325.4402160644531, "learning_rate": 7.453606381279181e-06, "loss": 19.5053, "step": 201530 }, { "epoch": 0.40712355110962073, "grad_norm": 407.8590087890625, "learning_rate": 7.45330222842787e-06, "loss": 25.5274, "step": 201540 }, { "epoch": 0.40714375174230455, "grad_norm": 993.3406372070312, "learning_rate": 7.452998063619461e-06, "loss": 11.7498, "step": 201550 }, { "epoch": 0.40716395237498837, "grad_norm": 646.5173950195312, "learning_rate": 7.452693886855438e-06, "loss": 21.7291, "step": 201560 }, { "epoch": 0.4071841530076722, "grad_norm": 223.51336669921875, "learning_rate": 7.452389698137281e-06, "loss": 41.3807, "step": 201570 }, { "epoch": 0.407204353640356, "grad_norm": 137.25628662109375, "learning_rate": 7.452085497466476e-06, "loss": 16.8889, "step": 201580 }, { "epoch": 0.40722455427303983, "grad_norm": 266.11163330078125, "learning_rate": 7.451781284844503e-06, "loss": 7.3088, "step": 201590 }, { "epoch": 0.40724475490572365, "grad_norm": 160.74749755859375, "learning_rate": 7.451477060272844e-06, "loss": 19.159, "step": 201600 }, { "epoch": 0.40726495553840747, "grad_norm": 104.86251831054688, "learning_rate": 7.4511728237529845e-06, "loss": 23.7645, "step": 201610 }, { "epoch": 0.4072851561710913, "grad_norm": 908.7479858398438, "learning_rate": 7.450868575286405e-06, "loss": 31.8811, "step": 201620 }, { "epoch": 0.4073053568037751, "grad_norm": 309.2737121582031, "learning_rate": 7.450564314874591e-06, "loss": 20.9828, "step": 201630 }, { "epoch": 0.40732555743645893, "grad_norm": 568.97998046875, "learning_rate": 7.450260042519022e-06, "loss": 43.8262, "step": 201640 }, { "epoch": 0.40734575806914275, "grad_norm": 167.1912078857422, "learning_rate": 7.449955758221184e-06, "loss": 16.0625, "step": 201650 }, { "epoch": 0.4073659587018265, "grad_norm": 339.8267517089844, "learning_rate": 7.449651461982559e-06, "loss": 32.839, "step": 201660 }, { "epoch": 0.40738615933451033, "grad_norm": 259.1501770019531, "learning_rate": 7.449347153804628e-06, "loss": 17.2005, "step": 201670 }, { "epoch": 0.40740635996719415, "grad_norm": 476.617431640625, "learning_rate": 7.4490428336888775e-06, "loss": 30.358, "step": 201680 }, { "epoch": 0.407426560599878, "grad_norm": 350.8307189941406, "learning_rate": 7.4487385016367885e-06, "loss": 45.564, "step": 201690 }, { "epoch": 0.4074467612325618, "grad_norm": 433.2794494628906, "learning_rate": 7.448434157649846e-06, "loss": 25.2486, "step": 201700 }, { "epoch": 0.4074669618652456, "grad_norm": 518.6293334960938, "learning_rate": 7.448129801729531e-06, 
"loss": 25.7643, "step": 201710 }, { "epoch": 0.40748716249792943, "grad_norm": 223.87640380859375, "learning_rate": 7.447825433877329e-06, "loss": 14.6394, "step": 201720 }, { "epoch": 0.40750736313061325, "grad_norm": 452.998779296875, "learning_rate": 7.447521054094723e-06, "loss": 14.4795, "step": 201730 }, { "epoch": 0.4075275637632971, "grad_norm": 251.29132080078125, "learning_rate": 7.447216662383196e-06, "loss": 29.2115, "step": 201740 }, { "epoch": 0.4075477643959809, "grad_norm": 325.8575134277344, "learning_rate": 7.446912258744232e-06, "loss": 22.5336, "step": 201750 }, { "epoch": 0.4075679650286647, "grad_norm": 897.65869140625, "learning_rate": 7.446607843179314e-06, "loss": 26.6089, "step": 201760 }, { "epoch": 0.40758816566134853, "grad_norm": 784.4694213867188, "learning_rate": 7.446303415689927e-06, "loss": 25.6471, "step": 201770 }, { "epoch": 0.40760836629403235, "grad_norm": 187.5708770751953, "learning_rate": 7.4459989762775516e-06, "loss": 26.8016, "step": 201780 }, { "epoch": 0.4076285669267161, "grad_norm": 268.2976989746094, "learning_rate": 7.445694524943677e-06, "loss": 16.3666, "step": 201790 }, { "epoch": 0.40764876755939994, "grad_norm": 446.0238952636719, "learning_rate": 7.445390061689782e-06, "loss": 13.2344, "step": 201800 }, { "epoch": 0.40766896819208376, "grad_norm": 361.369140625, "learning_rate": 7.4450855865173534e-06, "loss": 23.7117, "step": 201810 }, { "epoch": 0.4076891688247676, "grad_norm": 383.89849853515625, "learning_rate": 7.444781099427873e-06, "loss": 18.8971, "step": 201820 }, { "epoch": 0.4077093694574514, "grad_norm": 353.0237121582031, "learning_rate": 7.444476600422827e-06, "loss": 18.4254, "step": 201830 }, { "epoch": 0.4077295700901352, "grad_norm": 268.77392578125, "learning_rate": 7.444172089503698e-06, "loss": 8.8478, "step": 201840 }, { "epoch": 0.40774977072281904, "grad_norm": 63.98426818847656, "learning_rate": 7.443867566671971e-06, "loss": 8.1564, "step": 201850 }, { "epoch": 0.40776997135550286, "grad_norm": 307.0477294921875, "learning_rate": 7.4435630319291295e-06, "loss": 21.182, "step": 201860 }, { "epoch": 0.4077901719881867, "grad_norm": 103.76622009277344, "learning_rate": 7.4432584852766575e-06, "loss": 19.0367, "step": 201870 }, { "epoch": 0.4078103726208705, "grad_norm": 468.2156066894531, "learning_rate": 7.442953926716042e-06, "loss": 27.5066, "step": 201880 }, { "epoch": 0.4078305732535543, "grad_norm": 362.2070617675781, "learning_rate": 7.442649356248765e-06, "loss": 23.6275, "step": 201890 }, { "epoch": 0.40785077388623814, "grad_norm": 389.12847900390625, "learning_rate": 7.44234477387631e-06, "loss": 13.1216, "step": 201900 }, { "epoch": 0.40787097451892196, "grad_norm": 328.23065185546875, "learning_rate": 7.442040179600163e-06, "loss": 12.5546, "step": 201910 }, { "epoch": 0.4078911751516057, "grad_norm": 1921.57666015625, "learning_rate": 7.4417355734218085e-06, "loss": 46.7711, "step": 201920 }, { "epoch": 0.40791137578428954, "grad_norm": 721.4837646484375, "learning_rate": 7.441430955342733e-06, "loss": 17.6815, "step": 201930 }, { "epoch": 0.40793157641697336, "grad_norm": 728.987548828125, "learning_rate": 7.441126325364415e-06, "loss": 19.4633, "step": 201940 }, { "epoch": 0.4079517770496572, "grad_norm": 739.6961059570312, "learning_rate": 7.440821683488346e-06, "loss": 25.4722, "step": 201950 }, { "epoch": 0.407971977682341, "grad_norm": 329.321044921875, "learning_rate": 7.440517029716008e-06, "loss": 31.0787, "step": 201960 }, { "epoch": 0.4079921783150248, "grad_norm": 332.2442626953125, 
"learning_rate": 7.4402123640488855e-06, "loss": 21.4414, "step": 201970 }, { "epoch": 0.40801237894770864, "grad_norm": 202.59873962402344, "learning_rate": 7.439907686488463e-06, "loss": 16.7013, "step": 201980 }, { "epoch": 0.40803257958039246, "grad_norm": 1010.4586181640625, "learning_rate": 7.439602997036229e-06, "loss": 15.1833, "step": 201990 }, { "epoch": 0.4080527802130763, "grad_norm": 505.4180908203125, "learning_rate": 7.4392982956936644e-06, "loss": 30.9323, "step": 202000 }, { "epoch": 0.4080729808457601, "grad_norm": 5731.296875, "learning_rate": 7.438993582462255e-06, "loss": 44.6847, "step": 202010 }, { "epoch": 0.4080931814784439, "grad_norm": 286.26513671875, "learning_rate": 7.438688857343488e-06, "loss": 16.6637, "step": 202020 }, { "epoch": 0.40811338211112774, "grad_norm": 488.8510437011719, "learning_rate": 7.438384120338846e-06, "loss": 14.8836, "step": 202030 }, { "epoch": 0.40813358274381156, "grad_norm": 224.29898071289062, "learning_rate": 7.4380793714498175e-06, "loss": 13.4785, "step": 202040 }, { "epoch": 0.4081537833764953, "grad_norm": 664.1687622070312, "learning_rate": 7.437774610677884e-06, "loss": 19.8974, "step": 202050 }, { "epoch": 0.40817398400917915, "grad_norm": 215.94056701660156, "learning_rate": 7.437469838024534e-06, "loss": 17.0218, "step": 202060 }, { "epoch": 0.40819418464186297, "grad_norm": 560.67626953125, "learning_rate": 7.437165053491251e-06, "loss": 19.3743, "step": 202070 }, { "epoch": 0.4082143852745468, "grad_norm": 84.43445587158203, "learning_rate": 7.43686025707952e-06, "loss": 13.7687, "step": 202080 }, { "epoch": 0.4082345859072306, "grad_norm": 563.4779052734375, "learning_rate": 7.43655544879083e-06, "loss": 11.2371, "step": 202090 }, { "epoch": 0.4082547865399144, "grad_norm": 316.088623046875, "learning_rate": 7.436250628626662e-06, "loss": 16.9678, "step": 202100 }, { "epoch": 0.40827498717259825, "grad_norm": 846.0938720703125, "learning_rate": 7.4359457965885066e-06, "loss": 24.9185, "step": 202110 }, { "epoch": 0.40829518780528207, "grad_norm": 394.32635498046875, "learning_rate": 7.435640952677844e-06, "loss": 24.0668, "step": 202120 }, { "epoch": 0.4083153884379659, "grad_norm": 517.8413696289062, "learning_rate": 7.435336096896164e-06, "loss": 33.0063, "step": 202130 }, { "epoch": 0.4083355890706497, "grad_norm": 13.747394561767578, "learning_rate": 7.435031229244951e-06, "loss": 22.4625, "step": 202140 }, { "epoch": 0.4083557897033335, "grad_norm": 348.4352722167969, "learning_rate": 7.434726349725692e-06, "loss": 27.3487, "step": 202150 }, { "epoch": 0.40837599033601735, "grad_norm": 224.69532775878906, "learning_rate": 7.434421458339871e-06, "loss": 20.6076, "step": 202160 }, { "epoch": 0.40839619096870117, "grad_norm": 278.6298828125, "learning_rate": 7.434116555088975e-06, "loss": 23.4357, "step": 202170 }, { "epoch": 0.40841639160138493, "grad_norm": 404.5762939453125, "learning_rate": 7.4338116399744905e-06, "loss": 23.1307, "step": 202180 }, { "epoch": 0.40843659223406875, "grad_norm": 538.7630615234375, "learning_rate": 7.433506712997903e-06, "loss": 27.1231, "step": 202190 }, { "epoch": 0.40845679286675257, "grad_norm": 552.8690795898438, "learning_rate": 7.433201774160701e-06, "loss": 16.8527, "step": 202200 }, { "epoch": 0.4084769934994364, "grad_norm": 463.61944580078125, "learning_rate": 7.432896823464366e-06, "loss": 24.177, "step": 202210 }, { "epoch": 0.4084971941321202, "grad_norm": 583.8330688476562, "learning_rate": 7.432591860910389e-06, "loss": 31.2486, "step": 202220 }, { "epoch": 
0.40851739476480403, "grad_norm": 605.05322265625, "learning_rate": 7.432286886500253e-06, "loss": 30.0312, "step": 202230 }, { "epoch": 0.40853759539748785, "grad_norm": 1038.759765625, "learning_rate": 7.431981900235446e-06, "loss": 20.2549, "step": 202240 }, { "epoch": 0.40855779603017167, "grad_norm": 293.982666015625, "learning_rate": 7.431676902117453e-06, "loss": 15.2225, "step": 202250 }, { "epoch": 0.4085779966628555, "grad_norm": 417.32220458984375, "learning_rate": 7.431371892147763e-06, "loss": 21.8134, "step": 202260 }, { "epoch": 0.4085981972955393, "grad_norm": 28.197507858276367, "learning_rate": 7.431066870327861e-06, "loss": 19.4645, "step": 202270 }, { "epoch": 0.40861839792822313, "grad_norm": 867.1630859375, "learning_rate": 7.430761836659235e-06, "loss": 25.0237, "step": 202280 }, { "epoch": 0.40863859856090695, "grad_norm": 347.8146057128906, "learning_rate": 7.430456791143369e-06, "loss": 29.2887, "step": 202290 }, { "epoch": 0.4086587991935907, "grad_norm": 331.7054748535156, "learning_rate": 7.430151733781752e-06, "loss": 15.065, "step": 202300 }, { "epoch": 0.40867899982627454, "grad_norm": 270.35186767578125, "learning_rate": 7.42984666457587e-06, "loss": 13.1611, "step": 202310 }, { "epoch": 0.40869920045895836, "grad_norm": 352.6448974609375, "learning_rate": 7.42954158352721e-06, "loss": 34.8578, "step": 202320 }, { "epoch": 0.4087194010916422, "grad_norm": 236.7410430908203, "learning_rate": 7.42923649063726e-06, "loss": 20.2191, "step": 202330 }, { "epoch": 0.408739601724326, "grad_norm": 839.5408325195312, "learning_rate": 7.428931385907505e-06, "loss": 22.1993, "step": 202340 }, { "epoch": 0.4087598023570098, "grad_norm": 260.7296142578125, "learning_rate": 7.428626269339433e-06, "loss": 18.8646, "step": 202350 }, { "epoch": 0.40878000298969364, "grad_norm": 479.09130859375, "learning_rate": 7.428321140934532e-06, "loss": 33.4774, "step": 202360 }, { "epoch": 0.40880020362237746, "grad_norm": 350.2244567871094, "learning_rate": 7.428016000694287e-06, "loss": 16.9029, "step": 202370 }, { "epoch": 0.4088204042550613, "grad_norm": 0.0, "learning_rate": 7.427710848620188e-06, "loss": 21.7193, "step": 202380 }, { "epoch": 0.4088406048877451, "grad_norm": 780.4125366210938, "learning_rate": 7.4274056847137185e-06, "loss": 32.7082, "step": 202390 }, { "epoch": 0.4088608055204289, "grad_norm": 229.0189666748047, "learning_rate": 7.42710050897637e-06, "loss": 43.0292, "step": 202400 }, { "epoch": 0.40888100615311274, "grad_norm": 182.1045379638672, "learning_rate": 7.426795321409628e-06, "loss": 26.0979, "step": 202410 }, { "epoch": 0.40890120678579656, "grad_norm": 473.6593017578125, "learning_rate": 7.42649012201498e-06, "loss": 30.5279, "step": 202420 }, { "epoch": 0.4089214074184803, "grad_norm": 315.952880859375, "learning_rate": 7.426184910793914e-06, "loss": 11.5901, "step": 202430 }, { "epoch": 0.40894160805116414, "grad_norm": 510.8643493652344, "learning_rate": 7.425879687747915e-06, "loss": 18.8998, "step": 202440 }, { "epoch": 0.40896180868384796, "grad_norm": 170.85086059570312, "learning_rate": 7.425574452878474e-06, "loss": 21.0382, "step": 202450 }, { "epoch": 0.4089820093165318, "grad_norm": 567.182861328125, "learning_rate": 7.425269206187076e-06, "loss": 16.5877, "step": 202460 }, { "epoch": 0.4090022099492156, "grad_norm": 449.30023193359375, "learning_rate": 7.42496394767521e-06, "loss": 11.9506, "step": 202470 }, { "epoch": 0.4090224105818994, "grad_norm": 165.76919555664062, "learning_rate": 7.424658677344365e-06, "loss": 18.0442, "step": 
202480 }, { "epoch": 0.40904261121458324, "grad_norm": 166.05328369140625, "learning_rate": 7.424353395196029e-06, "loss": 9.8056, "step": 202490 }, { "epoch": 0.40906281184726706, "grad_norm": 460.68206787109375, "learning_rate": 7.424048101231687e-06, "loss": 26.734, "step": 202500 }, { "epoch": 0.4090830124799509, "grad_norm": 479.8690185546875, "learning_rate": 7.423742795452827e-06, "loss": 18.3991, "step": 202510 }, { "epoch": 0.4091032131126347, "grad_norm": 385.1668701171875, "learning_rate": 7.423437477860941e-06, "loss": 16.2015, "step": 202520 }, { "epoch": 0.4091234137453185, "grad_norm": 265.1182556152344, "learning_rate": 7.423132148457512e-06, "loss": 16.7979, "step": 202530 }, { "epoch": 0.40914361437800234, "grad_norm": 124.39286804199219, "learning_rate": 7.422826807244034e-06, "loss": 21.9892, "step": 202540 }, { "epoch": 0.40916381501068616, "grad_norm": 457.02484130859375, "learning_rate": 7.42252145422199e-06, "loss": 25.6926, "step": 202550 }, { "epoch": 0.4091840156433699, "grad_norm": 365.1929931640625, "learning_rate": 7.422216089392872e-06, "loss": 23.4752, "step": 202560 }, { "epoch": 0.40920421627605374, "grad_norm": 217.96307373046875, "learning_rate": 7.421910712758165e-06, "loss": 17.9261, "step": 202570 }, { "epoch": 0.40922441690873756, "grad_norm": 339.1838073730469, "learning_rate": 7.421605324319359e-06, "loss": 21.7752, "step": 202580 }, { "epoch": 0.4092446175414214, "grad_norm": 667.7643432617188, "learning_rate": 7.421299924077943e-06, "loss": 41.6153, "step": 202590 }, { "epoch": 0.4092648181741052, "grad_norm": 114.67329406738281, "learning_rate": 7.4209945120354045e-06, "loss": 17.4382, "step": 202600 }, { "epoch": 0.409285018806789, "grad_norm": 217.40821838378906, "learning_rate": 7.420689088193232e-06, "loss": 24.6881, "step": 202610 }, { "epoch": 0.40930521943947284, "grad_norm": 512.017822265625, "learning_rate": 7.420383652552915e-06, "loss": 15.7967, "step": 202620 }, { "epoch": 0.40932542007215666, "grad_norm": 673.688232421875, "learning_rate": 7.420078205115942e-06, "loss": 32.7925, "step": 202630 }, { "epoch": 0.4093456207048405, "grad_norm": 621.5011596679688, "learning_rate": 7.4197727458837995e-06, "loss": 20.3245, "step": 202640 }, { "epoch": 0.4093658213375243, "grad_norm": 547.8865356445312, "learning_rate": 7.419467274857981e-06, "loss": 13.0149, "step": 202650 }, { "epoch": 0.4093860219702081, "grad_norm": 565.602783203125, "learning_rate": 7.419161792039969e-06, "loss": 21.422, "step": 202660 }, { "epoch": 0.40940622260289194, "grad_norm": 1128.8487548828125, "learning_rate": 7.41885629743126e-06, "loss": 25.6424, "step": 202670 }, { "epoch": 0.40942642323557576, "grad_norm": 213.93804931640625, "learning_rate": 7.418550791033335e-06, "loss": 34.2035, "step": 202680 }, { "epoch": 0.40944662386825953, "grad_norm": 231.7338409423828, "learning_rate": 7.418245272847688e-06, "loss": 25.0876, "step": 202690 }, { "epoch": 0.40946682450094335, "grad_norm": 461.5252685546875, "learning_rate": 7.4179397428758085e-06, "loss": 24.8995, "step": 202700 }, { "epoch": 0.40948702513362717, "grad_norm": 127.33111572265625, "learning_rate": 7.4176342011191816e-06, "loss": 15.6913, "step": 202710 }, { "epoch": 0.409507225766311, "grad_norm": 473.0237731933594, "learning_rate": 7.417328647579301e-06, "loss": 16.9951, "step": 202720 }, { "epoch": 0.4095274263989948, "grad_norm": 535.3770141601562, "learning_rate": 7.417023082257653e-06, "loss": 20.9535, "step": 202730 }, { "epoch": 0.40954762703167863, "grad_norm": 281.7862243652344, 
"learning_rate": 7.416717505155726e-06, "loss": 24.6127, "step": 202740 }, { "epoch": 0.40956782766436245, "grad_norm": 207.27316284179688, "learning_rate": 7.416411916275012e-06, "loss": 13.6992, "step": 202750 }, { "epoch": 0.40958802829704627, "grad_norm": 315.2615051269531, "learning_rate": 7.416106315617e-06, "loss": 13.0337, "step": 202760 }, { "epoch": 0.4096082289297301, "grad_norm": 365.64556884765625, "learning_rate": 7.415800703183179e-06, "loss": 17.1957, "step": 202770 }, { "epoch": 0.4096284295624139, "grad_norm": 591.7393188476562, "learning_rate": 7.415495078975038e-06, "loss": 24.1295, "step": 202780 }, { "epoch": 0.40964863019509773, "grad_norm": 523.6378784179688, "learning_rate": 7.415189442994066e-06, "loss": 11.2499, "step": 202790 }, { "epoch": 0.40966883082778155, "grad_norm": 30.62116241455078, "learning_rate": 7.414883795241754e-06, "loss": 23.3721, "step": 202800 }, { "epoch": 0.40968903146046537, "grad_norm": 253.954345703125, "learning_rate": 7.4145781357195936e-06, "loss": 14.5089, "step": 202810 }, { "epoch": 0.40970923209314913, "grad_norm": 1037.10302734375, "learning_rate": 7.414272464429068e-06, "loss": 22.0036, "step": 202820 }, { "epoch": 0.40972943272583295, "grad_norm": 915.9358520507812, "learning_rate": 7.413966781371676e-06, "loss": 22.3646, "step": 202830 }, { "epoch": 0.4097496333585168, "grad_norm": 1416.9718017578125, "learning_rate": 7.413661086548899e-06, "loss": 22.3141, "step": 202840 }, { "epoch": 0.4097698339912006, "grad_norm": 597.7158813476562, "learning_rate": 7.413355379962231e-06, "loss": 35.5899, "step": 202850 }, { "epoch": 0.4097900346238844, "grad_norm": 335.5372009277344, "learning_rate": 7.413049661613163e-06, "loss": 21.6018, "step": 202860 }, { "epoch": 0.40981023525656823, "grad_norm": 534.3197631835938, "learning_rate": 7.412743931503182e-06, "loss": 37.7867, "step": 202870 }, { "epoch": 0.40983043588925205, "grad_norm": 350.77685546875, "learning_rate": 7.412438189633781e-06, "loss": 16.7635, "step": 202880 }, { "epoch": 0.4098506365219359, "grad_norm": 210.72047424316406, "learning_rate": 7.412132436006449e-06, "loss": 12.5931, "step": 202890 }, { "epoch": 0.4098708371546197, "grad_norm": 342.9144287109375, "learning_rate": 7.411826670622676e-06, "loss": 16.952, "step": 202900 }, { "epoch": 0.4098910377873035, "grad_norm": 328.1678161621094, "learning_rate": 7.411520893483952e-06, "loss": 8.9762, "step": 202910 }, { "epoch": 0.40991123841998733, "grad_norm": 291.2751159667969, "learning_rate": 7.411215104591767e-06, "loss": 10.1271, "step": 202920 }, { "epoch": 0.40993143905267115, "grad_norm": 152.46485900878906, "learning_rate": 7.410909303947613e-06, "loss": 22.4722, "step": 202930 }, { "epoch": 0.4099516396853549, "grad_norm": 168.26895141601562, "learning_rate": 7.4106034915529786e-06, "loss": 27.4402, "step": 202940 }, { "epoch": 0.40997184031803874, "grad_norm": 3.0440235137939453, "learning_rate": 7.410297667409356e-06, "loss": 14.0275, "step": 202950 }, { "epoch": 0.40999204095072256, "grad_norm": 204.01937866210938, "learning_rate": 7.409991831518235e-06, "loss": 23.0723, "step": 202960 }, { "epoch": 0.4100122415834064, "grad_norm": 221.46669006347656, "learning_rate": 7.409685983881107e-06, "loss": 16.941, "step": 202970 }, { "epoch": 0.4100324422160902, "grad_norm": 815.1734008789062, "learning_rate": 7.409380124499459e-06, "loss": 26.154, "step": 202980 }, { "epoch": 0.410052642848774, "grad_norm": 491.68438720703125, "learning_rate": 7.409074253374786e-06, "loss": 15.2505, "step": 202990 }, { "epoch": 
0.41007284348145784, "grad_norm": 342.01397705078125, "learning_rate": 7.408768370508577e-06, "loss": 19.1597, "step": 203000 }, { "epoch": 0.41009304411414166, "grad_norm": 346.72747802734375, "learning_rate": 7.408462475902324e-06, "loss": 23.4766, "step": 203010 }, { "epoch": 0.4101132447468255, "grad_norm": 901.693603515625, "learning_rate": 7.408156569557515e-06, "loss": 24.8032, "step": 203020 }, { "epoch": 0.4101334453795093, "grad_norm": 868.064453125, "learning_rate": 7.407850651475645e-06, "loss": 30.5301, "step": 203030 }, { "epoch": 0.4101536460121931, "grad_norm": 533.5048828125, "learning_rate": 7.407544721658203e-06, "loss": 18.0294, "step": 203040 }, { "epoch": 0.41017384664487694, "grad_norm": 32.24917984008789, "learning_rate": 7.407238780106679e-06, "loss": 18.1304, "step": 203050 }, { "epoch": 0.41019404727756076, "grad_norm": 316.50787353515625, "learning_rate": 7.406932826822565e-06, "loss": 22.6199, "step": 203060 }, { "epoch": 0.4102142479102445, "grad_norm": 281.2059326171875, "learning_rate": 7.406626861807352e-06, "loss": 20.8594, "step": 203070 }, { "epoch": 0.41023444854292834, "grad_norm": 350.651611328125, "learning_rate": 7.406320885062532e-06, "loss": 14.0463, "step": 203080 }, { "epoch": 0.41025464917561216, "grad_norm": 98.49825286865234, "learning_rate": 7.406014896589597e-06, "loss": 13.0616, "step": 203090 }, { "epoch": 0.410274849808296, "grad_norm": 459.5002746582031, "learning_rate": 7.405708896390037e-06, "loss": 32.8267, "step": 203100 }, { "epoch": 0.4102950504409798, "grad_norm": 359.4190979003906, "learning_rate": 7.405402884465342e-06, "loss": 16.3583, "step": 203110 }, { "epoch": 0.4103152510736636, "grad_norm": 454.3992919921875, "learning_rate": 7.405096860817007e-06, "loss": 27.9597, "step": 203120 }, { "epoch": 0.41033545170634744, "grad_norm": 197.53135681152344, "learning_rate": 7.404790825446522e-06, "loss": 20.3972, "step": 203130 }, { "epoch": 0.41035565233903126, "grad_norm": 158.32972717285156, "learning_rate": 7.404484778355374e-06, "loss": 21.0419, "step": 203140 }, { "epoch": 0.4103758529717151, "grad_norm": 706.7918090820312, "learning_rate": 7.404178719545063e-06, "loss": 17.9008, "step": 203150 }, { "epoch": 0.4103960536043989, "grad_norm": 496.1558837890625, "learning_rate": 7.403872649017074e-06, "loss": 12.4259, "step": 203160 }, { "epoch": 0.4104162542370827, "grad_norm": 0.0, "learning_rate": 7.403566566772904e-06, "loss": 40.8558, "step": 203170 }, { "epoch": 0.41043645486976654, "grad_norm": 491.2930908203125, "learning_rate": 7.40326047281404e-06, "loss": 27.3981, "step": 203180 }, { "epoch": 0.41045665550245036, "grad_norm": 792.5803833007812, "learning_rate": 7.4029543671419765e-06, "loss": 39.0564, "step": 203190 }, { "epoch": 0.4104768561351341, "grad_norm": 429.5687255859375, "learning_rate": 7.402648249758204e-06, "loss": 13.5491, "step": 203200 }, { "epoch": 0.41049705676781795, "grad_norm": 488.1636657714844, "learning_rate": 7.4023421206642176e-06, "loss": 26.4762, "step": 203210 }, { "epoch": 0.41051725740050177, "grad_norm": 776.623779296875, "learning_rate": 7.402035979861505e-06, "loss": 22.4744, "step": 203220 }, { "epoch": 0.4105374580331856, "grad_norm": 1082.503662109375, "learning_rate": 7.401729827351561e-06, "loss": 17.9678, "step": 203230 }, { "epoch": 0.4105576586658694, "grad_norm": 374.99774169921875, "learning_rate": 7.401423663135877e-06, "loss": 14.9524, "step": 203240 }, { "epoch": 0.4105778592985532, "grad_norm": 184.60501098632812, "learning_rate": 7.401117487215945e-06, "loss": 19.6489, 
"step": 203250 }, { "epoch": 0.41059805993123705, "grad_norm": 299.70745849609375, "learning_rate": 7.400811299593259e-06, "loss": 28.3008, "step": 203260 }, { "epoch": 0.41061826056392087, "grad_norm": 298.9436340332031, "learning_rate": 7.400505100269307e-06, "loss": 15.3563, "step": 203270 }, { "epoch": 0.4106384611966047, "grad_norm": 468.0664367675781, "learning_rate": 7.4001988892455875e-06, "loss": 24.0958, "step": 203280 }, { "epoch": 0.4106586618292885, "grad_norm": 408.8467102050781, "learning_rate": 7.399892666523588e-06, "loss": 17.696, "step": 203290 }, { "epoch": 0.4106788624619723, "grad_norm": 351.5890197753906, "learning_rate": 7.3995864321048036e-06, "loss": 22.1796, "step": 203300 }, { "epoch": 0.41069906309465615, "grad_norm": 796.6549682617188, "learning_rate": 7.399280185990726e-06, "loss": 27.2794, "step": 203310 }, { "epoch": 0.41071926372733997, "grad_norm": 521.300537109375, "learning_rate": 7.3989739281828466e-06, "loss": 30.68, "step": 203320 }, { "epoch": 0.41073946436002373, "grad_norm": 210.74981689453125, "learning_rate": 7.39866765868266e-06, "loss": 16.6639, "step": 203330 }, { "epoch": 0.41075966499270755, "grad_norm": 38.808624267578125, "learning_rate": 7.398361377491659e-06, "loss": 17.412, "step": 203340 }, { "epoch": 0.41077986562539137, "grad_norm": 12.65576171875, "learning_rate": 7.398055084611333e-06, "loss": 14.3816, "step": 203350 }, { "epoch": 0.4108000662580752, "grad_norm": 308.3738708496094, "learning_rate": 7.397748780043179e-06, "loss": 15.9892, "step": 203360 }, { "epoch": 0.410820266890759, "grad_norm": 350.11297607421875, "learning_rate": 7.3974424637886886e-06, "loss": 14.9189, "step": 203370 }, { "epoch": 0.41084046752344283, "grad_norm": 687.0380249023438, "learning_rate": 7.397136135849354e-06, "loss": 13.4597, "step": 203380 }, { "epoch": 0.41086066815612665, "grad_norm": 36.86468505859375, "learning_rate": 7.3968297962266696e-06, "loss": 30.9209, "step": 203390 }, { "epoch": 0.41088086878881047, "grad_norm": 58.81327438354492, "learning_rate": 7.396523444922126e-06, "loss": 13.2363, "step": 203400 }, { "epoch": 0.4109010694214943, "grad_norm": 379.4734802246094, "learning_rate": 7.396217081937218e-06, "loss": 11.533, "step": 203410 }, { "epoch": 0.4109212700541781, "grad_norm": 233.38462829589844, "learning_rate": 7.395910707273439e-06, "loss": 11.755, "step": 203420 }, { "epoch": 0.41094147068686193, "grad_norm": 419.86407470703125, "learning_rate": 7.395604320932281e-06, "loss": 12.0955, "step": 203430 }, { "epoch": 0.41096167131954575, "grad_norm": 139.32289123535156, "learning_rate": 7.39529792291524e-06, "loss": 15.9278, "step": 203440 }, { "epoch": 0.41098187195222957, "grad_norm": 145.92091369628906, "learning_rate": 7.394991513223806e-06, "loss": 30.9979, "step": 203450 }, { "epoch": 0.41100207258491334, "grad_norm": 341.9765625, "learning_rate": 7.394685091859474e-06, "loss": 22.7676, "step": 203460 }, { "epoch": 0.41102227321759716, "grad_norm": 581.0768432617188, "learning_rate": 7.394378658823738e-06, "loss": 12.4343, "step": 203470 }, { "epoch": 0.411042473850281, "grad_norm": 407.8321533203125, "learning_rate": 7.3940722141180885e-06, "loss": 13.9842, "step": 203480 }, { "epoch": 0.4110626744829648, "grad_norm": 830.1275024414062, "learning_rate": 7.393765757744025e-06, "loss": 14.1395, "step": 203490 }, { "epoch": 0.4110828751156486, "grad_norm": 361.3041076660156, "learning_rate": 7.393459289703035e-06, "loss": 7.2721, "step": 203500 }, { "epoch": 0.41110307574833244, "grad_norm": 262.95428466796875, 
"learning_rate": 7.393152809996616e-06, "loss": 19.7618, "step": 203510 }, { "epoch": 0.41112327638101626, "grad_norm": 261.2764587402344, "learning_rate": 7.392846318626259e-06, "loss": 22.2981, "step": 203520 }, { "epoch": 0.4111434770137001, "grad_norm": 610.3558349609375, "learning_rate": 7.392539815593459e-06, "loss": 15.0741, "step": 203530 }, { "epoch": 0.4111636776463839, "grad_norm": 364.45721435546875, "learning_rate": 7.392233300899712e-06, "loss": 13.466, "step": 203540 }, { "epoch": 0.4111838782790677, "grad_norm": 537.528076171875, "learning_rate": 7.391926774546509e-06, "loss": 24.3812, "step": 203550 }, { "epoch": 0.41120407891175154, "grad_norm": 1105.9019775390625, "learning_rate": 7.391620236535345e-06, "loss": 18.9171, "step": 203560 }, { "epoch": 0.41122427954443536, "grad_norm": 430.5234069824219, "learning_rate": 7.391313686867715e-06, "loss": 24.4805, "step": 203570 }, { "epoch": 0.4112444801771191, "grad_norm": 337.411376953125, "learning_rate": 7.391007125545111e-06, "loss": 15.8487, "step": 203580 }, { "epoch": 0.41126468080980294, "grad_norm": 203.77084350585938, "learning_rate": 7.390700552569028e-06, "loss": 12.0691, "step": 203590 }, { "epoch": 0.41128488144248676, "grad_norm": 363.8847351074219, "learning_rate": 7.390393967940962e-06, "loss": 25.2019, "step": 203600 }, { "epoch": 0.4113050820751706, "grad_norm": 200.1789093017578, "learning_rate": 7.390087371662403e-06, "loss": 11.106, "step": 203610 }, { "epoch": 0.4113252827078544, "grad_norm": 1082.9810791015625, "learning_rate": 7.389780763734851e-06, "loss": 28.1564, "step": 203620 }, { "epoch": 0.4113454833405382, "grad_norm": 235.7948455810547, "learning_rate": 7.3894741441597965e-06, "loss": 17.6264, "step": 203630 }, { "epoch": 0.41136568397322204, "grad_norm": 765.6566772460938, "learning_rate": 7.389167512938734e-06, "loss": 29.9497, "step": 203640 }, { "epoch": 0.41138588460590586, "grad_norm": 976.2962646484375, "learning_rate": 7.38886087007316e-06, "loss": 21.5795, "step": 203650 }, { "epoch": 0.4114060852385897, "grad_norm": 342.59521484375, "learning_rate": 7.388554215564567e-06, "loss": 18.6348, "step": 203660 }, { "epoch": 0.4114262858712735, "grad_norm": 465.6590576171875, "learning_rate": 7.388247549414451e-06, "loss": 27.5429, "step": 203670 }, { "epoch": 0.4114464865039573, "grad_norm": 959.4237670898438, "learning_rate": 7.387940871624306e-06, "loss": 13.0305, "step": 203680 }, { "epoch": 0.41146668713664114, "grad_norm": 947.7572021484375, "learning_rate": 7.387634182195626e-06, "loss": 20.6679, "step": 203690 }, { "epoch": 0.41148688776932496, "grad_norm": 185.0061798095703, "learning_rate": 7.3873274811299065e-06, "loss": 15.733, "step": 203700 }, { "epoch": 0.4115070884020087, "grad_norm": 338.54742431640625, "learning_rate": 7.387020768428644e-06, "loss": 10.8132, "step": 203710 }, { "epoch": 0.41152728903469254, "grad_norm": 238.6177215576172, "learning_rate": 7.386714044093331e-06, "loss": 36.3459, "step": 203720 }, { "epoch": 0.41154748966737636, "grad_norm": 186.13943481445312, "learning_rate": 7.3864073081254625e-06, "loss": 22.4083, "step": 203730 }, { "epoch": 0.4115676903000602, "grad_norm": 471.1614990234375, "learning_rate": 7.386100560526537e-06, "loss": 34.5945, "step": 203740 }, { "epoch": 0.411587890932744, "grad_norm": 320.0458068847656, "learning_rate": 7.3857938012980425e-06, "loss": 21.0611, "step": 203750 }, { "epoch": 0.4116080915654278, "grad_norm": 251.6160125732422, "learning_rate": 7.385487030441482e-06, "loss": 13.937, "step": 203760 }, { "epoch": 
0.41162829219811164, "grad_norm": 14.12028694152832, "learning_rate": 7.385180247958343e-06, "loss": 16.3978, "step": 203770 }, { "epoch": 0.41164849283079546, "grad_norm": 354.54119873046875, "learning_rate": 7.384873453850128e-06, "loss": 14.0409, "step": 203780 }, { "epoch": 0.4116686934634793, "grad_norm": 515.3600463867188, "learning_rate": 7.3845666481183285e-06, "loss": 17.1991, "step": 203790 }, { "epoch": 0.4116888940961631, "grad_norm": 668.2739868164062, "learning_rate": 7.3842598307644396e-06, "loss": 18.2747, "step": 203800 }, { "epoch": 0.4117090947288469, "grad_norm": 690.9266357421875, "learning_rate": 7.38395300178996e-06, "loss": 17.0975, "step": 203810 }, { "epoch": 0.41172929536153074, "grad_norm": 632.6204223632812, "learning_rate": 7.383646161196379e-06, "loss": 25.168, "step": 203820 }, { "epoch": 0.41174949599421456, "grad_norm": 91.12115478515625, "learning_rate": 7.3833393089851975e-06, "loss": 36.994, "step": 203830 }, { "epoch": 0.41176969662689833, "grad_norm": 303.2684326171875, "learning_rate": 7.383032445157909e-06, "loss": 15.9723, "step": 203840 }, { "epoch": 0.41178989725958215, "grad_norm": 187.7299041748047, "learning_rate": 7.382725569716009e-06, "loss": 32.0824, "step": 203850 }, { "epoch": 0.41181009789226597, "grad_norm": 52.39496994018555, "learning_rate": 7.3824186826609945e-06, "loss": 16.3438, "step": 203860 }, { "epoch": 0.4118302985249498, "grad_norm": 155.1835479736328, "learning_rate": 7.382111783994361e-06, "loss": 26.1586, "step": 203870 }, { "epoch": 0.4118504991576336, "grad_norm": 748.326904296875, "learning_rate": 7.381804873717602e-06, "loss": 34.8786, "step": 203880 }, { "epoch": 0.41187069979031743, "grad_norm": 42.789878845214844, "learning_rate": 7.381497951832215e-06, "loss": 29.3765, "step": 203890 }, { "epoch": 0.41189090042300125, "grad_norm": 10.138392448425293, "learning_rate": 7.381191018339697e-06, "loss": 33.3606, "step": 203900 }, { "epoch": 0.41191110105568507, "grad_norm": 463.3682861328125, "learning_rate": 7.380884073241541e-06, "loss": 17.2549, "step": 203910 }, { "epoch": 0.4119313016883689, "grad_norm": 349.8173828125, "learning_rate": 7.380577116539247e-06, "loss": 17.862, "step": 203920 }, { "epoch": 0.4119515023210527, "grad_norm": 213.31219482421875, "learning_rate": 7.380270148234306e-06, "loss": 15.8991, "step": 203930 }, { "epoch": 0.41197170295373653, "grad_norm": 257.1610107421875, "learning_rate": 7.379963168328219e-06, "loss": 20.6672, "step": 203940 }, { "epoch": 0.41199190358642035, "grad_norm": 474.9005432128906, "learning_rate": 7.379656176822481e-06, "loss": 26.1415, "step": 203950 }, { "epoch": 0.41201210421910417, "grad_norm": 368.1659240722656, "learning_rate": 7.379349173718585e-06, "loss": 7.9902, "step": 203960 }, { "epoch": 0.41203230485178793, "grad_norm": 339.0737609863281, "learning_rate": 7.379042159018031e-06, "loss": 26.1943, "step": 203970 }, { "epoch": 0.41205250548447175, "grad_norm": 911.4879150390625, "learning_rate": 7.378735132722313e-06, "loss": 29.2985, "step": 203980 }, { "epoch": 0.4120727061171556, "grad_norm": 334.9006652832031, "learning_rate": 7.3784280948329315e-06, "loss": 28.405, "step": 203990 }, { "epoch": 0.4120929067498394, "grad_norm": 32.65821838378906, "learning_rate": 7.378121045351378e-06, "loss": 13.0551, "step": 204000 }, { "epoch": 0.4121131073825232, "grad_norm": 719.0232543945312, "learning_rate": 7.37781398427915e-06, "loss": 23.5942, "step": 204010 }, { "epoch": 0.41213330801520703, "grad_norm": 433.3742370605469, "learning_rate": 
7.377506911617746e-06, "loss": 12.329, "step": 204020 }, { "epoch": 0.41215350864789085, "grad_norm": 441.4324645996094, "learning_rate": 7.3771998273686615e-06, "loss": 11.4904, "step": 204030 }, { "epoch": 0.4121737092805747, "grad_norm": 204.50839233398438, "learning_rate": 7.376892731533392e-06, "loss": 35.7096, "step": 204040 }, { "epoch": 0.4121939099132585, "grad_norm": 705.2774658203125, "learning_rate": 7.376585624113438e-06, "loss": 24.6124, "step": 204050 }, { "epoch": 0.4122141105459423, "grad_norm": 232.0381317138672, "learning_rate": 7.3762785051102924e-06, "loss": 12.1146, "step": 204060 }, { "epoch": 0.41223431117862613, "grad_norm": 329.8414001464844, "learning_rate": 7.375971374525454e-06, "loss": 26.7676, "step": 204070 }, { "epoch": 0.41225451181130995, "grad_norm": 147.4716033935547, "learning_rate": 7.375664232360421e-06, "loss": 53.1563, "step": 204080 }, { "epoch": 0.4122747124439937, "grad_norm": 7.299252986907959, "learning_rate": 7.375357078616685e-06, "loss": 22.3826, "step": 204090 }, { "epoch": 0.41229491307667754, "grad_norm": 28.4975528717041, "learning_rate": 7.37504991329575e-06, "loss": 7.3029, "step": 204100 }, { "epoch": 0.41231511370936136, "grad_norm": 830.8320922851562, "learning_rate": 7.374742736399108e-06, "loss": 25.8122, "step": 204110 }, { "epoch": 0.4123353143420452, "grad_norm": 170.08763122558594, "learning_rate": 7.374435547928258e-06, "loss": 25.8492, "step": 204120 }, { "epoch": 0.412355514974729, "grad_norm": 265.1971130371094, "learning_rate": 7.374128347884698e-06, "loss": 20.4346, "step": 204130 }, { "epoch": 0.4123757156074128, "grad_norm": 27.740524291992188, "learning_rate": 7.373821136269924e-06, "loss": 30.3226, "step": 204140 }, { "epoch": 0.41239591624009664, "grad_norm": 388.4397888183594, "learning_rate": 7.373513913085434e-06, "loss": 12.6602, "step": 204150 }, { "epoch": 0.41241611687278046, "grad_norm": 503.6349792480469, "learning_rate": 7.3732066783327246e-06, "loss": 30.4778, "step": 204160 }, { "epoch": 0.4124363175054643, "grad_norm": 281.2956237792969, "learning_rate": 7.372899432013294e-06, "loss": 16.2556, "step": 204170 }, { "epoch": 0.4124565181381481, "grad_norm": 784.6923828125, "learning_rate": 7.372592174128639e-06, "loss": 23.285, "step": 204180 }, { "epoch": 0.4124767187708319, "grad_norm": 442.5145263671875, "learning_rate": 7.372284904680258e-06, "loss": 24.7501, "step": 204190 }, { "epoch": 0.41249691940351574, "grad_norm": 516.041015625, "learning_rate": 7.371977623669646e-06, "loss": 38.6752, "step": 204200 }, { "epoch": 0.41251712003619956, "grad_norm": 43.92538833618164, "learning_rate": 7.371670331098306e-06, "loss": 21.5389, "step": 204210 }, { "epoch": 0.4125373206688833, "grad_norm": 530.768310546875, "learning_rate": 7.371363026967731e-06, "loss": 25.1033, "step": 204220 }, { "epoch": 0.41255752130156714, "grad_norm": 267.6069641113281, "learning_rate": 7.3710557112794205e-06, "loss": 9.8462, "step": 204230 }, { "epoch": 0.41257772193425096, "grad_norm": 301.1824035644531, "learning_rate": 7.370748384034871e-06, "loss": 18.6672, "step": 204240 }, { "epoch": 0.4125979225669348, "grad_norm": 239.70164489746094, "learning_rate": 7.370441045235581e-06, "loss": 19.2322, "step": 204250 }, { "epoch": 0.4126181231996186, "grad_norm": 341.6642761230469, "learning_rate": 7.370133694883052e-06, "loss": 15.1475, "step": 204260 }, { "epoch": 0.4126383238323024, "grad_norm": 203.58834838867188, "learning_rate": 7.369826332978776e-06, "loss": 26.8191, "step": 204270 }, { "epoch": 0.41265852446498624, 
"grad_norm": 358.1466979980469, "learning_rate": 7.369518959524254e-06, "loss": 18.3549, "step": 204280 }, { "epoch": 0.41267872509767006, "grad_norm": 822.9906616210938, "learning_rate": 7.369211574520985e-06, "loss": 16.4689, "step": 204290 }, { "epoch": 0.4126989257303539, "grad_norm": 6.4609479904174805, "learning_rate": 7.368904177970466e-06, "loss": 12.5418, "step": 204300 }, { "epoch": 0.4127191263630377, "grad_norm": 289.052978515625, "learning_rate": 7.368596769874194e-06, "loss": 30.5566, "step": 204310 }, { "epoch": 0.4127393269957215, "grad_norm": 598.248291015625, "learning_rate": 7.368289350233672e-06, "loss": 15.5053, "step": 204320 }, { "epoch": 0.41275952762840534, "grad_norm": 766.3134155273438, "learning_rate": 7.367981919050392e-06, "loss": 40.4611, "step": 204330 }, { "epoch": 0.41277972826108916, "grad_norm": 271.3908386230469, "learning_rate": 7.367674476325856e-06, "loss": 23.5434, "step": 204340 }, { "epoch": 0.4127999288937729, "grad_norm": 845.899169921875, "learning_rate": 7.3673670220615615e-06, "loss": 142.2028, "step": 204350 }, { "epoch": 0.41282012952645675, "grad_norm": 355.33087158203125, "learning_rate": 7.367059556259008e-06, "loss": 25.4397, "step": 204360 }, { "epoch": 0.41284033015914057, "grad_norm": 779.6798095703125, "learning_rate": 7.366752078919693e-06, "loss": 17.3048, "step": 204370 }, { "epoch": 0.4128605307918244, "grad_norm": 256.36920166015625, "learning_rate": 7.366444590045114e-06, "loss": 11.5476, "step": 204380 }, { "epoch": 0.4128807314245082, "grad_norm": 46.427059173583984, "learning_rate": 7.366137089636774e-06, "loss": 31.0527, "step": 204390 }, { "epoch": 0.412900932057192, "grad_norm": 299.9348449707031, "learning_rate": 7.365829577696166e-06, "loss": 13.5647, "step": 204400 }, { "epoch": 0.41292113268987585, "grad_norm": 702.4867553710938, "learning_rate": 7.365522054224793e-06, "loss": 24.8403, "step": 204410 }, { "epoch": 0.41294133332255967, "grad_norm": 198.4513702392578, "learning_rate": 7.365214519224153e-06, "loss": 27.4059, "step": 204420 }, { "epoch": 0.4129615339552435, "grad_norm": 616.6324462890625, "learning_rate": 7.3649069726957426e-06, "loss": 13.2356, "step": 204430 }, { "epoch": 0.4129817345879273, "grad_norm": 379.34423828125, "learning_rate": 7.364599414641064e-06, "loss": 20.9384, "step": 204440 }, { "epoch": 0.4130019352206111, "grad_norm": 588.1680908203125, "learning_rate": 7.364291845061613e-06, "loss": 14.3176, "step": 204450 }, { "epoch": 0.41302213585329495, "grad_norm": 152.29234313964844, "learning_rate": 7.3639842639588924e-06, "loss": 20.8452, "step": 204460 }, { "epoch": 0.41304233648597877, "grad_norm": 768.8727416992188, "learning_rate": 7.363676671334397e-06, "loss": 25.8743, "step": 204470 }, { "epoch": 0.41306253711866253, "grad_norm": 883.4291381835938, "learning_rate": 7.36336906718963e-06, "loss": 64.602, "step": 204480 }, { "epoch": 0.41308273775134635, "grad_norm": 394.97369384765625, "learning_rate": 7.363061451526088e-06, "loss": 32.4078, "step": 204490 }, { "epoch": 0.41310293838403017, "grad_norm": 362.8598327636719, "learning_rate": 7.362753824345271e-06, "loss": 16.4351, "step": 204500 }, { "epoch": 0.413123139016714, "grad_norm": 93.06488800048828, "learning_rate": 7.362446185648678e-06, "loss": 16.3617, "step": 204510 }, { "epoch": 0.4131433396493978, "grad_norm": 252.51776123046875, "learning_rate": 7.362138535437809e-06, "loss": 13.6326, "step": 204520 }, { "epoch": 0.41316354028208163, "grad_norm": 688.4585571289062, "learning_rate": 7.361830873714165e-06, "loss": 16.4144, 
"step": 204530 }, { "epoch": 0.41318374091476545, "grad_norm": 846.5159912109375, "learning_rate": 7.361523200479241e-06, "loss": 25.2794, "step": 204540 }, { "epoch": 0.41320394154744927, "grad_norm": 293.8544006347656, "learning_rate": 7.361215515734542e-06, "loss": 19.9846, "step": 204550 }, { "epoch": 0.4132241421801331, "grad_norm": 524.58203125, "learning_rate": 7.360907819481564e-06, "loss": 24.4492, "step": 204560 }, { "epoch": 0.4132443428128169, "grad_norm": 319.6919860839844, "learning_rate": 7.360600111721807e-06, "loss": 19.3178, "step": 204570 }, { "epoch": 0.41326454344550073, "grad_norm": 254.0984344482422, "learning_rate": 7.360292392456772e-06, "loss": 19.386, "step": 204580 }, { "epoch": 0.41328474407818455, "grad_norm": 647.4578857421875, "learning_rate": 7.359984661687957e-06, "loss": 19.9465, "step": 204590 }, { "epoch": 0.41330494471086837, "grad_norm": 395.6432189941406, "learning_rate": 7.3596769194168646e-06, "loss": 13.5074, "step": 204600 }, { "epoch": 0.41332514534355214, "grad_norm": 427.6363220214844, "learning_rate": 7.359369165644992e-06, "loss": 39.348, "step": 204610 }, { "epoch": 0.41334534597623596, "grad_norm": 679.1809692382812, "learning_rate": 7.359061400373841e-06, "loss": 11.2077, "step": 204620 }, { "epoch": 0.4133655466089198, "grad_norm": 305.5386047363281, "learning_rate": 7.358753623604911e-06, "loss": 14.5615, "step": 204630 }, { "epoch": 0.4133857472416036, "grad_norm": 625.0375366210938, "learning_rate": 7.358445835339702e-06, "loss": 26.0267, "step": 204640 }, { "epoch": 0.4134059478742874, "grad_norm": 71.08431243896484, "learning_rate": 7.358138035579712e-06, "loss": 18.9844, "step": 204650 }, { "epoch": 0.41342614850697124, "grad_norm": 156.96670532226562, "learning_rate": 7.357830224326445e-06, "loss": 20.113, "step": 204660 }, { "epoch": 0.41344634913965506, "grad_norm": 176.9730682373047, "learning_rate": 7.357522401581398e-06, "loss": 24.6944, "step": 204670 }, { "epoch": 0.4134665497723389, "grad_norm": 215.47763061523438, "learning_rate": 7.357214567346075e-06, "loss": 17.0557, "step": 204680 }, { "epoch": 0.4134867504050227, "grad_norm": 331.2965393066406, "learning_rate": 7.356906721621974e-06, "loss": 20.3168, "step": 204690 }, { "epoch": 0.4135069510377065, "grad_norm": 643.3457641601562, "learning_rate": 7.3565988644105926e-06, "loss": 27.3129, "step": 204700 }, { "epoch": 0.41352715167039034, "grad_norm": 25.501609802246094, "learning_rate": 7.356290995713436e-06, "loss": 27.9145, "step": 204710 }, { "epoch": 0.41354735230307416, "grad_norm": 334.7339172363281, "learning_rate": 7.355983115532004e-06, "loss": 13.3175, "step": 204720 }, { "epoch": 0.4135675529357579, "grad_norm": 157.26942443847656, "learning_rate": 7.355675223867794e-06, "loss": 20.8501, "step": 204730 }, { "epoch": 0.41358775356844174, "grad_norm": 515.1536254882812, "learning_rate": 7.35536732072231e-06, "loss": 13.5202, "step": 204740 }, { "epoch": 0.41360795420112556, "grad_norm": 380.37646484375, "learning_rate": 7.35505940609705e-06, "loss": 31.1653, "step": 204750 }, { "epoch": 0.4136281548338094, "grad_norm": 253.8275146484375, "learning_rate": 7.354751479993518e-06, "loss": 22.4868, "step": 204760 }, { "epoch": 0.4136483554664932, "grad_norm": 19.62946128845215, "learning_rate": 7.354443542413212e-06, "loss": 18.2909, "step": 204770 }, { "epoch": 0.413668556099177, "grad_norm": 424.73797607421875, "learning_rate": 7.3541355933576345e-06, "loss": 20.9551, "step": 204780 }, { "epoch": 0.41368875673186084, "grad_norm": 446.65283203125, "learning_rate": 
7.353827632828283e-06, "loss": 15.9439, "step": 204790 }, { "epoch": 0.41370895736454466, "grad_norm": 501.2140197753906, "learning_rate": 7.353519660826665e-06, "loss": 16.7541, "step": 204800 }, { "epoch": 0.4137291579972285, "grad_norm": 446.319580078125, "learning_rate": 7.353211677354274e-06, "loss": 14.0886, "step": 204810 }, { "epoch": 0.4137493586299123, "grad_norm": 302.3290710449219, "learning_rate": 7.352903682412617e-06, "loss": 14.2707, "step": 204820 }, { "epoch": 0.4137695592625961, "grad_norm": 765.657958984375, "learning_rate": 7.352595676003191e-06, "loss": 18.2536, "step": 204830 }, { "epoch": 0.41378975989527994, "grad_norm": 307.65966796875, "learning_rate": 7.3522876581275e-06, "loss": 22.2144, "step": 204840 }, { "epoch": 0.41380996052796376, "grad_norm": 0.0, "learning_rate": 7.351979628787045e-06, "loss": 23.47, "step": 204850 }, { "epoch": 0.4138301611606475, "grad_norm": 699.7431030273438, "learning_rate": 7.351671587983325e-06, "loss": 12.5187, "step": 204860 }, { "epoch": 0.41385036179333134, "grad_norm": 138.17953491210938, "learning_rate": 7.351363535717845e-06, "loss": 10.1403, "step": 204870 }, { "epoch": 0.41387056242601516, "grad_norm": 425.6673583984375, "learning_rate": 7.3510554719921015e-06, "loss": 30.1279, "step": 204880 }, { "epoch": 0.413890763058699, "grad_norm": 376.48590087890625, "learning_rate": 7.350747396807601e-06, "loss": 14.3699, "step": 204890 }, { "epoch": 0.4139109636913828, "grad_norm": 459.9209899902344, "learning_rate": 7.350439310165842e-06, "loss": 13.465, "step": 204900 }, { "epoch": 0.4139311643240666, "grad_norm": 226.81353759765625, "learning_rate": 7.350131212068328e-06, "loss": 21.4474, "step": 204910 }, { "epoch": 0.41395136495675044, "grad_norm": 971.2474975585938, "learning_rate": 7.349823102516558e-06, "loss": 21.9557, "step": 204920 }, { "epoch": 0.41397156558943426, "grad_norm": 251.871826171875, "learning_rate": 7.349514981512036e-06, "loss": 17.9313, "step": 204930 }, { "epoch": 0.4139917662221181, "grad_norm": 283.456787109375, "learning_rate": 7.349206849056263e-06, "loss": 28.0992, "step": 204940 }, { "epoch": 0.4140119668548019, "grad_norm": 262.2191467285156, "learning_rate": 7.34889870515074e-06, "loss": 16.7783, "step": 204950 }, { "epoch": 0.4140321674874857, "grad_norm": 489.7118835449219, "learning_rate": 7.34859054979697e-06, "loss": 48.0792, "step": 204960 }, { "epoch": 0.41405236812016954, "grad_norm": 160.91929626464844, "learning_rate": 7.348282382996454e-06, "loss": 29.146, "step": 204970 }, { "epoch": 0.41407256875285336, "grad_norm": 370.2843933105469, "learning_rate": 7.3479742047506955e-06, "loss": 29.2165, "step": 204980 }, { "epoch": 0.41409276938553713, "grad_norm": 593.0636596679688, "learning_rate": 7.347666015061195e-06, "loss": 27.1644, "step": 204990 }, { "epoch": 0.41411297001822095, "grad_norm": 196.46533203125, "learning_rate": 7.347357813929455e-06, "loss": 21.9118, "step": 205000 }, { "epoch": 0.41413317065090477, "grad_norm": 634.07275390625, "learning_rate": 7.347049601356977e-06, "loss": 28.3629, "step": 205010 }, { "epoch": 0.4141533712835886, "grad_norm": 368.7333068847656, "learning_rate": 7.346741377345264e-06, "loss": 25.4299, "step": 205020 }, { "epoch": 0.4141735719162724, "grad_norm": 1.2591146230697632, "learning_rate": 7.34643314189582e-06, "loss": 12.8976, "step": 205030 }, { "epoch": 0.41419377254895623, "grad_norm": 340.10321044921875, "learning_rate": 7.346124895010144e-06, "loss": 12.9368, "step": 205040 }, { "epoch": 0.41421397318164005, "grad_norm": 
436.80450439453125, "learning_rate": 7.345816636689741e-06, "loss": 23.3278, "step": 205050 }, { "epoch": 0.41423417381432387, "grad_norm": 202.7328338623047, "learning_rate": 7.345508366936111e-06, "loss": 12.795, "step": 205060 }, { "epoch": 0.4142543744470077, "grad_norm": 275.9850158691406, "learning_rate": 7.345200085750758e-06, "loss": 9.8136, "step": 205070 }, { "epoch": 0.4142745750796915, "grad_norm": 193.0493927001953, "learning_rate": 7.344891793135184e-06, "loss": 49.3537, "step": 205080 }, { "epoch": 0.41429477571237533, "grad_norm": 431.5648498535156, "learning_rate": 7.344583489090893e-06, "loss": 18.3269, "step": 205090 }, { "epoch": 0.41431497634505915, "grad_norm": 426.5965576171875, "learning_rate": 7.344275173619385e-06, "loss": 12.8332, "step": 205100 }, { "epoch": 0.41433517697774297, "grad_norm": 283.1705322265625, "learning_rate": 7.343966846722164e-06, "loss": 32.939, "step": 205110 }, { "epoch": 0.41435537761042673, "grad_norm": 325.40460205078125, "learning_rate": 7.343658508400734e-06, "loss": 15.246, "step": 205120 }, { "epoch": 0.41437557824311055, "grad_norm": 63.674461364746094, "learning_rate": 7.343350158656596e-06, "loss": 27.4282, "step": 205130 }, { "epoch": 0.4143957788757944, "grad_norm": 145.14161682128906, "learning_rate": 7.343041797491253e-06, "loss": 13.7614, "step": 205140 }, { "epoch": 0.4144159795084782, "grad_norm": 712.3728637695312, "learning_rate": 7.3427334249062085e-06, "loss": 36.2831, "step": 205150 }, { "epoch": 0.414436180141162, "grad_norm": 161.9210968017578, "learning_rate": 7.342425040902967e-06, "loss": 18.0161, "step": 205160 }, { "epoch": 0.41445638077384583, "grad_norm": 1166.4088134765625, "learning_rate": 7.3421166454830295e-06, "loss": 28.6236, "step": 205170 }, { "epoch": 0.41447658140652965, "grad_norm": 690.5027465820312, "learning_rate": 7.341808238647898e-06, "loss": 25.2683, "step": 205180 }, { "epoch": 0.4144967820392135, "grad_norm": 449.7722473144531, "learning_rate": 7.3414998203990784e-06, "loss": 15.3232, "step": 205190 }, { "epoch": 0.4145169826718973, "grad_norm": 261.26495361328125, "learning_rate": 7.341191390738073e-06, "loss": 18.6568, "step": 205200 }, { "epoch": 0.4145371833045811, "grad_norm": 285.2146301269531, "learning_rate": 7.340882949666385e-06, "loss": 18.5354, "step": 205210 }, { "epoch": 0.41455738393726493, "grad_norm": 307.7693786621094, "learning_rate": 7.340574497185516e-06, "loss": 18.4317, "step": 205220 }, { "epoch": 0.41457758456994875, "grad_norm": 848.750244140625, "learning_rate": 7.340266033296972e-06, "loss": 26.6416, "step": 205230 }, { "epoch": 0.4145977852026326, "grad_norm": 227.63143920898438, "learning_rate": 7.339957558002254e-06, "loss": 24.8482, "step": 205240 }, { "epoch": 0.41461798583531634, "grad_norm": 1408.728271484375, "learning_rate": 7.3396490713028674e-06, "loss": 41.6129, "step": 205250 }, { "epoch": 0.41463818646800016, "grad_norm": 381.8595886230469, "learning_rate": 7.339340573200314e-06, "loss": 17.6669, "step": 205260 }, { "epoch": 0.414658387100684, "grad_norm": 645.9013061523438, "learning_rate": 7.339032063696101e-06, "loss": 17.5308, "step": 205270 }, { "epoch": 0.4146785877333678, "grad_norm": 701.4525756835938, "learning_rate": 7.3387235427917266e-06, "loss": 15.9599, "step": 205280 }, { "epoch": 0.4146987883660516, "grad_norm": 317.63165283203125, "learning_rate": 7.338415010488699e-06, "loss": 16.3648, "step": 205290 }, { "epoch": 0.41471898899873544, "grad_norm": 39.73927307128906, "learning_rate": 7.33810646678852e-06, "loss": 12.4666, "step": 
205300 }, { "epoch": 0.41473918963141926, "grad_norm": 380.56182861328125, "learning_rate": 7.3377979116926925e-06, "loss": 15.8573, "step": 205310 }, { "epoch": 0.4147593902641031, "grad_norm": 866.0729370117188, "learning_rate": 7.337489345202723e-06, "loss": 23.2634, "step": 205320 }, { "epoch": 0.4147795908967869, "grad_norm": 888.8053588867188, "learning_rate": 7.337180767320113e-06, "loss": 24.4546, "step": 205330 }, { "epoch": 0.4147997915294707, "grad_norm": 651.4313354492188, "learning_rate": 7.336872178046368e-06, "loss": 38.669, "step": 205340 }, { "epoch": 0.41481999216215454, "grad_norm": 531.4393310546875, "learning_rate": 7.33656357738299e-06, "loss": 30.526, "step": 205350 }, { "epoch": 0.41484019279483836, "grad_norm": 167.88613891601562, "learning_rate": 7.336254965331486e-06, "loss": 11.7034, "step": 205360 }, { "epoch": 0.4148603934275221, "grad_norm": 653.5435180664062, "learning_rate": 7.335946341893359e-06, "loss": 24.5979, "step": 205370 }, { "epoch": 0.41488059406020594, "grad_norm": 614.8929443359375, "learning_rate": 7.335637707070111e-06, "loss": 44.128, "step": 205380 }, { "epoch": 0.41490079469288976, "grad_norm": 488.77435302734375, "learning_rate": 7.33532906086325e-06, "loss": 10.9534, "step": 205390 }, { "epoch": 0.4149209953255736, "grad_norm": 341.00799560546875, "learning_rate": 7.335020403274277e-06, "loss": 19.0022, "step": 205400 }, { "epoch": 0.4149411959582574, "grad_norm": 342.5754699707031, "learning_rate": 7.334711734304698e-06, "loss": 31.824, "step": 205410 }, { "epoch": 0.4149613965909412, "grad_norm": 273.30535888671875, "learning_rate": 7.334403053956018e-06, "loss": 20.4858, "step": 205420 }, { "epoch": 0.41498159722362504, "grad_norm": 287.59893798828125, "learning_rate": 7.334094362229741e-06, "loss": 14.5842, "step": 205430 }, { "epoch": 0.41500179785630886, "grad_norm": 404.0746154785156, "learning_rate": 7.333785659127371e-06, "loss": 15.8078, "step": 205440 }, { "epoch": 0.4150219984889927, "grad_norm": 263.71044921875, "learning_rate": 7.333476944650411e-06, "loss": 20.3386, "step": 205450 }, { "epoch": 0.4150421991216765, "grad_norm": 399.9277038574219, "learning_rate": 7.333168218800369e-06, "loss": 18.7517, "step": 205460 }, { "epoch": 0.4150623997543603, "grad_norm": 364.37640380859375, "learning_rate": 7.332859481578747e-06, "loss": 22.8713, "step": 205470 }, { "epoch": 0.41508260038704414, "grad_norm": 348.4693603515625, "learning_rate": 7.332550732987051e-06, "loss": 21.0402, "step": 205480 }, { "epoch": 0.41510280101972796, "grad_norm": 477.7831115722656, "learning_rate": 7.332241973026786e-06, "loss": 20.8785, "step": 205490 }, { "epoch": 0.4151230016524117, "grad_norm": 327.65472412109375, "learning_rate": 7.3319332016994575e-06, "loss": 12.5089, "step": 205500 }, { "epoch": 0.41514320228509555, "grad_norm": 609.080078125, "learning_rate": 7.331624419006568e-06, "loss": 16.9733, "step": 205510 }, { "epoch": 0.41516340291777937, "grad_norm": 362.18572998046875, "learning_rate": 7.331315624949624e-06, "loss": 53.9419, "step": 205520 }, { "epoch": 0.4151836035504632, "grad_norm": 1026.9534912109375, "learning_rate": 7.33100681953013e-06, "loss": 15.2135, "step": 205530 }, { "epoch": 0.415203804183147, "grad_norm": 483.15625, "learning_rate": 7.330698002749593e-06, "loss": 13.6422, "step": 205540 }, { "epoch": 0.4152240048158308, "grad_norm": 187.79249572753906, "learning_rate": 7.330389174609516e-06, "loss": 13.0211, "step": 205550 }, { "epoch": 0.41524420544851465, "grad_norm": 716.1710815429688, "learning_rate": 
7.330080335111405e-06, "loss": 14.7922, "step": 205560 }, { "epoch": 0.41526440608119847, "grad_norm": 1296.7987060546875, "learning_rate": 7.329771484256764e-06, "loss": 16.539, "step": 205570 }, { "epoch": 0.4152846067138823, "grad_norm": 603.66357421875, "learning_rate": 7.3294626220470984e-06, "loss": 14.3115, "step": 205580 }, { "epoch": 0.4153048073465661, "grad_norm": 154.29220581054688, "learning_rate": 7.329153748483918e-06, "loss": 17.6815, "step": 205590 }, { "epoch": 0.4153250079792499, "grad_norm": 451.9192199707031, "learning_rate": 7.3288448635687215e-06, "loss": 21.3467, "step": 205600 }, { "epoch": 0.41534520861193375, "grad_norm": 376.01995849609375, "learning_rate": 7.32853596730302e-06, "loss": 22.9711, "step": 205610 }, { "epoch": 0.41536540924461757, "grad_norm": 186.4207763671875, "learning_rate": 7.3282270596883155e-06, "loss": 21.0603, "step": 205620 }, { "epoch": 0.41538560987730133, "grad_norm": 544.6901245117188, "learning_rate": 7.327918140726115e-06, "loss": 14.387, "step": 205630 }, { "epoch": 0.41540581050998515, "grad_norm": 295.2100524902344, "learning_rate": 7.327609210417923e-06, "loss": 22.3907, "step": 205640 }, { "epoch": 0.41542601114266897, "grad_norm": 300.23260498046875, "learning_rate": 7.327300268765246e-06, "loss": 17.4491, "step": 205650 }, { "epoch": 0.4154462117753528, "grad_norm": 274.13958740234375, "learning_rate": 7.3269913157695915e-06, "loss": 37.3402, "step": 205660 }, { "epoch": 0.4154664124080366, "grad_norm": 464.6728515625, "learning_rate": 7.326682351432462e-06, "loss": 31.4462, "step": 205670 }, { "epoch": 0.41548661304072043, "grad_norm": 350.0167541503906, "learning_rate": 7.326373375755365e-06, "loss": 15.5198, "step": 205680 }, { "epoch": 0.41550681367340425, "grad_norm": 514.5641479492188, "learning_rate": 7.326064388739806e-06, "loss": 21.8145, "step": 205690 }, { "epoch": 0.41552701430608807, "grad_norm": 393.8197937011719, "learning_rate": 7.325755390387293e-06, "loss": 21.2698, "step": 205700 }, { "epoch": 0.4155472149387719, "grad_norm": 215.42408752441406, "learning_rate": 7.325446380699329e-06, "loss": 31.1187, "step": 205710 }, { "epoch": 0.4155674155714557, "grad_norm": 161.3758544921875, "learning_rate": 7.3251373596774214e-06, "loss": 16.2595, "step": 205720 }, { "epoch": 0.41558761620413953, "grad_norm": 538.1591186523438, "learning_rate": 7.324828327323077e-06, "loss": 16.8449, "step": 205730 }, { "epoch": 0.41560781683682335, "grad_norm": 828.3040161132812, "learning_rate": 7.3245192836378e-06, "loss": 44.7471, "step": 205740 }, { "epoch": 0.41562801746950717, "grad_norm": 438.98577880859375, "learning_rate": 7.3242102286231e-06, "loss": 11.2179, "step": 205750 }, { "epoch": 0.41564821810219094, "grad_norm": 98.83910369873047, "learning_rate": 7.323901162280478e-06, "loss": 12.6262, "step": 205760 }, { "epoch": 0.41566841873487476, "grad_norm": 450.12481689453125, "learning_rate": 7.323592084611446e-06, "loss": 19.8717, "step": 205770 }, { "epoch": 0.4156886193675586, "grad_norm": 304.15716552734375, "learning_rate": 7.3232829956175074e-06, "loss": 13.5696, "step": 205780 }, { "epoch": 0.4157088200002424, "grad_norm": 448.3150939941406, "learning_rate": 7.32297389530017e-06, "loss": 25.9163, "step": 205790 }, { "epoch": 0.4157290206329262, "grad_norm": 359.23626708984375, "learning_rate": 7.32266478366094e-06, "loss": 12.7628, "step": 205800 }, { "epoch": 0.41574922126561004, "grad_norm": 876.4931030273438, "learning_rate": 7.322355660701321e-06, "loss": 22.1785, "step": 205810 }, { "epoch": 
0.41576942189829386, "grad_norm": 443.26605224609375, "learning_rate": 7.322046526422824e-06, "loss": 17.2462, "step": 205820 }, { "epoch": 0.4157896225309777, "grad_norm": 388.3150329589844, "learning_rate": 7.321737380826954e-06, "loss": 22.4893, "step": 205830 }, { "epoch": 0.4158098231636615, "grad_norm": 145.12567138671875, "learning_rate": 7.321428223915217e-06, "loss": 32.9967, "step": 205840 }, { "epoch": 0.4158300237963453, "grad_norm": 71.86688995361328, "learning_rate": 7.321119055689121e-06, "loss": 17.2356, "step": 205850 }, { "epoch": 0.41585022442902914, "grad_norm": 348.533203125, "learning_rate": 7.3208098761501714e-06, "loss": 20.6987, "step": 205860 }, { "epoch": 0.41587042506171296, "grad_norm": 816.877685546875, "learning_rate": 7.320500685299876e-06, "loss": 21.9663, "step": 205870 }, { "epoch": 0.4158906256943968, "grad_norm": 206.76580810546875, "learning_rate": 7.320191483139742e-06, "loss": 22.9791, "step": 205880 }, { "epoch": 0.41591082632708054, "grad_norm": 96.70581817626953, "learning_rate": 7.319882269671277e-06, "loss": 20.0063, "step": 205890 }, { "epoch": 0.41593102695976436, "grad_norm": 191.33203125, "learning_rate": 7.319573044895986e-06, "loss": 18.062, "step": 205900 }, { "epoch": 0.4159512275924482, "grad_norm": 419.3578186035156, "learning_rate": 7.319263808815378e-06, "loss": 19.1683, "step": 205910 }, { "epoch": 0.415971428225132, "grad_norm": 457.2339782714844, "learning_rate": 7.318954561430959e-06, "loss": 13.6817, "step": 205920 }, { "epoch": 0.4159916288578158, "grad_norm": 1044.152587890625, "learning_rate": 7.318645302744237e-06, "loss": 24.3602, "step": 205930 }, { "epoch": 0.41601182949049964, "grad_norm": 928.43212890625, "learning_rate": 7.318336032756717e-06, "loss": 26.4416, "step": 205940 }, { "epoch": 0.41603203012318346, "grad_norm": 520.3009033203125, "learning_rate": 7.318026751469912e-06, "loss": 10.3366, "step": 205950 }, { "epoch": 0.4160522307558673, "grad_norm": 283.0570068359375, "learning_rate": 7.317717458885324e-06, "loss": 21.4138, "step": 205960 }, { "epoch": 0.4160724313885511, "grad_norm": 201.0565643310547, "learning_rate": 7.317408155004462e-06, "loss": 22.4877, "step": 205970 }, { "epoch": 0.4160926320212349, "grad_norm": 93.96663665771484, "learning_rate": 7.317098839828835e-06, "loss": 31.8869, "step": 205980 }, { "epoch": 0.41611283265391874, "grad_norm": 414.8111572265625, "learning_rate": 7.316789513359948e-06, "loss": 23.2044, "step": 205990 }, { "epoch": 0.41613303328660256, "grad_norm": 429.84356689453125, "learning_rate": 7.31648017559931e-06, "loss": 17.882, "step": 206000 }, { "epoch": 0.4161532339192863, "grad_norm": 496.6199645996094, "learning_rate": 7.316170826548428e-06, "loss": 23.4283, "step": 206010 }, { "epoch": 0.41617343455197015, "grad_norm": 97.62599182128906, "learning_rate": 7.315861466208811e-06, "loss": 14.062, "step": 206020 }, { "epoch": 0.41619363518465397, "grad_norm": 98.52935028076172, "learning_rate": 7.315552094581966e-06, "loss": 20.5307, "step": 206030 }, { "epoch": 0.4162138358173378, "grad_norm": 410.72332763671875, "learning_rate": 7.315242711669401e-06, "loss": 23.0629, "step": 206040 }, { "epoch": 0.4162340364500216, "grad_norm": 222.60693359375, "learning_rate": 7.3149333174726246e-06, "loss": 10.9954, "step": 206050 }, { "epoch": 0.4162542370827054, "grad_norm": 446.1466979980469, "learning_rate": 7.314623911993143e-06, "loss": 27.7812, "step": 206060 }, { "epoch": 0.41627443771538924, "grad_norm": 0.9989863634109497, "learning_rate": 7.314314495232467e-06, "loss": 
22.6591, "step": 206070 }, { "epoch": 0.41629463834807306, "grad_norm": 431.9134216308594, "learning_rate": 7.314005067192099e-06, "loss": 27.4647, "step": 206080 }, { "epoch": 0.4163148389807569, "grad_norm": 1403.6568603515625, "learning_rate": 7.313695627873554e-06, "loss": 24.585, "step": 206090 }, { "epoch": 0.4163350396134407, "grad_norm": 194.90769958496094, "learning_rate": 7.313386177278335e-06, "loss": 23.6103, "step": 206100 }, { "epoch": 0.4163552402461245, "grad_norm": 306.5736389160156, "learning_rate": 7.3130767154079555e-06, "loss": 32.0867, "step": 206110 }, { "epoch": 0.41637544087880834, "grad_norm": 298.9986877441406, "learning_rate": 7.312767242263919e-06, "loss": 12.8305, "step": 206120 }, { "epoch": 0.41639564151149216, "grad_norm": 477.2821960449219, "learning_rate": 7.312457757847734e-06, "loss": 16.3254, "step": 206130 }, { "epoch": 0.41641584214417593, "grad_norm": 595.7302856445312, "learning_rate": 7.312148262160913e-06, "loss": 22.1576, "step": 206140 }, { "epoch": 0.41643604277685975, "grad_norm": 911.7860107421875, "learning_rate": 7.31183875520496e-06, "loss": 8.1722, "step": 206150 }, { "epoch": 0.41645624340954357, "grad_norm": 777.1712036132812, "learning_rate": 7.311529236981385e-06, "loss": 64.3875, "step": 206160 }, { "epoch": 0.4164764440422274, "grad_norm": 711.8444213867188, "learning_rate": 7.3112197074916975e-06, "loss": 22.7231, "step": 206170 }, { "epoch": 0.4164966446749112, "grad_norm": 364.54669189453125, "learning_rate": 7.310910166737406e-06, "loss": 20.7396, "step": 206180 }, { "epoch": 0.41651684530759503, "grad_norm": 412.8030090332031, "learning_rate": 7.3106006147200185e-06, "loss": 20.6863, "step": 206190 }, { "epoch": 0.41653704594027885, "grad_norm": 1091.5716552734375, "learning_rate": 7.310291051441044e-06, "loss": 26.541, "step": 206200 }, { "epoch": 0.41655724657296267, "grad_norm": 249.6706085205078, "learning_rate": 7.30998147690199e-06, "loss": 16.1993, "step": 206210 }, { "epoch": 0.4165774472056465, "grad_norm": 93.3351821899414, "learning_rate": 7.3096718911043675e-06, "loss": 14.1391, "step": 206220 }, { "epoch": 0.4165976478383303, "grad_norm": 278.6354675292969, "learning_rate": 7.309362294049683e-06, "loss": 25.293, "step": 206230 }, { "epoch": 0.41661784847101413, "grad_norm": 215.47840881347656, "learning_rate": 7.309052685739448e-06, "loss": 16.4743, "step": 206240 }, { "epoch": 0.41663804910369795, "grad_norm": 186.58836364746094, "learning_rate": 7.308743066175172e-06, "loss": 25.0049, "step": 206250 }, { "epoch": 0.41665824973638177, "grad_norm": 348.30169677734375, "learning_rate": 7.308433435358357e-06, "loss": 17.7779, "step": 206260 }, { "epoch": 0.41667845036906553, "grad_norm": 296.0768737792969, "learning_rate": 7.308123793290523e-06, "loss": 20.9964, "step": 206270 }, { "epoch": 0.41669865100174935, "grad_norm": 0.0, "learning_rate": 7.307814139973171e-06, "loss": 8.9119, "step": 206280 }, { "epoch": 0.4167188516344332, "grad_norm": 364.7955322265625, "learning_rate": 7.307504475407813e-06, "loss": 30.5145, "step": 206290 }, { "epoch": 0.416739052267117, "grad_norm": 131.2972869873047, "learning_rate": 7.307194799595958e-06, "loss": 22.1773, "step": 206300 }, { "epoch": 0.4167592528998008, "grad_norm": 510.60003662109375, "learning_rate": 7.306885112539116e-06, "loss": 21.3477, "step": 206310 }, { "epoch": 0.41677945353248463, "grad_norm": 113.77836608886719, "learning_rate": 7.306575414238794e-06, "loss": 16.8134, "step": 206320 }, { "epoch": 0.41679965416516845, "grad_norm": 958.2135009765625, 
"learning_rate": 7.306265704696505e-06, "loss": 32.4911, "step": 206330 }, { "epoch": 0.4168198547978523, "grad_norm": 89.57183074951172, "learning_rate": 7.305955983913756e-06, "loss": 20.0299, "step": 206340 }, { "epoch": 0.4168400554305361, "grad_norm": 273.4055480957031, "learning_rate": 7.305646251892058e-06, "loss": 13.8955, "step": 206350 }, { "epoch": 0.4168602560632199, "grad_norm": 126.69972229003906, "learning_rate": 7.30533650863292e-06, "loss": 17.1203, "step": 206360 }, { "epoch": 0.41688045669590373, "grad_norm": 609.8422241210938, "learning_rate": 7.305026754137849e-06, "loss": 14.8693, "step": 206370 }, { "epoch": 0.41690065732858755, "grad_norm": 927.921875, "learning_rate": 7.304716988408359e-06, "loss": 20.7629, "step": 206380 }, { "epoch": 0.4169208579612714, "grad_norm": 590.805419921875, "learning_rate": 7.3044072114459585e-06, "loss": 28.4767, "step": 206390 }, { "epoch": 0.41694105859395514, "grad_norm": 683.1563720703125, "learning_rate": 7.3040974232521555e-06, "loss": 20.3865, "step": 206400 }, { "epoch": 0.41696125922663896, "grad_norm": 528.8218994140625, "learning_rate": 7.3037876238284625e-06, "loss": 12.0202, "step": 206410 }, { "epoch": 0.4169814598593228, "grad_norm": 326.8551940917969, "learning_rate": 7.303477813176385e-06, "loss": 23.3848, "step": 206420 }, { "epoch": 0.4170016604920066, "grad_norm": 75.90850067138672, "learning_rate": 7.303167991297439e-06, "loss": 21.5874, "step": 206430 }, { "epoch": 0.4170218611246904, "grad_norm": 499.3680419921875, "learning_rate": 7.302858158193131e-06, "loss": 24.6299, "step": 206440 }, { "epoch": 0.41704206175737424, "grad_norm": 365.0440979003906, "learning_rate": 7.302548313864971e-06, "loss": 13.7051, "step": 206450 }, { "epoch": 0.41706226239005806, "grad_norm": 277.8636169433594, "learning_rate": 7.30223845831447e-06, "loss": 14.326, "step": 206460 }, { "epoch": 0.4170824630227419, "grad_norm": 320.0940246582031, "learning_rate": 7.301928591543137e-06, "loss": 22.6345, "step": 206470 }, { "epoch": 0.4171026636554257, "grad_norm": 675.6278686523438, "learning_rate": 7.301618713552485e-06, "loss": 18.9414, "step": 206480 }, { "epoch": 0.4171228642881095, "grad_norm": 519.8043823242188, "learning_rate": 7.301308824344022e-06, "loss": 22.9404, "step": 206490 }, { "epoch": 0.41714306492079334, "grad_norm": 419.30902099609375, "learning_rate": 7.300998923919259e-06, "loss": 25.8645, "step": 206500 }, { "epoch": 0.41716326555347716, "grad_norm": 125.47054290771484, "learning_rate": 7.300689012279706e-06, "loss": 21.1088, "step": 206510 }, { "epoch": 0.4171834661861609, "grad_norm": 140.15902709960938, "learning_rate": 7.300379089426874e-06, "loss": 22.0836, "step": 206520 }, { "epoch": 0.41720366681884474, "grad_norm": 225.854248046875, "learning_rate": 7.300069155362272e-06, "loss": 25.7466, "step": 206530 }, { "epoch": 0.41722386745152856, "grad_norm": 439.0897216796875, "learning_rate": 7.299759210087415e-06, "loss": 14.7174, "step": 206540 }, { "epoch": 0.4172440680842124, "grad_norm": 538.1312866210938, "learning_rate": 7.299449253603807e-06, "loss": 30.5284, "step": 206550 }, { "epoch": 0.4172642687168962, "grad_norm": 41.05196762084961, "learning_rate": 7.299139285912965e-06, "loss": 14.0157, "step": 206560 }, { "epoch": 0.41728446934958, "grad_norm": 579.4649047851562, "learning_rate": 7.298829307016395e-06, "loss": 11.66, "step": 206570 }, { "epoch": 0.41730466998226384, "grad_norm": 91.174560546875, "learning_rate": 7.298519316915611e-06, "loss": 34.2421, "step": 206580 }, { "epoch": 
0.41732487061494766, "grad_norm": 239.1872100830078, "learning_rate": 7.298209315612123e-06, "loss": 20.6073, "step": 206590 }, { "epoch": 0.4173450712476315, "grad_norm": 366.6288146972656, "learning_rate": 7.297899303107441e-06, "loss": 24.0675, "step": 206600 }, { "epoch": 0.4173652718803153, "grad_norm": 1000.869384765625, "learning_rate": 7.297589279403076e-06, "loss": 27.9361, "step": 206610 }, { "epoch": 0.4173854725129991, "grad_norm": 326.3446044921875, "learning_rate": 7.297279244500539e-06, "loss": 23.5082, "step": 206620 }, { "epoch": 0.41740567314568294, "grad_norm": 209.549560546875, "learning_rate": 7.296969198401342e-06, "loss": 14.9825, "step": 206630 }, { "epoch": 0.41742587377836676, "grad_norm": 510.73297119140625, "learning_rate": 7.296659141106996e-06, "loss": 31.6884, "step": 206640 }, { "epoch": 0.4174460744110505, "grad_norm": 230.9799041748047, "learning_rate": 7.2963490726190134e-06, "loss": 10.3942, "step": 206650 }, { "epoch": 0.41746627504373435, "grad_norm": 722.6381225585938, "learning_rate": 7.296038992938902e-06, "loss": 29.7932, "step": 206660 }, { "epoch": 0.41748647567641817, "grad_norm": 743.6947631835938, "learning_rate": 7.2957289020681755e-06, "loss": 43.5345, "step": 206670 }, { "epoch": 0.417506676309102, "grad_norm": 614.7973022460938, "learning_rate": 7.295418800008345e-06, "loss": 31.6349, "step": 206680 }, { "epoch": 0.4175268769417858, "grad_norm": 286.641357421875, "learning_rate": 7.295108686760921e-06, "loss": 21.4192, "step": 206690 }, { "epoch": 0.4175470775744696, "grad_norm": 567.8367919921875, "learning_rate": 7.294798562327417e-06, "loss": 17.8248, "step": 206700 }, { "epoch": 0.41756727820715345, "grad_norm": 883.4342651367188, "learning_rate": 7.2944884267093405e-06, "loss": 19.8862, "step": 206710 }, { "epoch": 0.41758747883983727, "grad_norm": 72.49630737304688, "learning_rate": 7.294178279908208e-06, "loss": 45.195, "step": 206720 }, { "epoch": 0.4176076794725211, "grad_norm": 236.8124237060547, "learning_rate": 7.293868121925528e-06, "loss": 15.5517, "step": 206730 }, { "epoch": 0.4176278801052049, "grad_norm": 251.05450439453125, "learning_rate": 7.293557952762813e-06, "loss": 15.2094, "step": 206740 }, { "epoch": 0.4176480807378887, "grad_norm": 461.9261169433594, "learning_rate": 7.293247772421577e-06, "loss": 17.8967, "step": 206750 }, { "epoch": 0.41766828137057255, "grad_norm": 335.471923828125, "learning_rate": 7.292937580903326e-06, "loss": 14.177, "step": 206760 }, { "epoch": 0.41768848200325637, "grad_norm": 17.844736099243164, "learning_rate": 7.2926273782095766e-06, "loss": 24.8497, "step": 206770 }, { "epoch": 0.41770868263594013, "grad_norm": 225.99588012695312, "learning_rate": 7.29231716434184e-06, "loss": 18.3583, "step": 206780 }, { "epoch": 0.41772888326862395, "grad_norm": 281.2262878417969, "learning_rate": 7.292006939301627e-06, "loss": 28.0654, "step": 206790 }, { "epoch": 0.41774908390130777, "grad_norm": 174.10264587402344, "learning_rate": 7.291696703090449e-06, "loss": 12.1054, "step": 206800 }, { "epoch": 0.4177692845339916, "grad_norm": 354.1954650878906, "learning_rate": 7.291386455709823e-06, "loss": 18.5032, "step": 206810 }, { "epoch": 0.4177894851666754, "grad_norm": 581.7930908203125, "learning_rate": 7.291076197161253e-06, "loss": 21.4246, "step": 206820 }, { "epoch": 0.41780968579935923, "grad_norm": 353.8553771972656, "learning_rate": 7.290765927446258e-06, "loss": 25.3988, "step": 206830 }, { "epoch": 0.41782988643204305, "grad_norm": 274.6319274902344, "learning_rate": 
7.290455646566347e-06, "loss": 31.9479, "step": 206840 }, { "epoch": 0.41785008706472687, "grad_norm": 414.32049560546875, "learning_rate": 7.2901453545230325e-06, "loss": 8.159, "step": 206850 }, { "epoch": 0.4178702876974107, "grad_norm": 627.198974609375, "learning_rate": 7.289835051317828e-06, "loss": 14.0879, "step": 206860 }, { "epoch": 0.4178904883300945, "grad_norm": 970.3900146484375, "learning_rate": 7.289524736952245e-06, "loss": 18.2681, "step": 206870 }, { "epoch": 0.41791068896277833, "grad_norm": 439.5081481933594, "learning_rate": 7.289214411427796e-06, "loss": 15.3993, "step": 206880 }, { "epoch": 0.41793088959546215, "grad_norm": 1673.9371337890625, "learning_rate": 7.288904074745994e-06, "loss": 25.6307, "step": 206890 }, { "epoch": 0.41795109022814597, "grad_norm": 235.52084350585938, "learning_rate": 7.288593726908351e-06, "loss": 15.1453, "step": 206900 }, { "epoch": 0.41797129086082974, "grad_norm": 480.03887939453125, "learning_rate": 7.28828336791638e-06, "loss": 28.3371, "step": 206910 }, { "epoch": 0.41799149149351356, "grad_norm": 323.4153747558594, "learning_rate": 7.287972997771592e-06, "loss": 20.6894, "step": 206920 }, { "epoch": 0.4180116921261974, "grad_norm": 28.118074417114258, "learning_rate": 7.287662616475504e-06, "loss": 19.9836, "step": 206930 }, { "epoch": 0.4180318927588812, "grad_norm": 639.7236938476562, "learning_rate": 7.287352224029623e-06, "loss": 37.9913, "step": 206940 }, { "epoch": 0.418052093391565, "grad_norm": 411.906494140625, "learning_rate": 7.287041820435465e-06, "loss": 30.1956, "step": 206950 }, { "epoch": 0.41807229402424884, "grad_norm": 184.6607208251953, "learning_rate": 7.286731405694544e-06, "loss": 4.7745, "step": 206960 }, { "epoch": 0.41809249465693266, "grad_norm": 59.70170211791992, "learning_rate": 7.28642097980837e-06, "loss": 24.4321, "step": 206970 }, { "epoch": 0.4181126952896165, "grad_norm": 468.37042236328125, "learning_rate": 7.286110542778459e-06, "loss": 25.9108, "step": 206980 }, { "epoch": 0.4181328959223003, "grad_norm": 249.84591674804688, "learning_rate": 7.285800094606321e-06, "loss": 11.4053, "step": 206990 }, { "epoch": 0.4181530965549841, "grad_norm": 310.65936279296875, "learning_rate": 7.285489635293472e-06, "loss": 24.1084, "step": 207000 }, { "epoch": 0.41817329718766794, "grad_norm": 688.833984375, "learning_rate": 7.2851791648414226e-06, "loss": 25.7734, "step": 207010 }, { "epoch": 0.41819349782035176, "grad_norm": 172.11061096191406, "learning_rate": 7.284868683251688e-06, "loss": 16.902, "step": 207020 }, { "epoch": 0.4182136984530356, "grad_norm": 112.6848373413086, "learning_rate": 7.284558190525779e-06, "loss": 16.9506, "step": 207030 }, { "epoch": 0.41823389908571934, "grad_norm": 275.7078857421875, "learning_rate": 7.284247686665212e-06, "loss": 16.2917, "step": 207040 }, { "epoch": 0.41825409971840316, "grad_norm": 659.5950317382812, "learning_rate": 7.283937171671498e-06, "loss": 21.892, "step": 207050 }, { "epoch": 0.418274300351087, "grad_norm": 126.5652084350586, "learning_rate": 7.283626645546152e-06, "loss": 19.0833, "step": 207060 }, { "epoch": 0.4182945009837708, "grad_norm": 438.12249755859375, "learning_rate": 7.283316108290685e-06, "loss": 13.0235, "step": 207070 }, { "epoch": 0.4183147016164546, "grad_norm": 363.8396301269531, "learning_rate": 7.283005559906614e-06, "loss": 20.077, "step": 207080 }, { "epoch": 0.41833490224913844, "grad_norm": 288.7080383300781, "learning_rate": 7.282695000395451e-06, "loss": 20.4268, "step": 207090 }, { "epoch": 0.41835510288182226, 
"grad_norm": 188.05029296875, "learning_rate": 7.282384429758709e-06, "loss": 14.1814, "step": 207100 }, { "epoch": 0.4183753035145061, "grad_norm": 396.8157653808594, "learning_rate": 7.282073847997901e-06, "loss": 11.6911, "step": 207110 }, { "epoch": 0.4183955041471899, "grad_norm": 17.411792755126953, "learning_rate": 7.281763255114542e-06, "loss": 22.379, "step": 207120 }, { "epoch": 0.4184157047798737, "grad_norm": 575.6268310546875, "learning_rate": 7.281452651110148e-06, "loss": 19.024, "step": 207130 }, { "epoch": 0.41843590541255754, "grad_norm": 583.982177734375, "learning_rate": 7.281142035986227e-06, "loss": 19.9401, "step": 207140 }, { "epoch": 0.41845610604524136, "grad_norm": 332.6950378417969, "learning_rate": 7.280831409744299e-06, "loss": 20.0233, "step": 207150 }, { "epoch": 0.4184763066779251, "grad_norm": 557.7929077148438, "learning_rate": 7.280520772385875e-06, "loss": 18.3228, "step": 207160 }, { "epoch": 0.41849650731060895, "grad_norm": 857.3450927734375, "learning_rate": 7.280210123912468e-06, "loss": 21.378, "step": 207170 }, { "epoch": 0.41851670794329277, "grad_norm": 391.34783935546875, "learning_rate": 7.2798994643255945e-06, "loss": 19.3676, "step": 207180 }, { "epoch": 0.4185369085759766, "grad_norm": 865.761962890625, "learning_rate": 7.279588793626767e-06, "loss": 27.0067, "step": 207190 }, { "epoch": 0.4185571092086604, "grad_norm": 467.2917785644531, "learning_rate": 7.279278111817502e-06, "loss": 18.5604, "step": 207200 }, { "epoch": 0.4185773098413442, "grad_norm": 372.0297546386719, "learning_rate": 7.2789674188993096e-06, "loss": 21.0548, "step": 207210 }, { "epoch": 0.41859751047402805, "grad_norm": 9.171235084533691, "learning_rate": 7.278656714873707e-06, "loss": 17.0036, "step": 207220 }, { "epoch": 0.41861771110671187, "grad_norm": 406.84490966796875, "learning_rate": 7.2783459997422075e-06, "loss": 24.6613, "step": 207230 }, { "epoch": 0.4186379117393957, "grad_norm": 585.8035278320312, "learning_rate": 7.278035273506327e-06, "loss": 26.4588, "step": 207240 }, { "epoch": 0.4186581123720795, "grad_norm": 464.6855773925781, "learning_rate": 7.2777245361675786e-06, "loss": 24.9899, "step": 207250 }, { "epoch": 0.4186783130047633, "grad_norm": 176.9896697998047, "learning_rate": 7.277413787727478e-06, "loss": 21.9758, "step": 207260 }, { "epoch": 0.41869851363744715, "grad_norm": 680.6339721679688, "learning_rate": 7.277103028187536e-06, "loss": 35.1897, "step": 207270 }, { "epoch": 0.41871871427013097, "grad_norm": 520.2926025390625, "learning_rate": 7.276792257549273e-06, "loss": 19.2672, "step": 207280 }, { "epoch": 0.41873891490281473, "grad_norm": 194.8145751953125, "learning_rate": 7.276481475814199e-06, "loss": 23.8696, "step": 207290 }, { "epoch": 0.41875911553549855, "grad_norm": 370.65338134765625, "learning_rate": 7.27617068298383e-06, "loss": 24.2894, "step": 207300 }, { "epoch": 0.41877931616818237, "grad_norm": 1000.7235107421875, "learning_rate": 7.2758598790596836e-06, "loss": 35.6905, "step": 207310 }, { "epoch": 0.4187995168008662, "grad_norm": 95.40765380859375, "learning_rate": 7.275549064043269e-06, "loss": 22.1614, "step": 207320 }, { "epoch": 0.41881971743355, "grad_norm": 466.3887939453125, "learning_rate": 7.275238237936106e-06, "loss": 20.3975, "step": 207330 }, { "epoch": 0.41883991806623383, "grad_norm": 421.66552734375, "learning_rate": 7.274927400739708e-06, "loss": 22.5977, "step": 207340 }, { "epoch": 0.41886011869891765, "grad_norm": 304.6739807128906, "learning_rate": 7.274616552455589e-06, "loss": 20.5304, 
"step": 207350 }, { "epoch": 0.41888031933160147, "grad_norm": 114.920654296875, "learning_rate": 7.274305693085266e-06, "loss": 11.689, "step": 207360 }, { "epoch": 0.4189005199642853, "grad_norm": 495.1329345703125, "learning_rate": 7.273994822630251e-06, "loss": 17.1498, "step": 207370 }, { "epoch": 0.4189207205969691, "grad_norm": 562.6421508789062, "learning_rate": 7.273683941092063e-06, "loss": 23.1622, "step": 207380 }, { "epoch": 0.41894092122965293, "grad_norm": 428.80682373046875, "learning_rate": 7.273373048472214e-06, "loss": 17.5005, "step": 207390 }, { "epoch": 0.41896112186233675, "grad_norm": 417.3091735839844, "learning_rate": 7.27306214477222e-06, "loss": 23.5517, "step": 207400 }, { "epoch": 0.41898132249502057, "grad_norm": 293.5716857910156, "learning_rate": 7.272751229993598e-06, "loss": 11.2909, "step": 207410 }, { "epoch": 0.41900152312770433, "grad_norm": 741.0191040039062, "learning_rate": 7.272440304137862e-06, "loss": 36.1299, "step": 207420 }, { "epoch": 0.41902172376038815, "grad_norm": 219.34278869628906, "learning_rate": 7.2721293672065275e-06, "loss": 33.6884, "step": 207430 }, { "epoch": 0.419041924393072, "grad_norm": 801.2993774414062, "learning_rate": 7.27181841920111e-06, "loss": 18.6123, "step": 207440 }, { "epoch": 0.4190621250257558, "grad_norm": 406.53662109375, "learning_rate": 7.271507460123124e-06, "loss": 14.4973, "step": 207450 }, { "epoch": 0.4190823256584396, "grad_norm": 239.15638732910156, "learning_rate": 7.271196489974087e-06, "loss": 26.1185, "step": 207460 }, { "epoch": 0.41910252629112343, "grad_norm": 225.15029907226562, "learning_rate": 7.270885508755515e-06, "loss": 23.1628, "step": 207470 }, { "epoch": 0.41912272692380725, "grad_norm": 285.8903503417969, "learning_rate": 7.27057451646892e-06, "loss": 13.9068, "step": 207480 }, { "epoch": 0.4191429275564911, "grad_norm": 179.57513427734375, "learning_rate": 7.270263513115823e-06, "loss": 10.4213, "step": 207490 }, { "epoch": 0.4191631281891749, "grad_norm": 209.47906494140625, "learning_rate": 7.269952498697734e-06, "loss": 10.3899, "step": 207500 }, { "epoch": 0.4191833288218587, "grad_norm": 430.1376953125, "learning_rate": 7.269641473216174e-06, "loss": 22.5931, "step": 207510 }, { "epoch": 0.41920352945454253, "grad_norm": 270.9019775390625, "learning_rate": 7.269330436672656e-06, "loss": 14.631, "step": 207520 }, { "epoch": 0.41922373008722635, "grad_norm": 424.13189697265625, "learning_rate": 7.269019389068697e-06, "loss": 27.2419, "step": 207530 }, { "epoch": 0.4192439307199102, "grad_norm": 557.1117553710938, "learning_rate": 7.2687083304058125e-06, "loss": 28.1627, "step": 207540 }, { "epoch": 0.41926413135259394, "grad_norm": 68.83080291748047, "learning_rate": 7.268397260685518e-06, "loss": 20.6843, "step": 207550 }, { "epoch": 0.41928433198527776, "grad_norm": 120.33457946777344, "learning_rate": 7.268086179909331e-06, "loss": 15.7439, "step": 207560 }, { "epoch": 0.4193045326179616, "grad_norm": 428.87127685546875, "learning_rate": 7.267775088078768e-06, "loss": 19.5306, "step": 207570 }, { "epoch": 0.4193247332506454, "grad_norm": 291.04327392578125, "learning_rate": 7.267463985195343e-06, "loss": 32.1417, "step": 207580 }, { "epoch": 0.4193449338833292, "grad_norm": 647.67578125, "learning_rate": 7.267152871260573e-06, "loss": 22.1257, "step": 207590 }, { "epoch": 0.41936513451601304, "grad_norm": 301.8692321777344, "learning_rate": 7.266841746275977e-06, "loss": 22.967, "step": 207600 }, { "epoch": 0.41938533514869686, "grad_norm": 243.04318237304688, 
"learning_rate": 7.266530610243068e-06, "loss": 12.8156, "step": 207610 }, { "epoch": 0.4194055357813807, "grad_norm": 64.8611831665039, "learning_rate": 7.266219463163363e-06, "loss": 23.5269, "step": 207620 }, { "epoch": 0.4194257364140645, "grad_norm": 27.306312561035156, "learning_rate": 7.265908305038381e-06, "loss": 11.3525, "step": 207630 }, { "epoch": 0.4194459370467483, "grad_norm": 267.2720642089844, "learning_rate": 7.265597135869635e-06, "loss": 19.133, "step": 207640 }, { "epoch": 0.41946613767943214, "grad_norm": 967.9885864257812, "learning_rate": 7.265285955658645e-06, "loss": 30.2306, "step": 207650 }, { "epoch": 0.41948633831211596, "grad_norm": 400.0880126953125, "learning_rate": 7.264974764406924e-06, "loss": 9.7783, "step": 207660 }, { "epoch": 0.4195065389447998, "grad_norm": 588.6100463867188, "learning_rate": 7.2646635621159925e-06, "loss": 18.3165, "step": 207670 }, { "epoch": 0.41952673957748354, "grad_norm": 767.2931518554688, "learning_rate": 7.264352348787364e-06, "loss": 19.7032, "step": 207680 }, { "epoch": 0.41954694021016736, "grad_norm": 408.10540771484375, "learning_rate": 7.2640411244225576e-06, "loss": 15.6501, "step": 207690 }, { "epoch": 0.4195671408428512, "grad_norm": 282.24578857421875, "learning_rate": 7.26372988902309e-06, "loss": 33.6294, "step": 207700 }, { "epoch": 0.419587341475535, "grad_norm": 467.11175537109375, "learning_rate": 7.263418642590476e-06, "loss": 13.9205, "step": 207710 }, { "epoch": 0.4196075421082188, "grad_norm": 139.88189697265625, "learning_rate": 7.263107385126236e-06, "loss": 16.2457, "step": 207720 }, { "epoch": 0.41962774274090264, "grad_norm": 795.6160888671875, "learning_rate": 7.262796116631882e-06, "loss": 16.2437, "step": 207730 }, { "epoch": 0.41964794337358646, "grad_norm": 294.77490234375, "learning_rate": 7.262484837108937e-06, "loss": 34.8987, "step": 207740 }, { "epoch": 0.4196681440062703, "grad_norm": 246.17417907714844, "learning_rate": 7.262173546558914e-06, "loss": 17.3762, "step": 207750 }, { "epoch": 0.4196883446389541, "grad_norm": 418.432861328125, "learning_rate": 7.261862244983333e-06, "loss": 22.3757, "step": 207760 }, { "epoch": 0.4197085452716379, "grad_norm": 8.907363891601562, "learning_rate": 7.261550932383707e-06, "loss": 14.0214, "step": 207770 }, { "epoch": 0.41972874590432174, "grad_norm": 158.2021026611328, "learning_rate": 7.2612396087615586e-06, "loss": 15.8603, "step": 207780 }, { "epoch": 0.41974894653700556, "grad_norm": 262.1641540527344, "learning_rate": 7.260928274118402e-06, "loss": 14.9543, "step": 207790 }, { "epoch": 0.4197691471696893, "grad_norm": 486.50714111328125, "learning_rate": 7.260616928455754e-06, "loss": 19.0735, "step": 207800 }, { "epoch": 0.41978934780237315, "grad_norm": 444.6836853027344, "learning_rate": 7.260305571775135e-06, "loss": 31.9929, "step": 207810 }, { "epoch": 0.41980954843505697, "grad_norm": 290.2806091308594, "learning_rate": 7.2599942040780605e-06, "loss": 23.0906, "step": 207820 }, { "epoch": 0.4198297490677408, "grad_norm": 895.4974365234375, "learning_rate": 7.259682825366047e-06, "loss": 16.3634, "step": 207830 }, { "epoch": 0.4198499497004246, "grad_norm": 632.72900390625, "learning_rate": 7.2593714356406146e-06, "loss": 21.4046, "step": 207840 }, { "epoch": 0.4198701503331084, "grad_norm": 235.52828979492188, "learning_rate": 7.259060034903278e-06, "loss": 16.791, "step": 207850 }, { "epoch": 0.41989035096579225, "grad_norm": 63.5164794921875, "learning_rate": 7.258748623155558e-06, "loss": 15.0879, "step": 207860 }, { "epoch": 
0.41991055159847607, "grad_norm": 203.7410430908203, "learning_rate": 7.258437200398974e-06, "loss": 31.3387, "step": 207870 }, { "epoch": 0.4199307522311599, "grad_norm": 671.734375, "learning_rate": 7.258125766635038e-06, "loss": 19.7088, "step": 207880 }, { "epoch": 0.4199509528638437, "grad_norm": 142.62875366210938, "learning_rate": 7.257814321865271e-06, "loss": 12.4566, "step": 207890 }, { "epoch": 0.4199711534965275, "grad_norm": 352.7214660644531, "learning_rate": 7.257502866091192e-06, "loss": 32.3652, "step": 207900 }, { "epoch": 0.41999135412921135, "grad_norm": 602.37060546875, "learning_rate": 7.257191399314315e-06, "loss": 15.4602, "step": 207910 }, { "epoch": 0.42001155476189517, "grad_norm": 93.40857696533203, "learning_rate": 7.256879921536164e-06, "loss": 23.5006, "step": 207920 }, { "epoch": 0.42003175539457893, "grad_norm": 191.13961791992188, "learning_rate": 7.256568432758252e-06, "loss": 15.4048, "step": 207930 }, { "epoch": 0.42005195602726275, "grad_norm": 463.3614196777344, "learning_rate": 7.256256932982101e-06, "loss": 19.8775, "step": 207940 }, { "epoch": 0.42007215665994657, "grad_norm": 31.436904907226562, "learning_rate": 7.2559454222092265e-06, "loss": 16.4579, "step": 207950 }, { "epoch": 0.4200923572926304, "grad_norm": 541.3646240234375, "learning_rate": 7.255633900441147e-06, "loss": 27.1186, "step": 207960 }, { "epoch": 0.4201125579253142, "grad_norm": 807.3328857421875, "learning_rate": 7.255322367679382e-06, "loss": 21.5338, "step": 207970 }, { "epoch": 0.42013275855799803, "grad_norm": 220.16342163085938, "learning_rate": 7.255010823925448e-06, "loss": 25.4912, "step": 207980 }, { "epoch": 0.42015295919068185, "grad_norm": 342.37786865234375, "learning_rate": 7.254699269180867e-06, "loss": 16.0834, "step": 207990 }, { "epoch": 0.42017315982336567, "grad_norm": 484.2235412597656, "learning_rate": 7.254387703447154e-06, "loss": 21.7082, "step": 208000 }, { "epoch": 0.4201933604560495, "grad_norm": 647.9937133789062, "learning_rate": 7.25407612672583e-06, "loss": 13.444, "step": 208010 }, { "epoch": 0.4202135610887333, "grad_norm": 243.7660369873047, "learning_rate": 7.253764539018411e-06, "loss": 21.8181, "step": 208020 }, { "epoch": 0.42023376172141713, "grad_norm": 9.34383487701416, "learning_rate": 7.253452940326418e-06, "loss": 21.1636, "step": 208030 }, { "epoch": 0.42025396235410095, "grad_norm": 402.57147216796875, "learning_rate": 7.253141330651367e-06, "loss": 18.6828, "step": 208040 }, { "epoch": 0.42027416298678477, "grad_norm": 186.9805145263672, "learning_rate": 7.2528297099947796e-06, "loss": 18.4708, "step": 208050 }, { "epoch": 0.42029436361946854, "grad_norm": 484.6163330078125, "learning_rate": 7.252518078358173e-06, "loss": 29.91, "step": 208060 }, { "epoch": 0.42031456425215236, "grad_norm": 602.1930541992188, "learning_rate": 7.252206435743067e-06, "loss": 18.7558, "step": 208070 }, { "epoch": 0.4203347648848362, "grad_norm": 233.40078735351562, "learning_rate": 7.251894782150981e-06, "loss": 11.5505, "step": 208080 }, { "epoch": 0.42035496551752, "grad_norm": 353.1018371582031, "learning_rate": 7.251583117583429e-06, "loss": 17.2449, "step": 208090 }, { "epoch": 0.4203751661502038, "grad_norm": 531.6416015625, "learning_rate": 7.251271442041938e-06, "loss": 32.751, "step": 208100 }, { "epoch": 0.42039536678288764, "grad_norm": 508.6883850097656, "learning_rate": 7.250959755528022e-06, "loss": 16.1892, "step": 208110 }, { "epoch": 0.42041556741557146, "grad_norm": 713.8525390625, "learning_rate": 7.2506480580432005e-06, "loss": 
23.175, "step": 208120 }, { "epoch": 0.4204357680482553, "grad_norm": 1152.8912353515625, "learning_rate": 7.250336349588995e-06, "loss": 26.7371, "step": 208130 }, { "epoch": 0.4204559686809391, "grad_norm": 48.779632568359375, "learning_rate": 7.250024630166921e-06, "loss": 24.1488, "step": 208140 }, { "epoch": 0.4204761693136229, "grad_norm": 537.6607055664062, "learning_rate": 7.2497128997785e-06, "loss": 10.6233, "step": 208150 }, { "epoch": 0.42049636994630674, "grad_norm": 312.4154968261719, "learning_rate": 7.249401158425252e-06, "loss": 22.843, "step": 208160 }, { "epoch": 0.42051657057899056, "grad_norm": 279.8778076171875, "learning_rate": 7.249089406108696e-06, "loss": 13.4447, "step": 208170 }, { "epoch": 0.4205367712116744, "grad_norm": 114.59381103515625, "learning_rate": 7.248777642830351e-06, "loss": 11.0693, "step": 208180 }, { "epoch": 0.42055697184435814, "grad_norm": 373.0764465332031, "learning_rate": 7.248465868591735e-06, "loss": 30.5604, "step": 208190 }, { "epoch": 0.42057717247704196, "grad_norm": 227.80795288085938, "learning_rate": 7.24815408339437e-06, "loss": 7.9865, "step": 208200 }, { "epoch": 0.4205973731097258, "grad_norm": 383.9432067871094, "learning_rate": 7.247842287239775e-06, "loss": 16.1091, "step": 208210 }, { "epoch": 0.4206175737424096, "grad_norm": 51.92649841308594, "learning_rate": 7.247530480129469e-06, "loss": 15.9713, "step": 208220 }, { "epoch": 0.4206377743750934, "grad_norm": 510.3590087890625, "learning_rate": 7.247218662064972e-06, "loss": 31.5859, "step": 208230 }, { "epoch": 0.42065797500777724, "grad_norm": 514.0302124023438, "learning_rate": 7.2469068330478046e-06, "loss": 13.0219, "step": 208240 }, { "epoch": 0.42067817564046106, "grad_norm": 243.36740112304688, "learning_rate": 7.246594993079483e-06, "loss": 17.2838, "step": 208250 }, { "epoch": 0.4206983762731449, "grad_norm": 24.595321655273438, "learning_rate": 7.246283142161533e-06, "loss": 10.2763, "step": 208260 }, { "epoch": 0.4207185769058287, "grad_norm": 34.77042770385742, "learning_rate": 7.245971280295469e-06, "loss": 24.0394, "step": 208270 }, { "epoch": 0.4207387775385125, "grad_norm": 246.41275024414062, "learning_rate": 7.245659407482815e-06, "loss": 17.2448, "step": 208280 }, { "epoch": 0.42075897817119634, "grad_norm": 8.182950019836426, "learning_rate": 7.2453475237250895e-06, "loss": 19.6814, "step": 208290 }, { "epoch": 0.42077917880388016, "grad_norm": 793.640869140625, "learning_rate": 7.245035629023812e-06, "loss": 22.2856, "step": 208300 }, { "epoch": 0.420799379436564, "grad_norm": 274.09912109375, "learning_rate": 7.244723723380504e-06, "loss": 27.2288, "step": 208310 }, { "epoch": 0.42081958006924775, "grad_norm": 265.5552673339844, "learning_rate": 7.244411806796684e-06, "loss": 16.1567, "step": 208320 }, { "epoch": 0.42083978070193157, "grad_norm": 243.61830139160156, "learning_rate": 7.244099879273873e-06, "loss": 27.1939, "step": 208330 }, { "epoch": 0.4208599813346154, "grad_norm": 134.2394256591797, "learning_rate": 7.243787940813591e-06, "loss": 22.9721, "step": 208340 }, { "epoch": 0.4208801819672992, "grad_norm": 142.01174926757812, "learning_rate": 7.24347599141736e-06, "loss": 19.7979, "step": 208350 }, { "epoch": 0.420900382599983, "grad_norm": 439.92041015625, "learning_rate": 7.243164031086697e-06, "loss": 30.2856, "step": 208360 }, { "epoch": 0.42092058323266685, "grad_norm": 182.24444580078125, "learning_rate": 7.242852059823127e-06, "loss": 15.2457, "step": 208370 }, { "epoch": 0.42094078386535067, "grad_norm": 265.6875, 
"learning_rate": 7.2425400776281665e-06, "loss": 20.2317, "step": 208380 }, { "epoch": 0.4209609844980345, "grad_norm": 683.1009521484375, "learning_rate": 7.242228084503338e-06, "loss": 12.1677, "step": 208390 }, { "epoch": 0.4209811851307183, "grad_norm": 416.8982849121094, "learning_rate": 7.241916080450163e-06, "loss": 10.0464, "step": 208400 }, { "epoch": 0.4210013857634021, "grad_norm": 471.6433410644531, "learning_rate": 7.241604065470158e-06, "loss": 21.2547, "step": 208410 }, { "epoch": 0.42102158639608595, "grad_norm": 158.52459716796875, "learning_rate": 7.24129203956485e-06, "loss": 14.5647, "step": 208420 }, { "epoch": 0.42104178702876977, "grad_norm": 573.9026489257812, "learning_rate": 7.240980002735754e-06, "loss": 13.6013, "step": 208430 }, { "epoch": 0.42106198766145353, "grad_norm": 577.913330078125, "learning_rate": 7.240667954984395e-06, "loss": 25.1335, "step": 208440 }, { "epoch": 0.42108218829413735, "grad_norm": 652.5313110351562, "learning_rate": 7.24035589631229e-06, "loss": 23.0305, "step": 208450 }, { "epoch": 0.42110238892682117, "grad_norm": 265.8028869628906, "learning_rate": 7.240043826720964e-06, "loss": 25.7539, "step": 208460 }, { "epoch": 0.421122589559505, "grad_norm": 3482.27294921875, "learning_rate": 7.239731746211936e-06, "loss": 34.0574, "step": 208470 }, { "epoch": 0.4211427901921888, "grad_norm": 20.080074310302734, "learning_rate": 7.239419654786727e-06, "loss": 11.3123, "step": 208480 }, { "epoch": 0.42116299082487263, "grad_norm": 551.6575927734375, "learning_rate": 7.239107552446858e-06, "loss": 21.158, "step": 208490 }, { "epoch": 0.42118319145755645, "grad_norm": 288.7182312011719, "learning_rate": 7.238795439193849e-06, "loss": 12.6363, "step": 208500 }, { "epoch": 0.42120339209024027, "grad_norm": 268.54833984375, "learning_rate": 7.2384833150292234e-06, "loss": 25.1454, "step": 208510 }, { "epoch": 0.4212235927229241, "grad_norm": 347.07891845703125, "learning_rate": 7.238171179954502e-06, "loss": 24.0564, "step": 208520 }, { "epoch": 0.4212437933556079, "grad_norm": 584.1370239257812, "learning_rate": 7.237859033971206e-06, "loss": 17.0298, "step": 208530 }, { "epoch": 0.42126399398829173, "grad_norm": 951.8050537109375, "learning_rate": 7.2375468770808555e-06, "loss": 35.244, "step": 208540 }, { "epoch": 0.42128419462097555, "grad_norm": 492.3121032714844, "learning_rate": 7.2372347092849744e-06, "loss": 17.6317, "step": 208550 }, { "epoch": 0.42130439525365937, "grad_norm": 295.8072814941406, "learning_rate": 7.236922530585082e-06, "loss": 17.6025, "step": 208560 }, { "epoch": 0.42132459588634313, "grad_norm": 238.51760864257812, "learning_rate": 7.236610340982699e-06, "loss": 13.1738, "step": 208570 }, { "epoch": 0.42134479651902695, "grad_norm": 559.2195434570312, "learning_rate": 7.236298140479352e-06, "loss": 25.8419, "step": 208580 }, { "epoch": 0.4213649971517108, "grad_norm": 61.331138610839844, "learning_rate": 7.235985929076556e-06, "loss": 21.84, "step": 208590 }, { "epoch": 0.4213851977843946, "grad_norm": 585.8510131835938, "learning_rate": 7.235673706775837e-06, "loss": 18.3465, "step": 208600 }, { "epoch": 0.4214053984170784, "grad_norm": 221.32998657226562, "learning_rate": 7.235361473578715e-06, "loss": 15.0191, "step": 208610 }, { "epoch": 0.42142559904976223, "grad_norm": 461.74273681640625, "learning_rate": 7.235049229486713e-06, "loss": 27.2304, "step": 208620 }, { "epoch": 0.42144579968244605, "grad_norm": 382.2008972167969, "learning_rate": 7.23473697450135e-06, "loss": 14.0785, "step": 208630 }, { "epoch": 
0.4214660003151299, "grad_norm": 367.16229248046875, "learning_rate": 7.234424708624152e-06, "loss": 22.0599, "step": 208640 }, { "epoch": 0.4214862009478137, "grad_norm": 436.0984191894531, "learning_rate": 7.234112431856639e-06, "loss": 14.0695, "step": 208650 }, { "epoch": 0.4215064015804975, "grad_norm": 142.49688720703125, "learning_rate": 7.233800144200332e-06, "loss": 5.8185, "step": 208660 }, { "epoch": 0.42152660221318133, "grad_norm": 224.3113555908203, "learning_rate": 7.233487845656755e-06, "loss": 24.458, "step": 208670 }, { "epoch": 0.42154680284586515, "grad_norm": 207.14895629882812, "learning_rate": 7.233175536227428e-06, "loss": 23.031, "step": 208680 }, { "epoch": 0.421567003478549, "grad_norm": 445.96478271484375, "learning_rate": 7.2328632159138764e-06, "loss": 17.6489, "step": 208690 }, { "epoch": 0.42158720411123274, "grad_norm": 636.1257934570312, "learning_rate": 7.2325508847176175e-06, "loss": 21.732, "step": 208700 }, { "epoch": 0.42160740474391656, "grad_norm": 154.71957397460938, "learning_rate": 7.232238542640178e-06, "loss": 11.7602, "step": 208710 }, { "epoch": 0.4216276053766004, "grad_norm": 234.51773071289062, "learning_rate": 7.23192618968308e-06, "loss": 26.8346, "step": 208720 }, { "epoch": 0.4216478060092842, "grad_norm": 572.9177856445312, "learning_rate": 7.231613825847842e-06, "loss": 22.7497, "step": 208730 }, { "epoch": 0.421668006641968, "grad_norm": 146.1887969970703, "learning_rate": 7.23130145113599e-06, "loss": 15.8045, "step": 208740 }, { "epoch": 0.42168820727465184, "grad_norm": 230.3306427001953, "learning_rate": 7.2309890655490446e-06, "loss": 14.3457, "step": 208750 }, { "epoch": 0.42170840790733566, "grad_norm": 407.8550720214844, "learning_rate": 7.23067666908853e-06, "loss": 16.3858, "step": 208760 }, { "epoch": 0.4217286085400195, "grad_norm": 160.23910522460938, "learning_rate": 7.230364261755967e-06, "loss": 17.3511, "step": 208770 }, { "epoch": 0.4217488091727033, "grad_norm": 521.4282836914062, "learning_rate": 7.230051843552879e-06, "loss": 14.473, "step": 208780 }, { "epoch": 0.4217690098053871, "grad_norm": 364.94281005859375, "learning_rate": 7.22973941448079e-06, "loss": 13.961, "step": 208790 }, { "epoch": 0.42178921043807094, "grad_norm": 436.3430480957031, "learning_rate": 7.2294269745412214e-06, "loss": 16.7884, "step": 208800 }, { "epoch": 0.42180941107075476, "grad_norm": 1169.0938720703125, "learning_rate": 7.229114523735695e-06, "loss": 30.58, "step": 208810 }, { "epoch": 0.4218296117034386, "grad_norm": 610.2642822265625, "learning_rate": 7.228802062065735e-06, "loss": 37.7107, "step": 208820 }, { "epoch": 0.42184981233612234, "grad_norm": 236.06874084472656, "learning_rate": 7.228489589532865e-06, "loss": 14.797, "step": 208830 }, { "epoch": 0.42187001296880616, "grad_norm": 321.25360107421875, "learning_rate": 7.228177106138605e-06, "loss": 16.5439, "step": 208840 }, { "epoch": 0.42189021360149, "grad_norm": 1895.261474609375, "learning_rate": 7.227864611884483e-06, "loss": 34.6209, "step": 208850 }, { "epoch": 0.4219104142341738, "grad_norm": 433.1759033203125, "learning_rate": 7.227552106772015e-06, "loss": 34.8761, "step": 208860 }, { "epoch": 0.4219306148668576, "grad_norm": 634.3358764648438, "learning_rate": 7.227239590802733e-06, "loss": 10.7749, "step": 208870 }, { "epoch": 0.42195081549954144, "grad_norm": 316.2372131347656, "learning_rate": 7.226927063978153e-06, "loss": 21.7075, "step": 208880 }, { "epoch": 0.42197101613222526, "grad_norm": 693.2899169921875, "learning_rate": 7.2266145262998e-06, 
"loss": 35.3719, "step": 208890 }, { "epoch": 0.4219912167649091, "grad_norm": 550.0910034179688, "learning_rate": 7.226301977769199e-06, "loss": 18.3152, "step": 208900 }, { "epoch": 0.4220114173975929, "grad_norm": 472.962890625, "learning_rate": 7.225989418387871e-06, "loss": 14.3691, "step": 208910 }, { "epoch": 0.4220316180302767, "grad_norm": 194.69569396972656, "learning_rate": 7.2256768481573414e-06, "loss": 9.0484, "step": 208920 }, { "epoch": 0.42205181866296054, "grad_norm": 331.1741638183594, "learning_rate": 7.225364267079134e-06, "loss": 22.7028, "step": 208930 }, { "epoch": 0.42207201929564436, "grad_norm": 153.98699951171875, "learning_rate": 7.225051675154768e-06, "loss": 25.4647, "step": 208940 }, { "epoch": 0.4220922199283282, "grad_norm": 788.9549560546875, "learning_rate": 7.224739072385773e-06, "loss": 14.5946, "step": 208950 }, { "epoch": 0.42211242056101195, "grad_norm": 644.8583984375, "learning_rate": 7.224426458773668e-06, "loss": 46.768, "step": 208960 }, { "epoch": 0.42213262119369577, "grad_norm": 439.9338073730469, "learning_rate": 7.224113834319978e-06, "loss": 21.0104, "step": 208970 }, { "epoch": 0.4221528218263796, "grad_norm": 719.8021850585938, "learning_rate": 7.223801199026228e-06, "loss": 20.113, "step": 208980 }, { "epoch": 0.4221730224590634, "grad_norm": 76.28775024414062, "learning_rate": 7.22348855289394e-06, "loss": 21.883, "step": 208990 }, { "epoch": 0.4221932230917472, "grad_norm": 182.53370666503906, "learning_rate": 7.223175895924638e-06, "loss": 16.6704, "step": 209000 }, { "epoch": 0.42221342372443105, "grad_norm": 1.7606089115142822, "learning_rate": 7.2228632281198475e-06, "loss": 22.5764, "step": 209010 }, { "epoch": 0.42223362435711487, "grad_norm": 396.4153137207031, "learning_rate": 7.22255054948109e-06, "loss": 11.976, "step": 209020 }, { "epoch": 0.4222538249897987, "grad_norm": 252.0966339111328, "learning_rate": 7.222237860009892e-06, "loss": 16.9671, "step": 209030 }, { "epoch": 0.4222740256224825, "grad_norm": 152.38465881347656, "learning_rate": 7.2219251597077745e-06, "loss": 20.7021, "step": 209040 }, { "epoch": 0.4222942262551663, "grad_norm": 130.76669311523438, "learning_rate": 7.221612448576265e-06, "loss": 15.2044, "step": 209050 }, { "epoch": 0.42231442688785015, "grad_norm": 325.18719482421875, "learning_rate": 7.221299726616885e-06, "loss": 20.5442, "step": 209060 }, { "epoch": 0.42233462752053397, "grad_norm": 598.7401733398438, "learning_rate": 7.220986993831159e-06, "loss": 20.5388, "step": 209070 }, { "epoch": 0.42235482815321773, "grad_norm": 260.79327392578125, "learning_rate": 7.220674250220614e-06, "loss": 12.8102, "step": 209080 }, { "epoch": 0.42237502878590155, "grad_norm": 499.1023254394531, "learning_rate": 7.220361495786769e-06, "loss": 29.819, "step": 209090 }, { "epoch": 0.42239522941858537, "grad_norm": 362.8083190917969, "learning_rate": 7.220048730531154e-06, "loss": 12.4547, "step": 209100 }, { "epoch": 0.4224154300512692, "grad_norm": 28.141883850097656, "learning_rate": 7.219735954455289e-06, "loss": 23.9655, "step": 209110 }, { "epoch": 0.422435630683953, "grad_norm": 450.5405578613281, "learning_rate": 7.219423167560701e-06, "loss": 25.0005, "step": 209120 }, { "epoch": 0.42245583131663683, "grad_norm": 167.74432373046875, "learning_rate": 7.219110369848913e-06, "loss": 17.3343, "step": 209130 }, { "epoch": 0.42247603194932065, "grad_norm": 11.758810043334961, "learning_rate": 7.218797561321451e-06, "loss": 16.1645, "step": 209140 }, { "epoch": 0.42249623258200447, "grad_norm": 
345.144775390625, "learning_rate": 7.2184847419798384e-06, "loss": 19.0552, "step": 209150 }, { "epoch": 0.4225164332146883, "grad_norm": 1046.2261962890625, "learning_rate": 7.2181719118256e-06, "loss": 17.4823, "step": 209160 }, { "epoch": 0.4225366338473721, "grad_norm": 372.1084289550781, "learning_rate": 7.217859070860261e-06, "loss": 21.2658, "step": 209170 }, { "epoch": 0.42255683448005593, "grad_norm": 227.75784301757812, "learning_rate": 7.217546219085346e-06, "loss": 14.4277, "step": 209180 }, { "epoch": 0.42257703511273975, "grad_norm": 382.9267578125, "learning_rate": 7.21723335650238e-06, "loss": 26.5891, "step": 209190 }, { "epoch": 0.42259723574542357, "grad_norm": 97.09908294677734, "learning_rate": 7.216920483112886e-06, "loss": 21.0516, "step": 209200 }, { "epoch": 0.42261743637810734, "grad_norm": 480.6952819824219, "learning_rate": 7.216607598918392e-06, "loss": 21.1206, "step": 209210 }, { "epoch": 0.42263763701079116, "grad_norm": 27.950443267822266, "learning_rate": 7.216294703920421e-06, "loss": 24.9184, "step": 209220 }, { "epoch": 0.422657837643475, "grad_norm": 238.2189483642578, "learning_rate": 7.215981798120499e-06, "loss": 25.0528, "step": 209230 }, { "epoch": 0.4226780382761588, "grad_norm": 367.7483215332031, "learning_rate": 7.215668881520149e-06, "loss": 15.9533, "step": 209240 }, { "epoch": 0.4226982389088426, "grad_norm": 801.534912109375, "learning_rate": 7.215355954120899e-06, "loss": 28.8105, "step": 209250 }, { "epoch": 0.42271843954152644, "grad_norm": 325.34014892578125, "learning_rate": 7.2150430159242724e-06, "loss": 23.3782, "step": 209260 }, { "epoch": 0.42273864017421026, "grad_norm": 157.3614044189453, "learning_rate": 7.214730066931794e-06, "loss": 15.9653, "step": 209270 }, { "epoch": 0.4227588408068941, "grad_norm": 698.5846557617188, "learning_rate": 7.214417107144991e-06, "loss": 20.0662, "step": 209280 }, { "epoch": 0.4227790414395779, "grad_norm": 110.46583557128906, "learning_rate": 7.2141041365653876e-06, "loss": 22.2963, "step": 209290 }, { "epoch": 0.4227992420722617, "grad_norm": 479.1408386230469, "learning_rate": 7.21379115519451e-06, "loss": 31.6837, "step": 209300 }, { "epoch": 0.42281944270494554, "grad_norm": 337.5665588378906, "learning_rate": 7.213478163033879e-06, "loss": 10.4446, "step": 209310 }, { "epoch": 0.42283964333762936, "grad_norm": 174.47479248046875, "learning_rate": 7.213165160085027e-06, "loss": 22.3634, "step": 209320 }, { "epoch": 0.4228598439703132, "grad_norm": 577.8895263671875, "learning_rate": 7.212852146349476e-06, "loss": 17.4193, "step": 209330 }, { "epoch": 0.42288004460299694, "grad_norm": 371.3006591796875, "learning_rate": 7.212539121828752e-06, "loss": 24.541, "step": 209340 }, { "epoch": 0.42290024523568076, "grad_norm": 270.4233703613281, "learning_rate": 7.212226086524381e-06, "loss": 15.2102, "step": 209350 }, { "epoch": 0.4229204458683646, "grad_norm": 735.9139404296875, "learning_rate": 7.211913040437887e-06, "loss": 32.0948, "step": 209360 }, { "epoch": 0.4229406465010484, "grad_norm": 74.67229461669922, "learning_rate": 7.211599983570799e-06, "loss": 11.3681, "step": 209370 }, { "epoch": 0.4229608471337322, "grad_norm": 114.5218734741211, "learning_rate": 7.211286915924639e-06, "loss": 27.2411, "step": 209380 }, { "epoch": 0.42298104776641604, "grad_norm": 680.1890258789062, "learning_rate": 7.210973837500936e-06, "loss": 20.7182, "step": 209390 }, { "epoch": 0.42300124839909986, "grad_norm": 565.722412109375, "learning_rate": 7.210660748301214e-06, "loss": 20.5261, "step": 209400 }, 
{ "epoch": 0.4230214490317837, "grad_norm": 731.522705078125, "learning_rate": 7.210347648327001e-06, "loss": 23.8383, "step": 209410 }, { "epoch": 0.4230416496644675, "grad_norm": 363.3365478515625, "learning_rate": 7.21003453757982e-06, "loss": 28.7912, "step": 209420 }, { "epoch": 0.4230618502971513, "grad_norm": 588.8091430664062, "learning_rate": 7.209721416061199e-06, "loss": 20.4795, "step": 209430 }, { "epoch": 0.42308205092983514, "grad_norm": 286.3702087402344, "learning_rate": 7.209408283772664e-06, "loss": 16.4857, "step": 209440 }, { "epoch": 0.42310225156251896, "grad_norm": 560.5650634765625, "learning_rate": 7.209095140715742e-06, "loss": 19.5949, "step": 209450 }, { "epoch": 0.4231224521952028, "grad_norm": 596.6691284179688, "learning_rate": 7.208781986891957e-06, "loss": 30.0939, "step": 209460 }, { "epoch": 0.42314265282788655, "grad_norm": 393.65325927734375, "learning_rate": 7.208468822302837e-06, "loss": 24.3944, "step": 209470 }, { "epoch": 0.42316285346057037, "grad_norm": 542.9483642578125, "learning_rate": 7.208155646949908e-06, "loss": 22.9175, "step": 209480 }, { "epoch": 0.4231830540932542, "grad_norm": 229.46798706054688, "learning_rate": 7.207842460834695e-06, "loss": 25.6597, "step": 209490 }, { "epoch": 0.423203254725938, "grad_norm": 907.5210571289062, "learning_rate": 7.207529263958727e-06, "loss": 22.0783, "step": 209500 }, { "epoch": 0.4232234553586218, "grad_norm": 134.958984375, "learning_rate": 7.2072160563235285e-06, "loss": 9.5013, "step": 209510 }, { "epoch": 0.42324365599130565, "grad_norm": 199.20062255859375, "learning_rate": 7.206902837930626e-06, "loss": 14.9864, "step": 209520 }, { "epoch": 0.42326385662398947, "grad_norm": 440.49334716796875, "learning_rate": 7.206589608781549e-06, "loss": 40.4692, "step": 209530 }, { "epoch": 0.4232840572566733, "grad_norm": 201.7251434326172, "learning_rate": 7.206276368877821e-06, "loss": 17.232, "step": 209540 }, { "epoch": 0.4233042578893571, "grad_norm": 188.55267333984375, "learning_rate": 7.2059631182209676e-06, "loss": 25.9212, "step": 209550 }, { "epoch": 0.4233244585220409, "grad_norm": 387.6357116699219, "learning_rate": 7.205649856812519e-06, "loss": 26.5822, "step": 209560 }, { "epoch": 0.42334465915472475, "grad_norm": 587.2101440429688, "learning_rate": 7.205336584653999e-06, "loss": 37.4754, "step": 209570 }, { "epoch": 0.42336485978740857, "grad_norm": 305.0256042480469, "learning_rate": 7.205023301746938e-06, "loss": 34.4858, "step": 209580 }, { "epoch": 0.42338506042009233, "grad_norm": 531.5726928710938, "learning_rate": 7.204710008092861e-06, "loss": 24.7794, "step": 209590 }, { "epoch": 0.42340526105277615, "grad_norm": 801.3169555664062, "learning_rate": 7.2043967036932935e-06, "loss": 23.223, "step": 209600 }, { "epoch": 0.42342546168545997, "grad_norm": 270.9145202636719, "learning_rate": 7.204083388549764e-06, "loss": 16.9073, "step": 209610 }, { "epoch": 0.4234456623181438, "grad_norm": 499.89617919921875, "learning_rate": 7.203770062663801e-06, "loss": 31.6552, "step": 209620 }, { "epoch": 0.4234658629508276, "grad_norm": 526.369873046875, "learning_rate": 7.203456726036927e-06, "loss": 17.5986, "step": 209630 }, { "epoch": 0.42348606358351143, "grad_norm": 102.6902847290039, "learning_rate": 7.203143378670675e-06, "loss": 20.5156, "step": 209640 }, { "epoch": 0.42350626421619525, "grad_norm": 241.0461883544922, "learning_rate": 7.202830020566567e-06, "loss": 20.2259, "step": 209650 }, { "epoch": 0.42352646484887907, "grad_norm": 265.1438903808594, "learning_rate": 
7.202516651726135e-06, "loss": 28.8852, "step": 209660 }, { "epoch": 0.4235466654815629, "grad_norm": 581.31298828125, "learning_rate": 7.202203272150903e-06, "loss": 31.6967, "step": 209670 }, { "epoch": 0.4235668661142467, "grad_norm": 109.85911560058594, "learning_rate": 7.2018898818423985e-06, "loss": 13.9738, "step": 209680 }, { "epoch": 0.42358706674693053, "grad_norm": 360.0051574707031, "learning_rate": 7.201576480802151e-06, "loss": 23.8676, "step": 209690 }, { "epoch": 0.42360726737961435, "grad_norm": 174.0601348876953, "learning_rate": 7.201263069031686e-06, "loss": 22.7484, "step": 209700 }, { "epoch": 0.42362746801229817, "grad_norm": 380.489990234375, "learning_rate": 7.200949646532532e-06, "loss": 19.4527, "step": 209710 }, { "epoch": 0.42364766864498193, "grad_norm": 725.2266235351562, "learning_rate": 7.200636213306216e-06, "loss": 23.2591, "step": 209720 }, { "epoch": 0.42366786927766575, "grad_norm": 435.45306396484375, "learning_rate": 7.200322769354267e-06, "loss": 26.7146, "step": 209730 }, { "epoch": 0.4236880699103496, "grad_norm": 195.5260467529297, "learning_rate": 7.20000931467821e-06, "loss": 13.8227, "step": 209740 }, { "epoch": 0.4237082705430334, "grad_norm": 554.610107421875, "learning_rate": 7.199695849279576e-06, "loss": 20.7622, "step": 209750 }, { "epoch": 0.4237284711757172, "grad_norm": 190.65818786621094, "learning_rate": 7.199382373159891e-06, "loss": 11.9765, "step": 209760 }, { "epoch": 0.42374867180840103, "grad_norm": 52.058013916015625, "learning_rate": 7.1990688863206835e-06, "loss": 20.5043, "step": 209770 }, { "epoch": 0.42376887244108485, "grad_norm": 529.6057739257812, "learning_rate": 7.19875538876348e-06, "loss": 32.3624, "step": 209780 }, { "epoch": 0.4237890730737687, "grad_norm": 450.0621032714844, "learning_rate": 7.198441880489809e-06, "loss": 15.9361, "step": 209790 }, { "epoch": 0.4238092737064525, "grad_norm": 248.64169311523438, "learning_rate": 7.1981283615012e-06, "loss": 11.9052, "step": 209800 }, { "epoch": 0.4238294743391363, "grad_norm": 275.8608093261719, "learning_rate": 7.197814831799179e-06, "loss": 14.8522, "step": 209810 }, { "epoch": 0.42384967497182013, "grad_norm": 565.5928955078125, "learning_rate": 7.197501291385276e-06, "loss": 26.5751, "step": 209820 }, { "epoch": 0.42386987560450395, "grad_norm": 161.0192413330078, "learning_rate": 7.197187740261018e-06, "loss": 17.3746, "step": 209830 }, { "epoch": 0.4238900762371878, "grad_norm": 982.380126953125, "learning_rate": 7.196874178427933e-06, "loss": 19.237, "step": 209840 }, { "epoch": 0.42391027686987154, "grad_norm": 99.38864135742188, "learning_rate": 7.196560605887551e-06, "loss": 16.0603, "step": 209850 }, { "epoch": 0.42393047750255536, "grad_norm": 367.59686279296875, "learning_rate": 7.196247022641398e-06, "loss": 11.7232, "step": 209860 }, { "epoch": 0.4239506781352392, "grad_norm": 590.4494018554688, "learning_rate": 7.195933428691003e-06, "loss": 25.6529, "step": 209870 }, { "epoch": 0.423970878767923, "grad_norm": 760.5358276367188, "learning_rate": 7.195619824037895e-06, "loss": 21.0403, "step": 209880 }, { "epoch": 0.4239910794006068, "grad_norm": 73.87620544433594, "learning_rate": 7.195306208683602e-06, "loss": 19.5883, "step": 209890 }, { "epoch": 0.42401128003329064, "grad_norm": 566.1576538085938, "learning_rate": 7.194992582629654e-06, "loss": 31.2496, "step": 209900 }, { "epoch": 0.42403148066597446, "grad_norm": 525.5848999023438, "learning_rate": 7.194678945877578e-06, "loss": 16.0137, "step": 209910 }, { "epoch": 0.4240516812986583, 
"grad_norm": 22.791072845458984, "learning_rate": 7.194365298428901e-06, "loss": 24.1958, "step": 209920 }, { "epoch": 0.4240718819313421, "grad_norm": 12.593621253967285, "learning_rate": 7.194051640285156e-06, "loss": 24.5946, "step": 209930 }, { "epoch": 0.4240920825640259, "grad_norm": 670.6082153320312, "learning_rate": 7.1937379714478696e-06, "loss": 14.8021, "step": 209940 }, { "epoch": 0.42411228319670974, "grad_norm": 218.0416259765625, "learning_rate": 7.19342429191857e-06, "loss": 20.3079, "step": 209950 }, { "epoch": 0.42413248382939356, "grad_norm": 522.1910400390625, "learning_rate": 7.193110601698785e-06, "loss": 34.548, "step": 209960 }, { "epoch": 0.4241526844620774, "grad_norm": 818.6900634765625, "learning_rate": 7.192796900790046e-06, "loss": 29.4781, "step": 209970 }, { "epoch": 0.42417288509476114, "grad_norm": 166.05030822753906, "learning_rate": 7.192483189193881e-06, "loss": 21.9706, "step": 209980 }, { "epoch": 0.42419308572744496, "grad_norm": 167.2766876220703, "learning_rate": 7.192169466911818e-06, "loss": 10.6179, "step": 209990 }, { "epoch": 0.4242132863601288, "grad_norm": 123.68822479248047, "learning_rate": 7.191855733945388e-06, "loss": 16.7535, "step": 210000 }, { "epoch": 0.4242334869928126, "grad_norm": 215.9828338623047, "learning_rate": 7.191541990296118e-06, "loss": 32.7292, "step": 210010 }, { "epoch": 0.4242536876254964, "grad_norm": 98.85929107666016, "learning_rate": 7.191228235965539e-06, "loss": 10.6004, "step": 210020 }, { "epoch": 0.42427388825818024, "grad_norm": 414.95037841796875, "learning_rate": 7.190914470955179e-06, "loss": 13.2742, "step": 210030 }, { "epoch": 0.42429408889086406, "grad_norm": 69.71099090576172, "learning_rate": 7.190600695266567e-06, "loss": 25.0969, "step": 210040 }, { "epoch": 0.4243142895235479, "grad_norm": 138.5528564453125, "learning_rate": 7.190286908901234e-06, "loss": 14.603, "step": 210050 }, { "epoch": 0.4243344901562317, "grad_norm": 169.1397247314453, "learning_rate": 7.189973111860708e-06, "loss": 17.6189, "step": 210060 }, { "epoch": 0.4243546907889155, "grad_norm": 357.02044677734375, "learning_rate": 7.189659304146519e-06, "loss": 11.5942, "step": 210070 }, { "epoch": 0.42437489142159934, "grad_norm": 197.739013671875, "learning_rate": 7.189345485760194e-06, "loss": 19.1052, "step": 210080 }, { "epoch": 0.42439509205428316, "grad_norm": 1462.9423828125, "learning_rate": 7.189031656703267e-06, "loss": 19.1876, "step": 210090 }, { "epoch": 0.424415292686967, "grad_norm": 365.7261657714844, "learning_rate": 7.188717816977264e-06, "loss": 23.9252, "step": 210100 }, { "epoch": 0.42443549331965075, "grad_norm": 276.45184326171875, "learning_rate": 7.1884039665837165e-06, "loss": 9.0559, "step": 210110 }, { "epoch": 0.42445569395233457, "grad_norm": 36.809783935546875, "learning_rate": 7.188090105524152e-06, "loss": 30.0834, "step": 210120 }, { "epoch": 0.4244758945850184, "grad_norm": 273.6554260253906, "learning_rate": 7.187776233800104e-06, "loss": 17.0771, "step": 210130 }, { "epoch": 0.4244960952177022, "grad_norm": 232.99725341796875, "learning_rate": 7.187462351413099e-06, "loss": 22.8462, "step": 210140 }, { "epoch": 0.424516295850386, "grad_norm": 169.66261291503906, "learning_rate": 7.187148458364668e-06, "loss": 9.2088, "step": 210150 }, { "epoch": 0.42453649648306985, "grad_norm": 355.7333068847656, "learning_rate": 7.18683455465634e-06, "loss": 11.4464, "step": 210160 }, { "epoch": 0.42455669711575367, "grad_norm": 401.5683288574219, "learning_rate": 7.1865206402896455e-06, "loss": 17.6637, 
"step": 210170 }, { "epoch": 0.4245768977484375, "grad_norm": 116.06808471679688, "learning_rate": 7.1862067152661155e-06, "loss": 13.2952, "step": 210180 }, { "epoch": 0.4245970983811213, "grad_norm": 587.5383911132812, "learning_rate": 7.185892779587278e-06, "loss": 21.3133, "step": 210190 }, { "epoch": 0.4246172990138051, "grad_norm": 443.3553161621094, "learning_rate": 7.185578833254665e-06, "loss": 14.619, "step": 210200 }, { "epoch": 0.42463749964648895, "grad_norm": 262.3222961425781, "learning_rate": 7.185264876269806e-06, "loss": 22.8874, "step": 210210 }, { "epoch": 0.42465770027917277, "grad_norm": 417.6518859863281, "learning_rate": 7.18495090863423e-06, "loss": 20.7733, "step": 210220 }, { "epoch": 0.42467790091185653, "grad_norm": 900.1268920898438, "learning_rate": 7.184636930349471e-06, "loss": 26.1569, "step": 210230 }, { "epoch": 0.42469810154454035, "grad_norm": 806.3509521484375, "learning_rate": 7.184322941417052e-06, "loss": 21.0926, "step": 210240 }, { "epoch": 0.42471830217722417, "grad_norm": 370.7440185546875, "learning_rate": 7.184008941838512e-06, "loss": 26.7139, "step": 210250 }, { "epoch": 0.424738502809908, "grad_norm": 26.355684280395508, "learning_rate": 7.183694931615374e-06, "loss": 7.9719, "step": 210260 }, { "epoch": 0.4247587034425918, "grad_norm": 629.7391357421875, "learning_rate": 7.183380910749176e-06, "loss": 21.9063, "step": 210270 }, { "epoch": 0.42477890407527563, "grad_norm": 467.73724365234375, "learning_rate": 7.1830668792414405e-06, "loss": 15.0258, "step": 210280 }, { "epoch": 0.42479910470795945, "grad_norm": 182.96475219726562, "learning_rate": 7.182752837093704e-06, "loss": 20.7013, "step": 210290 }, { "epoch": 0.42481930534064327, "grad_norm": 226.529541015625, "learning_rate": 7.182438784307495e-06, "loss": 18.3346, "step": 210300 }, { "epoch": 0.4248395059733271, "grad_norm": 404.8439025878906, "learning_rate": 7.182124720884342e-06, "loss": 13.0509, "step": 210310 }, { "epoch": 0.4248597066060109, "grad_norm": 478.815673828125, "learning_rate": 7.181810646825779e-06, "loss": 17.1866, "step": 210320 }, { "epoch": 0.42487990723869473, "grad_norm": 646.7852783203125, "learning_rate": 7.1814965621333355e-06, "loss": 31.5088, "step": 210330 }, { "epoch": 0.42490010787137855, "grad_norm": 278.8764953613281, "learning_rate": 7.181182466808542e-06, "loss": 20.0878, "step": 210340 }, { "epoch": 0.42492030850406237, "grad_norm": 598.1981811523438, "learning_rate": 7.18086836085293e-06, "loss": 17.7833, "step": 210350 }, { "epoch": 0.42494050913674614, "grad_norm": 44.72614288330078, "learning_rate": 7.18055424426803e-06, "loss": 17.147, "step": 210360 }, { "epoch": 0.42496070976942996, "grad_norm": 136.76609802246094, "learning_rate": 7.180240117055372e-06, "loss": 11.8497, "step": 210370 }, { "epoch": 0.4249809104021138, "grad_norm": 460.1715393066406, "learning_rate": 7.1799259792164914e-06, "loss": 17.4917, "step": 210380 }, { "epoch": 0.4250011110347976, "grad_norm": 305.7075500488281, "learning_rate": 7.179611830752914e-06, "loss": 18.4208, "step": 210390 }, { "epoch": 0.4250213116674814, "grad_norm": 5.594964027404785, "learning_rate": 7.179297671666171e-06, "loss": 8.8133, "step": 210400 }, { "epoch": 0.42504151230016524, "grad_norm": 369.8493347167969, "learning_rate": 7.178983501957798e-06, "loss": 20.4684, "step": 210410 }, { "epoch": 0.42506171293284906, "grad_norm": 89.79129028320312, "learning_rate": 7.178669321629321e-06, "loss": 20.433, "step": 210420 }, { "epoch": 0.4250819135655329, "grad_norm": 602.5833740234375, 
"learning_rate": 7.178355130682278e-06, "loss": 18.3495, "step": 210430 }, { "epoch": 0.4251021141982167, "grad_norm": 429.21966552734375, "learning_rate": 7.178040929118193e-06, "loss": 94.1416, "step": 210440 }, { "epoch": 0.4251223148309005, "grad_norm": 383.83087158203125, "learning_rate": 7.177726716938602e-06, "loss": 21.4504, "step": 210450 }, { "epoch": 0.42514251546358434, "grad_norm": 571.14892578125, "learning_rate": 7.177412494145035e-06, "loss": 21.3484, "step": 210460 }, { "epoch": 0.42516271609626816, "grad_norm": 1431.5240478515625, "learning_rate": 7.177098260739024e-06, "loss": 15.8618, "step": 210470 }, { "epoch": 0.425182916728952, "grad_norm": 670.5843505859375, "learning_rate": 7.176784016722099e-06, "loss": 17.4151, "step": 210480 }, { "epoch": 0.42520311736163574, "grad_norm": 317.0255432128906, "learning_rate": 7.1764697620957935e-06, "loss": 16.6094, "step": 210490 }, { "epoch": 0.42522331799431956, "grad_norm": 232.52720642089844, "learning_rate": 7.176155496861639e-06, "loss": 22.0382, "step": 210500 }, { "epoch": 0.4252435186270034, "grad_norm": 313.4559020996094, "learning_rate": 7.175841221021165e-06, "loss": 10.5647, "step": 210510 }, { "epoch": 0.4252637192596872, "grad_norm": 479.8163146972656, "learning_rate": 7.175526934575906e-06, "loss": 25.453, "step": 210520 }, { "epoch": 0.425283919892371, "grad_norm": 444.48052978515625, "learning_rate": 7.175212637527391e-06, "loss": 16.5115, "step": 210530 }, { "epoch": 0.42530412052505484, "grad_norm": 647.677734375, "learning_rate": 7.174898329877156e-06, "loss": 20.0601, "step": 210540 }, { "epoch": 0.42532432115773866, "grad_norm": 585.613037109375, "learning_rate": 7.174584011626729e-06, "loss": 17.7973, "step": 210550 }, { "epoch": 0.4253445217904225, "grad_norm": 314.0742492675781, "learning_rate": 7.1742696827776415e-06, "loss": 22.3381, "step": 210560 }, { "epoch": 0.4253647224231063, "grad_norm": 751.9611206054688, "learning_rate": 7.17395534333143e-06, "loss": 29.1542, "step": 210570 }, { "epoch": 0.4253849230557901, "grad_norm": 453.81414794921875, "learning_rate": 7.173640993289621e-06, "loss": 19.3853, "step": 210580 }, { "epoch": 0.42540512368847394, "grad_norm": 48.85789489746094, "learning_rate": 7.173326632653752e-06, "loss": 14.1212, "step": 210590 }, { "epoch": 0.42542532432115776, "grad_norm": 999.33984375, "learning_rate": 7.173012261425352e-06, "loss": 19.0004, "step": 210600 }, { "epoch": 0.4254455249538416, "grad_norm": 519.2509155273438, "learning_rate": 7.172697879605954e-06, "loss": 16.9589, "step": 210610 }, { "epoch": 0.42546572558652535, "grad_norm": 534.8131103515625, "learning_rate": 7.1723834871970885e-06, "loss": 25.6091, "step": 210620 }, { "epoch": 0.42548592621920917, "grad_norm": 212.594482421875, "learning_rate": 7.172069084200291e-06, "loss": 19.7887, "step": 210630 }, { "epoch": 0.425506126851893, "grad_norm": 195.0337677001953, "learning_rate": 7.171754670617093e-06, "loss": 21.1371, "step": 210640 }, { "epoch": 0.4255263274845768, "grad_norm": 1047.99169921875, "learning_rate": 7.171440246449024e-06, "loss": 32.4524, "step": 210650 }, { "epoch": 0.4255465281172606, "grad_norm": 702.249267578125, "learning_rate": 7.171125811697619e-06, "loss": 17.065, "step": 210660 }, { "epoch": 0.42556672874994445, "grad_norm": 239.98944091796875, "learning_rate": 7.1708113663644105e-06, "loss": 27.1717, "step": 210670 }, { "epoch": 0.42558692938262827, "grad_norm": 579.8043823242188, "learning_rate": 7.170496910450932e-06, "loss": 23.0293, "step": 210680 }, { "epoch": 
0.4256071300153121, "grad_norm": 227.3568572998047, "learning_rate": 7.170182443958712e-06, "loss": 36.0902, "step": 210690 }, { "epoch": 0.4256273306479959, "grad_norm": 145.62576293945312, "learning_rate": 7.169867966889288e-06, "loss": 26.828, "step": 210700 }, { "epoch": 0.4256475312806797, "grad_norm": 800.6478881835938, "learning_rate": 7.16955347924419e-06, "loss": 31.4572, "step": 210710 }, { "epoch": 0.42566773191336355, "grad_norm": 350.2894287109375, "learning_rate": 7.169238981024952e-06, "loss": 18.8867, "step": 210720 }, { "epoch": 0.42568793254604737, "grad_norm": 293.75341796875, "learning_rate": 7.168924472233107e-06, "loss": 18.7785, "step": 210730 }, { "epoch": 0.4257081331787312, "grad_norm": 168.0781707763672, "learning_rate": 7.168609952870185e-06, "loss": 17.6992, "step": 210740 }, { "epoch": 0.42572833381141495, "grad_norm": 618.8905029296875, "learning_rate": 7.168295422937723e-06, "loss": 19.3183, "step": 210750 }, { "epoch": 0.42574853444409877, "grad_norm": 753.1238403320312, "learning_rate": 7.167980882437251e-06, "loss": 24.5493, "step": 210760 }, { "epoch": 0.4257687350767826, "grad_norm": 360.7587585449219, "learning_rate": 7.167666331370303e-06, "loss": 33.7452, "step": 210770 }, { "epoch": 0.4257889357094664, "grad_norm": 635.6531372070312, "learning_rate": 7.167351769738413e-06, "loss": 9.7641, "step": 210780 }, { "epoch": 0.42580913634215023, "grad_norm": 382.7436828613281, "learning_rate": 7.167037197543112e-06, "loss": 16.3177, "step": 210790 }, { "epoch": 0.42582933697483405, "grad_norm": 212.64776611328125, "learning_rate": 7.166722614785937e-06, "loss": 12.0685, "step": 210800 }, { "epoch": 0.42584953760751787, "grad_norm": 328.70526123046875, "learning_rate": 7.1664080214684176e-06, "loss": 22.4504, "step": 210810 }, { "epoch": 0.4258697382402017, "grad_norm": 444.7970275878906, "learning_rate": 7.166093417592087e-06, "loss": 13.8742, "step": 210820 }, { "epoch": 0.4258899388728855, "grad_norm": 488.2218933105469, "learning_rate": 7.165778803158481e-06, "loss": 15.0398, "step": 210830 }, { "epoch": 0.42591013950556933, "grad_norm": 432.7123718261719, "learning_rate": 7.165464178169133e-06, "loss": 15.9308, "step": 210840 }, { "epoch": 0.42593034013825315, "grad_norm": 310.0197448730469, "learning_rate": 7.1651495426255725e-06, "loss": 18.5798, "step": 210850 }, { "epoch": 0.42595054077093697, "grad_norm": 202.10557556152344, "learning_rate": 7.164834896529338e-06, "loss": 19.17, "step": 210860 }, { "epoch": 0.42597074140362073, "grad_norm": 305.1272277832031, "learning_rate": 7.164520239881958e-06, "loss": 31.638, "step": 210870 }, { "epoch": 0.42599094203630455, "grad_norm": 411.7037353515625, "learning_rate": 7.164205572684971e-06, "loss": 20.7471, "step": 210880 }, { "epoch": 0.4260111426689884, "grad_norm": 108.20954895019531, "learning_rate": 7.163890894939909e-06, "loss": 18.4649, "step": 210890 }, { "epoch": 0.4260313433016722, "grad_norm": 544.68798828125, "learning_rate": 7.1635762066483035e-06, "loss": 17.9135, "step": 210900 }, { "epoch": 0.426051543934356, "grad_norm": 302.5722961425781, "learning_rate": 7.163261507811692e-06, "loss": 26.4492, "step": 210910 }, { "epoch": 0.42607174456703983, "grad_norm": 552.00146484375, "learning_rate": 7.162946798431605e-06, "loss": 21.0426, "step": 210920 }, { "epoch": 0.42609194519972365, "grad_norm": 883.0687866210938, "learning_rate": 7.162632078509578e-06, "loss": 24.7735, "step": 210930 }, { "epoch": 0.4261121458324075, "grad_norm": 166.25155639648438, "learning_rate": 7.162317348047144e-06, 
"loss": 10.7019, "step": 210940 }, { "epoch": 0.4261323464650913, "grad_norm": 274.6679382324219, "learning_rate": 7.162002607045838e-06, "loss": 16.6136, "step": 210950 }, { "epoch": 0.4261525470977751, "grad_norm": 536.052978515625, "learning_rate": 7.161687855507193e-06, "loss": 12.0968, "step": 210960 }, { "epoch": 0.42617274773045893, "grad_norm": 575.5294189453125, "learning_rate": 7.161373093432745e-06, "loss": 17.3558, "step": 210970 }, { "epoch": 0.42619294836314275, "grad_norm": 120.9773178100586, "learning_rate": 7.161058320824026e-06, "loss": 21.9131, "step": 210980 }, { "epoch": 0.4262131489958266, "grad_norm": 953.031982421875, "learning_rate": 7.160743537682569e-06, "loss": 32.6143, "step": 210990 }, { "epoch": 0.42623334962851034, "grad_norm": 469.15252685546875, "learning_rate": 7.160428744009913e-06, "loss": 13.7952, "step": 211000 }, { "epoch": 0.42625355026119416, "grad_norm": 175.22052001953125, "learning_rate": 7.160113939807587e-06, "loss": 15.7662, "step": 211010 }, { "epoch": 0.426273750893878, "grad_norm": 91.44915008544922, "learning_rate": 7.159799125077129e-06, "loss": 9.9428, "step": 211020 }, { "epoch": 0.4262939515265618, "grad_norm": 388.4784240722656, "learning_rate": 7.159484299820071e-06, "loss": 21.2014, "step": 211030 }, { "epoch": 0.4263141521592456, "grad_norm": 279.7063903808594, "learning_rate": 7.15916946403795e-06, "loss": 20.4712, "step": 211040 }, { "epoch": 0.42633435279192944, "grad_norm": 411.2039794921875, "learning_rate": 7.1588546177322975e-06, "loss": 22.7108, "step": 211050 }, { "epoch": 0.42635455342461326, "grad_norm": 331.2437438964844, "learning_rate": 7.158539760904649e-06, "loss": 14.3297, "step": 211060 }, { "epoch": 0.4263747540572971, "grad_norm": 70.88005065917969, "learning_rate": 7.158224893556541e-06, "loss": 29.7413, "step": 211070 }, { "epoch": 0.4263949546899809, "grad_norm": 229.89979553222656, "learning_rate": 7.157910015689505e-06, "loss": 6.9969, "step": 211080 }, { "epoch": 0.4264151553226647, "grad_norm": 278.6521911621094, "learning_rate": 7.157595127305079e-06, "loss": 41.8598, "step": 211090 }, { "epoch": 0.42643535595534854, "grad_norm": 196.2091064453125, "learning_rate": 7.157280228404796e-06, "loss": 15.4831, "step": 211100 }, { "epoch": 0.42645555658803236, "grad_norm": 130.30809020996094, "learning_rate": 7.15696531899019e-06, "loss": 15.3417, "step": 211110 }, { "epoch": 0.4264757572207162, "grad_norm": 479.2704772949219, "learning_rate": 7.156650399062797e-06, "loss": 21.1902, "step": 211120 }, { "epoch": 0.42649595785339994, "grad_norm": 488.7560119628906, "learning_rate": 7.156335468624151e-06, "loss": 19.0772, "step": 211130 }, { "epoch": 0.42651615848608376, "grad_norm": 377.4590759277344, "learning_rate": 7.156020527675788e-06, "loss": 18.6304, "step": 211140 }, { "epoch": 0.4265363591187676, "grad_norm": 243.23861694335938, "learning_rate": 7.155705576219242e-06, "loss": 11.6741, "step": 211150 }, { "epoch": 0.4265565597514514, "grad_norm": 336.5179748535156, "learning_rate": 7.155390614256048e-06, "loss": 18.4848, "step": 211160 }, { "epoch": 0.4265767603841352, "grad_norm": 153.4668731689453, "learning_rate": 7.1550756417877436e-06, "loss": 26.9486, "step": 211170 }, { "epoch": 0.42659696101681904, "grad_norm": 420.29925537109375, "learning_rate": 7.15476065881586e-06, "loss": 24.4821, "step": 211180 }, { "epoch": 0.42661716164950286, "grad_norm": 197.16571044921875, "learning_rate": 7.154445665341933e-06, "loss": 22.205, "step": 211190 }, { "epoch": 0.4266373622821867, "grad_norm": 
552.336669921875, "learning_rate": 7.154130661367503e-06, "loss": 16.2936, "step": 211200 }, { "epoch": 0.4266575629148705, "grad_norm": 732.9390869140625, "learning_rate": 7.1538156468940986e-06, "loss": 30.0069, "step": 211210 }, { "epoch": 0.4266777635475543, "grad_norm": 412.35076904296875, "learning_rate": 7.15350062192326e-06, "loss": 11.5509, "step": 211220 }, { "epoch": 0.42669796418023814, "grad_norm": 425.7041015625, "learning_rate": 7.153185586456518e-06, "loss": 17.0695, "step": 211230 }, { "epoch": 0.42671816481292196, "grad_norm": 22.26059341430664, "learning_rate": 7.152870540495413e-06, "loss": 15.4676, "step": 211240 }, { "epoch": 0.4267383654456058, "grad_norm": 94.48267364501953, "learning_rate": 7.1525554840414765e-06, "loss": 17.9319, "step": 211250 }, { "epoch": 0.42675856607828955, "grad_norm": 156.0415496826172, "learning_rate": 7.152240417096247e-06, "loss": 17.3309, "step": 211260 }, { "epoch": 0.42677876671097337, "grad_norm": 350.1610107421875, "learning_rate": 7.151925339661256e-06, "loss": 27.3246, "step": 211270 }, { "epoch": 0.4267989673436572, "grad_norm": 381.147705078125, "learning_rate": 7.151610251738045e-06, "loss": 12.8145, "step": 211280 }, { "epoch": 0.426819167976341, "grad_norm": 291.07183837890625, "learning_rate": 7.151295153328146e-06, "loss": 17.6673, "step": 211290 }, { "epoch": 0.4268393686090248, "grad_norm": 319.807373046875, "learning_rate": 7.150980044433094e-06, "loss": 39.6193, "step": 211300 }, { "epoch": 0.42685956924170865, "grad_norm": 845.2122802734375, "learning_rate": 7.150664925054427e-06, "loss": 21.6314, "step": 211310 }, { "epoch": 0.42687976987439247, "grad_norm": 166.87449645996094, "learning_rate": 7.1503497951936794e-06, "loss": 28.2948, "step": 211320 }, { "epoch": 0.4268999705070763, "grad_norm": 449.9621887207031, "learning_rate": 7.1500346548523894e-06, "loss": 34.1296, "step": 211330 }, { "epoch": 0.4269201711397601, "grad_norm": 339.46661376953125, "learning_rate": 7.14971950403209e-06, "loss": 9.2132, "step": 211340 }, { "epoch": 0.4269403717724439, "grad_norm": 417.22430419921875, "learning_rate": 7.149404342734317e-06, "loss": 25.15, "step": 211350 }, { "epoch": 0.42696057240512775, "grad_norm": 607.6698608398438, "learning_rate": 7.14908917096061e-06, "loss": 19.1785, "step": 211360 }, { "epoch": 0.42698077303781157, "grad_norm": 250.4014129638672, "learning_rate": 7.148773988712503e-06, "loss": 23.7206, "step": 211370 }, { "epoch": 0.4270009736704954, "grad_norm": 21.49866485595703, "learning_rate": 7.148458795991531e-06, "loss": 21.4286, "step": 211380 }, { "epoch": 0.42702117430317915, "grad_norm": 386.2162170410156, "learning_rate": 7.148143592799232e-06, "loss": 15.9567, "step": 211390 }, { "epoch": 0.42704137493586297, "grad_norm": 230.6778106689453, "learning_rate": 7.1478283791371415e-06, "loss": 36.8292, "step": 211400 }, { "epoch": 0.4270615755685468, "grad_norm": 367.3896789550781, "learning_rate": 7.147513155006798e-06, "loss": 17.0131, "step": 211410 }, { "epoch": 0.4270817762012306, "grad_norm": 784.0731201171875, "learning_rate": 7.147197920409733e-06, "loss": 24.7425, "step": 211420 }, { "epoch": 0.42710197683391443, "grad_norm": 171.8694305419922, "learning_rate": 7.146882675347486e-06, "loss": 18.0884, "step": 211430 }, { "epoch": 0.42712217746659825, "grad_norm": 596.0709838867188, "learning_rate": 7.146567419821595e-06, "loss": 26.5218, "step": 211440 }, { "epoch": 0.42714237809928207, "grad_norm": 591.632080078125, "learning_rate": 7.146252153833594e-06, "loss": 33.4292, "step": 211450 }, { 
"epoch": 0.4271625787319659, "grad_norm": 115.9808578491211, "learning_rate": 7.145936877385019e-06, "loss": 28.2537, "step": 211460 }, { "epoch": 0.4271827793646497, "grad_norm": 542.8873901367188, "learning_rate": 7.145621590477409e-06, "loss": 17.5467, "step": 211470 }, { "epoch": 0.42720297999733353, "grad_norm": 87.06926727294922, "learning_rate": 7.1453062931123e-06, "loss": 10.7627, "step": 211480 }, { "epoch": 0.42722318063001735, "grad_norm": 290.7402648925781, "learning_rate": 7.144990985291228e-06, "loss": 20.0356, "step": 211490 }, { "epoch": 0.42724338126270117, "grad_norm": 271.3774108886719, "learning_rate": 7.1446756670157306e-06, "loss": 22.2469, "step": 211500 }, { "epoch": 0.42726358189538494, "grad_norm": 615.0104370117188, "learning_rate": 7.144360338287343e-06, "loss": 20.2854, "step": 211510 }, { "epoch": 0.42728378252806876, "grad_norm": 226.7115478515625, "learning_rate": 7.1440449991076045e-06, "loss": 25.3044, "step": 211520 }, { "epoch": 0.4273039831607526, "grad_norm": 281.998046875, "learning_rate": 7.143729649478049e-06, "loss": 25.6546, "step": 211530 }, { "epoch": 0.4273241837934364, "grad_norm": 300.0990905761719, "learning_rate": 7.143414289400217e-06, "loss": 11.4684, "step": 211540 }, { "epoch": 0.4273443844261202, "grad_norm": 495.08087158203125, "learning_rate": 7.143098918875643e-06, "loss": 13.4182, "step": 211550 }, { "epoch": 0.42736458505880404, "grad_norm": 566.4196166992188, "learning_rate": 7.142783537905864e-06, "loss": 16.3631, "step": 211560 }, { "epoch": 0.42738478569148786, "grad_norm": 490.5919494628906, "learning_rate": 7.1424681464924185e-06, "loss": 25.3629, "step": 211570 }, { "epoch": 0.4274049863241717, "grad_norm": 836.9344482421875, "learning_rate": 7.142152744636843e-06, "loss": 30.2841, "step": 211580 }, { "epoch": 0.4274251869568555, "grad_norm": 356.6913146972656, "learning_rate": 7.141837332340675e-06, "loss": 22.8689, "step": 211590 }, { "epoch": 0.4274453875895393, "grad_norm": 977.859130859375, "learning_rate": 7.141521909605452e-06, "loss": 34.7252, "step": 211600 }, { "epoch": 0.42746558822222314, "grad_norm": 822.9716186523438, "learning_rate": 7.141206476432711e-06, "loss": 21.0304, "step": 211610 }, { "epoch": 0.42748578885490696, "grad_norm": 24.67584228515625, "learning_rate": 7.140891032823989e-06, "loss": 25.4088, "step": 211620 }, { "epoch": 0.4275059894875908, "grad_norm": 194.58030700683594, "learning_rate": 7.140575578780824e-06, "loss": 26.2878, "step": 211630 }, { "epoch": 0.42752619012027454, "grad_norm": 323.7582702636719, "learning_rate": 7.1402601143047514e-06, "loss": 26.4877, "step": 211640 }, { "epoch": 0.42754639075295836, "grad_norm": 456.35797119140625, "learning_rate": 7.139944639397313e-06, "loss": 24.8844, "step": 211650 }, { "epoch": 0.4275665913856422, "grad_norm": 201.45193481445312, "learning_rate": 7.1396291540600435e-06, "loss": 29.4343, "step": 211660 }, { "epoch": 0.427586792018326, "grad_norm": 303.1866455078125, "learning_rate": 7.13931365829448e-06, "loss": 21.7534, "step": 211670 }, { "epoch": 0.4276069926510098, "grad_norm": 368.20196533203125, "learning_rate": 7.138998152102162e-06, "loss": 17.7064, "step": 211680 }, { "epoch": 0.42762719328369364, "grad_norm": 306.7176513671875, "learning_rate": 7.138682635484626e-06, "loss": 21.6391, "step": 211690 }, { "epoch": 0.42764739391637746, "grad_norm": 1134.7449951171875, "learning_rate": 7.138367108443411e-06, "loss": 18.8113, "step": 211700 }, { "epoch": 0.4276675945490613, "grad_norm": 337.86138916015625, "learning_rate": 
7.138051570980053e-06, "loss": 19.6044, "step": 211710 }, { "epoch": 0.4276877951817451, "grad_norm": 422.91522216796875, "learning_rate": 7.137736023096091e-06, "loss": 20.632, "step": 211720 }, { "epoch": 0.4277079958144289, "grad_norm": 871.5953979492188, "learning_rate": 7.1374204647930636e-06, "loss": 29.7215, "step": 211730 }, { "epoch": 0.42772819644711274, "grad_norm": 688.8816528320312, "learning_rate": 7.137104896072508e-06, "loss": 22.6805, "step": 211740 }, { "epoch": 0.42774839707979656, "grad_norm": 27.244319915771484, "learning_rate": 7.1367893169359636e-06, "loss": 13.1682, "step": 211750 }, { "epoch": 0.4277685977124804, "grad_norm": 232.0713653564453, "learning_rate": 7.136473727384965e-06, "loss": 27.5891, "step": 211760 }, { "epoch": 0.42778879834516415, "grad_norm": 317.5654602050781, "learning_rate": 7.136158127421053e-06, "loss": 20.855, "step": 211770 }, { "epoch": 0.42780899897784797, "grad_norm": 3.613424777984619, "learning_rate": 7.1358425170457655e-06, "loss": 12.7244, "step": 211780 }, { "epoch": 0.4278291996105318, "grad_norm": 705.9700927734375, "learning_rate": 7.135526896260643e-06, "loss": 23.082, "step": 211790 }, { "epoch": 0.4278494002432156, "grad_norm": 416.4897766113281, "learning_rate": 7.135211265067217e-06, "loss": 10.2001, "step": 211800 }, { "epoch": 0.4278696008758994, "grad_norm": 384.7234802246094, "learning_rate": 7.1348956234670345e-06, "loss": 19.6609, "step": 211810 }, { "epoch": 0.42788980150858325, "grad_norm": 73.60569763183594, "learning_rate": 7.134579971461627e-06, "loss": 19.9937, "step": 211820 }, { "epoch": 0.42791000214126707, "grad_norm": 281.4009094238281, "learning_rate": 7.134264309052537e-06, "loss": 17.6055, "step": 211830 }, { "epoch": 0.4279302027739509, "grad_norm": 114.01895904541016, "learning_rate": 7.1339486362413005e-06, "loss": 17.1725, "step": 211840 }, { "epoch": 0.4279504034066347, "grad_norm": 450.64923095703125, "learning_rate": 7.133632953029457e-06, "loss": 9.1914, "step": 211850 }, { "epoch": 0.4279706040393185, "grad_norm": 319.7637023925781, "learning_rate": 7.133317259418546e-06, "loss": 27.361, "step": 211860 }, { "epoch": 0.42799080467200235, "grad_norm": 377.401611328125, "learning_rate": 7.133001555410106e-06, "loss": 20.2308, "step": 211870 }, { "epoch": 0.42801100530468617, "grad_norm": 696.2091064453125, "learning_rate": 7.132685841005674e-06, "loss": 18.5949, "step": 211880 }, { "epoch": 0.42803120593737, "grad_norm": 883.6140747070312, "learning_rate": 7.1323701162067905e-06, "loss": 32.1629, "step": 211890 }, { "epoch": 0.42805140657005375, "grad_norm": 539.1190185546875, "learning_rate": 7.1320543810149945e-06, "loss": 19.7657, "step": 211900 }, { "epoch": 0.42807160720273757, "grad_norm": 252.74114990234375, "learning_rate": 7.131738635431822e-06, "loss": 25.9459, "step": 211910 }, { "epoch": 0.4280918078354214, "grad_norm": 230.0977020263672, "learning_rate": 7.131422879458815e-06, "loss": 15.9849, "step": 211920 }, { "epoch": 0.4281120084681052, "grad_norm": 1002.0272216796875, "learning_rate": 7.131107113097512e-06, "loss": 25.0592, "step": 211930 }, { "epoch": 0.42813220910078903, "grad_norm": 390.1118469238281, "learning_rate": 7.13079133634945e-06, "loss": 17.789, "step": 211940 }, { "epoch": 0.42815240973347285, "grad_norm": 1.0614250898361206, "learning_rate": 7.130475549216171e-06, "loss": 14.2543, "step": 211950 }, { "epoch": 0.42817261036615667, "grad_norm": 644.6792602539062, "learning_rate": 7.130159751699211e-06, "loss": 19.5148, "step": 211960 }, { "epoch": 
0.4281928109988405, "grad_norm": 489.6703796386719, "learning_rate": 7.129843943800112e-06, "loss": 23.6921, "step": 211970 }, { "epoch": 0.4282130116315243, "grad_norm": 54.380401611328125, "learning_rate": 7.129528125520411e-06, "loss": 23.5899, "step": 211980 }, { "epoch": 0.42823321226420813, "grad_norm": 252.82147216796875, "learning_rate": 7.129212296861649e-06, "loss": 13.7989, "step": 211990 }, { "epoch": 0.42825341289689195, "grad_norm": 300.24127197265625, "learning_rate": 7.128896457825364e-06, "loss": 24.0652, "step": 212000 }, { "epoch": 0.42827361352957577, "grad_norm": 414.4104309082031, "learning_rate": 7.128580608413096e-06, "loss": 16.4752, "step": 212010 }, { "epoch": 0.4282938141622596, "grad_norm": 400.53228759765625, "learning_rate": 7.128264748626385e-06, "loss": 12.5373, "step": 212020 }, { "epoch": 0.42831401479494335, "grad_norm": 431.4949951171875, "learning_rate": 7.127948878466768e-06, "loss": 21.7802, "step": 212030 }, { "epoch": 0.4283342154276272, "grad_norm": 115.11209869384766, "learning_rate": 7.127632997935787e-06, "loss": 24.0653, "step": 212040 }, { "epoch": 0.428354416060311, "grad_norm": 335.8630065917969, "learning_rate": 7.127317107034982e-06, "loss": 20.3889, "step": 212050 }, { "epoch": 0.4283746166929948, "grad_norm": 1286.7281494140625, "learning_rate": 7.12700120576589e-06, "loss": 29.3023, "step": 212060 }, { "epoch": 0.42839481732567863, "grad_norm": 167.96475219726562, "learning_rate": 7.126685294130053e-06, "loss": 12.2335, "step": 212070 }, { "epoch": 0.42841501795836245, "grad_norm": 413.0950622558594, "learning_rate": 7.126369372129009e-06, "loss": 25.9105, "step": 212080 }, { "epoch": 0.4284352185910463, "grad_norm": 5.152590274810791, "learning_rate": 7.1260534397643e-06, "loss": 6.9625, "step": 212090 }, { "epoch": 0.4284554192237301, "grad_norm": 264.45416259765625, "learning_rate": 7.125737497037464e-06, "loss": 11.4568, "step": 212100 }, { "epoch": 0.4284756198564139, "grad_norm": 473.06207275390625, "learning_rate": 7.125421543950039e-06, "loss": 13.1764, "step": 212110 }, { "epoch": 0.42849582048909773, "grad_norm": 243.21975708007812, "learning_rate": 7.12510558050357e-06, "loss": 30.7551, "step": 212120 }, { "epoch": 0.42851602112178155, "grad_norm": 164.69775390625, "learning_rate": 7.124789606699594e-06, "loss": 17.1487, "step": 212130 }, { "epoch": 0.4285362217544654, "grad_norm": 1040.53369140625, "learning_rate": 7.1244736225396485e-06, "loss": 22.3604, "step": 212140 }, { "epoch": 0.42855642238714914, "grad_norm": 641.1317138671875, "learning_rate": 7.124157628025279e-06, "loss": 28.1907, "step": 212150 }, { "epoch": 0.42857662301983296, "grad_norm": 159.34039306640625, "learning_rate": 7.123841623158021e-06, "loss": 26.2785, "step": 212160 }, { "epoch": 0.4285968236525168, "grad_norm": 812.2888793945312, "learning_rate": 7.123525607939418e-06, "loss": 30.7258, "step": 212170 }, { "epoch": 0.4286170242852006, "grad_norm": 245.14108276367188, "learning_rate": 7.1232095823710064e-06, "loss": 10.38, "step": 212180 }, { "epoch": 0.4286372249178844, "grad_norm": 109.20542907714844, "learning_rate": 7.1228935464543325e-06, "loss": 15.28, "step": 212190 }, { "epoch": 0.42865742555056824, "grad_norm": 442.0393371582031, "learning_rate": 7.12257750019093e-06, "loss": 24.887, "step": 212200 }, { "epoch": 0.42867762618325206, "grad_norm": 99.32170104980469, "learning_rate": 7.122261443582343e-06, "loss": 23.0774, "step": 212210 }, { "epoch": 0.4286978268159359, "grad_norm": 324.8145751953125, "learning_rate": 7.12194537663011e-06, 
"loss": 26.7955, "step": 212220 }, { "epoch": 0.4287180274486197, "grad_norm": 288.45196533203125, "learning_rate": 7.121629299335775e-06, "loss": 24.4507, "step": 212230 }, { "epoch": 0.4287382280813035, "grad_norm": 0.0, "learning_rate": 7.121313211700875e-06, "loss": 16.4163, "step": 212240 }, { "epoch": 0.42875842871398734, "grad_norm": 152.95367431640625, "learning_rate": 7.120997113726951e-06, "loss": 12.9391, "step": 212250 }, { "epoch": 0.42877862934667116, "grad_norm": 65.94784545898438, "learning_rate": 7.120681005415546e-06, "loss": 13.3712, "step": 212260 }, { "epoch": 0.428798829979355, "grad_norm": 579.9019165039062, "learning_rate": 7.120364886768197e-06, "loss": 16.0123, "step": 212270 }, { "epoch": 0.42881903061203874, "grad_norm": 595.8623046875, "learning_rate": 7.120048757786448e-06, "loss": 33.4327, "step": 212280 }, { "epoch": 0.42883923124472256, "grad_norm": 543.7451782226562, "learning_rate": 7.119732618471838e-06, "loss": 37.4489, "step": 212290 }, { "epoch": 0.4288594318774064, "grad_norm": 290.9337463378906, "learning_rate": 7.119416468825908e-06, "loss": 30.5408, "step": 212300 }, { "epoch": 0.4288796325100902, "grad_norm": 342.37957763671875, "learning_rate": 7.119100308850201e-06, "loss": 36.2474, "step": 212310 }, { "epoch": 0.428899833142774, "grad_norm": 753.0562744140625, "learning_rate": 7.118784138546254e-06, "loss": 14.3913, "step": 212320 }, { "epoch": 0.42892003377545784, "grad_norm": 6.931411266326904, "learning_rate": 7.1184679579156115e-06, "loss": 29.2693, "step": 212330 }, { "epoch": 0.42894023440814166, "grad_norm": 412.5704040527344, "learning_rate": 7.118151766959811e-06, "loss": 25.7069, "step": 212340 }, { "epoch": 0.4289604350408255, "grad_norm": 226.2316131591797, "learning_rate": 7.117835565680399e-06, "loss": 24.1271, "step": 212350 }, { "epoch": 0.4289806356735093, "grad_norm": 186.11773681640625, "learning_rate": 7.11751935407891e-06, "loss": 23.0482, "step": 212360 }, { "epoch": 0.4290008363061931, "grad_norm": 198.1680145263672, "learning_rate": 7.11720313215689e-06, "loss": 23.9003, "step": 212370 }, { "epoch": 0.42902103693887694, "grad_norm": 295.8449401855469, "learning_rate": 7.116886899915879e-06, "loss": 26.7899, "step": 212380 }, { "epoch": 0.42904123757156076, "grad_norm": 847.6760864257812, "learning_rate": 7.116570657357418e-06, "loss": 33.5373, "step": 212390 }, { "epoch": 0.4290614382042446, "grad_norm": 423.29107666015625, "learning_rate": 7.116254404483049e-06, "loss": 11.6476, "step": 212400 }, { "epoch": 0.42908163883692835, "grad_norm": 222.54244995117188, "learning_rate": 7.115938141294309e-06, "loss": 31.9477, "step": 212410 }, { "epoch": 0.42910183946961217, "grad_norm": 274.11883544921875, "learning_rate": 7.1156218677927465e-06, "loss": 23.6792, "step": 212420 }, { "epoch": 0.429122040102296, "grad_norm": 451.3059387207031, "learning_rate": 7.115305583979899e-06, "loss": 15.9437, "step": 212430 }, { "epoch": 0.4291422407349798, "grad_norm": 12.71606731414795, "learning_rate": 7.114989289857308e-06, "loss": 26.5223, "step": 212440 }, { "epoch": 0.4291624413676636, "grad_norm": 350.2530517578125, "learning_rate": 7.114672985426516e-06, "loss": 26.1803, "step": 212450 }, { "epoch": 0.42918264200034745, "grad_norm": 310.4293518066406, "learning_rate": 7.114356670689065e-06, "loss": 11.0326, "step": 212460 }, { "epoch": 0.42920284263303127, "grad_norm": 361.52862548828125, "learning_rate": 7.114040345646497e-06, "loss": 23.1904, "step": 212470 }, { "epoch": 0.4292230432657151, "grad_norm": 143.16583251953125, 
"learning_rate": 7.113724010300351e-06, "loss": 21.7281, "step": 212480 }, { "epoch": 0.4292432438983989, "grad_norm": 404.5968017578125, "learning_rate": 7.113407664652171e-06, "loss": 13.2178, "step": 212490 }, { "epoch": 0.4292634445310827, "grad_norm": 77.86985778808594, "learning_rate": 7.113091308703498e-06, "loss": 17.3989, "step": 212500 }, { "epoch": 0.42928364516376655, "grad_norm": 38.63712692260742, "learning_rate": 7.1127749424558755e-06, "loss": 20.7504, "step": 212510 }, { "epoch": 0.42930384579645037, "grad_norm": 424.34197998046875, "learning_rate": 7.112458565910841e-06, "loss": 11.3512, "step": 212520 }, { "epoch": 0.4293240464291342, "grad_norm": 761.510986328125, "learning_rate": 7.112142179069943e-06, "loss": 25.2012, "step": 212530 }, { "epoch": 0.42934424706181795, "grad_norm": 1095.1092529296875, "learning_rate": 7.111825781934719e-06, "loss": 25.1316, "step": 212540 }, { "epoch": 0.42936444769450177, "grad_norm": 602.100341796875, "learning_rate": 7.111509374506712e-06, "loss": 22.8112, "step": 212550 }, { "epoch": 0.4293846483271856, "grad_norm": 318.4169921875, "learning_rate": 7.111192956787466e-06, "loss": 12.117, "step": 212560 }, { "epoch": 0.4294048489598694, "grad_norm": 289.4682922363281, "learning_rate": 7.1108765287785185e-06, "loss": 20.1972, "step": 212570 }, { "epoch": 0.42942504959255323, "grad_norm": 292.43341064453125, "learning_rate": 7.110560090481418e-06, "loss": 19.0301, "step": 212580 }, { "epoch": 0.42944525022523705, "grad_norm": 341.2841491699219, "learning_rate": 7.1102436418977e-06, "loss": 36.331, "step": 212590 }, { "epoch": 0.42946545085792087, "grad_norm": 897.2501220703125, "learning_rate": 7.1099271830289155e-06, "loss": 28.4771, "step": 212600 }, { "epoch": 0.4294856514906047, "grad_norm": 22.196765899658203, "learning_rate": 7.109610713876598e-06, "loss": 21.6898, "step": 212610 }, { "epoch": 0.4295058521232885, "grad_norm": 151.3219451904297, "learning_rate": 7.109294234442294e-06, "loss": 22.8881, "step": 212620 }, { "epoch": 0.42952605275597233, "grad_norm": 210.76492309570312, "learning_rate": 7.108977744727548e-06, "loss": 19.365, "step": 212630 }, { "epoch": 0.42954625338865615, "grad_norm": 822.7898559570312, "learning_rate": 7.108661244733899e-06, "loss": 22.5107, "step": 212640 }, { "epoch": 0.42956645402133997, "grad_norm": 119.3055648803711, "learning_rate": 7.10834473446289e-06, "loss": 22.6131, "step": 212650 }, { "epoch": 0.42958665465402374, "grad_norm": 1025.1552734375, "learning_rate": 7.108028213916065e-06, "loss": 41.4985, "step": 212660 }, { "epoch": 0.42960685528670756, "grad_norm": 417.2041931152344, "learning_rate": 7.107711683094966e-06, "loss": 18.0833, "step": 212670 }, { "epoch": 0.4296270559193914, "grad_norm": 419.1497497558594, "learning_rate": 7.107395142001135e-06, "loss": 25.933, "step": 212680 }, { "epoch": 0.4296472565520752, "grad_norm": 1319.3250732421875, "learning_rate": 7.107078590636118e-06, "loss": 27.9086, "step": 212690 }, { "epoch": 0.429667457184759, "grad_norm": 900.4307250976562, "learning_rate": 7.106762029001455e-06, "loss": 11.1124, "step": 212700 }, { "epoch": 0.42968765781744284, "grad_norm": 374.51129150390625, "learning_rate": 7.106445457098687e-06, "loss": 15.3615, "step": 212710 }, { "epoch": 0.42970785845012666, "grad_norm": 485.9091796875, "learning_rate": 7.106128874929364e-06, "loss": 24.1599, "step": 212720 }, { "epoch": 0.4297280590828105, "grad_norm": 438.72784423828125, "learning_rate": 7.10581228249502e-06, "loss": 17.5406, "step": 212730 }, { "epoch": 
0.4297482597154943, "grad_norm": 211.18467712402344, "learning_rate": 7.105495679797203e-06, "loss": 24.0315, "step": 212740 }, { "epoch": 0.4297684603481781, "grad_norm": 250.50799560546875, "learning_rate": 7.105179066837456e-06, "loss": 16.8138, "step": 212750 }, { "epoch": 0.42978866098086194, "grad_norm": 179.66966247558594, "learning_rate": 7.104862443617322e-06, "loss": 14.6905, "step": 212760 }, { "epoch": 0.42980886161354576, "grad_norm": 509.5496826171875, "learning_rate": 7.104545810138343e-06, "loss": 16.3801, "step": 212770 }, { "epoch": 0.4298290622462296, "grad_norm": 397.1598205566406, "learning_rate": 7.1042291664020635e-06, "loss": 22.1303, "step": 212780 }, { "epoch": 0.42984926287891334, "grad_norm": 522.1380004882812, "learning_rate": 7.103912512410025e-06, "loss": 23.0236, "step": 212790 }, { "epoch": 0.42986946351159716, "grad_norm": 464.6538391113281, "learning_rate": 7.103595848163775e-06, "loss": 24.2334, "step": 212800 }, { "epoch": 0.429889664144281, "grad_norm": 73.5645523071289, "learning_rate": 7.103279173664851e-06, "loss": 17.8165, "step": 212810 }, { "epoch": 0.4299098647769648, "grad_norm": 469.95623779296875, "learning_rate": 7.1029624889148005e-06, "loss": 16.158, "step": 212820 }, { "epoch": 0.4299300654096486, "grad_norm": 427.65716552734375, "learning_rate": 7.102645793915166e-06, "loss": 28.5236, "step": 212830 }, { "epoch": 0.42995026604233244, "grad_norm": 367.964111328125, "learning_rate": 7.10232908866749e-06, "loss": 29.3951, "step": 212840 }, { "epoch": 0.42997046667501626, "grad_norm": 376.3992614746094, "learning_rate": 7.102012373173319e-06, "loss": 27.3425, "step": 212850 }, { "epoch": 0.4299906673077001, "grad_norm": 258.5666809082031, "learning_rate": 7.101695647434193e-06, "loss": 16.5746, "step": 212860 }, { "epoch": 0.4300108679403839, "grad_norm": 212.1715087890625, "learning_rate": 7.101378911451659e-06, "loss": 12.4041, "step": 212870 }, { "epoch": 0.4300310685730677, "grad_norm": 590.6844482421875, "learning_rate": 7.101062165227257e-06, "loss": 20.8692, "step": 212880 }, { "epoch": 0.43005126920575154, "grad_norm": 489.03302001953125, "learning_rate": 7.100745408762534e-06, "loss": 14.9576, "step": 212890 }, { "epoch": 0.43007146983843536, "grad_norm": 1098.0113525390625, "learning_rate": 7.100428642059033e-06, "loss": 29.7588, "step": 212900 }, { "epoch": 0.4300916704711192, "grad_norm": 201.09103393554688, "learning_rate": 7.100111865118295e-06, "loss": 23.0751, "step": 212910 }, { "epoch": 0.43011187110380295, "grad_norm": 607.2546997070312, "learning_rate": 7.099795077941869e-06, "loss": 26.2209, "step": 212920 }, { "epoch": 0.43013207173648677, "grad_norm": 51.52604675292969, "learning_rate": 7.099478280531296e-06, "loss": 10.3928, "step": 212930 }, { "epoch": 0.4301522723691706, "grad_norm": 291.2106018066406, "learning_rate": 7.0991614728881205e-06, "loss": 24.6395, "step": 212940 }, { "epoch": 0.4301724730018544, "grad_norm": 292.82940673828125, "learning_rate": 7.098844655013886e-06, "loss": 19.3948, "step": 212950 }, { "epoch": 0.4301926736345382, "grad_norm": 143.66151428222656, "learning_rate": 7.098527826910138e-06, "loss": 46.2533, "step": 212960 }, { "epoch": 0.43021287426722205, "grad_norm": 370.6066589355469, "learning_rate": 7.09821098857842e-06, "loss": 22.9495, "step": 212970 }, { "epoch": 0.43023307489990587, "grad_norm": 434.4966125488281, "learning_rate": 7.097894140020276e-06, "loss": 36.5388, "step": 212980 }, { "epoch": 0.4302532755325897, "grad_norm": 313.8411865234375, "learning_rate": 
7.097577281237249e-06, "loss": 17.4724, "step": 212990 }, { "epoch": 0.4302734761652735, "grad_norm": 517.5624389648438, "learning_rate": 7.0972604122308865e-06, "loss": 19.9412, "step": 213000 }, { "epoch": 0.4302936767979573, "grad_norm": 317.34381103515625, "learning_rate": 7.096943533002732e-06, "loss": 17.9368, "step": 213010 }, { "epoch": 0.43031387743064115, "grad_norm": 239.87741088867188, "learning_rate": 7.096626643554325e-06, "loss": 20.8212, "step": 213020 }, { "epoch": 0.43033407806332497, "grad_norm": 402.55206298828125, "learning_rate": 7.0963097438872175e-06, "loss": 41.616, "step": 213030 }, { "epoch": 0.4303542786960088, "grad_norm": 178.4177703857422, "learning_rate": 7.09599283400295e-06, "loss": 35.7992, "step": 213040 }, { "epoch": 0.43037447932869255, "grad_norm": 157.42201232910156, "learning_rate": 7.095675913903067e-06, "loss": 20.6485, "step": 213050 }, { "epoch": 0.43039467996137637, "grad_norm": 179.64877319335938, "learning_rate": 7.095358983589115e-06, "loss": 10.9177, "step": 213060 }, { "epoch": 0.4304148805940602, "grad_norm": 318.3880310058594, "learning_rate": 7.095042043062635e-06, "loss": 14.0704, "step": 213070 }, { "epoch": 0.430435081226744, "grad_norm": 198.04489135742188, "learning_rate": 7.094725092325177e-06, "loss": 28.2684, "step": 213080 }, { "epoch": 0.43045528185942783, "grad_norm": 610.4573974609375, "learning_rate": 7.094408131378281e-06, "loss": 21.1807, "step": 213090 }, { "epoch": 0.43047548249211165, "grad_norm": 487.8459167480469, "learning_rate": 7.094091160223493e-06, "loss": 13.2935, "step": 213100 }, { "epoch": 0.43049568312479547, "grad_norm": 259.0301818847656, "learning_rate": 7.093774178862361e-06, "loss": 13.0684, "step": 213110 }, { "epoch": 0.4305158837574793, "grad_norm": 276.094970703125, "learning_rate": 7.0934571872964265e-06, "loss": 32.0942, "step": 213120 }, { "epoch": 0.4305360843901631, "grad_norm": 206.2740020751953, "learning_rate": 7.093140185527236e-06, "loss": 19.2888, "step": 213130 }, { "epoch": 0.43055628502284693, "grad_norm": 381.5347595214844, "learning_rate": 7.092823173556333e-06, "loss": 20.5193, "step": 213140 }, { "epoch": 0.43057648565553075, "grad_norm": 174.14508056640625, "learning_rate": 7.092506151385265e-06, "loss": 16.89, "step": 213150 }, { "epoch": 0.43059668628821457, "grad_norm": 175.1168975830078, "learning_rate": 7.092189119015575e-06, "loss": 14.0268, "step": 213160 }, { "epoch": 0.4306168869208984, "grad_norm": 184.75022888183594, "learning_rate": 7.09187207644881e-06, "loss": 21.9329, "step": 213170 }, { "epoch": 0.43063708755358215, "grad_norm": 201.6131591796875, "learning_rate": 7.091555023686512e-06, "loss": 32.6859, "step": 213180 }, { "epoch": 0.430657288186266, "grad_norm": 438.4466247558594, "learning_rate": 7.091237960730231e-06, "loss": 21.0089, "step": 213190 }, { "epoch": 0.4306774888189498, "grad_norm": 287.1460876464844, "learning_rate": 7.090920887581507e-06, "loss": 21.9322, "step": 213200 }, { "epoch": 0.4306976894516336, "grad_norm": 270.5567321777344, "learning_rate": 7.090603804241892e-06, "loss": 14.682, "step": 213210 }, { "epoch": 0.43071789008431743, "grad_norm": 1029.9796142578125, "learning_rate": 7.090286710712925e-06, "loss": 34.4511, "step": 213220 }, { "epoch": 0.43073809071700125, "grad_norm": 413.9538879394531, "learning_rate": 7.089969606996155e-06, "loss": 9.93, "step": 213230 }, { "epoch": 0.4307582913496851, "grad_norm": 948.9886474609375, "learning_rate": 7.089652493093127e-06, "loss": 36.8057, "step": 213240 }, { "epoch": 0.4307784919823689, 
"grad_norm": 321.8413391113281, "learning_rate": 7.0893353690053845e-06, "loss": 16.7714, "step": 213250 }, { "epoch": 0.4307986926150527, "grad_norm": 447.0562744140625, "learning_rate": 7.089018234734476e-06, "loss": 15.2927, "step": 213260 }, { "epoch": 0.43081889324773653, "grad_norm": 176.10293579101562, "learning_rate": 7.088701090281945e-06, "loss": 11.8446, "step": 213270 }, { "epoch": 0.43083909388042035, "grad_norm": 560.9588012695312, "learning_rate": 7.088383935649339e-06, "loss": 21.9758, "step": 213280 }, { "epoch": 0.4308592945131042, "grad_norm": 1164.3033447265625, "learning_rate": 7.088066770838204e-06, "loss": 21.1519, "step": 213290 }, { "epoch": 0.43087949514578794, "grad_norm": 766.023681640625, "learning_rate": 7.087749595850084e-06, "loss": 28.1324, "step": 213300 }, { "epoch": 0.43089969577847176, "grad_norm": 567.0196533203125, "learning_rate": 7.087432410686526e-06, "loss": 23.1312, "step": 213310 }, { "epoch": 0.4309198964111556, "grad_norm": 62.32025146484375, "learning_rate": 7.087115215349074e-06, "loss": 19.6072, "step": 213320 }, { "epoch": 0.4309400970438394, "grad_norm": 195.3494873046875, "learning_rate": 7.086798009839278e-06, "loss": 11.3675, "step": 213330 }, { "epoch": 0.4309602976765232, "grad_norm": 226.8108673095703, "learning_rate": 7.086480794158679e-06, "loss": 15.9095, "step": 213340 }, { "epoch": 0.43098049830920704, "grad_norm": 442.96417236328125, "learning_rate": 7.086163568308828e-06, "loss": 12.637, "step": 213350 }, { "epoch": 0.43100069894189086, "grad_norm": 369.91412353515625, "learning_rate": 7.085846332291267e-06, "loss": 32.5252, "step": 213360 }, { "epoch": 0.4310208995745747, "grad_norm": 833.627685546875, "learning_rate": 7.085529086107545e-06, "loss": 27.3388, "step": 213370 }, { "epoch": 0.4310411002072585, "grad_norm": 211.1463623046875, "learning_rate": 7.085211829759207e-06, "loss": 21.9671, "step": 213380 }, { "epoch": 0.4310613008399423, "grad_norm": 556.7465209960938, "learning_rate": 7.084894563247798e-06, "loss": 17.2722, "step": 213390 }, { "epoch": 0.43108150147262614, "grad_norm": 69.29907989501953, "learning_rate": 7.0845772865748684e-06, "loss": 10.2243, "step": 213400 }, { "epoch": 0.43110170210530996, "grad_norm": 80.62250518798828, "learning_rate": 7.0842599997419606e-06, "loss": 33.0582, "step": 213410 }, { "epoch": 0.4311219027379938, "grad_norm": 718.1709594726562, "learning_rate": 7.083942702750622e-06, "loss": 11.4429, "step": 213420 }, { "epoch": 0.43114210337067754, "grad_norm": 159.78565979003906, "learning_rate": 7.083625395602401e-06, "loss": 21.0411, "step": 213430 }, { "epoch": 0.43116230400336136, "grad_norm": 380.5772399902344, "learning_rate": 7.08330807829884e-06, "loss": 15.6969, "step": 213440 }, { "epoch": 0.4311825046360452, "grad_norm": 401.239990234375, "learning_rate": 7.0829907508414894e-06, "loss": 25.3976, "step": 213450 }, { "epoch": 0.431202705268729, "grad_norm": 280.7538146972656, "learning_rate": 7.082673413231896e-06, "loss": 20.0536, "step": 213460 }, { "epoch": 0.4312229059014128, "grad_norm": 96.32061004638672, "learning_rate": 7.082356065471603e-06, "loss": 20.6452, "step": 213470 }, { "epoch": 0.43124310653409664, "grad_norm": 28.504817962646484, "learning_rate": 7.082038707562159e-06, "loss": 15.115, "step": 213480 }, { "epoch": 0.43126330716678046, "grad_norm": 421.4388122558594, "learning_rate": 7.081721339505112e-06, "loss": 24.9443, "step": 213490 }, { "epoch": 0.4312835077994643, "grad_norm": 3.3708393573760986, "learning_rate": 7.081403961302007e-06, "loss": 14.0149, 
"step": 213500 }, { "epoch": 0.4313037084321481, "grad_norm": 347.34014892578125, "learning_rate": 7.081086572954392e-06, "loss": 21.9458, "step": 213510 }, { "epoch": 0.4313239090648319, "grad_norm": 647.5054321289062, "learning_rate": 7.080769174463812e-06, "loss": 30.6609, "step": 213520 }, { "epoch": 0.43134410969751574, "grad_norm": 280.7157287597656, "learning_rate": 7.080451765831817e-06, "loss": 15.5381, "step": 213530 }, { "epoch": 0.43136431033019956, "grad_norm": 252.02383422851562, "learning_rate": 7.0801343470599525e-06, "loss": 19.6196, "step": 213540 }, { "epoch": 0.4313845109628834, "grad_norm": 768.6023559570312, "learning_rate": 7.079816918149765e-06, "loss": 25.3262, "step": 213550 }, { "epoch": 0.43140471159556715, "grad_norm": 810.6908569335938, "learning_rate": 7.079499479102802e-06, "loss": 26.0697, "step": 213560 }, { "epoch": 0.43142491222825097, "grad_norm": 624.0357055664062, "learning_rate": 7.07918202992061e-06, "loss": 10.6303, "step": 213570 }, { "epoch": 0.4314451128609348, "grad_norm": 20.6352481842041, "learning_rate": 7.0788645706047384e-06, "loss": 10.3129, "step": 213580 }, { "epoch": 0.4314653134936186, "grad_norm": 226.48268127441406, "learning_rate": 7.078547101156732e-06, "loss": 18.2315, "step": 213590 }, { "epoch": 0.4314855141263024, "grad_norm": 416.14483642578125, "learning_rate": 7.07822962157814e-06, "loss": 30.8123, "step": 213600 }, { "epoch": 0.43150571475898625, "grad_norm": 302.602294921875, "learning_rate": 7.077912131870508e-06, "loss": 19.0619, "step": 213610 }, { "epoch": 0.43152591539167007, "grad_norm": 163.70896911621094, "learning_rate": 7.077594632035385e-06, "loss": 13.1436, "step": 213620 }, { "epoch": 0.4315461160243539, "grad_norm": 254.6735076904297, "learning_rate": 7.077277122074317e-06, "loss": 21.3446, "step": 213630 }, { "epoch": 0.4315663166570377, "grad_norm": 123.6911849975586, "learning_rate": 7.076959601988853e-06, "loss": 12.5261, "step": 213640 }, { "epoch": 0.4315865172897215, "grad_norm": 49.97380065917969, "learning_rate": 7.076642071780539e-06, "loss": 16.6701, "step": 213650 }, { "epoch": 0.43160671792240535, "grad_norm": 507.8262939453125, "learning_rate": 7.076324531450924e-06, "loss": 10.1495, "step": 213660 }, { "epoch": 0.43162691855508917, "grad_norm": 296.35711669921875, "learning_rate": 7.076006981001556e-06, "loss": 24.1735, "step": 213670 }, { "epoch": 0.431647119187773, "grad_norm": 341.41241455078125, "learning_rate": 7.07568942043398e-06, "loss": 38.0018, "step": 213680 }, { "epoch": 0.43166731982045675, "grad_norm": 360.9331359863281, "learning_rate": 7.075371849749747e-06, "loss": 25.1891, "step": 213690 }, { "epoch": 0.43168752045314057, "grad_norm": 497.16473388671875, "learning_rate": 7.075054268950402e-06, "loss": 21.5986, "step": 213700 }, { "epoch": 0.4317077210858244, "grad_norm": 211.05453491210938, "learning_rate": 7.074736678037495e-06, "loss": 20.4163, "step": 213710 }, { "epoch": 0.4317279217185082, "grad_norm": 682.28515625, "learning_rate": 7.074419077012572e-06, "loss": 11.8788, "step": 213720 }, { "epoch": 0.43174812235119203, "grad_norm": 1382.1717529296875, "learning_rate": 7.074101465877183e-06, "loss": 30.4152, "step": 213730 }, { "epoch": 0.43176832298387585, "grad_norm": 346.9513854980469, "learning_rate": 7.073783844632875e-06, "loss": 25.7935, "step": 213740 }, { "epoch": 0.43178852361655967, "grad_norm": 723.1521606445312, "learning_rate": 7.073466213281196e-06, "loss": 14.6055, "step": 213750 }, { "epoch": 0.4318087242492435, "grad_norm": 324.0091857910156, 
"learning_rate": 7.073148571823694e-06, "loss": 18.7438, "step": 213760 }, { "epoch": 0.4318289248819273, "grad_norm": 470.0965576171875, "learning_rate": 7.072830920261918e-06, "loss": 23.5092, "step": 213770 }, { "epoch": 0.43184912551461113, "grad_norm": 656.0071411132812, "learning_rate": 7.072513258597416e-06, "loss": 19.2885, "step": 213780 }, { "epoch": 0.43186932614729495, "grad_norm": 203.9122314453125, "learning_rate": 7.072195586831733e-06, "loss": 15.3756, "step": 213790 }, { "epoch": 0.43188952677997877, "grad_norm": 0.7824701070785522, "learning_rate": 7.071877904966422e-06, "loss": 14.3607, "step": 213800 }, { "epoch": 0.4319097274126626, "grad_norm": 383.6852111816406, "learning_rate": 7.071560213003028e-06, "loss": 17.5401, "step": 213810 }, { "epoch": 0.43192992804534636, "grad_norm": 121.68672180175781, "learning_rate": 7.071242510943101e-06, "loss": 24.731, "step": 213820 }, { "epoch": 0.4319501286780302, "grad_norm": 113.0888442993164, "learning_rate": 7.070924798788191e-06, "loss": 12.6736, "step": 213830 }, { "epoch": 0.431970329310714, "grad_norm": 411.6690368652344, "learning_rate": 7.070607076539844e-06, "loss": 15.2181, "step": 213840 }, { "epoch": 0.4319905299433978, "grad_norm": 593.9186401367188, "learning_rate": 7.070289344199609e-06, "loss": 28.4739, "step": 213850 }, { "epoch": 0.43201073057608164, "grad_norm": 522.96728515625, "learning_rate": 7.069971601769035e-06, "loss": 19.7131, "step": 213860 }, { "epoch": 0.43203093120876546, "grad_norm": 49.98808288574219, "learning_rate": 7.069653849249671e-06, "loss": 15.4424, "step": 213870 }, { "epoch": 0.4320511318414493, "grad_norm": 1380.20703125, "learning_rate": 7.069336086643064e-06, "loss": 16.537, "step": 213880 }, { "epoch": 0.4320713324741331, "grad_norm": 413.2056579589844, "learning_rate": 7.0690183139507625e-06, "loss": 10.349, "step": 213890 }, { "epoch": 0.4320915331068169, "grad_norm": 285.13067626953125, "learning_rate": 7.0687005311743195e-06, "loss": 31.0651, "step": 213900 }, { "epoch": 0.43211173373950074, "grad_norm": 174.30828857421875, "learning_rate": 7.068382738315281e-06, "loss": 10.5683, "step": 213910 }, { "epoch": 0.43213193437218456, "grad_norm": 1726.405517578125, "learning_rate": 7.068064935375194e-06, "loss": 23.6315, "step": 213920 }, { "epoch": 0.4321521350048684, "grad_norm": 246.17745971679688, "learning_rate": 7.06774712235561e-06, "loss": 25.6117, "step": 213930 }, { "epoch": 0.43217233563755214, "grad_norm": 181.28305053710938, "learning_rate": 7.067429299258078e-06, "loss": 10.6982, "step": 213940 }, { "epoch": 0.43219253627023596, "grad_norm": 255.8702392578125, "learning_rate": 7.0671114660841455e-06, "loss": 10.8665, "step": 213950 }, { "epoch": 0.4322127369029198, "grad_norm": 411.35986328125, "learning_rate": 7.066793622835364e-06, "loss": 27.6026, "step": 213960 }, { "epoch": 0.4322329375356036, "grad_norm": 200.5729217529297, "learning_rate": 7.066475769513278e-06, "loss": 12.6426, "step": 213970 }, { "epoch": 0.4322531381682874, "grad_norm": 305.8490905761719, "learning_rate": 7.066157906119442e-06, "loss": 16.995, "step": 213980 }, { "epoch": 0.43227333880097124, "grad_norm": 178.36602783203125, "learning_rate": 7.0658400326554025e-06, "loss": 19.5498, "step": 213990 }, { "epoch": 0.43229353943365506, "grad_norm": 426.994140625, "learning_rate": 7.06552214912271e-06, "loss": 24.935, "step": 214000 }, { "epoch": 0.4323137400663389, "grad_norm": 309.235107421875, "learning_rate": 7.065204255522913e-06, "loss": 15.2317, "step": 214010 }, { "epoch": 
0.4323339406990227, "grad_norm": 325.0115966796875, "learning_rate": 7.064886351857559e-06, "loss": 18.5206, "step": 214020 }, { "epoch": 0.4323541413317065, "grad_norm": 691.6421508789062, "learning_rate": 7.064568438128201e-06, "loss": 27.4744, "step": 214030 }, { "epoch": 0.43237434196439034, "grad_norm": 431.8277587890625, "learning_rate": 7.064250514336386e-06, "loss": 18.0513, "step": 214040 }, { "epoch": 0.43239454259707416, "grad_norm": 308.5205993652344, "learning_rate": 7.063932580483665e-06, "loss": 32.0444, "step": 214050 }, { "epoch": 0.432414743229758, "grad_norm": 221.98826599121094, "learning_rate": 7.063614636571586e-06, "loss": 19.9235, "step": 214060 }, { "epoch": 0.43243494386244175, "grad_norm": 1.0397236347198486, "learning_rate": 7.063296682601701e-06, "loss": 31.5466, "step": 214070 }, { "epoch": 0.43245514449512557, "grad_norm": 97.03449249267578, "learning_rate": 7.062978718575558e-06, "loss": 25.4533, "step": 214080 }, { "epoch": 0.4324753451278094, "grad_norm": 291.8382873535156, "learning_rate": 7.062660744494706e-06, "loss": 34.5286, "step": 214090 }, { "epoch": 0.4324955457604932, "grad_norm": 122.28335571289062, "learning_rate": 7.0623427603606965e-06, "loss": 17.019, "step": 214100 }, { "epoch": 0.432515746393177, "grad_norm": 444.2857971191406, "learning_rate": 7.062024766175077e-06, "loss": 16.7993, "step": 214110 }, { "epoch": 0.43253594702586085, "grad_norm": 1357.626708984375, "learning_rate": 7.061706761939402e-06, "loss": 22.7957, "step": 214120 }, { "epoch": 0.43255614765854467, "grad_norm": 509.154052734375, "learning_rate": 7.0613887476552145e-06, "loss": 16.4702, "step": 214130 }, { "epoch": 0.4325763482912285, "grad_norm": 234.2130584716797, "learning_rate": 7.061070723324071e-06, "loss": 11.7002, "step": 214140 }, { "epoch": 0.4325965489239123, "grad_norm": 493.207763671875, "learning_rate": 7.060752688947519e-06, "loss": 17.9663, "step": 214150 }, { "epoch": 0.4326167495565961, "grad_norm": 579.5936279296875, "learning_rate": 7.060434644527105e-06, "loss": 24.1063, "step": 214160 }, { "epoch": 0.43263695018927995, "grad_norm": 264.56549072265625, "learning_rate": 7.060116590064385e-06, "loss": 18.8271, "step": 214170 }, { "epoch": 0.43265715082196377, "grad_norm": 257.78607177734375, "learning_rate": 7.059798525560907e-06, "loss": 20.6671, "step": 214180 }, { "epoch": 0.4326773514546476, "grad_norm": 192.1004638671875, "learning_rate": 7.05948045101822e-06, "loss": 9.7443, "step": 214190 }, { "epoch": 0.43269755208733135, "grad_norm": 178.0238494873047, "learning_rate": 7.059162366437875e-06, "loss": 18.8894, "step": 214200 }, { "epoch": 0.43271775272001517, "grad_norm": 490.48089599609375, "learning_rate": 7.058844271821423e-06, "loss": 21.5204, "step": 214210 }, { "epoch": 0.432737953352699, "grad_norm": 7.368875503540039, "learning_rate": 7.058526167170413e-06, "loss": 17.0344, "step": 214220 }, { "epoch": 0.4327581539853828, "grad_norm": 458.9912414550781, "learning_rate": 7.058208052486397e-06, "loss": 14.8924, "step": 214230 }, { "epoch": 0.43277835461806663, "grad_norm": 453.0133056640625, "learning_rate": 7.057889927770922e-06, "loss": 10.8933, "step": 214240 }, { "epoch": 0.43279855525075045, "grad_norm": 331.8202819824219, "learning_rate": 7.057571793025545e-06, "loss": 11.8419, "step": 214250 }, { "epoch": 0.43281875588343427, "grad_norm": 729.1732177734375, "learning_rate": 7.057253648251811e-06, "loss": 26.4337, "step": 214260 }, { "epoch": 0.4328389565161181, "grad_norm": 293.8382568359375, "learning_rate": 7.056935493451271e-06, 
"loss": 11.2388, "step": 214270 }, { "epoch": 0.4328591571488019, "grad_norm": 139.239501953125, "learning_rate": 7.056617328625479e-06, "loss": 28.4436, "step": 214280 }, { "epoch": 0.43287935778148573, "grad_norm": 132.2022247314453, "learning_rate": 7.056299153775981e-06, "loss": 17.8826, "step": 214290 }, { "epoch": 0.43289955841416955, "grad_norm": 458.7197265625, "learning_rate": 7.0559809689043325e-06, "loss": 17.1688, "step": 214300 }, { "epoch": 0.43291975904685337, "grad_norm": 20.315086364746094, "learning_rate": 7.055662774012081e-06, "loss": 22.9634, "step": 214310 }, { "epoch": 0.4329399596795372, "grad_norm": 779.4647827148438, "learning_rate": 7.0553445691007785e-06, "loss": 24.1353, "step": 214320 }, { "epoch": 0.43296016031222095, "grad_norm": 702.3358764648438, "learning_rate": 7.055026354171976e-06, "loss": 22.2894, "step": 214330 }, { "epoch": 0.4329803609449048, "grad_norm": 211.43646240234375, "learning_rate": 7.054708129227225e-06, "loss": 30.6347, "step": 214340 }, { "epoch": 0.4330005615775886, "grad_norm": 431.9747619628906, "learning_rate": 7.0543898942680755e-06, "loss": 31.5493, "step": 214350 }, { "epoch": 0.4330207622102724, "grad_norm": 178.3709716796875, "learning_rate": 7.054071649296078e-06, "loss": 20.351, "step": 214360 }, { "epoch": 0.43304096284295623, "grad_norm": 476.716552734375, "learning_rate": 7.053753394312786e-06, "loss": 29.231, "step": 214370 }, { "epoch": 0.43306116347564005, "grad_norm": 254.08023071289062, "learning_rate": 7.053435129319746e-06, "loss": 15.2788, "step": 214380 }, { "epoch": 0.4330813641083239, "grad_norm": 185.78001403808594, "learning_rate": 7.0531168543185155e-06, "loss": 24.5804, "step": 214390 }, { "epoch": 0.4331015647410077, "grad_norm": 118.17011260986328, "learning_rate": 7.052798569310641e-06, "loss": 13.5243, "step": 214400 }, { "epoch": 0.4331217653736915, "grad_norm": 284.7645263671875, "learning_rate": 7.052480274297675e-06, "loss": 23.3654, "step": 214410 }, { "epoch": 0.43314196600637533, "grad_norm": 198.52272033691406, "learning_rate": 7.0521619692811705e-06, "loss": 21.7649, "step": 214420 }, { "epoch": 0.43316216663905915, "grad_norm": 321.36224365234375, "learning_rate": 7.051843654262675e-06, "loss": 11.9821, "step": 214430 }, { "epoch": 0.433182367271743, "grad_norm": 102.5936050415039, "learning_rate": 7.051525329243745e-06, "loss": 11.121, "step": 214440 }, { "epoch": 0.4332025679044268, "grad_norm": 235.19683837890625, "learning_rate": 7.0512069942259275e-06, "loss": 18.9256, "step": 214450 }, { "epoch": 0.43322276853711056, "grad_norm": 1039.6514892578125, "learning_rate": 7.050888649210778e-06, "loss": 23.6549, "step": 214460 }, { "epoch": 0.4332429691697944, "grad_norm": 251.51922607421875, "learning_rate": 7.050570294199843e-06, "loss": 27.8066, "step": 214470 }, { "epoch": 0.4332631698024782, "grad_norm": 223.6874237060547, "learning_rate": 7.050251929194679e-06, "loss": 23.5122, "step": 214480 }, { "epoch": 0.433283370435162, "grad_norm": 283.97772216796875, "learning_rate": 7.049933554196835e-06, "loss": 31.5258, "step": 214490 }, { "epoch": 0.43330357106784584, "grad_norm": 320.8291320800781, "learning_rate": 7.049615169207864e-06, "loss": 11.4516, "step": 214500 }, { "epoch": 0.43332377170052966, "grad_norm": 694.3058471679688, "learning_rate": 7.049296774229317e-06, "loss": 19.1876, "step": 214510 }, { "epoch": 0.4333439723332135, "grad_norm": 541.5520629882812, "learning_rate": 7.048978369262747e-06, "loss": 11.8673, "step": 214520 }, { "epoch": 0.4333641729658973, "grad_norm": 
609.4490356445312, "learning_rate": 7.048659954309704e-06, "loss": 26.1409, "step": 214530 }, { "epoch": 0.4333843735985811, "grad_norm": 574.5306396484375, "learning_rate": 7.048341529371741e-06, "loss": 18.618, "step": 214540 }, { "epoch": 0.43340457423126494, "grad_norm": 80.67208862304688, "learning_rate": 7.048023094450412e-06, "loss": 26.0304, "step": 214550 }, { "epoch": 0.43342477486394876, "grad_norm": 655.6376953125, "learning_rate": 7.047704649547263e-06, "loss": 31.5926, "step": 214560 }, { "epoch": 0.4334449754966326, "grad_norm": 287.29541015625, "learning_rate": 7.047386194663852e-06, "loss": 38.228, "step": 214570 }, { "epoch": 0.43346517612931634, "grad_norm": 146.16346740722656, "learning_rate": 7.047067729801727e-06, "loss": 26.6311, "step": 214580 }, { "epoch": 0.43348537676200016, "grad_norm": 62.65180969238281, "learning_rate": 7.046749254962445e-06, "loss": 27.0513, "step": 214590 }, { "epoch": 0.433505577394684, "grad_norm": 327.2711181640625, "learning_rate": 7.0464307701475544e-06, "loss": 22.0265, "step": 214600 }, { "epoch": 0.4335257780273678, "grad_norm": 269.0719299316406, "learning_rate": 7.046112275358607e-06, "loss": 12.5422, "step": 214610 }, { "epoch": 0.4335459786600516, "grad_norm": 302.6663513183594, "learning_rate": 7.045793770597159e-06, "loss": 14.6392, "step": 214620 }, { "epoch": 0.43356617929273544, "grad_norm": 785.456298828125, "learning_rate": 7.045475255864757e-06, "loss": 15.9883, "step": 214630 }, { "epoch": 0.43358637992541926, "grad_norm": 496.3552551269531, "learning_rate": 7.0451567311629596e-06, "loss": 22.0847, "step": 214640 }, { "epoch": 0.4336065805581031, "grad_norm": 567.821533203125, "learning_rate": 7.044838196493315e-06, "loss": 16.1361, "step": 214650 }, { "epoch": 0.4336267811907869, "grad_norm": 1357.6578369140625, "learning_rate": 7.044519651857378e-06, "loss": 27.535, "step": 214660 }, { "epoch": 0.4336469818234707, "grad_norm": 354.6550598144531, "learning_rate": 7.0442010972567e-06, "loss": 41.6993, "step": 214670 }, { "epoch": 0.43366718245615454, "grad_norm": 406.94598388671875, "learning_rate": 7.043882532692834e-06, "loss": 20.9676, "step": 214680 }, { "epoch": 0.43368738308883836, "grad_norm": 806.630126953125, "learning_rate": 7.043563958167331e-06, "loss": 24.1425, "step": 214690 }, { "epoch": 0.4337075837215222, "grad_norm": 221.11691284179688, "learning_rate": 7.043245373681746e-06, "loss": 29.4682, "step": 214700 }, { "epoch": 0.43372778435420595, "grad_norm": 399.14971923828125, "learning_rate": 7.042926779237631e-06, "loss": 18.2937, "step": 214710 }, { "epoch": 0.43374798498688977, "grad_norm": 538.7844848632812, "learning_rate": 7.0426081748365386e-06, "loss": 26.3239, "step": 214720 }, { "epoch": 0.4337681856195736, "grad_norm": 319.4942932128906, "learning_rate": 7.042289560480023e-06, "loss": 8.8327, "step": 214730 }, { "epoch": 0.4337883862522574, "grad_norm": 118.76900482177734, "learning_rate": 7.041970936169632e-06, "loss": 12.8371, "step": 214740 }, { "epoch": 0.4338085868849412, "grad_norm": 332.2370910644531, "learning_rate": 7.041652301906925e-06, "loss": 12.5893, "step": 214750 }, { "epoch": 0.43382878751762505, "grad_norm": 34.194908142089844, "learning_rate": 7.041333657693452e-06, "loss": 28.0618, "step": 214760 }, { "epoch": 0.43384898815030887, "grad_norm": 770.1193237304688, "learning_rate": 7.041015003530766e-06, "loss": 22.473, "step": 214770 }, { "epoch": 0.4338691887829927, "grad_norm": 317.9777526855469, "learning_rate": 7.040696339420421e-06, "loss": 16.2811, "step": 214780 }, { 
"epoch": 0.4338893894156765, "grad_norm": 439.4205627441406, "learning_rate": 7.0403776653639685e-06, "loss": 62.0859, "step": 214790 }, { "epoch": 0.4339095900483603, "grad_norm": 434.8787841796875, "learning_rate": 7.0400589813629645e-06, "loss": 29.797, "step": 214800 }, { "epoch": 0.43392979068104415, "grad_norm": 132.6191864013672, "learning_rate": 7.039740287418959e-06, "loss": 10.511, "step": 214810 }, { "epoch": 0.43394999131372797, "grad_norm": 264.0240478515625, "learning_rate": 7.039421583533508e-06, "loss": 28.529, "step": 214820 }, { "epoch": 0.4339701919464118, "grad_norm": 341.5426940917969, "learning_rate": 7.039102869708162e-06, "loss": 25.6444, "step": 214830 }, { "epoch": 0.43399039257909555, "grad_norm": 432.90618896484375, "learning_rate": 7.038784145944477e-06, "loss": 11.9233, "step": 214840 }, { "epoch": 0.43401059321177937, "grad_norm": 631.9166870117188, "learning_rate": 7.038465412244005e-06, "loss": 17.7358, "step": 214850 }, { "epoch": 0.4340307938444632, "grad_norm": 231.7225341796875, "learning_rate": 7.0381466686083e-06, "loss": 21.0268, "step": 214860 }, { "epoch": 0.434050994477147, "grad_norm": 366.8880920410156, "learning_rate": 7.037827915038915e-06, "loss": 16.1726, "step": 214870 }, { "epoch": 0.43407119510983083, "grad_norm": 320.87042236328125, "learning_rate": 7.037509151537404e-06, "loss": 16.9134, "step": 214880 }, { "epoch": 0.43409139574251465, "grad_norm": 163.25094604492188, "learning_rate": 7.0371903781053215e-06, "loss": 34.1174, "step": 214890 }, { "epoch": 0.43411159637519847, "grad_norm": 1105.0751953125, "learning_rate": 7.036871594744218e-06, "loss": 26.4776, "step": 214900 }, { "epoch": 0.4341317970078823, "grad_norm": 586.7778930664062, "learning_rate": 7.036552801455652e-06, "loss": 16.3743, "step": 214910 }, { "epoch": 0.4341519976405661, "grad_norm": 141.29327392578125, "learning_rate": 7.0362339982411735e-06, "loss": 19.7854, "step": 214920 }, { "epoch": 0.43417219827324993, "grad_norm": 428.3002014160156, "learning_rate": 7.035915185102337e-06, "loss": 32.9638, "step": 214930 }, { "epoch": 0.43419239890593375, "grad_norm": 609.0887451171875, "learning_rate": 7.035596362040697e-06, "loss": 25.8629, "step": 214940 }, { "epoch": 0.43421259953861757, "grad_norm": 614.7772216796875, "learning_rate": 7.035277529057807e-06, "loss": 18.8787, "step": 214950 }, { "epoch": 0.4342328001713014, "grad_norm": 168.0975799560547, "learning_rate": 7.034958686155222e-06, "loss": 13.9449, "step": 214960 }, { "epoch": 0.43425300080398516, "grad_norm": 537.1471557617188, "learning_rate": 7.0346398333344945e-06, "loss": 19.1792, "step": 214970 }, { "epoch": 0.434273201436669, "grad_norm": 922.3345336914062, "learning_rate": 7.03432097059718e-06, "loss": 21.4447, "step": 214980 }, { "epoch": 0.4342934020693528, "grad_norm": 476.7845458984375, "learning_rate": 7.03400209794483e-06, "loss": 21.6334, "step": 214990 }, { "epoch": 0.4343136027020366, "grad_norm": 291.10595703125, "learning_rate": 7.033683215379002e-06, "loss": 18.2016, "step": 215000 }, { "epoch": 0.43433380333472044, "grad_norm": 514.0755004882812, "learning_rate": 7.033364322901248e-06, "loss": 23.4727, "step": 215010 }, { "epoch": 0.43435400396740426, "grad_norm": 285.6300048828125, "learning_rate": 7.0330454205131235e-06, "loss": 22.901, "step": 215020 }, { "epoch": 0.4343742046000881, "grad_norm": 875.8580932617188, "learning_rate": 7.032726508216182e-06, "loss": 21.1357, "step": 215030 }, { "epoch": 0.4343944052327719, "grad_norm": 328.4040222167969, "learning_rate": 
7.032407586011978e-06, "loss": 18.7843, "step": 215040 }, { "epoch": 0.4344146058654557, "grad_norm": 1020.8289794921875, "learning_rate": 7.0320886539020674e-06, "loss": 22.0729, "step": 215050 }, { "epoch": 0.43443480649813954, "grad_norm": 462.8836364746094, "learning_rate": 7.031769711887999e-06, "loss": 32.8359, "step": 215060 }, { "epoch": 0.43445500713082336, "grad_norm": 214.752197265625, "learning_rate": 7.031450759971335e-06, "loss": 13.637, "step": 215070 }, { "epoch": 0.4344752077635072, "grad_norm": 255.45242309570312, "learning_rate": 7.031131798153625e-06, "loss": 22.2565, "step": 215080 }, { "epoch": 0.434495408396191, "grad_norm": 161.22865295410156, "learning_rate": 7.030812826436426e-06, "loss": 18.9131, "step": 215090 }, { "epoch": 0.43451560902887476, "grad_norm": 231.98321533203125, "learning_rate": 7.030493844821291e-06, "loss": 18.8109, "step": 215100 }, { "epoch": 0.4345358096615586, "grad_norm": 290.0347900390625, "learning_rate": 7.0301748533097745e-06, "loss": 19.043, "step": 215110 }, { "epoch": 0.4345560102942424, "grad_norm": 517.3612060546875, "learning_rate": 7.029855851903433e-06, "loss": 28.0285, "step": 215120 }, { "epoch": 0.4345762109269262, "grad_norm": 255.6366729736328, "learning_rate": 7.029536840603821e-06, "loss": 20.5103, "step": 215130 }, { "epoch": 0.43459641155961004, "grad_norm": 461.9790954589844, "learning_rate": 7.029217819412491e-06, "loss": 18.1368, "step": 215140 }, { "epoch": 0.43461661219229386, "grad_norm": 342.81768798828125, "learning_rate": 7.028898788331e-06, "loss": 15.0202, "step": 215150 }, { "epoch": 0.4346368128249777, "grad_norm": 196.1928253173828, "learning_rate": 7.028579747360903e-06, "loss": 7.83, "step": 215160 }, { "epoch": 0.4346570134576615, "grad_norm": 275.57867431640625, "learning_rate": 7.028260696503754e-06, "loss": 15.5337, "step": 215170 }, { "epoch": 0.4346772140903453, "grad_norm": 387.40374755859375, "learning_rate": 7.027941635761109e-06, "loss": 11.6378, "step": 215180 }, { "epoch": 0.43469741472302914, "grad_norm": 224.36241149902344, "learning_rate": 7.02762256513452e-06, "loss": 20.288, "step": 215190 }, { "epoch": 0.43471761535571296, "grad_norm": 324.6615295410156, "learning_rate": 7.027303484625547e-06, "loss": 14.4402, "step": 215200 }, { "epoch": 0.4347378159883968, "grad_norm": 1438.098876953125, "learning_rate": 7.026984394235742e-06, "loss": 22.0813, "step": 215210 }, { "epoch": 0.43475801662108055, "grad_norm": 643.7030029296875, "learning_rate": 7.0266652939666605e-06, "loss": 15.3364, "step": 215220 }, { "epoch": 0.43477821725376437, "grad_norm": 738.4295043945312, "learning_rate": 7.026346183819859e-06, "loss": 28.5164, "step": 215230 }, { "epoch": 0.4347984178864482, "grad_norm": 210.78024291992188, "learning_rate": 7.026027063796891e-06, "loss": 22.1025, "step": 215240 }, { "epoch": 0.434818618519132, "grad_norm": 348.28533935546875, "learning_rate": 7.025707933899314e-06, "loss": 15.153, "step": 215250 }, { "epoch": 0.4348388191518158, "grad_norm": 639.33203125, "learning_rate": 7.0253887941286816e-06, "loss": 16.5251, "step": 215260 }, { "epoch": 0.43485901978449965, "grad_norm": 374.78662109375, "learning_rate": 7.025069644486549e-06, "loss": 24.9781, "step": 215270 }, { "epoch": 0.43487922041718347, "grad_norm": 248.15109252929688, "learning_rate": 7.024750484974473e-06, "loss": 16.7164, "step": 215280 }, { "epoch": 0.4348994210498673, "grad_norm": 412.46148681640625, "learning_rate": 7.02443131559401e-06, "loss": 43.2204, "step": 215290 }, { "epoch": 0.4349196216825511, 
"grad_norm": 514.1223754882812, "learning_rate": 7.024112136346713e-06, "loss": 28.6112, "step": 215300 }, { "epoch": 0.4349398223152349, "grad_norm": 399.0684814453125, "learning_rate": 7.023792947234139e-06, "loss": 17.6464, "step": 215310 }, { "epoch": 0.43496002294791875, "grad_norm": 405.3045654296875, "learning_rate": 7.023473748257844e-06, "loss": 13.2634, "step": 215320 }, { "epoch": 0.43498022358060257, "grad_norm": 205.40782165527344, "learning_rate": 7.023154539419383e-06, "loss": 12.7478, "step": 215330 }, { "epoch": 0.4350004242132864, "grad_norm": 507.6519470214844, "learning_rate": 7.0228353207203136e-06, "loss": 16.6266, "step": 215340 }, { "epoch": 0.43502062484597015, "grad_norm": 561.2252197265625, "learning_rate": 7.022516092162187e-06, "loss": 18.6296, "step": 215350 }, { "epoch": 0.43504082547865397, "grad_norm": 462.3230285644531, "learning_rate": 7.022196853746565e-06, "loss": 21.4073, "step": 215360 }, { "epoch": 0.4350610261113378, "grad_norm": 945.115478515625, "learning_rate": 7.0218776054750004e-06, "loss": 15.2625, "step": 215370 }, { "epoch": 0.4350812267440216, "grad_norm": 993.54443359375, "learning_rate": 7.021558347349049e-06, "loss": 16.8026, "step": 215380 }, { "epoch": 0.43510142737670543, "grad_norm": 605.0009155273438, "learning_rate": 7.0212390793702675e-06, "loss": 18.7984, "step": 215390 }, { "epoch": 0.43512162800938925, "grad_norm": 86.10658264160156, "learning_rate": 7.0209198015402115e-06, "loss": 17.231, "step": 215400 }, { "epoch": 0.43514182864207307, "grad_norm": 92.33135223388672, "learning_rate": 7.02060051386044e-06, "loss": 20.2986, "step": 215410 }, { "epoch": 0.4351620292747569, "grad_norm": 1052.357177734375, "learning_rate": 7.020281216332503e-06, "loss": 24.0735, "step": 215420 }, { "epoch": 0.4351822299074407, "grad_norm": 511.3278503417969, "learning_rate": 7.019961908957962e-06, "loss": 33.2723, "step": 215430 }, { "epoch": 0.43520243054012453, "grad_norm": 548.94921875, "learning_rate": 7.019642591738372e-06, "loss": 20.2783, "step": 215440 }, { "epoch": 0.43522263117280835, "grad_norm": 207.590087890625, "learning_rate": 7.01932326467529e-06, "loss": 19.8643, "step": 215450 }, { "epoch": 0.43524283180549217, "grad_norm": 499.21429443359375, "learning_rate": 7.0190039277702685e-06, "loss": 13.7157, "step": 215460 }, { "epoch": 0.435263032438176, "grad_norm": 287.5592346191406, "learning_rate": 7.018684581024868e-06, "loss": 13.873, "step": 215470 }, { "epoch": 0.43528323307085975, "grad_norm": 971.934814453125, "learning_rate": 7.018365224440644e-06, "loss": 36.0666, "step": 215480 }, { "epoch": 0.4353034337035436, "grad_norm": 443.7062683105469, "learning_rate": 7.018045858019154e-06, "loss": 46.004, "step": 215490 }, { "epoch": 0.4353236343362274, "grad_norm": 612.0048217773438, "learning_rate": 7.0177264817619514e-06, "loss": 28.7301, "step": 215500 }, { "epoch": 0.4353438349689112, "grad_norm": 1242.58740234375, "learning_rate": 7.017407095670594e-06, "loss": 24.5226, "step": 215510 }, { "epoch": 0.43536403560159503, "grad_norm": 31.768735885620117, "learning_rate": 7.0170876997466406e-06, "loss": 21.7206, "step": 215520 }, { "epoch": 0.43538423623427885, "grad_norm": 445.52276611328125, "learning_rate": 7.016768293991647e-06, "loss": 14.7207, "step": 215530 }, { "epoch": 0.4354044368669627, "grad_norm": 141.01197814941406, "learning_rate": 7.016448878407167e-06, "loss": 15.1699, "step": 215540 }, { "epoch": 0.4354246374996465, "grad_norm": 396.162109375, "learning_rate": 7.016129452994761e-06, "loss": 18.822, "step": 
215550 }, { "epoch": 0.4354448381323303, "grad_norm": 728.1718139648438, "learning_rate": 7.015810017755985e-06, "loss": 16.6068, "step": 215560 }, { "epoch": 0.43546503876501413, "grad_norm": 348.1802673339844, "learning_rate": 7.015490572692396e-06, "loss": 23.2858, "step": 215570 }, { "epoch": 0.43548523939769795, "grad_norm": 184.91587829589844, "learning_rate": 7.015171117805549e-06, "loss": 14.2151, "step": 215580 }, { "epoch": 0.4355054400303818, "grad_norm": 417.75201416015625, "learning_rate": 7.014851653097003e-06, "loss": 20.5845, "step": 215590 }, { "epoch": 0.4355256406630656, "grad_norm": 115.35167694091797, "learning_rate": 7.014532178568314e-06, "loss": 14.5059, "step": 215600 }, { "epoch": 0.43554584129574936, "grad_norm": 2028.61865234375, "learning_rate": 7.014212694221041e-06, "loss": 37.7481, "step": 215610 }, { "epoch": 0.4355660419284332, "grad_norm": 1195.914306640625, "learning_rate": 7.013893200056736e-06, "loss": 32.3363, "step": 215620 }, { "epoch": 0.435586242561117, "grad_norm": 465.3851623535156, "learning_rate": 7.013573696076964e-06, "loss": 12.0181, "step": 215630 }, { "epoch": 0.4356064431938008, "grad_norm": 441.920166015625, "learning_rate": 7.013254182283275e-06, "loss": 12.5004, "step": 215640 }, { "epoch": 0.43562664382648464, "grad_norm": 9.876458168029785, "learning_rate": 7.01293465867723e-06, "loss": 10.3304, "step": 215650 }, { "epoch": 0.43564684445916846, "grad_norm": 187.7135009765625, "learning_rate": 7.012615125260388e-06, "loss": 10.8502, "step": 215660 }, { "epoch": 0.4356670450918523, "grad_norm": 384.7646484375, "learning_rate": 7.012295582034301e-06, "loss": 29.0559, "step": 215670 }, { "epoch": 0.4356872457245361, "grad_norm": 507.1191711425781, "learning_rate": 7.011976029000531e-06, "loss": 24.8476, "step": 215680 }, { "epoch": 0.4357074463572199, "grad_norm": 37.014530181884766, "learning_rate": 7.011656466160633e-06, "loss": 15.289, "step": 215690 }, { "epoch": 0.43572764698990374, "grad_norm": 1234.5159912109375, "learning_rate": 7.011336893516167e-06, "loss": 17.1629, "step": 215700 }, { "epoch": 0.43574784762258756, "grad_norm": 187.715087890625, "learning_rate": 7.011017311068686e-06, "loss": 16.9608, "step": 215710 }, { "epoch": 0.4357680482552714, "grad_norm": 1021.8418579101562, "learning_rate": 7.010697718819753e-06, "loss": 18.2547, "step": 215720 }, { "epoch": 0.43578824888795514, "grad_norm": 48.73601531982422, "learning_rate": 7.0103781167709224e-06, "loss": 8.0319, "step": 215730 }, { "epoch": 0.43580844952063896, "grad_norm": 460.2627868652344, "learning_rate": 7.010058504923753e-06, "loss": 18.5694, "step": 215740 }, { "epoch": 0.4358286501533228, "grad_norm": 301.5263977050781, "learning_rate": 7.009738883279802e-06, "loss": 19.9788, "step": 215750 }, { "epoch": 0.4358488507860066, "grad_norm": 260.9577331542969, "learning_rate": 7.009419251840627e-06, "loss": 23.0658, "step": 215760 }, { "epoch": 0.4358690514186904, "grad_norm": 200.27781677246094, "learning_rate": 7.0090996106077855e-06, "loss": 16.2399, "step": 215770 }, { "epoch": 0.43588925205137424, "grad_norm": 558.4069213867188, "learning_rate": 7.008779959582838e-06, "loss": 33.8017, "step": 215780 }, { "epoch": 0.43590945268405806, "grad_norm": 409.48443603515625, "learning_rate": 7.008460298767341e-06, "loss": 17.3062, "step": 215790 }, { "epoch": 0.4359296533167419, "grad_norm": 188.6319122314453, "learning_rate": 7.008140628162851e-06, "loss": 27.7887, "step": 215800 }, { "epoch": 0.4359498539494257, "grad_norm": 521.9065551757812, "learning_rate": 
7.007820947770927e-06, "loss": 13.1792, "step": 215810 }, { "epoch": 0.4359700545821095, "grad_norm": 1069.1414794921875, "learning_rate": 7.007501257593126e-06, "loss": 28.8951, "step": 215820 }, { "epoch": 0.43599025521479334, "grad_norm": 448.6017761230469, "learning_rate": 7.0071815576310085e-06, "loss": 22.1577, "step": 215830 }, { "epoch": 0.43601045584747716, "grad_norm": 487.28936767578125, "learning_rate": 7.006861847886133e-06, "loss": 18.2779, "step": 215840 }, { "epoch": 0.436030656480161, "grad_norm": 676.8814697265625, "learning_rate": 7.006542128360054e-06, "loss": 20.9958, "step": 215850 }, { "epoch": 0.43605085711284475, "grad_norm": 53.80535125732422, "learning_rate": 7.006222399054334e-06, "loss": 14.9174, "step": 215860 }, { "epoch": 0.43607105774552857, "grad_norm": 377.7115783691406, "learning_rate": 7.005902659970528e-06, "loss": 22.759, "step": 215870 }, { "epoch": 0.4360912583782124, "grad_norm": 105.19438934326172, "learning_rate": 7.0055829111101954e-06, "loss": 22.7473, "step": 215880 }, { "epoch": 0.4361114590108962, "grad_norm": 594.7811889648438, "learning_rate": 7.005263152474896e-06, "loss": 17.0541, "step": 215890 }, { "epoch": 0.43613165964358, "grad_norm": 73.36730194091797, "learning_rate": 7.0049433840661875e-06, "loss": 13.0328, "step": 215900 }, { "epoch": 0.43615186027626385, "grad_norm": 130.21876525878906, "learning_rate": 7.004623605885628e-06, "loss": 17.4, "step": 215910 }, { "epoch": 0.43617206090894767, "grad_norm": 804.5039672851562, "learning_rate": 7.004303817934775e-06, "loss": 17.1268, "step": 215920 }, { "epoch": 0.4361922615416315, "grad_norm": 992.1928100585938, "learning_rate": 7.003984020215189e-06, "loss": 16.6337, "step": 215930 }, { "epoch": 0.4362124621743153, "grad_norm": 72.60612487792969, "learning_rate": 7.003664212728427e-06, "loss": 15.7889, "step": 215940 }, { "epoch": 0.4362326628069991, "grad_norm": 757.6968383789062, "learning_rate": 7.003344395476051e-06, "loss": 18.0091, "step": 215950 }, { "epoch": 0.43625286343968295, "grad_norm": 1845.9464111328125, "learning_rate": 7.003024568459614e-06, "loss": 37.3103, "step": 215960 }, { "epoch": 0.43627306407236677, "grad_norm": 293.3792724609375, "learning_rate": 7.002704731680682e-06, "loss": 19.8846, "step": 215970 }, { "epoch": 0.4362932647050506, "grad_norm": 773.8984985351562, "learning_rate": 7.0023848851408076e-06, "loss": 19.4425, "step": 215980 }, { "epoch": 0.43631346533773435, "grad_norm": 584.5510864257812, "learning_rate": 7.002065028841553e-06, "loss": 13.9668, "step": 215990 }, { "epoch": 0.43633366597041817, "grad_norm": 253.05511474609375, "learning_rate": 7.0017451627844765e-06, "loss": 14.3967, "step": 216000 }, { "epoch": 0.436353866603102, "grad_norm": 199.73092651367188, "learning_rate": 7.001425286971135e-06, "loss": 15.918, "step": 216010 }, { "epoch": 0.4363740672357858, "grad_norm": 307.1466369628906, "learning_rate": 7.001105401403091e-06, "loss": 14.1764, "step": 216020 }, { "epoch": 0.43639426786846963, "grad_norm": 338.7720947265625, "learning_rate": 7.000785506081902e-06, "loss": 26.9864, "step": 216030 }, { "epoch": 0.43641446850115345, "grad_norm": 511.8108825683594, "learning_rate": 7.000465601009126e-06, "loss": 15.8379, "step": 216040 }, { "epoch": 0.43643466913383727, "grad_norm": 679.4248657226562, "learning_rate": 7.0001456861863236e-06, "loss": 23.5186, "step": 216050 }, { "epoch": 0.4364548697665211, "grad_norm": 295.2950439453125, "learning_rate": 6.999825761615055e-06, "loss": 15.3735, "step": 216060 }, { "epoch": 
0.4364750703992049, "grad_norm": 46.097312927246094, "learning_rate": 6.999505827296877e-06, "loss": 18.0488, "step": 216070 }, { "epoch": 0.43649527103188873, "grad_norm": 473.28607177734375, "learning_rate": 6.999185883233351e-06, "loss": 30.239, "step": 216080 }, { "epoch": 0.43651547166457255, "grad_norm": 194.43380737304688, "learning_rate": 6.998865929426035e-06, "loss": 8.9002, "step": 216090 }, { "epoch": 0.43653567229725637, "grad_norm": 463.8258361816406, "learning_rate": 6.998545965876489e-06, "loss": 16.6704, "step": 216100 }, { "epoch": 0.4365558729299402, "grad_norm": 449.43377685546875, "learning_rate": 6.998225992586273e-06, "loss": 28.3768, "step": 216110 }, { "epoch": 0.43657607356262396, "grad_norm": 538.9699096679688, "learning_rate": 6.997906009556943e-06, "loss": 19.3899, "step": 216120 }, { "epoch": 0.4365962741953078, "grad_norm": 500.4964599609375, "learning_rate": 6.997586016790065e-06, "loss": 12.6349, "step": 216130 }, { "epoch": 0.4366164748279916, "grad_norm": 287.2041320800781, "learning_rate": 6.997266014287193e-06, "loss": 20.6508, "step": 216140 }, { "epoch": 0.4366366754606754, "grad_norm": 228.54196166992188, "learning_rate": 6.996946002049889e-06, "loss": 14.7713, "step": 216150 }, { "epoch": 0.43665687609335924, "grad_norm": 634.4945068359375, "learning_rate": 6.9966259800797124e-06, "loss": 29.2191, "step": 216160 }, { "epoch": 0.43667707672604306, "grad_norm": 224.03050231933594, "learning_rate": 6.996305948378223e-06, "loss": 18.9435, "step": 216170 }, { "epoch": 0.4366972773587269, "grad_norm": 117.95034790039062, "learning_rate": 6.995985906946982e-06, "loss": 34.8892, "step": 216180 }, { "epoch": 0.4367174779914107, "grad_norm": 339.0070495605469, "learning_rate": 6.995665855787546e-06, "loss": 13.897, "step": 216190 }, { "epoch": 0.4367376786240945, "grad_norm": 437.2546691894531, "learning_rate": 6.995345794901477e-06, "loss": 13.7998, "step": 216200 }, { "epoch": 0.43675787925677834, "grad_norm": 409.4122619628906, "learning_rate": 6.995025724290334e-06, "loss": 11.4524, "step": 216210 }, { "epoch": 0.43677807988946216, "grad_norm": 552.8837890625, "learning_rate": 6.994705643955678e-06, "loss": 21.521, "step": 216220 }, { "epoch": 0.436798280522146, "grad_norm": 403.9027099609375, "learning_rate": 6.994385553899069e-06, "loss": 20.7522, "step": 216230 }, { "epoch": 0.4368184811548298, "grad_norm": 280.6409912109375, "learning_rate": 6.9940654541220675e-06, "loss": 26.3352, "step": 216240 }, { "epoch": 0.43683868178751356, "grad_norm": 434.9540100097656, "learning_rate": 6.993745344626232e-06, "loss": 28.3306, "step": 216250 }, { "epoch": 0.4368588824201974, "grad_norm": 170.8972625732422, "learning_rate": 6.993425225413123e-06, "loss": 20.7315, "step": 216260 }, { "epoch": 0.4368790830528812, "grad_norm": 142.85736083984375, "learning_rate": 6.993105096484303e-06, "loss": 32.1516, "step": 216270 }, { "epoch": 0.436899283685565, "grad_norm": 128.5509796142578, "learning_rate": 6.992784957841328e-06, "loss": 11.9072, "step": 216280 }, { "epoch": 0.43691948431824884, "grad_norm": 327.773193359375, "learning_rate": 6.9924648094857625e-06, "loss": 23.4574, "step": 216290 }, { "epoch": 0.43693968495093266, "grad_norm": 327.75848388671875, "learning_rate": 6.992144651419163e-06, "loss": 11.3785, "step": 216300 }, { "epoch": 0.4369598855836165, "grad_norm": 161.98997497558594, "learning_rate": 6.991824483643095e-06, "loss": 20.4968, "step": 216310 }, { "epoch": 0.4369800862163003, "grad_norm": 442.284423828125, "learning_rate": 6.991504306159115e-06, 
"loss": 10.4146, "step": 216320 }, { "epoch": 0.4370002868489841, "grad_norm": 244.4092559814453, "learning_rate": 6.991184118968783e-06, "loss": 20.7462, "step": 216330 }, { "epoch": 0.43702048748166794, "grad_norm": 0.0, "learning_rate": 6.990863922073664e-06, "loss": 15.3136, "step": 216340 }, { "epoch": 0.43704068811435176, "grad_norm": 487.59368896484375, "learning_rate": 6.990543715475314e-06, "loss": 21.1463, "step": 216350 }, { "epoch": 0.4370608887470356, "grad_norm": 315.670654296875, "learning_rate": 6.9902234991752945e-06, "loss": 14.6671, "step": 216360 }, { "epoch": 0.43708108937971935, "grad_norm": 662.347900390625, "learning_rate": 6.989903273175169e-06, "loss": 68.7131, "step": 216370 }, { "epoch": 0.43710129001240317, "grad_norm": 312.63812255859375, "learning_rate": 6.989583037476495e-06, "loss": 18.9875, "step": 216380 }, { "epoch": 0.437121490645087, "grad_norm": 1367.5877685546875, "learning_rate": 6.989262792080834e-06, "loss": 22.7746, "step": 216390 }, { "epoch": 0.4371416912777708, "grad_norm": 18.924123764038086, "learning_rate": 6.98894253698975e-06, "loss": 18.3445, "step": 216400 }, { "epoch": 0.4371618919104546, "grad_norm": 238.9813690185547, "learning_rate": 6.988622272204799e-06, "loss": 23.9051, "step": 216410 }, { "epoch": 0.43718209254313845, "grad_norm": 231.65159606933594, "learning_rate": 6.9883019977275446e-06, "loss": 20.7348, "step": 216420 }, { "epoch": 0.43720229317582227, "grad_norm": 356.8558349609375, "learning_rate": 6.987981713559548e-06, "loss": 19.909, "step": 216430 }, { "epoch": 0.4372224938085061, "grad_norm": 564.163330078125, "learning_rate": 6.98766141970237e-06, "loss": 12.3083, "step": 216440 }, { "epoch": 0.4372426944411899, "grad_norm": 188.4165802001953, "learning_rate": 6.987341116157571e-06, "loss": 15.2723, "step": 216450 }, { "epoch": 0.4372628950738737, "grad_norm": 297.6818542480469, "learning_rate": 6.987020802926711e-06, "loss": 20.2872, "step": 216460 }, { "epoch": 0.43728309570655755, "grad_norm": 532.7806396484375, "learning_rate": 6.986700480011353e-06, "loss": 13.1729, "step": 216470 }, { "epoch": 0.43730329633924137, "grad_norm": 70.1969985961914, "learning_rate": 6.986380147413059e-06, "loss": 18.947, "step": 216480 }, { "epoch": 0.4373234969719252, "grad_norm": 120.35257720947266, "learning_rate": 6.986059805133389e-06, "loss": 13.2831, "step": 216490 }, { "epoch": 0.43734369760460895, "grad_norm": 105.15730285644531, "learning_rate": 6.985739453173903e-06, "loss": 19.2324, "step": 216500 }, { "epoch": 0.43736389823729277, "grad_norm": 7.287929534912109, "learning_rate": 6.985419091536167e-06, "loss": 50.7144, "step": 216510 }, { "epoch": 0.4373840988699766, "grad_norm": 258.4720458984375, "learning_rate": 6.985098720221736e-06, "loss": 35.159, "step": 216520 }, { "epoch": 0.4374042995026604, "grad_norm": 632.69677734375, "learning_rate": 6.984778339232174e-06, "loss": 18.8524, "step": 216530 }, { "epoch": 0.43742450013534423, "grad_norm": 504.6958312988281, "learning_rate": 6.984457948569045e-06, "loss": 23.1049, "step": 216540 }, { "epoch": 0.43744470076802805, "grad_norm": 799.5957641601562, "learning_rate": 6.984137548233909e-06, "loss": 19.1404, "step": 216550 }, { "epoch": 0.43746490140071187, "grad_norm": 452.9576721191406, "learning_rate": 6.983817138228327e-06, "loss": 24.7578, "step": 216560 }, { "epoch": 0.4374851020333957, "grad_norm": 709.0180053710938, "learning_rate": 6.983496718553859e-06, "loss": 25.9065, "step": 216570 }, { "epoch": 0.4375053026660795, "grad_norm": 290.4211730957031, 
"learning_rate": 6.98317628921207e-06, "loss": 16.1081, "step": 216580 }, { "epoch": 0.43752550329876333, "grad_norm": 191.25637817382812, "learning_rate": 6.982855850204521e-06, "loss": 13.7262, "step": 216590 }, { "epoch": 0.43754570393144715, "grad_norm": 530.4448852539062, "learning_rate": 6.9825354015327715e-06, "loss": 41.3313, "step": 216600 }, { "epoch": 0.43756590456413097, "grad_norm": 155.168212890625, "learning_rate": 6.9822149431983865e-06, "loss": 20.3135, "step": 216610 }, { "epoch": 0.4375861051968148, "grad_norm": 409.7743835449219, "learning_rate": 6.981894475202924e-06, "loss": 19.4899, "step": 216620 }, { "epoch": 0.43760630582949855, "grad_norm": 627.4774169921875, "learning_rate": 6.981573997547951e-06, "loss": 22.509, "step": 216630 }, { "epoch": 0.4376265064621824, "grad_norm": 423.2906494140625, "learning_rate": 6.981253510235025e-06, "loss": 26.1528, "step": 216640 }, { "epoch": 0.4376467070948662, "grad_norm": 391.2195129394531, "learning_rate": 6.98093301326571e-06, "loss": 41.3183, "step": 216650 }, { "epoch": 0.43766690772755, "grad_norm": 507.5215759277344, "learning_rate": 6.980612506641567e-06, "loss": 14.901, "step": 216660 }, { "epoch": 0.43768710836023383, "grad_norm": 401.25927734375, "learning_rate": 6.9802919903641605e-06, "loss": 11.358, "step": 216670 }, { "epoch": 0.43770730899291765, "grad_norm": 378.6192626953125, "learning_rate": 6.9799714644350504e-06, "loss": 23.1479, "step": 216680 }, { "epoch": 0.4377275096256015, "grad_norm": 244.60025024414062, "learning_rate": 6.979650928855799e-06, "loss": 18.0241, "step": 216690 }, { "epoch": 0.4377477102582853, "grad_norm": 659.0491333007812, "learning_rate": 6.979330383627969e-06, "loss": 30.2683, "step": 216700 }, { "epoch": 0.4377679108909691, "grad_norm": 385.2704772949219, "learning_rate": 6.9790098287531225e-06, "loss": 8.4695, "step": 216710 }, { "epoch": 0.43778811152365293, "grad_norm": 0.0, "learning_rate": 6.978689264232824e-06, "loss": 28.6678, "step": 216720 }, { "epoch": 0.43780831215633675, "grad_norm": 408.3829345703125, "learning_rate": 6.978368690068632e-06, "loss": 13.1668, "step": 216730 }, { "epoch": 0.4378285127890206, "grad_norm": 107.53667449951172, "learning_rate": 6.9780481062621115e-06, "loss": 23.9724, "step": 216740 }, { "epoch": 0.4378487134217044, "grad_norm": 707.2608032226562, "learning_rate": 6.977727512814826e-06, "loss": 31.421, "step": 216750 }, { "epoch": 0.43786891405438816, "grad_norm": 634.0804443359375, "learning_rate": 6.977406909728335e-06, "loss": 22.6222, "step": 216760 }, { "epoch": 0.437889114687072, "grad_norm": 9.855452537536621, "learning_rate": 6.9770862970042015e-06, "loss": 11.3352, "step": 216770 }, { "epoch": 0.4379093153197558, "grad_norm": 424.2359924316406, "learning_rate": 6.97676567464399e-06, "loss": 17.6337, "step": 216780 }, { "epoch": 0.4379295159524396, "grad_norm": 414.2570495605469, "learning_rate": 6.976445042649265e-06, "loss": 13.9813, "step": 216790 }, { "epoch": 0.43794971658512344, "grad_norm": 434.4364318847656, "learning_rate": 6.976124401021583e-06, "loss": 54.4137, "step": 216800 }, { "epoch": 0.43796991721780726, "grad_norm": 276.7896423339844, "learning_rate": 6.975803749762512e-06, "loss": 16.4736, "step": 216810 }, { "epoch": 0.4379901178504911, "grad_norm": 531.3176879882812, "learning_rate": 6.975483088873613e-06, "loss": 29.0812, "step": 216820 }, { "epoch": 0.4380103184831749, "grad_norm": 423.0602111816406, "learning_rate": 6.975162418356448e-06, "loss": 36.8104, "step": 216830 }, { "epoch": 0.4380305191158587, 
"grad_norm": 3095.189453125, "learning_rate": 6.974841738212581e-06, "loss": 26.0828, "step": 216840 }, { "epoch": 0.43805071974854254, "grad_norm": 116.5901870727539, "learning_rate": 6.974521048443577e-06, "loss": 17.5627, "step": 216850 }, { "epoch": 0.43807092038122636, "grad_norm": 175.02687072753906, "learning_rate": 6.974200349050996e-06, "loss": 12.1302, "step": 216860 }, { "epoch": 0.4380911210139102, "grad_norm": 485.75958251953125, "learning_rate": 6.9738796400364005e-06, "loss": 17.7153, "step": 216870 }, { "epoch": 0.438111321646594, "grad_norm": 255.5109100341797, "learning_rate": 6.973558921401356e-06, "loss": 17.5615, "step": 216880 }, { "epoch": 0.43813152227927776, "grad_norm": 117.12784576416016, "learning_rate": 6.973238193147425e-06, "loss": 20.0242, "step": 216890 }, { "epoch": 0.4381517229119616, "grad_norm": 122.49821472167969, "learning_rate": 6.97291745527617e-06, "loss": 31.9352, "step": 216900 }, { "epoch": 0.4381719235446454, "grad_norm": 884.3511962890625, "learning_rate": 6.972596707789154e-06, "loss": 21.7809, "step": 216910 }, { "epoch": 0.4381921241773292, "grad_norm": 9.070160865783691, "learning_rate": 6.972275950687942e-06, "loss": 20.1085, "step": 216920 }, { "epoch": 0.43821232481001304, "grad_norm": 374.5420227050781, "learning_rate": 6.9719551839740964e-06, "loss": 13.6864, "step": 216930 }, { "epoch": 0.43823252544269686, "grad_norm": 780.5281982421875, "learning_rate": 6.971634407649179e-06, "loss": 25.9084, "step": 216940 }, { "epoch": 0.4382527260753807, "grad_norm": 31.735071182250977, "learning_rate": 6.971313621714756e-06, "loss": 23.8748, "step": 216950 }, { "epoch": 0.4382729267080645, "grad_norm": 383.90399169921875, "learning_rate": 6.970992826172389e-06, "loss": 44.4712, "step": 216960 }, { "epoch": 0.4382931273407483, "grad_norm": 389.1132507324219, "learning_rate": 6.970672021023641e-06, "loss": 12.3641, "step": 216970 }, { "epoch": 0.43831332797343214, "grad_norm": 405.3794860839844, "learning_rate": 6.970351206270079e-06, "loss": 30.3496, "step": 216980 }, { "epoch": 0.43833352860611596, "grad_norm": 328.0166015625, "learning_rate": 6.970030381913262e-06, "loss": 20.1953, "step": 216990 }, { "epoch": 0.4383537292387998, "grad_norm": 261.0073547363281, "learning_rate": 6.9697095479547564e-06, "loss": 17.1022, "step": 217000 }, { "epoch": 0.43837392987148355, "grad_norm": 274.3747863769531, "learning_rate": 6.969388704396126e-06, "loss": 20.3654, "step": 217010 }, { "epoch": 0.43839413050416737, "grad_norm": 481.7867126464844, "learning_rate": 6.969067851238933e-06, "loss": 14.2299, "step": 217020 }, { "epoch": 0.4384143311368512, "grad_norm": 18.3186092376709, "learning_rate": 6.968746988484742e-06, "loss": 20.9748, "step": 217030 }, { "epoch": 0.438434531769535, "grad_norm": 316.3203125, "learning_rate": 6.968426116135118e-06, "loss": 27.5381, "step": 217040 }, { "epoch": 0.4384547324022188, "grad_norm": 562.3090209960938, "learning_rate": 6.968105234191623e-06, "loss": 31.5549, "step": 217050 }, { "epoch": 0.43847493303490265, "grad_norm": 532.0624389648438, "learning_rate": 6.9677843426558235e-06, "loss": 23.4905, "step": 217060 }, { "epoch": 0.43849513366758647, "grad_norm": 282.7160339355469, "learning_rate": 6.967463441529278e-06, "loss": 22.0337, "step": 217070 }, { "epoch": 0.4385153343002703, "grad_norm": 542.9619140625, "learning_rate": 6.967142530813558e-06, "loss": 22.199, "step": 217080 }, { "epoch": 0.4385355349329541, "grad_norm": 66.30181884765625, "learning_rate": 6.966821610510222e-06, "loss": 19.1166, "step": 217090 
}, { "epoch": 0.4385557355656379, "grad_norm": 444.0246887207031, "learning_rate": 6.966500680620837e-06, "loss": 31.164, "step": 217100 }, { "epoch": 0.43857593619832175, "grad_norm": 257.4731140136719, "learning_rate": 6.966179741146966e-06, "loss": 15.1363, "step": 217110 }, { "epoch": 0.43859613683100557, "grad_norm": 441.1191101074219, "learning_rate": 6.965858792090174e-06, "loss": 15.4256, "step": 217120 }, { "epoch": 0.4386163374636894, "grad_norm": 88.1796646118164, "learning_rate": 6.965537833452024e-06, "loss": 30.0568, "step": 217130 }, { "epoch": 0.43863653809637315, "grad_norm": 404.3753967285156, "learning_rate": 6.9652168652340804e-06, "loss": 23.0397, "step": 217140 }, { "epoch": 0.43865673872905697, "grad_norm": 726.0257568359375, "learning_rate": 6.9648958874379084e-06, "loss": 17.76, "step": 217150 }, { "epoch": 0.4386769393617408, "grad_norm": 195.17135620117188, "learning_rate": 6.964574900065072e-06, "loss": 16.7495, "step": 217160 }, { "epoch": 0.4386971399944246, "grad_norm": 626.55029296875, "learning_rate": 6.964253903117138e-06, "loss": 14.5461, "step": 217170 }, { "epoch": 0.43871734062710843, "grad_norm": 391.9192810058594, "learning_rate": 6.963932896595665e-06, "loss": 34.7739, "step": 217180 }, { "epoch": 0.43873754125979225, "grad_norm": 13.609729766845703, "learning_rate": 6.963611880502225e-06, "loss": 102.5089, "step": 217190 }, { "epoch": 0.43875774189247607, "grad_norm": 424.7698669433594, "learning_rate": 6.963290854838376e-06, "loss": 17.1916, "step": 217200 }, { "epoch": 0.4387779425251599, "grad_norm": 598.4286499023438, "learning_rate": 6.962969819605686e-06, "loss": 16.3738, "step": 217210 }, { "epoch": 0.4387981431578437, "grad_norm": 51.94447708129883, "learning_rate": 6.96264877480572e-06, "loss": 14.15, "step": 217220 }, { "epoch": 0.43881834379052753, "grad_norm": 5.548483371734619, "learning_rate": 6.96232772044004e-06, "loss": 22.1524, "step": 217230 }, { "epoch": 0.43883854442321135, "grad_norm": 883.7322998046875, "learning_rate": 6.962006656510216e-06, "loss": 24.5439, "step": 217240 }, { "epoch": 0.43885874505589517, "grad_norm": 505.0666809082031, "learning_rate": 6.961685583017808e-06, "loss": 13.2514, "step": 217250 }, { "epoch": 0.438878945688579, "grad_norm": 446.1742858886719, "learning_rate": 6.961364499964383e-06, "loss": 20.9679, "step": 217260 }, { "epoch": 0.43889914632126276, "grad_norm": 307.6526184082031, "learning_rate": 6.961043407351505e-06, "loss": 29.1635, "step": 217270 }, { "epoch": 0.4389193469539466, "grad_norm": 246.08106994628906, "learning_rate": 6.960722305180737e-06, "loss": 17.7958, "step": 217280 }, { "epoch": 0.4389395475866304, "grad_norm": 671.756591796875, "learning_rate": 6.9604011934536495e-06, "loss": 26.1312, "step": 217290 }, { "epoch": 0.4389597482193142, "grad_norm": 52.21709442138672, "learning_rate": 6.960080072171802e-06, "loss": 11.9219, "step": 217300 }, { "epoch": 0.43897994885199804, "grad_norm": 382.4366455078125, "learning_rate": 6.959758941336762e-06, "loss": 26.336, "step": 217310 }, { "epoch": 0.43900014948468186, "grad_norm": 495.8280334472656, "learning_rate": 6.959437800950097e-06, "loss": 25.7054, "step": 217320 }, { "epoch": 0.4390203501173657, "grad_norm": 1479.1221923828125, "learning_rate": 6.959116651013369e-06, "loss": 46.8976, "step": 217330 }, { "epoch": 0.4390405507500495, "grad_norm": 2274.858642578125, "learning_rate": 6.958795491528142e-06, "loss": 46.3309, "step": 217340 }, { "epoch": 0.4390607513827333, "grad_norm": 584.8311157226562, "learning_rate": 
6.9584743224959846e-06, "loss": 17.9077, "step": 217350 }, { "epoch": 0.43908095201541714, "grad_norm": 705.3248901367188, "learning_rate": 6.958153143918462e-06, "loss": 14.4178, "step": 217360 }, { "epoch": 0.43910115264810096, "grad_norm": 208.1329803466797, "learning_rate": 6.957831955797137e-06, "loss": 14.8227, "step": 217370 }, { "epoch": 0.4391213532807848, "grad_norm": 682.8501586914062, "learning_rate": 6.957510758133579e-06, "loss": 27.366, "step": 217380 }, { "epoch": 0.4391415539134686, "grad_norm": 204.91993713378906, "learning_rate": 6.957189550929346e-06, "loss": 21.1082, "step": 217390 }, { "epoch": 0.43916175454615236, "grad_norm": 384.63140869140625, "learning_rate": 6.9568683341860135e-06, "loss": 15.1485, "step": 217400 }, { "epoch": 0.4391819551788362, "grad_norm": 491.8144836425781, "learning_rate": 6.9565471079051395e-06, "loss": 19.7581, "step": 217410 }, { "epoch": 0.43920215581152, "grad_norm": 814.9802856445312, "learning_rate": 6.956225872088292e-06, "loss": 11.183, "step": 217420 }, { "epoch": 0.4392223564442038, "grad_norm": 157.53225708007812, "learning_rate": 6.9559046267370375e-06, "loss": 18.0584, "step": 217430 }, { "epoch": 0.43924255707688764, "grad_norm": 151.10498046875, "learning_rate": 6.955583371852942e-06, "loss": 17.8701, "step": 217440 }, { "epoch": 0.43926275770957146, "grad_norm": 848.97607421875, "learning_rate": 6.95526210743757e-06, "loss": 26.1495, "step": 217450 }, { "epoch": 0.4392829583422553, "grad_norm": 218.88198852539062, "learning_rate": 6.954940833492487e-06, "loss": 12.4294, "step": 217460 }, { "epoch": 0.4393031589749391, "grad_norm": 418.4537048339844, "learning_rate": 6.954619550019259e-06, "loss": 22.6387, "step": 217470 }, { "epoch": 0.4393233596076229, "grad_norm": 504.738525390625, "learning_rate": 6.954298257019454e-06, "loss": 15.7061, "step": 217480 }, { "epoch": 0.43934356024030674, "grad_norm": 1237.7802734375, "learning_rate": 6.953976954494636e-06, "loss": 24.4484, "step": 217490 }, { "epoch": 0.43936376087299056, "grad_norm": 202.9380645751953, "learning_rate": 6.953655642446368e-06, "loss": 22.1801, "step": 217500 }, { "epoch": 0.4393839615056744, "grad_norm": 251.24620056152344, "learning_rate": 6.953334320876224e-06, "loss": 16.4192, "step": 217510 }, { "epoch": 0.4394041621383582, "grad_norm": 571.8933715820312, "learning_rate": 6.9530129897857626e-06, "loss": 17.6492, "step": 217520 }, { "epoch": 0.43942436277104197, "grad_norm": 291.2666015625, "learning_rate": 6.952691649176554e-06, "loss": 39.2445, "step": 217530 }, { "epoch": 0.4394445634037258, "grad_norm": 227.22714233398438, "learning_rate": 6.952370299050163e-06, "loss": 15.9309, "step": 217540 }, { "epoch": 0.4394647640364096, "grad_norm": 772.4575805664062, "learning_rate": 6.952048939408156e-06, "loss": 27.742, "step": 217550 }, { "epoch": 0.4394849646690934, "grad_norm": 367.7803649902344, "learning_rate": 6.9517275702521e-06, "loss": 13.9836, "step": 217560 }, { "epoch": 0.43950516530177725, "grad_norm": 1871.306396484375, "learning_rate": 6.9514061915835584e-06, "loss": 47.7978, "step": 217570 }, { "epoch": 0.43952536593446107, "grad_norm": 611.9116821289062, "learning_rate": 6.9510848034041e-06, "loss": 44.3175, "step": 217580 }, { "epoch": 0.4395455665671449, "grad_norm": 442.2686462402344, "learning_rate": 6.950763405715292e-06, "loss": 25.4758, "step": 217590 }, { "epoch": 0.4395657671998287, "grad_norm": 166.6293182373047, "learning_rate": 6.950441998518699e-06, "loss": 21.7418, "step": 217600 }, { "epoch": 0.4395859678325125, "grad_norm": 
649.2433471679688, "learning_rate": 6.950120581815889e-06, "loss": 46.6833, "step": 217610 }, { "epoch": 0.43960616846519635, "grad_norm": 182.2321014404297, "learning_rate": 6.9497991556084275e-06, "loss": 15.3815, "step": 217620 }, { "epoch": 0.43962636909788017, "grad_norm": 702.5481567382812, "learning_rate": 6.9494777198978815e-06, "loss": 16.7604, "step": 217630 }, { "epoch": 0.439646569730564, "grad_norm": 124.78889465332031, "learning_rate": 6.949156274685818e-06, "loss": 15.8259, "step": 217640 }, { "epoch": 0.43966677036324775, "grad_norm": 327.0403137207031, "learning_rate": 6.948834819973803e-06, "loss": 19.646, "step": 217650 }, { "epoch": 0.43968697099593157, "grad_norm": 238.36106872558594, "learning_rate": 6.948513355763402e-06, "loss": 24.9178, "step": 217660 }, { "epoch": 0.4397071716286154, "grad_norm": 343.8514404296875, "learning_rate": 6.948191882056185e-06, "loss": 28.2836, "step": 217670 }, { "epoch": 0.4397273722612992, "grad_norm": 1091.2371826171875, "learning_rate": 6.947870398853716e-06, "loss": 37.9444, "step": 217680 }, { "epoch": 0.43974757289398303, "grad_norm": 5.781420707702637, "learning_rate": 6.947548906157563e-06, "loss": 18.4193, "step": 217690 }, { "epoch": 0.43976777352666685, "grad_norm": 294.8966064453125, "learning_rate": 6.947227403969293e-06, "loss": 17.5239, "step": 217700 }, { "epoch": 0.43978797415935067, "grad_norm": 380.2729187011719, "learning_rate": 6.946905892290473e-06, "loss": 30.6968, "step": 217710 }, { "epoch": 0.4398081747920345, "grad_norm": 121.35004425048828, "learning_rate": 6.946584371122671e-06, "loss": 8.2712, "step": 217720 }, { "epoch": 0.4398283754247183, "grad_norm": 403.33270263671875, "learning_rate": 6.94626284046745e-06, "loss": 17.1258, "step": 217730 }, { "epoch": 0.43984857605740213, "grad_norm": 856.6432495117188, "learning_rate": 6.945941300326382e-06, "loss": 26.1067, "step": 217740 }, { "epoch": 0.43986877669008595, "grad_norm": 500.9569091796875, "learning_rate": 6.9456197507010315e-06, "loss": 26.9629, "step": 217750 }, { "epoch": 0.43988897732276977, "grad_norm": 193.00595092773438, "learning_rate": 6.945298191592967e-06, "loss": 23.0274, "step": 217760 }, { "epoch": 0.4399091779554536, "grad_norm": 256.3966369628906, "learning_rate": 6.944976623003754e-06, "loss": 33.6679, "step": 217770 }, { "epoch": 0.43992937858813735, "grad_norm": 1096.0462646484375, "learning_rate": 6.944655044934962e-06, "loss": 18.0047, "step": 217780 }, { "epoch": 0.4399495792208212, "grad_norm": 347.51373291015625, "learning_rate": 6.944333457388156e-06, "loss": 21.4259, "step": 217790 }, { "epoch": 0.439969779853505, "grad_norm": 256.20904541015625, "learning_rate": 6.944011860364905e-06, "loss": 30.3539, "step": 217800 }, { "epoch": 0.4399899804861888, "grad_norm": 359.13592529296875, "learning_rate": 6.9436902538667775e-06, "loss": 20.0677, "step": 217810 }, { "epoch": 0.44001018111887263, "grad_norm": 349.65802001953125, "learning_rate": 6.943368637895338e-06, "loss": 27.8022, "step": 217820 }, { "epoch": 0.44003038175155645, "grad_norm": 624.5690307617188, "learning_rate": 6.943047012452156e-06, "loss": 12.4032, "step": 217830 }, { "epoch": 0.4400505823842403, "grad_norm": 460.8302917480469, "learning_rate": 6.942725377538797e-06, "loss": 28.531, "step": 217840 }, { "epoch": 0.4400707830169241, "grad_norm": 397.0054016113281, "learning_rate": 6.942403733156832e-06, "loss": 17.3211, "step": 217850 }, { "epoch": 0.4400909836496079, "grad_norm": 149.7487335205078, "learning_rate": 6.942082079307826e-06, "loss": 20.93, "step": 
217860 }, { "epoch": 0.44011118428229173, "grad_norm": 281.08526611328125, "learning_rate": 6.941760415993346e-06, "loss": 19.6336, "step": 217870 }, { "epoch": 0.44013138491497555, "grad_norm": 406.5328674316406, "learning_rate": 6.941438743214963e-06, "loss": 16.7679, "step": 217880 }, { "epoch": 0.4401515855476594, "grad_norm": 326.6893005371094, "learning_rate": 6.941117060974243e-06, "loss": 23.4696, "step": 217890 }, { "epoch": 0.4401717861803432, "grad_norm": 362.5688781738281, "learning_rate": 6.940795369272754e-06, "loss": 25.0945, "step": 217900 }, { "epoch": 0.44019198681302696, "grad_norm": 204.62315368652344, "learning_rate": 6.940473668112063e-06, "loss": 25.728, "step": 217910 }, { "epoch": 0.4402121874457108, "grad_norm": 332.57855224609375, "learning_rate": 6.940151957493739e-06, "loss": 21.0858, "step": 217920 }, { "epoch": 0.4402323880783946, "grad_norm": 79.63465881347656, "learning_rate": 6.939830237419349e-06, "loss": 15.6943, "step": 217930 }, { "epoch": 0.4402525887110784, "grad_norm": 238.8327178955078, "learning_rate": 6.939508507890464e-06, "loss": 20.8919, "step": 217940 }, { "epoch": 0.44027278934376224, "grad_norm": 681.3981323242188, "learning_rate": 6.939186768908647e-06, "loss": 39.2945, "step": 217950 }, { "epoch": 0.44029298997644606, "grad_norm": 474.71160888671875, "learning_rate": 6.938865020475471e-06, "loss": 34.5285, "step": 217960 }, { "epoch": 0.4403131906091299, "grad_norm": 596.3753051757812, "learning_rate": 6.9385432625925006e-06, "loss": 20.1723, "step": 217970 }, { "epoch": 0.4403333912418137, "grad_norm": 409.4012145996094, "learning_rate": 6.938221495261306e-06, "loss": 19.905, "step": 217980 }, { "epoch": 0.4403535918744975, "grad_norm": 393.483154296875, "learning_rate": 6.937899718483456e-06, "loss": 17.1246, "step": 217990 }, { "epoch": 0.44037379250718134, "grad_norm": 278.825927734375, "learning_rate": 6.9375779322605154e-06, "loss": 22.2015, "step": 218000 }, { "epoch": 0.44039399313986516, "grad_norm": 643.759765625, "learning_rate": 6.937256136594057e-06, "loss": 20.2364, "step": 218010 }, { "epoch": 0.440414193772549, "grad_norm": 357.60296630859375, "learning_rate": 6.936934331485646e-06, "loss": 18.5588, "step": 218020 }, { "epoch": 0.4404343944052328, "grad_norm": 159.6121826171875, "learning_rate": 6.936612516936852e-06, "loss": 24.4171, "step": 218030 }, { "epoch": 0.44045459503791656, "grad_norm": 198.94183349609375, "learning_rate": 6.936290692949243e-06, "loss": 16.4759, "step": 218040 }, { "epoch": 0.4404747956706004, "grad_norm": 256.5325622558594, "learning_rate": 6.935968859524389e-06, "loss": 24.06, "step": 218050 }, { "epoch": 0.4404949963032842, "grad_norm": 238.43252563476562, "learning_rate": 6.935647016663859e-06, "loss": 34.2544, "step": 218060 }, { "epoch": 0.440515196935968, "grad_norm": 321.5218811035156, "learning_rate": 6.935325164369219e-06, "loss": 32.8428, "step": 218070 }, { "epoch": 0.44053539756865184, "grad_norm": 54.25544357299805, "learning_rate": 6.935003302642038e-06, "loss": 19.1141, "step": 218080 }, { "epoch": 0.44055559820133566, "grad_norm": 422.23712158203125, "learning_rate": 6.934681431483886e-06, "loss": 23.3319, "step": 218090 }, { "epoch": 0.4405757988340195, "grad_norm": 150.90109252929688, "learning_rate": 6.934359550896332e-06, "loss": 21.2584, "step": 218100 }, { "epoch": 0.4405959994667033, "grad_norm": 514.95751953125, "learning_rate": 6.934037660880942e-06, "loss": 21.9861, "step": 218110 }, { "epoch": 0.4406162000993871, "grad_norm": 735.240966796875, "learning_rate": 
6.93371576143929e-06, "loss": 26.3322, "step": 218120 }, { "epoch": 0.44063640073207094, "grad_norm": 220.24258422851562, "learning_rate": 6.9333938525729396e-06, "loss": 17.7656, "step": 218130 }, { "epoch": 0.44065660136475476, "grad_norm": 145.22857666015625, "learning_rate": 6.9330719342834644e-06, "loss": 21.6354, "step": 218140 }, { "epoch": 0.4406768019974386, "grad_norm": 143.77865600585938, "learning_rate": 6.932750006572428e-06, "loss": 26.5324, "step": 218150 }, { "epoch": 0.4406970026301224, "grad_norm": 286.95574951171875, "learning_rate": 6.932428069441405e-06, "loss": 12.5363, "step": 218160 }, { "epoch": 0.44071720326280617, "grad_norm": 207.35633850097656, "learning_rate": 6.932106122891961e-06, "loss": 29.8767, "step": 218170 }, { "epoch": 0.44073740389549, "grad_norm": 323.6469421386719, "learning_rate": 6.931784166925667e-06, "loss": 23.2252, "step": 218180 }, { "epoch": 0.4407576045281738, "grad_norm": 99.18583679199219, "learning_rate": 6.93146220154409e-06, "loss": 12.3409, "step": 218190 }, { "epoch": 0.4407778051608576, "grad_norm": 283.7394104003906, "learning_rate": 6.9311402267488004e-06, "loss": 18.442, "step": 218200 }, { "epoch": 0.44079800579354145, "grad_norm": 216.76153564453125, "learning_rate": 6.930818242541368e-06, "loss": 18.5097, "step": 218210 }, { "epoch": 0.44081820642622527, "grad_norm": 311.0485534667969, "learning_rate": 6.9304962489233615e-06, "loss": 12.1743, "step": 218220 }, { "epoch": 0.4408384070589091, "grad_norm": 643.7454833984375, "learning_rate": 6.930174245896352e-06, "loss": 23.9982, "step": 218230 }, { "epoch": 0.4408586076915929, "grad_norm": 1482.6033935546875, "learning_rate": 6.929852233461906e-06, "loss": 52.5193, "step": 218240 }, { "epoch": 0.4408788083242767, "grad_norm": 505.75299072265625, "learning_rate": 6.929530211621593e-06, "loss": 29.9107, "step": 218250 }, { "epoch": 0.44089900895696055, "grad_norm": 690.9946899414062, "learning_rate": 6.9292081803769865e-06, "loss": 25.7389, "step": 218260 }, { "epoch": 0.44091920958964437, "grad_norm": 690.6004638671875, "learning_rate": 6.928886139729652e-06, "loss": 20.0891, "step": 218270 }, { "epoch": 0.4409394102223282, "grad_norm": 255.3621826171875, "learning_rate": 6.92856408968116e-06, "loss": 19.1689, "step": 218280 }, { "epoch": 0.44095961085501195, "grad_norm": 188.1383514404297, "learning_rate": 6.92824203023308e-06, "loss": 11.7769, "step": 218290 }, { "epoch": 0.44097981148769577, "grad_norm": 297.4443359375, "learning_rate": 6.927919961386984e-06, "loss": 23.7473, "step": 218300 }, { "epoch": 0.4410000121203796, "grad_norm": 460.88055419921875, "learning_rate": 6.927597883144439e-06, "loss": 15.4438, "step": 218310 }, { "epoch": 0.4410202127530634, "grad_norm": 400.71966552734375, "learning_rate": 6.9272757955070146e-06, "loss": 27.2158, "step": 218320 }, { "epoch": 0.44104041338574723, "grad_norm": 285.7626953125, "learning_rate": 6.926953698476284e-06, "loss": 15.3149, "step": 218330 }, { "epoch": 0.44106061401843105, "grad_norm": 693.8849487304688, "learning_rate": 6.926631592053812e-06, "loss": 25.8479, "step": 218340 }, { "epoch": 0.44108081465111487, "grad_norm": 493.76361083984375, "learning_rate": 6.926309476241174e-06, "loss": 10.8633, "step": 218350 }, { "epoch": 0.4411010152837987, "grad_norm": 228.04771423339844, "learning_rate": 6.925987351039936e-06, "loss": 21.5013, "step": 218360 }, { "epoch": 0.4411212159164825, "grad_norm": 432.6797790527344, "learning_rate": 6.925665216451669e-06, "loss": 12.5181, "step": 218370 }, { "epoch": 
0.44114141654916633, "grad_norm": 462.1549377441406, "learning_rate": 6.925343072477943e-06, "loss": 18.1943, "step": 218380 }, { "epoch": 0.44116161718185015, "grad_norm": 445.35955810546875, "learning_rate": 6.925020919120331e-06, "loss": 14.6549, "step": 218390 }, { "epoch": 0.44118181781453397, "grad_norm": 518.8099365234375, "learning_rate": 6.924698756380398e-06, "loss": 23.1013, "step": 218400 }, { "epoch": 0.4412020184472178, "grad_norm": 496.41107177734375, "learning_rate": 6.924376584259718e-06, "loss": 13.32, "step": 218410 }, { "epoch": 0.44122221907990156, "grad_norm": 1270.318359375, "learning_rate": 6.924054402759858e-06, "loss": 24.7812, "step": 218420 }, { "epoch": 0.4412424197125854, "grad_norm": 10.630510330200195, "learning_rate": 6.923732211882391e-06, "loss": 19.0398, "step": 218430 }, { "epoch": 0.4412626203452692, "grad_norm": 528.0415649414062, "learning_rate": 6.9234100116288886e-06, "loss": 19.2299, "step": 218440 }, { "epoch": 0.441282820977953, "grad_norm": 986.3802490234375, "learning_rate": 6.923087802000916e-06, "loss": 23.4119, "step": 218450 }, { "epoch": 0.44130302161063684, "grad_norm": 0.0, "learning_rate": 6.9227655830000485e-06, "loss": 38.5934, "step": 218460 }, { "epoch": 0.44132322224332066, "grad_norm": 287.9686279296875, "learning_rate": 6.922443354627855e-06, "loss": 20.5077, "step": 218470 }, { "epoch": 0.4413434228760045, "grad_norm": 509.0263671875, "learning_rate": 6.922121116885905e-06, "loss": 18.8017, "step": 218480 }, { "epoch": 0.4413636235086883, "grad_norm": 413.716552734375, "learning_rate": 6.921798869775769e-06, "loss": 15.485, "step": 218490 }, { "epoch": 0.4413838241413721, "grad_norm": 306.12896728515625, "learning_rate": 6.921476613299018e-06, "loss": 14.0145, "step": 218500 }, { "epoch": 0.44140402477405594, "grad_norm": 273.29681396484375, "learning_rate": 6.921154347457226e-06, "loss": 19.0963, "step": 218510 }, { "epoch": 0.44142422540673976, "grad_norm": 433.2372131347656, "learning_rate": 6.9208320722519594e-06, "loss": 23.0324, "step": 218520 }, { "epoch": 0.4414444260394236, "grad_norm": 486.3014221191406, "learning_rate": 6.920509787684789e-06, "loss": 18.287, "step": 218530 }, { "epoch": 0.4414646266721074, "grad_norm": 655.7376098632812, "learning_rate": 6.920187493757288e-06, "loss": 21.3021, "step": 218540 }, { "epoch": 0.44148482730479116, "grad_norm": 1174.218505859375, "learning_rate": 6.919865190471027e-06, "loss": 36.7262, "step": 218550 }, { "epoch": 0.441505027937475, "grad_norm": 222.35841369628906, "learning_rate": 6.919542877827573e-06, "loss": 14.9355, "step": 218560 }, { "epoch": 0.4415252285701588, "grad_norm": 563.6325073242188, "learning_rate": 6.919220555828502e-06, "loss": 24.8963, "step": 218570 }, { "epoch": 0.4415454292028426, "grad_norm": 188.27462768554688, "learning_rate": 6.918898224475382e-06, "loss": 26.0509, "step": 218580 }, { "epoch": 0.44156562983552644, "grad_norm": 415.9721984863281, "learning_rate": 6.918575883769784e-06, "loss": 17.2536, "step": 218590 }, { "epoch": 0.44158583046821026, "grad_norm": 365.4659423828125, "learning_rate": 6.9182535337132824e-06, "loss": 17.1436, "step": 218600 }, { "epoch": 0.4416060311008941, "grad_norm": 134.54942321777344, "learning_rate": 6.917931174307444e-06, "loss": 17.5852, "step": 218610 }, { "epoch": 0.4416262317335779, "grad_norm": 482.1897277832031, "learning_rate": 6.917608805553843e-06, "loss": 19.3952, "step": 218620 }, { "epoch": 0.4416464323662617, "grad_norm": 648.8010864257812, "learning_rate": 6.917286427454048e-06, "loss": 21.4208, 
"step": 218630 }, { "epoch": 0.44166663299894554, "grad_norm": 309.67236328125, "learning_rate": 6.916964040009631e-06, "loss": 20.092, "step": 218640 }, { "epoch": 0.44168683363162936, "grad_norm": 170.3636474609375, "learning_rate": 6.9166416432221636e-06, "loss": 20.4238, "step": 218650 }, { "epoch": 0.4417070342643132, "grad_norm": 241.44003295898438, "learning_rate": 6.916319237093219e-06, "loss": 25.8699, "step": 218660 }, { "epoch": 0.441727234896997, "grad_norm": 174.6161346435547, "learning_rate": 6.915996821624366e-06, "loss": 28.2445, "step": 218670 }, { "epoch": 0.44174743552968077, "grad_norm": 156.77670288085938, "learning_rate": 6.915674396817177e-06, "loss": 23.5601, "step": 218680 }, { "epoch": 0.4417676361623646, "grad_norm": 440.65863037109375, "learning_rate": 6.9153519626732225e-06, "loss": 28.8264, "step": 218690 }, { "epoch": 0.4417878367950484, "grad_norm": 316.6114501953125, "learning_rate": 6.915029519194076e-06, "loss": 17.6017, "step": 218700 }, { "epoch": 0.4418080374277322, "grad_norm": 210.33470153808594, "learning_rate": 6.914707066381308e-06, "loss": 21.3246, "step": 218710 }, { "epoch": 0.44182823806041605, "grad_norm": 116.99150848388672, "learning_rate": 6.914384604236488e-06, "loss": 14.9222, "step": 218720 }, { "epoch": 0.44184843869309987, "grad_norm": 176.82261657714844, "learning_rate": 6.914062132761192e-06, "loss": 30.5506, "step": 218730 }, { "epoch": 0.4418686393257837, "grad_norm": 772.8134765625, "learning_rate": 6.913739651956989e-06, "loss": 16.5471, "step": 218740 }, { "epoch": 0.4418888399584675, "grad_norm": 251.96591186523438, "learning_rate": 6.913417161825449e-06, "loss": 10.0527, "step": 218750 }, { "epoch": 0.4419090405911513, "grad_norm": 480.7281799316406, "learning_rate": 6.913094662368147e-06, "loss": 28.3984, "step": 218760 }, { "epoch": 0.44192924122383515, "grad_norm": 144.40867614746094, "learning_rate": 6.912772153586654e-06, "loss": 12.8837, "step": 218770 }, { "epoch": 0.44194944185651897, "grad_norm": 181.55413818359375, "learning_rate": 6.9124496354825435e-06, "loss": 17.7858, "step": 218780 }, { "epoch": 0.4419696424892028, "grad_norm": 337.3177185058594, "learning_rate": 6.912127108057381e-06, "loss": 20.0702, "step": 218790 }, { "epoch": 0.44198984312188655, "grad_norm": 575.0049438476562, "learning_rate": 6.911804571312746e-06, "loss": 45.3986, "step": 218800 }, { "epoch": 0.44201004375457037, "grad_norm": 318.1145935058594, "learning_rate": 6.911482025250207e-06, "loss": 18.0525, "step": 218810 }, { "epoch": 0.4420302443872542, "grad_norm": 183.11764526367188, "learning_rate": 6.911159469871335e-06, "loss": 22.6033, "step": 218820 }, { "epoch": 0.442050445019938, "grad_norm": 275.8320007324219, "learning_rate": 6.9108369051777045e-06, "loss": 28.4958, "step": 218830 }, { "epoch": 0.44207064565262183, "grad_norm": 471.90777587890625, "learning_rate": 6.910514331170888e-06, "loss": 17.0451, "step": 218840 }, { "epoch": 0.44209084628530565, "grad_norm": 406.13275146484375, "learning_rate": 6.910191747852455e-06, "loss": 33.3191, "step": 218850 }, { "epoch": 0.44211104691798947, "grad_norm": 95.6614990234375, "learning_rate": 6.909869155223978e-06, "loss": 20.2539, "step": 218860 }, { "epoch": 0.4421312475506733, "grad_norm": 217.68885803222656, "learning_rate": 6.909546553287032e-06, "loss": 33.5004, "step": 218870 }, { "epoch": 0.4421514481833571, "grad_norm": 498.65191650390625, "learning_rate": 6.909223942043187e-06, "loss": 17.5515, "step": 218880 }, { "epoch": 0.44217164881604093, "grad_norm": 545.9909057617188, 
"learning_rate": 6.908901321494017e-06, "loss": 25.4967, "step": 218890 }, { "epoch": 0.44219184944872475, "grad_norm": 597.2545166015625, "learning_rate": 6.908578691641092e-06, "loss": 16.2348, "step": 218900 }, { "epoch": 0.44221205008140857, "grad_norm": 441.7108459472656, "learning_rate": 6.9082560524859875e-06, "loss": 17.0352, "step": 218910 }, { "epoch": 0.4422322507140924, "grad_norm": 46.86520767211914, "learning_rate": 6.907933404030274e-06, "loss": 11.285, "step": 218920 }, { "epoch": 0.44225245134677615, "grad_norm": 321.8306579589844, "learning_rate": 6.907610746275524e-06, "loss": 16.6901, "step": 218930 }, { "epoch": 0.44227265197946, "grad_norm": 546.273193359375, "learning_rate": 6.907288079223311e-06, "loss": 24.1094, "step": 218940 }, { "epoch": 0.4422928526121438, "grad_norm": 152.28660583496094, "learning_rate": 6.906965402875207e-06, "loss": 13.4603, "step": 218950 }, { "epoch": 0.4423130532448276, "grad_norm": 220.2197723388672, "learning_rate": 6.906642717232786e-06, "loss": 9.6003, "step": 218960 }, { "epoch": 0.44233325387751143, "grad_norm": 496.92596435546875, "learning_rate": 6.906320022297618e-06, "loss": 17.6962, "step": 218970 }, { "epoch": 0.44235345451019525, "grad_norm": 561.6021728515625, "learning_rate": 6.905997318071278e-06, "loss": 33.4298, "step": 218980 }, { "epoch": 0.4423736551428791, "grad_norm": 459.9938049316406, "learning_rate": 6.905674604555337e-06, "loss": 13.3946, "step": 218990 }, { "epoch": 0.4423938557755629, "grad_norm": 73.86894989013672, "learning_rate": 6.905351881751372e-06, "loss": 29.4571, "step": 219000 }, { "epoch": 0.4424140564082467, "grad_norm": 339.7744445800781, "learning_rate": 6.905029149660951e-06, "loss": 14.2523, "step": 219010 }, { "epoch": 0.44243425704093053, "grad_norm": 366.2799987792969, "learning_rate": 6.904706408285649e-06, "loss": 22.1335, "step": 219020 }, { "epoch": 0.44245445767361435, "grad_norm": 750.3668823242188, "learning_rate": 6.90438365762704e-06, "loss": 16.5124, "step": 219030 }, { "epoch": 0.4424746583062982, "grad_norm": 103.09490203857422, "learning_rate": 6.904060897686695e-06, "loss": 14.9539, "step": 219040 }, { "epoch": 0.442494858938982, "grad_norm": 1246.744873046875, "learning_rate": 6.903738128466189e-06, "loss": 31.4349, "step": 219050 }, { "epoch": 0.44251505957166576, "grad_norm": 225.55918884277344, "learning_rate": 6.903415349967092e-06, "loss": 19.1666, "step": 219060 }, { "epoch": 0.4425352602043496, "grad_norm": 290.3597106933594, "learning_rate": 6.903092562190983e-06, "loss": 17.0541, "step": 219070 }, { "epoch": 0.4425554608370334, "grad_norm": 400.8343200683594, "learning_rate": 6.902769765139429e-06, "loss": 19.3249, "step": 219080 }, { "epoch": 0.4425756614697172, "grad_norm": 175.66539001464844, "learning_rate": 6.902446958814007e-06, "loss": 14.8605, "step": 219090 }, { "epoch": 0.44259586210240104, "grad_norm": 979.9549560546875, "learning_rate": 6.9021241432162886e-06, "loss": 24.2692, "step": 219100 }, { "epoch": 0.44261606273508486, "grad_norm": 302.62091064453125, "learning_rate": 6.901801318347848e-06, "loss": 27.2775, "step": 219110 }, { "epoch": 0.4426362633677687, "grad_norm": 633.2466430664062, "learning_rate": 6.90147848421026e-06, "loss": 50.8793, "step": 219120 }, { "epoch": 0.4426564640004525, "grad_norm": 130.0436553955078, "learning_rate": 6.901155640805095e-06, "loss": 26.6634, "step": 219130 }, { "epoch": 0.4426766646331363, "grad_norm": 346.499755859375, "learning_rate": 6.900832788133928e-06, "loss": 17.779, "step": 219140 }, { "epoch": 
0.44269686526582014, "grad_norm": 549.9331665039062, "learning_rate": 6.900509926198332e-06, "loss": 42.8939, "step": 219150 }, { "epoch": 0.44271706589850396, "grad_norm": 501.3892517089844, "learning_rate": 6.900187054999883e-06, "loss": 24.4063, "step": 219160 }, { "epoch": 0.4427372665311878, "grad_norm": 452.5372314453125, "learning_rate": 6.899864174540151e-06, "loss": 16.4124, "step": 219170 }, { "epoch": 0.4427574671638716, "grad_norm": 458.61090087890625, "learning_rate": 6.899541284820712e-06, "loss": 24.1362, "step": 219180 }, { "epoch": 0.44277766779655536, "grad_norm": 415.2669982910156, "learning_rate": 6.899218385843139e-06, "loss": 11.8202, "step": 219190 }, { "epoch": 0.4427978684292392, "grad_norm": 659.0968017578125, "learning_rate": 6.898895477609007e-06, "loss": 26.5903, "step": 219200 }, { "epoch": 0.442818069061923, "grad_norm": 1159.6007080078125, "learning_rate": 6.898572560119888e-06, "loss": 44.2738, "step": 219210 }, { "epoch": 0.4428382696946068, "grad_norm": 209.72499084472656, "learning_rate": 6.898249633377355e-06, "loss": 15.2883, "step": 219220 }, { "epoch": 0.44285847032729064, "grad_norm": 231.04542541503906, "learning_rate": 6.897926697382986e-06, "loss": 16.2038, "step": 219230 }, { "epoch": 0.44287867095997446, "grad_norm": 271.1341857910156, "learning_rate": 6.897603752138351e-06, "loss": 27.1958, "step": 219240 }, { "epoch": 0.4428988715926583, "grad_norm": 824.4793701171875, "learning_rate": 6.897280797645026e-06, "loss": 23.8028, "step": 219250 }, { "epoch": 0.4429190722253421, "grad_norm": 307.6236877441406, "learning_rate": 6.8969578339045855e-06, "loss": 23.5481, "step": 219260 }, { "epoch": 0.4429392728580259, "grad_norm": 413.1080627441406, "learning_rate": 6.8966348609186005e-06, "loss": 22.1092, "step": 219270 }, { "epoch": 0.44295947349070974, "grad_norm": 480.2059020996094, "learning_rate": 6.896311878688648e-06, "loss": 25.5122, "step": 219280 }, { "epoch": 0.44297967412339356, "grad_norm": 254.49496459960938, "learning_rate": 6.895988887216303e-06, "loss": 13.311, "step": 219290 }, { "epoch": 0.4429998747560774, "grad_norm": 784.9171752929688, "learning_rate": 6.895665886503136e-06, "loss": 23.6449, "step": 219300 }, { "epoch": 0.4430200753887612, "grad_norm": 355.4720764160156, "learning_rate": 6.895342876550724e-06, "loss": 9.4687, "step": 219310 }, { "epoch": 0.44304027602144497, "grad_norm": 431.9592590332031, "learning_rate": 6.895019857360641e-06, "loss": 36.8282, "step": 219320 }, { "epoch": 0.4430604766541288, "grad_norm": 108.13883209228516, "learning_rate": 6.8946968289344605e-06, "loss": 11.7617, "step": 219330 }, { "epoch": 0.4430806772868126, "grad_norm": 415.89080810546875, "learning_rate": 6.894373791273758e-06, "loss": 18.5024, "step": 219340 }, { "epoch": 0.4431008779194964, "grad_norm": 3.8527114391326904, "learning_rate": 6.8940507443801076e-06, "loss": 15.8009, "step": 219350 }, { "epoch": 0.44312107855218025, "grad_norm": 230.16094970703125, "learning_rate": 6.893727688255083e-06, "loss": 18.3914, "step": 219360 }, { "epoch": 0.44314127918486407, "grad_norm": 476.1678161621094, "learning_rate": 6.8934046229002605e-06, "loss": 11.7016, "step": 219370 }, { "epoch": 0.4431614798175479, "grad_norm": 706.7972412109375, "learning_rate": 6.893081548317212e-06, "loss": 26.295, "step": 219380 }, { "epoch": 0.4431816804502317, "grad_norm": 592.8897705078125, "learning_rate": 6.8927584645075154e-06, "loss": 10.9637, "step": 219390 }, { "epoch": 0.4432018810829155, "grad_norm": 326.7542724609375, "learning_rate": 
6.892435371472741e-06, "loss": 21.9441, "step": 219400 }, { "epoch": 0.44322208171559935, "grad_norm": 340.3212585449219, "learning_rate": 6.892112269214468e-06, "loss": 41.4171, "step": 219410 }, { "epoch": 0.44324228234828317, "grad_norm": 194.18162536621094, "learning_rate": 6.8917891577342685e-06, "loss": 17.9069, "step": 219420 }, { "epoch": 0.443262482980967, "grad_norm": 225.82171630859375, "learning_rate": 6.891466037033718e-06, "loss": 41.562, "step": 219430 }, { "epoch": 0.44328268361365075, "grad_norm": 573.9820556640625, "learning_rate": 6.891142907114392e-06, "loss": 23.5243, "step": 219440 }, { "epoch": 0.44330288424633457, "grad_norm": 1166.9981689453125, "learning_rate": 6.890819767977865e-06, "loss": 24.1516, "step": 219450 }, { "epoch": 0.4433230848790184, "grad_norm": 253.97682189941406, "learning_rate": 6.890496619625713e-06, "loss": 23.7371, "step": 219460 }, { "epoch": 0.4433432855117022, "grad_norm": 495.72625732421875, "learning_rate": 6.890173462059508e-06, "loss": 23.6401, "step": 219470 }, { "epoch": 0.44336348614438603, "grad_norm": 618.368408203125, "learning_rate": 6.889850295280827e-06, "loss": 10.3683, "step": 219480 }, { "epoch": 0.44338368677706985, "grad_norm": 560.4053955078125, "learning_rate": 6.8895271192912435e-06, "loss": 18.8768, "step": 219490 }, { "epoch": 0.44340388740975367, "grad_norm": 249.0582733154297, "learning_rate": 6.889203934092337e-06, "loss": 25.7241, "step": 219500 }, { "epoch": 0.4434240880424375, "grad_norm": 222.59014892578125, "learning_rate": 6.888880739685677e-06, "loss": 12.0046, "step": 219510 }, { "epoch": 0.4434442886751213, "grad_norm": 619.3270874023438, "learning_rate": 6.888557536072843e-06, "loss": 21.5745, "step": 219520 }, { "epoch": 0.44346448930780513, "grad_norm": 507.25189208984375, "learning_rate": 6.888234323255408e-06, "loss": 29.5272, "step": 219530 }, { "epoch": 0.44348468994048895, "grad_norm": 303.3486022949219, "learning_rate": 6.8879111012349475e-06, "loss": 21.201, "step": 219540 }, { "epoch": 0.44350489057317277, "grad_norm": 186.70721435546875, "learning_rate": 6.887587870013039e-06, "loss": 14.6315, "step": 219550 }, { "epoch": 0.4435250912058566, "grad_norm": 179.59512329101562, "learning_rate": 6.887264629591254e-06, "loss": 23.6044, "step": 219560 }, { "epoch": 0.44354529183854036, "grad_norm": 166.09658813476562, "learning_rate": 6.886941379971172e-06, "loss": 25.149, "step": 219570 }, { "epoch": 0.4435654924712242, "grad_norm": 434.411865234375, "learning_rate": 6.886618121154364e-06, "loss": 19.2683, "step": 219580 }, { "epoch": 0.443585693103908, "grad_norm": 0.0, "learning_rate": 6.88629485314241e-06, "loss": 24.9773, "step": 219590 }, { "epoch": 0.4436058937365918, "grad_norm": 530.9507446289062, "learning_rate": 6.885971575936884e-06, "loss": 22.3455, "step": 219600 }, { "epoch": 0.44362609436927564, "grad_norm": 517.7140502929688, "learning_rate": 6.885648289539362e-06, "loss": 13.0993, "step": 219610 }, { "epoch": 0.44364629500195946, "grad_norm": 580.7538452148438, "learning_rate": 6.8853249939514165e-06, "loss": 17.3157, "step": 219620 }, { "epoch": 0.4436664956346433, "grad_norm": 0.0, "learning_rate": 6.885001689174627e-06, "loss": 31.4602, "step": 219630 }, { "epoch": 0.4436866962673271, "grad_norm": 295.22637939453125, "learning_rate": 6.884678375210568e-06, "loss": 13.4412, "step": 219640 }, { "epoch": 0.4437068969000109, "grad_norm": 632.1878051757812, "learning_rate": 6.884355052060814e-06, "loss": 23.5613, "step": 219650 }, { "epoch": 0.44372709753269474, "grad_norm": 
657.80029296875, "learning_rate": 6.884031719726943e-06, "loss": 32.8196, "step": 219660 }, { "epoch": 0.44374729816537856, "grad_norm": 558.5362548828125, "learning_rate": 6.8837083782105296e-06, "loss": 15.4952, "step": 219670 }, { "epoch": 0.4437674987980624, "grad_norm": 266.2539367675781, "learning_rate": 6.883385027513151e-06, "loss": 29.817, "step": 219680 }, { "epoch": 0.4437876994307462, "grad_norm": 463.96905517578125, "learning_rate": 6.88306166763638e-06, "loss": 14.0948, "step": 219690 }, { "epoch": 0.44380790006342996, "grad_norm": 404.00384521484375, "learning_rate": 6.882738298581797e-06, "loss": 26.9113, "step": 219700 }, { "epoch": 0.4438281006961138, "grad_norm": 637.414794921875, "learning_rate": 6.882414920350975e-06, "loss": 22.0788, "step": 219710 }, { "epoch": 0.4438483013287976, "grad_norm": 785.491455078125, "learning_rate": 6.882091532945491e-06, "loss": 22.8759, "step": 219720 }, { "epoch": 0.4438685019614814, "grad_norm": 506.03900146484375, "learning_rate": 6.881768136366922e-06, "loss": 19.3762, "step": 219730 }, { "epoch": 0.44388870259416524, "grad_norm": 1084.349853515625, "learning_rate": 6.881444730616842e-06, "loss": 19.5856, "step": 219740 }, { "epoch": 0.44390890322684906, "grad_norm": 337.2403869628906, "learning_rate": 6.881121315696828e-06, "loss": 28.647, "step": 219750 }, { "epoch": 0.4439291038595329, "grad_norm": 615.3025512695312, "learning_rate": 6.880797891608458e-06, "loss": 25.024, "step": 219760 }, { "epoch": 0.4439493044922167, "grad_norm": 195.57460021972656, "learning_rate": 6.880474458353309e-06, "loss": 22.0026, "step": 219770 }, { "epoch": 0.4439695051249005, "grad_norm": 328.4720764160156, "learning_rate": 6.880151015932952e-06, "loss": 18.3296, "step": 219780 }, { "epoch": 0.44398970575758434, "grad_norm": 192.4298553466797, "learning_rate": 6.87982756434897e-06, "loss": 10.177, "step": 219790 }, { "epoch": 0.44400990639026816, "grad_norm": 484.88909912109375, "learning_rate": 6.879504103602934e-06, "loss": 19.0882, "step": 219800 }, { "epoch": 0.444030107022952, "grad_norm": 285.41278076171875, "learning_rate": 6.879180633696425e-06, "loss": 8.9307, "step": 219810 }, { "epoch": 0.4440503076556358, "grad_norm": 945.0198974609375, "learning_rate": 6.878857154631016e-06, "loss": 46.455, "step": 219820 }, { "epoch": 0.44407050828831957, "grad_norm": 355.7682800292969, "learning_rate": 6.878533666408286e-06, "loss": 13.8178, "step": 219830 }, { "epoch": 0.4440907089210034, "grad_norm": 1002.5101318359375, "learning_rate": 6.878210169029811e-06, "loss": 29.3335, "step": 219840 }, { "epoch": 0.4441109095536872, "grad_norm": 97.81233978271484, "learning_rate": 6.877886662497165e-06, "loss": 19.1359, "step": 219850 }, { "epoch": 0.444131110186371, "grad_norm": 103.65803527832031, "learning_rate": 6.877563146811931e-06, "loss": 24.5209, "step": 219860 }, { "epoch": 0.44415131081905485, "grad_norm": 601.9344482421875, "learning_rate": 6.87723962197568e-06, "loss": 12.1354, "step": 219870 }, { "epoch": 0.44417151145173867, "grad_norm": 181.5896759033203, "learning_rate": 6.87691608798999e-06, "loss": 32.641, "step": 219880 }, { "epoch": 0.4441917120844225, "grad_norm": 541.8621215820312, "learning_rate": 6.87659254485644e-06, "loss": 21.6538, "step": 219890 }, { "epoch": 0.4442119127171063, "grad_norm": 790.8692016601562, "learning_rate": 6.876268992576605e-06, "loss": 18.3269, "step": 219900 }, { "epoch": 0.4442321133497901, "grad_norm": 397.9839172363281, "learning_rate": 6.875945431152063e-06, "loss": 24.26, "step": 219910 }, { "epoch": 
0.44425231398247395, "grad_norm": 789.2221069335938, "learning_rate": 6.875621860584389e-06, "loss": 19.794, "step": 219920 }, { "epoch": 0.44427251461515777, "grad_norm": 249.17762756347656, "learning_rate": 6.875298280875163e-06, "loss": 27.8191, "step": 219930 }, { "epoch": 0.4442927152478416, "grad_norm": 268.7455749511719, "learning_rate": 6.874974692025959e-06, "loss": 27.2405, "step": 219940 }, { "epoch": 0.4443129158805254, "grad_norm": 305.5923156738281, "learning_rate": 6.874651094038358e-06, "loss": 11.3509, "step": 219950 }, { "epoch": 0.44433311651320917, "grad_norm": 430.91143798828125, "learning_rate": 6.874327486913933e-06, "loss": 24.2091, "step": 219960 }, { "epoch": 0.444353317145893, "grad_norm": 457.19342041015625, "learning_rate": 6.874003870654265e-06, "loss": 16.9818, "step": 219970 }, { "epoch": 0.4443735177785768, "grad_norm": 1346.923583984375, "learning_rate": 6.873680245260929e-06, "loss": 29.6735, "step": 219980 }, { "epoch": 0.44439371841126063, "grad_norm": 722.2505493164062, "learning_rate": 6.8733566107355e-06, "loss": 15.069, "step": 219990 }, { "epoch": 0.44441391904394445, "grad_norm": 642.849853515625, "learning_rate": 6.873032967079562e-06, "loss": 10.8828, "step": 220000 }, { "epoch": 0.44443411967662827, "grad_norm": 240.14439392089844, "learning_rate": 6.872709314294685e-06, "loss": 22.4157, "step": 220010 }, { "epoch": 0.4444543203093121, "grad_norm": 381.982421875, "learning_rate": 6.872385652382452e-06, "loss": 21.5925, "step": 220020 }, { "epoch": 0.4444745209419959, "grad_norm": 540.565673828125, "learning_rate": 6.872061981344438e-06, "loss": 18.8366, "step": 220030 }, { "epoch": 0.44449472157467973, "grad_norm": 931.19921875, "learning_rate": 6.871738301182221e-06, "loss": 26.1174, "step": 220040 }, { "epoch": 0.44451492220736355, "grad_norm": 309.64837646484375, "learning_rate": 6.87141461189738e-06, "loss": 33.8266, "step": 220050 }, { "epoch": 0.44453512284004737, "grad_norm": 339.34674072265625, "learning_rate": 6.87109091349149e-06, "loss": 17.4143, "step": 220060 }, { "epoch": 0.4445553234727312, "grad_norm": 538.2361450195312, "learning_rate": 6.870767205966128e-06, "loss": 28.7001, "step": 220070 }, { "epoch": 0.44457552410541495, "grad_norm": 506.4960021972656, "learning_rate": 6.870443489322875e-06, "loss": 20.3924, "step": 220080 }, { "epoch": 0.4445957247380988, "grad_norm": 381.004150390625, "learning_rate": 6.870119763563308e-06, "loss": 24.2316, "step": 220090 }, { "epoch": 0.4446159253707826, "grad_norm": 149.9341278076172, "learning_rate": 6.869796028689002e-06, "loss": 10.0959, "step": 220100 }, { "epoch": 0.4446361260034664, "grad_norm": 496.5892028808594, "learning_rate": 6.86947228470154e-06, "loss": 12.7109, "step": 220110 }, { "epoch": 0.44465632663615023, "grad_norm": 401.718505859375, "learning_rate": 6.8691485316024945e-06, "loss": 19.4941, "step": 220120 }, { "epoch": 0.44467652726883405, "grad_norm": 302.5918884277344, "learning_rate": 6.8688247693934465e-06, "loss": 6.8404, "step": 220130 }, { "epoch": 0.4446967279015179, "grad_norm": 346.9261474609375, "learning_rate": 6.868500998075973e-06, "loss": 40.9869, "step": 220140 }, { "epoch": 0.4447169285342017, "grad_norm": 426.3053894042969, "learning_rate": 6.8681772176516525e-06, "loss": 32.7499, "step": 220150 }, { "epoch": 0.4447371291668855, "grad_norm": 525.0901489257812, "learning_rate": 6.867853428122063e-06, "loss": 16.9985, "step": 220160 }, { "epoch": 0.44475732979956933, "grad_norm": 332.4767761230469, "learning_rate": 6.867529629488782e-06, "loss": 
19.9982, "step": 220170 }, { "epoch": 0.44477753043225315, "grad_norm": 349.4854736328125, "learning_rate": 6.867205821753389e-06, "loss": 33.0529, "step": 220180 }, { "epoch": 0.444797731064937, "grad_norm": 8.316431999206543, "learning_rate": 6.86688200491746e-06, "loss": 12.1505, "step": 220190 }, { "epoch": 0.4448179316976208, "grad_norm": 544.8107299804688, "learning_rate": 6.866558178982575e-06, "loss": 8.8382, "step": 220200 }, { "epoch": 0.44483813233030456, "grad_norm": 677.3682250976562, "learning_rate": 6.866234343950312e-06, "loss": 30.8832, "step": 220210 }, { "epoch": 0.4448583329629884, "grad_norm": 361.1523742675781, "learning_rate": 6.86591049982225e-06, "loss": 30.8473, "step": 220220 }, { "epoch": 0.4448785335956722, "grad_norm": 596.5050659179688, "learning_rate": 6.865586646599965e-06, "loss": 27.6241, "step": 220230 }, { "epoch": 0.444898734228356, "grad_norm": 196.83078002929688, "learning_rate": 6.8652627842850374e-06, "loss": 17.0317, "step": 220240 }, { "epoch": 0.44491893486103984, "grad_norm": 213.5590362548828, "learning_rate": 6.8649389128790455e-06, "loss": 13.1203, "step": 220250 }, { "epoch": 0.44493913549372366, "grad_norm": 319.96099853515625, "learning_rate": 6.864615032383567e-06, "loss": 9.0173, "step": 220260 }, { "epoch": 0.4449593361264075, "grad_norm": 687.814453125, "learning_rate": 6.864291142800183e-06, "loss": 16.3831, "step": 220270 }, { "epoch": 0.4449795367590913, "grad_norm": 492.3997497558594, "learning_rate": 6.863967244130467e-06, "loss": 14.2196, "step": 220280 }, { "epoch": 0.4449997373917751, "grad_norm": 398.268310546875, "learning_rate": 6.8636433363760025e-06, "loss": 15.2738, "step": 220290 }, { "epoch": 0.44501993802445894, "grad_norm": 95.0443115234375, "learning_rate": 6.863319419538366e-06, "loss": 22.0605, "step": 220300 }, { "epoch": 0.44504013865714276, "grad_norm": 98.15335083007812, "learning_rate": 6.862995493619137e-06, "loss": 17.2972, "step": 220310 }, { "epoch": 0.4450603392898266, "grad_norm": 43.280120849609375, "learning_rate": 6.862671558619894e-06, "loss": 26.5221, "step": 220320 }, { "epoch": 0.4450805399225104, "grad_norm": 417.2856750488281, "learning_rate": 6.862347614542214e-06, "loss": 31.7517, "step": 220330 }, { "epoch": 0.44510074055519416, "grad_norm": 396.2840576171875, "learning_rate": 6.86202366138768e-06, "loss": 27.3912, "step": 220340 }, { "epoch": 0.445120941187878, "grad_norm": 568.3402099609375, "learning_rate": 6.861699699157868e-06, "loss": 17.0884, "step": 220350 }, { "epoch": 0.4451411418205618, "grad_norm": 148.70721435546875, "learning_rate": 6.861375727854356e-06, "loss": 21.2353, "step": 220360 }, { "epoch": 0.4451613424532456, "grad_norm": 509.0034484863281, "learning_rate": 6.861051747478727e-06, "loss": 20.7965, "step": 220370 }, { "epoch": 0.44518154308592944, "grad_norm": 309.30206298828125, "learning_rate": 6.860727758032555e-06, "loss": 23.0757, "step": 220380 }, { "epoch": 0.44520174371861326, "grad_norm": 29.304731369018555, "learning_rate": 6.860403759517422e-06, "loss": 9.9736, "step": 220390 }, { "epoch": 0.4452219443512971, "grad_norm": 858.1065063476562, "learning_rate": 6.860079751934908e-06, "loss": 31.402, "step": 220400 }, { "epoch": 0.4452421449839809, "grad_norm": 444.6431884765625, "learning_rate": 6.859755735286589e-06, "loss": 17.8292, "step": 220410 }, { "epoch": 0.4452623456166647, "grad_norm": 171.51792907714844, "learning_rate": 6.859431709574048e-06, "loss": 18.3242, "step": 220420 }, { "epoch": 0.44528254624934854, "grad_norm": 310.8061828613281, 
"learning_rate": 6.859107674798863e-06, "loss": 41.8775, "step": 220430 }, { "epoch": 0.44530274688203236, "grad_norm": 431.1271057128906, "learning_rate": 6.85878363096261e-06, "loss": 14.0809, "step": 220440 }, { "epoch": 0.4453229475147162, "grad_norm": 518.21826171875, "learning_rate": 6.858459578066873e-06, "loss": 26.5326, "step": 220450 }, { "epoch": 0.4453431481474, "grad_norm": 476.1693420410156, "learning_rate": 6.858135516113226e-06, "loss": 33.7109, "step": 220460 }, { "epoch": 0.44536334878008377, "grad_norm": 565.9754638671875, "learning_rate": 6.857811445103257e-06, "loss": 24.4651, "step": 220470 }, { "epoch": 0.4453835494127676, "grad_norm": 838.7970581054688, "learning_rate": 6.857487365038537e-06, "loss": 29.9847, "step": 220480 }, { "epoch": 0.4454037500454514, "grad_norm": 55.276100158691406, "learning_rate": 6.857163275920651e-06, "loss": 17.5076, "step": 220490 }, { "epoch": 0.4454239506781352, "grad_norm": 852.9766845703125, "learning_rate": 6.856839177751175e-06, "loss": 19.5113, "step": 220500 }, { "epoch": 0.44544415131081905, "grad_norm": 701.2528076171875, "learning_rate": 6.85651507053169e-06, "loss": 15.4285, "step": 220510 }, { "epoch": 0.44546435194350287, "grad_norm": 863.6895141601562, "learning_rate": 6.856190954263776e-06, "loss": 20.8058, "step": 220520 }, { "epoch": 0.4454845525761867, "grad_norm": 336.2017822265625, "learning_rate": 6.8558668289490126e-06, "loss": 32.8068, "step": 220530 }, { "epoch": 0.4455047532088705, "grad_norm": 598.2466430664062, "learning_rate": 6.855542694588979e-06, "loss": 20.53, "step": 220540 }, { "epoch": 0.4455249538415543, "grad_norm": 662.9794311523438, "learning_rate": 6.8552185511852555e-06, "loss": 9.7046, "step": 220550 }, { "epoch": 0.44554515447423815, "grad_norm": 0.0, "learning_rate": 6.854894398739422e-06, "loss": 12.0585, "step": 220560 }, { "epoch": 0.44556535510692197, "grad_norm": 249.34548950195312, "learning_rate": 6.854570237253059e-06, "loss": 13.9427, "step": 220570 }, { "epoch": 0.4455855557396058, "grad_norm": 1056.8892822265625, "learning_rate": 6.854246066727743e-06, "loss": 30.9743, "step": 220580 }, { "epoch": 0.4456057563722896, "grad_norm": 420.1027526855469, "learning_rate": 6.8539218871650605e-06, "loss": 20.473, "step": 220590 }, { "epoch": 0.44562595700497337, "grad_norm": 560.8649291992188, "learning_rate": 6.853597698566583e-06, "loss": 20.4571, "step": 220600 }, { "epoch": 0.4456461576376572, "grad_norm": 421.0732421875, "learning_rate": 6.853273500933899e-06, "loss": 25.3511, "step": 220610 }, { "epoch": 0.445666358270341, "grad_norm": 424.44268798828125, "learning_rate": 6.852949294268582e-06, "loss": 26.6552, "step": 220620 }, { "epoch": 0.44568655890302483, "grad_norm": 814.9902954101562, "learning_rate": 6.852625078572217e-06, "loss": 25.6921, "step": 220630 }, { "epoch": 0.44570675953570865, "grad_norm": 214.76417541503906, "learning_rate": 6.852300853846381e-06, "loss": 10.8756, "step": 220640 }, { "epoch": 0.44572696016839247, "grad_norm": 639.0645141601562, "learning_rate": 6.851976620092655e-06, "loss": 18.1716, "step": 220650 }, { "epoch": 0.4457471608010763, "grad_norm": 479.9458312988281, "learning_rate": 6.851652377312621e-06, "loss": 30.2227, "step": 220660 }, { "epoch": 0.4457673614337601, "grad_norm": 64.48029327392578, "learning_rate": 6.851328125507856e-06, "loss": 43.543, "step": 220670 }, { "epoch": 0.44578756206644393, "grad_norm": 961.3707275390625, "learning_rate": 6.851003864679943e-06, "loss": 24.3358, "step": 220680 }, { "epoch": 0.44580776269912775, 
"grad_norm": 1021.1846923828125, "learning_rate": 6.850679594830461e-06, "loss": 28.7365, "step": 220690 }, { "epoch": 0.44582796333181157, "grad_norm": 1020.2935791015625, "learning_rate": 6.850355315960992e-06, "loss": 19.7658, "step": 220700 }, { "epoch": 0.4458481639644954, "grad_norm": 511.8146667480469, "learning_rate": 6.850031028073115e-06, "loss": 19.4434, "step": 220710 }, { "epoch": 0.44586836459717916, "grad_norm": 129.0845489501953, "learning_rate": 6.849706731168413e-06, "loss": 16.4215, "step": 220720 }, { "epoch": 0.445888565229863, "grad_norm": 483.9500732421875, "learning_rate": 6.849382425248463e-06, "loss": 20.441, "step": 220730 }, { "epoch": 0.4459087658625468, "grad_norm": 215.6471405029297, "learning_rate": 6.849058110314848e-06, "loss": 8.7657, "step": 220740 }, { "epoch": 0.4459289664952306, "grad_norm": 588.5631713867188, "learning_rate": 6.848733786369147e-06, "loss": 19.9991, "step": 220750 }, { "epoch": 0.44594916712791444, "grad_norm": 94.149169921875, "learning_rate": 6.848409453412943e-06, "loss": 23.1848, "step": 220760 }, { "epoch": 0.44596936776059826, "grad_norm": 458.5713806152344, "learning_rate": 6.848085111447815e-06, "loss": 11.2409, "step": 220770 }, { "epoch": 0.4459895683932821, "grad_norm": 333.8248596191406, "learning_rate": 6.847760760475344e-06, "loss": 23.8046, "step": 220780 }, { "epoch": 0.4460097690259659, "grad_norm": 186.26602172851562, "learning_rate": 6.8474364004971115e-06, "loss": 16.6932, "step": 220790 }, { "epoch": 0.4460299696586497, "grad_norm": 570.8720092773438, "learning_rate": 6.847112031514698e-06, "loss": 21.8735, "step": 220800 }, { "epoch": 0.44605017029133354, "grad_norm": 262.8045959472656, "learning_rate": 6.8467876535296855e-06, "loss": 8.6507, "step": 220810 }, { "epoch": 0.44607037092401736, "grad_norm": 310.4090576171875, "learning_rate": 6.846463266543653e-06, "loss": 34.8979, "step": 220820 }, { "epoch": 0.4460905715567012, "grad_norm": 473.4156188964844, "learning_rate": 6.846138870558181e-06, "loss": 19.9906, "step": 220830 }, { "epoch": 0.446110772189385, "grad_norm": 256.7278137207031, "learning_rate": 6.845814465574855e-06, "loss": 17.3198, "step": 220840 }, { "epoch": 0.44613097282206876, "grad_norm": 511.5408630371094, "learning_rate": 6.845490051595252e-06, "loss": 13.9553, "step": 220850 }, { "epoch": 0.4461511734547526, "grad_norm": 214.9493408203125, "learning_rate": 6.8451656286209535e-06, "loss": 34.4136, "step": 220860 }, { "epoch": 0.4461713740874364, "grad_norm": 467.59490966796875, "learning_rate": 6.844841196653541e-06, "loss": 9.1946, "step": 220870 }, { "epoch": 0.4461915747201202, "grad_norm": 576.91650390625, "learning_rate": 6.844516755694599e-06, "loss": 17.083, "step": 220880 }, { "epoch": 0.44621177535280404, "grad_norm": 195.52423095703125, "learning_rate": 6.844192305745702e-06, "loss": 21.3189, "step": 220890 }, { "epoch": 0.44623197598548786, "grad_norm": 591.4985961914062, "learning_rate": 6.843867846808438e-06, "loss": 39.7037, "step": 220900 }, { "epoch": 0.4462521766181717, "grad_norm": 275.8883056640625, "learning_rate": 6.8435433788843865e-06, "loss": 10.8072, "step": 220910 }, { "epoch": 0.4462723772508555, "grad_norm": 129.10862731933594, "learning_rate": 6.843218901975127e-06, "loss": 21.2382, "step": 220920 }, { "epoch": 0.4462925778835393, "grad_norm": 384.60406494140625, "learning_rate": 6.842894416082243e-06, "loss": 15.3655, "step": 220930 }, { "epoch": 0.44631277851622314, "grad_norm": 249.91744995117188, "learning_rate": 6.842569921207314e-06, "loss": 13.1972, 
"step": 220940 }, { "epoch": 0.44633297914890696, "grad_norm": 83.22366333007812, "learning_rate": 6.842245417351923e-06, "loss": 11.8011, "step": 220950 }, { "epoch": 0.4463531797815908, "grad_norm": 585.205810546875, "learning_rate": 6.841920904517652e-06, "loss": 20.8354, "step": 220960 }, { "epoch": 0.4463733804142746, "grad_norm": 316.5137939453125, "learning_rate": 6.84159638270608e-06, "loss": 7.4139, "step": 220970 }, { "epoch": 0.44639358104695837, "grad_norm": 281.46441650390625, "learning_rate": 6.8412718519187916e-06, "loss": 19.4551, "step": 220980 }, { "epoch": 0.4464137816796422, "grad_norm": 1354.536865234375, "learning_rate": 6.840947312157367e-06, "loss": 22.3534, "step": 220990 }, { "epoch": 0.446433982312326, "grad_norm": 616.942626953125, "learning_rate": 6.840622763423391e-06, "loss": 21.2377, "step": 221000 }, { "epoch": 0.4464541829450098, "grad_norm": 1267.2545166015625, "learning_rate": 6.840298205718441e-06, "loss": 34.4087, "step": 221010 }, { "epoch": 0.44647438357769365, "grad_norm": 317.268310546875, "learning_rate": 6.839973639044101e-06, "loss": 18.1336, "step": 221020 }, { "epoch": 0.44649458421037747, "grad_norm": 410.1036682128906, "learning_rate": 6.839649063401952e-06, "loss": 22.5897, "step": 221030 }, { "epoch": 0.4465147848430613, "grad_norm": 146.34117126464844, "learning_rate": 6.8393244787935775e-06, "loss": 16.7547, "step": 221040 }, { "epoch": 0.4465349854757451, "grad_norm": 494.58782958984375, "learning_rate": 6.838999885220558e-06, "loss": 29.628, "step": 221050 }, { "epoch": 0.4465551861084289, "grad_norm": 530.4630737304688, "learning_rate": 6.838675282684477e-06, "loss": 26.4836, "step": 221060 }, { "epoch": 0.44657538674111275, "grad_norm": 259.78839111328125, "learning_rate": 6.838350671186914e-06, "loss": 21.3799, "step": 221070 }, { "epoch": 0.44659558737379657, "grad_norm": 490.98724365234375, "learning_rate": 6.838026050729454e-06, "loss": 15.9772, "step": 221080 }, { "epoch": 0.4466157880064804, "grad_norm": 2814.173095703125, "learning_rate": 6.837701421313677e-06, "loss": 40.5265, "step": 221090 }, { "epoch": 0.4466359886391642, "grad_norm": 393.2685546875, "learning_rate": 6.837376782941168e-06, "loss": 29.2508, "step": 221100 }, { "epoch": 0.44665618927184797, "grad_norm": 194.7854461669922, "learning_rate": 6.837052135613507e-06, "loss": 12.3636, "step": 221110 }, { "epoch": 0.4466763899045318, "grad_norm": 524.756103515625, "learning_rate": 6.8367274793322745e-06, "loss": 9.756, "step": 221120 }, { "epoch": 0.4466965905372156, "grad_norm": 458.5486145019531, "learning_rate": 6.836402814099057e-06, "loss": 21.9322, "step": 221130 }, { "epoch": 0.44671679116989943, "grad_norm": 1.953729510307312, "learning_rate": 6.836078139915434e-06, "loss": 20.1561, "step": 221140 }, { "epoch": 0.44673699180258325, "grad_norm": 325.73577880859375, "learning_rate": 6.83575345678299e-06, "loss": 15.4104, "step": 221150 }, { "epoch": 0.44675719243526707, "grad_norm": 199.3097686767578, "learning_rate": 6.8354287647033046e-06, "loss": 16.0741, "step": 221160 }, { "epoch": 0.4467773930679509, "grad_norm": 321.97662353515625, "learning_rate": 6.835104063677964e-06, "loss": 24.8787, "step": 221170 }, { "epoch": 0.4467975937006347, "grad_norm": 488.0107421875, "learning_rate": 6.8347793537085474e-06, "loss": 37.2962, "step": 221180 }, { "epoch": 0.44681779433331853, "grad_norm": 371.7288513183594, "learning_rate": 6.834454634796639e-06, "loss": 13.6815, "step": 221190 }, { "epoch": 0.44683799496600235, "grad_norm": 293.863525390625, "learning_rate": 
6.834129906943822e-06, "loss": 17.7457, "step": 221200 }, { "epoch": 0.44685819559868617, "grad_norm": 390.8061828613281, "learning_rate": 6.833805170151676e-06, "loss": 36.4255, "step": 221210 }, { "epoch": 0.44687839623137, "grad_norm": 511.901123046875, "learning_rate": 6.8334804244217885e-06, "loss": 19.4919, "step": 221220 }, { "epoch": 0.4468985968640538, "grad_norm": 391.38848876953125, "learning_rate": 6.833155669755738e-06, "loss": 22.7092, "step": 221230 }, { "epoch": 0.4469187974967376, "grad_norm": 413.0695495605469, "learning_rate": 6.8328309061551105e-06, "loss": 18.0782, "step": 221240 }, { "epoch": 0.4469389981294214, "grad_norm": 606.9026489257812, "learning_rate": 6.832506133621487e-06, "loss": 19.8127, "step": 221250 }, { "epoch": 0.4469591987621052, "grad_norm": 770.3826904296875, "learning_rate": 6.832181352156451e-06, "loss": 24.4051, "step": 221260 }, { "epoch": 0.44697939939478903, "grad_norm": 594.4346313476562, "learning_rate": 6.831856561761585e-06, "loss": 31.0357, "step": 221270 }, { "epoch": 0.44699960002747285, "grad_norm": 301.72650146484375, "learning_rate": 6.831531762438472e-06, "loss": 22.9298, "step": 221280 }, { "epoch": 0.4470198006601567, "grad_norm": 61.547115325927734, "learning_rate": 6.8312069541886964e-06, "loss": 20.4524, "step": 221290 }, { "epoch": 0.4470400012928405, "grad_norm": 266.6343994140625, "learning_rate": 6.830882137013839e-06, "loss": 25.9055, "step": 221300 }, { "epoch": 0.4470602019255243, "grad_norm": 550.8693237304688, "learning_rate": 6.830557310915484e-06, "loss": 39.7968, "step": 221310 }, { "epoch": 0.44708040255820813, "grad_norm": 444.9831237792969, "learning_rate": 6.830232475895215e-06, "loss": 34.9837, "step": 221320 }, { "epoch": 0.44710060319089195, "grad_norm": 189.2568817138672, "learning_rate": 6.829907631954618e-06, "loss": 17.8648, "step": 221330 }, { "epoch": 0.4471208038235758, "grad_norm": 554.810791015625, "learning_rate": 6.829582779095269e-06, "loss": 29.1685, "step": 221340 }, { "epoch": 0.4471410044562596, "grad_norm": 81.38941955566406, "learning_rate": 6.829257917318757e-06, "loss": 16.7415, "step": 221350 }, { "epoch": 0.44716120508894336, "grad_norm": 115.8245849609375, "learning_rate": 6.8289330466266635e-06, "loss": 14.2278, "step": 221360 }, { "epoch": 0.4471814057216272, "grad_norm": 247.37832641601562, "learning_rate": 6.828608167020572e-06, "loss": 15.3853, "step": 221370 }, { "epoch": 0.447201606354311, "grad_norm": 292.91973876953125, "learning_rate": 6.828283278502067e-06, "loss": 21.835, "step": 221380 }, { "epoch": 0.4472218069869948, "grad_norm": 28.080339431762695, "learning_rate": 6.827958381072729e-06, "loss": 23.6165, "step": 221390 }, { "epoch": 0.44724200761967864, "grad_norm": 355.7489318847656, "learning_rate": 6.827633474734145e-06, "loss": 19.5352, "step": 221400 }, { "epoch": 0.44726220825236246, "grad_norm": 1030.5733642578125, "learning_rate": 6.827308559487897e-06, "loss": 23.4393, "step": 221410 }, { "epoch": 0.4472824088850463, "grad_norm": 181.12928771972656, "learning_rate": 6.826983635335569e-06, "loss": 17.1553, "step": 221420 }, { "epoch": 0.4473026095177301, "grad_norm": 205.83755493164062, "learning_rate": 6.826658702278745e-06, "loss": 13.2372, "step": 221430 }, { "epoch": 0.4473228101504139, "grad_norm": 560.5516967773438, "learning_rate": 6.826333760319006e-06, "loss": 26.3961, "step": 221440 }, { "epoch": 0.44734301078309774, "grad_norm": 43.00218200683594, "learning_rate": 6.82600880945794e-06, "loss": 29.598, "step": 221450 }, { "epoch": 0.44736321141578156, 
"grad_norm": 88.67811584472656, "learning_rate": 6.825683849697127e-06, "loss": 19.7466, "step": 221460 }, { "epoch": 0.4473834120484654, "grad_norm": 367.19464111328125, "learning_rate": 6.825358881038153e-06, "loss": 14.5548, "step": 221470 }, { "epoch": 0.4474036126811492, "grad_norm": 220.63648986816406, "learning_rate": 6.825033903482601e-06, "loss": 30.0329, "step": 221480 }, { "epoch": 0.44742381331383296, "grad_norm": 340.2915344238281, "learning_rate": 6.824708917032056e-06, "loss": 17.1634, "step": 221490 }, { "epoch": 0.4474440139465168, "grad_norm": 290.98419189453125, "learning_rate": 6.824383921688098e-06, "loss": 12.8677, "step": 221500 }, { "epoch": 0.4474642145792006, "grad_norm": 252.28880310058594, "learning_rate": 6.824058917452318e-06, "loss": 21.9886, "step": 221510 }, { "epoch": 0.4474844152118844, "grad_norm": 209.39837646484375, "learning_rate": 6.823733904326293e-06, "loss": 15.2154, "step": 221520 }, { "epoch": 0.44750461584456824, "grad_norm": 715.6478271484375, "learning_rate": 6.823408882311612e-06, "loss": 26.3106, "step": 221530 }, { "epoch": 0.44752481647725206, "grad_norm": 783.3381958007812, "learning_rate": 6.823083851409857e-06, "loss": 23.6285, "step": 221540 }, { "epoch": 0.4475450171099359, "grad_norm": 1062.1817626953125, "learning_rate": 6.822758811622611e-06, "loss": 32.6851, "step": 221550 }, { "epoch": 0.4475652177426197, "grad_norm": 37.7314338684082, "learning_rate": 6.8224337629514615e-06, "loss": 13.1089, "step": 221560 }, { "epoch": 0.4475854183753035, "grad_norm": 552.5439453125, "learning_rate": 6.8221087053979894e-06, "loss": 11.862, "step": 221570 }, { "epoch": 0.44760561900798734, "grad_norm": 888.235107421875, "learning_rate": 6.821783638963782e-06, "loss": 35.1384, "step": 221580 }, { "epoch": 0.44762581964067116, "grad_norm": 267.38714599609375, "learning_rate": 6.82145856365042e-06, "loss": 13.1128, "step": 221590 }, { "epoch": 0.447646020273355, "grad_norm": 205.28140258789062, "learning_rate": 6.821133479459492e-06, "loss": 15.3797, "step": 221600 }, { "epoch": 0.4476662209060388, "grad_norm": 265.0107421875, "learning_rate": 6.820808386392579e-06, "loss": 10.0269, "step": 221610 }, { "epoch": 0.44768642153872257, "grad_norm": 30.379993438720703, "learning_rate": 6.820483284451267e-06, "loss": 23.4607, "step": 221620 }, { "epoch": 0.4477066221714064, "grad_norm": 330.9953308105469, "learning_rate": 6.820158173637142e-06, "loss": 19.6689, "step": 221630 }, { "epoch": 0.4477268228040902, "grad_norm": 201.62255859375, "learning_rate": 6.819833053951783e-06, "loss": 19.4625, "step": 221640 }, { "epoch": 0.447747023436774, "grad_norm": 118.86680603027344, "learning_rate": 6.819507925396782e-06, "loss": 13.1599, "step": 221650 }, { "epoch": 0.44776722406945785, "grad_norm": 616.4024658203125, "learning_rate": 6.819182787973717e-06, "loss": 21.929, "step": 221660 }, { "epoch": 0.44778742470214167, "grad_norm": 148.64752197265625, "learning_rate": 6.818857641684179e-06, "loss": 22.8448, "step": 221670 }, { "epoch": 0.4478076253348255, "grad_norm": 361.9415588378906, "learning_rate": 6.8185324865297475e-06, "loss": 18.2555, "step": 221680 }, { "epoch": 0.4478278259675093, "grad_norm": 137.9051513671875, "learning_rate": 6.81820732251201e-06, "loss": 11.5808, "step": 221690 }, { "epoch": 0.4478480266001931, "grad_norm": 661.150634765625, "learning_rate": 6.81788214963255e-06, "loss": 29.458, "step": 221700 }, { "epoch": 0.44786822723287695, "grad_norm": 193.84556579589844, "learning_rate": 6.817556967892953e-06, "loss": 12.4532, "step": 
221710 }, { "epoch": 0.44788842786556077, "grad_norm": 1289.806884765625, "learning_rate": 6.817231777294804e-06, "loss": 30.7461, "step": 221720 }, { "epoch": 0.4479086284982446, "grad_norm": 181.6709442138672, "learning_rate": 6.816906577839688e-06, "loss": 15.6548, "step": 221730 }, { "epoch": 0.4479288291309284, "grad_norm": 422.0000305175781, "learning_rate": 6.816581369529189e-06, "loss": 27.0599, "step": 221740 }, { "epoch": 0.44794902976361217, "grad_norm": 529.2969970703125, "learning_rate": 6.816256152364893e-06, "loss": 17.3799, "step": 221750 }, { "epoch": 0.447969230396296, "grad_norm": 374.6127624511719, "learning_rate": 6.815930926348384e-06, "loss": 21.8464, "step": 221760 }, { "epoch": 0.4479894310289798, "grad_norm": 108.05406188964844, "learning_rate": 6.8156056914812486e-06, "loss": 15.8189, "step": 221770 }, { "epoch": 0.44800963166166363, "grad_norm": 197.88031005859375, "learning_rate": 6.815280447765073e-06, "loss": 9.593, "step": 221780 }, { "epoch": 0.44802983229434745, "grad_norm": 366.7749328613281, "learning_rate": 6.814955195201438e-06, "loss": 21.9934, "step": 221790 }, { "epoch": 0.44805003292703127, "grad_norm": 324.75244140625, "learning_rate": 6.814629933791932e-06, "loss": 15.2373, "step": 221800 }, { "epoch": 0.4480702335597151, "grad_norm": 403.6629943847656, "learning_rate": 6.814304663538142e-06, "loss": 14.6087, "step": 221810 }, { "epoch": 0.4480904341923989, "grad_norm": 423.86431884765625, "learning_rate": 6.813979384441648e-06, "loss": 31.6844, "step": 221820 }, { "epoch": 0.44811063482508273, "grad_norm": 2697.802734375, "learning_rate": 6.813654096504041e-06, "loss": 27.9102, "step": 221830 }, { "epoch": 0.44813083545776655, "grad_norm": 500.7284240722656, "learning_rate": 6.813328799726901e-06, "loss": 29.9251, "step": 221840 }, { "epoch": 0.44815103609045037, "grad_norm": 699.9358520507812, "learning_rate": 6.8130034941118185e-06, "loss": 38.8371, "step": 221850 }, { "epoch": 0.4481712367231342, "grad_norm": 858.3467407226562, "learning_rate": 6.812678179660377e-06, "loss": 33.7234, "step": 221860 }, { "epoch": 0.44819143735581796, "grad_norm": 297.064453125, "learning_rate": 6.812352856374162e-06, "loss": 23.5032, "step": 221870 }, { "epoch": 0.4482116379885018, "grad_norm": 71.46343231201172, "learning_rate": 6.812027524254758e-06, "loss": 19.5076, "step": 221880 }, { "epoch": 0.4482318386211856, "grad_norm": 15.395904541015625, "learning_rate": 6.8117021833037514e-06, "loss": 21.745, "step": 221890 }, { "epoch": 0.4482520392538694, "grad_norm": 252.263671875, "learning_rate": 6.811376833522729e-06, "loss": 21.7261, "step": 221900 }, { "epoch": 0.44827223988655324, "grad_norm": 505.7231140136719, "learning_rate": 6.811051474913275e-06, "loss": 19.5103, "step": 221910 }, { "epoch": 0.44829244051923706, "grad_norm": 532.5568237304688, "learning_rate": 6.810726107476977e-06, "loss": 20.0245, "step": 221920 }, { "epoch": 0.4483126411519209, "grad_norm": 585.125244140625, "learning_rate": 6.8104007312154185e-06, "loss": 15.5491, "step": 221930 }, { "epoch": 0.4483328417846047, "grad_norm": 240.69189453125, "learning_rate": 6.810075346130187e-06, "loss": 21.6393, "step": 221940 }, { "epoch": 0.4483530424172885, "grad_norm": 363.57904052734375, "learning_rate": 6.809749952222867e-06, "loss": 15.7399, "step": 221950 }, { "epoch": 0.44837324304997234, "grad_norm": 270.8522033691406, "learning_rate": 6.809424549495045e-06, "loss": 15.6743, "step": 221960 }, { "epoch": 0.44839344368265616, "grad_norm": 632.1456909179688, "learning_rate": 
6.809099137948309e-06, "loss": 19.7913, "step": 221970 }, { "epoch": 0.44841364431534, "grad_norm": 312.0612487792969, "learning_rate": 6.80877371758424e-06, "loss": 24.0879, "step": 221980 }, { "epoch": 0.4484338449480238, "grad_norm": 1596.5499267578125, "learning_rate": 6.808448288404431e-06, "loss": 33.8926, "step": 221990 }, { "epoch": 0.44845404558070756, "grad_norm": 172.30804443359375, "learning_rate": 6.808122850410461e-06, "loss": 29.8875, "step": 222000 }, { "epoch": 0.4484742462133914, "grad_norm": 106.3112564086914, "learning_rate": 6.807797403603923e-06, "loss": 23.2489, "step": 222010 }, { "epoch": 0.4484944468460752, "grad_norm": 206.79513549804688, "learning_rate": 6.8074719479863974e-06, "loss": 41.5476, "step": 222020 }, { "epoch": 0.448514647478759, "grad_norm": 358.0508728027344, "learning_rate": 6.8071464835594735e-06, "loss": 17.2828, "step": 222030 }, { "epoch": 0.44853484811144284, "grad_norm": 276.592529296875, "learning_rate": 6.806821010324738e-06, "loss": 30.7488, "step": 222040 }, { "epoch": 0.44855504874412666, "grad_norm": 282.1035461425781, "learning_rate": 6.806495528283772e-06, "loss": 40.0614, "step": 222050 }, { "epoch": 0.4485752493768105, "grad_norm": 157.2418670654297, "learning_rate": 6.80617003743817e-06, "loss": 11.9531, "step": 222060 }, { "epoch": 0.4485954500094943, "grad_norm": 390.024169921875, "learning_rate": 6.805844537789512e-06, "loss": 15.1986, "step": 222070 }, { "epoch": 0.4486156506421781, "grad_norm": 398.4321594238281, "learning_rate": 6.805519029339388e-06, "loss": 16.9989, "step": 222080 }, { "epoch": 0.44863585127486194, "grad_norm": 608.80712890625, "learning_rate": 6.805193512089383e-06, "loss": 20.4634, "step": 222090 }, { "epoch": 0.44865605190754576, "grad_norm": 582.0148315429688, "learning_rate": 6.804867986041084e-06, "loss": 18.6298, "step": 222100 }, { "epoch": 0.4486762525402296, "grad_norm": 667.9501342773438, "learning_rate": 6.804542451196075e-06, "loss": 20.4488, "step": 222110 }, { "epoch": 0.4486964531729134, "grad_norm": 121.29228973388672, "learning_rate": 6.804216907555948e-06, "loss": 17.3846, "step": 222120 }, { "epoch": 0.44871665380559717, "grad_norm": 431.90704345703125, "learning_rate": 6.8038913551222864e-06, "loss": 36.2312, "step": 222130 }, { "epoch": 0.448736854438281, "grad_norm": 271.4874572753906, "learning_rate": 6.803565793896676e-06, "loss": 19.1641, "step": 222140 }, { "epoch": 0.4487570550709648, "grad_norm": 335.38275146484375, "learning_rate": 6.803240223880705e-06, "loss": 24.8926, "step": 222150 }, { "epoch": 0.4487772557036486, "grad_norm": 389.165283203125, "learning_rate": 6.802914645075959e-06, "loss": 10.8841, "step": 222160 }, { "epoch": 0.44879745633633245, "grad_norm": 753.0094604492188, "learning_rate": 6.802589057484027e-06, "loss": 22.3376, "step": 222170 }, { "epoch": 0.44881765696901627, "grad_norm": 184.64968872070312, "learning_rate": 6.8022634611064945e-06, "loss": 18.6461, "step": 222180 }, { "epoch": 0.4488378576017001, "grad_norm": 349.143310546875, "learning_rate": 6.801937855944946e-06, "loss": 13.9254, "step": 222190 }, { "epoch": 0.4488580582343839, "grad_norm": 148.9762725830078, "learning_rate": 6.8016122420009745e-06, "loss": 22.2679, "step": 222200 }, { "epoch": 0.4488782588670677, "grad_norm": 294.5908508300781, "learning_rate": 6.801286619276161e-06, "loss": 21.1456, "step": 222210 }, { "epoch": 0.44889845949975155, "grad_norm": 742.6663208007812, "learning_rate": 6.800960987772096e-06, "loss": 16.5063, "step": 222220 }, { "epoch": 0.44891866013243537, 
"grad_norm": 649.3998413085938, "learning_rate": 6.800635347490365e-06, "loss": 22.3299, "step": 222230 }, { "epoch": 0.4489388607651192, "grad_norm": 689.6530151367188, "learning_rate": 6.800309698432557e-06, "loss": 13.688, "step": 222240 }, { "epoch": 0.448959061397803, "grad_norm": 453.74615478515625, "learning_rate": 6.799984040600257e-06, "loss": 19.618, "step": 222250 }, { "epoch": 0.44897926203048677, "grad_norm": 727.8592529296875, "learning_rate": 6.799658373995054e-06, "loss": 17.2367, "step": 222260 }, { "epoch": 0.4489994626631706, "grad_norm": 170.2805633544922, "learning_rate": 6.7993326986185315e-06, "loss": 32.9658, "step": 222270 }, { "epoch": 0.4490196632958544, "grad_norm": 346.58978271484375, "learning_rate": 6.799007014472283e-06, "loss": 13.2505, "step": 222280 }, { "epoch": 0.44903986392853823, "grad_norm": 317.31060791015625, "learning_rate": 6.798681321557891e-06, "loss": 10.2113, "step": 222290 }, { "epoch": 0.44906006456122205, "grad_norm": 23.51372718811035, "learning_rate": 6.798355619876944e-06, "loss": 9.2266, "step": 222300 }, { "epoch": 0.44908026519390587, "grad_norm": 1001.768798828125, "learning_rate": 6.798029909431031e-06, "loss": 20.929, "step": 222310 }, { "epoch": 0.4491004658265897, "grad_norm": 255.8820037841797, "learning_rate": 6.797704190221737e-06, "loss": 27.7979, "step": 222320 }, { "epoch": 0.4491206664592735, "grad_norm": 380.12359619140625, "learning_rate": 6.797378462250653e-06, "loss": 25.8956, "step": 222330 }, { "epoch": 0.44914086709195733, "grad_norm": 352.653564453125, "learning_rate": 6.797052725519362e-06, "loss": 15.2067, "step": 222340 }, { "epoch": 0.44916106772464115, "grad_norm": 254.15542602539062, "learning_rate": 6.796726980029454e-06, "loss": 13.2863, "step": 222350 }, { "epoch": 0.44918126835732497, "grad_norm": 330.99774169921875, "learning_rate": 6.796401225782517e-06, "loss": 13.3235, "step": 222360 }, { "epoch": 0.4492014689900088, "grad_norm": 496.7027282714844, "learning_rate": 6.796075462780139e-06, "loss": 19.9533, "step": 222370 }, { "epoch": 0.4492216696226926, "grad_norm": 85.53564453125, "learning_rate": 6.7957496910239075e-06, "loss": 23.17, "step": 222380 }, { "epoch": 0.4492418702553764, "grad_norm": 216.68374633789062, "learning_rate": 6.7954239105154084e-06, "loss": 24.8749, "step": 222390 }, { "epoch": 0.4492620708880602, "grad_norm": 365.98907470703125, "learning_rate": 6.7950981212562315e-06, "loss": 16.9925, "step": 222400 }, { "epoch": 0.449282271520744, "grad_norm": 433.2200622558594, "learning_rate": 6.794772323247965e-06, "loss": 14.6022, "step": 222410 }, { "epoch": 0.44930247215342783, "grad_norm": 888.5516967773438, "learning_rate": 6.794446516492195e-06, "loss": 26.1488, "step": 222420 }, { "epoch": 0.44932267278611165, "grad_norm": 703.0285034179688, "learning_rate": 6.794120700990509e-06, "loss": 14.7055, "step": 222430 }, { "epoch": 0.4493428734187955, "grad_norm": 9.99472713470459, "learning_rate": 6.793794876744499e-06, "loss": 13.5452, "step": 222440 }, { "epoch": 0.4493630740514793, "grad_norm": 421.5027770996094, "learning_rate": 6.793469043755747e-06, "loss": 40.9224, "step": 222450 }, { "epoch": 0.4493832746841631, "grad_norm": 555.3636474609375, "learning_rate": 6.793143202025848e-06, "loss": 23.906, "step": 222460 }, { "epoch": 0.44940347531684693, "grad_norm": 242.57211303710938, "learning_rate": 6.792817351556384e-06, "loss": 13.0388, "step": 222470 }, { "epoch": 0.44942367594953075, "grad_norm": 1042.263427734375, "learning_rate": 6.792491492348947e-06, "loss": 15.2862, 
"step": 222480 }, { "epoch": 0.4494438765822146, "grad_norm": 423.35992431640625, "learning_rate": 6.792165624405124e-06, "loss": 20.0324, "step": 222490 }, { "epoch": 0.4494640772148984, "grad_norm": 726.4552612304688, "learning_rate": 6.7918397477265e-06, "loss": 16.9545, "step": 222500 }, { "epoch": 0.44948427784758216, "grad_norm": 502.55914306640625, "learning_rate": 6.791513862314672e-06, "loss": 29.1973, "step": 222510 }, { "epoch": 0.449504478480266, "grad_norm": 272.6805725097656, "learning_rate": 6.791187968171219e-06, "loss": 10.5815, "step": 222520 }, { "epoch": 0.4495246791129498, "grad_norm": 459.9281005859375, "learning_rate": 6.790862065297733e-06, "loss": 21.6328, "step": 222530 }, { "epoch": 0.4495448797456336, "grad_norm": 335.8197021484375, "learning_rate": 6.7905361536958035e-06, "loss": 17.3794, "step": 222540 }, { "epoch": 0.44956508037831744, "grad_norm": 535.16259765625, "learning_rate": 6.7902102333670185e-06, "loss": 8.7648, "step": 222550 }, { "epoch": 0.44958528101100126, "grad_norm": 263.1183166503906, "learning_rate": 6.789884304312965e-06, "loss": 17.3051, "step": 222560 }, { "epoch": 0.4496054816436851, "grad_norm": 445.25994873046875, "learning_rate": 6.789558366535232e-06, "loss": 16.7604, "step": 222570 }, { "epoch": 0.4496256822763689, "grad_norm": 459.8729553222656, "learning_rate": 6.78923242003541e-06, "loss": 14.4448, "step": 222580 }, { "epoch": 0.4496458829090527, "grad_norm": 62.0632438659668, "learning_rate": 6.788906464815085e-06, "loss": 26.4532, "step": 222590 }, { "epoch": 0.44966608354173654, "grad_norm": 547.4466552734375, "learning_rate": 6.788580500875848e-06, "loss": 28.4726, "step": 222600 }, { "epoch": 0.44968628417442036, "grad_norm": 6680.791015625, "learning_rate": 6.788254528219285e-06, "loss": 76.5322, "step": 222610 }, { "epoch": 0.4497064848071042, "grad_norm": 291.7486877441406, "learning_rate": 6.787928546846987e-06, "loss": 18.212, "step": 222620 }, { "epoch": 0.449726685439788, "grad_norm": 520.095947265625, "learning_rate": 6.787602556760542e-06, "loss": 28.5444, "step": 222630 }, { "epoch": 0.44974688607247176, "grad_norm": 343.7140197753906, "learning_rate": 6.78727655796154e-06, "loss": 31.3997, "step": 222640 }, { "epoch": 0.4497670867051556, "grad_norm": 850.1187133789062, "learning_rate": 6.786950550451568e-06, "loss": 23.2061, "step": 222650 }, { "epoch": 0.4497872873378394, "grad_norm": 671.8289184570312, "learning_rate": 6.786624534232215e-06, "loss": 21.8656, "step": 222660 }, { "epoch": 0.4498074879705232, "grad_norm": 340.3204650878906, "learning_rate": 6.786298509305072e-06, "loss": 27.215, "step": 222670 }, { "epoch": 0.44982768860320704, "grad_norm": 291.07452392578125, "learning_rate": 6.785972475671726e-06, "loss": 16.2252, "step": 222680 }, { "epoch": 0.44984788923589086, "grad_norm": 305.812255859375, "learning_rate": 6.785646433333767e-06, "loss": 25.1343, "step": 222690 }, { "epoch": 0.4498680898685747, "grad_norm": 67.56078338623047, "learning_rate": 6.785320382292783e-06, "loss": 12.049, "step": 222700 }, { "epoch": 0.4498882905012585, "grad_norm": 29.37464714050293, "learning_rate": 6.784994322550367e-06, "loss": 17.6114, "step": 222710 }, { "epoch": 0.4499084911339423, "grad_norm": 37.972652435302734, "learning_rate": 6.7846682541081024e-06, "loss": 22.8585, "step": 222720 }, { "epoch": 0.44992869176662614, "grad_norm": 216.0361328125, "learning_rate": 6.784342176967581e-06, "loss": 9.1523, "step": 222730 }, { "epoch": 0.44994889239930996, "grad_norm": 780.3121337890625, "learning_rate": 
6.784016091130393e-06, "loss": 32.2775, "step": 222740 }, { "epoch": 0.4499690930319938, "grad_norm": 1125.4534912109375, "learning_rate": 6.783689996598126e-06, "loss": 25.3061, "step": 222750 }, { "epoch": 0.4499892936646776, "grad_norm": 264.4049987792969, "learning_rate": 6.783363893372372e-06, "loss": 23.4717, "step": 222760 }, { "epoch": 0.45000949429736137, "grad_norm": 358.88214111328125, "learning_rate": 6.783037781454718e-06, "loss": 20.7717, "step": 222770 }, { "epoch": 0.4500296949300452, "grad_norm": 525.8593139648438, "learning_rate": 6.782711660846755e-06, "loss": 12.3507, "step": 222780 }, { "epoch": 0.450049895562729, "grad_norm": 816.92236328125, "learning_rate": 6.78238553155007e-06, "loss": 29.601, "step": 222790 }, { "epoch": 0.4500700961954128, "grad_norm": 49.18048858642578, "learning_rate": 6.782059393566254e-06, "loss": 17.1982, "step": 222800 }, { "epoch": 0.45009029682809665, "grad_norm": 210.97425842285156, "learning_rate": 6.781733246896898e-06, "loss": 20.1037, "step": 222810 }, { "epoch": 0.45011049746078047, "grad_norm": 1031.457763671875, "learning_rate": 6.781407091543589e-06, "loss": 28.7679, "step": 222820 }, { "epoch": 0.4501306980934643, "grad_norm": 925.4967041015625, "learning_rate": 6.781080927507919e-06, "loss": 12.5863, "step": 222830 }, { "epoch": 0.4501508987261481, "grad_norm": 544.8528442382812, "learning_rate": 6.780754754791476e-06, "loss": 19.6443, "step": 222840 }, { "epoch": 0.4501710993588319, "grad_norm": 33.40279769897461, "learning_rate": 6.7804285733958495e-06, "loss": 32.5941, "step": 222850 }, { "epoch": 0.45019129999151575, "grad_norm": 529.589599609375, "learning_rate": 6.780102383322631e-06, "loss": 29.3027, "step": 222860 }, { "epoch": 0.45021150062419957, "grad_norm": 750.5447998046875, "learning_rate": 6.7797761845734115e-06, "loss": 19.2539, "step": 222870 }, { "epoch": 0.4502317012568834, "grad_norm": 384.02264404296875, "learning_rate": 6.779449977149774e-06, "loss": 9.5737, "step": 222880 }, { "epoch": 0.4502519018895672, "grad_norm": 490.29400634765625, "learning_rate": 6.779123761053317e-06, "loss": 19.3089, "step": 222890 }, { "epoch": 0.45027210252225097, "grad_norm": 31.462303161621094, "learning_rate": 6.778797536285625e-06, "loss": 13.2727, "step": 222900 }, { "epoch": 0.4502923031549348, "grad_norm": 362.80755615234375, "learning_rate": 6.778471302848291e-06, "loss": 11.4522, "step": 222910 }, { "epoch": 0.4503125037876186, "grad_norm": 336.7137756347656, "learning_rate": 6.778145060742902e-06, "loss": 21.5893, "step": 222920 }, { "epoch": 0.45033270442030243, "grad_norm": 529.6342163085938, "learning_rate": 6.777818809971048e-06, "loss": 22.9397, "step": 222930 }, { "epoch": 0.45035290505298625, "grad_norm": 222.5026397705078, "learning_rate": 6.777492550534325e-06, "loss": 22.5689, "step": 222940 }, { "epoch": 0.45037310568567007, "grad_norm": 647.3722534179688, "learning_rate": 6.777166282434316e-06, "loss": 21.2208, "step": 222950 }, { "epoch": 0.4503933063183539, "grad_norm": 389.1189880371094, "learning_rate": 6.776840005672615e-06, "loss": 12.6924, "step": 222960 }, { "epoch": 0.4504135069510377, "grad_norm": 128.2410888671875, "learning_rate": 6.77651372025081e-06, "loss": 15.6193, "step": 222970 }, { "epoch": 0.45043370758372153, "grad_norm": 279.0683898925781, "learning_rate": 6.776187426170494e-06, "loss": 21.5215, "step": 222980 }, { "epoch": 0.45045390821640535, "grad_norm": 1050.976806640625, "learning_rate": 6.775861123433256e-06, "loss": 28.3963, "step": 222990 }, { "epoch": 0.45047410884908917, 
"grad_norm": 435.2922668457031, "learning_rate": 6.775534812040686e-06, "loss": 13.145, "step": 223000 }, { "epoch": 0.450494309481773, "grad_norm": 228.21713256835938, "learning_rate": 6.775208491994375e-06, "loss": 16.705, "step": 223010 }, { "epoch": 0.4505145101144568, "grad_norm": 339.0024108886719, "learning_rate": 6.7748821632959126e-06, "loss": 17.5873, "step": 223020 }, { "epoch": 0.4505347107471406, "grad_norm": 99.26470947265625, "learning_rate": 6.774555825946889e-06, "loss": 25.5734, "step": 223030 }, { "epoch": 0.4505549113798244, "grad_norm": 343.356689453125, "learning_rate": 6.7742294799488965e-06, "loss": 22.6062, "step": 223040 }, { "epoch": 0.4505751120125082, "grad_norm": 416.54144287109375, "learning_rate": 6.773903125303525e-06, "loss": 28.9425, "step": 223050 }, { "epoch": 0.45059531264519204, "grad_norm": 243.22572326660156, "learning_rate": 6.773576762012365e-06, "loss": 33.0823, "step": 223060 }, { "epoch": 0.45061551327787586, "grad_norm": 371.2916564941406, "learning_rate": 6.773250390077006e-06, "loss": 12.2203, "step": 223070 }, { "epoch": 0.4506357139105597, "grad_norm": 217.40724182128906, "learning_rate": 6.77292400949904e-06, "loss": 19.6364, "step": 223080 }, { "epoch": 0.4506559145432435, "grad_norm": 237.15167236328125, "learning_rate": 6.772597620280057e-06, "loss": 16.4248, "step": 223090 }, { "epoch": 0.4506761151759273, "grad_norm": 509.1220397949219, "learning_rate": 6.772271222421649e-06, "loss": 26.5038, "step": 223100 }, { "epoch": 0.45069631580861114, "grad_norm": 391.43890380859375, "learning_rate": 6.771944815925405e-06, "loss": 16.6508, "step": 223110 }, { "epoch": 0.45071651644129496, "grad_norm": 521.5532836914062, "learning_rate": 6.771618400792919e-06, "loss": 22.183, "step": 223120 }, { "epoch": 0.4507367170739788, "grad_norm": 277.815185546875, "learning_rate": 6.771291977025778e-06, "loss": 15.3895, "step": 223130 }, { "epoch": 0.4507569177066626, "grad_norm": 247.5813751220703, "learning_rate": 6.770965544625574e-06, "loss": 18.2332, "step": 223140 }, { "epoch": 0.45077711833934636, "grad_norm": 1147.00390625, "learning_rate": 6.7706391035939e-06, "loss": 39.6065, "step": 223150 }, { "epoch": 0.4507973189720302, "grad_norm": 293.8579406738281, "learning_rate": 6.770312653932346e-06, "loss": 22.2251, "step": 223160 }, { "epoch": 0.450817519604714, "grad_norm": 230.84765625, "learning_rate": 6.769986195642503e-06, "loss": 16.5617, "step": 223170 }, { "epoch": 0.4508377202373978, "grad_norm": 288.72161865234375, "learning_rate": 6.76965972872596e-06, "loss": 12.05, "step": 223180 }, { "epoch": 0.45085792087008164, "grad_norm": 717.0557861328125, "learning_rate": 6.769333253184312e-06, "loss": 10.6119, "step": 223190 }, { "epoch": 0.45087812150276546, "grad_norm": 377.36029052734375, "learning_rate": 6.769006769019147e-06, "loss": 16.843, "step": 223200 }, { "epoch": 0.4508983221354493, "grad_norm": 542.556640625, "learning_rate": 6.76868027623206e-06, "loss": 16.8681, "step": 223210 }, { "epoch": 0.4509185227681331, "grad_norm": 870.3871459960938, "learning_rate": 6.768353774824636e-06, "loss": 12.3779, "step": 223220 }, { "epoch": 0.4509387234008169, "grad_norm": 58.22195816040039, "learning_rate": 6.7680272647984734e-06, "loss": 14.6801, "step": 223230 }, { "epoch": 0.45095892403350074, "grad_norm": 460.4647521972656, "learning_rate": 6.767700746155159e-06, "loss": 26.8794, "step": 223240 }, { "epoch": 0.45097912466618456, "grad_norm": 291.2055969238281, "learning_rate": 6.767374218896286e-06, "loss": 19.5938, "step": 223250 }, { 
"epoch": 0.4509993252988684, "grad_norm": 308.76104736328125, "learning_rate": 6.767047683023447e-06, "loss": 30.2626, "step": 223260 }, { "epoch": 0.4510195259315522, "grad_norm": 183.4350128173828, "learning_rate": 6.766721138538228e-06, "loss": 13.9079, "step": 223270 }, { "epoch": 0.45103972656423597, "grad_norm": 255.0755157470703, "learning_rate": 6.766394585442228e-06, "loss": 12.5789, "step": 223280 }, { "epoch": 0.4510599271969198, "grad_norm": 522.11181640625, "learning_rate": 6.766068023737034e-06, "loss": 31.9759, "step": 223290 }, { "epoch": 0.4510801278296036, "grad_norm": 246.31863403320312, "learning_rate": 6.765741453424237e-06, "loss": 17.7365, "step": 223300 }, { "epoch": 0.4511003284622874, "grad_norm": 165.8690948486328, "learning_rate": 6.765414874505431e-06, "loss": 17.5843, "step": 223310 }, { "epoch": 0.45112052909497125, "grad_norm": 350.4022216796875, "learning_rate": 6.765088286982209e-06, "loss": 47.494, "step": 223320 }, { "epoch": 0.45114072972765507, "grad_norm": 133.50668334960938, "learning_rate": 6.7647616908561595e-06, "loss": 13.5222, "step": 223330 }, { "epoch": 0.4511609303603389, "grad_norm": 479.4945373535156, "learning_rate": 6.764435086128876e-06, "loss": 13.6611, "step": 223340 }, { "epoch": 0.4511811309930227, "grad_norm": 202.6797332763672, "learning_rate": 6.764108472801949e-06, "loss": 10.2501, "step": 223350 }, { "epoch": 0.4512013316257065, "grad_norm": 836.3070678710938, "learning_rate": 6.763781850876972e-06, "loss": 11.0629, "step": 223360 }, { "epoch": 0.45122153225839035, "grad_norm": 213.8543243408203, "learning_rate": 6.763455220355536e-06, "loss": 23.6617, "step": 223370 }, { "epoch": 0.45124173289107417, "grad_norm": 1030.3858642578125, "learning_rate": 6.763128581239231e-06, "loss": 40.2191, "step": 223380 }, { "epoch": 0.451261933523758, "grad_norm": 424.92510986328125, "learning_rate": 6.762801933529655e-06, "loss": 13.2672, "step": 223390 }, { "epoch": 0.4512821341564418, "grad_norm": 393.67071533203125, "learning_rate": 6.762475277228393e-06, "loss": 15.9861, "step": 223400 }, { "epoch": 0.45130233478912557, "grad_norm": 439.38031005859375, "learning_rate": 6.762148612337042e-06, "loss": 27.0009, "step": 223410 }, { "epoch": 0.4513225354218094, "grad_norm": 544.7018432617188, "learning_rate": 6.761821938857191e-06, "loss": 14.2156, "step": 223420 }, { "epoch": 0.4513427360544932, "grad_norm": 557.1956787109375, "learning_rate": 6.761495256790434e-06, "loss": 15.6513, "step": 223430 }, { "epoch": 0.45136293668717703, "grad_norm": 203.35861206054688, "learning_rate": 6.761168566138366e-06, "loss": 15.0615, "step": 223440 }, { "epoch": 0.45138313731986085, "grad_norm": 349.0697326660156, "learning_rate": 6.760841866902572e-06, "loss": 49.9705, "step": 223450 }, { "epoch": 0.45140333795254467, "grad_norm": 235.16610717773438, "learning_rate": 6.7605151590846494e-06, "loss": 19.9045, "step": 223460 }, { "epoch": 0.4514235385852285, "grad_norm": 417.69879150390625, "learning_rate": 6.760188442686189e-06, "loss": 19.2494, "step": 223470 }, { "epoch": 0.4514437392179123, "grad_norm": 127.5810775756836, "learning_rate": 6.759861717708785e-06, "loss": 10.0971, "step": 223480 }, { "epoch": 0.45146393985059613, "grad_norm": 486.7557373046875, "learning_rate": 6.759534984154027e-06, "loss": 15.5324, "step": 223490 }, { "epoch": 0.45148414048327995, "grad_norm": 117.85852813720703, "learning_rate": 6.759208242023509e-06, "loss": 14.1374, "step": 223500 }, { "epoch": 0.45150434111596377, "grad_norm": 442.956298828125, "learning_rate": 
6.758881491318825e-06, "loss": 14.8639, "step": 223510 }, { "epoch": 0.4515245417486476, "grad_norm": 617.3826904296875, "learning_rate": 6.758554732041564e-06, "loss": 19.7456, "step": 223520 }, { "epoch": 0.4515447423813314, "grad_norm": 732.301025390625, "learning_rate": 6.758227964193323e-06, "loss": 19.6029, "step": 223530 }, { "epoch": 0.4515649430140152, "grad_norm": 489.2425231933594, "learning_rate": 6.757901187775689e-06, "loss": 12.1993, "step": 223540 }, { "epoch": 0.451585143646699, "grad_norm": 610.5255737304688, "learning_rate": 6.75757440279026e-06, "loss": 17.4517, "step": 223550 }, { "epoch": 0.4516053442793828, "grad_norm": 476.91864013671875, "learning_rate": 6.757247609238625e-06, "loss": 10.3758, "step": 223560 }, { "epoch": 0.45162554491206663, "grad_norm": 163.54229736328125, "learning_rate": 6.75692080712238e-06, "loss": 15.9277, "step": 223570 }, { "epoch": 0.45164574554475045, "grad_norm": 721.86865234375, "learning_rate": 6.756593996443115e-06, "loss": 23.1408, "step": 223580 }, { "epoch": 0.4516659461774343, "grad_norm": 372.91082763671875, "learning_rate": 6.756267177202425e-06, "loss": 6.0081, "step": 223590 }, { "epoch": 0.4516861468101181, "grad_norm": 226.74749755859375, "learning_rate": 6.755940349401901e-06, "loss": 42.1689, "step": 223600 }, { "epoch": 0.4517063474428019, "grad_norm": 339.08819580078125, "learning_rate": 6.755613513043136e-06, "loss": 21.0595, "step": 223610 }, { "epoch": 0.45172654807548573, "grad_norm": 179.5376739501953, "learning_rate": 6.755286668127724e-06, "loss": 15.6352, "step": 223620 }, { "epoch": 0.45174674870816955, "grad_norm": 1601.601806640625, "learning_rate": 6.7549598146572584e-06, "loss": 32.942, "step": 223630 }, { "epoch": 0.4517669493408534, "grad_norm": 425.022216796875, "learning_rate": 6.7546329526333305e-06, "loss": 20.5134, "step": 223640 }, { "epoch": 0.4517871499735372, "grad_norm": 255.98838806152344, "learning_rate": 6.754306082057534e-06, "loss": 20.1227, "step": 223650 }, { "epoch": 0.451807350606221, "grad_norm": 275.3392639160156, "learning_rate": 6.753979202931466e-06, "loss": 13.5676, "step": 223660 }, { "epoch": 0.4518275512389048, "grad_norm": 196.0421142578125, "learning_rate": 6.753652315256712e-06, "loss": 14.2024, "step": 223670 }, { "epoch": 0.4518477518715886, "grad_norm": 374.6175842285156, "learning_rate": 6.753325419034871e-06, "loss": 21.9876, "step": 223680 }, { "epoch": 0.4518679525042724, "grad_norm": 272.6880187988281, "learning_rate": 6.752998514267534e-06, "loss": 17.4269, "step": 223690 }, { "epoch": 0.45188815313695624, "grad_norm": 476.31658935546875, "learning_rate": 6.752671600956295e-06, "loss": 26.3126, "step": 223700 }, { "epoch": 0.45190835376964006, "grad_norm": 440.00982666015625, "learning_rate": 6.752344679102749e-06, "loss": 62.6821, "step": 223710 }, { "epoch": 0.4519285544023239, "grad_norm": 282.3997497558594, "learning_rate": 6.752017748708485e-06, "loss": 15.0602, "step": 223720 }, { "epoch": 0.4519487550350077, "grad_norm": 643.2767944335938, "learning_rate": 6.7516908097751e-06, "loss": 26.9476, "step": 223730 }, { "epoch": 0.4519689556676915, "grad_norm": 494.6185302734375, "learning_rate": 6.751363862304186e-06, "loss": 16.5201, "step": 223740 }, { "epoch": 0.45198915630037534, "grad_norm": 165.56671142578125, "learning_rate": 6.751036906297338e-06, "loss": 22.5521, "step": 223750 }, { "epoch": 0.45200935693305916, "grad_norm": 467.59576416015625, "learning_rate": 6.750709941756147e-06, "loss": 14.1566, "step": 223760 }, { "epoch": 0.452029557565743, 
"grad_norm": 461.00543212890625, "learning_rate": 6.7503829686822095e-06, "loss": 25.7327, "step": 223770 }, { "epoch": 0.4520497581984268, "grad_norm": 448.5977783203125, "learning_rate": 6.750055987077118e-06, "loss": 21.1888, "step": 223780 }, { "epoch": 0.45206995883111056, "grad_norm": 605.9522705078125, "learning_rate": 6.749728996942465e-06, "loss": 14.3863, "step": 223790 }, { "epoch": 0.4520901594637944, "grad_norm": 416.8888854980469, "learning_rate": 6.749401998279845e-06, "loss": 26.7473, "step": 223800 }, { "epoch": 0.4521103600964782, "grad_norm": 558.3778686523438, "learning_rate": 6.749074991090852e-06, "loss": 24.0431, "step": 223810 }, { "epoch": 0.452130560729162, "grad_norm": 659.2818603515625, "learning_rate": 6.74874797537708e-06, "loss": 20.5936, "step": 223820 }, { "epoch": 0.45215076136184584, "grad_norm": 584.5877075195312, "learning_rate": 6.748420951140121e-06, "loss": 36.6595, "step": 223830 }, { "epoch": 0.45217096199452966, "grad_norm": 650.2181396484375, "learning_rate": 6.748093918381572e-06, "loss": 21.7985, "step": 223840 }, { "epoch": 0.4521911626272135, "grad_norm": 249.53504943847656, "learning_rate": 6.747766877103025e-06, "loss": 21.3749, "step": 223850 }, { "epoch": 0.4522113632598973, "grad_norm": 280.7706298828125, "learning_rate": 6.7474398273060725e-06, "loss": 14.0659, "step": 223860 }, { "epoch": 0.4522315638925811, "grad_norm": 329.1269836425781, "learning_rate": 6.747112768992313e-06, "loss": 13.8137, "step": 223870 }, { "epoch": 0.45225176452526494, "grad_norm": 1071.5306396484375, "learning_rate": 6.7467857021633354e-06, "loss": 34.511, "step": 223880 }, { "epoch": 0.45227196515794876, "grad_norm": 534.2124633789062, "learning_rate": 6.746458626820738e-06, "loss": 20.4756, "step": 223890 }, { "epoch": 0.4522921657906326, "grad_norm": 224.7541046142578, "learning_rate": 6.746131542966112e-06, "loss": 24.4721, "step": 223900 }, { "epoch": 0.4523123664233164, "grad_norm": 214.91180419921875, "learning_rate": 6.745804450601053e-06, "loss": 13.8816, "step": 223910 }, { "epoch": 0.45233256705600017, "grad_norm": 539.7750244140625, "learning_rate": 6.745477349727154e-06, "loss": 19.0091, "step": 223920 }, { "epoch": 0.452352767688684, "grad_norm": 20.45575523376465, "learning_rate": 6.74515024034601e-06, "loss": 18.5851, "step": 223930 }, { "epoch": 0.4523729683213678, "grad_norm": 295.1483459472656, "learning_rate": 6.744823122459217e-06, "loss": 11.5418, "step": 223940 }, { "epoch": 0.4523931689540516, "grad_norm": 123.01981353759766, "learning_rate": 6.744495996068367e-06, "loss": 11.6932, "step": 223950 }, { "epoch": 0.45241336958673545, "grad_norm": 325.3513488769531, "learning_rate": 6.744168861175056e-06, "loss": 15.107, "step": 223960 }, { "epoch": 0.45243357021941927, "grad_norm": 332.8097839355469, "learning_rate": 6.743841717780876e-06, "loss": 7.7247, "step": 223970 }, { "epoch": 0.4524537708521031, "grad_norm": 283.2469482421875, "learning_rate": 6.743514565887424e-06, "loss": 14.3612, "step": 223980 }, { "epoch": 0.4524739714847869, "grad_norm": 493.7138366699219, "learning_rate": 6.743187405496292e-06, "loss": 28.5663, "step": 223990 }, { "epoch": 0.4524941721174707, "grad_norm": 248.72547912597656, "learning_rate": 6.7428602366090764e-06, "loss": 16.5097, "step": 224000 }, { "epoch": 0.45251437275015455, "grad_norm": 521.6533813476562, "learning_rate": 6.742533059227372e-06, "loss": 12.9653, "step": 224010 }, { "epoch": 0.45253457338283837, "grad_norm": 376.3984375, "learning_rate": 6.742205873352773e-06, "loss": 13.4344, "step": 
224020 }, { "epoch": 0.4525547740155222, "grad_norm": 913.5752563476562, "learning_rate": 6.741878678986873e-06, "loss": 26.3094, "step": 224030 }, { "epoch": 0.452574974648206, "grad_norm": 478.7392272949219, "learning_rate": 6.741551476131269e-06, "loss": 30.4023, "step": 224040 }, { "epoch": 0.45259517528088977, "grad_norm": 262.20526123046875, "learning_rate": 6.741224264787553e-06, "loss": 23.2195, "step": 224050 }, { "epoch": 0.4526153759135736, "grad_norm": 560.4160766601562, "learning_rate": 6.740897044957322e-06, "loss": 22.4177, "step": 224060 }, { "epoch": 0.4526355765462574, "grad_norm": 435.0744323730469, "learning_rate": 6.74056981664217e-06, "loss": 20.3772, "step": 224070 }, { "epoch": 0.45265577717894123, "grad_norm": 4965.35400390625, "learning_rate": 6.740242579843691e-06, "loss": 73.2704, "step": 224080 }, { "epoch": 0.45267597781162505, "grad_norm": 402.28302001953125, "learning_rate": 6.739915334563481e-06, "loss": 28.474, "step": 224090 }, { "epoch": 0.45269617844430887, "grad_norm": 104.3423080444336, "learning_rate": 6.739588080803134e-06, "loss": 12.5469, "step": 224100 }, { "epoch": 0.4527163790769927, "grad_norm": 491.0937805175781, "learning_rate": 6.739260818564248e-06, "loss": 17.2166, "step": 224110 }, { "epoch": 0.4527365797096765, "grad_norm": 181.10830688476562, "learning_rate": 6.738933547848414e-06, "loss": 19.3685, "step": 224120 }, { "epoch": 0.45275678034236033, "grad_norm": 435.29534912109375, "learning_rate": 6.7386062686572286e-06, "loss": 22.2347, "step": 224130 }, { "epoch": 0.45277698097504415, "grad_norm": 51.51202392578125, "learning_rate": 6.738278980992289e-06, "loss": 21.777, "step": 224140 }, { "epoch": 0.45279718160772797, "grad_norm": 428.5145568847656, "learning_rate": 6.737951684855185e-06, "loss": 16.9964, "step": 224150 }, { "epoch": 0.4528173822404118, "grad_norm": 715.14306640625, "learning_rate": 6.737624380247519e-06, "loss": 15.9501, "step": 224160 }, { "epoch": 0.4528375828730956, "grad_norm": 42.78643798828125, "learning_rate": 6.737297067170879e-06, "loss": 20.7435, "step": 224170 }, { "epoch": 0.4528577835057794, "grad_norm": 387.38189697265625, "learning_rate": 6.736969745626867e-06, "loss": 16.737, "step": 224180 }, { "epoch": 0.4528779841384632, "grad_norm": 52.91683578491211, "learning_rate": 6.736642415617073e-06, "loss": 15.4142, "step": 224190 }, { "epoch": 0.452898184771147, "grad_norm": 2.1474220752716064, "learning_rate": 6.736315077143095e-06, "loss": 27.8739, "step": 224200 }, { "epoch": 0.45291838540383084, "grad_norm": 271.73291015625, "learning_rate": 6.735987730206529e-06, "loss": 13.633, "step": 224210 }, { "epoch": 0.45293858603651466, "grad_norm": 542.3934326171875, "learning_rate": 6.735660374808969e-06, "loss": 25.4332, "step": 224220 }, { "epoch": 0.4529587866691985, "grad_norm": 7.41579532623291, "learning_rate": 6.73533301095201e-06, "loss": 15.1197, "step": 224230 }, { "epoch": 0.4529789873018823, "grad_norm": 362.191650390625, "learning_rate": 6.7350056386372485e-06, "loss": 18.961, "step": 224240 }, { "epoch": 0.4529991879345661, "grad_norm": 606.1195068359375, "learning_rate": 6.7346782578662795e-06, "loss": 23.9658, "step": 224250 }, { "epoch": 0.45301938856724994, "grad_norm": 488.9588623046875, "learning_rate": 6.7343508686407e-06, "loss": 28.3714, "step": 224260 }, { "epoch": 0.45303958919993376, "grad_norm": 119.45907592773438, "learning_rate": 6.734023470962106e-06, "loss": 22.3117, "step": 224270 }, { "epoch": 0.4530597898326176, "grad_norm": 89.31884002685547, "learning_rate": 
6.733696064832089e-06, "loss": 21.5681, "step": 224280 }, { "epoch": 0.4530799904653014, "grad_norm": 162.55581665039062, "learning_rate": 6.733368650252249e-06, "loss": 18.6347, "step": 224290 }, { "epoch": 0.4531001910979852, "grad_norm": 354.7278747558594, "learning_rate": 6.733041227224182e-06, "loss": 37.7039, "step": 224300 }, { "epoch": 0.453120391730669, "grad_norm": 279.0623474121094, "learning_rate": 6.732713795749479e-06, "loss": 20.6569, "step": 224310 }, { "epoch": 0.4531405923633528, "grad_norm": 291.3842468261719, "learning_rate": 6.732386355829742e-06, "loss": 22.4131, "step": 224320 }, { "epoch": 0.4531607929960366, "grad_norm": 306.61767578125, "learning_rate": 6.7320589074665606e-06, "loss": 25.2244, "step": 224330 }, { "epoch": 0.45318099362872044, "grad_norm": 435.5203552246094, "learning_rate": 6.7317314506615385e-06, "loss": 10.8718, "step": 224340 }, { "epoch": 0.45320119426140426, "grad_norm": 598.0634765625, "learning_rate": 6.731403985416265e-06, "loss": 21.7381, "step": 224350 }, { "epoch": 0.4532213948940881, "grad_norm": 50.27971267700195, "learning_rate": 6.731076511732338e-06, "loss": 21.2074, "step": 224360 }, { "epoch": 0.4532415955267719, "grad_norm": 279.5244445800781, "learning_rate": 6.730749029611354e-06, "loss": 12.9486, "step": 224370 }, { "epoch": 0.4532617961594557, "grad_norm": 84.92265319824219, "learning_rate": 6.730421539054911e-06, "loss": 17.9265, "step": 224380 }, { "epoch": 0.45328199679213954, "grad_norm": 196.0972137451172, "learning_rate": 6.730094040064602e-06, "loss": 43.3192, "step": 224390 }, { "epoch": 0.45330219742482336, "grad_norm": 481.4926452636719, "learning_rate": 6.729766532642024e-06, "loss": 22.348, "step": 224400 }, { "epoch": 0.4533223980575072, "grad_norm": 509.6544189453125, "learning_rate": 6.729439016788774e-06, "loss": 14.9963, "step": 224410 }, { "epoch": 0.453342598690191, "grad_norm": 81.28437805175781, "learning_rate": 6.72911149250645e-06, "loss": 9.7053, "step": 224420 }, { "epoch": 0.45336279932287477, "grad_norm": 365.74481201171875, "learning_rate": 6.7287839597966444e-06, "loss": 22.3584, "step": 224430 }, { "epoch": 0.4533829999555586, "grad_norm": 287.0074462890625, "learning_rate": 6.728456418660954e-06, "loss": 17.6767, "step": 224440 }, { "epoch": 0.4534032005882424, "grad_norm": 754.4468994140625, "learning_rate": 6.7281288691009795e-06, "loss": 25.3387, "step": 224450 }, { "epoch": 0.4534234012209262, "grad_norm": 496.4479675292969, "learning_rate": 6.727801311118314e-06, "loss": 23.3791, "step": 224460 }, { "epoch": 0.45344360185361005, "grad_norm": 401.7496643066406, "learning_rate": 6.727473744714554e-06, "loss": 14.0117, "step": 224470 }, { "epoch": 0.45346380248629387, "grad_norm": 221.49588012695312, "learning_rate": 6.727146169891297e-06, "loss": 20.1051, "step": 224480 }, { "epoch": 0.4534840031189777, "grad_norm": 53.74786376953125, "learning_rate": 6.726818586650137e-06, "loss": 20.79, "step": 224490 }, { "epoch": 0.4535042037516615, "grad_norm": 272.5646667480469, "learning_rate": 6.7264909949926735e-06, "loss": 16.9446, "step": 224500 }, { "epoch": 0.4535244043843453, "grad_norm": 380.64703369140625, "learning_rate": 6.726163394920503e-06, "loss": 42.3083, "step": 224510 }, { "epoch": 0.45354460501702915, "grad_norm": 193.45034790039062, "learning_rate": 6.725835786435222e-06, "loss": 16.3565, "step": 224520 }, { "epoch": 0.45356480564971297, "grad_norm": 494.8837585449219, "learning_rate": 6.725508169538425e-06, "loss": 30.3393, "step": 224530 }, { "epoch": 0.4535850062823968, 
"grad_norm": 393.85882568359375, "learning_rate": 6.725180544231711e-06, "loss": 14.4976, "step": 224540 }, { "epoch": 0.4536052069150806, "grad_norm": 404.4794921875, "learning_rate": 6.7248529105166785e-06, "loss": 15.4334, "step": 224550 }, { "epoch": 0.45362540754776437, "grad_norm": 846.5159301757812, "learning_rate": 6.724525268394919e-06, "loss": 25.1088, "step": 224560 }, { "epoch": 0.4536456081804482, "grad_norm": 659.883056640625, "learning_rate": 6.7241976178680335e-06, "loss": 23.7296, "step": 224570 }, { "epoch": 0.453665808813132, "grad_norm": 489.0628967285156, "learning_rate": 6.723869958937619e-06, "loss": 17.5904, "step": 224580 }, { "epoch": 0.45368600944581583, "grad_norm": 516.4916381835938, "learning_rate": 6.723542291605271e-06, "loss": 16.8007, "step": 224590 }, { "epoch": 0.45370621007849965, "grad_norm": 22.261503219604492, "learning_rate": 6.723214615872585e-06, "loss": 15.365, "step": 224600 }, { "epoch": 0.45372641071118347, "grad_norm": 216.9540557861328, "learning_rate": 6.722886931741163e-06, "loss": 21.4706, "step": 224610 }, { "epoch": 0.4537466113438673, "grad_norm": 172.14511108398438, "learning_rate": 6.7225592392125975e-06, "loss": 23.9545, "step": 224620 }, { "epoch": 0.4537668119765511, "grad_norm": 213.1126251220703, "learning_rate": 6.722231538288486e-06, "loss": 18.9931, "step": 224630 }, { "epoch": 0.45378701260923493, "grad_norm": 397.17864990234375, "learning_rate": 6.7219038289704294e-06, "loss": 17.2522, "step": 224640 }, { "epoch": 0.45380721324191875, "grad_norm": 338.5043640136719, "learning_rate": 6.72157611126002e-06, "loss": 18.2936, "step": 224650 }, { "epoch": 0.45382741387460257, "grad_norm": 69.46562194824219, "learning_rate": 6.721248385158859e-06, "loss": 12.1464, "step": 224660 }, { "epoch": 0.4538476145072864, "grad_norm": 509.01617431640625, "learning_rate": 6.720920650668542e-06, "loss": 20.8537, "step": 224670 }, { "epoch": 0.4538678151399702, "grad_norm": 457.7559509277344, "learning_rate": 6.720592907790667e-06, "loss": 17.0811, "step": 224680 }, { "epoch": 0.453888015772654, "grad_norm": 399.3314208984375, "learning_rate": 6.720265156526828e-06, "loss": 15.585, "step": 224690 }, { "epoch": 0.4539082164053378, "grad_norm": 176.3326873779297, "learning_rate": 6.719937396878628e-06, "loss": 20.0065, "step": 224700 }, { "epoch": 0.4539284170380216, "grad_norm": 99.04472351074219, "learning_rate": 6.719609628847662e-06, "loss": 10.5466, "step": 224710 }, { "epoch": 0.45394861767070543, "grad_norm": 11.641637802124023, "learning_rate": 6.7192818524355266e-06, "loss": 14.1317, "step": 224720 }, { "epoch": 0.45396881830338925, "grad_norm": 356.7647399902344, "learning_rate": 6.7189540676438195e-06, "loss": 16.1533, "step": 224730 }, { "epoch": 0.4539890189360731, "grad_norm": 532.47705078125, "learning_rate": 6.718626274474138e-06, "loss": 16.446, "step": 224740 }, { "epoch": 0.4540092195687569, "grad_norm": 143.8042755126953, "learning_rate": 6.718298472928082e-06, "loss": 24.8809, "step": 224750 }, { "epoch": 0.4540294202014407, "grad_norm": 692.6828002929688, "learning_rate": 6.717970663007245e-06, "loss": 14.8526, "step": 224760 }, { "epoch": 0.45404962083412453, "grad_norm": 855.0104370117188, "learning_rate": 6.71764284471323e-06, "loss": 22.105, "step": 224770 }, { "epoch": 0.45406982146680835, "grad_norm": 213.2781219482422, "learning_rate": 6.717315018047631e-06, "loss": 19.6701, "step": 224780 }, { "epoch": 0.4540900220994922, "grad_norm": 436.2740173339844, "learning_rate": 6.716987183012048e-06, "loss": 21.4793, 
"step": 224790 }, { "epoch": 0.454110222732176, "grad_norm": 242.42498779296875, "learning_rate": 6.716659339608077e-06, "loss": 27.442, "step": 224800 }, { "epoch": 0.4541304233648598, "grad_norm": 389.03875732421875, "learning_rate": 6.7163314878373166e-06, "loss": 19.0603, "step": 224810 }, { "epoch": 0.4541506239975436, "grad_norm": 16.420045852661133, "learning_rate": 6.716003627701365e-06, "loss": 23.3658, "step": 224820 }, { "epoch": 0.4541708246302274, "grad_norm": 807.7119750976562, "learning_rate": 6.71567575920182e-06, "loss": 20.549, "step": 224830 }, { "epoch": 0.4541910252629112, "grad_norm": 405.46063232421875, "learning_rate": 6.715347882340278e-06, "loss": 20.5764, "step": 224840 }, { "epoch": 0.45421122589559504, "grad_norm": 301.35699462890625, "learning_rate": 6.7150199971183395e-06, "loss": 27.6921, "step": 224850 }, { "epoch": 0.45423142652827886, "grad_norm": 528.4838256835938, "learning_rate": 6.714692103537601e-06, "loss": 15.2521, "step": 224860 }, { "epoch": 0.4542516271609627, "grad_norm": 877.6928100585938, "learning_rate": 6.7143642015996626e-06, "loss": 21.6614, "step": 224870 }, { "epoch": 0.4542718277936465, "grad_norm": 28.3657169342041, "learning_rate": 6.714036291306121e-06, "loss": 18.8145, "step": 224880 }, { "epoch": 0.4542920284263303, "grad_norm": 690.2528686523438, "learning_rate": 6.7137083726585724e-06, "loss": 17.7149, "step": 224890 }, { "epoch": 0.45431222905901414, "grad_norm": 767.5856323242188, "learning_rate": 6.713380445658618e-06, "loss": 11.7709, "step": 224900 }, { "epoch": 0.45433242969169796, "grad_norm": 504.2879638671875, "learning_rate": 6.713052510307856e-06, "loss": 15.2106, "step": 224910 }, { "epoch": 0.4543526303243818, "grad_norm": 431.6560363769531, "learning_rate": 6.712724566607882e-06, "loss": 14.1891, "step": 224920 }, { "epoch": 0.4543728309570656, "grad_norm": 486.3409118652344, "learning_rate": 6.712396614560298e-06, "loss": 21.3397, "step": 224930 }, { "epoch": 0.45439303158974936, "grad_norm": 399.1619567871094, "learning_rate": 6.712068654166699e-06, "loss": 20.931, "step": 224940 }, { "epoch": 0.4544132322224332, "grad_norm": 874.1525268554688, "learning_rate": 6.711740685428687e-06, "loss": 18.339, "step": 224950 }, { "epoch": 0.454433432855117, "grad_norm": 626.2977294921875, "learning_rate": 6.711412708347857e-06, "loss": 41.3339, "step": 224960 }, { "epoch": 0.4544536334878008, "grad_norm": 573.0205688476562, "learning_rate": 6.711084722925809e-06, "loss": 26.5318, "step": 224970 }, { "epoch": 0.45447383412048464, "grad_norm": 427.6914367675781, "learning_rate": 6.7107567291641425e-06, "loss": 23.2325, "step": 224980 }, { "epoch": 0.45449403475316846, "grad_norm": 544.6525268554688, "learning_rate": 6.710428727064454e-06, "loss": 10.5366, "step": 224990 }, { "epoch": 0.4545142353858523, "grad_norm": 89.42131805419922, "learning_rate": 6.710100716628345e-06, "loss": 16.2111, "step": 225000 }, { "epoch": 0.4545344360185361, "grad_norm": 193.01864624023438, "learning_rate": 6.709772697857411e-06, "loss": 20.8958, "step": 225010 }, { "epoch": 0.4545546366512199, "grad_norm": 55.706871032714844, "learning_rate": 6.709444670753252e-06, "loss": 15.9876, "step": 225020 }, { "epoch": 0.45457483728390374, "grad_norm": 261.5452880859375, "learning_rate": 6.709116635317469e-06, "loss": 20.595, "step": 225030 }, { "epoch": 0.45459503791658756, "grad_norm": 322.2333679199219, "learning_rate": 6.708788591551658e-06, "loss": 10.3576, "step": 225040 }, { "epoch": 0.4546152385492714, "grad_norm": 182.4540252685547, 
"learning_rate": 6.708460539457418e-06, "loss": 27.0368, "step": 225050 }, { "epoch": 0.4546354391819552, "grad_norm": 252.28440856933594, "learning_rate": 6.708132479036349e-06, "loss": 18.9177, "step": 225060 }, { "epoch": 0.45465563981463897, "grad_norm": 294.22955322265625, "learning_rate": 6.707804410290049e-06, "loss": 16.5476, "step": 225070 }, { "epoch": 0.4546758404473228, "grad_norm": 678.5457153320312, "learning_rate": 6.707476333220116e-06, "loss": 18.4188, "step": 225080 }, { "epoch": 0.4546960410800066, "grad_norm": 1026.339599609375, "learning_rate": 6.707148247828153e-06, "loss": 26.8379, "step": 225090 }, { "epoch": 0.4547162417126904, "grad_norm": 236.086181640625, "learning_rate": 6.7068201541157555e-06, "loss": 24.4431, "step": 225100 }, { "epoch": 0.45473644234537425, "grad_norm": 733.8051147460938, "learning_rate": 6.706492052084524e-06, "loss": 20.2767, "step": 225110 }, { "epoch": 0.45475664297805807, "grad_norm": 366.6125183105469, "learning_rate": 6.706163941736057e-06, "loss": 17.8546, "step": 225120 }, { "epoch": 0.4547768436107419, "grad_norm": 425.5577087402344, "learning_rate": 6.705835823071953e-06, "loss": 15.3798, "step": 225130 }, { "epoch": 0.4547970442434257, "grad_norm": 246.91073608398438, "learning_rate": 6.7055076960938135e-06, "loss": 11.608, "step": 225140 }, { "epoch": 0.4548172448761095, "grad_norm": 77.5411148071289, "learning_rate": 6.705179560803236e-06, "loss": 7.9056, "step": 225150 }, { "epoch": 0.45483744550879335, "grad_norm": 2.3171474933624268, "learning_rate": 6.704851417201821e-06, "loss": 15.201, "step": 225160 }, { "epoch": 0.45485764614147717, "grad_norm": 199.99977111816406, "learning_rate": 6.704523265291165e-06, "loss": 13.8719, "step": 225170 }, { "epoch": 0.454877846774161, "grad_norm": 611.3469848632812, "learning_rate": 6.704195105072871e-06, "loss": 11.1166, "step": 225180 }, { "epoch": 0.4548980474068448, "grad_norm": 300.5711669921875, "learning_rate": 6.703866936548534e-06, "loss": 20.5433, "step": 225190 }, { "epoch": 0.45491824803952857, "grad_norm": 583.619873046875, "learning_rate": 6.70353875971976e-06, "loss": 26.941, "step": 225200 }, { "epoch": 0.4549384486722124, "grad_norm": 640.599853515625, "learning_rate": 6.703210574588142e-06, "loss": 16.2709, "step": 225210 }, { "epoch": 0.4549586493048962, "grad_norm": 740.7960815429688, "learning_rate": 6.702882381155283e-06, "loss": 26.6456, "step": 225220 }, { "epoch": 0.45497884993758003, "grad_norm": 45.49562072753906, "learning_rate": 6.702554179422782e-06, "loss": 18.9018, "step": 225230 }, { "epoch": 0.45499905057026385, "grad_norm": 732.8751831054688, "learning_rate": 6.702225969392238e-06, "loss": 20.7339, "step": 225240 }, { "epoch": 0.45501925120294767, "grad_norm": 1911.6829833984375, "learning_rate": 6.701897751065251e-06, "loss": 21.852, "step": 225250 }, { "epoch": 0.4550394518356315, "grad_norm": 242.33111572265625, "learning_rate": 6.701569524443421e-06, "loss": 28.6399, "step": 225260 }, { "epoch": 0.4550596524683153, "grad_norm": 728.8468017578125, "learning_rate": 6.701241289528348e-06, "loss": 17.6148, "step": 225270 }, { "epoch": 0.45507985310099913, "grad_norm": 755.6907958984375, "learning_rate": 6.700913046321631e-06, "loss": 16.1734, "step": 225280 }, { "epoch": 0.45510005373368295, "grad_norm": 472.9110412597656, "learning_rate": 6.700584794824871e-06, "loss": 29.4159, "step": 225290 }, { "epoch": 0.45512025436636677, "grad_norm": 404.24493408203125, "learning_rate": 6.700256535039665e-06, "loss": 25.2351, "step": 225300 }, { "epoch": 
0.4551404549990506, "grad_norm": 634.1502075195312, "learning_rate": 6.6999282669676155e-06, "loss": 13.6189, "step": 225310 }, { "epoch": 0.4551606556317344, "grad_norm": 1.6085678339004517, "learning_rate": 6.699599990610324e-06, "loss": 19.2426, "step": 225320 }, { "epoch": 0.4551808562644182, "grad_norm": 300.082763671875, "learning_rate": 6.699271705969386e-06, "loss": 19.8193, "step": 225330 }, { "epoch": 0.455201056897102, "grad_norm": 119.96390533447266, "learning_rate": 6.698943413046404e-06, "loss": 23.3312, "step": 225340 }, { "epoch": 0.4552212575297858, "grad_norm": 372.2552795410156, "learning_rate": 6.698615111842977e-06, "loss": 19.8844, "step": 225350 }, { "epoch": 0.45524145816246964, "grad_norm": 392.03778076171875, "learning_rate": 6.698286802360708e-06, "loss": 22.2316, "step": 225360 }, { "epoch": 0.45526165879515346, "grad_norm": 127.15750122070312, "learning_rate": 6.697958484601193e-06, "loss": 22.944, "step": 225370 }, { "epoch": 0.4552818594278373, "grad_norm": 162.3270263671875, "learning_rate": 6.697630158566038e-06, "loss": 14.8616, "step": 225380 }, { "epoch": 0.4553020600605211, "grad_norm": 462.6418762207031, "learning_rate": 6.697301824256836e-06, "loss": 31.5704, "step": 225390 }, { "epoch": 0.4553222606932049, "grad_norm": 575.5155639648438, "learning_rate": 6.6969734816751906e-06, "loss": 14.8419, "step": 225400 }, { "epoch": 0.45534246132588874, "grad_norm": 397.850830078125, "learning_rate": 6.696645130822704e-06, "loss": 20.0939, "step": 225410 }, { "epoch": 0.45536266195857256, "grad_norm": 236.34840393066406, "learning_rate": 6.6963167717009745e-06, "loss": 14.465, "step": 225420 }, { "epoch": 0.4553828625912564, "grad_norm": 277.0977783203125, "learning_rate": 6.695988404311603e-06, "loss": 28.5895, "step": 225430 }, { "epoch": 0.4554030632239402, "grad_norm": 553.6280517578125, "learning_rate": 6.695660028656189e-06, "loss": 22.1155, "step": 225440 }, { "epoch": 0.455423263856624, "grad_norm": 379.44427490234375, "learning_rate": 6.6953316447363335e-06, "loss": 26.2365, "step": 225450 }, { "epoch": 0.4554434644893078, "grad_norm": 708.2862548828125, "learning_rate": 6.695003252553638e-06, "loss": 21.1336, "step": 225460 }, { "epoch": 0.4554636651219916, "grad_norm": 75.57585144042969, "learning_rate": 6.694674852109701e-06, "loss": 9.9841, "step": 225470 }, { "epoch": 0.4554838657546754, "grad_norm": 230.75897216796875, "learning_rate": 6.694346443406126e-06, "loss": 16.4379, "step": 225480 }, { "epoch": 0.45550406638735924, "grad_norm": 595.7857666015625, "learning_rate": 6.694018026444511e-06, "loss": 21.4264, "step": 225490 }, { "epoch": 0.45552426702004306, "grad_norm": 300.95037841796875, "learning_rate": 6.693689601226458e-06, "loss": 11.351, "step": 225500 }, { "epoch": 0.4555444676527269, "grad_norm": 620.47412109375, "learning_rate": 6.693361167753567e-06, "loss": 44.0126, "step": 225510 }, { "epoch": 0.4555646682854107, "grad_norm": 602.3052978515625, "learning_rate": 6.693032726027438e-06, "loss": 23.7361, "step": 225520 }, { "epoch": 0.4555848689180945, "grad_norm": 14.683794975280762, "learning_rate": 6.692704276049674e-06, "loss": 17.5383, "step": 225530 }, { "epoch": 0.45560506955077834, "grad_norm": 403.2425842285156, "learning_rate": 6.6923758178218756e-06, "loss": 8.8611, "step": 225540 }, { "epoch": 0.45562527018346216, "grad_norm": 289.0656433105469, "learning_rate": 6.692047351345641e-06, "loss": 30.0383, "step": 225550 }, { "epoch": 0.455645470816146, "grad_norm": 423.5158386230469, "learning_rate": 6.6917188766225736e-06, 
"loss": 9.0815, "step": 225560 }, { "epoch": 0.4556656714488298, "grad_norm": 630.5218505859375, "learning_rate": 6.691390393654274e-06, "loss": 31.1418, "step": 225570 }, { "epoch": 0.45568587208151357, "grad_norm": 403.40777587890625, "learning_rate": 6.691061902442342e-06, "loss": 31.6541, "step": 225580 }, { "epoch": 0.4557060727141974, "grad_norm": 296.65716552734375, "learning_rate": 6.69073340298838e-06, "loss": 9.8349, "step": 225590 }, { "epoch": 0.4557262733468812, "grad_norm": 360.7072448730469, "learning_rate": 6.690404895293987e-06, "loss": 16.6198, "step": 225600 }, { "epoch": 0.455746473979565, "grad_norm": 168.9312286376953, "learning_rate": 6.690076379360767e-06, "loss": 7.6697, "step": 225610 }, { "epoch": 0.45576667461224885, "grad_norm": 1287.2730712890625, "learning_rate": 6.689747855190319e-06, "loss": 36.319, "step": 225620 }, { "epoch": 0.45578687524493267, "grad_norm": 1017.265380859375, "learning_rate": 6.689419322784245e-06, "loss": 22.6657, "step": 225630 }, { "epoch": 0.4558070758776165, "grad_norm": 391.20941162109375, "learning_rate": 6.689090782144146e-06, "loss": 7.4136, "step": 225640 }, { "epoch": 0.4558272765103003, "grad_norm": 758.8549194335938, "learning_rate": 6.688762233271625e-06, "loss": 19.2401, "step": 225650 }, { "epoch": 0.4558474771429841, "grad_norm": 501.3313293457031, "learning_rate": 6.68843367616828e-06, "loss": 24.5548, "step": 225660 }, { "epoch": 0.45586767777566795, "grad_norm": 1015.6280517578125, "learning_rate": 6.6881051108357146e-06, "loss": 19.7456, "step": 225670 }, { "epoch": 0.45588787840835177, "grad_norm": 314.453125, "learning_rate": 6.68777653727553e-06, "loss": 27.9557, "step": 225680 }, { "epoch": 0.4559080790410356, "grad_norm": 257.3055725097656, "learning_rate": 6.687447955489326e-06, "loss": 23.2862, "step": 225690 }, { "epoch": 0.4559282796737194, "grad_norm": 604.030029296875, "learning_rate": 6.687119365478707e-06, "loss": 27.7463, "step": 225700 }, { "epoch": 0.45594848030640317, "grad_norm": 238.85536193847656, "learning_rate": 6.68679076724527e-06, "loss": 18.7839, "step": 225710 }, { "epoch": 0.455968680939087, "grad_norm": 575.19287109375, "learning_rate": 6.686462160790623e-06, "loss": 17.8359, "step": 225720 }, { "epoch": 0.4559888815717708, "grad_norm": 370.54681396484375, "learning_rate": 6.686133546116363e-06, "loss": 17.924, "step": 225730 }, { "epoch": 0.45600908220445463, "grad_norm": 100.73015594482422, "learning_rate": 6.685804923224091e-06, "loss": 12.0782, "step": 225740 }, { "epoch": 0.45602928283713845, "grad_norm": 471.8533630371094, "learning_rate": 6.685476292115411e-06, "loss": 14.371, "step": 225750 }, { "epoch": 0.45604948346982227, "grad_norm": 667.66650390625, "learning_rate": 6.6851476527919235e-06, "loss": 12.8826, "step": 225760 }, { "epoch": 0.4560696841025061, "grad_norm": 221.37005615234375, "learning_rate": 6.684819005255232e-06, "loss": 30.8509, "step": 225770 }, { "epoch": 0.4560898847351899, "grad_norm": 447.97076416015625, "learning_rate": 6.684490349506937e-06, "loss": 21.0851, "step": 225780 }, { "epoch": 0.45611008536787373, "grad_norm": 219.70057678222656, "learning_rate": 6.6841616855486395e-06, "loss": 18.8927, "step": 225790 }, { "epoch": 0.45613028600055755, "grad_norm": 265.75439453125, "learning_rate": 6.683833013381942e-06, "loss": 15.736, "step": 225800 }, { "epoch": 0.45615048663324137, "grad_norm": 383.4543151855469, "learning_rate": 6.683504333008448e-06, "loss": 21.2642, "step": 225810 }, { "epoch": 0.4561706872659252, "grad_norm": 231.30374145507812, 
"learning_rate": 6.683175644429756e-06, "loss": 25.2174, "step": 225820 }, { "epoch": 0.456190887898609, "grad_norm": 736.73583984375, "learning_rate": 6.682846947647472e-06, "loss": 12.7884, "step": 225830 }, { "epoch": 0.4562110885312928, "grad_norm": 321.27716064453125, "learning_rate": 6.682518242663195e-06, "loss": 16.2992, "step": 225840 }, { "epoch": 0.4562312891639766, "grad_norm": 222.0, "learning_rate": 6.682189529478528e-06, "loss": 18.1253, "step": 225850 }, { "epoch": 0.4562514897966604, "grad_norm": 425.1649475097656, "learning_rate": 6.681860808095074e-06, "loss": 31.4157, "step": 225860 }, { "epoch": 0.45627169042934423, "grad_norm": 445.70001220703125, "learning_rate": 6.681532078514434e-06, "loss": 21.3062, "step": 225870 }, { "epoch": 0.45629189106202805, "grad_norm": 12.19681167602539, "learning_rate": 6.681203340738212e-06, "loss": 20.5821, "step": 225880 }, { "epoch": 0.4563120916947119, "grad_norm": 615.259521484375, "learning_rate": 6.680874594768006e-06, "loss": 24.3428, "step": 225890 }, { "epoch": 0.4563322923273957, "grad_norm": 291.0129699707031, "learning_rate": 6.680545840605423e-06, "loss": 34.5789, "step": 225900 }, { "epoch": 0.4563524929600795, "grad_norm": 379.67230224609375, "learning_rate": 6.680217078252063e-06, "loss": 14.0362, "step": 225910 }, { "epoch": 0.45637269359276333, "grad_norm": 347.0411376953125, "learning_rate": 6.6798883077095276e-06, "loss": 21.1224, "step": 225920 }, { "epoch": 0.45639289422544715, "grad_norm": 96.60426330566406, "learning_rate": 6.679559528979423e-06, "loss": 24.0811, "step": 225930 }, { "epoch": 0.456413094858131, "grad_norm": 246.80841064453125, "learning_rate": 6.679230742063347e-06, "loss": 17.1332, "step": 225940 }, { "epoch": 0.4564332954908148, "grad_norm": 203.31515502929688, "learning_rate": 6.6789019469629034e-06, "loss": 27.6699, "step": 225950 }, { "epoch": 0.4564534961234986, "grad_norm": 1652.2716064453125, "learning_rate": 6.678573143679696e-06, "loss": 40.7394, "step": 225960 }, { "epoch": 0.4564736967561824, "grad_norm": 1629.622314453125, "learning_rate": 6.678244332215329e-06, "loss": 29.8662, "step": 225970 }, { "epoch": 0.4564938973888662, "grad_norm": 305.15765380859375, "learning_rate": 6.677915512571399e-06, "loss": 14.7993, "step": 225980 }, { "epoch": 0.45651409802155, "grad_norm": 489.23284912109375, "learning_rate": 6.6775866847495155e-06, "loss": 18.2312, "step": 225990 }, { "epoch": 0.45653429865423384, "grad_norm": 248.98838806152344, "learning_rate": 6.677257848751276e-06, "loss": 28.0202, "step": 226000 }, { "epoch": 0.45655449928691766, "grad_norm": 723.9705810546875, "learning_rate": 6.676929004578286e-06, "loss": 21.031, "step": 226010 }, { "epoch": 0.4565746999196015, "grad_norm": 1153.4454345703125, "learning_rate": 6.676600152232147e-06, "loss": 26.4975, "step": 226020 }, { "epoch": 0.4565949005522853, "grad_norm": 252.0647735595703, "learning_rate": 6.676271291714461e-06, "loss": 25.1416, "step": 226030 }, { "epoch": 0.4566151011849691, "grad_norm": 438.14453125, "learning_rate": 6.675942423026834e-06, "loss": 15.9802, "step": 226040 }, { "epoch": 0.45663530181765294, "grad_norm": 505.6163635253906, "learning_rate": 6.675613546170866e-06, "loss": 21.4501, "step": 226050 }, { "epoch": 0.45665550245033676, "grad_norm": 724.9590454101562, "learning_rate": 6.675284661148162e-06, "loss": 18.4366, "step": 226060 }, { "epoch": 0.4566757030830206, "grad_norm": 144.9850616455078, "learning_rate": 6.6749557679603225e-06, "loss": 14.9486, "step": 226070 }, { "epoch": 0.4566959037157044, 
"grad_norm": 391.7661437988281, "learning_rate": 6.674626866608951e-06, "loss": 12.2022, "step": 226080 }, { "epoch": 0.4567161043483882, "grad_norm": 2.0429892539978027, "learning_rate": 6.674297957095652e-06, "loss": 14.1796, "step": 226090 }, { "epoch": 0.456736304981072, "grad_norm": 697.7444458007812, "learning_rate": 6.673969039422029e-06, "loss": 20.4768, "step": 226100 }, { "epoch": 0.4567565056137558, "grad_norm": 382.4043884277344, "learning_rate": 6.673640113589683e-06, "loss": 15.7105, "step": 226110 }, { "epoch": 0.4567767062464396, "grad_norm": 588.4155883789062, "learning_rate": 6.673311179600218e-06, "loss": 29.4429, "step": 226120 }, { "epoch": 0.45679690687912344, "grad_norm": 756.751220703125, "learning_rate": 6.672982237455238e-06, "loss": 13.6926, "step": 226130 }, { "epoch": 0.45681710751180726, "grad_norm": 431.67742919921875, "learning_rate": 6.672653287156345e-06, "loss": 24.9859, "step": 226140 }, { "epoch": 0.4568373081444911, "grad_norm": 438.0408935546875, "learning_rate": 6.672324328705142e-06, "loss": 24.0636, "step": 226150 }, { "epoch": 0.4568575087771749, "grad_norm": 113.45287322998047, "learning_rate": 6.671995362103233e-06, "loss": 15.0136, "step": 226160 }, { "epoch": 0.4568777094098587, "grad_norm": 843.9808349609375, "learning_rate": 6.671666387352223e-06, "loss": 60.3752, "step": 226170 }, { "epoch": 0.45689791004254254, "grad_norm": 517.6583862304688, "learning_rate": 6.671337404453713e-06, "loss": 21.3241, "step": 226180 }, { "epoch": 0.45691811067522636, "grad_norm": 383.1948547363281, "learning_rate": 6.671008413409306e-06, "loss": 16.1406, "step": 226190 }, { "epoch": 0.4569383113079102, "grad_norm": 370.60137939453125, "learning_rate": 6.6706794142206085e-06, "loss": 25.1204, "step": 226200 }, { "epoch": 0.456958511940594, "grad_norm": 252.67543029785156, "learning_rate": 6.67035040688922e-06, "loss": 26.6554, "step": 226210 }, { "epoch": 0.45697871257327777, "grad_norm": 417.4878234863281, "learning_rate": 6.6700213914167485e-06, "loss": 23.3417, "step": 226220 }, { "epoch": 0.4569989132059616, "grad_norm": 382.7886047363281, "learning_rate": 6.669692367804795e-06, "loss": 25.7658, "step": 226230 }, { "epoch": 0.4570191138386454, "grad_norm": 274.5790710449219, "learning_rate": 6.6693633360549615e-06, "loss": 37.9028, "step": 226240 }, { "epoch": 0.4570393144713292, "grad_norm": 443.1168518066406, "learning_rate": 6.669034296168855e-06, "loss": 8.0102, "step": 226250 }, { "epoch": 0.45705951510401305, "grad_norm": 38.52375411987305, "learning_rate": 6.668705248148079e-06, "loss": 15.8677, "step": 226260 }, { "epoch": 0.45707971573669687, "grad_norm": 512.3353271484375, "learning_rate": 6.668376191994234e-06, "loss": 26.6839, "step": 226270 }, { "epoch": 0.4570999163693807, "grad_norm": 57.37399673461914, "learning_rate": 6.668047127708927e-06, "loss": 25.7264, "step": 226280 }, { "epoch": 0.4571201170020645, "grad_norm": 101.6399917602539, "learning_rate": 6.667718055293759e-06, "loss": 15.4918, "step": 226290 }, { "epoch": 0.4571403176347483, "grad_norm": 349.11993408203125, "learning_rate": 6.6673889747503364e-06, "loss": 18.6506, "step": 226300 }, { "epoch": 0.45716051826743215, "grad_norm": 308.615966796875, "learning_rate": 6.667059886080263e-06, "loss": 18.3663, "step": 226310 }, { "epoch": 0.45718071890011597, "grad_norm": 138.72930908203125, "learning_rate": 6.66673078928514e-06, "loss": 12.4551, "step": 226320 }, { "epoch": 0.4572009195327998, "grad_norm": 262.0975646972656, "learning_rate": 6.666401684366575e-06, "loss": 16.5631, 
"step": 226330 }, { "epoch": 0.4572211201654836, "grad_norm": 270.44305419921875, "learning_rate": 6.66607257132617e-06, "loss": 22.5709, "step": 226340 }, { "epoch": 0.45724132079816737, "grad_norm": 727.0762329101562, "learning_rate": 6.665743450165528e-06, "loss": 13.9652, "step": 226350 }, { "epoch": 0.4572615214308512, "grad_norm": 270.78704833984375, "learning_rate": 6.665414320886256e-06, "loss": 10.4592, "step": 226360 }, { "epoch": 0.457281722063535, "grad_norm": 568.377197265625, "learning_rate": 6.665085183489955e-06, "loss": 18.0877, "step": 226370 }, { "epoch": 0.45730192269621883, "grad_norm": 613.829833984375, "learning_rate": 6.664756037978233e-06, "loss": 17.4813, "step": 226380 }, { "epoch": 0.45732212332890265, "grad_norm": 503.44622802734375, "learning_rate": 6.664426884352691e-06, "loss": 30.1703, "step": 226390 }, { "epoch": 0.45734232396158647, "grad_norm": 189.15289306640625, "learning_rate": 6.664097722614934e-06, "loss": 10.3167, "step": 226400 }, { "epoch": 0.4573625245942703, "grad_norm": 450.5589294433594, "learning_rate": 6.663768552766566e-06, "loss": 32.9329, "step": 226410 }, { "epoch": 0.4573827252269541, "grad_norm": 104.55570220947266, "learning_rate": 6.663439374809194e-06, "loss": 28.3364, "step": 226420 }, { "epoch": 0.45740292585963793, "grad_norm": 301.110595703125, "learning_rate": 6.663110188744417e-06, "loss": 15.3579, "step": 226430 }, { "epoch": 0.45742312649232175, "grad_norm": 536.1530151367188, "learning_rate": 6.662780994573846e-06, "loss": 17.2823, "step": 226440 }, { "epoch": 0.45744332712500557, "grad_norm": 612.8405151367188, "learning_rate": 6.6624517922990795e-06, "loss": 25.9868, "step": 226450 }, { "epoch": 0.4574635277576894, "grad_norm": 389.212158203125, "learning_rate": 6.662122581921726e-06, "loss": 23.0075, "step": 226460 }, { "epoch": 0.4574837283903732, "grad_norm": 421.0704345703125, "learning_rate": 6.661793363443389e-06, "loss": 22.7034, "step": 226470 }, { "epoch": 0.457503929023057, "grad_norm": 280.3844299316406, "learning_rate": 6.661464136865671e-06, "loss": 14.6026, "step": 226480 }, { "epoch": 0.4575241296557408, "grad_norm": 525.3418579101562, "learning_rate": 6.6611349021901795e-06, "loss": 15.6766, "step": 226490 }, { "epoch": 0.4575443302884246, "grad_norm": 506.8186340332031, "learning_rate": 6.6608056594185166e-06, "loss": 23.4258, "step": 226500 }, { "epoch": 0.45756453092110844, "grad_norm": 372.74200439453125, "learning_rate": 6.66047640855229e-06, "loss": 22.0078, "step": 226510 }, { "epoch": 0.45758473155379226, "grad_norm": 74.90923309326172, "learning_rate": 6.660147149593102e-06, "loss": 20.9625, "step": 226520 }, { "epoch": 0.4576049321864761, "grad_norm": 52.75895309448242, "learning_rate": 6.659817882542559e-06, "loss": 26.064, "step": 226530 }, { "epoch": 0.4576251328191599, "grad_norm": 638.3198852539062, "learning_rate": 6.659488607402265e-06, "loss": 37.9739, "step": 226540 }, { "epoch": 0.4576453334518437, "grad_norm": 1.6899328231811523, "learning_rate": 6.659159324173823e-06, "loss": 14.513, "step": 226550 }, { "epoch": 0.45766553408452754, "grad_norm": 185.67491149902344, "learning_rate": 6.658830032858841e-06, "loss": 11.7518, "step": 226560 }, { "epoch": 0.45768573471721136, "grad_norm": 451.95306396484375, "learning_rate": 6.658500733458922e-06, "loss": 20.5763, "step": 226570 }, { "epoch": 0.4577059353498952, "grad_norm": 817.3878173828125, "learning_rate": 6.658171425975673e-06, "loss": 19.2115, "step": 226580 }, { "epoch": 0.457726135982579, "grad_norm": 373.3409423828125, 
"learning_rate": 6.657842110410695e-06, "loss": 31.3388, "step": 226590 }, { "epoch": 0.4577463366152628, "grad_norm": 236.69320678710938, "learning_rate": 6.657512786765599e-06, "loss": 10.0431, "step": 226600 }, { "epoch": 0.4577665372479466, "grad_norm": 173.1103057861328, "learning_rate": 6.657183455041984e-06, "loss": 16.616, "step": 226610 }, { "epoch": 0.4577867378806304, "grad_norm": 672.570556640625, "learning_rate": 6.656854115241458e-06, "loss": 17.7288, "step": 226620 }, { "epoch": 0.4578069385133142, "grad_norm": 399.2940368652344, "learning_rate": 6.656524767365629e-06, "loss": 19.0576, "step": 226630 }, { "epoch": 0.45782713914599804, "grad_norm": 275.1471862792969, "learning_rate": 6.656195411416094e-06, "loss": 9.5872, "step": 226640 }, { "epoch": 0.45784733977868186, "grad_norm": 281.56378173828125, "learning_rate": 6.655866047394468e-06, "loss": 15.1387, "step": 226650 }, { "epoch": 0.4578675404113657, "grad_norm": 1057.09326171875, "learning_rate": 6.655536675302349e-06, "loss": 29.3993, "step": 226660 }, { "epoch": 0.4578877410440495, "grad_norm": 365.1173400878906, "learning_rate": 6.655207295141346e-06, "loss": 13.6611, "step": 226670 }, { "epoch": 0.4579079416767333, "grad_norm": 83.75810241699219, "learning_rate": 6.654877906913064e-06, "loss": 17.9194, "step": 226680 }, { "epoch": 0.45792814230941714, "grad_norm": 437.2760314941406, "learning_rate": 6.654548510619108e-06, "loss": 24.4468, "step": 226690 }, { "epoch": 0.45794834294210096, "grad_norm": 246.66741943359375, "learning_rate": 6.654219106261082e-06, "loss": 20.8572, "step": 226700 }, { "epoch": 0.4579685435747848, "grad_norm": 113.89134216308594, "learning_rate": 6.6538896938405935e-06, "loss": 10.9767, "step": 226710 }, { "epoch": 0.4579887442074686, "grad_norm": 366.5423889160156, "learning_rate": 6.6535602733592465e-06, "loss": 22.3123, "step": 226720 }, { "epoch": 0.4580089448401524, "grad_norm": 701.4179077148438, "learning_rate": 6.653230844818648e-06, "loss": 15.3845, "step": 226730 }, { "epoch": 0.4580291454728362, "grad_norm": 414.45611572265625, "learning_rate": 6.6529014082204025e-06, "loss": 15.3358, "step": 226740 }, { "epoch": 0.45804934610552, "grad_norm": 425.706787109375, "learning_rate": 6.652571963566116e-06, "loss": 12.4742, "step": 226750 }, { "epoch": 0.4580695467382038, "grad_norm": 746.2068481445312, "learning_rate": 6.652242510857395e-06, "loss": 29.1822, "step": 226760 }, { "epoch": 0.45808974737088765, "grad_norm": 695.0816040039062, "learning_rate": 6.651913050095842e-06, "loss": 20.5539, "step": 226770 }, { "epoch": 0.45810994800357147, "grad_norm": 553.7020874023438, "learning_rate": 6.651583581283068e-06, "loss": 21.6167, "step": 226780 }, { "epoch": 0.4581301486362553, "grad_norm": 190.50221252441406, "learning_rate": 6.651254104420674e-06, "loss": 16.591, "step": 226790 }, { "epoch": 0.4581503492689391, "grad_norm": 173.61050415039062, "learning_rate": 6.6509246195102685e-06, "loss": 13.1653, "step": 226800 }, { "epoch": 0.4581705499016229, "grad_norm": 338.4618225097656, "learning_rate": 6.650595126553459e-06, "loss": 15.7322, "step": 226810 }, { "epoch": 0.45819075053430675, "grad_norm": 3.598544120788574, "learning_rate": 6.6502656255518435e-06, "loss": 28.801, "step": 226820 }, { "epoch": 0.45821095116699057, "grad_norm": 188.61846923828125, "learning_rate": 6.649936116507039e-06, "loss": 24.9716, "step": 226830 }, { "epoch": 0.4582311517996744, "grad_norm": 576.9122924804688, "learning_rate": 6.649606599420643e-06, "loss": 36.46, "step": 226840 }, { "epoch": 
0.4582513524323582, "grad_norm": 161.2891845703125, "learning_rate": 6.649277074294265e-06, "loss": 25.622, "step": 226850 }, { "epoch": 0.45827155306504197, "grad_norm": 216.29861450195312, "learning_rate": 6.648947541129511e-06, "loss": 28.6866, "step": 226860 }, { "epoch": 0.4582917536977258, "grad_norm": 488.2857360839844, "learning_rate": 6.648617999927986e-06, "loss": 20.2214, "step": 226870 }, { "epoch": 0.4583119543304096, "grad_norm": 314.35699462890625, "learning_rate": 6.648288450691298e-06, "loss": 22.3733, "step": 226880 }, { "epoch": 0.45833215496309343, "grad_norm": 1308.2734375, "learning_rate": 6.647958893421051e-06, "loss": 22.9058, "step": 226890 }, { "epoch": 0.45835235559577725, "grad_norm": 525.2289428710938, "learning_rate": 6.647629328118852e-06, "loss": 26.3447, "step": 226900 }, { "epoch": 0.45837255622846107, "grad_norm": 586.7451171875, "learning_rate": 6.647299754786308e-06, "loss": 19.6053, "step": 226910 }, { "epoch": 0.4583927568611449, "grad_norm": 796.2258911132812, "learning_rate": 6.646970173425026e-06, "loss": 31.2414, "step": 226920 }, { "epoch": 0.4584129574938287, "grad_norm": 226.70315551757812, "learning_rate": 6.646640584036609e-06, "loss": 24.8402, "step": 226930 }, { "epoch": 0.45843315812651253, "grad_norm": 2001.764404296875, "learning_rate": 6.6463109866226675e-06, "loss": 11.9321, "step": 226940 }, { "epoch": 0.45845335875919635, "grad_norm": 695.1205444335938, "learning_rate": 6.645981381184804e-06, "loss": 22.2517, "step": 226950 }, { "epoch": 0.45847355939188017, "grad_norm": 575.7254638671875, "learning_rate": 6.645651767724628e-06, "loss": 16.088, "step": 226960 }, { "epoch": 0.458493760024564, "grad_norm": 365.404052734375, "learning_rate": 6.645322146243744e-06, "loss": 16.3673, "step": 226970 }, { "epoch": 0.4585139606572478, "grad_norm": 152.20553588867188, "learning_rate": 6.6449925167437604e-06, "loss": 16.6647, "step": 226980 }, { "epoch": 0.4585341612899316, "grad_norm": 698.5183715820312, "learning_rate": 6.644662879226282e-06, "loss": 26.9254, "step": 226990 }, { "epoch": 0.4585543619226154, "grad_norm": 123.05801391601562, "learning_rate": 6.644333233692917e-06, "loss": 9.6099, "step": 227000 }, { "epoch": 0.4585745625552992, "grad_norm": 378.0152282714844, "learning_rate": 6.6440035801452705e-06, "loss": 26.5745, "step": 227010 }, { "epoch": 0.45859476318798303, "grad_norm": 508.7265319824219, "learning_rate": 6.643673918584951e-06, "loss": 15.1178, "step": 227020 }, { "epoch": 0.45861496382066685, "grad_norm": 447.2159423828125, "learning_rate": 6.643344249013562e-06, "loss": 25.3622, "step": 227030 }, { "epoch": 0.4586351644533507, "grad_norm": 1060.10986328125, "learning_rate": 6.643014571432715e-06, "loss": 27.1324, "step": 227040 }, { "epoch": 0.4586553650860345, "grad_norm": 620.6658325195312, "learning_rate": 6.642684885844013e-06, "loss": 18.6872, "step": 227050 }, { "epoch": 0.4586755657187183, "grad_norm": 566.2651977539062, "learning_rate": 6.642355192249065e-06, "loss": 14.9459, "step": 227060 }, { "epoch": 0.45869576635140213, "grad_norm": 620.1211547851562, "learning_rate": 6.642025490649475e-06, "loss": 19.6324, "step": 227070 }, { "epoch": 0.45871596698408595, "grad_norm": 571.9847412109375, "learning_rate": 6.6416957810468555e-06, "loss": 24.1564, "step": 227080 }, { "epoch": 0.4587361676167698, "grad_norm": 301.05316162109375, "learning_rate": 6.641366063442806e-06, "loss": 10.7162, "step": 227090 }, { "epoch": 0.4587563682494536, "grad_norm": 515.2897338867188, "learning_rate": 6.64103633783894e-06, 
"loss": 14.8232, "step": 227100 }, { "epoch": 0.4587765688821374, "grad_norm": 167.2313995361328, "learning_rate": 6.64070660423686e-06, "loss": 7.9229, "step": 227110 }, { "epoch": 0.4587967695148212, "grad_norm": 423.76812744140625, "learning_rate": 6.640376862638176e-06, "loss": 20.9157, "step": 227120 }, { "epoch": 0.458816970147505, "grad_norm": 487.71142578125, "learning_rate": 6.640047113044493e-06, "loss": 19.3634, "step": 227130 }, { "epoch": 0.4588371707801888, "grad_norm": 193.6186981201172, "learning_rate": 6.63971735545742e-06, "loss": 26.5241, "step": 227140 }, { "epoch": 0.45885737141287264, "grad_norm": 93.84468078613281, "learning_rate": 6.6393875898785655e-06, "loss": 24.1329, "step": 227150 }, { "epoch": 0.45887757204555646, "grad_norm": 403.93365478515625, "learning_rate": 6.639057816309532e-06, "loss": 16.5268, "step": 227160 }, { "epoch": 0.4588977726782403, "grad_norm": 106.60189056396484, "learning_rate": 6.638728034751931e-06, "loss": 12.1873, "step": 227170 }, { "epoch": 0.4589179733109241, "grad_norm": 219.35626220703125, "learning_rate": 6.638398245207367e-06, "loss": 11.526, "step": 227180 }, { "epoch": 0.4589381739436079, "grad_norm": 569.699951171875, "learning_rate": 6.638068447677449e-06, "loss": 26.8491, "step": 227190 }, { "epoch": 0.45895837457629174, "grad_norm": 655.5698852539062, "learning_rate": 6.637738642163785e-06, "loss": 24.5225, "step": 227200 }, { "epoch": 0.45897857520897556, "grad_norm": 374.3693542480469, "learning_rate": 6.637408828667982e-06, "loss": 19.3083, "step": 227210 }, { "epoch": 0.4589987758416594, "grad_norm": 121.1509780883789, "learning_rate": 6.6370790071916456e-06, "loss": 15.769, "step": 227220 }, { "epoch": 0.4590189764743432, "grad_norm": 385.0738525390625, "learning_rate": 6.6367491777363845e-06, "loss": 19.7486, "step": 227230 }, { "epoch": 0.459039177107027, "grad_norm": 286.7579345703125, "learning_rate": 6.636419340303808e-06, "loss": 35.0856, "step": 227240 }, { "epoch": 0.4590593777397108, "grad_norm": 497.38653564453125, "learning_rate": 6.63608949489552e-06, "loss": 22.0173, "step": 227250 }, { "epoch": 0.4590795783723946, "grad_norm": 49.78174591064453, "learning_rate": 6.635759641513132e-06, "loss": 23.5303, "step": 227260 }, { "epoch": 0.4590997790050784, "grad_norm": 488.2474365234375, "learning_rate": 6.635429780158248e-06, "loss": 18.4013, "step": 227270 }, { "epoch": 0.45911997963776224, "grad_norm": 582.5839233398438, "learning_rate": 6.63509991083248e-06, "loss": 14.5305, "step": 227280 }, { "epoch": 0.45914018027044606, "grad_norm": 505.1718444824219, "learning_rate": 6.634770033537432e-06, "loss": 43.9581, "step": 227290 }, { "epoch": 0.4591603809031299, "grad_norm": 431.9947814941406, "learning_rate": 6.634440148274712e-06, "loss": 16.2944, "step": 227300 }, { "epoch": 0.4591805815358137, "grad_norm": 713.578857421875, "learning_rate": 6.634110255045931e-06, "loss": 14.6432, "step": 227310 }, { "epoch": 0.4592007821684975, "grad_norm": 388.4759216308594, "learning_rate": 6.633780353852695e-06, "loss": 25.6398, "step": 227320 }, { "epoch": 0.45922098280118134, "grad_norm": 1456.6639404296875, "learning_rate": 6.6334504446966095e-06, "loss": 24.7451, "step": 227330 }, { "epoch": 0.45924118343386516, "grad_norm": 248.88380432128906, "learning_rate": 6.633120527579286e-06, "loss": 22.3931, "step": 227340 }, { "epoch": 0.459261384066549, "grad_norm": 345.763916015625, "learning_rate": 6.632790602502331e-06, "loss": 37.7585, "step": 227350 }, { "epoch": 0.4592815846992328, "grad_norm": 503.90740966796875, 
"learning_rate": 6.632460669467353e-06, "loss": 11.0966, "step": 227360 }, { "epoch": 0.4593017853319166, "grad_norm": 771.942626953125, "learning_rate": 6.632130728475961e-06, "loss": 15.9836, "step": 227370 }, { "epoch": 0.4593219859646004, "grad_norm": 521.92529296875, "learning_rate": 6.631800779529759e-06, "loss": 14.3884, "step": 227380 }, { "epoch": 0.4593421865972842, "grad_norm": 194.33541870117188, "learning_rate": 6.6314708226303596e-06, "loss": 10.5635, "step": 227390 }, { "epoch": 0.459362387229968, "grad_norm": 233.3264923095703, "learning_rate": 6.631140857779368e-06, "loss": 14.9375, "step": 227400 }, { "epoch": 0.45938258786265185, "grad_norm": 571.1205444335938, "learning_rate": 6.6308108849783936e-06, "loss": 16.6, "step": 227410 }, { "epoch": 0.45940278849533567, "grad_norm": 977.36669921875, "learning_rate": 6.630480904229047e-06, "loss": 23.7895, "step": 227420 }, { "epoch": 0.4594229891280195, "grad_norm": 753.84130859375, "learning_rate": 6.6301509155329315e-06, "loss": 19.1079, "step": 227430 }, { "epoch": 0.4594431897607033, "grad_norm": 268.7891845703125, "learning_rate": 6.629820918891661e-06, "loss": 23.4215, "step": 227440 }, { "epoch": 0.4594633903933871, "grad_norm": 312.3785400390625, "learning_rate": 6.629490914306839e-06, "loss": 19.9039, "step": 227450 }, { "epoch": 0.45948359102607095, "grad_norm": 374.58233642578125, "learning_rate": 6.629160901780076e-06, "loss": 29.6784, "step": 227460 }, { "epoch": 0.45950379165875477, "grad_norm": 196.46884155273438, "learning_rate": 6.62883088131298e-06, "loss": 14.1337, "step": 227470 }, { "epoch": 0.4595239922914386, "grad_norm": 387.6122741699219, "learning_rate": 6.6285008529071615e-06, "loss": 17.6004, "step": 227480 }, { "epoch": 0.4595441929241224, "grad_norm": 421.771484375, "learning_rate": 6.628170816564227e-06, "loss": 22.1367, "step": 227490 }, { "epoch": 0.45956439355680617, "grad_norm": 334.7427978515625, "learning_rate": 6.627840772285784e-06, "loss": 17.2208, "step": 227500 }, { "epoch": 0.45958459418949, "grad_norm": 100.2383804321289, "learning_rate": 6.627510720073443e-06, "loss": 18.1849, "step": 227510 }, { "epoch": 0.4596047948221738, "grad_norm": 1945.5546875, "learning_rate": 6.627180659928812e-06, "loss": 20.4495, "step": 227520 }, { "epoch": 0.45962499545485763, "grad_norm": 441.0237121582031, "learning_rate": 6.626850591853502e-06, "loss": 32.192, "step": 227530 }, { "epoch": 0.45964519608754145, "grad_norm": 317.0296936035156, "learning_rate": 6.626520515849117e-06, "loss": 27.84, "step": 227540 }, { "epoch": 0.45966539672022527, "grad_norm": 362.31488037109375, "learning_rate": 6.62619043191727e-06, "loss": 25.4779, "step": 227550 }, { "epoch": 0.4596855973529091, "grad_norm": 173.9813995361328, "learning_rate": 6.625860340059567e-06, "loss": 8.8232, "step": 227560 }, { "epoch": 0.4597057979855929, "grad_norm": 383.3540344238281, "learning_rate": 6.6255302402776175e-06, "loss": 20.7251, "step": 227570 }, { "epoch": 0.45972599861827673, "grad_norm": 107.57487487792969, "learning_rate": 6.625200132573032e-06, "loss": 12.143, "step": 227580 }, { "epoch": 0.45974619925096055, "grad_norm": 222.10330200195312, "learning_rate": 6.624870016947417e-06, "loss": 19.9065, "step": 227590 }, { "epoch": 0.45976639988364437, "grad_norm": 23.899377822875977, "learning_rate": 6.624539893402383e-06, "loss": 17.0291, "step": 227600 }, { "epoch": 0.4597866005163282, "grad_norm": 578.3126831054688, "learning_rate": 6.624209761939539e-06, "loss": 12.0289, "step": 227610 }, { "epoch": 0.459806801149012, 
"grad_norm": 296.71173095703125, "learning_rate": 6.623879622560493e-06, "loss": 16.731, "step": 227620 }, { "epoch": 0.4598270017816958, "grad_norm": 255.4971466064453, "learning_rate": 6.623549475266855e-06, "loss": 22.7688, "step": 227630 }, { "epoch": 0.4598472024143796, "grad_norm": 30.626436233520508, "learning_rate": 6.6232193200602335e-06, "loss": 12.6875, "step": 227640 }, { "epoch": 0.4598674030470634, "grad_norm": 6.265193939208984, "learning_rate": 6.622889156942239e-06, "loss": 9.0322, "step": 227650 }, { "epoch": 0.45988760367974724, "grad_norm": 318.8857421875, "learning_rate": 6.622558985914478e-06, "loss": 50.1292, "step": 227660 }, { "epoch": 0.45990780431243106, "grad_norm": 100.06502532958984, "learning_rate": 6.622228806978562e-06, "loss": 24.8582, "step": 227670 }, { "epoch": 0.4599280049451149, "grad_norm": 244.34930419921875, "learning_rate": 6.6218986201361e-06, "loss": 16.2447, "step": 227680 }, { "epoch": 0.4599482055777987, "grad_norm": 411.37603759765625, "learning_rate": 6.621568425388701e-06, "loss": 13.6881, "step": 227690 }, { "epoch": 0.4599684062104825, "grad_norm": 79.83688354492188, "learning_rate": 6.6212382227379726e-06, "loss": 27.1068, "step": 227700 }, { "epoch": 0.45998860684316634, "grad_norm": 278.11492919921875, "learning_rate": 6.620908012185528e-06, "loss": 13.4156, "step": 227710 }, { "epoch": 0.46000880747585016, "grad_norm": 158.6947479248047, "learning_rate": 6.6205777937329715e-06, "loss": 17.897, "step": 227720 }, { "epoch": 0.460029008108534, "grad_norm": 377.27593994140625, "learning_rate": 6.620247567381918e-06, "loss": 37.6702, "step": 227730 }, { "epoch": 0.4600492087412178, "grad_norm": 372.9764709472656, "learning_rate": 6.619917333133973e-06, "loss": 32.4328, "step": 227740 }, { "epoch": 0.4600694093739016, "grad_norm": 268.5848083496094, "learning_rate": 6.619587090990748e-06, "loss": 16.2318, "step": 227750 }, { "epoch": 0.4600896100065854, "grad_norm": 558.2783203125, "learning_rate": 6.619256840953852e-06, "loss": 32.6995, "step": 227760 }, { "epoch": 0.4601098106392692, "grad_norm": 357.1181335449219, "learning_rate": 6.618926583024894e-06, "loss": 21.9979, "step": 227770 }, { "epoch": 0.460130011271953, "grad_norm": 887.5396118164062, "learning_rate": 6.618596317205485e-06, "loss": 16.884, "step": 227780 }, { "epoch": 0.46015021190463684, "grad_norm": 393.46160888671875, "learning_rate": 6.6182660434972325e-06, "loss": 27.323, "step": 227790 }, { "epoch": 0.46017041253732066, "grad_norm": 1260.691162109375, "learning_rate": 6.617935761901748e-06, "loss": 31.7005, "step": 227800 }, { "epoch": 0.4601906131700045, "grad_norm": 57.322059631347656, "learning_rate": 6.61760547242064e-06, "loss": 39.236, "step": 227810 }, { "epoch": 0.4602108138026883, "grad_norm": 257.4314270019531, "learning_rate": 6.617275175055522e-06, "loss": 39.7078, "step": 227820 }, { "epoch": 0.4602310144353721, "grad_norm": 430.6855773925781, "learning_rate": 6.616944869807999e-06, "loss": 45.0984, "step": 227830 }, { "epoch": 0.46025121506805594, "grad_norm": 118.204345703125, "learning_rate": 6.616614556679684e-06, "loss": 25.9735, "step": 227840 }, { "epoch": 0.46027141570073976, "grad_norm": 189.1195526123047, "learning_rate": 6.616284235672184e-06, "loss": 10.9118, "step": 227850 }, { "epoch": 0.4602916163334236, "grad_norm": 497.6725158691406, "learning_rate": 6.6159539067871114e-06, "loss": 24.2337, "step": 227860 }, { "epoch": 0.4603118169661074, "grad_norm": 55.70745086669922, "learning_rate": 6.615623570026076e-06, "loss": 16.5123, "step": 
227870 }, { "epoch": 0.4603320175987912, "grad_norm": 752.7064819335938, "learning_rate": 6.615293225390686e-06, "loss": 29.5757, "step": 227880 }, { "epoch": 0.460352218231475, "grad_norm": 137.93495178222656, "learning_rate": 6.6149628728825535e-06, "loss": 16.8261, "step": 227890 }, { "epoch": 0.4603724188641588, "grad_norm": 127.68555450439453, "learning_rate": 6.614632512503289e-06, "loss": 21.0727, "step": 227900 }, { "epoch": 0.4603926194968426, "grad_norm": 883.1185913085938, "learning_rate": 6.614302144254498e-06, "loss": 25.1303, "step": 227910 }, { "epoch": 0.46041282012952645, "grad_norm": 485.72283935546875, "learning_rate": 6.613971768137799e-06, "loss": 19.8868, "step": 227920 }, { "epoch": 0.46043302076221027, "grad_norm": 580.4060668945312, "learning_rate": 6.613641384154794e-06, "loss": 12.1131, "step": 227930 }, { "epoch": 0.4604532213948941, "grad_norm": 318.0347900390625, "learning_rate": 6.613310992307097e-06, "loss": 22.3772, "step": 227940 }, { "epoch": 0.4604734220275779, "grad_norm": 928.0330200195312, "learning_rate": 6.612980592596319e-06, "loss": 18.6223, "step": 227950 }, { "epoch": 0.4604936226602617, "grad_norm": 196.65948486328125, "learning_rate": 6.612650185024068e-06, "loss": 20.8005, "step": 227960 }, { "epoch": 0.46051382329294555, "grad_norm": 203.98886108398438, "learning_rate": 6.612319769591955e-06, "loss": 24.5998, "step": 227970 }, { "epoch": 0.46053402392562937, "grad_norm": 447.0224914550781, "learning_rate": 6.611989346301594e-06, "loss": 19.2729, "step": 227980 }, { "epoch": 0.4605542245583132, "grad_norm": 9.080937385559082, "learning_rate": 6.611658915154589e-06, "loss": 13.6669, "step": 227990 }, { "epoch": 0.460574425190997, "grad_norm": 334.2945251464844, "learning_rate": 6.611328476152557e-06, "loss": 18.59, "step": 228000 }, { "epoch": 0.46059462582368077, "grad_norm": 392.900390625, "learning_rate": 6.610998029297103e-06, "loss": 17.4096, "step": 228010 }, { "epoch": 0.4606148264563646, "grad_norm": 375.4248962402344, "learning_rate": 6.610667574589841e-06, "loss": 27.4371, "step": 228020 }, { "epoch": 0.4606350270890484, "grad_norm": 266.73040771484375, "learning_rate": 6.610337112032381e-06, "loss": 23.259, "step": 228030 }, { "epoch": 0.46065522772173223, "grad_norm": 22.642879486083984, "learning_rate": 6.610006641626332e-06, "loss": 8.5521, "step": 228040 }, { "epoch": 0.46067542835441605, "grad_norm": 181.7366485595703, "learning_rate": 6.6096761633733065e-06, "loss": 27.2741, "step": 228050 }, { "epoch": 0.46069562898709987, "grad_norm": 358.1246643066406, "learning_rate": 6.6093456772749155e-06, "loss": 15.0568, "step": 228060 }, { "epoch": 0.4607158296197837, "grad_norm": 423.1742858886719, "learning_rate": 6.609015183332767e-06, "loss": 18.3091, "step": 228070 }, { "epoch": 0.4607360302524675, "grad_norm": 274.8656921386719, "learning_rate": 6.608684681548475e-06, "loss": 27.7, "step": 228080 }, { "epoch": 0.46075623088515133, "grad_norm": 284.6326599121094, "learning_rate": 6.608354171923649e-06, "loss": 56.2296, "step": 228090 }, { "epoch": 0.46077643151783515, "grad_norm": 419.8416748046875, "learning_rate": 6.6080236544599e-06, "loss": 32.4822, "step": 228100 }, { "epoch": 0.46079663215051897, "grad_norm": 161.27183532714844, "learning_rate": 6.6076931291588375e-06, "loss": 29.2, "step": 228110 }, { "epoch": 0.4608168327832028, "grad_norm": 602.0552368164062, "learning_rate": 6.607362596022074e-06, "loss": 27.7334, "step": 228120 }, { "epoch": 0.4608370334158866, "grad_norm": 424.15740966796875, "learning_rate": 
6.607032055051221e-06, "loss": 11.4215, "step": 228130 }, { "epoch": 0.4608572340485704, "grad_norm": 76.31950378417969, "learning_rate": 6.606701506247889e-06, "loss": 10.4816, "step": 228140 }, { "epoch": 0.4608774346812542, "grad_norm": 395.2456970214844, "learning_rate": 6.606370949613688e-06, "loss": 11.1584, "step": 228150 }, { "epoch": 0.460897635313938, "grad_norm": 574.4743041992188, "learning_rate": 6.60604038515023e-06, "loss": 24.3671, "step": 228160 }, { "epoch": 0.46091783594662183, "grad_norm": 318.00225830078125, "learning_rate": 6.605709812859126e-06, "loss": 11.456, "step": 228170 }, { "epoch": 0.46093803657930565, "grad_norm": 676.1705932617188, "learning_rate": 6.605379232741986e-06, "loss": 11.9738, "step": 228180 }, { "epoch": 0.4609582372119895, "grad_norm": 306.6190490722656, "learning_rate": 6.605048644800425e-06, "loss": 11.9495, "step": 228190 }, { "epoch": 0.4609784378446733, "grad_norm": 686.2730712890625, "learning_rate": 6.604718049036047e-06, "loss": 9.0638, "step": 228200 }, { "epoch": 0.4609986384773571, "grad_norm": 309.8995361328125, "learning_rate": 6.604387445450472e-06, "loss": 8.2225, "step": 228210 }, { "epoch": 0.46101883911004093, "grad_norm": 328.4812927246094, "learning_rate": 6.604056834045306e-06, "loss": 23.514, "step": 228220 }, { "epoch": 0.46103903974272475, "grad_norm": 361.141357421875, "learning_rate": 6.603726214822161e-06, "loss": 25.217, "step": 228230 }, { "epoch": 0.4610592403754086, "grad_norm": 428.0789794921875, "learning_rate": 6.6033955877826495e-06, "loss": 14.9675, "step": 228240 }, { "epoch": 0.4610794410080924, "grad_norm": 147.8428955078125, "learning_rate": 6.603064952928382e-06, "loss": 38.2697, "step": 228250 }, { "epoch": 0.4610996416407762, "grad_norm": 849.7745361328125, "learning_rate": 6.6027343102609705e-06, "loss": 18.2229, "step": 228260 }, { "epoch": 0.46111984227346, "grad_norm": 338.63262939453125, "learning_rate": 6.602403659782026e-06, "loss": 19.8484, "step": 228270 }, { "epoch": 0.4611400429061438, "grad_norm": 272.4281005859375, "learning_rate": 6.602073001493161e-06, "loss": 13.1957, "step": 228280 }, { "epoch": 0.4611602435388276, "grad_norm": 341.6337890625, "learning_rate": 6.601742335395987e-06, "loss": 13.4541, "step": 228290 }, { "epoch": 0.46118044417151144, "grad_norm": 440.431640625, "learning_rate": 6.601411661492114e-06, "loss": 18.1025, "step": 228300 }, { "epoch": 0.46120064480419526, "grad_norm": 63.806026458740234, "learning_rate": 6.601080979783155e-06, "loss": 10.9558, "step": 228310 }, { "epoch": 0.4612208454368791, "grad_norm": 375.86090087890625, "learning_rate": 6.600750290270722e-06, "loss": 36.3965, "step": 228320 }, { "epoch": 0.4612410460695629, "grad_norm": 280.7741394042969, "learning_rate": 6.600419592956427e-06, "loss": 30.3312, "step": 228330 }, { "epoch": 0.4612612467022467, "grad_norm": 32.59760665893555, "learning_rate": 6.600088887841879e-06, "loss": 16.0028, "step": 228340 }, { "epoch": 0.46128144733493054, "grad_norm": 2014.97412109375, "learning_rate": 6.599758174928692e-06, "loss": 37.2913, "step": 228350 }, { "epoch": 0.46130164796761436, "grad_norm": 282.12762451171875, "learning_rate": 6.599427454218479e-06, "loss": 23.0998, "step": 228360 }, { "epoch": 0.4613218486002982, "grad_norm": 764.3007202148438, "learning_rate": 6.59909672571285e-06, "loss": 50.8008, "step": 228370 }, { "epoch": 0.461342049232982, "grad_norm": 117.80643463134766, "learning_rate": 6.598765989413419e-06, "loss": 8.7945, "step": 228380 }, { "epoch": 0.4613622498656658, "grad_norm": 
164.44436645507812, "learning_rate": 6.598435245321794e-06, "loss": 36.3478, "step": 228390 }, { "epoch": 0.4613824504983496, "grad_norm": 1353.674072265625, "learning_rate": 6.59810449343959e-06, "loss": 31.4991, "step": 228400 }, { "epoch": 0.4614026511310334, "grad_norm": 244.39486694335938, "learning_rate": 6.597773733768419e-06, "loss": 9.2037, "step": 228410 }, { "epoch": 0.4614228517637172, "grad_norm": 318.91986083984375, "learning_rate": 6.597442966309893e-06, "loss": 18.2771, "step": 228420 }, { "epoch": 0.46144305239640104, "grad_norm": 593.6725463867188, "learning_rate": 6.5971121910656245e-06, "loss": 16.5071, "step": 228430 }, { "epoch": 0.46146325302908486, "grad_norm": 297.6046447753906, "learning_rate": 6.5967814080372224e-06, "loss": 25.4071, "step": 228440 }, { "epoch": 0.4614834536617687, "grad_norm": 351.06573486328125, "learning_rate": 6.596450617226303e-06, "loss": 25.4135, "step": 228450 }, { "epoch": 0.4615036542944525, "grad_norm": 239.96615600585938, "learning_rate": 6.596119818634478e-06, "loss": 19.2743, "step": 228460 }, { "epoch": 0.4615238549271363, "grad_norm": 339.3836364746094, "learning_rate": 6.595789012263356e-06, "loss": 12.7965, "step": 228470 }, { "epoch": 0.46154405555982014, "grad_norm": 513.2958374023438, "learning_rate": 6.5954581981145536e-06, "loss": 27.8609, "step": 228480 }, { "epoch": 0.46156425619250396, "grad_norm": 80.27755737304688, "learning_rate": 6.5951273761896794e-06, "loss": 15.2299, "step": 228490 }, { "epoch": 0.4615844568251878, "grad_norm": 939.7469482421875, "learning_rate": 6.594796546490351e-06, "loss": 26.2262, "step": 228500 }, { "epoch": 0.4616046574578716, "grad_norm": 262.9824523925781, "learning_rate": 6.594465709018175e-06, "loss": 11.674, "step": 228510 }, { "epoch": 0.4616248580905554, "grad_norm": 1236.662353515625, "learning_rate": 6.594134863774768e-06, "loss": 24.4545, "step": 228520 }, { "epoch": 0.4616450587232392, "grad_norm": 291.9767150878906, "learning_rate": 6.593804010761742e-06, "loss": 27.847, "step": 228530 }, { "epoch": 0.461665259355923, "grad_norm": 806.2813720703125, "learning_rate": 6.593473149980707e-06, "loss": 24.3102, "step": 228540 }, { "epoch": 0.46168545998860683, "grad_norm": 698.558837890625, "learning_rate": 6.593142281433277e-06, "loss": 30.923, "step": 228550 }, { "epoch": 0.46170566062129065, "grad_norm": 240.34564208984375, "learning_rate": 6.592811405121064e-06, "loss": 26.97, "step": 228560 }, { "epoch": 0.46172586125397447, "grad_norm": 55.996376037597656, "learning_rate": 6.592480521045683e-06, "loss": 44.2196, "step": 228570 }, { "epoch": 0.4617460618866583, "grad_norm": 530.7982788085938, "learning_rate": 6.592149629208744e-06, "loss": 27.613, "step": 228580 }, { "epoch": 0.4617662625193421, "grad_norm": 235.2656707763672, "learning_rate": 6.591818729611863e-06, "loss": 12.0532, "step": 228590 }, { "epoch": 0.46178646315202593, "grad_norm": 327.0687561035156, "learning_rate": 6.591487822256648e-06, "loss": 14.905, "step": 228600 }, { "epoch": 0.46180666378470975, "grad_norm": 698.6280517578125, "learning_rate": 6.591156907144716e-06, "loss": 25.5789, "step": 228610 }, { "epoch": 0.46182686441739357, "grad_norm": 624.7056884765625, "learning_rate": 6.590825984277677e-06, "loss": 34.0949, "step": 228620 }, { "epoch": 0.4618470650500774, "grad_norm": 99.8488540649414, "learning_rate": 6.590495053657145e-06, "loss": 12.8664, "step": 228630 }, { "epoch": 0.4618672656827612, "grad_norm": 396.48907470703125, "learning_rate": 6.590164115284734e-06, "loss": 15.4031, "step": 228640 }, 
{ "epoch": 0.46188746631544497, "grad_norm": 361.1356506347656, "learning_rate": 6.589833169162055e-06, "loss": 23.8085, "step": 228650 }, { "epoch": 0.4619076669481288, "grad_norm": 763.4234619140625, "learning_rate": 6.589502215290723e-06, "loss": 19.241, "step": 228660 }, { "epoch": 0.4619278675808126, "grad_norm": 871.5690307617188, "learning_rate": 6.5891712536723495e-06, "loss": 24.2829, "step": 228670 }, { "epoch": 0.46194806821349643, "grad_norm": 497.9557189941406, "learning_rate": 6.588840284308548e-06, "loss": 29.1579, "step": 228680 }, { "epoch": 0.46196826884618025, "grad_norm": 176.30181884765625, "learning_rate": 6.588509307200932e-06, "loss": 24.5342, "step": 228690 }, { "epoch": 0.46198846947886407, "grad_norm": 193.85894775390625, "learning_rate": 6.588178322351113e-06, "loss": 28.7056, "step": 228700 }, { "epoch": 0.4620086701115479, "grad_norm": 47.11268997192383, "learning_rate": 6.587847329760708e-06, "loss": 26.5757, "step": 228710 }, { "epoch": 0.4620288707442317, "grad_norm": 516.3763427734375, "learning_rate": 6.587516329431326e-06, "loss": 14.0678, "step": 228720 }, { "epoch": 0.46204907137691553, "grad_norm": 57.38300323486328, "learning_rate": 6.587185321364582e-06, "loss": 12.3186, "step": 228730 }, { "epoch": 0.46206927200959935, "grad_norm": 596.0176391601562, "learning_rate": 6.5868543055620895e-06, "loss": 14.9874, "step": 228740 }, { "epoch": 0.46208947264228317, "grad_norm": 2.5874407291412354, "learning_rate": 6.586523282025462e-06, "loss": 45.7023, "step": 228750 }, { "epoch": 0.462109673274967, "grad_norm": 102.36325073242188, "learning_rate": 6.586192250756312e-06, "loss": 28.644, "step": 228760 }, { "epoch": 0.4621298739076508, "grad_norm": 295.92864990234375, "learning_rate": 6.585861211756253e-06, "loss": 19.5812, "step": 228770 }, { "epoch": 0.4621500745403346, "grad_norm": 271.26055908203125, "learning_rate": 6.585530165026899e-06, "loss": 30.6603, "step": 228780 }, { "epoch": 0.4621702751730184, "grad_norm": 250.38360595703125, "learning_rate": 6.585199110569863e-06, "loss": 10.7057, "step": 228790 }, { "epoch": 0.4621904758057022, "grad_norm": 258.42181396484375, "learning_rate": 6.58486804838676e-06, "loss": 14.4258, "step": 228800 }, { "epoch": 0.46221067643838604, "grad_norm": 288.42681884765625, "learning_rate": 6.5845369784792e-06, "loss": 32.4417, "step": 228810 }, { "epoch": 0.46223087707106986, "grad_norm": 205.974853515625, "learning_rate": 6.584205900848801e-06, "loss": 22.5565, "step": 228820 }, { "epoch": 0.4622510777037537, "grad_norm": 526.0134887695312, "learning_rate": 6.583874815497174e-06, "loss": 19.7797, "step": 228830 }, { "epoch": 0.4622712783364375, "grad_norm": 112.11205291748047, "learning_rate": 6.583543722425934e-06, "loss": 11.1141, "step": 228840 }, { "epoch": 0.4622914789691213, "grad_norm": 596.88818359375, "learning_rate": 6.583212621636693e-06, "loss": 19.2989, "step": 228850 }, { "epoch": 0.46231167960180514, "grad_norm": 76.48033905029297, "learning_rate": 6.582881513131065e-06, "loss": 16.9819, "step": 228860 }, { "epoch": 0.46233188023448896, "grad_norm": 341.16387939453125, "learning_rate": 6.5825503969106675e-06, "loss": 26.5284, "step": 228870 }, { "epoch": 0.4623520808671728, "grad_norm": 133.86705017089844, "learning_rate": 6.582219272977108e-06, "loss": 29.1873, "step": 228880 }, { "epoch": 0.4623722814998566, "grad_norm": 453.6415100097656, "learning_rate": 6.581888141332004e-06, "loss": 19.164, "step": 228890 }, { "epoch": 0.4623924821325404, "grad_norm": 143.84637451171875, "learning_rate": 
6.58155700197697e-06, "loss": 20.8901, "step": 228900 }, { "epoch": 0.4624126827652242, "grad_norm": 113.33052825927734, "learning_rate": 6.581225854913621e-06, "loss": 21.8837, "step": 228910 }, { "epoch": 0.462432883397908, "grad_norm": 339.52899169921875, "learning_rate": 6.580894700143565e-06, "loss": 20.7568, "step": 228920 }, { "epoch": 0.4624530840305918, "grad_norm": 173.9373321533203, "learning_rate": 6.580563537668423e-06, "loss": 14.7747, "step": 228930 }, { "epoch": 0.46247328466327564, "grad_norm": 280.2523193359375, "learning_rate": 6.580232367489805e-06, "loss": 20.7438, "step": 228940 }, { "epoch": 0.46249348529595946, "grad_norm": 355.7958068847656, "learning_rate": 6.579901189609325e-06, "loss": 18.4877, "step": 228950 }, { "epoch": 0.4625136859286433, "grad_norm": 247.1247100830078, "learning_rate": 6.5795700040286014e-06, "loss": 18.1621, "step": 228960 }, { "epoch": 0.4625338865613271, "grad_norm": 499.34686279296875, "learning_rate": 6.579238810749241e-06, "loss": 19.4621, "step": 228970 }, { "epoch": 0.4625540871940109, "grad_norm": 497.579833984375, "learning_rate": 6.578907609772866e-06, "loss": 43.1037, "step": 228980 }, { "epoch": 0.46257428782669474, "grad_norm": 186.67184448242188, "learning_rate": 6.578576401101084e-06, "loss": 16.3681, "step": 228990 }, { "epoch": 0.46259448845937856, "grad_norm": 256.8697814941406, "learning_rate": 6.578245184735513e-06, "loss": 12.9051, "step": 229000 }, { "epoch": 0.4626146890920624, "grad_norm": 166.88180541992188, "learning_rate": 6.577913960677766e-06, "loss": 12.9451, "step": 229010 }, { "epoch": 0.4626348897247462, "grad_norm": 606.2781372070312, "learning_rate": 6.577582728929458e-06, "loss": 20.379, "step": 229020 }, { "epoch": 0.46265509035743, "grad_norm": 271.86834716796875, "learning_rate": 6.5772514894922034e-06, "loss": 11.8683, "step": 229030 }, { "epoch": 0.4626752909901138, "grad_norm": 218.47808837890625, "learning_rate": 6.576920242367617e-06, "loss": 23.3083, "step": 229040 }, { "epoch": 0.4626954916227976, "grad_norm": 333.4864807128906, "learning_rate": 6.576588987557312e-06, "loss": 19.5914, "step": 229050 }, { "epoch": 0.4627156922554814, "grad_norm": 265.64501953125, "learning_rate": 6.576257725062903e-06, "loss": 11.2602, "step": 229060 }, { "epoch": 0.46273589288816525, "grad_norm": 109.08692169189453, "learning_rate": 6.575926454886005e-06, "loss": 39.5215, "step": 229070 }, { "epoch": 0.46275609352084907, "grad_norm": 620.376220703125, "learning_rate": 6.575595177028233e-06, "loss": 14.2546, "step": 229080 }, { "epoch": 0.4627762941535329, "grad_norm": 280.4469299316406, "learning_rate": 6.575263891491203e-06, "loss": 15.089, "step": 229090 }, { "epoch": 0.4627964947862167, "grad_norm": 257.61138916015625, "learning_rate": 6.574932598276524e-06, "loss": 24.787, "step": 229100 }, { "epoch": 0.4628166954189005, "grad_norm": 239.0186004638672, "learning_rate": 6.574601297385817e-06, "loss": 13.9121, "step": 229110 }, { "epoch": 0.46283689605158435, "grad_norm": 178.89210510253906, "learning_rate": 6.574269988820694e-06, "loss": 15.2393, "step": 229120 }, { "epoch": 0.46285709668426817, "grad_norm": 774.9666137695312, "learning_rate": 6.573938672582769e-06, "loss": 11.8043, "step": 229130 }, { "epoch": 0.462877297316952, "grad_norm": 909.0729370117188, "learning_rate": 6.5736073486736606e-06, "loss": 13.9207, "step": 229140 }, { "epoch": 0.4628974979496358, "grad_norm": 745.9268188476562, "learning_rate": 6.573276017094977e-06, "loss": 21.3982, "step": 229150 }, { "epoch": 0.4629176985823196, 
"grad_norm": 373.2799987792969, "learning_rate": 6.5729446778483395e-06, "loss": 28.3705, "step": 229160 }, { "epoch": 0.4629378992150034, "grad_norm": 72.70282745361328, "learning_rate": 6.57261333093536e-06, "loss": 16.3056, "step": 229170 }, { "epoch": 0.4629580998476872, "grad_norm": 103.14777374267578, "learning_rate": 6.5722819763576525e-06, "loss": 18.3655, "step": 229180 }, { "epoch": 0.46297830048037103, "grad_norm": 327.900634765625, "learning_rate": 6.571950614116835e-06, "loss": 20.0955, "step": 229190 }, { "epoch": 0.46299850111305485, "grad_norm": 519.2888793945312, "learning_rate": 6.571619244214521e-06, "loss": 14.4365, "step": 229200 }, { "epoch": 0.46301870174573867, "grad_norm": 105.24832153320312, "learning_rate": 6.571287866652325e-06, "loss": 18.3877, "step": 229210 }, { "epoch": 0.4630389023784225, "grad_norm": 464.1259460449219, "learning_rate": 6.570956481431862e-06, "loss": 23.1351, "step": 229220 }, { "epoch": 0.4630591030111063, "grad_norm": 163.41204833984375, "learning_rate": 6.570625088554747e-06, "loss": 19.7771, "step": 229230 }, { "epoch": 0.46307930364379013, "grad_norm": 194.02304077148438, "learning_rate": 6.570293688022597e-06, "loss": 20.9636, "step": 229240 }, { "epoch": 0.46309950427647395, "grad_norm": 550.7429809570312, "learning_rate": 6.569962279837025e-06, "loss": 13.2235, "step": 229250 }, { "epoch": 0.46311970490915777, "grad_norm": 485.3947448730469, "learning_rate": 6.5696308639996475e-06, "loss": 21.1499, "step": 229260 }, { "epoch": 0.4631399055418416, "grad_norm": 318.07037353515625, "learning_rate": 6.569299440512081e-06, "loss": 19.188, "step": 229270 }, { "epoch": 0.4631601061745254, "grad_norm": 390.4097595214844, "learning_rate": 6.568968009375938e-06, "loss": 15.8124, "step": 229280 }, { "epoch": 0.4631803068072092, "grad_norm": 128.84483337402344, "learning_rate": 6.568636570592835e-06, "loss": 14.8368, "step": 229290 }, { "epoch": 0.463200507439893, "grad_norm": 303.23114013671875, "learning_rate": 6.5683051241643894e-06, "loss": 16.1054, "step": 229300 }, { "epoch": 0.4632207080725768, "grad_norm": 401.9518127441406, "learning_rate": 6.567973670092212e-06, "loss": 12.3538, "step": 229310 }, { "epoch": 0.46324090870526063, "grad_norm": 130.42996215820312, "learning_rate": 6.567642208377924e-06, "loss": 20.0103, "step": 229320 }, { "epoch": 0.46326110933794445, "grad_norm": 159.85983276367188, "learning_rate": 6.567310739023136e-06, "loss": 18.0162, "step": 229330 }, { "epoch": 0.4632813099706283, "grad_norm": 8.865955352783203, "learning_rate": 6.566979262029467e-06, "loss": 21.846, "step": 229340 }, { "epoch": 0.4633015106033121, "grad_norm": 691.6495971679688, "learning_rate": 6.566647777398529e-06, "loss": 22.9702, "step": 229350 }, { "epoch": 0.4633217112359959, "grad_norm": 735.4181518554688, "learning_rate": 6.566316285131943e-06, "loss": 34.7296, "step": 229360 }, { "epoch": 0.46334191186867973, "grad_norm": 192.17556762695312, "learning_rate": 6.5659847852313184e-06, "loss": 12.5642, "step": 229370 }, { "epoch": 0.46336211250136355, "grad_norm": 338.44293212890625, "learning_rate": 6.5656532776982765e-06, "loss": 26.8633, "step": 229380 }, { "epoch": 0.4633823131340474, "grad_norm": 92.99593353271484, "learning_rate": 6.565321762534428e-06, "loss": 13.8771, "step": 229390 }, { "epoch": 0.4634025137667312, "grad_norm": 810.649658203125, "learning_rate": 6.5649902397413915e-06, "loss": 22.4203, "step": 229400 }, { "epoch": 0.463422714399415, "grad_norm": 543.7352905273438, "learning_rate": 6.564658709320783e-06, "loss": 
22.6507, "step": 229410 }, { "epoch": 0.4634429150320988, "grad_norm": 236.32470703125, "learning_rate": 6.564327171274217e-06, "loss": 24.3408, "step": 229420 }, { "epoch": 0.4634631156647826, "grad_norm": 389.9688415527344, "learning_rate": 6.563995625603312e-06, "loss": 18.6648, "step": 229430 }, { "epoch": 0.4634833162974664, "grad_norm": 594.2128295898438, "learning_rate": 6.56366407230968e-06, "loss": 22.824, "step": 229440 }, { "epoch": 0.46350351693015024, "grad_norm": 431.28973388671875, "learning_rate": 6.5633325113949395e-06, "loss": 21.5923, "step": 229450 }, { "epoch": 0.46352371756283406, "grad_norm": 763.2753295898438, "learning_rate": 6.5630009428607065e-06, "loss": 23.1118, "step": 229460 }, { "epoch": 0.4635439181955179, "grad_norm": 183.33277893066406, "learning_rate": 6.562669366708596e-06, "loss": 13.4126, "step": 229470 }, { "epoch": 0.4635641188282017, "grad_norm": 403.41351318359375, "learning_rate": 6.562337782940224e-06, "loss": 26.614, "step": 229480 }, { "epoch": 0.4635843194608855, "grad_norm": 673.7896118164062, "learning_rate": 6.562006191557209e-06, "loss": 20.5815, "step": 229490 }, { "epoch": 0.46360452009356934, "grad_norm": 188.78221130371094, "learning_rate": 6.561674592561164e-06, "loss": 20.2226, "step": 229500 }, { "epoch": 0.46362472072625316, "grad_norm": 211.59988403320312, "learning_rate": 6.561342985953706e-06, "loss": 11.8335, "step": 229510 }, { "epoch": 0.463644921358937, "grad_norm": 391.2408142089844, "learning_rate": 6.561011371736452e-06, "loss": 10.9201, "step": 229520 }, { "epoch": 0.4636651219916208, "grad_norm": 197.4485321044922, "learning_rate": 6.560679749911018e-06, "loss": 13.5148, "step": 229530 }, { "epoch": 0.4636853226243046, "grad_norm": 439.3060302734375, "learning_rate": 6.560348120479021e-06, "loss": 31.6213, "step": 229540 }, { "epoch": 0.4637055232569884, "grad_norm": 495.3293151855469, "learning_rate": 6.5600164834420754e-06, "loss": 18.4101, "step": 229550 }, { "epoch": 0.4637257238896722, "grad_norm": 244.08631896972656, "learning_rate": 6.559684838801798e-06, "loss": 39.2556, "step": 229560 }, { "epoch": 0.463745924522356, "grad_norm": 453.98846435546875, "learning_rate": 6.559353186559808e-06, "loss": 16.6811, "step": 229570 }, { "epoch": 0.46376612515503984, "grad_norm": 868.4921875, "learning_rate": 6.559021526717717e-06, "loss": 25.1985, "step": 229580 }, { "epoch": 0.46378632578772366, "grad_norm": 325.8453674316406, "learning_rate": 6.558689859277148e-06, "loss": 18.4847, "step": 229590 }, { "epoch": 0.4638065264204075, "grad_norm": 184.70367431640625, "learning_rate": 6.558358184239709e-06, "loss": 22.3115, "step": 229600 }, { "epoch": 0.4638267270530913, "grad_norm": 184.07736206054688, "learning_rate": 6.5580265016070245e-06, "loss": 37.0262, "step": 229610 }, { "epoch": 0.4638469276857751, "grad_norm": 866.11669921875, "learning_rate": 6.557694811380707e-06, "loss": 32.8891, "step": 229620 }, { "epoch": 0.46386712831845894, "grad_norm": 275.5018615722656, "learning_rate": 6.5573631135623736e-06, "loss": 21.9039, "step": 229630 }, { "epoch": 0.46388732895114276, "grad_norm": 197.97389221191406, "learning_rate": 6.557031408153642e-06, "loss": 20.2326, "step": 229640 }, { "epoch": 0.4639075295838266, "grad_norm": 481.6684265136719, "learning_rate": 6.556699695156128e-06, "loss": 18.4726, "step": 229650 }, { "epoch": 0.4639277302165104, "grad_norm": 607.4542236328125, "learning_rate": 6.556367974571448e-06, "loss": 16.4958, "step": 229660 }, { "epoch": 0.4639479308491942, "grad_norm": 517.954345703125, 
"learning_rate": 6.556036246401218e-06, "loss": 18.0444, "step": 229670 }, { "epoch": 0.463968131481878, "grad_norm": 388.73162841796875, "learning_rate": 6.555704510647059e-06, "loss": 12.3879, "step": 229680 }, { "epoch": 0.4639883321145618, "grad_norm": 66.10597229003906, "learning_rate": 6.555372767310582e-06, "loss": 27.1608, "step": 229690 }, { "epoch": 0.46400853274724563, "grad_norm": 634.7944946289062, "learning_rate": 6.55504101639341e-06, "loss": 26.6912, "step": 229700 }, { "epoch": 0.46402873337992945, "grad_norm": 320.9844665527344, "learning_rate": 6.554709257897153e-06, "loss": 14.519, "step": 229710 }, { "epoch": 0.46404893401261327, "grad_norm": 155.81512451171875, "learning_rate": 6.554377491823434e-06, "loss": 18.6841, "step": 229720 }, { "epoch": 0.4640691346452971, "grad_norm": 466.6041564941406, "learning_rate": 6.554045718173867e-06, "loss": 26.129, "step": 229730 }, { "epoch": 0.4640893352779809, "grad_norm": 1391.3883056640625, "learning_rate": 6.5537139369500705e-06, "loss": 34.4066, "step": 229740 }, { "epoch": 0.46410953591066473, "grad_norm": 235.05258178710938, "learning_rate": 6.55338214815366e-06, "loss": 19.4836, "step": 229750 }, { "epoch": 0.46412973654334855, "grad_norm": 365.6845703125, "learning_rate": 6.553050351786252e-06, "loss": 22.4419, "step": 229760 }, { "epoch": 0.46414993717603237, "grad_norm": 543.2626953125, "learning_rate": 6.552718547849467e-06, "loss": 12.3587, "step": 229770 }, { "epoch": 0.4641701378087162, "grad_norm": 613.8858032226562, "learning_rate": 6.55238673634492e-06, "loss": 20.5729, "step": 229780 }, { "epoch": 0.4641903384414, "grad_norm": 747.0809936523438, "learning_rate": 6.552054917274226e-06, "loss": 20.9929, "step": 229790 }, { "epoch": 0.46421053907408383, "grad_norm": 855.6516723632812, "learning_rate": 6.551723090639008e-06, "loss": 25.5416, "step": 229800 }, { "epoch": 0.4642307397067676, "grad_norm": 356.35699462890625, "learning_rate": 6.551391256440877e-06, "loss": 33.6675, "step": 229810 }, { "epoch": 0.4642509403394514, "grad_norm": 330.6403503417969, "learning_rate": 6.551059414681455e-06, "loss": 19.6082, "step": 229820 }, { "epoch": 0.46427114097213523, "grad_norm": 749.244140625, "learning_rate": 6.550727565362357e-06, "loss": 22.3681, "step": 229830 }, { "epoch": 0.46429134160481905, "grad_norm": 367.9692687988281, "learning_rate": 6.5503957084852e-06, "loss": 26.3204, "step": 229840 }, { "epoch": 0.46431154223750287, "grad_norm": 314.1671142578125, "learning_rate": 6.550063844051603e-06, "loss": 16.2764, "step": 229850 }, { "epoch": 0.4643317428701867, "grad_norm": 263.6180725097656, "learning_rate": 6.549731972063183e-06, "loss": 15.8397, "step": 229860 }, { "epoch": 0.4643519435028705, "grad_norm": 573.0430908203125, "learning_rate": 6.549400092521557e-06, "loss": 14.4382, "step": 229870 }, { "epoch": 0.46437214413555433, "grad_norm": 1.8975880146026611, "learning_rate": 6.549068205428344e-06, "loss": 22.1632, "step": 229880 }, { "epoch": 0.46439234476823815, "grad_norm": 777.577880859375, "learning_rate": 6.54873631078516e-06, "loss": 23.0008, "step": 229890 }, { "epoch": 0.46441254540092197, "grad_norm": 492.24761962890625, "learning_rate": 6.548404408593622e-06, "loss": 17.066, "step": 229900 }, { "epoch": 0.4644327460336058, "grad_norm": 152.51919555664062, "learning_rate": 6.54807249885535e-06, "loss": 34.6613, "step": 229910 }, { "epoch": 0.4644529466662896, "grad_norm": 428.359619140625, "learning_rate": 6.547740581571959e-06, "loss": 18.024, "step": 229920 }, { "epoch": 0.4644731472989734, 
"grad_norm": 293.5292053222656, "learning_rate": 6.547408656745069e-06, "loss": 11.4237, "step": 229930 }, { "epoch": 0.4644933479316572, "grad_norm": 388.554931640625, "learning_rate": 6.547076724376296e-06, "loss": 15.9199, "step": 229940 }, { "epoch": 0.464513548564341, "grad_norm": 921.075439453125, "learning_rate": 6.546744784467261e-06, "loss": 23.9284, "step": 229950 }, { "epoch": 0.46453374919702484, "grad_norm": 239.83203125, "learning_rate": 6.546412837019577e-06, "loss": 28.0474, "step": 229960 }, { "epoch": 0.46455394982970866, "grad_norm": 221.23048400878906, "learning_rate": 6.546080882034866e-06, "loss": 15.9598, "step": 229970 }, { "epoch": 0.4645741504623925, "grad_norm": 1186.389892578125, "learning_rate": 6.545748919514743e-06, "loss": 21.552, "step": 229980 }, { "epoch": 0.4645943510950763, "grad_norm": 376.7621765136719, "learning_rate": 6.545416949460828e-06, "loss": 27.2045, "step": 229990 }, { "epoch": 0.4646145517277601, "grad_norm": 584.0380249023438, "learning_rate": 6.545084971874738e-06, "loss": 23.3419, "step": 230000 }, { "epoch": 0.46463475236044394, "grad_norm": 573.6217041015625, "learning_rate": 6.544752986758092e-06, "loss": 64.9675, "step": 230010 }, { "epoch": 0.46465495299312776, "grad_norm": 594.05615234375, "learning_rate": 6.5444209941125056e-06, "loss": 10.5585, "step": 230020 }, { "epoch": 0.4646751536258116, "grad_norm": 236.2047119140625, "learning_rate": 6.544088993939599e-06, "loss": 11.2551, "step": 230030 }, { "epoch": 0.4646953542584954, "grad_norm": 867.623779296875, "learning_rate": 6.543756986240992e-06, "loss": 21.3639, "step": 230040 }, { "epoch": 0.4647155548911792, "grad_norm": 252.49024963378906, "learning_rate": 6.543424971018298e-06, "loss": 24.9629, "step": 230050 }, { "epoch": 0.464735755523863, "grad_norm": 161.431396484375, "learning_rate": 6.54309294827314e-06, "loss": 7.0627, "step": 230060 }, { "epoch": 0.4647559561565468, "grad_norm": 388.12982177734375, "learning_rate": 6.542760918007133e-06, "loss": 15.0796, "step": 230070 }, { "epoch": 0.4647761567892306, "grad_norm": 631.7340087890625, "learning_rate": 6.542428880221896e-06, "loss": 28.6985, "step": 230080 }, { "epoch": 0.46479635742191444, "grad_norm": 211.25621032714844, "learning_rate": 6.542096834919049e-06, "loss": 11.3219, "step": 230090 }, { "epoch": 0.46481655805459826, "grad_norm": 331.23504638671875, "learning_rate": 6.541764782100208e-06, "loss": 13.1166, "step": 230100 }, { "epoch": 0.4648367586872821, "grad_norm": 148.95367431640625, "learning_rate": 6.541432721766994e-06, "loss": 32.3821, "step": 230110 }, { "epoch": 0.4648569593199659, "grad_norm": 206.0067596435547, "learning_rate": 6.541100653921022e-06, "loss": 24.1519, "step": 230120 }, { "epoch": 0.4648771599526497, "grad_norm": 371.63330078125, "learning_rate": 6.540768578563913e-06, "loss": 30.2664, "step": 230130 }, { "epoch": 0.46489736058533354, "grad_norm": 370.63690185546875, "learning_rate": 6.540436495697284e-06, "loss": 30.1104, "step": 230140 }, { "epoch": 0.46491756121801736, "grad_norm": 398.7852783203125, "learning_rate": 6.540104405322757e-06, "loss": 24.7988, "step": 230150 }, { "epoch": 0.4649377618507012, "grad_norm": 116.38355255126953, "learning_rate": 6.5397723074419454e-06, "loss": 26.7308, "step": 230160 }, { "epoch": 0.464957962483385, "grad_norm": 715.37841796875, "learning_rate": 6.53944020205647e-06, "loss": 18.5669, "step": 230170 }, { "epoch": 0.4649781631160688, "grad_norm": 595.7755737304688, "learning_rate": 6.539108089167953e-06, "loss": 29.9836, "step": 230180 }, { 
"epoch": 0.4649983637487526, "grad_norm": 656.9431762695312, "learning_rate": 6.538775968778006e-06, "loss": 15.6107, "step": 230190 }, { "epoch": 0.4650185643814364, "grad_norm": 298.2591552734375, "learning_rate": 6.538443840888254e-06, "loss": 20.1788, "step": 230200 }, { "epoch": 0.4650387650141202, "grad_norm": 668.5206909179688, "learning_rate": 6.538111705500312e-06, "loss": 18.9093, "step": 230210 }, { "epoch": 0.46505896564680405, "grad_norm": 294.69122314453125, "learning_rate": 6.537779562615801e-06, "loss": 13.5656, "step": 230220 }, { "epoch": 0.46507916627948787, "grad_norm": 174.49681091308594, "learning_rate": 6.537447412236338e-06, "loss": 23.3252, "step": 230230 }, { "epoch": 0.4650993669121717, "grad_norm": 190.39427185058594, "learning_rate": 6.537115254363544e-06, "loss": 16.2051, "step": 230240 }, { "epoch": 0.4651195675448555, "grad_norm": 368.1281433105469, "learning_rate": 6.536783088999037e-06, "loss": 17.0869, "step": 230250 }, { "epoch": 0.4651397681775393, "grad_norm": 471.6187438964844, "learning_rate": 6.536450916144435e-06, "loss": 23.9824, "step": 230260 }, { "epoch": 0.46515996881022315, "grad_norm": 669.5811767578125, "learning_rate": 6.536118735801356e-06, "loss": 19.156, "step": 230270 }, { "epoch": 0.46518016944290697, "grad_norm": 586.4672241210938, "learning_rate": 6.535786547971421e-06, "loss": 22.2844, "step": 230280 }, { "epoch": 0.4652003700755908, "grad_norm": 519.2525634765625, "learning_rate": 6.53545435265625e-06, "loss": 20.4509, "step": 230290 }, { "epoch": 0.4652205707082746, "grad_norm": 474.3525085449219, "learning_rate": 6.53512214985746e-06, "loss": 22.7389, "step": 230300 }, { "epoch": 0.4652407713409584, "grad_norm": 334.4240417480469, "learning_rate": 6.534789939576672e-06, "loss": 10.9933, "step": 230310 }, { "epoch": 0.4652609719736422, "grad_norm": 486.189208984375, "learning_rate": 6.534457721815502e-06, "loss": 11.7563, "step": 230320 }, { "epoch": 0.465281172606326, "grad_norm": 174.62841796875, "learning_rate": 6.534125496575573e-06, "loss": 12.3943, "step": 230330 }, { "epoch": 0.46530137323900983, "grad_norm": 411.12823486328125, "learning_rate": 6.533793263858501e-06, "loss": 20.1812, "step": 230340 }, { "epoch": 0.46532157387169365, "grad_norm": 295.273193359375, "learning_rate": 6.533461023665907e-06, "loss": 32.0141, "step": 230350 }, { "epoch": 0.46534177450437747, "grad_norm": 161.42010498046875, "learning_rate": 6.533128775999411e-06, "loss": 22.363, "step": 230360 }, { "epoch": 0.4653619751370613, "grad_norm": 328.110107421875, "learning_rate": 6.532796520860629e-06, "loss": 15.8858, "step": 230370 }, { "epoch": 0.4653821757697451, "grad_norm": 536.037353515625, "learning_rate": 6.532464258251185e-06, "loss": 13.6617, "step": 230380 }, { "epoch": 0.46540237640242893, "grad_norm": 361.0971374511719, "learning_rate": 6.532131988172695e-06, "loss": 10.4187, "step": 230390 }, { "epoch": 0.46542257703511275, "grad_norm": 623.8751220703125, "learning_rate": 6.53179971062678e-06, "loss": 18.2029, "step": 230400 }, { "epoch": 0.46544277766779657, "grad_norm": 470.6343688964844, "learning_rate": 6.531467425615059e-06, "loss": 15.9785, "step": 230410 }, { "epoch": 0.4654629783004804, "grad_norm": 180.88169860839844, "learning_rate": 6.531135133139152e-06, "loss": 17.0625, "step": 230420 }, { "epoch": 0.4654831789331642, "grad_norm": 638.9075317382812, "learning_rate": 6.530802833200677e-06, "loss": 26.4864, "step": 230430 }, { "epoch": 0.46550337956584803, "grad_norm": 375.173828125, "learning_rate": 6.530470525801254e-06, 
"loss": 22.1161, "step": 230440 }, { "epoch": 0.4655235801985318, "grad_norm": 154.70245361328125, "learning_rate": 6.530138210942505e-06, "loss": 9.1623, "step": 230450 }, { "epoch": 0.4655437808312156, "grad_norm": 100.7583999633789, "learning_rate": 6.529805888626046e-06, "loss": 15.7135, "step": 230460 }, { "epoch": 0.46556398146389943, "grad_norm": 315.9190368652344, "learning_rate": 6.529473558853501e-06, "loss": 23.7646, "step": 230470 }, { "epoch": 0.46558418209658325, "grad_norm": 69.15559387207031, "learning_rate": 6.529141221626485e-06, "loss": 13.7641, "step": 230480 }, { "epoch": 0.4656043827292671, "grad_norm": 472.7928161621094, "learning_rate": 6.528808876946622e-06, "loss": 10.4498, "step": 230490 }, { "epoch": 0.4656245833619509, "grad_norm": 363.53656005859375, "learning_rate": 6.5284765248155295e-06, "loss": 18.3428, "step": 230500 }, { "epoch": 0.4656447839946347, "grad_norm": 180.32015991210938, "learning_rate": 6.5281441652348266e-06, "loss": 12.5083, "step": 230510 }, { "epoch": 0.46566498462731853, "grad_norm": 503.92083740234375, "learning_rate": 6.527811798206136e-06, "loss": 20.311, "step": 230520 }, { "epoch": 0.46568518526000235, "grad_norm": 516.410888671875, "learning_rate": 6.527479423731074e-06, "loss": 14.5096, "step": 230530 }, { "epoch": 0.4657053858926862, "grad_norm": 426.80767822265625, "learning_rate": 6.527147041811266e-06, "loss": 22.1971, "step": 230540 }, { "epoch": 0.46572558652537, "grad_norm": 491.8183288574219, "learning_rate": 6.526814652448325e-06, "loss": 17.7891, "step": 230550 }, { "epoch": 0.4657457871580538, "grad_norm": 562.8279418945312, "learning_rate": 6.526482255643877e-06, "loss": 23.9458, "step": 230560 }, { "epoch": 0.4657659877907376, "grad_norm": 51.70831298828125, "learning_rate": 6.526149851399538e-06, "loss": 16.1752, "step": 230570 }, { "epoch": 0.4657861884234214, "grad_norm": 501.7652587890625, "learning_rate": 6.525817439716932e-06, "loss": 15.4438, "step": 230580 }, { "epoch": 0.4658063890561052, "grad_norm": 321.99163818359375, "learning_rate": 6.525485020597675e-06, "loss": 10.7271, "step": 230590 }, { "epoch": 0.46582658968878904, "grad_norm": 174.6129150390625, "learning_rate": 6.525152594043389e-06, "loss": 15.1069, "step": 230600 }, { "epoch": 0.46584679032147286, "grad_norm": 311.6827697753906, "learning_rate": 6.524820160055694e-06, "loss": 22.2453, "step": 230610 }, { "epoch": 0.4658669909541567, "grad_norm": 23.80679702758789, "learning_rate": 6.5244877186362095e-06, "loss": 19.2407, "step": 230620 }, { "epoch": 0.4658871915868405, "grad_norm": 463.9993896484375, "learning_rate": 6.52415526978656e-06, "loss": 31.6258, "step": 230630 }, { "epoch": 0.4659073922195243, "grad_norm": 226.607421875, "learning_rate": 6.523822813508359e-06, "loss": 19.5136, "step": 230640 }, { "epoch": 0.46592759285220814, "grad_norm": 212.09646606445312, "learning_rate": 6.5234903498032345e-06, "loss": 16.432, "step": 230650 }, { "epoch": 0.46594779348489196, "grad_norm": 125.5946273803711, "learning_rate": 6.523157878672799e-06, "loss": 17.5256, "step": 230660 }, { "epoch": 0.4659679941175758, "grad_norm": 484.95208740234375, "learning_rate": 6.522825400118679e-06, "loss": 26.0193, "step": 230670 }, { "epoch": 0.4659881947502596, "grad_norm": 571.1227416992188, "learning_rate": 6.5224929141424906e-06, "loss": 23.7222, "step": 230680 }, { "epoch": 0.4660083953829434, "grad_norm": 333.61602783203125, "learning_rate": 6.522160420745857e-06, "loss": 23.6479, "step": 230690 }, { "epoch": 0.4660285960156272, "grad_norm": 
619.595458984375, "learning_rate": 6.5218279199304014e-06, "loss": 21.6691, "step": 230700 }, { "epoch": 0.466048796648311, "grad_norm": 124.25101470947266, "learning_rate": 6.521495411697738e-06, "loss": 16.7347, "step": 230710 }, { "epoch": 0.4660689972809948, "grad_norm": 165.28903198242188, "learning_rate": 6.521162896049491e-06, "loss": 17.5535, "step": 230720 }, { "epoch": 0.46608919791367864, "grad_norm": 380.7362060546875, "learning_rate": 6.52083037298728e-06, "loss": 15.7127, "step": 230730 }, { "epoch": 0.46610939854636246, "grad_norm": 42.50484085083008, "learning_rate": 6.520497842512728e-06, "loss": 9.2298, "step": 230740 }, { "epoch": 0.4661295991790463, "grad_norm": 203.9617919921875, "learning_rate": 6.520165304627452e-06, "loss": 22.1434, "step": 230750 }, { "epoch": 0.4661497998117301, "grad_norm": 470.6728515625, "learning_rate": 6.519832759333076e-06, "loss": 29.8831, "step": 230760 }, { "epoch": 0.4661700004444139, "grad_norm": 306.70489501953125, "learning_rate": 6.519500206631218e-06, "loss": 31.5376, "step": 230770 }, { "epoch": 0.46619020107709774, "grad_norm": 368.716064453125, "learning_rate": 6.519167646523501e-06, "loss": 9.9128, "step": 230780 }, { "epoch": 0.46621040170978156, "grad_norm": 586.248779296875, "learning_rate": 6.518835079011548e-06, "loss": 17.9391, "step": 230790 }, { "epoch": 0.4662306023424654, "grad_norm": 96.07538604736328, "learning_rate": 6.518502504096972e-06, "loss": 24.7334, "step": 230800 }, { "epoch": 0.4662508029751492, "grad_norm": 231.39561462402344, "learning_rate": 6.5181699217814025e-06, "loss": 18.3506, "step": 230810 }, { "epoch": 0.466271003607833, "grad_norm": 397.648193359375, "learning_rate": 6.517837332066455e-06, "loss": 25.5665, "step": 230820 }, { "epoch": 0.4662912042405168, "grad_norm": 285.528564453125, "learning_rate": 6.5175047349537535e-06, "loss": 25.5306, "step": 230830 }, { "epoch": 0.4663114048732006, "grad_norm": 320.0257263183594, "learning_rate": 6.517172130444918e-06, "loss": 10.6003, "step": 230840 }, { "epoch": 0.46633160550588443, "grad_norm": 410.9398498535156, "learning_rate": 6.516839518541569e-06, "loss": 21.0209, "step": 230850 }, { "epoch": 0.46635180613856825, "grad_norm": 269.03729248046875, "learning_rate": 6.516506899245329e-06, "loss": 10.5766, "step": 230860 }, { "epoch": 0.46637200677125207, "grad_norm": 585.1954956054688, "learning_rate": 6.516174272557817e-06, "loss": 22.4784, "step": 230870 }, { "epoch": 0.4663922074039359, "grad_norm": 508.3545837402344, "learning_rate": 6.515841638480656e-06, "loss": 13.3804, "step": 230880 }, { "epoch": 0.4664124080366197, "grad_norm": 423.1375427246094, "learning_rate": 6.515508997015467e-06, "loss": 22.1213, "step": 230890 }, { "epoch": 0.46643260866930353, "grad_norm": 372.9940490722656, "learning_rate": 6.5151763481638705e-06, "loss": 14.8557, "step": 230900 }, { "epoch": 0.46645280930198735, "grad_norm": 250.304931640625, "learning_rate": 6.5148436919274884e-06, "loss": 36.5328, "step": 230910 }, { "epoch": 0.46647300993467117, "grad_norm": 300.4207763671875, "learning_rate": 6.514511028307943e-06, "loss": 13.9809, "step": 230920 }, { "epoch": 0.466493210567355, "grad_norm": 238.9508514404297, "learning_rate": 6.5141783573068525e-06, "loss": 15.1482, "step": 230930 }, { "epoch": 0.4665134112000388, "grad_norm": 311.8534240722656, "learning_rate": 6.513845678925842e-06, "loss": 31.2771, "step": 230940 }, { "epoch": 0.46653361183272263, "grad_norm": 696.7138061523438, "learning_rate": 6.5135129931665305e-06, "loss": 34.5944, "step": 230950 }, { 
"epoch": 0.4665538124654064, "grad_norm": 573.7080688476562, "learning_rate": 6.5131803000305405e-06, "loss": 47.3493, "step": 230960 }, { "epoch": 0.4665740130980902, "grad_norm": 638.0385131835938, "learning_rate": 6.512847599519494e-06, "loss": 16.6461, "step": 230970 }, { "epoch": 0.46659421373077403, "grad_norm": 460.6133117675781, "learning_rate": 6.5125148916350086e-06, "loss": 15.5097, "step": 230980 }, { "epoch": 0.46661441436345785, "grad_norm": 531.8438110351562, "learning_rate": 6.512182176378713e-06, "loss": 29.2572, "step": 230990 }, { "epoch": 0.4666346149961417, "grad_norm": 670.6133422851562, "learning_rate": 6.5118494537522235e-06, "loss": 17.1781, "step": 231000 }, { "epoch": 0.4666548156288255, "grad_norm": 283.95538330078125, "learning_rate": 6.511516723757163e-06, "loss": 30.5466, "step": 231010 }, { "epoch": 0.4666750162615093, "grad_norm": 516.88330078125, "learning_rate": 6.511183986395153e-06, "loss": 21.4761, "step": 231020 }, { "epoch": 0.46669521689419313, "grad_norm": 562.6792602539062, "learning_rate": 6.510851241667816e-06, "loss": 12.9656, "step": 231030 }, { "epoch": 0.46671541752687695, "grad_norm": 134.06849670410156, "learning_rate": 6.510518489576774e-06, "loss": 30.0356, "step": 231040 }, { "epoch": 0.4667356181595608, "grad_norm": 541.930908203125, "learning_rate": 6.510185730123646e-06, "loss": 15.1213, "step": 231050 }, { "epoch": 0.4667558187922446, "grad_norm": 326.9928894042969, "learning_rate": 6.509852963310057e-06, "loss": 19.1856, "step": 231060 }, { "epoch": 0.4667760194249284, "grad_norm": 66.58686828613281, "learning_rate": 6.509520189137628e-06, "loss": 23.8234, "step": 231070 }, { "epoch": 0.4667962200576122, "grad_norm": 116.25985717773438, "learning_rate": 6.509187407607981e-06, "loss": 18.7779, "step": 231080 }, { "epoch": 0.466816420690296, "grad_norm": 350.497802734375, "learning_rate": 6.508854618722735e-06, "loss": 19.0562, "step": 231090 }, { "epoch": 0.4668366213229798, "grad_norm": 1122.8433837890625, "learning_rate": 6.508521822483518e-06, "loss": 30.5725, "step": 231100 }, { "epoch": 0.46685682195566364, "grad_norm": 347.474365234375, "learning_rate": 6.508189018891948e-06, "loss": 17.4511, "step": 231110 }, { "epoch": 0.46687702258834746, "grad_norm": 139.44105529785156, "learning_rate": 6.507856207949647e-06, "loss": 19.9628, "step": 231120 }, { "epoch": 0.4668972232210313, "grad_norm": 510.9095153808594, "learning_rate": 6.507523389658238e-06, "loss": 9.4397, "step": 231130 }, { "epoch": 0.4669174238537151, "grad_norm": 329.23095703125, "learning_rate": 6.507190564019341e-06, "loss": 11.1078, "step": 231140 }, { "epoch": 0.4669376244863989, "grad_norm": 528.880615234375, "learning_rate": 6.506857731034582e-06, "loss": 24.5451, "step": 231150 }, { "epoch": 0.46695782511908274, "grad_norm": 266.6522521972656, "learning_rate": 6.506524890705581e-06, "loss": 23.1448, "step": 231160 }, { "epoch": 0.46697802575176656, "grad_norm": 708.8331298828125, "learning_rate": 6.50619204303396e-06, "loss": 18.703, "step": 231170 }, { "epoch": 0.4669982263844504, "grad_norm": 283.2425537109375, "learning_rate": 6.5058591880213414e-06, "loss": 15.9134, "step": 231180 }, { "epoch": 0.4670184270171342, "grad_norm": 66.12435913085938, "learning_rate": 6.505526325669348e-06, "loss": 14.2454, "step": 231190 }, { "epoch": 0.467038627649818, "grad_norm": 340.502197265625, "learning_rate": 6.505193455979603e-06, "loss": 35.1657, "step": 231200 }, { "epoch": 0.4670588282825018, "grad_norm": 302.19989013671875, "learning_rate": 6.504860578953727e-06, 
"loss": 20.8116, "step": 231210 }, { "epoch": 0.4670790289151856, "grad_norm": 443.4750671386719, "learning_rate": 6.504527694593342e-06, "loss": 17.1766, "step": 231220 }, { "epoch": 0.4670992295478694, "grad_norm": 376.025390625, "learning_rate": 6.504194802900072e-06, "loss": 11.6998, "step": 231230 }, { "epoch": 0.46711943018055324, "grad_norm": 254.54635620117188, "learning_rate": 6.50386190387554e-06, "loss": 7.7445, "step": 231240 }, { "epoch": 0.46713963081323706, "grad_norm": 1528.227783203125, "learning_rate": 6.503528997521365e-06, "loss": 27.1577, "step": 231250 }, { "epoch": 0.4671598314459209, "grad_norm": 499.67095947265625, "learning_rate": 6.503196083839175e-06, "loss": 26.5145, "step": 231260 }, { "epoch": 0.4671800320786047, "grad_norm": 544.8344116210938, "learning_rate": 6.502863162830589e-06, "loss": 21.0486, "step": 231270 }, { "epoch": 0.4672002327112885, "grad_norm": 452.4975891113281, "learning_rate": 6.502530234497229e-06, "loss": 15.3538, "step": 231280 }, { "epoch": 0.46722043334397234, "grad_norm": 130.9033966064453, "learning_rate": 6.50219729884072e-06, "loss": 19.6417, "step": 231290 }, { "epoch": 0.46724063397665616, "grad_norm": 264.6180725097656, "learning_rate": 6.501864355862682e-06, "loss": 16.1505, "step": 231300 }, { "epoch": 0.46726083460934, "grad_norm": 202.18988037109375, "learning_rate": 6.50153140556474e-06, "loss": 18.2257, "step": 231310 }, { "epoch": 0.4672810352420238, "grad_norm": 120.94511413574219, "learning_rate": 6.5011984479485165e-06, "loss": 32.4436, "step": 231320 }, { "epoch": 0.4673012358747076, "grad_norm": 350.2729797363281, "learning_rate": 6.500865483015634e-06, "loss": 19.1796, "step": 231330 }, { "epoch": 0.4673214365073914, "grad_norm": 214.12326049804688, "learning_rate": 6.5005325107677145e-06, "loss": 22.4059, "step": 231340 }, { "epoch": 0.4673416371400752, "grad_norm": 198.7315673828125, "learning_rate": 6.500199531206381e-06, "loss": 14.6877, "step": 231350 }, { "epoch": 0.467361837772759, "grad_norm": 354.2320251464844, "learning_rate": 6.4998665443332575e-06, "loss": 56.6931, "step": 231360 }, { "epoch": 0.46738203840544285, "grad_norm": 292.1263122558594, "learning_rate": 6.499533550149968e-06, "loss": 17.7263, "step": 231370 }, { "epoch": 0.46740223903812667, "grad_norm": 258.74591064453125, "learning_rate": 6.499200548658132e-06, "loss": 20.6822, "step": 231380 }, { "epoch": 0.4674224396708105, "grad_norm": 295.50738525390625, "learning_rate": 6.498867539859375e-06, "loss": 31.1578, "step": 231390 }, { "epoch": 0.4674426403034943, "grad_norm": 316.6698303222656, "learning_rate": 6.49853452375532e-06, "loss": 10.2732, "step": 231400 }, { "epoch": 0.4674628409361781, "grad_norm": 1588.327880859375, "learning_rate": 6.498201500347587e-06, "loss": 33.3242, "step": 231410 }, { "epoch": 0.46748304156886195, "grad_norm": 536.5111694335938, "learning_rate": 6.497868469637805e-06, "loss": 31.4936, "step": 231420 }, { "epoch": 0.46750324220154577, "grad_norm": 499.2315979003906, "learning_rate": 6.497535431627591e-06, "loss": 29.7853, "step": 231430 }, { "epoch": 0.4675234428342296, "grad_norm": 315.7078552246094, "learning_rate": 6.497202386318573e-06, "loss": 27.0389, "step": 231440 }, { "epoch": 0.4675436434669134, "grad_norm": 646.50927734375, "learning_rate": 6.49686933371237e-06, "loss": 10.0395, "step": 231450 }, { "epoch": 0.4675638440995972, "grad_norm": 666.9247436523438, "learning_rate": 6.496536273810609e-06, "loss": 23.477, "step": 231460 }, { "epoch": 0.467584044732281, "grad_norm": 448.87274169921875, 
"learning_rate": 6.496203206614912e-06, "loss": 17.5754, "step": 231470 }, { "epoch": 0.4676042453649648, "grad_norm": 479.738525390625, "learning_rate": 6.495870132126901e-06, "loss": 21.3026, "step": 231480 }, { "epoch": 0.46762444599764863, "grad_norm": 262.3644104003906, "learning_rate": 6.495537050348201e-06, "loss": 17.3864, "step": 231490 }, { "epoch": 0.46764464663033245, "grad_norm": 439.9342956542969, "learning_rate": 6.495203961280434e-06, "loss": 17.5187, "step": 231500 }, { "epoch": 0.46766484726301627, "grad_norm": 542.8916015625, "learning_rate": 6.494870864925225e-06, "loss": 16.6481, "step": 231510 }, { "epoch": 0.4676850478957001, "grad_norm": 467.0146179199219, "learning_rate": 6.494537761284197e-06, "loss": 15.2403, "step": 231520 }, { "epoch": 0.4677052485283839, "grad_norm": 363.3900146484375, "learning_rate": 6.494204650358973e-06, "loss": 11.0023, "step": 231530 }, { "epoch": 0.46772544916106773, "grad_norm": 192.93788146972656, "learning_rate": 6.493871532151176e-06, "loss": 26.1454, "step": 231540 }, { "epoch": 0.46774564979375155, "grad_norm": 643.8673095703125, "learning_rate": 6.493538406662429e-06, "loss": 15.8792, "step": 231550 }, { "epoch": 0.46776585042643537, "grad_norm": 546.372314453125, "learning_rate": 6.493205273894361e-06, "loss": 17.0198, "step": 231560 }, { "epoch": 0.4677860510591192, "grad_norm": 22.62688446044922, "learning_rate": 6.492872133848588e-06, "loss": 15.3674, "step": 231570 }, { "epoch": 0.467806251691803, "grad_norm": 274.5195007324219, "learning_rate": 6.49253898652674e-06, "loss": 22.0984, "step": 231580 }, { "epoch": 0.46782645232448683, "grad_norm": 873.9613647460938, "learning_rate": 6.492205831930435e-06, "loss": 19.9182, "step": 231590 }, { "epoch": 0.4678466529571706, "grad_norm": 208.98648071289062, "learning_rate": 6.491872670061302e-06, "loss": 29.9326, "step": 231600 }, { "epoch": 0.4678668535898544, "grad_norm": 234.10870361328125, "learning_rate": 6.491539500920962e-06, "loss": 20.4067, "step": 231610 }, { "epoch": 0.46788705422253823, "grad_norm": 90.78465270996094, "learning_rate": 6.491206324511039e-06, "loss": 8.997, "step": 231620 }, { "epoch": 0.46790725485522205, "grad_norm": 0.0, "learning_rate": 6.490873140833158e-06, "loss": 28.0562, "step": 231630 }, { "epoch": 0.4679274554879059, "grad_norm": 563.7119750976562, "learning_rate": 6.490539949888942e-06, "loss": 25.4656, "step": 231640 }, { "epoch": 0.4679476561205897, "grad_norm": 307.3618469238281, "learning_rate": 6.490206751680015e-06, "loss": 19.8245, "step": 231650 }, { "epoch": 0.4679678567532735, "grad_norm": 498.5671081542969, "learning_rate": 6.489873546208e-06, "loss": 24.3951, "step": 231660 }, { "epoch": 0.46798805738595733, "grad_norm": 1016.558349609375, "learning_rate": 6.489540333474522e-06, "loss": 31.5882, "step": 231670 }, { "epoch": 0.46800825801864115, "grad_norm": 422.6470642089844, "learning_rate": 6.4892071134812065e-06, "loss": 21.794, "step": 231680 }, { "epoch": 0.468028458651325, "grad_norm": 89.04255676269531, "learning_rate": 6.4888738862296765e-06, "loss": 16.5311, "step": 231690 }, { "epoch": 0.4680486592840088, "grad_norm": 9.757550239562988, "learning_rate": 6.4885406517215535e-06, "loss": 17.0194, "step": 231700 }, { "epoch": 0.4680688599166926, "grad_norm": 326.01116943359375, "learning_rate": 6.488207409958466e-06, "loss": 16.9723, "step": 231710 }, { "epoch": 0.4680890605493764, "grad_norm": 95.29586791992188, "learning_rate": 6.487874160942035e-06, "loss": 30.1869, "step": 231720 }, { "epoch": 0.4681092611820602, 
"grad_norm": 422.89642333984375, "learning_rate": 6.487540904673886e-06, "loss": 21.0942, "step": 231730 }, { "epoch": 0.468129461814744, "grad_norm": 326.724609375, "learning_rate": 6.4872076411556436e-06, "loss": 12.9984, "step": 231740 }, { "epoch": 0.46814966244742784, "grad_norm": 118.9988784790039, "learning_rate": 6.48687437038893e-06, "loss": 14.6035, "step": 231750 }, { "epoch": 0.46816986308011166, "grad_norm": 640.1672973632812, "learning_rate": 6.486541092375374e-06, "loss": 14.3454, "step": 231760 }, { "epoch": 0.4681900637127955, "grad_norm": 1.1657824516296387, "learning_rate": 6.486207807116593e-06, "loss": 20.8807, "step": 231770 }, { "epoch": 0.4682102643454793, "grad_norm": 1463.0650634765625, "learning_rate": 6.485874514614219e-06, "loss": 25.1981, "step": 231780 }, { "epoch": 0.4682304649781631, "grad_norm": 79.5938949584961, "learning_rate": 6.4855412148698704e-06, "loss": 9.8355, "step": 231790 }, { "epoch": 0.46825066561084694, "grad_norm": 933.3094482421875, "learning_rate": 6.485207907885175e-06, "loss": 27.1165, "step": 231800 }, { "epoch": 0.46827086624353076, "grad_norm": 319.1587219238281, "learning_rate": 6.484874593661757e-06, "loss": 19.2754, "step": 231810 }, { "epoch": 0.4682910668762146, "grad_norm": 182.3033447265625, "learning_rate": 6.484541272201239e-06, "loss": 28.2243, "step": 231820 }, { "epoch": 0.4683112675088984, "grad_norm": 468.4356689453125, "learning_rate": 6.484207943505249e-06, "loss": 17.5454, "step": 231830 }, { "epoch": 0.4683314681415822, "grad_norm": 511.0302734375, "learning_rate": 6.483874607575407e-06, "loss": 17.7681, "step": 231840 }, { "epoch": 0.468351668774266, "grad_norm": 397.6280517578125, "learning_rate": 6.483541264413342e-06, "loss": 20.0303, "step": 231850 }, { "epoch": 0.4683718694069498, "grad_norm": 184.28292846679688, "learning_rate": 6.483207914020675e-06, "loss": 20.3128, "step": 231860 }, { "epoch": 0.4683920700396336, "grad_norm": 248.45782470703125, "learning_rate": 6.4828745563990345e-06, "loss": 25.6381, "step": 231870 }, { "epoch": 0.46841227067231744, "grad_norm": 478.2823791503906, "learning_rate": 6.482541191550042e-06, "loss": 30.6648, "step": 231880 }, { "epoch": 0.46843247130500126, "grad_norm": 233.37548828125, "learning_rate": 6.482207819475324e-06, "loss": 11.1748, "step": 231890 }, { "epoch": 0.4684526719376851, "grad_norm": 274.9101257324219, "learning_rate": 6.481874440176506e-06, "loss": 10.3556, "step": 231900 }, { "epoch": 0.4684728725703689, "grad_norm": 147.09332275390625, "learning_rate": 6.481541053655209e-06, "loss": 30.1691, "step": 231910 }, { "epoch": 0.4684930732030527, "grad_norm": 805.603515625, "learning_rate": 6.481207659913062e-06, "loss": 24.9236, "step": 231920 }, { "epoch": 0.46851327383573654, "grad_norm": 211.09478759765625, "learning_rate": 6.4808742589516895e-06, "loss": 35.2855, "step": 231930 }, { "epoch": 0.46853347446842036, "grad_norm": 349.984130859375, "learning_rate": 6.480540850772714e-06, "loss": 21.1277, "step": 231940 }, { "epoch": 0.4685536751011042, "grad_norm": 667.3995361328125, "learning_rate": 6.4802074353777625e-06, "loss": 12.8221, "step": 231950 }, { "epoch": 0.468573875733788, "grad_norm": 82.84886932373047, "learning_rate": 6.479874012768459e-06, "loss": 23.2999, "step": 231960 }, { "epoch": 0.4685940763664718, "grad_norm": 388.8245544433594, "learning_rate": 6.479540582946431e-06, "loss": 24.1587, "step": 231970 }, { "epoch": 0.4686142769991556, "grad_norm": 190.8773956298828, "learning_rate": 6.4792071459133e-06, "loss": 14.9611, "step": 231980 }, 
{ "epoch": 0.4686344776318394, "grad_norm": 443.36151123046875, "learning_rate": 6.478873701670693e-06, "loss": 36.3784, "step": 231990 }, { "epoch": 0.46865467826452323, "grad_norm": 67.5339584350586, "learning_rate": 6.4785402502202345e-06, "loss": 22.0931, "step": 232000 }, { "epoch": 0.46867487889720705, "grad_norm": 460.312744140625, "learning_rate": 6.47820679156355e-06, "loss": 21.1524, "step": 232010 }, { "epoch": 0.46869507952989087, "grad_norm": 754.8125, "learning_rate": 6.477873325702265e-06, "loss": 13.3966, "step": 232020 }, { "epoch": 0.4687152801625747, "grad_norm": 234.09780883789062, "learning_rate": 6.477539852638006e-06, "loss": 10.7828, "step": 232030 }, { "epoch": 0.4687354807952585, "grad_norm": 301.2977600097656, "learning_rate": 6.477206372372396e-06, "loss": 10.8494, "step": 232040 }, { "epoch": 0.46875568142794233, "grad_norm": 1041.564208984375, "learning_rate": 6.476872884907061e-06, "loss": 32.9868, "step": 232050 }, { "epoch": 0.46877588206062615, "grad_norm": 717.1864013671875, "learning_rate": 6.476539390243627e-06, "loss": 38.6186, "step": 232060 }, { "epoch": 0.46879608269330997, "grad_norm": 424.36114501953125, "learning_rate": 6.47620588838372e-06, "loss": 14.4405, "step": 232070 }, { "epoch": 0.4688162833259938, "grad_norm": 99.09552764892578, "learning_rate": 6.475872379328964e-06, "loss": 21.134, "step": 232080 }, { "epoch": 0.4688364839586776, "grad_norm": 40.13679504394531, "learning_rate": 6.475538863080985e-06, "loss": 34.1756, "step": 232090 }, { "epoch": 0.46885668459136143, "grad_norm": 166.4034881591797, "learning_rate": 6.4752053396414075e-06, "loss": 10.8068, "step": 232100 }, { "epoch": 0.4688768852240452, "grad_norm": 345.4293518066406, "learning_rate": 6.474871809011858e-06, "loss": 21.3451, "step": 232110 }, { "epoch": 0.468897085856729, "grad_norm": 188.2816619873047, "learning_rate": 6.474538271193963e-06, "loss": 25.8371, "step": 232120 }, { "epoch": 0.46891728648941283, "grad_norm": 328.0193786621094, "learning_rate": 6.4742047261893485e-06, "loss": 11.4861, "step": 232130 }, { "epoch": 0.46893748712209665, "grad_norm": 221.19754028320312, "learning_rate": 6.473871173999637e-06, "loss": 17.6138, "step": 232140 }, { "epoch": 0.4689576877547805, "grad_norm": 13.92487907409668, "learning_rate": 6.473537614626457e-06, "loss": 13.3422, "step": 232150 }, { "epoch": 0.4689778883874643, "grad_norm": 3.4051811695098877, "learning_rate": 6.473204048071433e-06, "loss": 25.9466, "step": 232160 }, { "epoch": 0.4689980890201481, "grad_norm": 426.1505126953125, "learning_rate": 6.472870474336192e-06, "loss": 21.1222, "step": 232170 }, { "epoch": 0.46901828965283193, "grad_norm": 475.679931640625, "learning_rate": 6.472536893422359e-06, "loss": 29.9708, "step": 232180 }, { "epoch": 0.46903849028551575, "grad_norm": 247.21754455566406, "learning_rate": 6.472203305331559e-06, "loss": 16.6034, "step": 232190 }, { "epoch": 0.4690586909181996, "grad_norm": 332.2191467285156, "learning_rate": 6.471869710065418e-06, "loss": 21.3127, "step": 232200 }, { "epoch": 0.4690788915508834, "grad_norm": 336.51300048828125, "learning_rate": 6.471536107625565e-06, "loss": 23.8785, "step": 232210 }, { "epoch": 0.4690990921835672, "grad_norm": 659.5236206054688, "learning_rate": 6.4712024980136215e-06, "loss": 35.8184, "step": 232220 }, { "epoch": 0.46911929281625103, "grad_norm": 129.71156311035156, "learning_rate": 6.470868881231215e-06, "loss": 14.7575, "step": 232230 }, { "epoch": 0.4691394934489348, "grad_norm": 256.0760803222656, "learning_rate": 
6.470535257279974e-06, "loss": 28.4628, "step": 232240 }, { "epoch": 0.4691596940816186, "grad_norm": 171.26637268066406, "learning_rate": 6.47020162616152e-06, "loss": 14.8522, "step": 232250 }, { "epoch": 0.46917989471430244, "grad_norm": 358.0352783203125, "learning_rate": 6.469867987877484e-06, "loss": 18.9665, "step": 232260 }, { "epoch": 0.46920009534698626, "grad_norm": 452.86004638671875, "learning_rate": 6.469534342429489e-06, "loss": 20.4396, "step": 232270 }, { "epoch": 0.4692202959796701, "grad_norm": 326.33551025390625, "learning_rate": 6.469200689819161e-06, "loss": 15.69, "step": 232280 }, { "epoch": 0.4692404966123539, "grad_norm": 127.25655364990234, "learning_rate": 6.468867030048127e-06, "loss": 17.9054, "step": 232290 }, { "epoch": 0.4692606972450377, "grad_norm": 549.651611328125, "learning_rate": 6.4685333631180145e-06, "loss": 31.6002, "step": 232300 }, { "epoch": 0.46928089787772154, "grad_norm": 684.8788452148438, "learning_rate": 6.468199689030448e-06, "loss": 20.9694, "step": 232310 }, { "epoch": 0.46930109851040536, "grad_norm": 249.0142364501953, "learning_rate": 6.467866007787053e-06, "loss": 17.6288, "step": 232320 }, { "epoch": 0.4693212991430892, "grad_norm": 173.32032775878906, "learning_rate": 6.467532319389458e-06, "loss": 23.8902, "step": 232330 }, { "epoch": 0.469341499775773, "grad_norm": 417.7165832519531, "learning_rate": 6.467198623839288e-06, "loss": 16.1906, "step": 232340 }, { "epoch": 0.4693617004084568, "grad_norm": 436.4908447265625, "learning_rate": 6.466864921138171e-06, "loss": 29.664, "step": 232350 }, { "epoch": 0.4693819010411406, "grad_norm": 123.5679931640625, "learning_rate": 6.4665312112877325e-06, "loss": 14.6397, "step": 232360 }, { "epoch": 0.4694021016738244, "grad_norm": 414.0674743652344, "learning_rate": 6.466197494289598e-06, "loss": 24.4675, "step": 232370 }, { "epoch": 0.4694223023065082, "grad_norm": 621.0228881835938, "learning_rate": 6.465863770145394e-06, "loss": 22.5904, "step": 232380 }, { "epoch": 0.46944250293919204, "grad_norm": 57.8876838684082, "learning_rate": 6.4655300388567485e-06, "loss": 13.7389, "step": 232390 }, { "epoch": 0.46946270357187586, "grad_norm": 732.6996459960938, "learning_rate": 6.465196300425287e-06, "loss": 30.7162, "step": 232400 }, { "epoch": 0.4694829042045597, "grad_norm": 599.5005493164062, "learning_rate": 6.464862554852638e-06, "loss": 31.595, "step": 232410 }, { "epoch": 0.4695031048372435, "grad_norm": 547.4244995117188, "learning_rate": 6.464528802140425e-06, "loss": 17.4058, "step": 232420 }, { "epoch": 0.4695233054699273, "grad_norm": 567.2337646484375, "learning_rate": 6.464195042290277e-06, "loss": 17.078, "step": 232430 }, { "epoch": 0.46954350610261114, "grad_norm": 141.6189422607422, "learning_rate": 6.463861275303819e-06, "loss": 14.2407, "step": 232440 }, { "epoch": 0.46956370673529496, "grad_norm": 1400.07861328125, "learning_rate": 6.463527501182679e-06, "loss": 31.0269, "step": 232450 }, { "epoch": 0.4695839073679788, "grad_norm": 473.0020751953125, "learning_rate": 6.4631937199284845e-06, "loss": 12.3634, "step": 232460 }, { "epoch": 0.4696041080006626, "grad_norm": 1.2929986715316772, "learning_rate": 6.46285993154286e-06, "loss": 10.6551, "step": 232470 }, { "epoch": 0.4696243086333464, "grad_norm": 431.5660400390625, "learning_rate": 6.462526136027435e-06, "loss": 17.7755, "step": 232480 }, { "epoch": 0.4696445092660302, "grad_norm": 192.18548583984375, "learning_rate": 6.462192333383834e-06, "loss": 13.9088, "step": 232490 }, { "epoch": 0.469664709898714, 
"grad_norm": 379.816650390625, "learning_rate": 6.461858523613684e-06, "loss": 34.9893, "step": 232500 }, { "epoch": 0.4696849105313978, "grad_norm": 66.27000427246094, "learning_rate": 6.461524706718615e-06, "loss": 12.5309, "step": 232510 }, { "epoch": 0.46970511116408165, "grad_norm": 12.445015907287598, "learning_rate": 6.4611908827002504e-06, "loss": 26.512, "step": 232520 }, { "epoch": 0.46972531179676547, "grad_norm": 149.20140075683594, "learning_rate": 6.460857051560219e-06, "loss": 13.1563, "step": 232530 }, { "epoch": 0.4697455124294493, "grad_norm": 261.16400146484375, "learning_rate": 6.4605232133001474e-06, "loss": 17.0952, "step": 232540 }, { "epoch": 0.4697657130621331, "grad_norm": 249.5340118408203, "learning_rate": 6.460189367921663e-06, "loss": 14.9214, "step": 232550 }, { "epoch": 0.4697859136948169, "grad_norm": 364.1491394042969, "learning_rate": 6.459855515426392e-06, "loss": 28.4356, "step": 232560 }, { "epoch": 0.46980611432750075, "grad_norm": 661.8367919921875, "learning_rate": 6.459521655815962e-06, "loss": 21.1061, "step": 232570 }, { "epoch": 0.46982631496018457, "grad_norm": 31.491783142089844, "learning_rate": 6.459187789092003e-06, "loss": 17.2922, "step": 232580 }, { "epoch": 0.4698465155928684, "grad_norm": 816.8544921875, "learning_rate": 6.4588539152561384e-06, "loss": 29.5527, "step": 232590 }, { "epoch": 0.4698667162255522, "grad_norm": 295.40594482421875, "learning_rate": 6.458520034309995e-06, "loss": 43.5878, "step": 232600 }, { "epoch": 0.469886916858236, "grad_norm": 474.0346374511719, "learning_rate": 6.458186146255203e-06, "loss": 34.6073, "step": 232610 }, { "epoch": 0.4699071174909198, "grad_norm": 348.7222595214844, "learning_rate": 6.457852251093391e-06, "loss": 12.7735, "step": 232620 }, { "epoch": 0.4699273181236036, "grad_norm": 727.2521362304688, "learning_rate": 6.4575183488261795e-06, "loss": 44.7666, "step": 232630 }, { "epoch": 0.46994751875628743, "grad_norm": 194.83692932128906, "learning_rate": 6.457184439455204e-06, "loss": 19.4839, "step": 232640 }, { "epoch": 0.46996771938897125, "grad_norm": 595.656494140625, "learning_rate": 6.456850522982086e-06, "loss": 14.4586, "step": 232650 }, { "epoch": 0.46998792002165507, "grad_norm": 530.2792358398438, "learning_rate": 6.456516599408457e-06, "loss": 17.6902, "step": 232660 }, { "epoch": 0.4700081206543389, "grad_norm": 15.545038223266602, "learning_rate": 6.456182668735941e-06, "loss": 20.8662, "step": 232670 }, { "epoch": 0.4700283212870227, "grad_norm": 284.81988525390625, "learning_rate": 6.455848730966168e-06, "loss": 39.0358, "step": 232680 }, { "epoch": 0.47004852191970653, "grad_norm": 609.2344360351562, "learning_rate": 6.455514786100766e-06, "loss": 13.1211, "step": 232690 }, { "epoch": 0.47006872255239035, "grad_norm": 836.0491943359375, "learning_rate": 6.455180834141359e-06, "loss": 20.7911, "step": 232700 }, { "epoch": 0.47008892318507417, "grad_norm": 499.107666015625, "learning_rate": 6.45484687508958e-06, "loss": 18.5463, "step": 232710 }, { "epoch": 0.470109123817758, "grad_norm": 301.69720458984375, "learning_rate": 6.454512908947052e-06, "loss": 9.9112, "step": 232720 }, { "epoch": 0.4701293244504418, "grad_norm": 328.8058166503906, "learning_rate": 6.454178935715405e-06, "loss": 16.6532, "step": 232730 }, { "epoch": 0.47014952508312563, "grad_norm": 451.3977966308594, "learning_rate": 6.453844955396265e-06, "loss": 20.1564, "step": 232740 }, { "epoch": 0.4701697257158094, "grad_norm": 58.86642074584961, "learning_rate": 6.453510967991263e-06, "loss": 19.8341, 
"step": 232750 }, { "epoch": 0.4701899263484932, "grad_norm": 240.06796264648438, "learning_rate": 6.453176973502024e-06, "loss": 31.4836, "step": 232760 }, { "epoch": 0.47021012698117703, "grad_norm": 611.53173828125, "learning_rate": 6.4528429719301764e-06, "loss": 17.5353, "step": 232770 }, { "epoch": 0.47023032761386085, "grad_norm": 603.2828369140625, "learning_rate": 6.452508963277348e-06, "loss": 15.3629, "step": 232780 }, { "epoch": 0.4702505282465447, "grad_norm": 93.18273162841797, "learning_rate": 6.452174947545169e-06, "loss": 16.2431, "step": 232790 }, { "epoch": 0.4702707288792285, "grad_norm": 113.83590698242188, "learning_rate": 6.451840924735264e-06, "loss": 27.0131, "step": 232800 }, { "epoch": 0.4702909295119123, "grad_norm": 4.318406581878662, "learning_rate": 6.451506894849262e-06, "loss": 9.0512, "step": 232810 }, { "epoch": 0.47031113014459613, "grad_norm": 69.14533996582031, "learning_rate": 6.451172857888792e-06, "loss": 19.8808, "step": 232820 }, { "epoch": 0.47033133077727995, "grad_norm": 780.8925170898438, "learning_rate": 6.450838813855482e-06, "loss": 22.9376, "step": 232830 }, { "epoch": 0.4703515314099638, "grad_norm": 415.5531311035156, "learning_rate": 6.450504762750959e-06, "loss": 27.1629, "step": 232840 }, { "epoch": 0.4703717320426476, "grad_norm": 351.7154846191406, "learning_rate": 6.4501707045768524e-06, "loss": 42.6802, "step": 232850 }, { "epoch": 0.4703919326753314, "grad_norm": 242.80615234375, "learning_rate": 6.449836639334788e-06, "loss": 20.4225, "step": 232860 }, { "epoch": 0.47041213330801523, "grad_norm": 526.6072387695312, "learning_rate": 6.449502567026398e-06, "loss": 22.3538, "step": 232870 }, { "epoch": 0.470432333940699, "grad_norm": 86.08837127685547, "learning_rate": 6.449168487653305e-06, "loss": 22.0289, "step": 232880 }, { "epoch": 0.4704525345733828, "grad_norm": 244.63906860351562, "learning_rate": 6.448834401217143e-06, "loss": 12.4748, "step": 232890 }, { "epoch": 0.47047273520606664, "grad_norm": 541.3038330078125, "learning_rate": 6.448500307719537e-06, "loss": 24.2339, "step": 232900 }, { "epoch": 0.47049293583875046, "grad_norm": 239.39019775390625, "learning_rate": 6.448166207162119e-06, "loss": 15.4841, "step": 232910 }, { "epoch": 0.4705131364714343, "grad_norm": 338.6671142578125, "learning_rate": 6.447832099546512e-06, "loss": 12.1724, "step": 232920 }, { "epoch": 0.4705333371041181, "grad_norm": 416.7694091796875, "learning_rate": 6.4474979848743455e-06, "loss": 16.8376, "step": 232930 }, { "epoch": 0.4705535377368019, "grad_norm": 503.6643371582031, "learning_rate": 6.447163863147251e-06, "loss": 27.0924, "step": 232940 }, { "epoch": 0.47057373836948574, "grad_norm": 351.7838439941406, "learning_rate": 6.446829734366855e-06, "loss": 30.1301, "step": 232950 }, { "epoch": 0.47059393900216956, "grad_norm": 394.0791931152344, "learning_rate": 6.446495598534787e-06, "loss": 32.1762, "step": 232960 }, { "epoch": 0.4706141396348534, "grad_norm": 664.0892333984375, "learning_rate": 6.446161455652674e-06, "loss": 21.3008, "step": 232970 }, { "epoch": 0.4706343402675372, "grad_norm": 419.5210876464844, "learning_rate": 6.445827305722148e-06, "loss": 8.2237, "step": 232980 }, { "epoch": 0.470654540900221, "grad_norm": 240.32484436035156, "learning_rate": 6.445493148744832e-06, "loss": 17.5572, "step": 232990 }, { "epoch": 0.4706747415329048, "grad_norm": 127.21864318847656, "learning_rate": 6.445158984722358e-06, "loss": 13.9824, "step": 233000 }, { "epoch": 0.4706949421655886, "grad_norm": 535.8229370117188, 
"learning_rate": 6.444824813656356e-06, "loss": 24.5616, "step": 233010 }, { "epoch": 0.4707151427982724, "grad_norm": 567.6218872070312, "learning_rate": 6.444490635548451e-06, "loss": 16.5069, "step": 233020 }, { "epoch": 0.47073534343095624, "grad_norm": 982.9053955078125, "learning_rate": 6.444156450400276e-06, "loss": 11.4735, "step": 233030 }, { "epoch": 0.47075554406364006, "grad_norm": 616.2341918945312, "learning_rate": 6.443822258213457e-06, "loss": 21.5657, "step": 233040 }, { "epoch": 0.4707757446963239, "grad_norm": 132.17913818359375, "learning_rate": 6.443488058989623e-06, "loss": 16.1341, "step": 233050 }, { "epoch": 0.4707959453290077, "grad_norm": 271.3033447265625, "learning_rate": 6.443153852730404e-06, "loss": 20.707, "step": 233060 }, { "epoch": 0.4708161459616915, "grad_norm": 282.2535095214844, "learning_rate": 6.4428196394374275e-06, "loss": 23.6165, "step": 233070 }, { "epoch": 0.47083634659437534, "grad_norm": 43.08664321899414, "learning_rate": 6.442485419112322e-06, "loss": 16.8165, "step": 233080 }, { "epoch": 0.47085654722705916, "grad_norm": 310.480224609375, "learning_rate": 6.44215119175672e-06, "loss": 31.9848, "step": 233090 }, { "epoch": 0.470876747859743, "grad_norm": 231.5115966796875, "learning_rate": 6.441816957372247e-06, "loss": 21.1428, "step": 233100 }, { "epoch": 0.4708969484924268, "grad_norm": 263.9593811035156, "learning_rate": 6.441482715960532e-06, "loss": 13.3092, "step": 233110 }, { "epoch": 0.4709171491251106, "grad_norm": 534.0245971679688, "learning_rate": 6.441148467523206e-06, "loss": 21.6064, "step": 233120 }, { "epoch": 0.4709373497577944, "grad_norm": 38.840782165527344, "learning_rate": 6.440814212061897e-06, "loss": 62.0073, "step": 233130 }, { "epoch": 0.4709575503904782, "grad_norm": 383.1385803222656, "learning_rate": 6.440479949578234e-06, "loss": 16.9425, "step": 233140 }, { "epoch": 0.47097775102316203, "grad_norm": 328.57696533203125, "learning_rate": 6.440145680073847e-06, "loss": 14.7871, "step": 233150 }, { "epoch": 0.47099795165584585, "grad_norm": 221.1512908935547, "learning_rate": 6.4398114035503644e-06, "loss": 14.7821, "step": 233160 }, { "epoch": 0.47101815228852967, "grad_norm": 716.730224609375, "learning_rate": 6.4394771200094156e-06, "loss": 19.8269, "step": 233170 }, { "epoch": 0.4710383529212135, "grad_norm": 356.3135681152344, "learning_rate": 6.439142829452629e-06, "loss": 21.0467, "step": 233180 }, { "epoch": 0.4710585535538973, "grad_norm": 279.4208068847656, "learning_rate": 6.438808531881637e-06, "loss": 24.2735, "step": 233190 }, { "epoch": 0.47107875418658113, "grad_norm": 310.1206359863281, "learning_rate": 6.438474227298065e-06, "loss": 16.764, "step": 233200 }, { "epoch": 0.47109895481926495, "grad_norm": 325.89068603515625, "learning_rate": 6.438139915703544e-06, "loss": 19.6593, "step": 233210 }, { "epoch": 0.47111915545194877, "grad_norm": 230.9476776123047, "learning_rate": 6.437805597099704e-06, "loss": 40.1894, "step": 233220 }, { "epoch": 0.4711393560846326, "grad_norm": 389.3857116699219, "learning_rate": 6.437471271488174e-06, "loss": 10.1524, "step": 233230 }, { "epoch": 0.4711595567173164, "grad_norm": 348.37701416015625, "learning_rate": 6.437136938870583e-06, "loss": 19.6702, "step": 233240 }, { "epoch": 0.47117975735000023, "grad_norm": 791.6915283203125, "learning_rate": 6.4368025992485615e-06, "loss": 19.8193, "step": 233250 }, { "epoch": 0.471199957982684, "grad_norm": 0.0, "learning_rate": 6.436468252623738e-06, "loss": 10.4314, "step": 233260 }, { "epoch": 0.4712201586153678, 
"grad_norm": 883.5969848632812, "learning_rate": 6.436133898997742e-06, "loss": 33.7937, "step": 233270 }, { "epoch": 0.47124035924805163, "grad_norm": 662.7801513671875, "learning_rate": 6.4357995383722025e-06, "loss": 12.4613, "step": 233280 }, { "epoch": 0.47126055988073545, "grad_norm": 454.23809814453125, "learning_rate": 6.435465170748753e-06, "loss": 10.3335, "step": 233290 }, { "epoch": 0.4712807605134193, "grad_norm": 128.9053497314453, "learning_rate": 6.435130796129019e-06, "loss": 11.611, "step": 233300 }, { "epoch": 0.4713009611461031, "grad_norm": 222.30667114257812, "learning_rate": 6.43479641451463e-06, "loss": 24.3128, "step": 233310 }, { "epoch": 0.4713211617787869, "grad_norm": 512.7349243164062, "learning_rate": 6.43446202590722e-06, "loss": 15.7066, "step": 233320 }, { "epoch": 0.47134136241147073, "grad_norm": 1404.3638916015625, "learning_rate": 6.434127630308415e-06, "loss": 34.325, "step": 233330 }, { "epoch": 0.47136156304415455, "grad_norm": 303.31195068359375, "learning_rate": 6.433793227719845e-06, "loss": 23.5098, "step": 233340 }, { "epoch": 0.4713817636768384, "grad_norm": 454.6119079589844, "learning_rate": 6.4334588181431424e-06, "loss": 22.5974, "step": 233350 }, { "epoch": 0.4714019643095222, "grad_norm": 658.8758544921875, "learning_rate": 6.433124401579936e-06, "loss": 10.9955, "step": 233360 }, { "epoch": 0.471422164942206, "grad_norm": 393.7095947265625, "learning_rate": 6.432789978031852e-06, "loss": 13.9028, "step": 233370 }, { "epoch": 0.47144236557488983, "grad_norm": 268.5219421386719, "learning_rate": 6.432455547500525e-06, "loss": 12.0937, "step": 233380 }, { "epoch": 0.4714625662075736, "grad_norm": 206.73312377929688, "learning_rate": 6.432121109987584e-06, "loss": 38.5037, "step": 233390 }, { "epoch": 0.4714827668402574, "grad_norm": 417.64654541015625, "learning_rate": 6.431786665494657e-06, "loss": 12.2168, "step": 233400 }, { "epoch": 0.47150296747294124, "grad_norm": 756.30615234375, "learning_rate": 6.431452214023377e-06, "loss": 19.4205, "step": 233410 }, { "epoch": 0.47152316810562506, "grad_norm": 21.44666862487793, "learning_rate": 6.431117755575371e-06, "loss": 14.3705, "step": 233420 }, { "epoch": 0.4715433687383089, "grad_norm": 223.37791442871094, "learning_rate": 6.430783290152272e-06, "loss": 18.1995, "step": 233430 }, { "epoch": 0.4715635693709927, "grad_norm": 299.15960693359375, "learning_rate": 6.430448817755708e-06, "loss": 13.9114, "step": 233440 }, { "epoch": 0.4715837700036765, "grad_norm": 111.82083129882812, "learning_rate": 6.43011433838731e-06, "loss": 9.9036, "step": 233450 }, { "epoch": 0.47160397063636034, "grad_norm": 395.947998046875, "learning_rate": 6.429779852048709e-06, "loss": 16.2541, "step": 233460 }, { "epoch": 0.47162417126904416, "grad_norm": 320.6033020019531, "learning_rate": 6.429445358741533e-06, "loss": 18.566, "step": 233470 }, { "epoch": 0.471644371901728, "grad_norm": 0.041875384747982025, "learning_rate": 6.429110858467414e-06, "loss": 12.0097, "step": 233480 }, { "epoch": 0.4716645725344118, "grad_norm": 554.3045043945312, "learning_rate": 6.428776351227984e-06, "loss": 23.7955, "step": 233490 }, { "epoch": 0.4716847731670956, "grad_norm": 627.7864990234375, "learning_rate": 6.428441837024868e-06, "loss": 18.7712, "step": 233500 }, { "epoch": 0.47170497379977944, "grad_norm": 320.4233703613281, "learning_rate": 6.428107315859702e-06, "loss": 21.9909, "step": 233510 }, { "epoch": 0.4717251744324632, "grad_norm": 382.1122741699219, "learning_rate": 6.427772787734114e-06, "loss": 24.3574, 
"step": 233520 }, { "epoch": 0.471745375065147, "grad_norm": 340.03533935546875, "learning_rate": 6.4274382526497335e-06, "loss": 18.4787, "step": 233530 }, { "epoch": 0.47176557569783084, "grad_norm": 311.5838928222656, "learning_rate": 6.427103710608193e-06, "loss": 20.3582, "step": 233540 }, { "epoch": 0.47178577633051466, "grad_norm": 575.9215698242188, "learning_rate": 6.426769161611122e-06, "loss": 21.8079, "step": 233550 }, { "epoch": 0.4718059769631985, "grad_norm": 239.95668029785156, "learning_rate": 6.426434605660151e-06, "loss": 16.3507, "step": 233560 }, { "epoch": 0.4718261775958823, "grad_norm": 160.08863830566406, "learning_rate": 6.426100042756912e-06, "loss": 20.5992, "step": 233570 }, { "epoch": 0.4718463782285661, "grad_norm": 149.10824584960938, "learning_rate": 6.425765472903032e-06, "loss": 23.4933, "step": 233580 }, { "epoch": 0.47186657886124994, "grad_norm": 652.010986328125, "learning_rate": 6.425430896100145e-06, "loss": 23.7501, "step": 233590 }, { "epoch": 0.47188677949393376, "grad_norm": 285.45513916015625, "learning_rate": 6.425096312349881e-06, "loss": 11.7523, "step": 233600 }, { "epoch": 0.4719069801266176, "grad_norm": 456.1563720703125, "learning_rate": 6.424761721653871e-06, "loss": 33.2282, "step": 233610 }, { "epoch": 0.4719271807593014, "grad_norm": 529.824951171875, "learning_rate": 6.4244271240137435e-06, "loss": 20.6466, "step": 233620 }, { "epoch": 0.4719473813919852, "grad_norm": 483.4859619140625, "learning_rate": 6.424092519431132e-06, "loss": 29.0304, "step": 233630 }, { "epoch": 0.471967582024669, "grad_norm": 487.1600341796875, "learning_rate": 6.423757907907667e-06, "loss": 13.2332, "step": 233640 }, { "epoch": 0.4719877826573528, "grad_norm": 618.8986206054688, "learning_rate": 6.423423289444978e-06, "loss": 19.602, "step": 233650 }, { "epoch": 0.4720079832900366, "grad_norm": 1082.475830078125, "learning_rate": 6.423088664044696e-06, "loss": 24.639, "step": 233660 }, { "epoch": 0.47202818392272045, "grad_norm": 0.0, "learning_rate": 6.422754031708453e-06, "loss": 17.1816, "step": 233670 }, { "epoch": 0.47204838455540427, "grad_norm": 472.9530029296875, "learning_rate": 6.422419392437879e-06, "loss": 16.0823, "step": 233680 }, { "epoch": 0.4720685851880881, "grad_norm": 11.948860168457031, "learning_rate": 6.422084746234605e-06, "loss": 15.01, "step": 233690 }, { "epoch": 0.4720887858207719, "grad_norm": 82.46209716796875, "learning_rate": 6.421750093100264e-06, "loss": 11.0773, "step": 233700 }, { "epoch": 0.4721089864534557, "grad_norm": 339.6632995605469, "learning_rate": 6.421415433036484e-06, "loss": 26.4501, "step": 233710 }, { "epoch": 0.47212918708613955, "grad_norm": 376.2914123535156, "learning_rate": 6.421080766044898e-06, "loss": 28.2944, "step": 233720 }, { "epoch": 0.47214938771882337, "grad_norm": 724.7662963867188, "learning_rate": 6.420746092127138e-06, "loss": 30.572, "step": 233730 }, { "epoch": 0.4721695883515072, "grad_norm": 336.7801208496094, "learning_rate": 6.420411411284831e-06, "loss": 24.8382, "step": 233740 }, { "epoch": 0.472189788984191, "grad_norm": 318.15106201171875, "learning_rate": 6.420076723519615e-06, "loss": 13.2367, "step": 233750 }, { "epoch": 0.4722099896168748, "grad_norm": 161.84442138671875, "learning_rate": 6.419742028833114e-06, "loss": 18.2227, "step": 233760 }, { "epoch": 0.4722301902495586, "grad_norm": 792.44140625, "learning_rate": 6.419407327226963e-06, "loss": 24.4637, "step": 233770 }, { "epoch": 0.4722503908822424, "grad_norm": 566.54443359375, "learning_rate": 
6.419072618702794e-06, "loss": 23.9833, "step": 233780 }, { "epoch": 0.47227059151492623, "grad_norm": 690.1046142578125, "learning_rate": 6.4187379032622355e-06, "loss": 30.394, "step": 233790 }, { "epoch": 0.47229079214761005, "grad_norm": 372.13397216796875, "learning_rate": 6.418403180906923e-06, "loss": 29.5859, "step": 233800 }, { "epoch": 0.47231099278029387, "grad_norm": 73.77745819091797, "learning_rate": 6.418068451638484e-06, "loss": 33.5413, "step": 233810 }, { "epoch": 0.4723311934129777, "grad_norm": 362.9255676269531, "learning_rate": 6.4177337154585514e-06, "loss": 10.622, "step": 233820 }, { "epoch": 0.4723513940456615, "grad_norm": 509.4954528808594, "learning_rate": 6.417398972368756e-06, "loss": 25.9802, "step": 233830 }, { "epoch": 0.47237159467834533, "grad_norm": 346.18218994140625, "learning_rate": 6.41706422237073e-06, "loss": 18.6955, "step": 233840 }, { "epoch": 0.47239179531102915, "grad_norm": 19.44637107849121, "learning_rate": 6.416729465466106e-06, "loss": 48.4805, "step": 233850 }, { "epoch": 0.47241199594371297, "grad_norm": 788.625244140625, "learning_rate": 6.416394701656514e-06, "loss": 17.8316, "step": 233860 }, { "epoch": 0.4724321965763968, "grad_norm": 17.05004119873047, "learning_rate": 6.416059930943586e-06, "loss": 21.2076, "step": 233870 }, { "epoch": 0.4724523972090806, "grad_norm": 653.5442504882812, "learning_rate": 6.415725153328953e-06, "loss": 34.2984, "step": 233880 }, { "epoch": 0.47247259784176443, "grad_norm": 792.2357788085938, "learning_rate": 6.41539036881425e-06, "loss": 19.9054, "step": 233890 }, { "epoch": 0.4724927984744482, "grad_norm": 198.41261291503906, "learning_rate": 6.415055577401101e-06, "loss": 23.0116, "step": 233900 }, { "epoch": 0.472512999107132, "grad_norm": 1255.3897705078125, "learning_rate": 6.414720779091147e-06, "loss": 33.3987, "step": 233910 }, { "epoch": 0.47253319973981583, "grad_norm": 705.5669555664062, "learning_rate": 6.414385973886012e-06, "loss": 16.4641, "step": 233920 }, { "epoch": 0.47255340037249965, "grad_norm": 1978.4207763671875, "learning_rate": 6.414051161787334e-06, "loss": 24.5816, "step": 233930 }, { "epoch": 0.4725736010051835, "grad_norm": 384.2996826171875, "learning_rate": 6.4137163427967415e-06, "loss": 26.6482, "step": 233940 }, { "epoch": 0.4725938016378673, "grad_norm": 217.927490234375, "learning_rate": 6.413381516915868e-06, "loss": 25.0769, "step": 233950 }, { "epoch": 0.4726140022705511, "grad_norm": 209.63067626953125, "learning_rate": 6.413046684146343e-06, "loss": 19.4135, "step": 233960 }, { "epoch": 0.47263420290323493, "grad_norm": 7.532628536224365, "learning_rate": 6.412711844489801e-06, "loss": 15.6465, "step": 233970 }, { "epoch": 0.47265440353591875, "grad_norm": 361.89581298828125, "learning_rate": 6.4123769979478715e-06, "loss": 31.4909, "step": 233980 }, { "epoch": 0.4726746041686026, "grad_norm": 448.7689208984375, "learning_rate": 6.412042144522188e-06, "loss": 16.613, "step": 233990 }, { "epoch": 0.4726948048012864, "grad_norm": 454.71240234375, "learning_rate": 6.411707284214384e-06, "loss": 22.9245, "step": 234000 }, { "epoch": 0.4727150054339702, "grad_norm": 209.35000610351562, "learning_rate": 6.411372417026087e-06, "loss": 31.0641, "step": 234010 }, { "epoch": 0.47273520606665403, "grad_norm": 932.4186401367188, "learning_rate": 6.411037542958935e-06, "loss": 32.0332, "step": 234020 }, { "epoch": 0.4727554066993378, "grad_norm": 49.623966217041016, "learning_rate": 6.410702662014554e-06, "loss": 20.1529, "step": 234030 }, { "epoch": 0.4727756073320216, 
"grad_norm": 179.2115478515625, "learning_rate": 6.410367774194583e-06, "loss": 18.3027, "step": 234040 }, { "epoch": 0.47279580796470544, "grad_norm": 573.8726196289062, "learning_rate": 6.410032879500647e-06, "loss": 20.464, "step": 234050 }, { "epoch": 0.47281600859738926, "grad_norm": 353.9385986328125, "learning_rate": 6.409697977934384e-06, "loss": 32.2044, "step": 234060 }, { "epoch": 0.4728362092300731, "grad_norm": 297.4496765136719, "learning_rate": 6.409363069497424e-06, "loss": 11.7943, "step": 234070 }, { "epoch": 0.4728564098627569, "grad_norm": 558.5277099609375, "learning_rate": 6.4090281541913975e-06, "loss": 18.8906, "step": 234080 }, { "epoch": 0.4728766104954407, "grad_norm": 306.1007080078125, "learning_rate": 6.408693232017942e-06, "loss": 17.146, "step": 234090 }, { "epoch": 0.47289681112812454, "grad_norm": 295.1012878417969, "learning_rate": 6.408358302978683e-06, "loss": 31.9165, "step": 234100 }, { "epoch": 0.47291701176080836, "grad_norm": 766.9622802734375, "learning_rate": 6.408023367075258e-06, "loss": 21.9427, "step": 234110 }, { "epoch": 0.4729372123934922, "grad_norm": 239.5560302734375, "learning_rate": 6.4076884243092975e-06, "loss": 18.5846, "step": 234120 }, { "epoch": 0.472957413026176, "grad_norm": 99.82518768310547, "learning_rate": 6.407353474682436e-06, "loss": 11.0251, "step": 234130 }, { "epoch": 0.4729776136588598, "grad_norm": 246.14927673339844, "learning_rate": 6.407018518196303e-06, "loss": 27.4023, "step": 234140 }, { "epoch": 0.4729978142915436, "grad_norm": 343.39886474609375, "learning_rate": 6.406683554852532e-06, "loss": 26.9017, "step": 234150 }, { "epoch": 0.4730180149242274, "grad_norm": 508.7539367675781, "learning_rate": 6.406348584652756e-06, "loss": 23.6979, "step": 234160 }, { "epoch": 0.4730382155569112, "grad_norm": 1494.3701171875, "learning_rate": 6.4060136075986076e-06, "loss": 20.9132, "step": 234170 }, { "epoch": 0.47305841618959504, "grad_norm": 256.2373962402344, "learning_rate": 6.405678623691721e-06, "loss": 17.7332, "step": 234180 }, { "epoch": 0.47307861682227886, "grad_norm": 583.6516723632812, "learning_rate": 6.405343632933725e-06, "loss": 9.6859, "step": 234190 }, { "epoch": 0.4730988174549627, "grad_norm": 395.7854919433594, "learning_rate": 6.4050086353262565e-06, "loss": 13.114, "step": 234200 }, { "epoch": 0.4731190180876465, "grad_norm": 945.669189453125, "learning_rate": 6.404673630870946e-06, "loss": 14.7819, "step": 234210 }, { "epoch": 0.4731392187203303, "grad_norm": 281.0338134765625, "learning_rate": 6.404338619569425e-06, "loss": 34.6639, "step": 234220 }, { "epoch": 0.47315941935301414, "grad_norm": 171.36349487304688, "learning_rate": 6.40400360142333e-06, "loss": 9.9086, "step": 234230 }, { "epoch": 0.47317961998569796, "grad_norm": 601.5171508789062, "learning_rate": 6.403668576434289e-06, "loss": 17.3577, "step": 234240 }, { "epoch": 0.4731998206183818, "grad_norm": 301.04840087890625, "learning_rate": 6.40333354460394e-06, "loss": 15.3694, "step": 234250 }, { "epoch": 0.4732200212510656, "grad_norm": 215.11199951171875, "learning_rate": 6.402998505933913e-06, "loss": 18.9071, "step": 234260 }, { "epoch": 0.4732402218837494, "grad_norm": 186.56710815429688, "learning_rate": 6.4026634604258404e-06, "loss": 16.7488, "step": 234270 }, { "epoch": 0.4732604225164332, "grad_norm": 447.1479187011719, "learning_rate": 6.402328408081358e-06, "loss": 20.4462, "step": 234280 }, { "epoch": 0.473280623149117, "grad_norm": 558.9010620117188, "learning_rate": 6.401993348902095e-06, "loss": 21.6987, "step": 
234290 }, { "epoch": 0.47330082378180083, "grad_norm": 360.9781188964844, "learning_rate": 6.401658282889689e-06, "loss": 23.5205, "step": 234300 }, { "epoch": 0.47332102441448465, "grad_norm": 217.78128051757812, "learning_rate": 6.401323210045768e-06, "loss": 23.361, "step": 234310 }, { "epoch": 0.47334122504716847, "grad_norm": 386.0395812988281, "learning_rate": 6.400988130371969e-06, "loss": 19.8657, "step": 234320 }, { "epoch": 0.4733614256798523, "grad_norm": 235.39915466308594, "learning_rate": 6.400653043869924e-06, "loss": 18.4033, "step": 234330 }, { "epoch": 0.4733816263125361, "grad_norm": 509.7830505371094, "learning_rate": 6.400317950541265e-06, "loss": 30.7332, "step": 234340 }, { "epoch": 0.47340182694521993, "grad_norm": 585.4219360351562, "learning_rate": 6.399982850387625e-06, "loss": 19.8323, "step": 234350 }, { "epoch": 0.47342202757790375, "grad_norm": 1028.9339599609375, "learning_rate": 6.3996477434106405e-06, "loss": 24.8702, "step": 234360 }, { "epoch": 0.47344222821058757, "grad_norm": 802.591064453125, "learning_rate": 6.399312629611941e-06, "loss": 24.8163, "step": 234370 }, { "epoch": 0.4734624288432714, "grad_norm": 0.0, "learning_rate": 6.398977508993164e-06, "loss": 8.7213, "step": 234380 }, { "epoch": 0.4734826294759552, "grad_norm": 668.1257934570312, "learning_rate": 6.3986423815559386e-06, "loss": 26.5238, "step": 234390 }, { "epoch": 0.47350283010863903, "grad_norm": 252.56785583496094, "learning_rate": 6.3983072473019e-06, "loss": 7.3386, "step": 234400 }, { "epoch": 0.4735230307413228, "grad_norm": 457.1835021972656, "learning_rate": 6.3979721062326815e-06, "loss": 28.3048, "step": 234410 }, { "epoch": 0.4735432313740066, "grad_norm": 412.34381103515625, "learning_rate": 6.397636958349918e-06, "loss": 11.678, "step": 234420 }, { "epoch": 0.47356343200669043, "grad_norm": 648.4039916992188, "learning_rate": 6.397301803655239e-06, "loss": 20.3825, "step": 234430 }, { "epoch": 0.47358363263937425, "grad_norm": 105.66046905517578, "learning_rate": 6.396966642150282e-06, "loss": 19.9678, "step": 234440 }, { "epoch": 0.4736038332720581, "grad_norm": 358.0011291503906, "learning_rate": 6.396631473836677e-06, "loss": 27.233, "step": 234450 }, { "epoch": 0.4736240339047419, "grad_norm": 141.00003051757812, "learning_rate": 6.396296298716061e-06, "loss": 15.7124, "step": 234460 }, { "epoch": 0.4736442345374257, "grad_norm": 604.2069702148438, "learning_rate": 6.3959611167900685e-06, "loss": 20.4789, "step": 234470 }, { "epoch": 0.47366443517010953, "grad_norm": 471.7048645019531, "learning_rate": 6.395625928060328e-06, "loss": 13.4348, "step": 234480 }, { "epoch": 0.47368463580279335, "grad_norm": 99.30219268798828, "learning_rate": 6.395290732528476e-06, "loss": 9.8516, "step": 234490 }, { "epoch": 0.4737048364354772, "grad_norm": 121.78633117675781, "learning_rate": 6.3949555301961474e-06, "loss": 32.6659, "step": 234500 }, { "epoch": 0.473725037068161, "grad_norm": 192.4600372314453, "learning_rate": 6.3946203210649734e-06, "loss": 21.0008, "step": 234510 }, { "epoch": 0.4737452377008448, "grad_norm": 320.25244140625, "learning_rate": 6.39428510513659e-06, "loss": 14.6393, "step": 234520 }, { "epoch": 0.47376543833352863, "grad_norm": 491.1825256347656, "learning_rate": 6.393949882412629e-06, "loss": 34.5892, "step": 234530 }, { "epoch": 0.4737856389662124, "grad_norm": 678.1868286132812, "learning_rate": 6.393614652894727e-06, "loss": 16.4696, "step": 234540 }, { "epoch": 0.4738058395988962, "grad_norm": 449.6884765625, "learning_rate": 
6.3932794165845156e-06, "loss": 19.0783, "step": 234550 }, { "epoch": 0.47382604023158004, "grad_norm": 326.8847351074219, "learning_rate": 6.39294417348363e-06, "loss": 14.5435, "step": 234560 }, { "epoch": 0.47384624086426386, "grad_norm": 517.9143676757812, "learning_rate": 6.392608923593703e-06, "loss": 16.6445, "step": 234570 }, { "epoch": 0.4738664414969477, "grad_norm": 56.59879684448242, "learning_rate": 6.392273666916369e-06, "loss": 12.9108, "step": 234580 }, { "epoch": 0.4738866421296315, "grad_norm": 247.04637145996094, "learning_rate": 6.391938403453262e-06, "loss": 20.0154, "step": 234590 }, { "epoch": 0.4739068427623153, "grad_norm": 277.41070556640625, "learning_rate": 6.391603133206015e-06, "loss": 19.3081, "step": 234600 }, { "epoch": 0.47392704339499914, "grad_norm": 304.9445495605469, "learning_rate": 6.391267856176263e-06, "loss": 23.4097, "step": 234610 }, { "epoch": 0.47394724402768296, "grad_norm": 638.6124267578125, "learning_rate": 6.390932572365641e-06, "loss": 32.8187, "step": 234620 }, { "epoch": 0.4739674446603668, "grad_norm": 279.404052734375, "learning_rate": 6.390597281775783e-06, "loss": 17.2522, "step": 234630 }, { "epoch": 0.4739876452930506, "grad_norm": 719.03173828125, "learning_rate": 6.390261984408322e-06, "loss": 24.9276, "step": 234640 }, { "epoch": 0.4740078459257344, "grad_norm": 469.1488952636719, "learning_rate": 6.389926680264893e-06, "loss": 20.5542, "step": 234650 }, { "epoch": 0.47402804655841824, "grad_norm": 165.4766082763672, "learning_rate": 6.389591369347129e-06, "loss": 38.6157, "step": 234660 }, { "epoch": 0.474048247191102, "grad_norm": 217.4148712158203, "learning_rate": 6.389256051656665e-06, "loss": 20.6019, "step": 234670 }, { "epoch": 0.4740684478237858, "grad_norm": 348.1098937988281, "learning_rate": 6.388920727195138e-06, "loss": 14.3781, "step": 234680 }, { "epoch": 0.47408864845646964, "grad_norm": 523.4564208984375, "learning_rate": 6.3885853959641765e-06, "loss": 14.9741, "step": 234690 }, { "epoch": 0.47410884908915346, "grad_norm": 287.4259948730469, "learning_rate": 6.388250057965421e-06, "loss": 19.5019, "step": 234700 }, { "epoch": 0.4741290497218373, "grad_norm": 714.7274780273438, "learning_rate": 6.387914713200502e-06, "loss": 21.1032, "step": 234710 }, { "epoch": 0.4741492503545211, "grad_norm": 599.4790649414062, "learning_rate": 6.387579361671054e-06, "loss": 21.3773, "step": 234720 }, { "epoch": 0.4741694509872049, "grad_norm": 457.8786315917969, "learning_rate": 6.387244003378713e-06, "loss": 25.5746, "step": 234730 }, { "epoch": 0.47418965161988874, "grad_norm": 130.84654235839844, "learning_rate": 6.386908638325114e-06, "loss": 22.1176, "step": 234740 }, { "epoch": 0.47420985225257256, "grad_norm": 177.48048400878906, "learning_rate": 6.386573266511891e-06, "loss": 29.9546, "step": 234750 }, { "epoch": 0.4742300528852564, "grad_norm": 354.9969177246094, "learning_rate": 6.3862378879406765e-06, "loss": 36.502, "step": 234760 }, { "epoch": 0.4742502535179402, "grad_norm": 642.2314453125, "learning_rate": 6.385902502613106e-06, "loss": 20.1804, "step": 234770 }, { "epoch": 0.474270454150624, "grad_norm": 561.5035400390625, "learning_rate": 6.385567110530816e-06, "loss": 23.3928, "step": 234780 }, { "epoch": 0.4742906547833078, "grad_norm": 469.0497131347656, "learning_rate": 6.385231711695441e-06, "loss": 27.8145, "step": 234790 }, { "epoch": 0.4743108554159916, "grad_norm": 353.9854431152344, "learning_rate": 6.384896306108612e-06, "loss": 19.0264, "step": 234800 }, { "epoch": 0.4743310560486754, 
"grad_norm": 304.20697021484375, "learning_rate": 6.384560893771968e-06, "loss": 17.292, "step": 234810 }, { "epoch": 0.47435125668135925, "grad_norm": 342.96832275390625, "learning_rate": 6.3842254746871424e-06, "loss": 29.5318, "step": 234820 }, { "epoch": 0.47437145731404307, "grad_norm": 763.3519287109375, "learning_rate": 6.3838900488557695e-06, "loss": 18.7512, "step": 234830 }, { "epoch": 0.4743916579467269, "grad_norm": 170.58364868164062, "learning_rate": 6.383554616279485e-06, "loss": 21.7464, "step": 234840 }, { "epoch": 0.4744118585794107, "grad_norm": 248.70811462402344, "learning_rate": 6.383219176959921e-06, "loss": 15.3511, "step": 234850 }, { "epoch": 0.4744320592120945, "grad_norm": 243.34239196777344, "learning_rate": 6.382883730898717e-06, "loss": 39.5096, "step": 234860 }, { "epoch": 0.47445225984477835, "grad_norm": 437.47540283203125, "learning_rate": 6.382548278097503e-06, "loss": 22.9937, "step": 234870 }, { "epoch": 0.47447246047746217, "grad_norm": 127.16067504882812, "learning_rate": 6.382212818557918e-06, "loss": 13.2628, "step": 234880 }, { "epoch": 0.474492661110146, "grad_norm": 453.02056884765625, "learning_rate": 6.381877352281594e-06, "loss": 19.5698, "step": 234890 }, { "epoch": 0.4745128617428298, "grad_norm": 460.7250671386719, "learning_rate": 6.3815418792701686e-06, "loss": 17.8669, "step": 234900 }, { "epoch": 0.4745330623755136, "grad_norm": 26.176515579223633, "learning_rate": 6.381206399525276e-06, "loss": 10.9431, "step": 234910 }, { "epoch": 0.4745532630081974, "grad_norm": 156.2840576171875, "learning_rate": 6.38087091304855e-06, "loss": 19.0455, "step": 234920 }, { "epoch": 0.4745734636408812, "grad_norm": 272.0287170410156, "learning_rate": 6.380535419841627e-06, "loss": 10.9583, "step": 234930 }, { "epoch": 0.47459366427356503, "grad_norm": 241.13514709472656, "learning_rate": 6.380199919906141e-06, "loss": 13.9992, "step": 234940 }, { "epoch": 0.47461386490624885, "grad_norm": 980.8489379882812, "learning_rate": 6.3798644132437304e-06, "loss": 28.1995, "step": 234950 }, { "epoch": 0.47463406553893267, "grad_norm": 162.93238830566406, "learning_rate": 6.379528899856025e-06, "loss": 41.1193, "step": 234960 }, { "epoch": 0.4746542661716165, "grad_norm": 282.31512451171875, "learning_rate": 6.3791933797446644e-06, "loss": 28.5602, "step": 234970 }, { "epoch": 0.4746744668043003, "grad_norm": 285.1656494140625, "learning_rate": 6.378857852911283e-06, "loss": 20.8154, "step": 234980 }, { "epoch": 0.47469466743698413, "grad_norm": 280.6553955078125, "learning_rate": 6.378522319357515e-06, "loss": 12.6222, "step": 234990 }, { "epoch": 0.47471486806966795, "grad_norm": 257.03887939453125, "learning_rate": 6.378186779084996e-06, "loss": 21.6741, "step": 235000 }, { "epoch": 0.47473506870235177, "grad_norm": 260.839599609375, "learning_rate": 6.377851232095362e-06, "loss": 13.5192, "step": 235010 }, { "epoch": 0.4747552693350356, "grad_norm": 37.195953369140625, "learning_rate": 6.37751567839025e-06, "loss": 20.0982, "step": 235020 }, { "epoch": 0.4747754699677194, "grad_norm": 309.1211853027344, "learning_rate": 6.377180117971292e-06, "loss": 12.8097, "step": 235030 }, { "epoch": 0.47479567060040323, "grad_norm": 271.5387878417969, "learning_rate": 6.376844550840126e-06, "loss": 20.8148, "step": 235040 }, { "epoch": 0.474815871233087, "grad_norm": 221.25985717773438, "learning_rate": 6.376508976998385e-06, "loss": 24.5826, "step": 235050 }, { "epoch": 0.4748360718657708, "grad_norm": 555.2584228515625, "learning_rate": 6.3761733964477066e-06, 
"loss": 18.8689, "step": 235060 }, { "epoch": 0.47485627249845463, "grad_norm": 29.00701141357422, "learning_rate": 6.375837809189726e-06, "loss": 18.4605, "step": 235070 }, { "epoch": 0.47487647313113845, "grad_norm": 520.4061279296875, "learning_rate": 6.375502215226082e-06, "loss": 16.8058, "step": 235080 }, { "epoch": 0.4748966737638223, "grad_norm": 232.8343963623047, "learning_rate": 6.375166614558404e-06, "loss": 13.5121, "step": 235090 }, { "epoch": 0.4749168743965061, "grad_norm": 981.6873779296875, "learning_rate": 6.374831007188331e-06, "loss": 23.061, "step": 235100 }, { "epoch": 0.4749370750291899, "grad_norm": 643.386474609375, "learning_rate": 6.374495393117499e-06, "loss": 32.28, "step": 235110 }, { "epoch": 0.47495727566187373, "grad_norm": 336.3600769042969, "learning_rate": 6.374159772347541e-06, "loss": 16.8826, "step": 235120 }, { "epoch": 0.47497747629455755, "grad_norm": 307.8014831542969, "learning_rate": 6.373824144880099e-06, "loss": 12.2089, "step": 235130 }, { "epoch": 0.4749976769272414, "grad_norm": 633.9989624023438, "learning_rate": 6.3734885107168e-06, "loss": 13.723, "step": 235140 }, { "epoch": 0.4750178775599252, "grad_norm": 506.0127868652344, "learning_rate": 6.373152869859288e-06, "loss": 20.6466, "step": 235150 }, { "epoch": 0.475038078192609, "grad_norm": 538.2627563476562, "learning_rate": 6.372817222309194e-06, "loss": 23.1575, "step": 235160 }, { "epoch": 0.47505827882529283, "grad_norm": 259.8069152832031, "learning_rate": 6.372481568068156e-06, "loss": 29.974, "step": 235170 }, { "epoch": 0.4750784794579766, "grad_norm": 889.163818359375, "learning_rate": 6.37214590713781e-06, "loss": 31.5299, "step": 235180 }, { "epoch": 0.4750986800906604, "grad_norm": 150.32533264160156, "learning_rate": 6.37181023951979e-06, "loss": 13.2184, "step": 235190 }, { "epoch": 0.47511888072334424, "grad_norm": 514.8124389648438, "learning_rate": 6.371474565215734e-06, "loss": 21.9721, "step": 235200 }, { "epoch": 0.47513908135602806, "grad_norm": 331.0513610839844, "learning_rate": 6.371138884227277e-06, "loss": 18.7461, "step": 235210 }, { "epoch": 0.4751592819887119, "grad_norm": 310.6734313964844, "learning_rate": 6.3708031965560545e-06, "loss": 33.3973, "step": 235220 }, { "epoch": 0.4751794826213957, "grad_norm": 627.3316040039062, "learning_rate": 6.370467502203704e-06, "loss": 24.008, "step": 235230 }, { "epoch": 0.4751996832540795, "grad_norm": 507.3321838378906, "learning_rate": 6.370131801171863e-06, "loss": 23.7958, "step": 235240 }, { "epoch": 0.47521988388676334, "grad_norm": 509.7972412109375, "learning_rate": 6.369796093462164e-06, "loss": 21.6889, "step": 235250 }, { "epoch": 0.47524008451944716, "grad_norm": 0.0, "learning_rate": 6.369460379076244e-06, "loss": 20.57, "step": 235260 }, { "epoch": 0.475260285152131, "grad_norm": 533.4564819335938, "learning_rate": 6.369124658015742e-06, "loss": 10.4068, "step": 235270 }, { "epoch": 0.4752804857848148, "grad_norm": 143.0695343017578, "learning_rate": 6.368788930282292e-06, "loss": 22.7546, "step": 235280 }, { "epoch": 0.4753006864174986, "grad_norm": 21.005603790283203, "learning_rate": 6.368453195877531e-06, "loss": 24.8897, "step": 235290 }, { "epoch": 0.47532088705018244, "grad_norm": 187.2565155029297, "learning_rate": 6.368117454803093e-06, "loss": 12.8817, "step": 235300 }, { "epoch": 0.4753410876828662, "grad_norm": 341.4812316894531, "learning_rate": 6.36778170706062e-06, "loss": 28.8768, "step": 235310 }, { "epoch": 0.47536128831555, "grad_norm": 273.7339782714844, "learning_rate": 
6.367445952651742e-06, "loss": 38.7234, "step": 235320 }, { "epoch": 0.47538148894823384, "grad_norm": 520.7974243164062, "learning_rate": 6.367110191578099e-06, "loss": 25.4437, "step": 235330 }, { "epoch": 0.47540168958091766, "grad_norm": 475.5674133300781, "learning_rate": 6.366774423841326e-06, "loss": 27.3372, "step": 235340 }, { "epoch": 0.4754218902136015, "grad_norm": 122.72430419921875, "learning_rate": 6.366438649443062e-06, "loss": 10.3798, "step": 235350 }, { "epoch": 0.4754420908462853, "grad_norm": 371.865478515625, "learning_rate": 6.366102868384942e-06, "loss": 18.0436, "step": 235360 }, { "epoch": 0.4754622914789691, "grad_norm": 194.8443145751953, "learning_rate": 6.365767080668601e-06, "loss": 30.0203, "step": 235370 }, { "epoch": 0.47548249211165294, "grad_norm": 261.30224609375, "learning_rate": 6.365431286295677e-06, "loss": 20.0374, "step": 235380 }, { "epoch": 0.47550269274433676, "grad_norm": 298.7358703613281, "learning_rate": 6.365095485267807e-06, "loss": 10.4789, "step": 235390 }, { "epoch": 0.4755228933770206, "grad_norm": 209.228759765625, "learning_rate": 6.364759677586627e-06, "loss": 15.6559, "step": 235400 }, { "epoch": 0.4755430940097044, "grad_norm": 0.0, "learning_rate": 6.364423863253772e-06, "loss": 16.6951, "step": 235410 }, { "epoch": 0.4755632946423882, "grad_norm": 508.7265625, "learning_rate": 6.364088042270884e-06, "loss": 16.7934, "step": 235420 }, { "epoch": 0.475583495275072, "grad_norm": 396.3540954589844, "learning_rate": 6.363752214639595e-06, "loss": 15.8091, "step": 235430 }, { "epoch": 0.4756036959077558, "grad_norm": 6.936513423919678, "learning_rate": 6.363416380361542e-06, "loss": 15.8172, "step": 235440 }, { "epoch": 0.47562389654043963, "grad_norm": 148.58872985839844, "learning_rate": 6.363080539438364e-06, "loss": 16.6924, "step": 235450 }, { "epoch": 0.47564409717312345, "grad_norm": 146.972900390625, "learning_rate": 6.3627446918716965e-06, "loss": 18.2104, "step": 235460 }, { "epoch": 0.47566429780580727, "grad_norm": 856.9461669921875, "learning_rate": 6.362408837663177e-06, "loss": 25.8049, "step": 235470 }, { "epoch": 0.4756844984384911, "grad_norm": 262.8402404785156, "learning_rate": 6.3620729768144415e-06, "loss": 22.2888, "step": 235480 }, { "epoch": 0.4757046990711749, "grad_norm": 412.86749267578125, "learning_rate": 6.361737109327128e-06, "loss": 20.7995, "step": 235490 }, { "epoch": 0.47572489970385873, "grad_norm": 110.34162902832031, "learning_rate": 6.361401235202872e-06, "loss": 21.2092, "step": 235500 }, { "epoch": 0.47574510033654255, "grad_norm": 345.8625183105469, "learning_rate": 6.361065354443312e-06, "loss": 12.1448, "step": 235510 }, { "epoch": 0.47576530096922637, "grad_norm": 269.2815246582031, "learning_rate": 6.360729467050086e-06, "loss": 16.7931, "step": 235520 }, { "epoch": 0.4757855016019102, "grad_norm": 1235.232421875, "learning_rate": 6.360393573024828e-06, "loss": 27.6663, "step": 235530 }, { "epoch": 0.475805702234594, "grad_norm": 136.81576538085938, "learning_rate": 6.360057672369177e-06, "loss": 20.3639, "step": 235540 }, { "epoch": 0.47582590286727783, "grad_norm": 524.9102172851562, "learning_rate": 6.35972176508477e-06, "loss": 17.5077, "step": 235550 }, { "epoch": 0.4758461034999616, "grad_norm": 235.1743621826172, "learning_rate": 6.3593858511732446e-06, "loss": 24.1391, "step": 235560 }, { "epoch": 0.4758663041326454, "grad_norm": 696.58984375, "learning_rate": 6.359049930636235e-06, "loss": 17.0567, "step": 235570 }, { "epoch": 0.47588650476532923, "grad_norm": 602.1285400390625, 
"learning_rate": 6.3587140034753836e-06, "loss": 19.6464, "step": 235580 }, { "epoch": 0.47590670539801305, "grad_norm": 289.7513122558594, "learning_rate": 6.358378069692324e-06, "loss": 24.3361, "step": 235590 }, { "epoch": 0.4759269060306969, "grad_norm": 367.5796203613281, "learning_rate": 6.358042129288694e-06, "loss": 23.8514, "step": 235600 }, { "epoch": 0.4759471066633807, "grad_norm": 189.98202514648438, "learning_rate": 6.3577061822661326e-06, "loss": 14.4241, "step": 235610 }, { "epoch": 0.4759673072960645, "grad_norm": 156.3752899169922, "learning_rate": 6.357370228626274e-06, "loss": 11.945, "step": 235620 }, { "epoch": 0.47598750792874833, "grad_norm": 111.87229919433594, "learning_rate": 6.3570342683707595e-06, "loss": 18.5781, "step": 235630 }, { "epoch": 0.47600770856143215, "grad_norm": 161.64215087890625, "learning_rate": 6.356698301501224e-06, "loss": 40.5645, "step": 235640 }, { "epoch": 0.476027909194116, "grad_norm": 281.6654357910156, "learning_rate": 6.356362328019305e-06, "loss": 15.8512, "step": 235650 }, { "epoch": 0.4760481098267998, "grad_norm": 608.2230224609375, "learning_rate": 6.35602634792664e-06, "loss": 31.2502, "step": 235660 }, { "epoch": 0.4760683104594836, "grad_norm": 292.78631591796875, "learning_rate": 6.355690361224869e-06, "loss": 19.3586, "step": 235670 }, { "epoch": 0.47608851109216743, "grad_norm": 299.5194396972656, "learning_rate": 6.355354367915626e-06, "loss": 18.2044, "step": 235680 }, { "epoch": 0.4761087117248512, "grad_norm": 342.9618225097656, "learning_rate": 6.355018368000552e-06, "loss": 12.7424, "step": 235690 }, { "epoch": 0.476128912357535, "grad_norm": 465.0326232910156, "learning_rate": 6.35468236148128e-06, "loss": 21.8289, "step": 235700 }, { "epoch": 0.47614911299021884, "grad_norm": 442.16748046875, "learning_rate": 6.354346348359452e-06, "loss": 17.4674, "step": 235710 }, { "epoch": 0.47616931362290266, "grad_norm": 489.3575134277344, "learning_rate": 6.354010328636705e-06, "loss": 18.5888, "step": 235720 }, { "epoch": 0.4761895142555865, "grad_norm": 228.85992431640625, "learning_rate": 6.3536743023146744e-06, "loss": 8.9061, "step": 235730 }, { "epoch": 0.4762097148882703, "grad_norm": 532.1124877929688, "learning_rate": 6.353338269395e-06, "loss": 19.8909, "step": 235740 }, { "epoch": 0.4762299155209541, "grad_norm": 942.0388793945312, "learning_rate": 6.353002229879318e-06, "loss": 34.5855, "step": 235750 }, { "epoch": 0.47625011615363794, "grad_norm": 314.45513916015625, "learning_rate": 6.352666183769269e-06, "loss": 10.9754, "step": 235760 }, { "epoch": 0.47627031678632176, "grad_norm": 633.0263671875, "learning_rate": 6.352330131066489e-06, "loss": 18.347, "step": 235770 }, { "epoch": 0.4762905174190056, "grad_norm": 891.4623413085938, "learning_rate": 6.351994071772615e-06, "loss": 20.04, "step": 235780 }, { "epoch": 0.4763107180516894, "grad_norm": 462.0434265136719, "learning_rate": 6.351658005889286e-06, "loss": 18.791, "step": 235790 }, { "epoch": 0.4763309186843732, "grad_norm": 370.8762512207031, "learning_rate": 6.35132193341814e-06, "loss": 22.8174, "step": 235800 }, { "epoch": 0.47635111931705704, "grad_norm": 257.43975830078125, "learning_rate": 6.350985854360815e-06, "loss": 42.8529, "step": 235810 }, { "epoch": 0.4763713199497408, "grad_norm": 713.8231811523438, "learning_rate": 6.350649768718948e-06, "loss": 22.5843, "step": 235820 }, { "epoch": 0.4763915205824246, "grad_norm": 171.21658325195312, "learning_rate": 6.3503136764941786e-06, "loss": 13.8042, "step": 235830 }, { "epoch": 
0.47641172121510844, "grad_norm": 57.68280029296875, "learning_rate": 6.349977577688144e-06, "loss": 19.2037, "step": 235840 }, { "epoch": 0.47643192184779226, "grad_norm": 431.3582763671875, "learning_rate": 6.349641472302484e-06, "loss": 19.0848, "step": 235850 }, { "epoch": 0.4764521224804761, "grad_norm": 1326.8524169921875, "learning_rate": 6.349305360338832e-06, "loss": 20.5687, "step": 235860 }, { "epoch": 0.4764723231131599, "grad_norm": 556.9224243164062, "learning_rate": 6.34896924179883e-06, "loss": 37.0349, "step": 235870 }, { "epoch": 0.4764925237458437, "grad_norm": 208.58554077148438, "learning_rate": 6.348633116684117e-06, "loss": 18.2168, "step": 235880 }, { "epoch": 0.47651272437852754, "grad_norm": 28.971874237060547, "learning_rate": 6.348296984996329e-06, "loss": 15.1086, "step": 235890 }, { "epoch": 0.47653292501121136, "grad_norm": 0.8386023044586182, "learning_rate": 6.3479608467371055e-06, "loss": 21.6404, "step": 235900 }, { "epoch": 0.4765531256438952, "grad_norm": 479.8161315917969, "learning_rate": 6.3476247019080826e-06, "loss": 19.4333, "step": 235910 }, { "epoch": 0.476573326276579, "grad_norm": 383.649169921875, "learning_rate": 6.347288550510903e-06, "loss": 24.6602, "step": 235920 }, { "epoch": 0.4765935269092628, "grad_norm": 11.255179405212402, "learning_rate": 6.346952392547201e-06, "loss": 14.3584, "step": 235930 }, { "epoch": 0.47661372754194664, "grad_norm": 181.38755798339844, "learning_rate": 6.3466162280186164e-06, "loss": 11.1322, "step": 235940 }, { "epoch": 0.4766339281746304, "grad_norm": 320.8212585449219, "learning_rate": 6.346280056926788e-06, "loss": 17.2683, "step": 235950 }, { "epoch": 0.4766541288073142, "grad_norm": 41.16318893432617, "learning_rate": 6.345943879273353e-06, "loss": 15.3891, "step": 235960 }, { "epoch": 0.47667432943999805, "grad_norm": 157.09657287597656, "learning_rate": 6.3456076950599525e-06, "loss": 13.0402, "step": 235970 }, { "epoch": 0.47669453007268187, "grad_norm": 592.3854370117188, "learning_rate": 6.345271504288222e-06, "loss": 20.3103, "step": 235980 }, { "epoch": 0.4767147307053657, "grad_norm": 531.6246337890625, "learning_rate": 6.344935306959801e-06, "loss": 14.9921, "step": 235990 }, { "epoch": 0.4767349313380495, "grad_norm": 775.4092407226562, "learning_rate": 6.344599103076329e-06, "loss": 30.2433, "step": 236000 }, { "epoch": 0.4767551319707333, "grad_norm": 804.9729614257812, "learning_rate": 6.3442628926394455e-06, "loss": 18.611, "step": 236010 }, { "epoch": 0.47677533260341715, "grad_norm": 399.3684387207031, "learning_rate": 6.3439266756507846e-06, "loss": 14.1972, "step": 236020 }, { "epoch": 0.47679553323610097, "grad_norm": 257.4033203125, "learning_rate": 6.343590452111991e-06, "loss": 25.993, "step": 236030 }, { "epoch": 0.4768157338687848, "grad_norm": 291.25286865234375, "learning_rate": 6.343254222024699e-06, "loss": 13.1766, "step": 236040 }, { "epoch": 0.4768359345014686, "grad_norm": 0.25358515977859497, "learning_rate": 6.3429179853905485e-06, "loss": 27.1687, "step": 236050 }, { "epoch": 0.4768561351341524, "grad_norm": 1633.2650146484375, "learning_rate": 6.34258174221118e-06, "loss": 26.8737, "step": 236060 }, { "epoch": 0.4768763357668362, "grad_norm": 472.8817138671875, "learning_rate": 6.342245492488228e-06, "loss": 19.0416, "step": 236070 }, { "epoch": 0.47689653639952, "grad_norm": 280.98504638671875, "learning_rate": 6.341909236223338e-06, "loss": 21.0544, "step": 236080 }, { "epoch": 0.47691673703220383, "grad_norm": 631.6600341796875, "learning_rate": 
6.341572973418143e-06, "loss": 17.0014, "step": 236090 }, { "epoch": 0.47693693766488765, "grad_norm": 416.7015686035156, "learning_rate": 6.341236704074285e-06, "loss": 24.6626, "step": 236100 }, { "epoch": 0.47695713829757147, "grad_norm": 370.92547607421875, "learning_rate": 6.340900428193401e-06, "loss": 14.3205, "step": 236110 }, { "epoch": 0.4769773389302553, "grad_norm": 154.2776336669922, "learning_rate": 6.340564145777131e-06, "loss": 17.0812, "step": 236120 }, { "epoch": 0.4769975395629391, "grad_norm": 414.4328308105469, "learning_rate": 6.340227856827116e-06, "loss": 9.2969, "step": 236130 }, { "epoch": 0.47701774019562293, "grad_norm": 434.3299865722656, "learning_rate": 6.339891561344991e-06, "loss": 19.4123, "step": 236140 }, { "epoch": 0.47703794082830675, "grad_norm": 108.04666900634766, "learning_rate": 6.339555259332398e-06, "loss": 23.9393, "step": 236150 }, { "epoch": 0.47705814146099057, "grad_norm": 1590.479248046875, "learning_rate": 6.339218950790973e-06, "loss": 45.2912, "step": 236160 }, { "epoch": 0.4770783420936744, "grad_norm": 473.1038818359375, "learning_rate": 6.33888263572236e-06, "loss": 34.732, "step": 236170 }, { "epoch": 0.4770985427263582, "grad_norm": 116.94279479980469, "learning_rate": 6.338546314128193e-06, "loss": 9.5652, "step": 236180 }, { "epoch": 0.47711874335904203, "grad_norm": 158.46392822265625, "learning_rate": 6.3382099860101154e-06, "loss": 16.3361, "step": 236190 }, { "epoch": 0.4771389439917258, "grad_norm": 113.47792053222656, "learning_rate": 6.337873651369764e-06, "loss": 20.9386, "step": 236200 }, { "epoch": 0.4771591446244096, "grad_norm": 356.636474609375, "learning_rate": 6.337537310208779e-06, "loss": 15.0066, "step": 236210 }, { "epoch": 0.47717934525709343, "grad_norm": 1007.4440307617188, "learning_rate": 6.3372009625288e-06, "loss": 23.4929, "step": 236220 }, { "epoch": 0.47719954588977725, "grad_norm": 389.4663391113281, "learning_rate": 6.336864608331463e-06, "loss": 14.9125, "step": 236230 }, { "epoch": 0.4772197465224611, "grad_norm": 567.827880859375, "learning_rate": 6.336528247618413e-06, "loss": 17.2285, "step": 236240 }, { "epoch": 0.4772399471551449, "grad_norm": 433.66021728515625, "learning_rate": 6.336191880391285e-06, "loss": 43.0203, "step": 236250 }, { "epoch": 0.4772601477878287, "grad_norm": 317.4508361816406, "learning_rate": 6.335855506651721e-06, "loss": 21.6446, "step": 236260 }, { "epoch": 0.47728034842051253, "grad_norm": 261.9701232910156, "learning_rate": 6.335519126401357e-06, "loss": 22.2381, "step": 236270 }, { "epoch": 0.47730054905319635, "grad_norm": 583.7076416015625, "learning_rate": 6.335182739641837e-06, "loss": 22.632, "step": 236280 }, { "epoch": 0.4773207496858802, "grad_norm": 650.14111328125, "learning_rate": 6.334846346374797e-06, "loss": 17.9627, "step": 236290 }, { "epoch": 0.477340950318564, "grad_norm": 286.3322448730469, "learning_rate": 6.334509946601879e-06, "loss": 19.6228, "step": 236300 }, { "epoch": 0.4773611509512478, "grad_norm": 632.01123046875, "learning_rate": 6.334173540324721e-06, "loss": 20.1217, "step": 236310 }, { "epoch": 0.47738135158393163, "grad_norm": 154.14942932128906, "learning_rate": 6.3338371275449614e-06, "loss": 17.7689, "step": 236320 }, { "epoch": 0.4774015522166154, "grad_norm": 4.355168342590332, "learning_rate": 6.333500708264243e-06, "loss": 21.0828, "step": 236330 }, { "epoch": 0.4774217528492992, "grad_norm": 437.45831298828125, "learning_rate": 6.3331642824842034e-06, "loss": 17.5286, "step": 236340 }, { "epoch": 0.47744195348198304, 
"grad_norm": 130.97360229492188, "learning_rate": 6.3328278502064835e-06, "loss": 17.0982, "step": 236350 }, { "epoch": 0.47746215411466686, "grad_norm": 417.3623046875, "learning_rate": 6.3324914114327206e-06, "loss": 16.7422, "step": 236360 }, { "epoch": 0.4774823547473507, "grad_norm": 349.117431640625, "learning_rate": 6.332154966164558e-06, "loss": 18.2595, "step": 236370 }, { "epoch": 0.4775025553800345, "grad_norm": 579.3184814453125, "learning_rate": 6.3318185144036325e-06, "loss": 35.9562, "step": 236380 }, { "epoch": 0.4775227560127183, "grad_norm": 364.48846435546875, "learning_rate": 6.3314820561515854e-06, "loss": 16.838, "step": 236390 }, { "epoch": 0.47754295664540214, "grad_norm": 101.16961669921875, "learning_rate": 6.331145591410057e-06, "loss": 18.6463, "step": 236400 }, { "epoch": 0.47756315727808596, "grad_norm": 483.03875732421875, "learning_rate": 6.330809120180685e-06, "loss": 15.5044, "step": 236410 }, { "epoch": 0.4775833579107698, "grad_norm": 60.906856536865234, "learning_rate": 6.330472642465113e-06, "loss": 19.4903, "step": 236420 }, { "epoch": 0.4776035585434536, "grad_norm": 60.40165328979492, "learning_rate": 6.330136158264977e-06, "loss": 14.2993, "step": 236430 }, { "epoch": 0.4776237591761374, "grad_norm": 666.2178955078125, "learning_rate": 6.329799667581918e-06, "loss": 28.4868, "step": 236440 }, { "epoch": 0.47764395980882124, "grad_norm": 441.0329895019531, "learning_rate": 6.329463170417578e-06, "loss": 25.9949, "step": 236450 }, { "epoch": 0.477664160441505, "grad_norm": 627.822998046875, "learning_rate": 6.329126666773596e-06, "loss": 22.5725, "step": 236460 }, { "epoch": 0.4776843610741888, "grad_norm": 1271.912109375, "learning_rate": 6.328790156651611e-06, "loss": 9.4034, "step": 236470 }, { "epoch": 0.47770456170687264, "grad_norm": 181.57403564453125, "learning_rate": 6.328453640053264e-06, "loss": 20.7942, "step": 236480 }, { "epoch": 0.47772476233955646, "grad_norm": 412.6011962890625, "learning_rate": 6.3281171169801944e-06, "loss": 19.137, "step": 236490 }, { "epoch": 0.4777449629722403, "grad_norm": 345.78594970703125, "learning_rate": 6.327780587434045e-06, "loss": 20.7337, "step": 236500 }, { "epoch": 0.4777651636049241, "grad_norm": 280.6048889160156, "learning_rate": 6.3274440514164535e-06, "loss": 25.2685, "step": 236510 }, { "epoch": 0.4777853642376079, "grad_norm": 302.62237548828125, "learning_rate": 6.327107508929059e-06, "loss": 22.2104, "step": 236520 }, { "epoch": 0.47780556487029174, "grad_norm": 321.1506042480469, "learning_rate": 6.326770959973505e-06, "loss": 12.1764, "step": 236530 }, { "epoch": 0.47782576550297556, "grad_norm": 647.4296264648438, "learning_rate": 6.32643440455143e-06, "loss": 20.1817, "step": 236540 }, { "epoch": 0.4778459661356594, "grad_norm": 369.7336120605469, "learning_rate": 6.3260978426644735e-06, "loss": 18.1902, "step": 236550 }, { "epoch": 0.4778661667683432, "grad_norm": 767.9469604492188, "learning_rate": 6.325761274314279e-06, "loss": 23.7414, "step": 236560 }, { "epoch": 0.477886367401027, "grad_norm": 589.987548828125, "learning_rate": 6.325424699502482e-06, "loss": 22.7409, "step": 236570 }, { "epoch": 0.47790656803371084, "grad_norm": 301.1730041503906, "learning_rate": 6.3250881182307285e-06, "loss": 23.0615, "step": 236580 }, { "epoch": 0.4779267686663946, "grad_norm": 195.98422241210938, "learning_rate": 6.324751530500656e-06, "loss": 27.2593, "step": 236590 }, { "epoch": 0.47794696929907843, "grad_norm": 376.743896484375, "learning_rate": 6.324414936313904e-06, "loss": 11.7994, 
"step": 236600 }, { "epoch": 0.47796716993176225, "grad_norm": 369.4232482910156, "learning_rate": 6.324078335672115e-06, "loss": 20.3665, "step": 236610 }, { "epoch": 0.47798737056444607, "grad_norm": 194.3318634033203, "learning_rate": 6.323741728576928e-06, "loss": 16.0633, "step": 236620 }, { "epoch": 0.4780075711971299, "grad_norm": 119.3884506225586, "learning_rate": 6.323405115029986e-06, "loss": 14.5608, "step": 236630 }, { "epoch": 0.4780277718298137, "grad_norm": 848.0132446289062, "learning_rate": 6.323068495032927e-06, "loss": 27.1935, "step": 236640 }, { "epoch": 0.47804797246249753, "grad_norm": 700.3411865234375, "learning_rate": 6.3227318685873915e-06, "loss": 21.1771, "step": 236650 }, { "epoch": 0.47806817309518135, "grad_norm": 371.2930908203125, "learning_rate": 6.322395235695022e-06, "loss": 7.3183, "step": 236660 }, { "epoch": 0.47808837372786517, "grad_norm": 523.4465942382812, "learning_rate": 6.32205859635746e-06, "loss": 20.1393, "step": 236670 }, { "epoch": 0.478108574360549, "grad_norm": 454.619384765625, "learning_rate": 6.3217219505763426e-06, "loss": 19.4088, "step": 236680 }, { "epoch": 0.4781287749932328, "grad_norm": 205.50643920898438, "learning_rate": 6.321385298353314e-06, "loss": 27.9032, "step": 236690 }, { "epoch": 0.47814897562591663, "grad_norm": 320.5482177734375, "learning_rate": 6.321048639690013e-06, "loss": 16.4477, "step": 236700 }, { "epoch": 0.4781691762586004, "grad_norm": 257.9223937988281, "learning_rate": 6.320711974588082e-06, "loss": 15.9631, "step": 236710 }, { "epoch": 0.4781893768912842, "grad_norm": 469.4799499511719, "learning_rate": 6.32037530304916e-06, "loss": 15.2386, "step": 236720 }, { "epoch": 0.47820957752396803, "grad_norm": 460.84051513671875, "learning_rate": 6.320038625074889e-06, "loss": 21.4524, "step": 236730 }, { "epoch": 0.47822977815665185, "grad_norm": 277.2516784667969, "learning_rate": 6.319701940666911e-06, "loss": 13.5471, "step": 236740 }, { "epoch": 0.4782499787893357, "grad_norm": 256.4875183105469, "learning_rate": 6.3193652498268656e-06, "loss": 23.3701, "step": 236750 }, { "epoch": 0.4782701794220195, "grad_norm": 153.87103271484375, "learning_rate": 6.319028552556393e-06, "loss": 16.1749, "step": 236760 }, { "epoch": 0.4782903800547033, "grad_norm": 174.68017578125, "learning_rate": 6.318691848857136e-06, "loss": 16.622, "step": 236770 }, { "epoch": 0.47831058068738713, "grad_norm": 131.1770782470703, "learning_rate": 6.318355138730735e-06, "loss": 23.3666, "step": 236780 }, { "epoch": 0.47833078132007095, "grad_norm": 239.27786254882812, "learning_rate": 6.318018422178829e-06, "loss": 23.2029, "step": 236790 }, { "epoch": 0.4783509819527548, "grad_norm": 374.8211364746094, "learning_rate": 6.317681699203065e-06, "loss": 25.8021, "step": 236800 }, { "epoch": 0.4783711825854386, "grad_norm": 264.9538269042969, "learning_rate": 6.317344969805077e-06, "loss": 18.8978, "step": 236810 }, { "epoch": 0.4783913832181224, "grad_norm": 338.7409973144531, "learning_rate": 6.317008233986509e-06, "loss": 11.726, "step": 236820 }, { "epoch": 0.47841158385080623, "grad_norm": 551.9564208984375, "learning_rate": 6.316671491749005e-06, "loss": 21.1559, "step": 236830 }, { "epoch": 0.47843178448349, "grad_norm": 0.0, "learning_rate": 6.316334743094201e-06, "loss": 15.9817, "step": 236840 }, { "epoch": 0.4784519851161738, "grad_norm": 500.169677734375, "learning_rate": 6.315997988023744e-06, "loss": 24.3848, "step": 236850 }, { "epoch": 0.47847218574885764, "grad_norm": 380.1037902832031, "learning_rate": 
6.31566122653927e-06, "loss": 21.1684, "step": 236860 }, { "epoch": 0.47849238638154146, "grad_norm": 146.63589477539062, "learning_rate": 6.315324458642424e-06, "loss": 8.5004, "step": 236870 }, { "epoch": 0.4785125870142253, "grad_norm": 357.74359130859375, "learning_rate": 6.314987684334847e-06, "loss": 33.0014, "step": 236880 }, { "epoch": 0.4785327876469091, "grad_norm": 745.3784790039062, "learning_rate": 6.314650903618178e-06, "loss": 23.9109, "step": 236890 }, { "epoch": 0.4785529882795929, "grad_norm": 444.4207458496094, "learning_rate": 6.314314116494061e-06, "loss": 12.2537, "step": 236900 }, { "epoch": 0.47857318891227674, "grad_norm": 856.6294555664062, "learning_rate": 6.313977322964136e-06, "loss": 48.7711, "step": 236910 }, { "epoch": 0.47859338954496056, "grad_norm": 329.8413391113281, "learning_rate": 6.313640523030045e-06, "loss": 19.5563, "step": 236920 }, { "epoch": 0.4786135901776444, "grad_norm": 443.108642578125, "learning_rate": 6.313303716693428e-06, "loss": 19.4823, "step": 236930 }, { "epoch": 0.4786337908103282, "grad_norm": 453.31756591796875, "learning_rate": 6.3129669039559295e-06, "loss": 15.88, "step": 236940 }, { "epoch": 0.478653991443012, "grad_norm": 512.3207397460938, "learning_rate": 6.312630084819189e-06, "loss": 17.9663, "step": 236950 }, { "epoch": 0.47867419207569584, "grad_norm": 206.0601043701172, "learning_rate": 6.3122932592848495e-06, "loss": 16.0576, "step": 236960 }, { "epoch": 0.4786943927083796, "grad_norm": 483.4388122558594, "learning_rate": 6.311956427354552e-06, "loss": 33.9894, "step": 236970 }, { "epoch": 0.4787145933410634, "grad_norm": 236.13270568847656, "learning_rate": 6.311619589029937e-06, "loss": 24.7015, "step": 236980 }, { "epoch": 0.47873479397374724, "grad_norm": 624.0055541992188, "learning_rate": 6.311282744312647e-06, "loss": 18.0067, "step": 236990 }, { "epoch": 0.47875499460643106, "grad_norm": 808.8917846679688, "learning_rate": 6.310945893204324e-06, "loss": 27.9448, "step": 237000 }, { "epoch": 0.4787751952391149, "grad_norm": 343.2776184082031, "learning_rate": 6.310609035706611e-06, "loss": 18.2245, "step": 237010 }, { "epoch": 0.4787953958717987, "grad_norm": 345.72296142578125, "learning_rate": 6.310272171821145e-06, "loss": 24.3908, "step": 237020 }, { "epoch": 0.4788155965044825, "grad_norm": 198.91836547851562, "learning_rate": 6.3099353015495766e-06, "loss": 13.6703, "step": 237030 }, { "epoch": 0.47883579713716634, "grad_norm": 339.0745849609375, "learning_rate": 6.309598424893539e-06, "loss": 13.3374, "step": 237040 }, { "epoch": 0.47885599776985016, "grad_norm": 782.8482666015625, "learning_rate": 6.309261541854679e-06, "loss": 23.3637, "step": 237050 }, { "epoch": 0.478876198402534, "grad_norm": 586.35595703125, "learning_rate": 6.308924652434636e-06, "loss": 19.0398, "step": 237060 }, { "epoch": 0.4788963990352178, "grad_norm": 368.9461364746094, "learning_rate": 6.308587756635054e-06, "loss": 20.1478, "step": 237070 }, { "epoch": 0.4789165996679016, "grad_norm": 290.1398010253906, "learning_rate": 6.308250854457572e-06, "loss": 19.0407, "step": 237080 }, { "epoch": 0.47893680030058544, "grad_norm": 19.14689826965332, "learning_rate": 6.307913945903836e-06, "loss": 18.9975, "step": 237090 }, { "epoch": 0.4789570009332692, "grad_norm": 569.9417724609375, "learning_rate": 6.307577030975485e-06, "loss": 27.712, "step": 237100 }, { "epoch": 0.478977201565953, "grad_norm": 1169.605224609375, "learning_rate": 6.307240109674162e-06, "loss": 25.0702, "step": 237110 }, { "epoch": 0.47899740219863685, 
"grad_norm": 411.0592956542969, "learning_rate": 6.3069031820015116e-06, "loss": 48.7659, "step": 237120 }, { "epoch": 0.47901760283132067, "grad_norm": 402.3951416015625, "learning_rate": 6.306566247959169e-06, "loss": 26.0601, "step": 237130 }, { "epoch": 0.4790378034640045, "grad_norm": 657.840576171875, "learning_rate": 6.3062293075487854e-06, "loss": 29.0505, "step": 237140 }, { "epoch": 0.4790580040966883, "grad_norm": 524.6390991210938, "learning_rate": 6.305892360771997e-06, "loss": 16.1245, "step": 237150 }, { "epoch": 0.4790782047293721, "grad_norm": 527.5153198242188, "learning_rate": 6.305555407630447e-06, "loss": 30.8938, "step": 237160 }, { "epoch": 0.47909840536205595, "grad_norm": 299.8638000488281, "learning_rate": 6.3052184481257795e-06, "loss": 23.2073, "step": 237170 }, { "epoch": 0.47911860599473977, "grad_norm": 525.2657470703125, "learning_rate": 6.304881482259634e-06, "loss": 29.5828, "step": 237180 }, { "epoch": 0.4791388066274236, "grad_norm": 337.80889892578125, "learning_rate": 6.304544510033656e-06, "loss": 17.2977, "step": 237190 }, { "epoch": 0.4791590072601074, "grad_norm": 735.634765625, "learning_rate": 6.304207531449486e-06, "loss": 19.0968, "step": 237200 }, { "epoch": 0.4791792078927912, "grad_norm": 121.38893127441406, "learning_rate": 6.303870546508766e-06, "loss": 19.7644, "step": 237210 }, { "epoch": 0.479199408525475, "grad_norm": 702.9309692382812, "learning_rate": 6.3035335552131395e-06, "loss": 16.5914, "step": 237220 }, { "epoch": 0.4792196091581588, "grad_norm": 433.74114990234375, "learning_rate": 6.303196557564249e-06, "loss": 13.2496, "step": 237230 }, { "epoch": 0.47923980979084263, "grad_norm": 472.25042724609375, "learning_rate": 6.302859553563736e-06, "loss": 26.2887, "step": 237240 }, { "epoch": 0.47926001042352645, "grad_norm": 454.13775634765625, "learning_rate": 6.3025225432132434e-06, "loss": 21.894, "step": 237250 }, { "epoch": 0.47928021105621027, "grad_norm": 189.90338134765625, "learning_rate": 6.302185526514413e-06, "loss": 20.9103, "step": 237260 }, { "epoch": 0.4793004116888941, "grad_norm": 306.26824951171875, "learning_rate": 6.301848503468889e-06, "loss": 17.7821, "step": 237270 }, { "epoch": 0.4793206123215779, "grad_norm": 384.7181701660156, "learning_rate": 6.301511474078315e-06, "loss": 22.2573, "step": 237280 }, { "epoch": 0.47934081295426173, "grad_norm": 432.1720275878906, "learning_rate": 6.301174438344329e-06, "loss": 19.971, "step": 237290 }, { "epoch": 0.47936101358694555, "grad_norm": 407.7176513671875, "learning_rate": 6.3008373962685785e-06, "loss": 13.3768, "step": 237300 }, { "epoch": 0.47938121421962937, "grad_norm": 49.56460952758789, "learning_rate": 6.3005003478527036e-06, "loss": 18.5751, "step": 237310 }, { "epoch": 0.4794014148523132, "grad_norm": 408.12225341796875, "learning_rate": 6.300163293098348e-06, "loss": 28.2353, "step": 237320 }, { "epoch": 0.479421615484997, "grad_norm": 376.2996826171875, "learning_rate": 6.2998262320071546e-06, "loss": 20.6713, "step": 237330 }, { "epoch": 0.47944181611768083, "grad_norm": 380.1006164550781, "learning_rate": 6.299489164580765e-06, "loss": 17.1596, "step": 237340 }, { "epoch": 0.4794620167503646, "grad_norm": 699.7108764648438, "learning_rate": 6.2991520908208235e-06, "loss": 27.1828, "step": 237350 }, { "epoch": 0.4794822173830484, "grad_norm": 355.4895935058594, "learning_rate": 6.298815010728972e-06, "loss": 19.8443, "step": 237360 }, { "epoch": 0.47950241801573223, "grad_norm": 225.16407775878906, "learning_rate": 6.298477924306854e-06, "loss": 
13.6358, "step": 237370 }, { "epoch": 0.47952261864841605, "grad_norm": 517.8504028320312, "learning_rate": 6.298140831556112e-06, "loss": 15.2491, "step": 237380 }, { "epoch": 0.4795428192810999, "grad_norm": 415.0279235839844, "learning_rate": 6.2978037324783894e-06, "loss": 19.1637, "step": 237390 }, { "epoch": 0.4795630199137837, "grad_norm": 187.1905975341797, "learning_rate": 6.297466627075327e-06, "loss": 18.4905, "step": 237400 }, { "epoch": 0.4795832205464675, "grad_norm": 791.8858642578125, "learning_rate": 6.2971295153485725e-06, "loss": 19.4019, "step": 237410 }, { "epoch": 0.47960342117915133, "grad_norm": 618.9378662109375, "learning_rate": 6.296792397299764e-06, "loss": 26.3401, "step": 237420 }, { "epoch": 0.47962362181183515, "grad_norm": 310.3319091796875, "learning_rate": 6.296455272930546e-06, "loss": 38.8718, "step": 237430 }, { "epoch": 0.479643822444519, "grad_norm": 164.62466430664062, "learning_rate": 6.2961181422425645e-06, "loss": 18.0805, "step": 237440 }, { "epoch": 0.4796640230772028, "grad_norm": 294.67950439453125, "learning_rate": 6.295781005237458e-06, "loss": 15.5688, "step": 237450 }, { "epoch": 0.4796842237098866, "grad_norm": 301.582275390625, "learning_rate": 6.295443861916875e-06, "loss": 21.2663, "step": 237460 }, { "epoch": 0.47970442434257043, "grad_norm": 540.5686645507812, "learning_rate": 6.2951067122824515e-06, "loss": 25.5175, "step": 237470 }, { "epoch": 0.4797246249752542, "grad_norm": 337.8367614746094, "learning_rate": 6.294769556335839e-06, "loss": 12.4853, "step": 237480 }, { "epoch": 0.479744825607938, "grad_norm": 233.81678771972656, "learning_rate": 6.294432394078675e-06, "loss": 12.7851, "step": 237490 }, { "epoch": 0.47976502624062184, "grad_norm": 697.8088989257812, "learning_rate": 6.294095225512604e-06, "loss": 18.0509, "step": 237500 }, { "epoch": 0.47978522687330566, "grad_norm": 693.8338623046875, "learning_rate": 6.293758050639272e-06, "loss": 16.9721, "step": 237510 }, { "epoch": 0.4798054275059895, "grad_norm": 637.6820068359375, "learning_rate": 6.293420869460318e-06, "loss": 18.5767, "step": 237520 }, { "epoch": 0.4798256281386733, "grad_norm": 714.9810180664062, "learning_rate": 6.2930836819773874e-06, "loss": 22.6833, "step": 237530 }, { "epoch": 0.4798458287713571, "grad_norm": 301.07733154296875, "learning_rate": 6.292746488192125e-06, "loss": 16.1072, "step": 237540 }, { "epoch": 0.47986602940404094, "grad_norm": 62.74266815185547, "learning_rate": 6.292409288106173e-06, "loss": 13.7772, "step": 237550 }, { "epoch": 0.47988623003672476, "grad_norm": 208.51495361328125, "learning_rate": 6.292072081721173e-06, "loss": 23.2082, "step": 237560 }, { "epoch": 0.4799064306694086, "grad_norm": 435.7124938964844, "learning_rate": 6.291734869038773e-06, "loss": 16.1793, "step": 237570 }, { "epoch": 0.4799266313020924, "grad_norm": 360.6304931640625, "learning_rate": 6.291397650060613e-06, "loss": 19.7922, "step": 237580 }, { "epoch": 0.4799468319347762, "grad_norm": 36.94089889526367, "learning_rate": 6.291060424788336e-06, "loss": 19.1318, "step": 237590 }, { "epoch": 0.47996703256746004, "grad_norm": 500.7837219238281, "learning_rate": 6.290723193223589e-06, "loss": 22.2277, "step": 237600 }, { "epoch": 0.4799872332001438, "grad_norm": 213.74905395507812, "learning_rate": 6.290385955368012e-06, "loss": 29.0919, "step": 237610 }, { "epoch": 0.4800074338328276, "grad_norm": 241.23556518554688, "learning_rate": 6.2900487112232534e-06, "loss": 23.703, "step": 237620 }, { "epoch": 0.48002763446551144, "grad_norm": 
177.02154541015625, "learning_rate": 6.289711460790951e-06, "loss": 7.9541, "step": 237630 }, { "epoch": 0.48004783509819526, "grad_norm": 276.55133056640625, "learning_rate": 6.289374204072752e-06, "loss": 24.3936, "step": 237640 }, { "epoch": 0.4800680357308791, "grad_norm": 375.5913391113281, "learning_rate": 6.2890369410703e-06, "loss": 18.9819, "step": 237650 }, { "epoch": 0.4800882363635629, "grad_norm": 297.7219543457031, "learning_rate": 6.2886996717852374e-06, "loss": 23.8997, "step": 237660 }, { "epoch": 0.4801084369962467, "grad_norm": 21.32270050048828, "learning_rate": 6.28836239621921e-06, "loss": 14.397, "step": 237670 }, { "epoch": 0.48012863762893054, "grad_norm": 306.45330810546875, "learning_rate": 6.288025114373862e-06, "loss": 20.0237, "step": 237680 }, { "epoch": 0.48014883826161436, "grad_norm": 252.57337951660156, "learning_rate": 6.287687826250832e-06, "loss": 11.4426, "step": 237690 }, { "epoch": 0.4801690388942982, "grad_norm": 421.5823669433594, "learning_rate": 6.28735053185177e-06, "loss": 24.9128, "step": 237700 }, { "epoch": 0.480189239526982, "grad_norm": 171.04920959472656, "learning_rate": 6.287013231178316e-06, "loss": 11.0669, "step": 237710 }, { "epoch": 0.4802094401596658, "grad_norm": 303.7307434082031, "learning_rate": 6.286675924232117e-06, "loss": 19.8289, "step": 237720 }, { "epoch": 0.48022964079234964, "grad_norm": 521.14453125, "learning_rate": 6.286338611014817e-06, "loss": 14.5308, "step": 237730 }, { "epoch": 0.4802498414250334, "grad_norm": 226.71551513671875, "learning_rate": 6.286001291528056e-06, "loss": 14.6215, "step": 237740 }, { "epoch": 0.48027004205771723, "grad_norm": 337.58154296875, "learning_rate": 6.285663965773482e-06, "loss": 30.0312, "step": 237750 }, { "epoch": 0.48029024269040105, "grad_norm": 348.5039367675781, "learning_rate": 6.285326633752737e-06, "loss": 18.0105, "step": 237760 }, { "epoch": 0.48031044332308487, "grad_norm": 0.5514386296272278, "learning_rate": 6.284989295467466e-06, "loss": 16.2173, "step": 237770 }, { "epoch": 0.4803306439557687, "grad_norm": 97.46475219726562, "learning_rate": 6.284651950919315e-06, "loss": 18.553, "step": 237780 }, { "epoch": 0.4803508445884525, "grad_norm": 512.9586181640625, "learning_rate": 6.284314600109923e-06, "loss": 23.7827, "step": 237790 }, { "epoch": 0.48037104522113633, "grad_norm": 259.66802978515625, "learning_rate": 6.28397724304094e-06, "loss": 13.5792, "step": 237800 }, { "epoch": 0.48039124585382015, "grad_norm": 293.5666198730469, "learning_rate": 6.283639879714006e-06, "loss": 32.8681, "step": 237810 }, { "epoch": 0.48041144648650397, "grad_norm": 240.16566467285156, "learning_rate": 6.283302510130768e-06, "loss": 16.7174, "step": 237820 }, { "epoch": 0.4804316471191878, "grad_norm": 550.2548217773438, "learning_rate": 6.282965134292869e-06, "loss": 14.6657, "step": 237830 }, { "epoch": 0.4804518477518716, "grad_norm": 408.5176696777344, "learning_rate": 6.282627752201953e-06, "loss": 27.4887, "step": 237840 }, { "epoch": 0.48047204838455543, "grad_norm": 0.1315782070159912, "learning_rate": 6.2822903638596654e-06, "loss": 14.9212, "step": 237850 }, { "epoch": 0.4804922490172392, "grad_norm": 614.2935180664062, "learning_rate": 6.28195296926765e-06, "loss": 28.5032, "step": 237860 }, { "epoch": 0.480512449649923, "grad_norm": 370.79547119140625, "learning_rate": 6.281615568427551e-06, "loss": 36.7822, "step": 237870 }, { "epoch": 0.48053265028260683, "grad_norm": 413.24578857421875, "learning_rate": 6.281278161341013e-06, "loss": 17.2436, "step": 237880 }, { 
"epoch": 0.48055285091529065, "grad_norm": 16.01764488220215, "learning_rate": 6.280940748009682e-06, "loss": 23.5995, "step": 237890 }, { "epoch": 0.4805730515479745, "grad_norm": 494.7488098144531, "learning_rate": 6.280603328435199e-06, "loss": 18.618, "step": 237900 }, { "epoch": 0.4805932521806583, "grad_norm": 82.51104736328125, "learning_rate": 6.2802659026192124e-06, "loss": 13.8566, "step": 237910 }, { "epoch": 0.4806134528133421, "grad_norm": 429.82122802734375, "learning_rate": 6.279928470563365e-06, "loss": 18.2455, "step": 237920 }, { "epoch": 0.48063365344602593, "grad_norm": 132.4311065673828, "learning_rate": 6.2795910322693e-06, "loss": 28.503, "step": 237930 }, { "epoch": 0.48065385407870975, "grad_norm": 102.37493133544922, "learning_rate": 6.279253587738664e-06, "loss": 17.0488, "step": 237940 }, { "epoch": 0.4806740547113936, "grad_norm": 705.259765625, "learning_rate": 6.278916136973102e-06, "loss": 22.8581, "step": 237950 }, { "epoch": 0.4806942553440774, "grad_norm": 400.8526611328125, "learning_rate": 6.278578679974259e-06, "loss": 19.653, "step": 237960 }, { "epoch": 0.4807144559767612, "grad_norm": 221.90017700195312, "learning_rate": 6.278241216743777e-06, "loss": 16.4257, "step": 237970 }, { "epoch": 0.48073465660944503, "grad_norm": 318.0158996582031, "learning_rate": 6.277903747283302e-06, "loss": 19.8692, "step": 237980 }, { "epoch": 0.4807548572421288, "grad_norm": 603.3626098632812, "learning_rate": 6.277566271594478e-06, "loss": 16.3699, "step": 237990 }, { "epoch": 0.4807750578748126, "grad_norm": 581.0230712890625, "learning_rate": 6.277228789678953e-06, "loss": 25.7005, "step": 238000 }, { "epoch": 0.48079525850749644, "grad_norm": 185.18804931640625, "learning_rate": 6.2768913015383696e-06, "loss": 19.0532, "step": 238010 }, { "epoch": 0.48081545914018026, "grad_norm": 130.06170654296875, "learning_rate": 6.276553807174373e-06, "loss": 37.6985, "step": 238020 }, { "epoch": 0.4808356597728641, "grad_norm": 544.4740600585938, "learning_rate": 6.276216306588607e-06, "loss": 14.2061, "step": 238030 }, { "epoch": 0.4808558604055479, "grad_norm": 461.09722900390625, "learning_rate": 6.275878799782719e-06, "loss": 21.4767, "step": 238040 }, { "epoch": 0.4808760610382317, "grad_norm": 212.8883819580078, "learning_rate": 6.275541286758352e-06, "loss": 20.6277, "step": 238050 }, { "epoch": 0.48089626167091554, "grad_norm": 262.19769287109375, "learning_rate": 6.2752037675171495e-06, "loss": 27.4836, "step": 238060 }, { "epoch": 0.48091646230359936, "grad_norm": 230.4353485107422, "learning_rate": 6.274866242060761e-06, "loss": 19.0922, "step": 238070 }, { "epoch": 0.4809366629362832, "grad_norm": 290.68408203125, "learning_rate": 6.274528710390826e-06, "loss": 19.4623, "step": 238080 }, { "epoch": 0.480956863568967, "grad_norm": 68.90953063964844, "learning_rate": 6.274191172508996e-06, "loss": 22.9101, "step": 238090 }, { "epoch": 0.4809770642016508, "grad_norm": 164.39871215820312, "learning_rate": 6.273853628416911e-06, "loss": 11.4798, "step": 238100 }, { "epoch": 0.48099726483433464, "grad_norm": 342.7237548828125, "learning_rate": 6.273516078116218e-06, "loss": 14.394, "step": 238110 }, { "epoch": 0.4810174654670184, "grad_norm": 293.46392822265625, "learning_rate": 6.273178521608564e-06, "loss": 28.5658, "step": 238120 }, { "epoch": 0.4810376660997022, "grad_norm": 436.5867004394531, "learning_rate": 6.272840958895591e-06, "loss": 14.1033, "step": 238130 }, { "epoch": 0.48105786673238604, "grad_norm": 237.17486572265625, "learning_rate": 
6.272503389978945e-06, "loss": 30.1534, "step": 238140 }, { "epoch": 0.48107806736506986, "grad_norm": 598.215576171875, "learning_rate": 6.272165814860272e-06, "loss": 27.7402, "step": 238150 }, { "epoch": 0.4810982679977537, "grad_norm": 324.96685791015625, "learning_rate": 6.271828233541218e-06, "loss": 15.9308, "step": 238160 }, { "epoch": 0.4811184686304375, "grad_norm": 179.6580352783203, "learning_rate": 6.271490646023426e-06, "loss": 11.6563, "step": 238170 }, { "epoch": 0.4811386692631213, "grad_norm": 430.6047058105469, "learning_rate": 6.271153052308544e-06, "loss": 19.4031, "step": 238180 }, { "epoch": 0.48115886989580514, "grad_norm": 1718.493896484375, "learning_rate": 6.2708154523982155e-06, "loss": 45.0198, "step": 238190 }, { "epoch": 0.48117907052848896, "grad_norm": 456.6887512207031, "learning_rate": 6.270477846294086e-06, "loss": 20.1995, "step": 238200 }, { "epoch": 0.4811992711611728, "grad_norm": 291.59515380859375, "learning_rate": 6.270140233997803e-06, "loss": 11.6422, "step": 238210 }, { "epoch": 0.4812194717938566, "grad_norm": 400.2921447753906, "learning_rate": 6.269802615511009e-06, "loss": 10.4124, "step": 238220 }, { "epoch": 0.4812396724265404, "grad_norm": 87.04499816894531, "learning_rate": 6.269464990835353e-06, "loss": 11.0158, "step": 238230 }, { "epoch": 0.48125987305922424, "grad_norm": 492.2239990234375, "learning_rate": 6.2691273599724765e-06, "loss": 17.3587, "step": 238240 }, { "epoch": 0.481280073691908, "grad_norm": 536.57666015625, "learning_rate": 6.268789722924029e-06, "loss": 17.8366, "step": 238250 }, { "epoch": 0.4813002743245918, "grad_norm": 226.37255859375, "learning_rate": 6.268452079691654e-06, "loss": 17.9656, "step": 238260 }, { "epoch": 0.48132047495727565, "grad_norm": 184.16017150878906, "learning_rate": 6.268114430276996e-06, "loss": 20.7677, "step": 238270 }, { "epoch": 0.48134067558995947, "grad_norm": 448.76898193359375, "learning_rate": 6.267776774681703e-06, "loss": 25.1282, "step": 238280 }, { "epoch": 0.4813608762226433, "grad_norm": 783.249755859375, "learning_rate": 6.26743911290742e-06, "loss": 18.0381, "step": 238290 }, { "epoch": 0.4813810768553271, "grad_norm": 144.61526489257812, "learning_rate": 6.267101444955792e-06, "loss": 28.28, "step": 238300 }, { "epoch": 0.4814012774880109, "grad_norm": 57.12580490112305, "learning_rate": 6.2667637708284655e-06, "loss": 42.403, "step": 238310 }, { "epoch": 0.48142147812069475, "grad_norm": 272.0799255371094, "learning_rate": 6.266426090527087e-06, "loss": 6.1611, "step": 238320 }, { "epoch": 0.48144167875337857, "grad_norm": 225.3270263671875, "learning_rate": 6.2660884040533e-06, "loss": 54.5982, "step": 238330 }, { "epoch": 0.4814618793860624, "grad_norm": 162.77500915527344, "learning_rate": 6.2657507114087525e-06, "loss": 16.513, "step": 238340 }, { "epoch": 0.4814820800187462, "grad_norm": 381.93902587890625, "learning_rate": 6.265413012595088e-06, "loss": 26.8493, "step": 238350 }, { "epoch": 0.48150228065143, "grad_norm": 395.8524475097656, "learning_rate": 6.265075307613956e-06, "loss": 24.0672, "step": 238360 }, { "epoch": 0.48152248128411385, "grad_norm": 332.8500061035156, "learning_rate": 6.264737596466999e-06, "loss": 33.6149, "step": 238370 }, { "epoch": 0.4815426819167976, "grad_norm": 264.1964416503906, "learning_rate": 6.264399879155865e-06, "loss": 24.1436, "step": 238380 }, { "epoch": 0.48156288254948143, "grad_norm": 519.498779296875, "learning_rate": 6.2640621556822e-06, "loss": 15.3351, "step": 238390 }, { "epoch": 0.48158308318216525, "grad_norm": 
542.74365234375, "learning_rate": 6.2637244260476474e-06, "loss": 21.5293, "step": 238400 }, { "epoch": 0.48160328381484907, "grad_norm": 609.6380615234375, "learning_rate": 6.2633866902538564e-06, "loss": 12.4965, "step": 238410 }, { "epoch": 0.4816234844475329, "grad_norm": 614.1182250976562, "learning_rate": 6.263048948302471e-06, "loss": 21.9679, "step": 238420 }, { "epoch": 0.4816436850802167, "grad_norm": 547.7650756835938, "learning_rate": 6.26271120019514e-06, "loss": 24.0661, "step": 238430 }, { "epoch": 0.48166388571290053, "grad_norm": 548.20458984375, "learning_rate": 6.262373445933506e-06, "loss": 14.4791, "step": 238440 }, { "epoch": 0.48168408634558435, "grad_norm": 257.6809997558594, "learning_rate": 6.262035685519218e-06, "loss": 17.3105, "step": 238450 }, { "epoch": 0.48170428697826817, "grad_norm": 262.88385009765625, "learning_rate": 6.261697918953922e-06, "loss": 20.1521, "step": 238460 }, { "epoch": 0.481724487610952, "grad_norm": 409.8096008300781, "learning_rate": 6.261360146239261e-06, "loss": 18.6579, "step": 238470 }, { "epoch": 0.4817446882436358, "grad_norm": 352.5834045410156, "learning_rate": 6.261022367376886e-06, "loss": 14.1261, "step": 238480 }, { "epoch": 0.48176488887631963, "grad_norm": 32.62371826171875, "learning_rate": 6.260684582368439e-06, "loss": 39.7104, "step": 238490 }, { "epoch": 0.4817850895090034, "grad_norm": 437.8738708496094, "learning_rate": 6.26034679121557e-06, "loss": 19.1444, "step": 238500 }, { "epoch": 0.4818052901416872, "grad_norm": 340.09320068359375, "learning_rate": 6.260008993919922e-06, "loss": 15.8661, "step": 238510 }, { "epoch": 0.48182549077437103, "grad_norm": 305.18121337890625, "learning_rate": 6.259671190483143e-06, "loss": 13.8897, "step": 238520 }, { "epoch": 0.48184569140705485, "grad_norm": 117.7383804321289, "learning_rate": 6.25933338090688e-06, "loss": 15.7199, "step": 238530 }, { "epoch": 0.4818658920397387, "grad_norm": 441.84149169921875, "learning_rate": 6.258995565192779e-06, "loss": 16.5689, "step": 238540 }, { "epoch": 0.4818860926724225, "grad_norm": 120.50872039794922, "learning_rate": 6.258657743342486e-06, "loss": 18.1548, "step": 238550 }, { "epoch": 0.4819062933051063, "grad_norm": 168.04864501953125, "learning_rate": 6.258319915357648e-06, "loss": 18.8593, "step": 238560 }, { "epoch": 0.48192649393779013, "grad_norm": 27.290313720703125, "learning_rate": 6.257982081239912e-06, "loss": 15.7458, "step": 238570 }, { "epoch": 0.48194669457047395, "grad_norm": 677.3406982421875, "learning_rate": 6.257644240990923e-06, "loss": 21.2616, "step": 238580 }, { "epoch": 0.4819668952031578, "grad_norm": 853.0466918945312, "learning_rate": 6.257306394612328e-06, "loss": 30.1345, "step": 238590 }, { "epoch": 0.4819870958358416, "grad_norm": 482.6874084472656, "learning_rate": 6.256968542105775e-06, "loss": 19.709, "step": 238600 }, { "epoch": 0.4820072964685254, "grad_norm": 713.67578125, "learning_rate": 6.2566306834729095e-06, "loss": 29.0597, "step": 238610 }, { "epoch": 0.48202749710120923, "grad_norm": 72.65042114257812, "learning_rate": 6.256292818715378e-06, "loss": 29.3256, "step": 238620 }, { "epoch": 0.482047697733893, "grad_norm": 707.7577514648438, "learning_rate": 6.255954947834831e-06, "loss": 13.887, "step": 238630 }, { "epoch": 0.4820678983665768, "grad_norm": 587.9662475585938, "learning_rate": 6.255617070832908e-06, "loss": 22.2096, "step": 238640 }, { "epoch": 0.48208809899926064, "grad_norm": 1038.5172119140625, "learning_rate": 6.25527918771126e-06, "loss": 42.1712, "step": 238650 }, { 
"epoch": 0.48210829963194446, "grad_norm": 178.3859100341797, "learning_rate": 6.254941298471535e-06, "loss": 20.0805, "step": 238660 }, { "epoch": 0.4821285002646283, "grad_norm": 423.32806396484375, "learning_rate": 6.254603403115377e-06, "loss": 25.6779, "step": 238670 }, { "epoch": 0.4821487008973121, "grad_norm": 209.841552734375, "learning_rate": 6.254265501644435e-06, "loss": 16.6777, "step": 238680 }, { "epoch": 0.4821689015299959, "grad_norm": 612.9212036132812, "learning_rate": 6.253927594060354e-06, "loss": 32.2635, "step": 238690 }, { "epoch": 0.48218910216267974, "grad_norm": 688.3139038085938, "learning_rate": 6.2535896803647845e-06, "loss": 15.5334, "step": 238700 }, { "epoch": 0.48220930279536356, "grad_norm": 152.21890258789062, "learning_rate": 6.253251760559369e-06, "loss": 18.4623, "step": 238710 }, { "epoch": 0.4822295034280474, "grad_norm": 249.14146423339844, "learning_rate": 6.252913834645757e-06, "loss": 32.7701, "step": 238720 }, { "epoch": 0.4822497040607312, "grad_norm": 576.8262329101562, "learning_rate": 6.252575902625595e-06, "loss": 17.6127, "step": 238730 }, { "epoch": 0.482269904693415, "grad_norm": 291.13018798828125, "learning_rate": 6.25223796450053e-06, "loss": 10.3005, "step": 238740 }, { "epoch": 0.48229010532609884, "grad_norm": 155.48092651367188, "learning_rate": 6.251900020272208e-06, "loss": 17.9134, "step": 238750 }, { "epoch": 0.4823103059587826, "grad_norm": 942.543212890625, "learning_rate": 6.2515620699422775e-06, "loss": 31.9345, "step": 238760 }, { "epoch": 0.4823305065914664, "grad_norm": 1022.882080078125, "learning_rate": 6.2512241135123856e-06, "loss": 22.4983, "step": 238770 }, { "epoch": 0.48235070722415024, "grad_norm": 345.819091796875, "learning_rate": 6.250886150984179e-06, "loss": 18.4888, "step": 238780 }, { "epoch": 0.48237090785683406, "grad_norm": 420.4615783691406, "learning_rate": 6.2505481823593065e-06, "loss": 21.7047, "step": 238790 }, { "epoch": 0.4823911084895179, "grad_norm": 175.2515869140625, "learning_rate": 6.250210207639411e-06, "loss": 18.4462, "step": 238800 }, { "epoch": 0.4824113091222017, "grad_norm": 582.9957275390625, "learning_rate": 6.249872226826145e-06, "loss": 13.5178, "step": 238810 }, { "epoch": 0.4824315097548855, "grad_norm": 409.69757080078125, "learning_rate": 6.249534239921154e-06, "loss": 20.5632, "step": 238820 }, { "epoch": 0.48245171038756934, "grad_norm": 496.896240234375, "learning_rate": 6.24919624692608e-06, "loss": 20.3248, "step": 238830 }, { "epoch": 0.48247191102025316, "grad_norm": 838.0579223632812, "learning_rate": 6.2488582478425795e-06, "loss": 29.6529, "step": 238840 }, { "epoch": 0.482492111652937, "grad_norm": 346.7917175292969, "learning_rate": 6.2485202426722925e-06, "loss": 38.4679, "step": 238850 }, { "epoch": 0.4825123122856208, "grad_norm": 212.93829345703125, "learning_rate": 6.248182231416872e-06, "loss": 13.1648, "step": 238860 }, { "epoch": 0.4825325129183046, "grad_norm": 1920.0400390625, "learning_rate": 6.247844214077962e-06, "loss": 33.3601, "step": 238870 }, { "epoch": 0.48255271355098844, "grad_norm": 658.6197509765625, "learning_rate": 6.247506190657209e-06, "loss": 24.9037, "step": 238880 }, { "epoch": 0.4825729141836722, "grad_norm": 281.5344543457031, "learning_rate": 6.247168161156264e-06, "loss": 20.6129, "step": 238890 }, { "epoch": 0.48259311481635603, "grad_norm": 356.06890869140625, "learning_rate": 6.24683012557677e-06, "loss": 14.7911, "step": 238900 }, { "epoch": 0.48261331544903985, "grad_norm": 580.4341430664062, "learning_rate": 
6.2464920839203805e-06, "loss": 22.8333, "step": 238910 }, { "epoch": 0.48263351608172367, "grad_norm": 341.4069519042969, "learning_rate": 6.2461540361887386e-06, "loss": 11.302, "step": 238920 }, { "epoch": 0.4826537167144075, "grad_norm": 412.54620361328125, "learning_rate": 6.245815982383492e-06, "loss": 24.5794, "step": 238930 }, { "epoch": 0.4826739173470913, "grad_norm": 245.6103515625, "learning_rate": 6.24547792250629e-06, "loss": 18.9905, "step": 238940 }, { "epoch": 0.48269411797977513, "grad_norm": 530.7523193359375, "learning_rate": 6.24513985655878e-06, "loss": 29.9537, "step": 238950 }, { "epoch": 0.48271431861245895, "grad_norm": 271.4397888183594, "learning_rate": 6.244801784542609e-06, "loss": 25.8615, "step": 238960 }, { "epoch": 0.48273451924514277, "grad_norm": 275.6568603515625, "learning_rate": 6.244463706459426e-06, "loss": 16.2746, "step": 238970 }, { "epoch": 0.4827547198778266, "grad_norm": 192.43406677246094, "learning_rate": 6.244125622310877e-06, "loss": 17.3146, "step": 238980 }, { "epoch": 0.4827749205105104, "grad_norm": 382.0212097167969, "learning_rate": 6.243787532098611e-06, "loss": 15.6681, "step": 238990 }, { "epoch": 0.48279512114319423, "grad_norm": 333.97064208984375, "learning_rate": 6.243449435824276e-06, "loss": 16.0727, "step": 239000 }, { "epoch": 0.48281532177587805, "grad_norm": 186.2528533935547, "learning_rate": 6.243111333489516e-06, "loss": 5.797, "step": 239010 }, { "epoch": 0.4828355224085618, "grad_norm": 275.0234680175781, "learning_rate": 6.242773225095986e-06, "loss": 15.1636, "step": 239020 }, { "epoch": 0.48285572304124563, "grad_norm": 2234.659423828125, "learning_rate": 6.242435110645328e-06, "loss": 30.0221, "step": 239030 }, { "epoch": 0.48287592367392945, "grad_norm": 297.8598937988281, "learning_rate": 6.242096990139192e-06, "loss": 34.7394, "step": 239040 }, { "epoch": 0.4828961243066133, "grad_norm": 379.6488952636719, "learning_rate": 6.241758863579227e-06, "loss": 38.5201, "step": 239050 }, { "epoch": 0.4829163249392971, "grad_norm": 465.7540283203125, "learning_rate": 6.241420730967079e-06, "loss": 21.9894, "step": 239060 }, { "epoch": 0.4829365255719809, "grad_norm": 590.3614501953125, "learning_rate": 6.241082592304398e-06, "loss": 18.3803, "step": 239070 }, { "epoch": 0.48295672620466473, "grad_norm": 251.89111328125, "learning_rate": 6.24074444759283e-06, "loss": 18.2033, "step": 239080 }, { "epoch": 0.48297692683734855, "grad_norm": 609.8746337890625, "learning_rate": 6.240406296834024e-06, "loss": 26.0109, "step": 239090 }, { "epoch": 0.4829971274700324, "grad_norm": 150.3726806640625, "learning_rate": 6.240068140029628e-06, "loss": 18.8098, "step": 239100 }, { "epoch": 0.4830173281027162, "grad_norm": 306.13739013671875, "learning_rate": 6.2397299771812925e-06, "loss": 22.8141, "step": 239110 }, { "epoch": 0.4830375287354, "grad_norm": 483.5748596191406, "learning_rate": 6.23939180829066e-06, "loss": 17.9819, "step": 239120 }, { "epoch": 0.48305772936808383, "grad_norm": 285.5124206542969, "learning_rate": 6.239053633359384e-06, "loss": 12.5764, "step": 239130 }, { "epoch": 0.4830779300007676, "grad_norm": 551.8235473632812, "learning_rate": 6.2387154523891115e-06, "loss": 20.5377, "step": 239140 }, { "epoch": 0.4830981306334514, "grad_norm": 1192.864013671875, "learning_rate": 6.238377265381489e-06, "loss": 33.758, "step": 239150 }, { "epoch": 0.48311833126613524, "grad_norm": 126.6272201538086, "learning_rate": 6.2380390723381666e-06, "loss": 11.3716, "step": 239160 }, { "epoch": 0.48313853189881906, 
"grad_norm": 402.6373291015625, "learning_rate": 6.23770087326079e-06, "loss": 14.0015, "step": 239170 }, { "epoch": 0.4831587325315029, "grad_norm": 408.8807067871094, "learning_rate": 6.237362668151013e-06, "loss": 20.2099, "step": 239180 }, { "epoch": 0.4831789331641867, "grad_norm": 1.8079912662506104, "learning_rate": 6.237024457010478e-06, "loss": 21.0077, "step": 239190 }, { "epoch": 0.4831991337968705, "grad_norm": 341.6095275878906, "learning_rate": 6.236686239840836e-06, "loss": 22.3597, "step": 239200 }, { "epoch": 0.48321933442955434, "grad_norm": 251.46356201171875, "learning_rate": 6.236348016643735e-06, "loss": 19.5792, "step": 239210 }, { "epoch": 0.48323953506223816, "grad_norm": 217.58238220214844, "learning_rate": 6.236009787420824e-06, "loss": 19.7335, "step": 239220 }, { "epoch": 0.483259735694922, "grad_norm": 21.656570434570312, "learning_rate": 6.235671552173752e-06, "loss": 18.0882, "step": 239230 }, { "epoch": 0.4832799363276058, "grad_norm": 303.45770263671875, "learning_rate": 6.2353333109041655e-06, "loss": 14.4671, "step": 239240 }, { "epoch": 0.4833001369602896, "grad_norm": 430.2806701660156, "learning_rate": 6.234995063613716e-06, "loss": 17.2651, "step": 239250 }, { "epoch": 0.48332033759297344, "grad_norm": 926.8755493164062, "learning_rate": 6.234656810304048e-06, "loss": 26.6614, "step": 239260 }, { "epoch": 0.4833405382256572, "grad_norm": 159.474853515625, "learning_rate": 6.234318550976815e-06, "loss": 17.417, "step": 239270 }, { "epoch": 0.483360738858341, "grad_norm": 194.66876220703125, "learning_rate": 6.233980285633661e-06, "loss": 19.7228, "step": 239280 }, { "epoch": 0.48338093949102484, "grad_norm": 384.2812194824219, "learning_rate": 6.233642014276238e-06, "loss": 20.2494, "step": 239290 }, { "epoch": 0.48340114012370866, "grad_norm": 193.2677001953125, "learning_rate": 6.233303736906193e-06, "loss": 24.9815, "step": 239300 }, { "epoch": 0.4834213407563925, "grad_norm": 496.50653076171875, "learning_rate": 6.232965453525175e-06, "loss": 21.5377, "step": 239310 }, { "epoch": 0.4834415413890763, "grad_norm": 177.0562286376953, "learning_rate": 6.2326271641348325e-06, "loss": 13.9061, "step": 239320 }, { "epoch": 0.4834617420217601, "grad_norm": 369.60211181640625, "learning_rate": 6.232288868736816e-06, "loss": 11.1647, "step": 239330 }, { "epoch": 0.48348194265444394, "grad_norm": 730.1735229492188, "learning_rate": 6.231950567332773e-06, "loss": 18.1089, "step": 239340 }, { "epoch": 0.48350214328712776, "grad_norm": 169.8363494873047, "learning_rate": 6.231612259924351e-06, "loss": 10.8247, "step": 239350 }, { "epoch": 0.4835223439198116, "grad_norm": 234.20614624023438, "learning_rate": 6.231273946513201e-06, "loss": 15.2445, "step": 239360 }, { "epoch": 0.4835425445524954, "grad_norm": 383.68255615234375, "learning_rate": 6.23093562710097e-06, "loss": 40.6892, "step": 239370 }, { "epoch": 0.4835627451851792, "grad_norm": 396.31842041015625, "learning_rate": 6.23059730168931e-06, "loss": 26.1263, "step": 239380 }, { "epoch": 0.48358294581786304, "grad_norm": 251.06167602539062, "learning_rate": 6.230258970279867e-06, "loss": 10.0616, "step": 239390 }, { "epoch": 0.4836031464505468, "grad_norm": 24.388273239135742, "learning_rate": 6.229920632874291e-06, "loss": 26.5303, "step": 239400 }, { "epoch": 0.4836233470832306, "grad_norm": 123.3339614868164, "learning_rate": 6.229582289474231e-06, "loss": 23.5905, "step": 239410 }, { "epoch": 0.48364354771591445, "grad_norm": 373.2325134277344, "learning_rate": 6.229243940081336e-06, "loss": 
28.0521, "step": 239420 }, { "epoch": 0.48366374834859827, "grad_norm": 304.2738952636719, "learning_rate": 6.228905584697254e-06, "loss": 21.776, "step": 239430 }, { "epoch": 0.4836839489812821, "grad_norm": 659.3280029296875, "learning_rate": 6.228567223323637e-06, "loss": 17.784, "step": 239440 }, { "epoch": 0.4837041496139659, "grad_norm": 370.91400146484375, "learning_rate": 6.228228855962133e-06, "loss": 15.5512, "step": 239450 }, { "epoch": 0.4837243502466497, "grad_norm": 281.05889892578125, "learning_rate": 6.227890482614388e-06, "loss": 27.7278, "step": 239460 }, { "epoch": 0.48374455087933355, "grad_norm": 1949.37158203125, "learning_rate": 6.227552103282056e-06, "loss": 39.1714, "step": 239470 }, { "epoch": 0.48376475151201737, "grad_norm": 98.6017074584961, "learning_rate": 6.227213717966782e-06, "loss": 18.2789, "step": 239480 }, { "epoch": 0.4837849521447012, "grad_norm": 112.09862518310547, "learning_rate": 6.226875326670218e-06, "loss": 9.7645, "step": 239490 }, { "epoch": 0.483805152777385, "grad_norm": 549.0416259765625, "learning_rate": 6.2265369293940135e-06, "loss": 20.0345, "step": 239500 }, { "epoch": 0.4838253534100688, "grad_norm": 436.4242858886719, "learning_rate": 6.226198526139815e-06, "loss": 23.6544, "step": 239510 }, { "epoch": 0.48384555404275265, "grad_norm": 631.5547485351562, "learning_rate": 6.225860116909276e-06, "loss": 17.9584, "step": 239520 }, { "epoch": 0.4838657546754364, "grad_norm": 726.14794921875, "learning_rate": 6.225521701704042e-06, "loss": 21.1192, "step": 239530 }, { "epoch": 0.48388595530812023, "grad_norm": 714.0602416992188, "learning_rate": 6.225183280525763e-06, "loss": 22.2412, "step": 239540 }, { "epoch": 0.48390615594080405, "grad_norm": 56.499000549316406, "learning_rate": 6.2248448533760895e-06, "loss": 16.5303, "step": 239550 }, { "epoch": 0.48392635657348787, "grad_norm": 0.0, "learning_rate": 6.224506420256673e-06, "loss": 16.9147, "step": 239560 }, { "epoch": 0.4839465572061717, "grad_norm": 402.64794921875, "learning_rate": 6.2241679811691595e-06, "loss": 16.0278, "step": 239570 }, { "epoch": 0.4839667578388555, "grad_norm": 761.3997192382812, "learning_rate": 6.223829536115198e-06, "loss": 17.4052, "step": 239580 }, { "epoch": 0.48398695847153933, "grad_norm": 267.6705627441406, "learning_rate": 6.223491085096441e-06, "loss": 19.644, "step": 239590 }, { "epoch": 0.48400715910422315, "grad_norm": 468.30657958984375, "learning_rate": 6.223152628114537e-06, "loss": 13.8627, "step": 239600 }, { "epoch": 0.48402735973690697, "grad_norm": 818.8458862304688, "learning_rate": 6.222814165171136e-06, "loss": 28.966, "step": 239610 }, { "epoch": 0.4840475603695908, "grad_norm": 420.2396240234375, "learning_rate": 6.222475696267885e-06, "loss": 14.0035, "step": 239620 }, { "epoch": 0.4840677610022746, "grad_norm": 150.33786010742188, "learning_rate": 6.222137221406439e-06, "loss": 15.3682, "step": 239630 }, { "epoch": 0.48408796163495843, "grad_norm": 581.35302734375, "learning_rate": 6.221798740588442e-06, "loss": 31.8241, "step": 239640 }, { "epoch": 0.48410816226764225, "grad_norm": 177.21458435058594, "learning_rate": 6.221460253815546e-06, "loss": 14.8487, "step": 239650 }, { "epoch": 0.484128362900326, "grad_norm": 217.68978881835938, "learning_rate": 6.221121761089402e-06, "loss": 23.5491, "step": 239660 }, { "epoch": 0.48414856353300983, "grad_norm": 184.8630828857422, "learning_rate": 6.220783262411658e-06, "loss": 10.7315, "step": 239670 }, { "epoch": 0.48416876416569365, "grad_norm": 1146.76904296875, "learning_rate": 
6.220444757783966e-06, "loss": 21.2471, "step": 239680 }, { "epoch": 0.4841889647983775, "grad_norm": 361.0469665527344, "learning_rate": 6.220106247207972e-06, "loss": 17.7106, "step": 239690 }, { "epoch": 0.4842091654310613, "grad_norm": 628.46337890625, "learning_rate": 6.219767730685329e-06, "loss": 25.8659, "step": 239700 }, { "epoch": 0.4842293660637451, "grad_norm": 135.90708923339844, "learning_rate": 6.219429208217685e-06, "loss": 15.7509, "step": 239710 }, { "epoch": 0.48424956669642893, "grad_norm": 542.4644165039062, "learning_rate": 6.219090679806694e-06, "loss": 26.3743, "step": 239720 }, { "epoch": 0.48426976732911275, "grad_norm": 179.53170776367188, "learning_rate": 6.218752145453999e-06, "loss": 26.2833, "step": 239730 }, { "epoch": 0.4842899679617966, "grad_norm": 508.2477722167969, "learning_rate": 6.218413605161258e-06, "loss": 21.0299, "step": 239740 }, { "epoch": 0.4843101685944804, "grad_norm": 378.9328918457031, "learning_rate": 6.218075058930113e-06, "loss": 10.2118, "step": 239750 }, { "epoch": 0.4843303692271642, "grad_norm": 445.72991943359375, "learning_rate": 6.217736506762219e-06, "loss": 24.4518, "step": 239760 }, { "epoch": 0.48435056985984803, "grad_norm": 657.2543334960938, "learning_rate": 6.217397948659228e-06, "loss": 29.7278, "step": 239770 }, { "epoch": 0.4843707704925318, "grad_norm": 297.7481689453125, "learning_rate": 6.217059384622782e-06, "loss": 14.4522, "step": 239780 }, { "epoch": 0.4843909711252156, "grad_norm": 375.36016845703125, "learning_rate": 6.21672081465454e-06, "loss": 19.0674, "step": 239790 }, { "epoch": 0.48441117175789944, "grad_norm": 203.6333770751953, "learning_rate": 6.216382238756147e-06, "loss": 14.1113, "step": 239800 }, { "epoch": 0.48443137239058326, "grad_norm": 638.8234252929688, "learning_rate": 6.216043656929254e-06, "loss": 25.8044, "step": 239810 }, { "epoch": 0.4844515730232671, "grad_norm": 379.4268798828125, "learning_rate": 6.215705069175513e-06, "loss": 29.2119, "step": 239820 }, { "epoch": 0.4844717736559509, "grad_norm": 475.16302490234375, "learning_rate": 6.215366475496572e-06, "loss": 25.8795, "step": 239830 }, { "epoch": 0.4844919742886347, "grad_norm": 366.3997802734375, "learning_rate": 6.215027875894082e-06, "loss": 15.6054, "step": 239840 }, { "epoch": 0.48451217492131854, "grad_norm": 191.73252868652344, "learning_rate": 6.214689270369694e-06, "loss": 14.7612, "step": 239850 }, { "epoch": 0.48453237555400236, "grad_norm": 343.63763427734375, "learning_rate": 6.214350658925058e-06, "loss": 8.4783, "step": 239860 }, { "epoch": 0.4845525761866862, "grad_norm": 418.5944519042969, "learning_rate": 6.2140120415618235e-06, "loss": 24.3871, "step": 239870 }, { "epoch": 0.48457277681937, "grad_norm": 386.5837097167969, "learning_rate": 6.213673418281643e-06, "loss": 18.3405, "step": 239880 }, { "epoch": 0.4845929774520538, "grad_norm": 754.9140014648438, "learning_rate": 6.213334789086163e-06, "loss": 22.8767, "step": 239890 }, { "epoch": 0.48461317808473764, "grad_norm": 166.3883056640625, "learning_rate": 6.212996153977038e-06, "loss": 14.9799, "step": 239900 }, { "epoch": 0.4846333787174214, "grad_norm": 257.137939453125, "learning_rate": 6.212657512955916e-06, "loss": 17.5402, "step": 239910 }, { "epoch": 0.4846535793501052, "grad_norm": 306.62982177734375, "learning_rate": 6.212318866024449e-06, "loss": 22.9998, "step": 239920 }, { "epoch": 0.48467377998278904, "grad_norm": 242.8882293701172, "learning_rate": 6.211980213184287e-06, "loss": 18.9199, "step": 239930 }, { "epoch": 0.48469398061547286, 
"grad_norm": 327.93304443359375, "learning_rate": 6.21164155443708e-06, "loss": 9.0374, "step": 239940 }, { "epoch": 0.4847141812481567, "grad_norm": 179.9681854248047, "learning_rate": 6.21130288978448e-06, "loss": 16.0011, "step": 239950 }, { "epoch": 0.4847343818808405, "grad_norm": 571.282958984375, "learning_rate": 6.210964219228135e-06, "loss": 27.4455, "step": 239960 }, { "epoch": 0.4847545825135243, "grad_norm": 372.53173828125, "learning_rate": 6.2106255427697e-06, "loss": 32.171, "step": 239970 }, { "epoch": 0.48477478314620814, "grad_norm": 132.0978240966797, "learning_rate": 6.21028686041082e-06, "loss": 25.9145, "step": 239980 }, { "epoch": 0.48479498377889196, "grad_norm": 34.037010192871094, "learning_rate": 6.20994817215315e-06, "loss": 9.1799, "step": 239990 }, { "epoch": 0.4848151844115758, "grad_norm": 269.9684753417969, "learning_rate": 6.209609477998339e-06, "loss": 14.7286, "step": 240000 }, { "epoch": 0.4848353850442596, "grad_norm": 61.488460540771484, "learning_rate": 6.209270777948038e-06, "loss": 9.3359, "step": 240010 }, { "epoch": 0.4848555856769434, "grad_norm": 1023.6925048828125, "learning_rate": 6.208932072003899e-06, "loss": 26.4617, "step": 240020 }, { "epoch": 0.48487578630962724, "grad_norm": 127.65845489501953, "learning_rate": 6.208593360167571e-06, "loss": 16.4814, "step": 240030 }, { "epoch": 0.484895986942311, "grad_norm": 337.93463134765625, "learning_rate": 6.208254642440705e-06, "loss": 21.1301, "step": 240040 }, { "epoch": 0.48491618757499483, "grad_norm": 431.2547607421875, "learning_rate": 6.207915918824952e-06, "loss": 29.0501, "step": 240050 }, { "epoch": 0.48493638820767865, "grad_norm": 950.545166015625, "learning_rate": 6.207577189321965e-06, "loss": 20.9198, "step": 240060 }, { "epoch": 0.48495658884036247, "grad_norm": 599.9775390625, "learning_rate": 6.2072384539333914e-06, "loss": 27.4738, "step": 240070 }, { "epoch": 0.4849767894730463, "grad_norm": 252.0543975830078, "learning_rate": 6.206899712660887e-06, "loss": 26.734, "step": 240080 }, { "epoch": 0.4849969901057301, "grad_norm": 203.58958435058594, "learning_rate": 6.206560965506097e-06, "loss": 21.193, "step": 240090 }, { "epoch": 0.48501719073841393, "grad_norm": 541.8986206054688, "learning_rate": 6.206222212470675e-06, "loss": 20.3465, "step": 240100 }, { "epoch": 0.48503739137109775, "grad_norm": 22.384321212768555, "learning_rate": 6.205883453556274e-06, "loss": 26.7709, "step": 240110 }, { "epoch": 0.48505759200378157, "grad_norm": 559.81982421875, "learning_rate": 6.205544688764542e-06, "loss": 27.0594, "step": 240120 }, { "epoch": 0.4850777926364654, "grad_norm": 466.8587951660156, "learning_rate": 6.205205918097133e-06, "loss": 21.3222, "step": 240130 }, { "epoch": 0.4850979932691492, "grad_norm": 707.7942504882812, "learning_rate": 6.204867141555695e-06, "loss": 17.0808, "step": 240140 }, { "epoch": 0.48511819390183303, "grad_norm": 265.2966613769531, "learning_rate": 6.20452835914188e-06, "loss": 28.1372, "step": 240150 }, { "epoch": 0.48513839453451685, "grad_norm": 248.39361572265625, "learning_rate": 6.204189570857342e-06, "loss": 26.3115, "step": 240160 }, { "epoch": 0.4851585951672006, "grad_norm": 284.2188720703125, "learning_rate": 6.2038507767037295e-06, "loss": 19.0415, "step": 240170 }, { "epoch": 0.48517879579988443, "grad_norm": 466.6912536621094, "learning_rate": 6.2035119766826935e-06, "loss": 16.9674, "step": 240180 }, { "epoch": 0.48519899643256825, "grad_norm": 381.26611328125, "learning_rate": 6.203173170795887e-06, "loss": 19.6884, "step": 240190 
}, { "epoch": 0.4852191970652521, "grad_norm": 532.6866455078125, "learning_rate": 6.202834359044959e-06, "loss": 17.3738, "step": 240200 }, { "epoch": 0.4852393976979359, "grad_norm": 239.88803100585938, "learning_rate": 6.2024955414315634e-06, "loss": 13.0791, "step": 240210 }, { "epoch": 0.4852595983306197, "grad_norm": 295.5515441894531, "learning_rate": 6.202156717957351e-06, "loss": 26.7166, "step": 240220 }, { "epoch": 0.48527979896330353, "grad_norm": 314.4953918457031, "learning_rate": 6.2018178886239695e-06, "loss": 23.1536, "step": 240230 }, { "epoch": 0.48529999959598735, "grad_norm": 420.4693908691406, "learning_rate": 6.201479053433077e-06, "loss": 17.1124, "step": 240240 }, { "epoch": 0.4853202002286712, "grad_norm": 90.6326904296875, "learning_rate": 6.2011402123863194e-06, "loss": 25.9831, "step": 240250 }, { "epoch": 0.485340400861355, "grad_norm": 519.2074584960938, "learning_rate": 6.2008013654853505e-06, "loss": 19.4891, "step": 240260 }, { "epoch": 0.4853606014940388, "grad_norm": 337.56329345703125, "learning_rate": 6.200462512731821e-06, "loss": 14.9792, "step": 240270 }, { "epoch": 0.48538080212672263, "grad_norm": 106.90280151367188, "learning_rate": 6.200123654127383e-06, "loss": 22.7102, "step": 240280 }, { "epoch": 0.4854010027594064, "grad_norm": 149.78073120117188, "learning_rate": 6.199784789673689e-06, "loss": 23.2156, "step": 240290 }, { "epoch": 0.4854212033920902, "grad_norm": 868.6480712890625, "learning_rate": 6.199445919372388e-06, "loss": 43.4462, "step": 240300 }, { "epoch": 0.48544140402477404, "grad_norm": 400.4078063964844, "learning_rate": 6.199107043225134e-06, "loss": 12.8822, "step": 240310 }, { "epoch": 0.48546160465745786, "grad_norm": 384.9847717285156, "learning_rate": 6.198768161233577e-06, "loss": 24.6104, "step": 240320 }, { "epoch": 0.4854818052901417, "grad_norm": 174.50941467285156, "learning_rate": 6.19842927339937e-06, "loss": 24.4269, "step": 240330 }, { "epoch": 0.4855020059228255, "grad_norm": 378.19415283203125, "learning_rate": 6.198090379724163e-06, "loss": 7.8148, "step": 240340 }, { "epoch": 0.4855222065555093, "grad_norm": 239.51068115234375, "learning_rate": 6.1977514802096105e-06, "loss": 10.5392, "step": 240350 }, { "epoch": 0.48554240718819314, "grad_norm": 537.36328125, "learning_rate": 6.197412574857361e-06, "loss": 17.7848, "step": 240360 }, { "epoch": 0.48556260782087696, "grad_norm": 346.7205505371094, "learning_rate": 6.197073663669069e-06, "loss": 14.8186, "step": 240370 }, { "epoch": 0.4855828084535608, "grad_norm": 267.4868469238281, "learning_rate": 6.196734746646384e-06, "loss": 26.4634, "step": 240380 }, { "epoch": 0.4856030090862446, "grad_norm": 225.48048400878906, "learning_rate": 6.19639582379096e-06, "loss": 10.4006, "step": 240390 }, { "epoch": 0.4856232097189284, "grad_norm": 335.59796142578125, "learning_rate": 6.1960568951044475e-06, "loss": 20.1079, "step": 240400 }, { "epoch": 0.48564341035161224, "grad_norm": 686.9068603515625, "learning_rate": 6.195717960588499e-06, "loss": 17.1123, "step": 240410 }, { "epoch": 0.485663610984296, "grad_norm": 449.8243103027344, "learning_rate": 6.195379020244765e-06, "loss": 16.0279, "step": 240420 }, { "epoch": 0.4856838116169798, "grad_norm": 833.7225952148438, "learning_rate": 6.195040074074899e-06, "loss": 29.0912, "step": 240430 }, { "epoch": 0.48570401224966364, "grad_norm": 206.11856079101562, "learning_rate": 6.1947011220805535e-06, "loss": 9.9603, "step": 240440 }, { "epoch": 0.48572421288234746, "grad_norm": 652.8059692382812, "learning_rate": 
6.19436216426338e-06, "loss": 23.0246, "step": 240450 }, { "epoch": 0.4857444135150313, "grad_norm": 392.7476806640625, "learning_rate": 6.194023200625029e-06, "loss": 11.9411, "step": 240460 }, { "epoch": 0.4857646141477151, "grad_norm": 333.5895690917969, "learning_rate": 6.193684231167154e-06, "loss": 21.2221, "step": 240470 }, { "epoch": 0.4857848147803989, "grad_norm": 475.376708984375, "learning_rate": 6.193345255891407e-06, "loss": 22.862, "step": 240480 }, { "epoch": 0.48580501541308274, "grad_norm": 651.0830688476562, "learning_rate": 6.19300627479944e-06, "loss": 17.0758, "step": 240490 }, { "epoch": 0.48582521604576656, "grad_norm": 192.7631072998047, "learning_rate": 6.192667287892905e-06, "loss": 28.367, "step": 240500 }, { "epoch": 0.4858454166784504, "grad_norm": 296.6648254394531, "learning_rate": 6.192328295173455e-06, "loss": 18.921, "step": 240510 }, { "epoch": 0.4858656173111342, "grad_norm": 175.9440460205078, "learning_rate": 6.191989296642741e-06, "loss": 34.9542, "step": 240520 }, { "epoch": 0.485885817943818, "grad_norm": 734.977783203125, "learning_rate": 6.1916502923024145e-06, "loss": 29.6474, "step": 240530 }, { "epoch": 0.48590601857650184, "grad_norm": 586.5077514648438, "learning_rate": 6.191311282154131e-06, "loss": 26.4853, "step": 240540 }, { "epoch": 0.4859262192091856, "grad_norm": 535.6624145507812, "learning_rate": 6.1909722661995394e-06, "loss": 14.4432, "step": 240550 }, { "epoch": 0.4859464198418694, "grad_norm": 276.8780212402344, "learning_rate": 6.190633244440295e-06, "loss": 11.2932, "step": 240560 }, { "epoch": 0.48596662047455325, "grad_norm": 98.0551528930664, "learning_rate": 6.190294216878045e-06, "loss": 11.7655, "step": 240570 }, { "epoch": 0.48598682110723707, "grad_norm": 275.4180908203125, "learning_rate": 6.189955183514449e-06, "loss": 19.0767, "step": 240580 }, { "epoch": 0.4860070217399209, "grad_norm": 438.4539489746094, "learning_rate": 6.1896161443511546e-06, "loss": 28.8467, "step": 240590 }, { "epoch": 0.4860272223726047, "grad_norm": 488.70843505859375, "learning_rate": 6.189277099389816e-06, "loss": 31.3845, "step": 240600 }, { "epoch": 0.4860474230052885, "grad_norm": 647.655029296875, "learning_rate": 6.188938048632084e-06, "loss": 23.5975, "step": 240610 }, { "epoch": 0.48606762363797235, "grad_norm": 404.51593017578125, "learning_rate": 6.188598992079613e-06, "loss": 15.501, "step": 240620 }, { "epoch": 0.48608782427065617, "grad_norm": 723.1797485351562, "learning_rate": 6.188259929734054e-06, "loss": 29.2327, "step": 240630 }, { "epoch": 0.48610802490334, "grad_norm": 179.34674072265625, "learning_rate": 6.187920861597061e-06, "loss": 27.3343, "step": 240640 }, { "epoch": 0.4861282255360238, "grad_norm": 1395.5333251953125, "learning_rate": 6.187581787670285e-06, "loss": 24.1373, "step": 240650 }, { "epoch": 0.4861484261687076, "grad_norm": 304.010009765625, "learning_rate": 6.18724270795538e-06, "loss": 17.1675, "step": 240660 }, { "epoch": 0.48616862680139145, "grad_norm": 1029.99658203125, "learning_rate": 6.186903622453998e-06, "loss": 26.6351, "step": 240670 }, { "epoch": 0.4861888274340752, "grad_norm": 5.324008941650391, "learning_rate": 6.18656453116779e-06, "loss": 17.6654, "step": 240680 }, { "epoch": 0.48620902806675903, "grad_norm": 240.3070068359375, "learning_rate": 6.186225434098413e-06, "loss": 38.494, "step": 240690 }, { "epoch": 0.48622922869944285, "grad_norm": 137.70303344726562, "learning_rate": 6.185886331247516e-06, "loss": 11.0379, "step": 240700 }, { "epoch": 0.48624942933212667, "grad_norm": 
0.020675495266914368, "learning_rate": 6.1855472226167525e-06, "loss": 14.5225, "step": 240710 }, { "epoch": 0.4862696299648105, "grad_norm": 137.741943359375, "learning_rate": 6.185208108207776e-06, "loss": 10.1962, "step": 240720 }, { "epoch": 0.4862898305974943, "grad_norm": 576.734375, "learning_rate": 6.184868988022238e-06, "loss": 32.971, "step": 240730 }, { "epoch": 0.48631003123017813, "grad_norm": 975.5730590820312, "learning_rate": 6.184529862061794e-06, "loss": 31.4935, "step": 240740 }, { "epoch": 0.48633023186286195, "grad_norm": 686.66259765625, "learning_rate": 6.184190730328095e-06, "loss": 15.9249, "step": 240750 }, { "epoch": 0.48635043249554577, "grad_norm": 419.7538146972656, "learning_rate": 6.1838515928227925e-06, "loss": 36.5501, "step": 240760 }, { "epoch": 0.4863706331282296, "grad_norm": 249.2308807373047, "learning_rate": 6.1835124495475415e-06, "loss": 28.6281, "step": 240770 }, { "epoch": 0.4863908337609134, "grad_norm": 269.2325744628906, "learning_rate": 6.183173300503995e-06, "loss": 14.3527, "step": 240780 }, { "epoch": 0.48641103439359723, "grad_norm": 336.5515441894531, "learning_rate": 6.182834145693805e-06, "loss": 34.5664, "step": 240790 }, { "epoch": 0.48643123502628105, "grad_norm": 146.33477783203125, "learning_rate": 6.182494985118625e-06, "loss": 13.8386, "step": 240800 }, { "epoch": 0.4864514356589648, "grad_norm": 209.32386779785156, "learning_rate": 6.182155818780107e-06, "loss": 21.1546, "step": 240810 }, { "epoch": 0.48647163629164863, "grad_norm": 245.36766052246094, "learning_rate": 6.181816646679904e-06, "loss": 22.4551, "step": 240820 }, { "epoch": 0.48649183692433245, "grad_norm": 66.82134246826172, "learning_rate": 6.181477468819673e-06, "loss": 6.197, "step": 240830 }, { "epoch": 0.4865120375570163, "grad_norm": 560.7149047851562, "learning_rate": 6.181138285201062e-06, "loss": 30.5613, "step": 240840 }, { "epoch": 0.4865322381897001, "grad_norm": 463.9530944824219, "learning_rate": 6.180799095825727e-06, "loss": 11.7607, "step": 240850 }, { "epoch": 0.4865524388223839, "grad_norm": 480.4827575683594, "learning_rate": 6.18045990069532e-06, "loss": 18.891, "step": 240860 }, { "epoch": 0.48657263945506773, "grad_norm": 701.4447021484375, "learning_rate": 6.180120699811495e-06, "loss": 17.7352, "step": 240870 }, { "epoch": 0.48659284008775155, "grad_norm": 586.0391845703125, "learning_rate": 6.179781493175906e-06, "loss": 17.9909, "step": 240880 }, { "epoch": 0.4866130407204354, "grad_norm": 673.2279663085938, "learning_rate": 6.179442280790202e-06, "loss": 24.3892, "step": 240890 }, { "epoch": 0.4866332413531192, "grad_norm": 284.8859558105469, "learning_rate": 6.179103062656042e-06, "loss": 26.0115, "step": 240900 }, { "epoch": 0.486653441985803, "grad_norm": 129.76742553710938, "learning_rate": 6.178763838775076e-06, "loss": 12.9221, "step": 240910 }, { "epoch": 0.48667364261848683, "grad_norm": 338.1567687988281, "learning_rate": 6.178424609148957e-06, "loss": 13.301, "step": 240920 }, { "epoch": 0.4866938432511706, "grad_norm": 299.5407409667969, "learning_rate": 6.178085373779341e-06, "loss": 20.4077, "step": 240930 }, { "epoch": 0.4867140438838544, "grad_norm": 341.9033508300781, "learning_rate": 6.17774613266788e-06, "loss": 12.6328, "step": 240940 }, { "epoch": 0.48673424451653824, "grad_norm": 285.49285888671875, "learning_rate": 6.177406885816224e-06, "loss": 30.9322, "step": 240950 }, { "epoch": 0.48675444514922206, "grad_norm": 589.399658203125, "learning_rate": 6.177067633226034e-06, "loss": 20.0082, "step": 240960 }, { 
"epoch": 0.4867746457819059, "grad_norm": 269.603271484375, "learning_rate": 6.1767283748989555e-06, "loss": 20.4365, "step": 240970 }, { "epoch": 0.4867948464145897, "grad_norm": 522.3572387695312, "learning_rate": 6.176389110836647e-06, "loss": 35.5067, "step": 240980 }, { "epoch": 0.4868150470472735, "grad_norm": 204.86964416503906, "learning_rate": 6.176049841040762e-06, "loss": 21.5063, "step": 240990 }, { "epoch": 0.48683524767995734, "grad_norm": 370.3238525390625, "learning_rate": 6.17571056551295e-06, "loss": 16.0572, "step": 241000 }, { "epoch": 0.48685544831264116, "grad_norm": 608.7770385742188, "learning_rate": 6.1753712842548695e-06, "loss": 21.5328, "step": 241010 }, { "epoch": 0.486875648945325, "grad_norm": 394.47198486328125, "learning_rate": 6.175031997268171e-06, "loss": 21.9332, "step": 241020 }, { "epoch": 0.4868958495780088, "grad_norm": 174.228271484375, "learning_rate": 6.174692704554509e-06, "loss": 9.0828, "step": 241030 }, { "epoch": 0.4869160502106926, "grad_norm": 536.2742309570312, "learning_rate": 6.174353406115537e-06, "loss": 34.6836, "step": 241040 }, { "epoch": 0.48693625084337644, "grad_norm": 281.7669677734375, "learning_rate": 6.17401410195291e-06, "loss": 33.572, "step": 241050 }, { "epoch": 0.4869564514760602, "grad_norm": 15.558895111083984, "learning_rate": 6.17367479206828e-06, "loss": 12.0758, "step": 241060 }, { "epoch": 0.486976652108744, "grad_norm": 274.0773010253906, "learning_rate": 6.173335476463303e-06, "loss": 49.4286, "step": 241070 }, { "epoch": 0.48699685274142784, "grad_norm": 286.81341552734375, "learning_rate": 6.172996155139629e-06, "loss": 14.5024, "step": 241080 }, { "epoch": 0.48701705337411166, "grad_norm": 158.656494140625, "learning_rate": 6.172656828098914e-06, "loss": 20.9697, "step": 241090 }, { "epoch": 0.4870372540067955, "grad_norm": 190.72592163085938, "learning_rate": 6.172317495342812e-06, "loss": 40.965, "step": 241100 }, { "epoch": 0.4870574546394793, "grad_norm": 232.5026092529297, "learning_rate": 6.171978156872978e-06, "loss": 20.3812, "step": 241110 }, { "epoch": 0.4870776552721631, "grad_norm": 78.13396453857422, "learning_rate": 6.171638812691065e-06, "loss": 28.8399, "step": 241120 }, { "epoch": 0.48709785590484694, "grad_norm": 442.35137939453125, "learning_rate": 6.171299462798725e-06, "loss": 25.7488, "step": 241130 }, { "epoch": 0.48711805653753076, "grad_norm": 113.7076416015625, "learning_rate": 6.170960107197613e-06, "loss": 10.6443, "step": 241140 }, { "epoch": 0.4871382571702146, "grad_norm": 169.17816162109375, "learning_rate": 6.1706207458893855e-06, "loss": 13.0223, "step": 241150 }, { "epoch": 0.4871584578028984, "grad_norm": 236.05850219726562, "learning_rate": 6.170281378875692e-06, "loss": 28.1193, "step": 241160 }, { "epoch": 0.4871786584355822, "grad_norm": 343.2547302246094, "learning_rate": 6.169942006158192e-06, "loss": 22.3624, "step": 241170 }, { "epoch": 0.48719885906826604, "grad_norm": 254.20347595214844, "learning_rate": 6.169602627738533e-06, "loss": 18.9047, "step": 241180 }, { "epoch": 0.4872190597009498, "grad_norm": 315.96368408203125, "learning_rate": 6.169263243618375e-06, "loss": 10.5766, "step": 241190 }, { "epoch": 0.48723926033363363, "grad_norm": 284.27606201171875, "learning_rate": 6.168923853799369e-06, "loss": 24.1813, "step": 241200 }, { "epoch": 0.48725946096631745, "grad_norm": 629.637451171875, "learning_rate": 6.16858445828317e-06, "loss": 33.6193, "step": 241210 }, { "epoch": 0.48727966159900127, "grad_norm": 573.7402954101562, "learning_rate": 
6.168245057071434e-06, "loss": 26.8433, "step": 241220 }, { "epoch": 0.4872998622316851, "grad_norm": 338.18426513671875, "learning_rate": 6.167905650165811e-06, "loss": 18.7747, "step": 241230 }, { "epoch": 0.4873200628643689, "grad_norm": 467.04095458984375, "learning_rate": 6.167566237567957e-06, "loss": 30.4401, "step": 241240 }, { "epoch": 0.48734026349705273, "grad_norm": 233.25152587890625, "learning_rate": 6.1672268192795285e-06, "loss": 15.6139, "step": 241250 }, { "epoch": 0.48736046412973655, "grad_norm": 622.6288452148438, "learning_rate": 6.166887395302177e-06, "loss": 47.6286, "step": 241260 }, { "epoch": 0.48738066476242037, "grad_norm": 102.83345031738281, "learning_rate": 6.166547965637557e-06, "loss": 20.3463, "step": 241270 }, { "epoch": 0.4874008653951042, "grad_norm": 530.1830444335938, "learning_rate": 6.166208530287327e-06, "loss": 33.7022, "step": 241280 }, { "epoch": 0.487421066027788, "grad_norm": 496.9211120605469, "learning_rate": 6.165869089253134e-06, "loss": 17.6051, "step": 241290 }, { "epoch": 0.48744126666047183, "grad_norm": 454.7920837402344, "learning_rate": 6.16552964253664e-06, "loss": 13.1026, "step": 241300 }, { "epoch": 0.48746146729315565, "grad_norm": 367.9190979003906, "learning_rate": 6.165190190139494e-06, "loss": 12.3061, "step": 241310 }, { "epoch": 0.4874816679258394, "grad_norm": 107.29808807373047, "learning_rate": 6.164850732063352e-06, "loss": 15.9814, "step": 241320 }, { "epoch": 0.48750186855852323, "grad_norm": 325.2901611328125, "learning_rate": 6.164511268309871e-06, "loss": 13.5714, "step": 241330 }, { "epoch": 0.48752206919120705, "grad_norm": 538.7465209960938, "learning_rate": 6.1641717988807006e-06, "loss": 20.7551, "step": 241340 }, { "epoch": 0.4875422698238909, "grad_norm": 165.80274963378906, "learning_rate": 6.163832323777499e-06, "loss": 20.0998, "step": 241350 }, { "epoch": 0.4875624704565747, "grad_norm": 793.2183837890625, "learning_rate": 6.16349284300192e-06, "loss": 21.3794, "step": 241360 }, { "epoch": 0.4875826710892585, "grad_norm": 228.20997619628906, "learning_rate": 6.1631533565556175e-06, "loss": 17.5305, "step": 241370 }, { "epoch": 0.48760287172194233, "grad_norm": 377.90380859375, "learning_rate": 6.162813864440247e-06, "loss": 10.1957, "step": 241380 }, { "epoch": 0.48762307235462615, "grad_norm": 180.87107849121094, "learning_rate": 6.162474366657464e-06, "loss": 16.6347, "step": 241390 }, { "epoch": 0.48764327298731, "grad_norm": 209.7317352294922, "learning_rate": 6.1621348632089205e-06, "loss": 24.1603, "step": 241400 }, { "epoch": 0.4876634736199938, "grad_norm": 400.8587951660156, "learning_rate": 6.161795354096273e-06, "loss": 32.7475, "step": 241410 }, { "epoch": 0.4876836742526776, "grad_norm": 380.6274719238281, "learning_rate": 6.161455839321175e-06, "loss": 17.9665, "step": 241420 }, { "epoch": 0.48770387488536143, "grad_norm": 490.2958984375, "learning_rate": 6.161116318885283e-06, "loss": 22.4291, "step": 241430 }, { "epoch": 0.48772407551804525, "grad_norm": 253.66799926757812, "learning_rate": 6.160776792790252e-06, "loss": 10.5034, "step": 241440 }, { "epoch": 0.487744276150729, "grad_norm": 435.06976318359375, "learning_rate": 6.1604372610377335e-06, "loss": 8.3813, "step": 241450 }, { "epoch": 0.48776447678341284, "grad_norm": 684.455322265625, "learning_rate": 6.160097723629387e-06, "loss": 13.18, "step": 241460 }, { "epoch": 0.48778467741609666, "grad_norm": 356.3554382324219, "learning_rate": 6.159758180566863e-06, "loss": 14.033, "step": 241470 }, { "epoch": 0.4878048780487805, 
"grad_norm": 360.5289306640625, "learning_rate": 6.159418631851818e-06, "loss": 24.9207, "step": 241480 }, { "epoch": 0.4878250786814643, "grad_norm": 573.3186645507812, "learning_rate": 6.159079077485909e-06, "loss": 22.123, "step": 241490 }, { "epoch": 0.4878452793141481, "grad_norm": 21.944501876831055, "learning_rate": 6.158739517470786e-06, "loss": 10.3445, "step": 241500 }, { "epoch": 0.48786547994683194, "grad_norm": 457.6626281738281, "learning_rate": 6.158399951808111e-06, "loss": 12.3422, "step": 241510 }, { "epoch": 0.48788568057951576, "grad_norm": 420.6189270019531, "learning_rate": 6.158060380499533e-06, "loss": 11.9613, "step": 241520 }, { "epoch": 0.4879058812121996, "grad_norm": 453.964111328125, "learning_rate": 6.1577208035467095e-06, "loss": 21.848, "step": 241530 }, { "epoch": 0.4879260818448834, "grad_norm": 832.23486328125, "learning_rate": 6.157381220951295e-06, "loss": 25.5689, "step": 241540 }, { "epoch": 0.4879462824775672, "grad_norm": 364.0274963378906, "learning_rate": 6.157041632714945e-06, "loss": 20.257, "step": 241550 }, { "epoch": 0.48796648311025104, "grad_norm": 420.5585632324219, "learning_rate": 6.1567020388393155e-06, "loss": 59.8669, "step": 241560 }, { "epoch": 0.4879866837429348, "grad_norm": 215.24520874023438, "learning_rate": 6.156362439326059e-06, "loss": 16.7804, "step": 241570 }, { "epoch": 0.4880068843756186, "grad_norm": 529.3853149414062, "learning_rate": 6.156022834176832e-06, "loss": 19.6015, "step": 241580 }, { "epoch": 0.48802708500830244, "grad_norm": 113.60696411132812, "learning_rate": 6.155683223393291e-06, "loss": 12.9144, "step": 241590 }, { "epoch": 0.48804728564098626, "grad_norm": 218.59947204589844, "learning_rate": 6.155343606977091e-06, "loss": 18.9772, "step": 241600 }, { "epoch": 0.4880674862736701, "grad_norm": 187.24163818359375, "learning_rate": 6.155003984929883e-06, "loss": 12.2726, "step": 241610 }, { "epoch": 0.4880876869063539, "grad_norm": 284.59564208984375, "learning_rate": 6.15466435725333e-06, "loss": 9.7764, "step": 241620 }, { "epoch": 0.4881078875390377, "grad_norm": 40.67657470703125, "learning_rate": 6.154324723949079e-06, "loss": 11.5925, "step": 241630 }, { "epoch": 0.48812808817172154, "grad_norm": 166.55136108398438, "learning_rate": 6.153985085018792e-06, "loss": 29.3433, "step": 241640 }, { "epoch": 0.48814828880440536, "grad_norm": 546.1761474609375, "learning_rate": 6.15364544046412e-06, "loss": 36.4613, "step": 241650 }, { "epoch": 0.4881684894370892, "grad_norm": 199.06268310546875, "learning_rate": 6.153305790286721e-06, "loss": 29.4845, "step": 241660 }, { "epoch": 0.488188690069773, "grad_norm": 409.25390625, "learning_rate": 6.15296613448825e-06, "loss": 15.4448, "step": 241670 }, { "epoch": 0.4882088907024568, "grad_norm": 119.15308380126953, "learning_rate": 6.152626473070361e-06, "loss": 23.0436, "step": 241680 }, { "epoch": 0.48822909133514064, "grad_norm": 1010.7807006835938, "learning_rate": 6.152286806034711e-06, "loss": 29.5118, "step": 241690 }, { "epoch": 0.4882492919678244, "grad_norm": 545.5733032226562, "learning_rate": 6.151947133382954e-06, "loss": 24.7407, "step": 241700 }, { "epoch": 0.4882694926005082, "grad_norm": 398.5425109863281, "learning_rate": 6.151607455116746e-06, "loss": 18.063, "step": 241710 }, { "epoch": 0.48828969323319205, "grad_norm": 266.5617370605469, "learning_rate": 6.1512677712377435e-06, "loss": 23.8374, "step": 241720 }, { "epoch": 0.48830989386587587, "grad_norm": 431.0079040527344, "learning_rate": 6.150928081747603e-06, "loss": 31.6731, "step": 
241730 }, { "epoch": 0.4883300944985597, "grad_norm": 576.68212890625, "learning_rate": 6.150588386647977e-06, "loss": 19.0013, "step": 241740 }, { "epoch": 0.4883502951312435, "grad_norm": 172.9222412109375, "learning_rate": 6.150248685940523e-06, "loss": 21.7811, "step": 241750 }, { "epoch": 0.4883704957639273, "grad_norm": 424.4622802734375, "learning_rate": 6.149908979626897e-06, "loss": 19.5431, "step": 241760 }, { "epoch": 0.48839069639661115, "grad_norm": 827.7529907226562, "learning_rate": 6.149569267708752e-06, "loss": 34.5501, "step": 241770 }, { "epoch": 0.48841089702929497, "grad_norm": 297.3861083984375, "learning_rate": 6.149229550187748e-06, "loss": 24.225, "step": 241780 }, { "epoch": 0.4884310976619788, "grad_norm": 812.0133666992188, "learning_rate": 6.148889827065538e-06, "loss": 26.0939, "step": 241790 }, { "epoch": 0.4884512982946626, "grad_norm": 118.50092315673828, "learning_rate": 6.148550098343778e-06, "loss": 8.6631, "step": 241800 }, { "epoch": 0.4884714989273464, "grad_norm": 176.11004638671875, "learning_rate": 6.148210364024125e-06, "loss": 15.2653, "step": 241810 }, { "epoch": 0.48849169956003025, "grad_norm": 289.4030456542969, "learning_rate": 6.147870624108233e-06, "loss": 12.5906, "step": 241820 }, { "epoch": 0.488511900192714, "grad_norm": 653.380126953125, "learning_rate": 6.147530878597761e-06, "loss": 25.3767, "step": 241830 }, { "epoch": 0.48853210082539783, "grad_norm": 577.6995239257812, "learning_rate": 6.14719112749436e-06, "loss": 24.4381, "step": 241840 }, { "epoch": 0.48855230145808165, "grad_norm": 5.02866268157959, "learning_rate": 6.146851370799689e-06, "loss": 24.4571, "step": 241850 }, { "epoch": 0.48857250209076547, "grad_norm": 338.2427673339844, "learning_rate": 6.146511608515404e-06, "loss": 14.1267, "step": 241860 }, { "epoch": 0.4885927027234493, "grad_norm": 259.8116149902344, "learning_rate": 6.146171840643161e-06, "loss": 24.3192, "step": 241870 }, { "epoch": 0.4886129033561331, "grad_norm": 146.1227264404297, "learning_rate": 6.145832067184614e-06, "loss": 19.1257, "step": 241880 }, { "epoch": 0.48863310398881693, "grad_norm": 338.2033386230469, "learning_rate": 6.145492288141422e-06, "loss": 15.5932, "step": 241890 }, { "epoch": 0.48865330462150075, "grad_norm": 264.390380859375, "learning_rate": 6.145152503515239e-06, "loss": 11.5019, "step": 241900 }, { "epoch": 0.48867350525418457, "grad_norm": 366.4673156738281, "learning_rate": 6.144812713307721e-06, "loss": 18.0583, "step": 241910 }, { "epoch": 0.4886937058868684, "grad_norm": 97.84915161132812, "learning_rate": 6.144472917520526e-06, "loss": 8.6361, "step": 241920 }, { "epoch": 0.4887139065195522, "grad_norm": 435.5666198730469, "learning_rate": 6.1441331161553065e-06, "loss": 8.8293, "step": 241930 }, { "epoch": 0.48873410715223603, "grad_norm": 257.6641540527344, "learning_rate": 6.143793309213724e-06, "loss": 23.476, "step": 241940 }, { "epoch": 0.48875430778491985, "grad_norm": 55.174983978271484, "learning_rate": 6.143453496697428e-06, "loss": 10.6772, "step": 241950 }, { "epoch": 0.4887745084176036, "grad_norm": 374.9102478027344, "learning_rate": 6.143113678608081e-06, "loss": 36.4494, "step": 241960 }, { "epoch": 0.48879470905028743, "grad_norm": 4.087224006652832, "learning_rate": 6.142773854947336e-06, "loss": 31.4217, "step": 241970 }, { "epoch": 0.48881490968297125, "grad_norm": 553.54541015625, "learning_rate": 6.14243402571685e-06, "loss": 40.1237, "step": 241980 }, { "epoch": 0.4888351103156551, "grad_norm": 262.3894348144531, "learning_rate": 
6.142094190918279e-06, "loss": 15.21, "step": 241990 }, { "epoch": 0.4888553109483389, "grad_norm": 632.6431274414062, "learning_rate": 6.141754350553279e-06, "loss": 23.2838, "step": 242000 }, { "epoch": 0.4888755115810227, "grad_norm": 398.20086669921875, "learning_rate": 6.141414504623509e-06, "loss": 24.0013, "step": 242010 }, { "epoch": 0.48889571221370653, "grad_norm": 867.7739868164062, "learning_rate": 6.14107465313062e-06, "loss": 27.5092, "step": 242020 }, { "epoch": 0.48891591284639035, "grad_norm": 356.7303771972656, "learning_rate": 6.140734796076273e-06, "loss": 15.2382, "step": 242030 }, { "epoch": 0.4889361134790742, "grad_norm": 274.17041015625, "learning_rate": 6.1403949334621215e-06, "loss": 13.1964, "step": 242040 }, { "epoch": 0.488956314111758, "grad_norm": 189.7613983154297, "learning_rate": 6.140055065289826e-06, "loss": 14.1268, "step": 242050 }, { "epoch": 0.4889765147444418, "grad_norm": 364.79498291015625, "learning_rate": 6.139715191561038e-06, "loss": 29.4825, "step": 242060 }, { "epoch": 0.48899671537712563, "grad_norm": 617.8101196289062, "learning_rate": 6.139375312277418e-06, "loss": 17.7693, "step": 242070 }, { "epoch": 0.48901691600980945, "grad_norm": 470.8154296875, "learning_rate": 6.1390354274406205e-06, "loss": 18.9127, "step": 242080 }, { "epoch": 0.4890371166424932, "grad_norm": 274.7874755859375, "learning_rate": 6.138695537052301e-06, "loss": 30.696, "step": 242090 }, { "epoch": 0.48905731727517704, "grad_norm": 307.2875671386719, "learning_rate": 6.138355641114121e-06, "loss": 33.591, "step": 242100 }, { "epoch": 0.48907751790786086, "grad_norm": 295.16082763671875, "learning_rate": 6.138015739627731e-06, "loss": 37.4238, "step": 242110 }, { "epoch": 0.4890977185405447, "grad_norm": 630.1863403320312, "learning_rate": 6.137675832594792e-06, "loss": 24.3865, "step": 242120 }, { "epoch": 0.4891179191732285, "grad_norm": 339.9185791015625, "learning_rate": 6.137335920016957e-06, "loss": 17.0401, "step": 242130 }, { "epoch": 0.4891381198059123, "grad_norm": 404.68560791015625, "learning_rate": 6.136996001895885e-06, "loss": 28.5077, "step": 242140 }, { "epoch": 0.48915832043859614, "grad_norm": 204.63204956054688, "learning_rate": 6.136656078233233e-06, "loss": 20.071, "step": 242150 }, { "epoch": 0.48917852107127996, "grad_norm": 730.635009765625, "learning_rate": 6.136316149030657e-06, "loss": 16.8284, "step": 242160 }, { "epoch": 0.4891987217039638, "grad_norm": 550.9608154296875, "learning_rate": 6.135976214289814e-06, "loss": 30.223, "step": 242170 }, { "epoch": 0.4892189223366476, "grad_norm": 313.05218505859375, "learning_rate": 6.135636274012361e-06, "loss": 11.7001, "step": 242180 }, { "epoch": 0.4892391229693314, "grad_norm": 329.033203125, "learning_rate": 6.135296328199954e-06, "loss": 31.2961, "step": 242190 }, { "epoch": 0.48925932360201524, "grad_norm": 642.3125610351562, "learning_rate": 6.134956376854251e-06, "loss": 31.3009, "step": 242200 }, { "epoch": 0.489279524234699, "grad_norm": 190.77426147460938, "learning_rate": 6.134616419976908e-06, "loss": 25.9056, "step": 242210 }, { "epoch": 0.4892997248673828, "grad_norm": 554.1141967773438, "learning_rate": 6.134276457569581e-06, "loss": 33.5025, "step": 242220 }, { "epoch": 0.48931992550006664, "grad_norm": 361.7659912109375, "learning_rate": 6.133936489633929e-06, "loss": 14.0255, "step": 242230 }, { "epoch": 0.48934012613275046, "grad_norm": 389.94696044921875, "learning_rate": 6.133596516171609e-06, "loss": 28.9949, "step": 242240 }, { "epoch": 0.4893603267654343, "grad_norm": 
161.18128967285156, "learning_rate": 6.133256537184276e-06, "loss": 19.2994, "step": 242250 }, { "epoch": 0.4893805273981181, "grad_norm": 258.78485107421875, "learning_rate": 6.132916552673588e-06, "loss": 25.4497, "step": 242260 }, { "epoch": 0.4894007280308019, "grad_norm": 90.20409393310547, "learning_rate": 6.132576562641203e-06, "loss": 18.4331, "step": 242270 }, { "epoch": 0.48942092866348574, "grad_norm": 187.44119262695312, "learning_rate": 6.132236567088777e-06, "loss": 18.9665, "step": 242280 }, { "epoch": 0.48944112929616956, "grad_norm": 179.86375427246094, "learning_rate": 6.131896566017967e-06, "loss": 15.6383, "step": 242290 }, { "epoch": 0.4894613299288534, "grad_norm": 829.7676391601562, "learning_rate": 6.13155655943043e-06, "loss": 19.8648, "step": 242300 }, { "epoch": 0.4894815305615372, "grad_norm": 247.04991149902344, "learning_rate": 6.131216547327824e-06, "loss": 10.0677, "step": 242310 }, { "epoch": 0.489501731194221, "grad_norm": 341.4189453125, "learning_rate": 6.130876529711806e-06, "loss": 28.5142, "step": 242320 }, { "epoch": 0.48952193182690484, "grad_norm": 138.14256286621094, "learning_rate": 6.130536506584032e-06, "loss": 32.4667, "step": 242330 }, { "epoch": 0.4895421324595886, "grad_norm": 206.76641845703125, "learning_rate": 6.130196477946162e-06, "loss": 22.5495, "step": 242340 }, { "epoch": 0.48956233309227243, "grad_norm": 207.0066375732422, "learning_rate": 6.12985644379985e-06, "loss": 22.5523, "step": 242350 }, { "epoch": 0.48958253372495625, "grad_norm": 156.70924377441406, "learning_rate": 6.1295164041467545e-06, "loss": 17.6584, "step": 242360 }, { "epoch": 0.48960273435764007, "grad_norm": 151.94192504882812, "learning_rate": 6.129176358988535e-06, "loss": 19.6377, "step": 242370 }, { "epoch": 0.4896229349903239, "grad_norm": 36.215518951416016, "learning_rate": 6.128836308326844e-06, "loss": 15.9161, "step": 242380 }, { "epoch": 0.4896431356230077, "grad_norm": 188.47653198242188, "learning_rate": 6.128496252163344e-06, "loss": 19.3989, "step": 242390 }, { "epoch": 0.48966333625569153, "grad_norm": 352.2079772949219, "learning_rate": 6.128156190499688e-06, "loss": 14.3782, "step": 242400 }, { "epoch": 0.48968353688837535, "grad_norm": 754.5715942382812, "learning_rate": 6.127816123337538e-06, "loss": 17.6178, "step": 242410 }, { "epoch": 0.48970373752105917, "grad_norm": 93.8069076538086, "learning_rate": 6.127476050678548e-06, "loss": 21.4399, "step": 242420 }, { "epoch": 0.489723938153743, "grad_norm": 91.12076568603516, "learning_rate": 6.127135972524376e-06, "loss": 15.9388, "step": 242430 }, { "epoch": 0.4897441387864268, "grad_norm": 537.439208984375, "learning_rate": 6.126795888876681e-06, "loss": 25.951, "step": 242440 }, { "epoch": 0.48976433941911063, "grad_norm": 0.15321671962738037, "learning_rate": 6.1264557997371185e-06, "loss": 11.6078, "step": 242450 }, { "epoch": 0.48978454005179445, "grad_norm": 500.2206726074219, "learning_rate": 6.126115705107347e-06, "loss": 10.685, "step": 242460 }, { "epoch": 0.4898047406844782, "grad_norm": 956.3645629882812, "learning_rate": 6.125775604989025e-06, "loss": 20.7941, "step": 242470 }, { "epoch": 0.48982494131716203, "grad_norm": 65.6939468383789, "learning_rate": 6.125435499383808e-06, "loss": 18.1657, "step": 242480 }, { "epoch": 0.48984514194984585, "grad_norm": 326.1251525878906, "learning_rate": 6.125095388293356e-06, "loss": 18.04, "step": 242490 }, { "epoch": 0.4898653425825297, "grad_norm": 208.84498596191406, "learning_rate": 6.124755271719326e-06, "loss": 7.6182, "step": 242500 
}, { "epoch": 0.4898855432152135, "grad_norm": 89.06510925292969, "learning_rate": 6.124415149663374e-06, "loss": 14.4377, "step": 242510 }, { "epoch": 0.4899057438478973, "grad_norm": 541.0916137695312, "learning_rate": 6.12407502212716e-06, "loss": 22.0965, "step": 242520 }, { "epoch": 0.48992594448058113, "grad_norm": 142.01412963867188, "learning_rate": 6.12373488911234e-06, "loss": 11.4386, "step": 242530 }, { "epoch": 0.48994614511326495, "grad_norm": 1395.0645751953125, "learning_rate": 6.123394750620571e-06, "loss": 22.3274, "step": 242540 }, { "epoch": 0.4899663457459488, "grad_norm": 91.57501983642578, "learning_rate": 6.123054606653515e-06, "loss": 27.7396, "step": 242550 }, { "epoch": 0.4899865463786326, "grad_norm": 351.1894226074219, "learning_rate": 6.122714457212825e-06, "loss": 19.8754, "step": 242560 }, { "epoch": 0.4900067470113164, "grad_norm": 672.3131103515625, "learning_rate": 6.122374302300162e-06, "loss": 47.4908, "step": 242570 }, { "epoch": 0.49002694764400023, "grad_norm": 161.06809997558594, "learning_rate": 6.122034141917183e-06, "loss": 28.3776, "step": 242580 }, { "epoch": 0.49004714827668405, "grad_norm": 273.0909118652344, "learning_rate": 6.121693976065545e-06, "loss": 21.7286, "step": 242590 }, { "epoch": 0.4900673489093678, "grad_norm": 341.38665771484375, "learning_rate": 6.121353804746907e-06, "loss": 12.567, "step": 242600 }, { "epoch": 0.49008754954205164, "grad_norm": 514.90234375, "learning_rate": 6.121013627962925e-06, "loss": 31.1853, "step": 242610 }, { "epoch": 0.49010775017473546, "grad_norm": 1056.0909423828125, "learning_rate": 6.1206734457152615e-06, "loss": 28.3193, "step": 242620 }, { "epoch": 0.4901279508074193, "grad_norm": 118.93157196044922, "learning_rate": 6.12033325800557e-06, "loss": 17.8808, "step": 242630 }, { "epoch": 0.4901481514401031, "grad_norm": 204.60494995117188, "learning_rate": 6.119993064835509e-06, "loss": 12.2071, "step": 242640 }, { "epoch": 0.4901683520727869, "grad_norm": 410.5423583984375, "learning_rate": 6.119652866206739e-06, "loss": 20.8945, "step": 242650 }, { "epoch": 0.49018855270547074, "grad_norm": 348.9383544921875, "learning_rate": 6.119312662120916e-06, "loss": 13.519, "step": 242660 }, { "epoch": 0.49020875333815456, "grad_norm": 170.8218994140625, "learning_rate": 6.118972452579699e-06, "loss": 15.5436, "step": 242670 }, { "epoch": 0.4902289539708384, "grad_norm": 2507.953857421875, "learning_rate": 6.118632237584748e-06, "loss": 32.6182, "step": 242680 }, { "epoch": 0.4902491546035222, "grad_norm": 864.6946411132812, "learning_rate": 6.118292017137716e-06, "loss": 20.8481, "step": 242690 }, { "epoch": 0.490269355236206, "grad_norm": 607.1465454101562, "learning_rate": 6.117951791240265e-06, "loss": 25.8953, "step": 242700 }, { "epoch": 0.49028955586888984, "grad_norm": 1.6512856483459473, "learning_rate": 6.117611559894054e-06, "loss": 20.952, "step": 242710 }, { "epoch": 0.49030975650157366, "grad_norm": 491.7100830078125, "learning_rate": 6.117271323100739e-06, "loss": 18.1705, "step": 242720 }, { "epoch": 0.4903299571342574, "grad_norm": 297.95159912109375, "learning_rate": 6.116931080861979e-06, "loss": 27.1697, "step": 242730 }, { "epoch": 0.49035015776694124, "grad_norm": 85.9748764038086, "learning_rate": 6.116590833179432e-06, "loss": 30.2963, "step": 242740 }, { "epoch": 0.49037035839962506, "grad_norm": 49.89543914794922, "learning_rate": 6.116250580054758e-06, "loss": 17.0189, "step": 242750 }, { "epoch": 0.4903905590323089, "grad_norm": 111.95653533935547, "learning_rate": 
6.115910321489613e-06, "loss": 18.4601, "step": 242760 }, { "epoch": 0.4904107596649927, "grad_norm": 389.2860412597656, "learning_rate": 6.115570057485656e-06, "loss": 13.8408, "step": 242770 }, { "epoch": 0.4904309602976765, "grad_norm": 355.4328918457031, "learning_rate": 6.1152297880445476e-06, "loss": 15.4722, "step": 242780 }, { "epoch": 0.49045116093036034, "grad_norm": 432.9563903808594, "learning_rate": 6.114889513167943e-06, "loss": 19.0556, "step": 242790 }, { "epoch": 0.49047136156304416, "grad_norm": 464.92437744140625, "learning_rate": 6.114549232857503e-06, "loss": 17.4327, "step": 242800 }, { "epoch": 0.490491562195728, "grad_norm": 285.1131591796875, "learning_rate": 6.114208947114883e-06, "loss": 23.6502, "step": 242810 }, { "epoch": 0.4905117628284118, "grad_norm": 493.794677734375, "learning_rate": 6.113868655941747e-06, "loss": 27.5541, "step": 242820 }, { "epoch": 0.4905319634610956, "grad_norm": 34.96025848388672, "learning_rate": 6.1135283593397475e-06, "loss": 21.0633, "step": 242830 }, { "epoch": 0.49055216409377944, "grad_norm": 262.76287841796875, "learning_rate": 6.113188057310548e-06, "loss": 15.749, "step": 242840 }, { "epoch": 0.4905723647264632, "grad_norm": 321.22674560546875, "learning_rate": 6.112847749855804e-06, "loss": 18.4851, "step": 242850 }, { "epoch": 0.490592565359147, "grad_norm": 383.23345947265625, "learning_rate": 6.112507436977175e-06, "loss": 26.9607, "step": 242860 }, { "epoch": 0.49061276599183085, "grad_norm": 1.3280316591262817, "learning_rate": 6.112167118676321e-06, "loss": 16.2132, "step": 242870 }, { "epoch": 0.49063296662451467, "grad_norm": 410.2691650390625, "learning_rate": 6.111826794954896e-06, "loss": 32.892, "step": 242880 }, { "epoch": 0.4906531672571985, "grad_norm": 213.3309326171875, "learning_rate": 6.1114864658145655e-06, "loss": 22.2195, "step": 242890 }, { "epoch": 0.4906733678898823, "grad_norm": 489.11309814453125, "learning_rate": 6.111146131256983e-06, "loss": 15.1283, "step": 242900 }, { "epoch": 0.4906935685225661, "grad_norm": 139.47201538085938, "learning_rate": 6.110805791283809e-06, "loss": 14.9618, "step": 242910 }, { "epoch": 0.49071376915524995, "grad_norm": 233.48304748535156, "learning_rate": 6.110465445896703e-06, "loss": 24.2609, "step": 242920 }, { "epoch": 0.49073396978793377, "grad_norm": 133.65975952148438, "learning_rate": 6.110125095097323e-06, "loss": 22.8066, "step": 242930 }, { "epoch": 0.4907541704206176, "grad_norm": 431.73089599609375, "learning_rate": 6.109784738887327e-06, "loss": 13.4274, "step": 242940 }, { "epoch": 0.4907743710533014, "grad_norm": 384.90521240234375, "learning_rate": 6.109444377268376e-06, "loss": 31.618, "step": 242950 }, { "epoch": 0.4907945716859852, "grad_norm": 913.1565551757812, "learning_rate": 6.109104010242127e-06, "loss": 30.0312, "step": 242960 }, { "epoch": 0.49081477231866905, "grad_norm": 501.4960632324219, "learning_rate": 6.10876363781024e-06, "loss": 25.9956, "step": 242970 }, { "epoch": 0.4908349729513528, "grad_norm": 190.05662536621094, "learning_rate": 6.108423259974375e-06, "loss": 12.8584, "step": 242980 }, { "epoch": 0.49085517358403663, "grad_norm": 281.55047607421875, "learning_rate": 6.108082876736185e-06, "loss": 17.4782, "step": 242990 }, { "epoch": 0.49087537421672045, "grad_norm": 406.5660705566406, "learning_rate": 6.107742488097338e-06, "loss": 27.0723, "step": 243000 }, { "epoch": 0.49089557484940427, "grad_norm": 318.7551574707031, "learning_rate": 6.107402094059485e-06, "loss": 14.0688, "step": 243010 }, { "epoch": 
0.4909157754820881, "grad_norm": 64.70513153076172, "learning_rate": 6.107061694624291e-06, "loss": 10.8414, "step": 243020 }, { "epoch": 0.4909359761147719, "grad_norm": 131.3190155029297, "learning_rate": 6.1067212897934115e-06, "loss": 29.9489, "step": 243030 }, { "epoch": 0.49095617674745573, "grad_norm": 686.302001953125, "learning_rate": 6.106380879568507e-06, "loss": 26.9483, "step": 243040 }, { "epoch": 0.49097637738013955, "grad_norm": 609.8250732421875, "learning_rate": 6.106040463951237e-06, "loss": 26.0329, "step": 243050 }, { "epoch": 0.49099657801282337, "grad_norm": 180.33126831054688, "learning_rate": 6.105700042943258e-06, "loss": 30.505, "step": 243060 }, { "epoch": 0.4910167786455072, "grad_norm": 273.23455810546875, "learning_rate": 6.105359616546232e-06, "loss": 13.8137, "step": 243070 }, { "epoch": 0.491036979278191, "grad_norm": 520.9368286132812, "learning_rate": 6.105019184761818e-06, "loss": 19.1669, "step": 243080 }, { "epoch": 0.49105717991087483, "grad_norm": 85.96605682373047, "learning_rate": 6.104678747591674e-06, "loss": 13.7416, "step": 243090 }, { "epoch": 0.49107738054355865, "grad_norm": 491.14019775390625, "learning_rate": 6.10433830503746e-06, "loss": 10.9353, "step": 243100 }, { "epoch": 0.4910975811762424, "grad_norm": 325.0634460449219, "learning_rate": 6.1039978571008355e-06, "loss": 16.0997, "step": 243110 }, { "epoch": 0.49111778180892623, "grad_norm": 607.64111328125, "learning_rate": 6.103657403783458e-06, "loss": 17.9357, "step": 243120 }, { "epoch": 0.49113798244161005, "grad_norm": 355.9501953125, "learning_rate": 6.103316945086989e-06, "loss": 16.597, "step": 243130 }, { "epoch": 0.4911581830742939, "grad_norm": 448.55682373046875, "learning_rate": 6.102976481013086e-06, "loss": 30.567, "step": 243140 }, { "epoch": 0.4911783837069777, "grad_norm": 284.8009033203125, "learning_rate": 6.102636011563411e-06, "loss": 23.6241, "step": 243150 }, { "epoch": 0.4911985843396615, "grad_norm": 155.9999542236328, "learning_rate": 6.102295536739622e-06, "loss": 13.3552, "step": 243160 }, { "epoch": 0.49121878497234533, "grad_norm": 208.71463012695312, "learning_rate": 6.101955056543376e-06, "loss": 33.184, "step": 243170 }, { "epoch": 0.49123898560502915, "grad_norm": 539.50341796875, "learning_rate": 6.101614570976336e-06, "loss": 25.0127, "step": 243180 }, { "epoch": 0.491259186237713, "grad_norm": 100.99124145507812, "learning_rate": 6.101274080040161e-06, "loss": 27.4514, "step": 243190 }, { "epoch": 0.4912793868703968, "grad_norm": 693.3148193359375, "learning_rate": 6.100933583736508e-06, "loss": 17.5249, "step": 243200 }, { "epoch": 0.4912995875030806, "grad_norm": 150.2967987060547, "learning_rate": 6.10059308206704e-06, "loss": 29.5014, "step": 243210 }, { "epoch": 0.49131978813576443, "grad_norm": 358.312255859375, "learning_rate": 6.100252575033413e-06, "loss": 28.4867, "step": 243220 }, { "epoch": 0.49133998876844825, "grad_norm": 285.40460205078125, "learning_rate": 6.0999120626372895e-06, "loss": 30.495, "step": 243230 }, { "epoch": 0.491360189401132, "grad_norm": 596.3458862304688, "learning_rate": 6.099571544880328e-06, "loss": 30.6144, "step": 243240 }, { "epoch": 0.49138039003381584, "grad_norm": 765.2393188476562, "learning_rate": 6.099231021764188e-06, "loss": 35.0226, "step": 243250 }, { "epoch": 0.49140059066649966, "grad_norm": 424.7465515136719, "learning_rate": 6.098890493290529e-06, "loss": 15.3136, "step": 243260 }, { "epoch": 0.4914207912991835, "grad_norm": 634.1060180664062, "learning_rate": 6.0985499594610136e-06, "loss": 
13.1761, "step": 243270 }, { "epoch": 0.4914409919318673, "grad_norm": 74.19820404052734, "learning_rate": 6.098209420277294e-06, "loss": 13.936, "step": 243280 }, { "epoch": 0.4914611925645511, "grad_norm": 488.5048522949219, "learning_rate": 6.097868875741039e-06, "loss": 25.0321, "step": 243290 }, { "epoch": 0.49148139319723494, "grad_norm": 130.7450714111328, "learning_rate": 6.097528325853903e-06, "loss": 7.7867, "step": 243300 }, { "epoch": 0.49150159382991876, "grad_norm": 580.2894287109375, "learning_rate": 6.0971877706175465e-06, "loss": 16.2283, "step": 243310 }, { "epoch": 0.4915217944626026, "grad_norm": 281.7629089355469, "learning_rate": 6.09684721003363e-06, "loss": 17.5864, "step": 243320 }, { "epoch": 0.4915419950952864, "grad_norm": 540.7857055664062, "learning_rate": 6.096506644103813e-06, "loss": 16.8208, "step": 243330 }, { "epoch": 0.4915621957279702, "grad_norm": 92.08300018310547, "learning_rate": 6.096166072829757e-06, "loss": 11.1677, "step": 243340 }, { "epoch": 0.49158239636065404, "grad_norm": 549.4891357421875, "learning_rate": 6.095825496213119e-06, "loss": 15.3587, "step": 243350 }, { "epoch": 0.4916025969933378, "grad_norm": 290.5025634765625, "learning_rate": 6.095484914255561e-06, "loss": 28.3565, "step": 243360 }, { "epoch": 0.4916227976260216, "grad_norm": 1124.6158447265625, "learning_rate": 6.0951443269587426e-06, "loss": 22.2388, "step": 243370 }, { "epoch": 0.49164299825870544, "grad_norm": 138.3731231689453, "learning_rate": 6.094803734324324e-06, "loss": 9.4421, "step": 243380 }, { "epoch": 0.49166319889138926, "grad_norm": 135.51327514648438, "learning_rate": 6.094463136353964e-06, "loss": 18.7498, "step": 243390 }, { "epoch": 0.4916833995240731, "grad_norm": 226.020263671875, "learning_rate": 6.094122533049324e-06, "loss": 31.4517, "step": 243400 }, { "epoch": 0.4917036001567569, "grad_norm": 2.206104278564453, "learning_rate": 6.093781924412063e-06, "loss": 12.6963, "step": 243410 }, { "epoch": 0.4917238007894407, "grad_norm": 43.28728485107422, "learning_rate": 6.093441310443842e-06, "loss": 10.0299, "step": 243420 }, { "epoch": 0.49174400142212454, "grad_norm": 497.6747741699219, "learning_rate": 6.093100691146321e-06, "loss": 23.7381, "step": 243430 }, { "epoch": 0.49176420205480836, "grad_norm": 508.7585144042969, "learning_rate": 6.0927600665211575e-06, "loss": 22.6063, "step": 243440 }, { "epoch": 0.4917844026874922, "grad_norm": 144.22523498535156, "learning_rate": 6.092419436570016e-06, "loss": 10.8433, "step": 243450 }, { "epoch": 0.491804603320176, "grad_norm": 283.6905822753906, "learning_rate": 6.092078801294554e-06, "loss": 9.281, "step": 243460 }, { "epoch": 0.4918248039528598, "grad_norm": 548.2412719726562, "learning_rate": 6.091738160696433e-06, "loss": 27.9931, "step": 243470 }, { "epoch": 0.49184500458554364, "grad_norm": 353.6029357910156, "learning_rate": 6.091397514777313e-06, "loss": 18.0369, "step": 243480 }, { "epoch": 0.4918652052182274, "grad_norm": 95.61614227294922, "learning_rate": 6.091056863538851e-06, "loss": 16.5977, "step": 243490 }, { "epoch": 0.49188540585091123, "grad_norm": 160.9171600341797, "learning_rate": 6.090716206982714e-06, "loss": 24.112, "step": 243500 }, { "epoch": 0.49190560648359505, "grad_norm": 591.6284790039062, "learning_rate": 6.090375545110556e-06, "loss": 29.5164, "step": 243510 }, { "epoch": 0.49192580711627887, "grad_norm": 400.39166259765625, "learning_rate": 6.090034877924041e-06, "loss": 23.6562, "step": 243520 }, { "epoch": 0.4919460077489627, "grad_norm": 3.3571367263793945, 
"learning_rate": 6.089694205424827e-06, "loss": 15.9371, "step": 243530 }, { "epoch": 0.4919662083816465, "grad_norm": 420.609375, "learning_rate": 6.089353527614577e-06, "loss": 14.1265, "step": 243540 }, { "epoch": 0.49198640901433033, "grad_norm": 522.5365600585938, "learning_rate": 6.08901284449495e-06, "loss": 22.5624, "step": 243550 }, { "epoch": 0.49200660964701415, "grad_norm": 277.09735107421875, "learning_rate": 6.088672156067607e-06, "loss": 17.8786, "step": 243560 }, { "epoch": 0.49202681027969797, "grad_norm": 254.58782958984375, "learning_rate": 6.088331462334206e-06, "loss": 17.6637, "step": 243570 }, { "epoch": 0.4920470109123818, "grad_norm": 76.53742980957031, "learning_rate": 6.0879907632964095e-06, "loss": 21.8426, "step": 243580 }, { "epoch": 0.4920672115450656, "grad_norm": 231.54798889160156, "learning_rate": 6.087650058955879e-06, "loss": 18.5493, "step": 243590 }, { "epoch": 0.49208741217774943, "grad_norm": 326.02880859375, "learning_rate": 6.087309349314275e-06, "loss": 18.1888, "step": 243600 }, { "epoch": 0.49210761281043325, "grad_norm": 459.2764892578125, "learning_rate": 6.086968634373256e-06, "loss": 38.9905, "step": 243610 }, { "epoch": 0.492127813443117, "grad_norm": 6.301657676696777, "learning_rate": 6.086627914134482e-06, "loss": 34.4115, "step": 243620 }, { "epoch": 0.49214801407580083, "grad_norm": 271.61883544921875, "learning_rate": 6.086287188599617e-06, "loss": 8.9168, "step": 243630 }, { "epoch": 0.49216821470848465, "grad_norm": 144.28807067871094, "learning_rate": 6.085946457770321e-06, "loss": 21.0509, "step": 243640 }, { "epoch": 0.4921884153411685, "grad_norm": 667.0874633789062, "learning_rate": 6.085605721648253e-06, "loss": 20.3682, "step": 243650 }, { "epoch": 0.4922086159738523, "grad_norm": 364.631103515625, "learning_rate": 6.085264980235075e-06, "loss": 19.8767, "step": 243660 }, { "epoch": 0.4922288166065361, "grad_norm": 464.1360168457031, "learning_rate": 6.084924233532444e-06, "loss": 12.8749, "step": 243670 }, { "epoch": 0.49224901723921993, "grad_norm": 260.3604736328125, "learning_rate": 6.084583481542028e-06, "loss": 10.4999, "step": 243680 }, { "epoch": 0.49226921787190375, "grad_norm": 134.682373046875, "learning_rate": 6.084242724265481e-06, "loss": 31.0584, "step": 243690 }, { "epoch": 0.4922894185045876, "grad_norm": 706.572265625, "learning_rate": 6.083901961704467e-06, "loss": 22.7602, "step": 243700 }, { "epoch": 0.4923096191372714, "grad_norm": 399.3299865722656, "learning_rate": 6.083561193860646e-06, "loss": 18.2616, "step": 243710 }, { "epoch": 0.4923298197699552, "grad_norm": 220.2206268310547, "learning_rate": 6.083220420735681e-06, "loss": 21.117, "step": 243720 }, { "epoch": 0.49235002040263903, "grad_norm": 304.3521423339844, "learning_rate": 6.08287964233123e-06, "loss": 15.1435, "step": 243730 }, { "epoch": 0.49237022103532285, "grad_norm": 2.1963284015655518, "learning_rate": 6.082538858648954e-06, "loss": 17.5348, "step": 243740 }, { "epoch": 0.4923904216680066, "grad_norm": 200.71510314941406, "learning_rate": 6.0821980696905145e-06, "loss": 21.6039, "step": 243750 }, { "epoch": 0.49241062230069044, "grad_norm": 290.66455078125, "learning_rate": 6.081857275457574e-06, "loss": 20.226, "step": 243760 }, { "epoch": 0.49243082293337426, "grad_norm": 276.683349609375, "learning_rate": 6.081516475951793e-06, "loss": 20.0125, "step": 243770 }, { "epoch": 0.4924510235660581, "grad_norm": 239.0572967529297, "learning_rate": 6.081175671174831e-06, "loss": 29.0291, "step": 243780 }, { "epoch": 0.4924712241987419, 
"grad_norm": 219.093994140625, "learning_rate": 6.0808348611283505e-06, "loss": 15.7579, "step": 243790 }, { "epoch": 0.4924914248314257, "grad_norm": 328.8896484375, "learning_rate": 6.080494045814011e-06, "loss": 12.5564, "step": 243800 }, { "epoch": 0.49251162546410954, "grad_norm": 2166.62646484375, "learning_rate": 6.080153225233475e-06, "loss": 22.8739, "step": 243810 }, { "epoch": 0.49253182609679336, "grad_norm": 174.4705810546875, "learning_rate": 6.079812399388404e-06, "loss": 9.6937, "step": 243820 }, { "epoch": 0.4925520267294772, "grad_norm": 1052.1917724609375, "learning_rate": 6.079471568280456e-06, "loss": 20.7519, "step": 243830 }, { "epoch": 0.492572227362161, "grad_norm": 328.778564453125, "learning_rate": 6.079130731911298e-06, "loss": 18.7361, "step": 243840 }, { "epoch": 0.4925924279948448, "grad_norm": 151.9322509765625, "learning_rate": 6.078789890282585e-06, "loss": 29.0582, "step": 243850 }, { "epoch": 0.49261262862752864, "grad_norm": 42.79109191894531, "learning_rate": 6.078449043395982e-06, "loss": 23.3692, "step": 243860 }, { "epoch": 0.49263282926021246, "grad_norm": 350.9899597167969, "learning_rate": 6.078108191253148e-06, "loss": 21.4167, "step": 243870 }, { "epoch": 0.4926530298928962, "grad_norm": 176.45204162597656, "learning_rate": 6.077767333855748e-06, "loss": 14.1641, "step": 243880 }, { "epoch": 0.49267323052558004, "grad_norm": 81.61347961425781, "learning_rate": 6.077426471205439e-06, "loss": 18.7199, "step": 243890 }, { "epoch": 0.49269343115826386, "grad_norm": 381.9842224121094, "learning_rate": 6.077085603303883e-06, "loss": 22.57, "step": 243900 }, { "epoch": 0.4927136317909477, "grad_norm": 185.9649200439453, "learning_rate": 6.076744730152744e-06, "loss": 23.6399, "step": 243910 }, { "epoch": 0.4927338324236315, "grad_norm": 548.74755859375, "learning_rate": 6.07640385175368e-06, "loss": 28.5055, "step": 243920 }, { "epoch": 0.4927540330563153, "grad_norm": 489.41876220703125, "learning_rate": 6.076062968108357e-06, "loss": 16.5337, "step": 243930 }, { "epoch": 0.49277423368899914, "grad_norm": 278.4815673828125, "learning_rate": 6.0757220792184314e-06, "loss": 23.2221, "step": 243940 }, { "epoch": 0.49279443432168296, "grad_norm": 220.00218200683594, "learning_rate": 6.075381185085568e-06, "loss": 16.0409, "step": 243950 }, { "epoch": 0.4928146349543668, "grad_norm": 15.607489585876465, "learning_rate": 6.075040285711427e-06, "loss": 18.7818, "step": 243960 }, { "epoch": 0.4928348355870506, "grad_norm": 366.9059753417969, "learning_rate": 6.074699381097669e-06, "loss": 16.8994, "step": 243970 }, { "epoch": 0.4928550362197344, "grad_norm": 417.60693359375, "learning_rate": 6.074358471245957e-06, "loss": 9.3807, "step": 243980 }, { "epoch": 0.49287523685241824, "grad_norm": 746.2815551757812, "learning_rate": 6.074017556157952e-06, "loss": 31.5113, "step": 243990 }, { "epoch": 0.492895437485102, "grad_norm": 147.13330078125, "learning_rate": 6.073676635835317e-06, "loss": 19.8945, "step": 244000 }, { "epoch": 0.4929156381177858, "grad_norm": 546.3687744140625, "learning_rate": 6.073335710279711e-06, "loss": 19.3393, "step": 244010 }, { "epoch": 0.49293583875046965, "grad_norm": 1691.22998046875, "learning_rate": 6.072994779492798e-06, "loss": 27.1752, "step": 244020 }, { "epoch": 0.49295603938315347, "grad_norm": 597.4349365234375, "learning_rate": 6.072653843476237e-06, "loss": 24.8999, "step": 244030 }, { "epoch": 0.4929762400158373, "grad_norm": 486.0323791503906, "learning_rate": 6.072312902231692e-06, "loss": 17.299, "step": 244040 }, 
{ "epoch": 0.4929964406485211, "grad_norm": 390.08251953125, "learning_rate": 6.071971955760823e-06, "loss": 29.5478, "step": 244050 }, { "epoch": 0.4930166412812049, "grad_norm": 837.5003662109375, "learning_rate": 6.071631004065296e-06, "loss": 17.5977, "step": 244060 }, { "epoch": 0.49303684191388875, "grad_norm": 456.85968017578125, "learning_rate": 6.071290047146767e-06, "loss": 25.1069, "step": 244070 }, { "epoch": 0.49305704254657257, "grad_norm": 499.1066589355469, "learning_rate": 6.0709490850069e-06, "loss": 18.4598, "step": 244080 }, { "epoch": 0.4930772431792564, "grad_norm": 422.30560302734375, "learning_rate": 6.070608117647359e-06, "loss": 20.2345, "step": 244090 }, { "epoch": 0.4930974438119402, "grad_norm": 369.7128601074219, "learning_rate": 6.0702671450698005e-06, "loss": 14.9042, "step": 244100 }, { "epoch": 0.493117644444624, "grad_norm": 386.27862548828125, "learning_rate": 6.069926167275893e-06, "loss": 14.8961, "step": 244110 }, { "epoch": 0.49313784507730785, "grad_norm": 621.8338012695312, "learning_rate": 6.069585184267292e-06, "loss": 20.4106, "step": 244120 }, { "epoch": 0.4931580457099916, "grad_norm": 261.2494201660156, "learning_rate": 6.069244196045666e-06, "loss": 12.0623, "step": 244130 }, { "epoch": 0.49317824634267543, "grad_norm": 512.4036254882812, "learning_rate": 6.068903202612672e-06, "loss": 22.5381, "step": 244140 }, { "epoch": 0.49319844697535925, "grad_norm": 678.3642578125, "learning_rate": 6.068562203969972e-06, "loss": 19.9685, "step": 244150 }, { "epoch": 0.49321864760804307, "grad_norm": 988.5474243164062, "learning_rate": 6.068221200119232e-06, "loss": 12.505, "step": 244160 }, { "epoch": 0.4932388482407269, "grad_norm": 586.0654907226562, "learning_rate": 6.06788019106211e-06, "loss": 16.9205, "step": 244170 }, { "epoch": 0.4932590488734107, "grad_norm": 863.3224487304688, "learning_rate": 6.067539176800269e-06, "loss": 13.7263, "step": 244180 }, { "epoch": 0.49327924950609453, "grad_norm": 410.1658935546875, "learning_rate": 6.067198157335372e-06, "loss": 12.6717, "step": 244190 }, { "epoch": 0.49329945013877835, "grad_norm": 1063.82666015625, "learning_rate": 6.066857132669081e-06, "loss": 24.6441, "step": 244200 }, { "epoch": 0.49331965077146217, "grad_norm": 192.57086181640625, "learning_rate": 6.066516102803057e-06, "loss": 22.914, "step": 244210 }, { "epoch": 0.493339851404146, "grad_norm": 11.151764869689941, "learning_rate": 6.066175067738964e-06, "loss": 14.5301, "step": 244220 }, { "epoch": 0.4933600520368298, "grad_norm": 318.1488952636719, "learning_rate": 6.065834027478462e-06, "loss": 22.5188, "step": 244230 }, { "epoch": 0.49338025266951363, "grad_norm": 526.05078125, "learning_rate": 6.0654929820232146e-06, "loss": 11.8208, "step": 244240 }, { "epoch": 0.49340045330219745, "grad_norm": 362.418212890625, "learning_rate": 6.065151931374884e-06, "loss": 11.3675, "step": 244250 }, { "epoch": 0.4934206539348812, "grad_norm": 324.4506530761719, "learning_rate": 6.0648108755351305e-06, "loss": 23.5681, "step": 244260 }, { "epoch": 0.49344085456756503, "grad_norm": 296.7850646972656, "learning_rate": 6.06446981450562e-06, "loss": 16.1006, "step": 244270 }, { "epoch": 0.49346105520024885, "grad_norm": 910.4213256835938, "learning_rate": 6.0641287482880105e-06, "loss": 24.6717, "step": 244280 }, { "epoch": 0.4934812558329327, "grad_norm": 241.55018615722656, "learning_rate": 6.0637876768839696e-06, "loss": 20.5909, "step": 244290 }, { "epoch": 0.4935014564656165, "grad_norm": 1268.6630859375, "learning_rate": 
6.0634466002951545e-06, "loss": 44.0809, "step": 244300 }, { "epoch": 0.4935216570983003, "grad_norm": 61.69091033935547, "learning_rate": 6.06310551852323e-06, "loss": 18.8025, "step": 244310 }, { "epoch": 0.49354185773098413, "grad_norm": 571.124755859375, "learning_rate": 6.0627644315698575e-06, "loss": 24.0975, "step": 244320 }, { "epoch": 0.49356205836366795, "grad_norm": 273.9856872558594, "learning_rate": 6.062423339436701e-06, "loss": 26.4877, "step": 244330 }, { "epoch": 0.4935822589963518, "grad_norm": 248.83633422851562, "learning_rate": 6.062082242125422e-06, "loss": 27.1512, "step": 244340 }, { "epoch": 0.4936024596290356, "grad_norm": 507.28875732421875, "learning_rate": 6.061741139637682e-06, "loss": 21.4276, "step": 244350 }, { "epoch": 0.4936226602617194, "grad_norm": 1421.45849609375, "learning_rate": 6.061400031975147e-06, "loss": 22.241, "step": 244360 }, { "epoch": 0.49364286089440323, "grad_norm": 259.8501892089844, "learning_rate": 6.061058919139474e-06, "loss": 17.8136, "step": 244370 }, { "epoch": 0.49366306152708705, "grad_norm": 608.4784545898438, "learning_rate": 6.060717801132329e-06, "loss": 24.4676, "step": 244380 }, { "epoch": 0.4936832621597708, "grad_norm": 439.1976623535156, "learning_rate": 6.060376677955375e-06, "loss": 11.4727, "step": 244390 }, { "epoch": 0.49370346279245464, "grad_norm": 355.0526428222656, "learning_rate": 6.060035549610275e-06, "loss": 29.9297, "step": 244400 }, { "epoch": 0.49372366342513846, "grad_norm": 439.33917236328125, "learning_rate": 6.0596944160986885e-06, "loss": 26.6631, "step": 244410 }, { "epoch": 0.4937438640578223, "grad_norm": 313.279541015625, "learning_rate": 6.0593532774222796e-06, "loss": 15.9866, "step": 244420 }, { "epoch": 0.4937640646905061, "grad_norm": 462.3720703125, "learning_rate": 6.059012133582713e-06, "loss": 31.3824, "step": 244430 }, { "epoch": 0.4937842653231899, "grad_norm": 561.0098876953125, "learning_rate": 6.058670984581647e-06, "loss": 17.5214, "step": 244440 }, { "epoch": 0.49380446595587374, "grad_norm": 229.17897033691406, "learning_rate": 6.058329830420749e-06, "loss": 20.2344, "step": 244450 }, { "epoch": 0.49382466658855756, "grad_norm": 624.6414794921875, "learning_rate": 6.057988671101679e-06, "loss": 16.3288, "step": 244460 }, { "epoch": 0.4938448672212414, "grad_norm": 838.2815551757812, "learning_rate": 6.057647506626101e-06, "loss": 24.2822, "step": 244470 }, { "epoch": 0.4938650678539252, "grad_norm": 188.3896484375, "learning_rate": 6.057306336995677e-06, "loss": 7.962, "step": 244480 }, { "epoch": 0.493885268486609, "grad_norm": 261.6092529296875, "learning_rate": 6.056965162212072e-06, "loss": 22.7007, "step": 244490 }, { "epoch": 0.49390546911929284, "grad_norm": 3.9006917476654053, "learning_rate": 6.056623982276945e-06, "loss": 14.9307, "step": 244500 }, { "epoch": 0.49392566975197666, "grad_norm": 634.9256591796875, "learning_rate": 6.05628279719196e-06, "loss": 25.0206, "step": 244510 }, { "epoch": 0.4939458703846604, "grad_norm": 368.6424255371094, "learning_rate": 6.0559416069587814e-06, "loss": 11.8245, "step": 244520 }, { "epoch": 0.49396607101734424, "grad_norm": 246.29586791992188, "learning_rate": 6.055600411579072e-06, "loss": 13.7323, "step": 244530 }, { "epoch": 0.49398627165002806, "grad_norm": 645.8884887695312, "learning_rate": 6.055259211054496e-06, "loss": 25.4784, "step": 244540 }, { "epoch": 0.4940064722827119, "grad_norm": 189.35472106933594, "learning_rate": 6.0549180053867114e-06, "loss": 11.4991, "step": 244550 }, { "epoch": 0.4940266729153957, 
"grad_norm": 3.043370008468628, "learning_rate": 6.054576794577387e-06, "loss": 14.5078, "step": 244560 }, { "epoch": 0.4940468735480795, "grad_norm": 370.82861328125, "learning_rate": 6.054235578628181e-06, "loss": 24.295, "step": 244570 }, { "epoch": 0.49406707418076334, "grad_norm": 152.5736846923828, "learning_rate": 6.053894357540761e-06, "loss": 9.8684, "step": 244580 }, { "epoch": 0.49408727481344716, "grad_norm": 544.0726318359375, "learning_rate": 6.053553131316785e-06, "loss": 18.8866, "step": 244590 }, { "epoch": 0.494107475446131, "grad_norm": 434.5020751953125, "learning_rate": 6.0532118999579206e-06, "loss": 22.273, "step": 244600 }, { "epoch": 0.4941276760788148, "grad_norm": 438.2577209472656, "learning_rate": 6.052870663465829e-06, "loss": 19.9347, "step": 244610 }, { "epoch": 0.4941478767114986, "grad_norm": 195.21304321289062, "learning_rate": 6.0525294218421735e-06, "loss": 12.3461, "step": 244620 }, { "epoch": 0.49416807734418244, "grad_norm": 554.744140625, "learning_rate": 6.052188175088617e-06, "loss": 29.516, "step": 244630 }, { "epoch": 0.4941882779768662, "grad_norm": 140.444580078125, "learning_rate": 6.051846923206824e-06, "loss": 10.9421, "step": 244640 }, { "epoch": 0.49420847860955003, "grad_norm": 567.934814453125, "learning_rate": 6.051505666198454e-06, "loss": 18.0941, "step": 244650 }, { "epoch": 0.49422867924223385, "grad_norm": 302.12542724609375, "learning_rate": 6.051164404065175e-06, "loss": 17.2545, "step": 244660 }, { "epoch": 0.49424887987491767, "grad_norm": 167.5221405029297, "learning_rate": 6.050823136808649e-06, "loss": 16.8512, "step": 244670 }, { "epoch": 0.4942690805076015, "grad_norm": 256.09210205078125, "learning_rate": 6.050481864430536e-06, "loss": 22.8571, "step": 244680 }, { "epoch": 0.4942892811402853, "grad_norm": 413.8460388183594, "learning_rate": 6.050140586932504e-06, "loss": 21.3646, "step": 244690 }, { "epoch": 0.49430948177296913, "grad_norm": 259.9608459472656, "learning_rate": 6.049799304316214e-06, "loss": 18.28, "step": 244700 }, { "epoch": 0.49432968240565295, "grad_norm": 309.09161376953125, "learning_rate": 6.0494580165833275e-06, "loss": 12.0999, "step": 244710 }, { "epoch": 0.49434988303833677, "grad_norm": 35.23161315917969, "learning_rate": 6.049116723735512e-06, "loss": 22.8078, "step": 244720 }, { "epoch": 0.4943700836710206, "grad_norm": 24.270263671875, "learning_rate": 6.048775425774426e-06, "loss": 18.4349, "step": 244730 }, { "epoch": 0.4943902843037044, "grad_norm": 253.10581970214844, "learning_rate": 6.048434122701738e-06, "loss": 23.2509, "step": 244740 }, { "epoch": 0.49441048493638823, "grad_norm": 193.04750061035156, "learning_rate": 6.048092814519109e-06, "loss": 14.6667, "step": 244750 }, { "epoch": 0.49443068556907205, "grad_norm": 55.17918395996094, "learning_rate": 6.047751501228203e-06, "loss": 16.3477, "step": 244760 }, { "epoch": 0.4944508862017558, "grad_norm": 168.7065887451172, "learning_rate": 6.047410182830684e-06, "loss": 23.7309, "step": 244770 }, { "epoch": 0.49447108683443963, "grad_norm": 811.4564208984375, "learning_rate": 6.047068859328213e-06, "loss": 25.548, "step": 244780 }, { "epoch": 0.49449128746712345, "grad_norm": 322.5340270996094, "learning_rate": 6.046727530722456e-06, "loss": 17.0893, "step": 244790 }, { "epoch": 0.4945114880998073, "grad_norm": 417.874755859375, "learning_rate": 6.046386197015076e-06, "loss": 39.5569, "step": 244800 }, { "epoch": 0.4945316887324911, "grad_norm": 485.04205322265625, "learning_rate": 6.046044858207737e-06, "loss": 19.9152, "step": 
244810 }, { "epoch": 0.4945518893651749, "grad_norm": 586.47802734375, "learning_rate": 6.045703514302101e-06, "loss": 30.8003, "step": 244820 }, { "epoch": 0.49457208999785873, "grad_norm": 0.0, "learning_rate": 6.045362165299835e-06, "loss": 14.0122, "step": 244830 }, { "epoch": 0.49459229063054255, "grad_norm": 497.6436462402344, "learning_rate": 6.0450208112026e-06, "loss": 24.4296, "step": 244840 }, { "epoch": 0.4946124912632264, "grad_norm": 107.70179748535156, "learning_rate": 6.044679452012059e-06, "loss": 21.6951, "step": 244850 }, { "epoch": 0.4946326918959102, "grad_norm": 1035.3358154296875, "learning_rate": 6.044338087729878e-06, "loss": 39.922, "step": 244860 }, { "epoch": 0.494652892528594, "grad_norm": 615.2496948242188, "learning_rate": 6.04399671835772e-06, "loss": 26.6206, "step": 244870 }, { "epoch": 0.49467309316127783, "grad_norm": 252.15406799316406, "learning_rate": 6.043655343897249e-06, "loss": 14.9972, "step": 244880 }, { "epoch": 0.49469329379396165, "grad_norm": 9.638592720031738, "learning_rate": 6.043313964350126e-06, "loss": 10.2801, "step": 244890 }, { "epoch": 0.4947134944266454, "grad_norm": 297.4844970703125, "learning_rate": 6.04297257971802e-06, "loss": 21.5559, "step": 244900 }, { "epoch": 0.49473369505932924, "grad_norm": 33.992958068847656, "learning_rate": 6.0426311900025905e-06, "loss": 26.3419, "step": 244910 }, { "epoch": 0.49475389569201306, "grad_norm": 322.1072998046875, "learning_rate": 6.042289795205504e-06, "loss": 23.7903, "step": 244920 }, { "epoch": 0.4947740963246969, "grad_norm": 347.6963806152344, "learning_rate": 6.041948395328423e-06, "loss": 15.5776, "step": 244930 }, { "epoch": 0.4947942969573807, "grad_norm": 481.201416015625, "learning_rate": 6.041606990373012e-06, "loss": 18.0799, "step": 244940 }, { "epoch": 0.4948144975900645, "grad_norm": 299.6612548828125, "learning_rate": 6.041265580340935e-06, "loss": 18.2105, "step": 244950 }, { "epoch": 0.49483469822274834, "grad_norm": 0.0, "learning_rate": 6.040924165233856e-06, "loss": 14.3527, "step": 244960 }, { "epoch": 0.49485489885543216, "grad_norm": 501.0664978027344, "learning_rate": 6.040582745053438e-06, "loss": 18.6845, "step": 244970 }, { "epoch": 0.494875099488116, "grad_norm": 568.0910034179688, "learning_rate": 6.040241319801346e-06, "loss": 19.2887, "step": 244980 }, { "epoch": 0.4948953001207998, "grad_norm": 417.7993469238281, "learning_rate": 6.039899889479246e-06, "loss": 17.6804, "step": 244990 }, { "epoch": 0.4949155007534836, "grad_norm": 128.64581298828125, "learning_rate": 6.039558454088796e-06, "loss": 6.8771, "step": 245000 }, { "epoch": 0.49493570138616744, "grad_norm": 101.71302032470703, "learning_rate": 6.039217013631668e-06, "loss": 13.3712, "step": 245010 }, { "epoch": 0.49495590201885126, "grad_norm": 80.38015747070312, "learning_rate": 6.0388755681095216e-06, "loss": 19.9883, "step": 245020 }, { "epoch": 0.494976102651535, "grad_norm": 143.52117919921875, "learning_rate": 6.038534117524021e-06, "loss": 19.8221, "step": 245030 }, { "epoch": 0.49499630328421884, "grad_norm": 601.686767578125, "learning_rate": 6.038192661876832e-06, "loss": 25.4875, "step": 245040 }, { "epoch": 0.49501650391690266, "grad_norm": 464.03424072265625, "learning_rate": 6.0378512011696155e-06, "loss": 14.3996, "step": 245050 }, { "epoch": 0.4950367045495865, "grad_norm": 484.98480224609375, "learning_rate": 6.03750973540404e-06, "loss": 15.8818, "step": 245060 }, { "epoch": 0.4950569051822703, "grad_norm": 37.089900970458984, "learning_rate": 6.037168264581767e-06, "loss": 
22.9769, "step": 245070 }, { "epoch": 0.4950771058149541, "grad_norm": 60.080257415771484, "learning_rate": 6.036826788704463e-06, "loss": 11.9262, "step": 245080 }, { "epoch": 0.49509730644763794, "grad_norm": 528.3023681640625, "learning_rate": 6.03648530777379e-06, "loss": 24.5875, "step": 245090 }, { "epoch": 0.49511750708032176, "grad_norm": 225.19271850585938, "learning_rate": 6.036143821791413e-06, "loss": 10.0997, "step": 245100 }, { "epoch": 0.4951377077130056, "grad_norm": 266.2395324707031, "learning_rate": 6.035802330758997e-06, "loss": 15.1098, "step": 245110 }, { "epoch": 0.4951579083456894, "grad_norm": 207.41989135742188, "learning_rate": 6.0354608346782075e-06, "loss": 32.8293, "step": 245120 }, { "epoch": 0.4951781089783732, "grad_norm": 154.92733764648438, "learning_rate": 6.035119333550705e-06, "loss": 15.8353, "step": 245130 }, { "epoch": 0.49519830961105704, "grad_norm": 770.8162231445312, "learning_rate": 6.034777827378157e-06, "loss": 31.304, "step": 245140 }, { "epoch": 0.49521851024374086, "grad_norm": 2.7887516021728516, "learning_rate": 6.03443631616223e-06, "loss": 12.0799, "step": 245150 }, { "epoch": 0.4952387108764246, "grad_norm": 568.9775390625, "learning_rate": 6.034094799904583e-06, "loss": 24.7265, "step": 245160 }, { "epoch": 0.49525891150910845, "grad_norm": 582.31640625, "learning_rate": 6.0337532786068846e-06, "loss": 33.4081, "step": 245170 }, { "epoch": 0.49527911214179227, "grad_norm": 827.0950317382812, "learning_rate": 6.033411752270798e-06, "loss": 16.2784, "step": 245180 }, { "epoch": 0.4952993127744761, "grad_norm": 376.9583740234375, "learning_rate": 6.033070220897988e-06, "loss": 26.7607, "step": 245190 }, { "epoch": 0.4953195134071599, "grad_norm": 850.7732543945312, "learning_rate": 6.032728684490118e-06, "loss": 29.897, "step": 245200 }, { "epoch": 0.4953397140398437, "grad_norm": 354.1410217285156, "learning_rate": 6.032387143048853e-06, "loss": 20.4953, "step": 245210 }, { "epoch": 0.49535991467252755, "grad_norm": 787.7244873046875, "learning_rate": 6.032045596575862e-06, "loss": 25.6331, "step": 245220 }, { "epoch": 0.49538011530521137, "grad_norm": 369.9317626953125, "learning_rate": 6.031704045072803e-06, "loss": 12.0731, "step": 245230 }, { "epoch": 0.4954003159378952, "grad_norm": 848.404296875, "learning_rate": 6.031362488541344e-06, "loss": 21.9786, "step": 245240 }, { "epoch": 0.495420516570579, "grad_norm": 169.3268280029297, "learning_rate": 6.031020926983149e-06, "loss": 14.8847, "step": 245250 }, { "epoch": 0.4954407172032628, "grad_norm": 28.975000381469727, "learning_rate": 6.030679360399883e-06, "loss": 20.2139, "step": 245260 }, { "epoch": 0.49546091783594665, "grad_norm": 76.87944793701172, "learning_rate": 6.030337788793212e-06, "loss": 17.1178, "step": 245270 }, { "epoch": 0.4954811184686304, "grad_norm": 118.92074584960938, "learning_rate": 6.029996212164799e-06, "loss": 18.049, "step": 245280 }, { "epoch": 0.49550131910131423, "grad_norm": 865.7176513671875, "learning_rate": 6.029654630516308e-06, "loss": 24.4408, "step": 245290 }, { "epoch": 0.49552151973399805, "grad_norm": 629.9857177734375, "learning_rate": 6.029313043849407e-06, "loss": 28.9345, "step": 245300 }, { "epoch": 0.49554172036668187, "grad_norm": 344.41119384765625, "learning_rate": 6.02897145216576e-06, "loss": 19.0488, "step": 245310 }, { "epoch": 0.4955619209993657, "grad_norm": 280.0577697753906, "learning_rate": 6.0286298554670275e-06, "loss": 20.772, "step": 245320 }, { "epoch": 0.4955821216320495, "grad_norm": 757.3346557617188, 
"learning_rate": 6.028288253754882e-06, "loss": 26.6095, "step": 245330 }, { "epoch": 0.49560232226473333, "grad_norm": 819.9552001953125, "learning_rate": 6.02794664703098e-06, "loss": 20.3762, "step": 245340 }, { "epoch": 0.49562252289741715, "grad_norm": 885.9320068359375, "learning_rate": 6.027605035296994e-06, "loss": 40.4341, "step": 245350 }, { "epoch": 0.49564272353010097, "grad_norm": 553.4578247070312, "learning_rate": 6.027263418554585e-06, "loss": 18.8369, "step": 245360 }, { "epoch": 0.4956629241627848, "grad_norm": 336.39227294921875, "learning_rate": 6.026921796805417e-06, "loss": 24.2733, "step": 245370 }, { "epoch": 0.4956831247954686, "grad_norm": 319.5438537597656, "learning_rate": 6.026580170051158e-06, "loss": 17.5498, "step": 245380 }, { "epoch": 0.49570332542815243, "grad_norm": 568.3425903320312, "learning_rate": 6.026238538293472e-06, "loss": 20.4626, "step": 245390 }, { "epoch": 0.49572352606083625, "grad_norm": 375.4813537597656, "learning_rate": 6.025896901534023e-06, "loss": 17.9613, "step": 245400 }, { "epoch": 0.49574372669352, "grad_norm": 345.9901428222656, "learning_rate": 6.025555259774478e-06, "loss": 29.2855, "step": 245410 }, { "epoch": 0.49576392732620383, "grad_norm": 692.1480102539062, "learning_rate": 6.025213613016501e-06, "loss": 21.2442, "step": 245420 }, { "epoch": 0.49578412795888765, "grad_norm": 4.171411514282227, "learning_rate": 6.024871961261756e-06, "loss": 16.2202, "step": 245430 }, { "epoch": 0.4958043285915715, "grad_norm": 748.0377197265625, "learning_rate": 6.024530304511911e-06, "loss": 23.804, "step": 245440 }, { "epoch": 0.4958245292242553, "grad_norm": 177.20375061035156, "learning_rate": 6.024188642768628e-06, "loss": 17.9429, "step": 245450 }, { "epoch": 0.4958447298569391, "grad_norm": 311.2812194824219, "learning_rate": 6.023846976033574e-06, "loss": 13.1427, "step": 245460 }, { "epoch": 0.49586493048962293, "grad_norm": 366.9995422363281, "learning_rate": 6.0235053043084155e-06, "loss": 38.7233, "step": 245470 }, { "epoch": 0.49588513112230675, "grad_norm": 176.72760009765625, "learning_rate": 6.023163627594813e-06, "loss": 24.8769, "step": 245480 }, { "epoch": 0.4959053317549906, "grad_norm": 608.5573120117188, "learning_rate": 6.022821945894439e-06, "loss": 25.3785, "step": 245490 }, { "epoch": 0.4959255323876744, "grad_norm": 260.3470764160156, "learning_rate": 6.022480259208951e-06, "loss": 28.7517, "step": 245500 }, { "epoch": 0.4959457330203582, "grad_norm": 43.951263427734375, "learning_rate": 6.022138567540023e-06, "loss": 16.2127, "step": 245510 }, { "epoch": 0.49596593365304203, "grad_norm": 650.9738159179688, "learning_rate": 6.021796870889311e-06, "loss": 20.0046, "step": 245520 }, { "epoch": 0.49598613428572585, "grad_norm": 903.1184692382812, "learning_rate": 6.0214551692584875e-06, "loss": 34.6528, "step": 245530 }, { "epoch": 0.4960063349184096, "grad_norm": 209.4825439453125, "learning_rate": 6.021113462649215e-06, "loss": 14.7326, "step": 245540 }, { "epoch": 0.49602653555109344, "grad_norm": 390.3143615722656, "learning_rate": 6.020771751063159e-06, "loss": 22.1797, "step": 245550 }, { "epoch": 0.49604673618377726, "grad_norm": 700.5403442382812, "learning_rate": 6.020430034501986e-06, "loss": 24.8418, "step": 245560 }, { "epoch": 0.4960669368164611, "grad_norm": 308.3287048339844, "learning_rate": 6.02008831296736e-06, "loss": 18.195, "step": 245570 }, { "epoch": 0.4960871374491449, "grad_norm": 594.5611572265625, "learning_rate": 6.019746586460947e-06, "loss": 19.572, "step": 245580 }, { "epoch": 
0.4961073380818287, "grad_norm": 128.37220764160156, "learning_rate": 6.019404854984413e-06, "loss": 18.2383, "step": 245590 }, { "epoch": 0.49612753871451254, "grad_norm": 186.79930114746094, "learning_rate": 6.019063118539425e-06, "loss": 10.4839, "step": 245600 }, { "epoch": 0.49614773934719636, "grad_norm": 349.23187255859375, "learning_rate": 6.018721377127644e-06, "loss": 24.1001, "step": 245610 }, { "epoch": 0.4961679399798802, "grad_norm": 326.8674621582031, "learning_rate": 6.018379630750741e-06, "loss": 13.3707, "step": 245620 }, { "epoch": 0.496188140612564, "grad_norm": 289.19384765625, "learning_rate": 6.018037879410379e-06, "loss": 21.4503, "step": 245630 }, { "epoch": 0.4962083412452478, "grad_norm": 779.3234252929688, "learning_rate": 6.017696123108223e-06, "loss": 22.3518, "step": 245640 }, { "epoch": 0.49622854187793164, "grad_norm": 441.6877746582031, "learning_rate": 6.01735436184594e-06, "loss": 34.47, "step": 245650 }, { "epoch": 0.49624874251061546, "grad_norm": 401.6028137207031, "learning_rate": 6.0170125956251935e-06, "loss": 14.7449, "step": 245660 }, { "epoch": 0.4962689431432992, "grad_norm": 321.62713623046875, "learning_rate": 6.016670824447653e-06, "loss": 15.5, "step": 245670 }, { "epoch": 0.49628914377598304, "grad_norm": 222.53184509277344, "learning_rate": 6.0163290483149826e-06, "loss": 14.4939, "step": 245680 }, { "epoch": 0.49630934440866686, "grad_norm": 392.558349609375, "learning_rate": 6.0159872672288464e-06, "loss": 16.2634, "step": 245690 }, { "epoch": 0.4963295450413507, "grad_norm": 132.4796905517578, "learning_rate": 6.015645481190912e-06, "loss": 12.5828, "step": 245700 }, { "epoch": 0.4963497456740345, "grad_norm": 474.45721435546875, "learning_rate": 6.0153036902028435e-06, "loss": 28.608, "step": 245710 }, { "epoch": 0.4963699463067183, "grad_norm": 367.1876525878906, "learning_rate": 6.01496189426631e-06, "loss": 28.0048, "step": 245720 }, { "epoch": 0.49639014693940214, "grad_norm": 465.5663146972656, "learning_rate": 6.014620093382975e-06, "loss": 24.2542, "step": 245730 }, { "epoch": 0.49641034757208596, "grad_norm": 300.58331298828125, "learning_rate": 6.014278287554503e-06, "loss": 23.917, "step": 245740 }, { "epoch": 0.4964305482047698, "grad_norm": 362.2306823730469, "learning_rate": 6.013936476782563e-06, "loss": 23.4033, "step": 245750 }, { "epoch": 0.4964507488374536, "grad_norm": 233.8451385498047, "learning_rate": 6.01359466106882e-06, "loss": 17.0515, "step": 245760 }, { "epoch": 0.4964709494701374, "grad_norm": 190.70643615722656, "learning_rate": 6.013252840414938e-06, "loss": 15.8295, "step": 245770 }, { "epoch": 0.49649115010282124, "grad_norm": 432.47198486328125, "learning_rate": 6.012911014822586e-06, "loss": 18.9468, "step": 245780 }, { "epoch": 0.49651135073550506, "grad_norm": 445.8046875, "learning_rate": 6.012569184293427e-06, "loss": 16.7549, "step": 245790 }, { "epoch": 0.49653155136818883, "grad_norm": 206.34117126464844, "learning_rate": 6.01222734882913e-06, "loss": 17.8298, "step": 245800 }, { "epoch": 0.49655175200087265, "grad_norm": 230.3909454345703, "learning_rate": 6.0118855084313595e-06, "loss": 16.422, "step": 245810 }, { "epoch": 0.49657195263355647, "grad_norm": 522.9651489257812, "learning_rate": 6.011543663101781e-06, "loss": 23.1855, "step": 245820 }, { "epoch": 0.4965921532662403, "grad_norm": 408.2215881347656, "learning_rate": 6.011201812842062e-06, "loss": 10.4352, "step": 245830 }, { "epoch": 0.4966123538989241, "grad_norm": 618.5833740234375, "learning_rate": 6.010859957653869e-06, 
"loss": 28.716, "step": 245840 }, { "epoch": 0.49663255453160793, "grad_norm": 220.0326690673828, "learning_rate": 6.010518097538866e-06, "loss": 11.8949, "step": 245850 }, { "epoch": 0.49665275516429175, "grad_norm": 402.2642822265625, "learning_rate": 6.010176232498719e-06, "loss": 22.2256, "step": 245860 }, { "epoch": 0.49667295579697557, "grad_norm": 377.92431640625, "learning_rate": 6.009834362535097e-06, "loss": 18.0756, "step": 245870 }, { "epoch": 0.4966931564296594, "grad_norm": 485.857421875, "learning_rate": 6.009492487649666e-06, "loss": 16.4151, "step": 245880 }, { "epoch": 0.4967133570623432, "grad_norm": 932.8764038085938, "learning_rate": 6.00915060784409e-06, "loss": 23.2241, "step": 245890 }, { "epoch": 0.49673355769502703, "grad_norm": 539.0462036132812, "learning_rate": 6.008808723120035e-06, "loss": 22.4672, "step": 245900 }, { "epoch": 0.49675375832771085, "grad_norm": 514.6181030273438, "learning_rate": 6.0084668334791695e-06, "loss": 13.4019, "step": 245910 }, { "epoch": 0.4967739589603946, "grad_norm": 276.28265380859375, "learning_rate": 6.0081249389231615e-06, "loss": 15.6526, "step": 245920 }, { "epoch": 0.49679415959307843, "grad_norm": 175.6931610107422, "learning_rate": 6.00778303945367e-06, "loss": 11.918, "step": 245930 }, { "epoch": 0.49681436022576225, "grad_norm": 493.6187438964844, "learning_rate": 6.007441135072371e-06, "loss": 36.7857, "step": 245940 }, { "epoch": 0.4968345608584461, "grad_norm": 29.675748825073242, "learning_rate": 6.007099225780922e-06, "loss": 26.3928, "step": 245950 }, { "epoch": 0.4968547614911299, "grad_norm": 336.9743957519531, "learning_rate": 6.0067573115809965e-06, "loss": 29.182, "step": 245960 }, { "epoch": 0.4968749621238137, "grad_norm": 319.0456237792969, "learning_rate": 6.006415392474256e-06, "loss": 22.9448, "step": 245970 }, { "epoch": 0.49689516275649753, "grad_norm": 780.112548828125, "learning_rate": 6.00607346846237e-06, "loss": 29.6289, "step": 245980 }, { "epoch": 0.49691536338918135, "grad_norm": 108.16007232666016, "learning_rate": 6.005731539547004e-06, "loss": 23.4389, "step": 245990 }, { "epoch": 0.4969355640218652, "grad_norm": 53.6325569152832, "learning_rate": 6.005389605729824e-06, "loss": 13.6468, "step": 246000 }, { "epoch": 0.496955764654549, "grad_norm": 2831.445556640625, "learning_rate": 6.005047667012498e-06, "loss": 34.1367, "step": 246010 }, { "epoch": 0.4969759652872328, "grad_norm": 519.7115478515625, "learning_rate": 6.00470572339669e-06, "loss": 36.3964, "step": 246020 }, { "epoch": 0.49699616591991663, "grad_norm": 222.3622589111328, "learning_rate": 6.004363774884069e-06, "loss": 11.4347, "step": 246030 }, { "epoch": 0.49701636655260045, "grad_norm": 364.50537109375, "learning_rate": 6.0040218214763e-06, "loss": 27.0293, "step": 246040 }, { "epoch": 0.4970365671852842, "grad_norm": 283.5711669921875, "learning_rate": 6.003679863175052e-06, "loss": 14.0792, "step": 246050 }, { "epoch": 0.49705676781796804, "grad_norm": 416.9042053222656, "learning_rate": 6.003337899981989e-06, "loss": 14.5198, "step": 246060 }, { "epoch": 0.49707696845065186, "grad_norm": 477.2958679199219, "learning_rate": 6.002995931898779e-06, "loss": 25.2685, "step": 246070 }, { "epoch": 0.4970971690833357, "grad_norm": 440.80322265625, "learning_rate": 6.00265395892709e-06, "loss": 21.909, "step": 246080 }, { "epoch": 0.4971173697160195, "grad_norm": 544.5230102539062, "learning_rate": 6.002311981068584e-06, "loss": 27.2489, "step": 246090 }, { "epoch": 0.4971375703487033, "grad_norm": 136.77589416503906, 
"learning_rate": 6.001969998324932e-06, "loss": 31.3055, "step": 246100 }, { "epoch": 0.49715777098138714, "grad_norm": 658.7988891601562, "learning_rate": 6.0016280106978e-06, "loss": 31.7758, "step": 246110 }, { "epoch": 0.49717797161407096, "grad_norm": 379.6677551269531, "learning_rate": 6.001286018188856e-06, "loss": 13.2379, "step": 246120 }, { "epoch": 0.4971981722467548, "grad_norm": 260.6626892089844, "learning_rate": 6.000944020799764e-06, "loss": 8.8193, "step": 246130 }, { "epoch": 0.4972183728794386, "grad_norm": 377.12347412109375, "learning_rate": 6.000602018532193e-06, "loss": 26.2481, "step": 246140 }, { "epoch": 0.4972385735121224, "grad_norm": 6.269888877868652, "learning_rate": 6.000260011387809e-06, "loss": 17.3226, "step": 246150 }, { "epoch": 0.49725877414480624, "grad_norm": 206.4148406982422, "learning_rate": 5.999917999368278e-06, "loss": 22.2819, "step": 246160 }, { "epoch": 0.49727897477749006, "grad_norm": 248.40609741210938, "learning_rate": 5.999575982475269e-06, "loss": 26.5883, "step": 246170 }, { "epoch": 0.4972991754101738, "grad_norm": 159.02389526367188, "learning_rate": 5.999233960710447e-06, "loss": 15.2111, "step": 246180 }, { "epoch": 0.49731937604285764, "grad_norm": 361.6206359863281, "learning_rate": 5.99889193407548e-06, "loss": 13.3076, "step": 246190 }, { "epoch": 0.49733957667554146, "grad_norm": 463.2092590332031, "learning_rate": 5.9985499025720354e-06, "loss": 16.4278, "step": 246200 }, { "epoch": 0.4973597773082253, "grad_norm": 1122.7664794921875, "learning_rate": 5.998207866201781e-06, "loss": 51.8289, "step": 246210 }, { "epoch": 0.4973799779409091, "grad_norm": 850.292236328125, "learning_rate": 5.99786582496638e-06, "loss": 25.6876, "step": 246220 }, { "epoch": 0.4974001785735929, "grad_norm": 540.2310180664062, "learning_rate": 5.9975237788675034e-06, "loss": 20.2985, "step": 246230 }, { "epoch": 0.49742037920627674, "grad_norm": 601.8073120117188, "learning_rate": 5.997181727906816e-06, "loss": 27.2044, "step": 246240 }, { "epoch": 0.49744057983896056, "grad_norm": 1.2072869539260864, "learning_rate": 5.996839672085986e-06, "loss": 17.3348, "step": 246250 }, { "epoch": 0.4974607804716444, "grad_norm": 472.5961608886719, "learning_rate": 5.996497611406682e-06, "loss": 20.9375, "step": 246260 }, { "epoch": 0.4974809811043282, "grad_norm": 66.0411605834961, "learning_rate": 5.996155545870566e-06, "loss": 7.7181, "step": 246270 }, { "epoch": 0.497501181737012, "grad_norm": 386.55584716796875, "learning_rate": 5.995813475479313e-06, "loss": 10.0573, "step": 246280 }, { "epoch": 0.49752138236969584, "grad_norm": 406.94683837890625, "learning_rate": 5.995471400234584e-06, "loss": 13.8961, "step": 246290 }, { "epoch": 0.49754158300237966, "grad_norm": 317.17633056640625, "learning_rate": 5.995129320138047e-06, "loss": 11.3014, "step": 246300 }, { "epoch": 0.4975617836350634, "grad_norm": 399.6728820800781, "learning_rate": 5.994787235191372e-06, "loss": 13.9557, "step": 246310 }, { "epoch": 0.49758198426774725, "grad_norm": 367.6829833984375, "learning_rate": 5.994445145396223e-06, "loss": 23.8694, "step": 246320 }, { "epoch": 0.49760218490043107, "grad_norm": 453.9792785644531, "learning_rate": 5.994103050754271e-06, "loss": 22.6538, "step": 246330 }, { "epoch": 0.4976223855331149, "grad_norm": 559.6007690429688, "learning_rate": 5.99376095126718e-06, "loss": 26.9563, "step": 246340 }, { "epoch": 0.4976425861657987, "grad_norm": 649.7808227539062, "learning_rate": 5.993418846936619e-06, "loss": 35.5436, "step": 246350 }, { "epoch": 
0.4976627867984825, "grad_norm": 264.1085510253906, "learning_rate": 5.993076737764254e-06, "loss": 28.9309, "step": 246360 }, { "epoch": 0.49768298743116635, "grad_norm": 277.17120361328125, "learning_rate": 5.9927346237517554e-06, "loss": 17.9807, "step": 246370 }, { "epoch": 0.49770318806385017, "grad_norm": 223.109130859375, "learning_rate": 5.992392504900786e-06, "loss": 11.1396, "step": 246380 }, { "epoch": 0.497723388696534, "grad_norm": 549.0020751953125, "learning_rate": 5.9920503812130196e-06, "loss": 20.5239, "step": 246390 }, { "epoch": 0.4977435893292178, "grad_norm": 256.79583740234375, "learning_rate": 5.991708252690117e-06, "loss": 22.6415, "step": 246400 }, { "epoch": 0.4977637899619016, "grad_norm": 874.1477661132812, "learning_rate": 5.991366119333749e-06, "loss": 19.1742, "step": 246410 }, { "epoch": 0.49778399059458545, "grad_norm": 209.1161346435547, "learning_rate": 5.991023981145585e-06, "loss": 16.0386, "step": 246420 }, { "epoch": 0.4978041912272692, "grad_norm": 412.027587890625, "learning_rate": 5.990681838127287e-06, "loss": 10.3335, "step": 246430 }, { "epoch": 0.49782439185995303, "grad_norm": 180.9374542236328, "learning_rate": 5.990339690280528e-06, "loss": 15.9683, "step": 246440 }, { "epoch": 0.49784459249263685, "grad_norm": 772.1251831054688, "learning_rate": 5.989997537606973e-06, "loss": 27.9863, "step": 246450 }, { "epoch": 0.49786479312532067, "grad_norm": 225.46536254882812, "learning_rate": 5.9896553801082906e-06, "loss": 14.2124, "step": 246460 }, { "epoch": 0.4978849937580045, "grad_norm": 174.2469482421875, "learning_rate": 5.989313217786146e-06, "loss": 18.991, "step": 246470 }, { "epoch": 0.4979051943906883, "grad_norm": 240.0699462890625, "learning_rate": 5.988971050642211e-06, "loss": 22.1914, "step": 246480 }, { "epoch": 0.49792539502337213, "grad_norm": 232.75013732910156, "learning_rate": 5.98862887867815e-06, "loss": 20.932, "step": 246490 }, { "epoch": 0.49794559565605595, "grad_norm": 236.2301788330078, "learning_rate": 5.988286701895631e-06, "loss": 29.7626, "step": 246500 }, { "epoch": 0.49796579628873977, "grad_norm": 1126.671875, "learning_rate": 5.987944520296324e-06, "loss": 17.8402, "step": 246510 }, { "epoch": 0.4979859969214236, "grad_norm": 151.82958984375, "learning_rate": 5.987602333881894e-06, "loss": 6.5217, "step": 246520 }, { "epoch": 0.4980061975541074, "grad_norm": 62.12467956542969, "learning_rate": 5.987260142654013e-06, "loss": 18.377, "step": 246530 }, { "epoch": 0.49802639818679123, "grad_norm": 371.71392822265625, "learning_rate": 5.986917946614341e-06, "loss": 10.6581, "step": 246540 }, { "epoch": 0.49804659881947505, "grad_norm": 400.0055847167969, "learning_rate": 5.986575745764553e-06, "loss": 22.4769, "step": 246550 }, { "epoch": 0.4980667994521588, "grad_norm": 335.1949157714844, "learning_rate": 5.986233540106315e-06, "loss": 21.2484, "step": 246560 }, { "epoch": 0.49808700008484263, "grad_norm": 464.89678955078125, "learning_rate": 5.985891329641294e-06, "loss": 23.757, "step": 246570 }, { "epoch": 0.49810720071752645, "grad_norm": 457.4198303222656, "learning_rate": 5.985549114371158e-06, "loss": 27.9353, "step": 246580 }, { "epoch": 0.4981274013502103, "grad_norm": 350.82025146484375, "learning_rate": 5.985206894297575e-06, "loss": 25.8677, "step": 246590 }, { "epoch": 0.4981476019828941, "grad_norm": 319.5579833984375, "learning_rate": 5.984864669422214e-06, "loss": 21.2135, "step": 246600 }, { "epoch": 0.4981678026155779, "grad_norm": 274.120849609375, "learning_rate": 5.9845224397467415e-06, 
"loss": 20.1567, "step": 246610 }, { "epoch": 0.49818800324826173, "grad_norm": 688.2853393554688, "learning_rate": 5.984180205272826e-06, "loss": 36.7907, "step": 246620 }, { "epoch": 0.49820820388094555, "grad_norm": 60.4741096496582, "learning_rate": 5.9838379660021354e-06, "loss": 22.8639, "step": 246630 }, { "epoch": 0.4982284045136294, "grad_norm": 178.92361450195312, "learning_rate": 5.983495721936337e-06, "loss": 18.5735, "step": 246640 }, { "epoch": 0.4982486051463132, "grad_norm": 42.693153381347656, "learning_rate": 5.9831534730771e-06, "loss": 18.6057, "step": 246650 }, { "epoch": 0.498268805778997, "grad_norm": 119.7516860961914, "learning_rate": 5.982811219426095e-06, "loss": 8.0343, "step": 246660 }, { "epoch": 0.49828900641168083, "grad_norm": 23.96198844909668, "learning_rate": 5.982468960984984e-06, "loss": 23.6935, "step": 246670 }, { "epoch": 0.49830920704436465, "grad_norm": 732.6522827148438, "learning_rate": 5.9821266977554395e-06, "loss": 24.4514, "step": 246680 }, { "epoch": 0.4983294076770484, "grad_norm": 169.86744689941406, "learning_rate": 5.981784429739129e-06, "loss": 8.1248, "step": 246690 }, { "epoch": 0.49834960830973224, "grad_norm": 861.8601684570312, "learning_rate": 5.98144215693772e-06, "loss": 22.5713, "step": 246700 }, { "epoch": 0.49836980894241606, "grad_norm": 105.1708984375, "learning_rate": 5.981099879352882e-06, "loss": 11.4246, "step": 246710 }, { "epoch": 0.4983900095750999, "grad_norm": 41.55343246459961, "learning_rate": 5.9807575969862796e-06, "loss": 12.4922, "step": 246720 }, { "epoch": 0.4984102102077837, "grad_norm": 314.224853515625, "learning_rate": 5.980415309839586e-06, "loss": 21.6313, "step": 246730 }, { "epoch": 0.4984304108404675, "grad_norm": 238.9948272705078, "learning_rate": 5.9800730179144665e-06, "loss": 18.5021, "step": 246740 }, { "epoch": 0.49845061147315134, "grad_norm": 353.32952880859375, "learning_rate": 5.979730721212589e-06, "loss": 20.3609, "step": 246750 }, { "epoch": 0.49847081210583516, "grad_norm": 521.1556396484375, "learning_rate": 5.979388419735625e-06, "loss": 12.2863, "step": 246760 }, { "epoch": 0.498491012738519, "grad_norm": 656.0430297851562, "learning_rate": 5.979046113485237e-06, "loss": 21.5405, "step": 246770 }, { "epoch": 0.4985112133712028, "grad_norm": 234.09738159179688, "learning_rate": 5.978703802463101e-06, "loss": 22.0934, "step": 246780 }, { "epoch": 0.4985314140038866, "grad_norm": 338.0233459472656, "learning_rate": 5.9783614866708785e-06, "loss": 27.4046, "step": 246790 }, { "epoch": 0.49855161463657044, "grad_norm": 473.8854064941406, "learning_rate": 5.978019166110242e-06, "loss": 16.6194, "step": 246800 }, { "epoch": 0.49857181526925426, "grad_norm": 235.27053833007812, "learning_rate": 5.977676840782858e-06, "loss": 29.6591, "step": 246810 }, { "epoch": 0.498592015901938, "grad_norm": 297.90838623046875, "learning_rate": 5.977334510690397e-06, "loss": 21.9008, "step": 246820 }, { "epoch": 0.49861221653462184, "grad_norm": 1039.3126220703125, "learning_rate": 5.9769921758345254e-06, "loss": 26.4737, "step": 246830 }, { "epoch": 0.49863241716730566, "grad_norm": 552.1608276367188, "learning_rate": 5.976649836216912e-06, "loss": 15.7347, "step": 246840 }, { "epoch": 0.4986526177999895, "grad_norm": 323.9834289550781, "learning_rate": 5.976307491839226e-06, "loss": 31.6713, "step": 246850 }, { "epoch": 0.4986728184326733, "grad_norm": 341.1391906738281, "learning_rate": 5.975965142703135e-06, "loss": 37.8399, "step": 246860 }, { "epoch": 0.4986930190653571, "grad_norm": 
399.14068603515625, "learning_rate": 5.97562278881031e-06, "loss": 16.2871, "step": 246870 }, { "epoch": 0.49871321969804094, "grad_norm": 130.58828735351562, "learning_rate": 5.975280430162416e-06, "loss": 21.46, "step": 246880 }, { "epoch": 0.49873342033072476, "grad_norm": 412.4732666015625, "learning_rate": 5.974938066761124e-06, "loss": 17.2512, "step": 246890 }, { "epoch": 0.4987536209634086, "grad_norm": 516.6444091796875, "learning_rate": 5.974595698608103e-06, "loss": 10.0092, "step": 246900 }, { "epoch": 0.4987738215960924, "grad_norm": 509.6756896972656, "learning_rate": 5.974253325705021e-06, "loss": 16.4154, "step": 246910 }, { "epoch": 0.4987940222287762, "grad_norm": 544.9634399414062, "learning_rate": 5.973910948053545e-06, "loss": 231.9874, "step": 246920 }, { "epoch": 0.49881422286146004, "grad_norm": 778.5720825195312, "learning_rate": 5.973568565655345e-06, "loss": 32.9222, "step": 246930 }, { "epoch": 0.49883442349414386, "grad_norm": 67.91162109375, "learning_rate": 5.973226178512093e-06, "loss": 11.5947, "step": 246940 }, { "epoch": 0.49885462412682763, "grad_norm": 171.77670288085938, "learning_rate": 5.972883786625452e-06, "loss": 20.4505, "step": 246950 }, { "epoch": 0.49887482475951145, "grad_norm": 252.06382751464844, "learning_rate": 5.972541389997093e-06, "loss": 28.5218, "step": 246960 }, { "epoch": 0.49889502539219527, "grad_norm": 397.5555114746094, "learning_rate": 5.972198988628686e-06, "loss": 18.3177, "step": 246970 }, { "epoch": 0.4989152260248791, "grad_norm": 260.4038391113281, "learning_rate": 5.9718565825219e-06, "loss": 15.8065, "step": 246980 }, { "epoch": 0.4989354266575629, "grad_norm": 368.58477783203125, "learning_rate": 5.971514171678401e-06, "loss": 14.7694, "step": 246990 }, { "epoch": 0.49895562729024673, "grad_norm": 570.1238403320312, "learning_rate": 5.97117175609986e-06, "loss": 22.3038, "step": 247000 }, { "epoch": 0.49897582792293055, "grad_norm": 369.3289489746094, "learning_rate": 5.970829335787946e-06, "loss": 14.8231, "step": 247010 }, { "epoch": 0.49899602855561437, "grad_norm": 505.1964416503906, "learning_rate": 5.9704869107443285e-06, "loss": 25.5647, "step": 247020 }, { "epoch": 0.4990162291882982, "grad_norm": 618.31396484375, "learning_rate": 5.970144480970676e-06, "loss": 19.858, "step": 247030 }, { "epoch": 0.499036429820982, "grad_norm": 393.52008056640625, "learning_rate": 5.969802046468655e-06, "loss": 29.8178, "step": 247040 }, { "epoch": 0.49905663045366583, "grad_norm": 172.65785217285156, "learning_rate": 5.969459607239938e-06, "loss": 9.4711, "step": 247050 }, { "epoch": 0.49907683108634965, "grad_norm": 759.6141357421875, "learning_rate": 5.969117163286191e-06, "loss": 46.934, "step": 247060 }, { "epoch": 0.4990970317190334, "grad_norm": 377.9792785644531, "learning_rate": 5.968774714609086e-06, "loss": 29.1313, "step": 247070 }, { "epoch": 0.49911723235171723, "grad_norm": 218.08351135253906, "learning_rate": 5.96843226121029e-06, "loss": 13.6984, "step": 247080 }, { "epoch": 0.49913743298440105, "grad_norm": 219.00875854492188, "learning_rate": 5.968089803091471e-06, "loss": 44.8839, "step": 247090 }, { "epoch": 0.4991576336170849, "grad_norm": 50.55318069458008, "learning_rate": 5.967747340254303e-06, "loss": 19.8831, "step": 247100 }, { "epoch": 0.4991778342497687, "grad_norm": 2.389132499694824, "learning_rate": 5.967404872700449e-06, "loss": 23.1413, "step": 247110 }, { "epoch": 0.4991980348824525, "grad_norm": 376.08905029296875, "learning_rate": 5.967062400431583e-06, "loss": 21.1722, "step": 247120 }, 
{ "epoch": 0.49921823551513633, "grad_norm": 579.8134155273438, "learning_rate": 5.96671992344937e-06, "loss": 17.7002, "step": 247130 }, { "epoch": 0.49923843614782015, "grad_norm": 359.7259826660156, "learning_rate": 5.966377441755482e-06, "loss": 24.0683, "step": 247140 }, { "epoch": 0.499258636780504, "grad_norm": 246.16751098632812, "learning_rate": 5.966034955351588e-06, "loss": 19.4919, "step": 247150 }, { "epoch": 0.4992788374131878, "grad_norm": 211.00054931640625, "learning_rate": 5.965692464239358e-06, "loss": 22.6834, "step": 247160 }, { "epoch": 0.4992990380458716, "grad_norm": 0.9173119068145752, "learning_rate": 5.965349968420458e-06, "loss": 14.9866, "step": 247170 }, { "epoch": 0.49931923867855543, "grad_norm": 418.03851318359375, "learning_rate": 5.965007467896561e-06, "loss": 25.7016, "step": 247180 }, { "epoch": 0.49933943931123925, "grad_norm": 340.6460876464844, "learning_rate": 5.9646649626693335e-06, "loss": 11.2745, "step": 247190 }, { "epoch": 0.499359639943923, "grad_norm": 226.1961669921875, "learning_rate": 5.964322452740445e-06, "loss": 27.3938, "step": 247200 }, { "epoch": 0.49937984057660684, "grad_norm": 790.119384765625, "learning_rate": 5.963979938111569e-06, "loss": 28.7273, "step": 247210 }, { "epoch": 0.49940004120929066, "grad_norm": 382.8558654785156, "learning_rate": 5.9636374187843686e-06, "loss": 29.2865, "step": 247220 }, { "epoch": 0.4994202418419745, "grad_norm": 1164.3673095703125, "learning_rate": 5.963294894760518e-06, "loss": 22.1883, "step": 247230 }, { "epoch": 0.4994404424746583, "grad_norm": 700.4686889648438, "learning_rate": 5.962952366041685e-06, "loss": 16.1192, "step": 247240 }, { "epoch": 0.4994606431073421, "grad_norm": 296.78546142578125, "learning_rate": 5.962609832629538e-06, "loss": 14.6575, "step": 247250 }, { "epoch": 0.49948084374002594, "grad_norm": 8.724777221679688, "learning_rate": 5.962267294525747e-06, "loss": 26.685, "step": 247260 }, { "epoch": 0.49950104437270976, "grad_norm": 466.71270751953125, "learning_rate": 5.961924751731985e-06, "loss": 27.2313, "step": 247270 }, { "epoch": 0.4995212450053936, "grad_norm": 497.3843078613281, "learning_rate": 5.961582204249915e-06, "loss": 30.4072, "step": 247280 }, { "epoch": 0.4995414456380774, "grad_norm": 196.78329467773438, "learning_rate": 5.961239652081211e-06, "loss": 13.2455, "step": 247290 }, { "epoch": 0.4995616462707612, "grad_norm": 404.5957946777344, "learning_rate": 5.960897095227541e-06, "loss": 12.8591, "step": 247300 }, { "epoch": 0.49958184690344504, "grad_norm": 254.23377990722656, "learning_rate": 5.960554533690576e-06, "loss": 27.1197, "step": 247310 }, { "epoch": 0.49960204753612886, "grad_norm": 0.0, "learning_rate": 5.9602119674719846e-06, "loss": 27.6494, "step": 247320 }, { "epoch": 0.4996222481688126, "grad_norm": 210.02853393554688, "learning_rate": 5.959869396573435e-06, "loss": 12.8636, "step": 247330 }, { "epoch": 0.49964244880149644, "grad_norm": 314.3268737792969, "learning_rate": 5.959526820996602e-06, "loss": 17.5871, "step": 247340 }, { "epoch": 0.49966264943418026, "grad_norm": 675.576904296875, "learning_rate": 5.959184240743149e-06, "loss": 21.24, "step": 247350 }, { "epoch": 0.4996828500668641, "grad_norm": 470.36102294921875, "learning_rate": 5.958841655814749e-06, "loss": 9.8373, "step": 247360 }, { "epoch": 0.4997030506995479, "grad_norm": 463.9069519042969, "learning_rate": 5.958499066213071e-06, "loss": 18.167, "step": 247370 }, { "epoch": 0.4997232513322317, "grad_norm": 44.60001754760742, "learning_rate": 5.958156471939783e-06, 
"loss": 18.1484, "step": 247380 }, { "epoch": 0.49974345196491554, "grad_norm": 105.177490234375, "learning_rate": 5.95781387299656e-06, "loss": 23.1097, "step": 247390 }, { "epoch": 0.49976365259759936, "grad_norm": 214.38209533691406, "learning_rate": 5.957471269385065e-06, "loss": 13.2777, "step": 247400 }, { "epoch": 0.4997838532302832, "grad_norm": 262.89080810546875, "learning_rate": 5.957128661106973e-06, "loss": 32.9925, "step": 247410 }, { "epoch": 0.499804053862967, "grad_norm": 761.3569946289062, "learning_rate": 5.956786048163951e-06, "loss": 35.6258, "step": 247420 }, { "epoch": 0.4998242544956508, "grad_norm": 776.53662109375, "learning_rate": 5.9564434305576726e-06, "loss": 10.4372, "step": 247430 }, { "epoch": 0.49984445512833464, "grad_norm": 380.9548034667969, "learning_rate": 5.956100808289802e-06, "loss": 8.5099, "step": 247440 }, { "epoch": 0.49986465576101846, "grad_norm": 481.3288269042969, "learning_rate": 5.955758181362012e-06, "loss": 23.8243, "step": 247450 }, { "epoch": 0.4998848563937022, "grad_norm": 415.660888671875, "learning_rate": 5.955415549775975e-06, "loss": 34.6802, "step": 247460 }, { "epoch": 0.49990505702638605, "grad_norm": 307.20465087890625, "learning_rate": 5.955072913533357e-06, "loss": 26.1964, "step": 247470 }, { "epoch": 0.49992525765906987, "grad_norm": 128.6803741455078, "learning_rate": 5.954730272635829e-06, "loss": 19.4235, "step": 247480 }, { "epoch": 0.4999454582917537, "grad_norm": 169.08700561523438, "learning_rate": 5.954387627085061e-06, "loss": 13.3309, "step": 247490 }, { "epoch": 0.4999656589244375, "grad_norm": 7.09989070892334, "learning_rate": 5.954044976882725e-06, "loss": 10.8221, "step": 247500 }, { "epoch": 0.4999858595571213, "grad_norm": 669.5408325195312, "learning_rate": 5.953702322030489e-06, "loss": 12.0256, "step": 247510 }, { "epoch": 0.5000060601898051, "grad_norm": 289.7550964355469, "learning_rate": 5.9533596625300224e-06, "loss": 12.2125, "step": 247520 }, { "epoch": 0.5000262608224889, "grad_norm": 31.32509422302246, "learning_rate": 5.9530169983829974e-06, "loss": 17.0451, "step": 247530 }, { "epoch": 0.5000464614551727, "grad_norm": 276.2655334472656, "learning_rate": 5.952674329591083e-06, "loss": 13.5936, "step": 247540 }, { "epoch": 0.5000666620878566, "grad_norm": 341.00665283203125, "learning_rate": 5.952331656155951e-06, "loss": 18.6661, "step": 247550 }, { "epoch": 0.5000868627205404, "grad_norm": 1379.0341796875, "learning_rate": 5.951988978079268e-06, "loss": 19.0653, "step": 247560 }, { "epoch": 0.5001070633532242, "grad_norm": 607.9564208984375, "learning_rate": 5.951646295362706e-06, "loss": 22.1378, "step": 247570 }, { "epoch": 0.500127263985908, "grad_norm": 489.8138732910156, "learning_rate": 5.951303608007936e-06, "loss": 27.8729, "step": 247580 }, { "epoch": 0.5001474646185918, "grad_norm": 111.98165130615234, "learning_rate": 5.950960916016629e-06, "loss": 21.304, "step": 247590 }, { "epoch": 0.5001676652512757, "grad_norm": 435.2226257324219, "learning_rate": 5.950618219390451e-06, "loss": 19.9937, "step": 247600 }, { "epoch": 0.5001878658839595, "grad_norm": 249.712646484375, "learning_rate": 5.9502755181310774e-06, "loss": 12.5553, "step": 247610 }, { "epoch": 0.5002080665166433, "grad_norm": 714.4030151367188, "learning_rate": 5.949932812240176e-06, "loss": 14.9255, "step": 247620 }, { "epoch": 0.5002282671493271, "grad_norm": 16.345062255859375, "learning_rate": 5.949590101719416e-06, "loss": 18.0893, "step": 247630 }, { "epoch": 0.5002484677820109, "grad_norm": 10.792337417602539, 
"learning_rate": 5.949247386570471e-06, "loss": 27.6014, "step": 247640 }, { "epoch": 0.5002686684146948, "grad_norm": 188.4339599609375, "learning_rate": 5.948904666795007e-06, "loss": 22.4003, "step": 247650 }, { "epoch": 0.5002888690473786, "grad_norm": 235.25634765625, "learning_rate": 5.948561942394698e-06, "loss": 12.6585, "step": 247660 }, { "epoch": 0.5003090696800624, "grad_norm": 179.9253692626953, "learning_rate": 5.948219213371212e-06, "loss": 20.8502, "step": 247670 }, { "epoch": 0.5003292703127462, "grad_norm": 292.9441833496094, "learning_rate": 5.9478764797262225e-06, "loss": 14.198, "step": 247680 }, { "epoch": 0.50034947094543, "grad_norm": 399.9299621582031, "learning_rate": 5.947533741461398e-06, "loss": 24.5034, "step": 247690 }, { "epoch": 0.5003696715781139, "grad_norm": 0.9099647402763367, "learning_rate": 5.947190998578407e-06, "loss": 18.4859, "step": 247700 }, { "epoch": 0.5003898722107977, "grad_norm": 853.4749755859375, "learning_rate": 5.946848251078924e-06, "loss": 29.3719, "step": 247710 }, { "epoch": 0.5004100728434815, "grad_norm": 336.0867614746094, "learning_rate": 5.946505498964616e-06, "loss": 9.8737, "step": 247720 }, { "epoch": 0.5004302734761653, "grad_norm": 517.2442016601562, "learning_rate": 5.9461627422371545e-06, "loss": 20.1705, "step": 247730 }, { "epoch": 0.5004504741088491, "grad_norm": 265.9022216796875, "learning_rate": 5.945819980898212e-06, "loss": 26.8305, "step": 247740 }, { "epoch": 0.500470674741533, "grad_norm": 350.12762451171875, "learning_rate": 5.945477214949457e-06, "loss": 16.4993, "step": 247750 }, { "epoch": 0.5004908753742168, "grad_norm": 357.43536376953125, "learning_rate": 5.945134444392561e-06, "loss": 12.5029, "step": 247760 }, { "epoch": 0.5005110760069006, "grad_norm": 229.77108764648438, "learning_rate": 5.944791669229195e-06, "loss": 36.2875, "step": 247770 }, { "epoch": 0.5005312766395843, "grad_norm": 132.93592834472656, "learning_rate": 5.944448889461027e-06, "loss": 18.5758, "step": 247780 }, { "epoch": 0.5005514772722681, "grad_norm": 164.86135864257812, "learning_rate": 5.9441061050897304e-06, "loss": 19.3645, "step": 247790 }, { "epoch": 0.5005716779049519, "grad_norm": 75.09053802490234, "learning_rate": 5.943763316116977e-06, "loss": 13.1365, "step": 247800 }, { "epoch": 0.5005918785376358, "grad_norm": 34.7217903137207, "learning_rate": 5.943420522544433e-06, "loss": 12.3629, "step": 247810 }, { "epoch": 0.5006120791703196, "grad_norm": 379.7716064453125, "learning_rate": 5.9430777243737744e-06, "loss": 22.4949, "step": 247820 }, { "epoch": 0.5006322798030034, "grad_norm": 560.445068359375, "learning_rate": 5.942734921606667e-06, "loss": 32.3667, "step": 247830 }, { "epoch": 0.5006524804356872, "grad_norm": 584.4889526367188, "learning_rate": 5.942392114244786e-06, "loss": 16.6033, "step": 247840 }, { "epoch": 0.500672681068371, "grad_norm": 98.14608764648438, "learning_rate": 5.942049302289798e-06, "loss": 21.5487, "step": 247850 }, { "epoch": 0.5006928817010549, "grad_norm": 156.64923095703125, "learning_rate": 5.941706485743377e-06, "loss": 19.767, "step": 247860 }, { "epoch": 0.5007130823337387, "grad_norm": 192.50341796875, "learning_rate": 5.941363664607193e-06, "loss": 24.0534, "step": 247870 }, { "epoch": 0.5007332829664225, "grad_norm": 495.8795471191406, "learning_rate": 5.9410208388829174e-06, "loss": 18.2998, "step": 247880 }, { "epoch": 0.5007534835991063, "grad_norm": 505.1311950683594, "learning_rate": 5.9406780085722194e-06, "loss": 24.7423, "step": 247890 }, { "epoch": 
0.5007736842317901, "grad_norm": 491.1490478515625, "learning_rate": 5.94033517367677e-06, "loss": 24.222, "step": 247900 }, { "epoch": 0.500793884864474, "grad_norm": 531.7823486328125, "learning_rate": 5.939992334198242e-06, "loss": 18.489, "step": 247910 }, { "epoch": 0.5008140854971578, "grad_norm": 11.863853454589844, "learning_rate": 5.939649490138305e-06, "loss": 18.112, "step": 247920 }, { "epoch": 0.5008342861298416, "grad_norm": 398.89837646484375, "learning_rate": 5.939306641498632e-06, "loss": 12.7243, "step": 247930 }, { "epoch": 0.5008544867625254, "grad_norm": 379.3194274902344, "learning_rate": 5.938963788280889e-06, "loss": 21.9992, "step": 247940 }, { "epoch": 0.5008746873952092, "grad_norm": 519.755859375, "learning_rate": 5.938620930486754e-06, "loss": 22.7895, "step": 247950 }, { "epoch": 0.5008948880278931, "grad_norm": 734.8353271484375, "learning_rate": 5.9382780681178935e-06, "loss": 20.345, "step": 247960 }, { "epoch": 0.5009150886605769, "grad_norm": 435.9640197753906, "learning_rate": 5.9379352011759775e-06, "loss": 18.0929, "step": 247970 }, { "epoch": 0.5009352892932607, "grad_norm": 293.85467529296875, "learning_rate": 5.9375923296626815e-06, "loss": 17.6831, "step": 247980 }, { "epoch": 0.5009554899259445, "grad_norm": 534.4434204101562, "learning_rate": 5.937249453579672e-06, "loss": 22.6555, "step": 247990 }, { "epoch": 0.5009756905586283, "grad_norm": 575.6947631835938, "learning_rate": 5.936906572928625e-06, "loss": 15.8428, "step": 248000 }, { "epoch": 0.5009958911913122, "grad_norm": 724.4385986328125, "learning_rate": 5.936563687711206e-06, "loss": 26.7899, "step": 248010 }, { "epoch": 0.501016091823996, "grad_norm": 549.7718505859375, "learning_rate": 5.936220797929091e-06, "loss": 17.8817, "step": 248020 }, { "epoch": 0.5010362924566797, "grad_norm": 364.80584716796875, "learning_rate": 5.935877903583949e-06, "loss": 16.6788, "step": 248030 }, { "epoch": 0.5010564930893635, "grad_norm": 204.8345489501953, "learning_rate": 5.9355350046774515e-06, "loss": 6.9864, "step": 248040 }, { "epoch": 0.5010766937220473, "grad_norm": 596.9667358398438, "learning_rate": 5.93519210121127e-06, "loss": 39.3945, "step": 248050 }, { "epoch": 0.5010968943547311, "grad_norm": 300.4778137207031, "learning_rate": 5.934849193187075e-06, "loss": 26.5876, "step": 248060 }, { "epoch": 0.501117094987415, "grad_norm": 248.50503540039062, "learning_rate": 5.93450628060654e-06, "loss": 13.7055, "step": 248070 }, { "epoch": 0.5011372956200988, "grad_norm": 480.83367919921875, "learning_rate": 5.934163363471333e-06, "loss": 16.9877, "step": 248080 }, { "epoch": 0.5011574962527826, "grad_norm": 191.77659606933594, "learning_rate": 5.933820441783129e-06, "loss": 16.6442, "step": 248090 }, { "epoch": 0.5011776968854664, "grad_norm": 324.451904296875, "learning_rate": 5.933477515543595e-06, "loss": 23.7166, "step": 248100 }, { "epoch": 0.5011978975181502, "grad_norm": 454.1504821777344, "learning_rate": 5.933134584754407e-06, "loss": 19.2951, "step": 248110 }, { "epoch": 0.5012180981508341, "grad_norm": 866.717529296875, "learning_rate": 5.932791649417233e-06, "loss": 31.053, "step": 248120 }, { "epoch": 0.5012382987835179, "grad_norm": 1051.307861328125, "learning_rate": 5.932448709533746e-06, "loss": 19.5458, "step": 248130 }, { "epoch": 0.5012584994162017, "grad_norm": 777.1536254882812, "learning_rate": 5.932105765105618e-06, "loss": 15.0236, "step": 248140 }, { "epoch": 0.5012787000488855, "grad_norm": 476.7993469238281, "learning_rate": 5.931762816134517e-06, "loss": 25.5232, 
"step": 248150 }, { "epoch": 0.5012989006815693, "grad_norm": 572.0438842773438, "learning_rate": 5.9314198626221185e-06, "loss": 16.7977, "step": 248160 }, { "epoch": 0.5013191013142532, "grad_norm": 373.8711242675781, "learning_rate": 5.931076904570094e-06, "loss": 16.8697, "step": 248170 }, { "epoch": 0.501339301946937, "grad_norm": 1195.9730224609375, "learning_rate": 5.930733941980111e-06, "loss": 26.0967, "step": 248180 }, { "epoch": 0.5013595025796208, "grad_norm": 405.83148193359375, "learning_rate": 5.9303909748538444e-06, "loss": 22.255, "step": 248190 }, { "epoch": 0.5013797032123046, "grad_norm": 368.86798095703125, "learning_rate": 5.930048003192965e-06, "loss": 24.7736, "step": 248200 }, { "epoch": 0.5013999038449884, "grad_norm": 348.43695068359375, "learning_rate": 5.929705026999145e-06, "loss": 22.9456, "step": 248210 }, { "epoch": 0.5014201044776723, "grad_norm": 545.2449340820312, "learning_rate": 5.929362046274057e-06, "loss": 17.2474, "step": 248220 }, { "epoch": 0.5014403051103561, "grad_norm": 556.1205444335938, "learning_rate": 5.929019061019369e-06, "loss": 18.0067, "step": 248230 }, { "epoch": 0.5014605057430399, "grad_norm": 574.1353149414062, "learning_rate": 5.928676071236756e-06, "loss": 24.04, "step": 248240 }, { "epoch": 0.5014807063757237, "grad_norm": 158.31907653808594, "learning_rate": 5.928333076927888e-06, "loss": 16.593, "step": 248250 }, { "epoch": 0.5015009070084075, "grad_norm": 493.0557556152344, "learning_rate": 5.927990078094435e-06, "loss": 20.1031, "step": 248260 }, { "epoch": 0.5015211076410914, "grad_norm": 412.5963134765625, "learning_rate": 5.927647074738074e-06, "loss": 15.0178, "step": 248270 }, { "epoch": 0.5015413082737752, "grad_norm": 653.412841796875, "learning_rate": 5.927304066860471e-06, "loss": 20.8307, "step": 248280 }, { "epoch": 0.5015615089064589, "grad_norm": 467.2701416015625, "learning_rate": 5.926961054463303e-06, "loss": 21.3891, "step": 248290 }, { "epoch": 0.5015817095391427, "grad_norm": 370.8492431640625, "learning_rate": 5.926618037548237e-06, "loss": 13.9218, "step": 248300 }, { "epoch": 0.5016019101718265, "grad_norm": 258.85211181640625, "learning_rate": 5.926275016116949e-06, "loss": 18.0736, "step": 248310 }, { "epoch": 0.5016221108045104, "grad_norm": 275.01434326171875, "learning_rate": 5.925931990171109e-06, "loss": 17.8099, "step": 248320 }, { "epoch": 0.5016423114371942, "grad_norm": 391.4570007324219, "learning_rate": 5.925588959712387e-06, "loss": 13.1118, "step": 248330 }, { "epoch": 0.501662512069878, "grad_norm": 655.0743408203125, "learning_rate": 5.925245924742458e-06, "loss": 30.9306, "step": 248340 }, { "epoch": 0.5016827127025618, "grad_norm": 332.4405822753906, "learning_rate": 5.924902885262992e-06, "loss": 17.6808, "step": 248350 }, { "epoch": 0.5017029133352456, "grad_norm": 107.97994995117188, "learning_rate": 5.924559841275661e-06, "loss": 41.9068, "step": 248360 }, { "epoch": 0.5017231139679295, "grad_norm": 515.9050903320312, "learning_rate": 5.924216792782138e-06, "loss": 29.0406, "step": 248370 }, { "epoch": 0.5017433146006133, "grad_norm": 110.2987289428711, "learning_rate": 5.9238737397840966e-06, "loss": 11.4013, "step": 248380 }, { "epoch": 0.5017635152332971, "grad_norm": 131.7434844970703, "learning_rate": 5.923530682283204e-06, "loss": 21.3107, "step": 248390 }, { "epoch": 0.5017837158659809, "grad_norm": 645.4819946289062, "learning_rate": 5.923187620281135e-06, "loss": 35.8132, "step": 248400 }, { "epoch": 0.5018039164986647, "grad_norm": 295.7440185546875, "learning_rate": 
5.922844553779563e-06, "loss": 21.2253, "step": 248410 }, { "epoch": 0.5018241171313486, "grad_norm": 754.93212890625, "learning_rate": 5.922501482780156e-06, "loss": 18.5812, "step": 248420 }, { "epoch": 0.5018443177640324, "grad_norm": 88.51136779785156, "learning_rate": 5.9221584072845914e-06, "loss": 17.2722, "step": 248430 }, { "epoch": 0.5018645183967162, "grad_norm": 553.53466796875, "learning_rate": 5.921815327294537e-06, "loss": 15.6545, "step": 248440 }, { "epoch": 0.5018847190294, "grad_norm": 233.0700225830078, "learning_rate": 5.9214722428116675e-06, "loss": 22.5623, "step": 248450 }, { "epoch": 0.5019049196620838, "grad_norm": 180.30812072753906, "learning_rate": 5.921129153837654e-06, "loss": 17.7093, "step": 248460 }, { "epoch": 0.5019251202947677, "grad_norm": 178.22335815429688, "learning_rate": 5.9207860603741674e-06, "loss": 13.4829, "step": 248470 }, { "epoch": 0.5019453209274515, "grad_norm": 268.0635070800781, "learning_rate": 5.920442962422883e-06, "loss": 20.989, "step": 248480 }, { "epoch": 0.5019655215601353, "grad_norm": 407.7901306152344, "learning_rate": 5.920099859985469e-06, "loss": 12.4519, "step": 248490 }, { "epoch": 0.5019857221928191, "grad_norm": 552.140380859375, "learning_rate": 5.919756753063601e-06, "loss": 18.7923, "step": 248500 }, { "epoch": 0.5020059228255029, "grad_norm": 363.47967529296875, "learning_rate": 5.919413641658951e-06, "loss": 15.1383, "step": 248510 }, { "epoch": 0.5020261234581868, "grad_norm": 680.4986572265625, "learning_rate": 5.91907052577319e-06, "loss": 32.3416, "step": 248520 }, { "epoch": 0.5020463240908706, "grad_norm": 870.9769897460938, "learning_rate": 5.9187274054079895e-06, "loss": 29.1395, "step": 248530 }, { "epoch": 0.5020665247235544, "grad_norm": 207.03797912597656, "learning_rate": 5.918384280565025e-06, "loss": 11.6947, "step": 248540 }, { "epoch": 0.5020867253562381, "grad_norm": 300.1256103515625, "learning_rate": 5.9180411512459655e-06, "loss": 22.3148, "step": 248550 }, { "epoch": 0.5021069259889219, "grad_norm": 122.85758209228516, "learning_rate": 5.917698017452484e-06, "loss": 18.0818, "step": 248560 }, { "epoch": 0.5021271266216057, "grad_norm": 18.23651695251465, "learning_rate": 5.9173548791862556e-06, "loss": 19.688, "step": 248570 }, { "epoch": 0.5021473272542896, "grad_norm": 423.9542541503906, "learning_rate": 5.91701173644895e-06, "loss": 18.1402, "step": 248580 }, { "epoch": 0.5021675278869734, "grad_norm": 1059.9700927734375, "learning_rate": 5.916668589242241e-06, "loss": 29.2666, "step": 248590 }, { "epoch": 0.5021877285196572, "grad_norm": 482.63092041015625, "learning_rate": 5.9163254375677995e-06, "loss": 22.8889, "step": 248600 }, { "epoch": 0.502207929152341, "grad_norm": 797.1237182617188, "learning_rate": 5.9159822814272995e-06, "loss": 21.7161, "step": 248610 }, { "epoch": 0.5022281297850248, "grad_norm": 147.08187866210938, "learning_rate": 5.915639120822413e-06, "loss": 29.4135, "step": 248620 }, { "epoch": 0.5022483304177087, "grad_norm": 324.49053955078125, "learning_rate": 5.915295955754812e-06, "loss": 8.4809, "step": 248630 }, { "epoch": 0.5022685310503925, "grad_norm": 2.9765026569366455, "learning_rate": 5.914952786226169e-06, "loss": 29.6125, "step": 248640 }, { "epoch": 0.5022887316830763, "grad_norm": 431.4524841308594, "learning_rate": 5.914609612238159e-06, "loss": 13.3484, "step": 248650 }, { "epoch": 0.5023089323157601, "grad_norm": 364.72161865234375, "learning_rate": 5.914266433792452e-06, "loss": 31.8002, "step": 248660 }, { "epoch": 0.502329132948444, 
"grad_norm": 282.8038330078125, "learning_rate": 5.913923250890721e-06, "loss": 15.4929, "step": 248670 }, { "epoch": 0.5023493335811278, "grad_norm": 420.2165832519531, "learning_rate": 5.9135800635346385e-06, "loss": 17.169, "step": 248680 }, { "epoch": 0.5023695342138116, "grad_norm": 516.7526245117188, "learning_rate": 5.913236871725877e-06, "loss": 26.3079, "step": 248690 }, { "epoch": 0.5023897348464954, "grad_norm": 637.6509399414062, "learning_rate": 5.912893675466112e-06, "loss": 23.7227, "step": 248700 }, { "epoch": 0.5024099354791792, "grad_norm": 174.11920166015625, "learning_rate": 5.912550474757011e-06, "loss": 23.6315, "step": 248710 }, { "epoch": 0.502430136111863, "grad_norm": 190.7968292236328, "learning_rate": 5.912207269600252e-06, "loss": 37.7386, "step": 248720 }, { "epoch": 0.5024503367445469, "grad_norm": 326.266845703125, "learning_rate": 5.911864059997504e-06, "loss": 13.011, "step": 248730 }, { "epoch": 0.5024705373772307, "grad_norm": 2355.5771484375, "learning_rate": 5.911520845950442e-06, "loss": 22.8896, "step": 248740 }, { "epoch": 0.5024907380099145, "grad_norm": 413.2026062011719, "learning_rate": 5.911177627460739e-06, "loss": 19.1259, "step": 248750 }, { "epoch": 0.5025109386425983, "grad_norm": 515.444091796875, "learning_rate": 5.910834404530064e-06, "loss": 22.6092, "step": 248760 }, { "epoch": 0.5025311392752821, "grad_norm": 353.85015869140625, "learning_rate": 5.910491177160094e-06, "loss": 17.7772, "step": 248770 }, { "epoch": 0.502551339907966, "grad_norm": 1164.6044921875, "learning_rate": 5.910147945352501e-06, "loss": 28.7933, "step": 248780 }, { "epoch": 0.5025715405406498, "grad_norm": 325.5775451660156, "learning_rate": 5.909804709108957e-06, "loss": 41.7351, "step": 248790 }, { "epoch": 0.5025917411733335, "grad_norm": 345.25152587890625, "learning_rate": 5.909461468431135e-06, "loss": 25.2505, "step": 248800 }, { "epoch": 0.5026119418060173, "grad_norm": 671.2576904296875, "learning_rate": 5.9091182233207075e-06, "loss": 23.5605, "step": 248810 }, { "epoch": 0.5026321424387011, "grad_norm": 381.11334228515625, "learning_rate": 5.90877497377935e-06, "loss": 29.831, "step": 248820 }, { "epoch": 0.502652343071385, "grad_norm": 727.1109619140625, "learning_rate": 5.908431719808731e-06, "loss": 20.2548, "step": 248830 }, { "epoch": 0.5026725437040688, "grad_norm": 5.139888286590576, "learning_rate": 5.908088461410529e-06, "loss": 45.1709, "step": 248840 }, { "epoch": 0.5026927443367526, "grad_norm": 257.7228088378906, "learning_rate": 5.907745198586411e-06, "loss": 24.0451, "step": 248850 }, { "epoch": 0.5027129449694364, "grad_norm": 268.2099609375, "learning_rate": 5.907401931338055e-06, "loss": 11.4164, "step": 248860 }, { "epoch": 0.5027331456021202, "grad_norm": 350.30584716796875, "learning_rate": 5.90705865966713e-06, "loss": 28.8541, "step": 248870 }, { "epoch": 0.502753346234804, "grad_norm": 721.6278686523438, "learning_rate": 5.9067153835753125e-06, "loss": 9.0915, "step": 248880 }, { "epoch": 0.5027735468674879, "grad_norm": 492.8309020996094, "learning_rate": 5.906372103064274e-06, "loss": 11.1449, "step": 248890 }, { "epoch": 0.5027937475001717, "grad_norm": 524.3689575195312, "learning_rate": 5.906028818135687e-06, "loss": 22.7391, "step": 248900 }, { "epoch": 0.5028139481328555, "grad_norm": 220.1304931640625, "learning_rate": 5.905685528791226e-06, "loss": 7.5041, "step": 248910 }, { "epoch": 0.5028341487655393, "grad_norm": 437.30859375, "learning_rate": 5.905342235032564e-06, "loss": 38.2453, "step": 248920 }, { "epoch": 
0.5028543493982232, "grad_norm": 430.1686096191406, "learning_rate": 5.904998936861375e-06, "loss": 22.9035, "step": 248930 }, { "epoch": 0.502874550030907, "grad_norm": 306.7922058105469, "learning_rate": 5.904655634279328e-06, "loss": 25.6524, "step": 248940 }, { "epoch": 0.5028947506635908, "grad_norm": 0.8928514719009399, "learning_rate": 5.904312327288101e-06, "loss": 22.5635, "step": 248950 }, { "epoch": 0.5029149512962746, "grad_norm": 590.5730590820312, "learning_rate": 5.903969015889365e-06, "loss": 13.5564, "step": 248960 }, { "epoch": 0.5029351519289584, "grad_norm": 627.0333862304688, "learning_rate": 5.903625700084794e-06, "loss": 15.467, "step": 248970 }, { "epoch": 0.5029553525616423, "grad_norm": 83.18756866455078, "learning_rate": 5.9032823798760595e-06, "loss": 21.6662, "step": 248980 }, { "epoch": 0.5029755531943261, "grad_norm": 730.4940795898438, "learning_rate": 5.902939055264838e-06, "loss": 25.6206, "step": 248990 }, { "epoch": 0.5029957538270099, "grad_norm": 174.6398468017578, "learning_rate": 5.902595726252801e-06, "loss": 12.4195, "step": 249000 }, { "epoch": 0.5030159544596937, "grad_norm": 294.2000427246094, "learning_rate": 5.902252392841621e-06, "loss": 21.5018, "step": 249010 }, { "epoch": 0.5030361550923775, "grad_norm": 544.2066650390625, "learning_rate": 5.901909055032974e-06, "loss": 30.3573, "step": 249020 }, { "epoch": 0.5030563557250614, "grad_norm": 297.4364013671875, "learning_rate": 5.901565712828528e-06, "loss": 16.5915, "step": 249030 }, { "epoch": 0.5030765563577452, "grad_norm": 126.29948425292969, "learning_rate": 5.901222366229964e-06, "loss": 19.41, "step": 249040 }, { "epoch": 0.503096756990429, "grad_norm": 149.2904510498047, "learning_rate": 5.900879015238948e-06, "loss": 19.3939, "step": 249050 }, { "epoch": 0.5031169576231127, "grad_norm": 426.88946533203125, "learning_rate": 5.90053565985716e-06, "loss": 16.0953, "step": 249060 }, { "epoch": 0.5031371582557965, "grad_norm": 497.24383544921875, "learning_rate": 5.900192300086268e-06, "loss": 19.2877, "step": 249070 }, { "epoch": 0.5031573588884803, "grad_norm": 187.9085693359375, "learning_rate": 5.89984893592795e-06, "loss": 18.1587, "step": 249080 }, { "epoch": 0.5031775595211642, "grad_norm": 241.67221069335938, "learning_rate": 5.899505567383877e-06, "loss": 33.4961, "step": 249090 }, { "epoch": 0.503197760153848, "grad_norm": 216.5790557861328, "learning_rate": 5.8991621944557224e-06, "loss": 26.2352, "step": 249100 }, { "epoch": 0.5032179607865318, "grad_norm": 249.6385498046875, "learning_rate": 5.8988188171451596e-06, "loss": 14.572, "step": 249110 }, { "epoch": 0.5032381614192156, "grad_norm": 397.8631286621094, "learning_rate": 5.898475435453863e-06, "loss": 10.3243, "step": 249120 }, { "epoch": 0.5032583620518994, "grad_norm": 245.76161193847656, "learning_rate": 5.898132049383507e-06, "loss": 11.1525, "step": 249130 }, { "epoch": 0.5032785626845833, "grad_norm": 15.009568214416504, "learning_rate": 5.897788658935764e-06, "loss": 30.3441, "step": 249140 }, { "epoch": 0.5032987633172671, "grad_norm": 262.6147766113281, "learning_rate": 5.897445264112309e-06, "loss": 11.2807, "step": 249150 }, { "epoch": 0.5033189639499509, "grad_norm": 1088.070068359375, "learning_rate": 5.897101864914814e-06, "loss": 30.4854, "step": 249160 }, { "epoch": 0.5033391645826347, "grad_norm": 177.8507080078125, "learning_rate": 5.8967584613449525e-06, "loss": 15.7606, "step": 249170 }, { "epoch": 0.5033593652153185, "grad_norm": 207.3918914794922, "learning_rate": 5.896415053404399e-06, "loss": 
15.7608, "step": 249180 }, { "epoch": 0.5033795658480024, "grad_norm": 315.90283203125, "learning_rate": 5.896071641094827e-06, "loss": 16.8296, "step": 249190 }, { "epoch": 0.5033997664806862, "grad_norm": 424.2550354003906, "learning_rate": 5.8957282244179125e-06, "loss": 15.4169, "step": 249200 }, { "epoch": 0.50341996711337, "grad_norm": 205.88717651367188, "learning_rate": 5.895384803375325e-06, "loss": 26.7485, "step": 249210 }, { "epoch": 0.5034401677460538, "grad_norm": 21.79161834716797, "learning_rate": 5.895041377968742e-06, "loss": 21.0781, "step": 249220 }, { "epoch": 0.5034603683787376, "grad_norm": 265.3016357421875, "learning_rate": 5.894697948199836e-06, "loss": 8.8661, "step": 249230 }, { "epoch": 0.5034805690114215, "grad_norm": 210.5745849609375, "learning_rate": 5.89435451407028e-06, "loss": 11.2092, "step": 249240 }, { "epoch": 0.5035007696441053, "grad_norm": 152.37144470214844, "learning_rate": 5.8940110755817484e-06, "loss": 32.8861, "step": 249250 }, { "epoch": 0.5035209702767891, "grad_norm": 545.8831787109375, "learning_rate": 5.893667632735915e-06, "loss": 12.1267, "step": 249260 }, { "epoch": 0.5035411709094729, "grad_norm": 262.19146728515625, "learning_rate": 5.893324185534456e-06, "loss": 7.8067, "step": 249270 }, { "epoch": 0.5035613715421567, "grad_norm": 417.0307312011719, "learning_rate": 5.892980733979041e-06, "loss": 17.7089, "step": 249280 }, { "epoch": 0.5035815721748406, "grad_norm": 840.2435302734375, "learning_rate": 5.892637278071347e-06, "loss": 13.7266, "step": 249290 }, { "epoch": 0.5036017728075244, "grad_norm": 406.7433166503906, "learning_rate": 5.892293817813048e-06, "loss": 18.5383, "step": 249300 }, { "epoch": 0.5036219734402081, "grad_norm": 691.2929077148438, "learning_rate": 5.891950353205817e-06, "loss": 25.414, "step": 249310 }, { "epoch": 0.5036421740728919, "grad_norm": 112.32616424560547, "learning_rate": 5.891606884251326e-06, "loss": 9.8455, "step": 249320 }, { "epoch": 0.5036623747055757, "grad_norm": 497.7851257324219, "learning_rate": 5.8912634109512534e-06, "loss": 19.669, "step": 249330 }, { "epoch": 0.5036825753382596, "grad_norm": 263.6553955078125, "learning_rate": 5.89091993330727e-06, "loss": 20.7168, "step": 249340 }, { "epoch": 0.5037027759709434, "grad_norm": 568.7107543945312, "learning_rate": 5.89057645132105e-06, "loss": 14.7107, "step": 249350 }, { "epoch": 0.5037229766036272, "grad_norm": 129.31324768066406, "learning_rate": 5.8902329649942715e-06, "loss": 45.3528, "step": 249360 }, { "epoch": 0.503743177236311, "grad_norm": 178.8046417236328, "learning_rate": 5.889889474328603e-06, "loss": 16.6972, "step": 249370 }, { "epoch": 0.5037633778689948, "grad_norm": 346.4656066894531, "learning_rate": 5.889545979325722e-06, "loss": 20.1498, "step": 249380 }, { "epoch": 0.5037835785016787, "grad_norm": 207.06436157226562, "learning_rate": 5.889202479987301e-06, "loss": 32.255, "step": 249390 }, { "epoch": 0.5038037791343625, "grad_norm": 495.1197509765625, "learning_rate": 5.8888589763150165e-06, "loss": 20.4992, "step": 249400 }, { "epoch": 0.5038239797670463, "grad_norm": 109.89571380615234, "learning_rate": 5.8885154683105395e-06, "loss": 19.9023, "step": 249410 }, { "epoch": 0.5038441803997301, "grad_norm": 454.82562255859375, "learning_rate": 5.8881719559755454e-06, "loss": 16.5533, "step": 249420 }, { "epoch": 0.5038643810324139, "grad_norm": 327.36627197265625, "learning_rate": 5.887828439311712e-06, "loss": 19.4268, "step": 249430 }, { "epoch": 0.5038845816650978, "grad_norm": 348.3185729980469, 
"learning_rate": 5.887484918320708e-06, "loss": 11.3746, "step": 249440 }, { "epoch": 0.5039047822977816, "grad_norm": 779.6866455078125, "learning_rate": 5.887141393004211e-06, "loss": 37.914, "step": 249450 }, { "epoch": 0.5039249829304654, "grad_norm": 275.3816833496094, "learning_rate": 5.8867978633638935e-06, "loss": 10.3895, "step": 249460 }, { "epoch": 0.5039451835631492, "grad_norm": 832.8724975585938, "learning_rate": 5.886454329401432e-06, "loss": 36.9358, "step": 249470 }, { "epoch": 0.503965384195833, "grad_norm": 519.391357421875, "learning_rate": 5.8861107911184975e-06, "loss": 12.3229, "step": 249480 }, { "epoch": 0.5039855848285169, "grad_norm": 265.33380126953125, "learning_rate": 5.885767248516769e-06, "loss": 35.1115, "step": 249490 }, { "epoch": 0.5040057854612007, "grad_norm": 675.9965209960938, "learning_rate": 5.885423701597918e-06, "loss": 30.9103, "step": 249500 }, { "epoch": 0.5040259860938845, "grad_norm": 1510.18603515625, "learning_rate": 5.885080150363618e-06, "loss": 27.4959, "step": 249510 }, { "epoch": 0.5040461867265683, "grad_norm": 1.5162192583084106, "learning_rate": 5.884736594815545e-06, "loss": 25.0254, "step": 249520 }, { "epoch": 0.5040663873592521, "grad_norm": 606.4171142578125, "learning_rate": 5.884393034955373e-06, "loss": 14.7794, "step": 249530 }, { "epoch": 0.504086587991936, "grad_norm": 14.640703201293945, "learning_rate": 5.8840494707847786e-06, "loss": 12.6323, "step": 249540 }, { "epoch": 0.5041067886246198, "grad_norm": 12.581164360046387, "learning_rate": 5.883705902305432e-06, "loss": 15.9409, "step": 249550 }, { "epoch": 0.5041269892573036, "grad_norm": 697.4895629882812, "learning_rate": 5.8833623295190104e-06, "loss": 21.9033, "step": 249560 }, { "epoch": 0.5041471898899873, "grad_norm": 276.90704345703125, "learning_rate": 5.883018752427189e-06, "loss": 15.0847, "step": 249570 }, { "epoch": 0.5041673905226711, "grad_norm": 244.14535522460938, "learning_rate": 5.8826751710316395e-06, "loss": 13.4849, "step": 249580 }, { "epoch": 0.5041875911553549, "grad_norm": 1217.48193359375, "learning_rate": 5.882331585334039e-06, "loss": 38.1977, "step": 249590 }, { "epoch": 0.5042077917880388, "grad_norm": 53.94611358642578, "learning_rate": 5.881987995336062e-06, "loss": 14.0265, "step": 249600 }, { "epoch": 0.5042279924207226, "grad_norm": 332.9171447753906, "learning_rate": 5.881644401039382e-06, "loss": 11.8391, "step": 249610 }, { "epoch": 0.5042481930534064, "grad_norm": 562.229736328125, "learning_rate": 5.881300802445675e-06, "loss": 14.8923, "step": 249620 }, { "epoch": 0.5042683936860902, "grad_norm": 260.7309875488281, "learning_rate": 5.880957199556615e-06, "loss": 17.2081, "step": 249630 }, { "epoch": 0.504288594318774, "grad_norm": 221.5662078857422, "learning_rate": 5.880613592373874e-06, "loss": 9.9935, "step": 249640 }, { "epoch": 0.5043087949514579, "grad_norm": 240.99302673339844, "learning_rate": 5.880269980899132e-06, "loss": 14.6933, "step": 249650 }, { "epoch": 0.5043289955841417, "grad_norm": 270.6983947753906, "learning_rate": 5.879926365134059e-06, "loss": 14.4931, "step": 249660 }, { "epoch": 0.5043491962168255, "grad_norm": 0.22573330998420715, "learning_rate": 5.879582745080333e-06, "loss": 28.6685, "step": 249670 }, { "epoch": 0.5043693968495093, "grad_norm": 395.80535888671875, "learning_rate": 5.879239120739626e-06, "loss": 17.6597, "step": 249680 }, { "epoch": 0.5043895974821931, "grad_norm": 159.0031280517578, "learning_rate": 5.878895492113614e-06, "loss": 16.4434, "step": 249690 }, { "epoch": 
0.504409798114877, "grad_norm": 802.7413940429688, "learning_rate": 5.878551859203974e-06, "loss": 21.5858, "step": 249700 }, { "epoch": 0.5044299987475608, "grad_norm": 29.40521812438965, "learning_rate": 5.878208222012377e-06, "loss": 15.8672, "step": 249710 }, { "epoch": 0.5044501993802446, "grad_norm": 1343.5350341796875, "learning_rate": 5.8778645805405e-06, "loss": 28.0506, "step": 249720 }, { "epoch": 0.5044704000129284, "grad_norm": 246.5369873046875, "learning_rate": 5.8775209347900174e-06, "loss": 16.6771, "step": 249730 }, { "epoch": 0.5044906006456122, "grad_norm": 355.10162353515625, "learning_rate": 5.877177284762605e-06, "loss": 24.7707, "step": 249740 }, { "epoch": 0.5045108012782961, "grad_norm": 145.2834930419922, "learning_rate": 5.876833630459936e-06, "loss": 11.5092, "step": 249750 }, { "epoch": 0.5045310019109799, "grad_norm": 130.55377197265625, "learning_rate": 5.876489971883688e-06, "loss": 18.376, "step": 249760 }, { "epoch": 0.5045512025436637, "grad_norm": 125.49608612060547, "learning_rate": 5.876146309035532e-06, "loss": 12.7758, "step": 249770 }, { "epoch": 0.5045714031763475, "grad_norm": 174.88815307617188, "learning_rate": 5.8758026419171455e-06, "loss": 27.505, "step": 249780 }, { "epoch": 0.5045916038090313, "grad_norm": 312.1806335449219, "learning_rate": 5.875458970530204e-06, "loss": 22.9433, "step": 249790 }, { "epoch": 0.5046118044417152, "grad_norm": 50.86980438232422, "learning_rate": 5.8751152948763815e-06, "loss": 12.9315, "step": 249800 }, { "epoch": 0.504632005074399, "grad_norm": 197.81698608398438, "learning_rate": 5.874771614957353e-06, "loss": 26.7791, "step": 249810 }, { "epoch": 0.5046522057070827, "grad_norm": 535.0093383789062, "learning_rate": 5.874427930774792e-06, "loss": 23.5996, "step": 249820 }, { "epoch": 0.5046724063397665, "grad_norm": 416.4787292480469, "learning_rate": 5.874084242330378e-06, "loss": 23.4198, "step": 249830 }, { "epoch": 0.5046926069724503, "grad_norm": 259.2166442871094, "learning_rate": 5.873740549625783e-06, "loss": 15.2562, "step": 249840 }, { "epoch": 0.5047128076051342, "grad_norm": 533.5535888671875, "learning_rate": 5.873396852662682e-06, "loss": 21.7211, "step": 249850 }, { "epoch": 0.504733008237818, "grad_norm": 166.01058959960938, "learning_rate": 5.873053151442749e-06, "loss": 26.0265, "step": 249860 }, { "epoch": 0.5047532088705018, "grad_norm": 476.9961242675781, "learning_rate": 5.872709445967662e-06, "loss": 23.062, "step": 249870 }, { "epoch": 0.5047734095031856, "grad_norm": 233.0247802734375, "learning_rate": 5.872365736239097e-06, "loss": 7.2233, "step": 249880 }, { "epoch": 0.5047936101358694, "grad_norm": 439.13409423828125, "learning_rate": 5.872022022258726e-06, "loss": 25.7809, "step": 249890 }, { "epoch": 0.5048138107685533, "grad_norm": 262.7398681640625, "learning_rate": 5.871678304028224e-06, "loss": 14.5744, "step": 249900 }, { "epoch": 0.5048340114012371, "grad_norm": 228.85159301757812, "learning_rate": 5.8713345815492695e-06, "loss": 10.7308, "step": 249910 }, { "epoch": 0.5048542120339209, "grad_norm": 158.81594848632812, "learning_rate": 5.8709908548235365e-06, "loss": 12.7449, "step": 249920 }, { "epoch": 0.5048744126666047, "grad_norm": 414.7752685546875, "learning_rate": 5.870647123852696e-06, "loss": 16.0366, "step": 249930 }, { "epoch": 0.5048946132992885, "grad_norm": 259.3028869628906, "learning_rate": 5.870303388638431e-06, "loss": 14.5456, "step": 249940 }, { "epoch": 0.5049148139319724, "grad_norm": 281.9948425292969, "learning_rate": 5.86995964918241e-06, "loss": 
17.1693, "step": 249950 }, { "epoch": 0.5049350145646562, "grad_norm": 413.28155517578125, "learning_rate": 5.869615905486313e-06, "loss": 16.3743, "step": 249960 }, { "epoch": 0.50495521519734, "grad_norm": 92.13639831542969, "learning_rate": 5.869272157551814e-06, "loss": 18.2124, "step": 249970 }, { "epoch": 0.5049754158300238, "grad_norm": 530.0686645507812, "learning_rate": 5.868928405380585e-06, "loss": 19.9189, "step": 249980 }, { "epoch": 0.5049956164627076, "grad_norm": 446.3638916015625, "learning_rate": 5.868584648974308e-06, "loss": 14.097, "step": 249990 }, { "epoch": 0.5050158170953915, "grad_norm": 23.526782989501953, "learning_rate": 5.8682408883346535e-06, "loss": 20.2342, "step": 250000 }, { "epoch": 0.5050360177280753, "grad_norm": 3428.04833984375, "learning_rate": 5.8678971234632965e-06, "loss": 36.5053, "step": 250010 }, { "epoch": 0.5050562183607591, "grad_norm": 387.45599365234375, "learning_rate": 5.8675533543619155e-06, "loss": 9.7146, "step": 250020 }, { "epoch": 0.5050764189934429, "grad_norm": 1221.8453369140625, "learning_rate": 5.867209581032184e-06, "loss": 35.8046, "step": 250030 }, { "epoch": 0.5050966196261267, "grad_norm": 359.765625, "learning_rate": 5.8668658034757795e-06, "loss": 16.4776, "step": 250040 }, { "epoch": 0.5051168202588106, "grad_norm": 617.654052734375, "learning_rate": 5.866522021694376e-06, "loss": 22.2486, "step": 250050 }, { "epoch": 0.5051370208914944, "grad_norm": 417.5396728515625, "learning_rate": 5.866178235689648e-06, "loss": 17.4365, "step": 250060 }, { "epoch": 0.5051572215241782, "grad_norm": 445.7264404296875, "learning_rate": 5.865834445463273e-06, "loss": 17.3608, "step": 250070 }, { "epoch": 0.5051774221568619, "grad_norm": 95.29048919677734, "learning_rate": 5.865490651016927e-06, "loss": 19.9819, "step": 250080 }, { "epoch": 0.5051976227895457, "grad_norm": 512.3966674804688, "learning_rate": 5.865146852352283e-06, "loss": 18.7347, "step": 250090 }, { "epoch": 0.5052178234222295, "grad_norm": 603.2786865234375, "learning_rate": 5.8648030494710195e-06, "loss": 17.141, "step": 250100 }, { "epoch": 0.5052380240549134, "grad_norm": 1903.7728271484375, "learning_rate": 5.864459242374809e-06, "loss": 28.3695, "step": 250110 }, { "epoch": 0.5052582246875972, "grad_norm": 370.3611755371094, "learning_rate": 5.86411543106533e-06, "loss": 18.9759, "step": 250120 }, { "epoch": 0.505278425320281, "grad_norm": 186.92010498046875, "learning_rate": 5.863771615544258e-06, "loss": 11.4139, "step": 250130 }, { "epoch": 0.5052986259529648, "grad_norm": 348.0819396972656, "learning_rate": 5.863427795813266e-06, "loss": 24.1246, "step": 250140 }, { "epoch": 0.5053188265856486, "grad_norm": 291.275390625, "learning_rate": 5.863083971874034e-06, "loss": 19.7654, "step": 250150 }, { "epoch": 0.5053390272183325, "grad_norm": 517.228271484375, "learning_rate": 5.8627401437282334e-06, "loss": 26.8513, "step": 250160 }, { "epoch": 0.5053592278510163, "grad_norm": 224.11453247070312, "learning_rate": 5.862396311377543e-06, "loss": 13.223, "step": 250170 }, { "epoch": 0.5053794284837001, "grad_norm": 0.0, "learning_rate": 5.862052474823637e-06, "loss": 20.4579, "step": 250180 }, { "epoch": 0.5053996291163839, "grad_norm": 254.05532836914062, "learning_rate": 5.861708634068193e-06, "loss": 15.5208, "step": 250190 }, { "epoch": 0.5054198297490677, "grad_norm": 451.5887145996094, "learning_rate": 5.8613647891128845e-06, "loss": 17.1956, "step": 250200 }, { "epoch": 0.5054400303817516, "grad_norm": 34.32612609863281, "learning_rate": 
5.861020939959389e-06, "loss": 13.5152, "step": 250210 }, { "epoch": 0.5054602310144354, "grad_norm": 694.2782592773438, "learning_rate": 5.860677086609381e-06, "loss": 18.3581, "step": 250220 }, { "epoch": 0.5054804316471192, "grad_norm": 176.43052673339844, "learning_rate": 5.860333229064539e-06, "loss": 13.6246, "step": 250230 }, { "epoch": 0.505500632279803, "grad_norm": 393.8753662109375, "learning_rate": 5.859989367326535e-06, "loss": 16.0354, "step": 250240 }, { "epoch": 0.5055208329124868, "grad_norm": 754.1749877929688, "learning_rate": 5.859645501397048e-06, "loss": 14.098, "step": 250250 }, { "epoch": 0.5055410335451707, "grad_norm": 317.06646728515625, "learning_rate": 5.859301631277754e-06, "loss": 23.7296, "step": 250260 }, { "epoch": 0.5055612341778545, "grad_norm": 153.16233825683594, "learning_rate": 5.858957756970326e-06, "loss": 24.6719, "step": 250270 }, { "epoch": 0.5055814348105383, "grad_norm": 282.1354675292969, "learning_rate": 5.858613878476445e-06, "loss": 16.8805, "step": 250280 }, { "epoch": 0.5056016354432221, "grad_norm": 244.73968505859375, "learning_rate": 5.858269995797781e-06, "loss": 11.7205, "step": 250290 }, { "epoch": 0.5056218360759059, "grad_norm": 478.359130859375, "learning_rate": 5.857926108936015e-06, "loss": 34.8297, "step": 250300 }, { "epoch": 0.5056420367085898, "grad_norm": 168.78770446777344, "learning_rate": 5.8575822178928225e-06, "loss": 21.1783, "step": 250310 }, { "epoch": 0.5056622373412736, "grad_norm": 524.9314575195312, "learning_rate": 5.857238322669875e-06, "loss": 38.7921, "step": 250320 }, { "epoch": 0.5056824379739574, "grad_norm": 714.8871459960938, "learning_rate": 5.8568944232688554e-06, "loss": 18.5099, "step": 250330 }, { "epoch": 0.5057026386066411, "grad_norm": 373.43292236328125, "learning_rate": 5.856550519691433e-06, "loss": 22.0461, "step": 250340 }, { "epoch": 0.5057228392393249, "grad_norm": 1048.020751953125, "learning_rate": 5.856206611939289e-06, "loss": 34.6704, "step": 250350 }, { "epoch": 0.5057430398720087, "grad_norm": 454.6194152832031, "learning_rate": 5.855862700014096e-06, "loss": 13.2749, "step": 250360 }, { "epoch": 0.5057632405046926, "grad_norm": 81.307373046875, "learning_rate": 5.855518783917535e-06, "loss": 10.3817, "step": 250370 }, { "epoch": 0.5057834411373764, "grad_norm": 193.7640380859375, "learning_rate": 5.855174863651279e-06, "loss": 21.205, "step": 250380 }, { "epoch": 0.5058036417700602, "grad_norm": 0.0, "learning_rate": 5.854830939217002e-06, "loss": 15.3016, "step": 250390 }, { "epoch": 0.505823842402744, "grad_norm": 604.971923828125, "learning_rate": 5.854487010616384e-06, "loss": 25.737, "step": 250400 }, { "epoch": 0.5058440430354278, "grad_norm": 413.559326171875, "learning_rate": 5.8541430778511e-06, "loss": 21.6659, "step": 250410 }, { "epoch": 0.5058642436681117, "grad_norm": 13.928187370300293, "learning_rate": 5.853799140922827e-06, "loss": 11.1542, "step": 250420 }, { "epoch": 0.5058844443007955, "grad_norm": 465.09954833984375, "learning_rate": 5.853455199833238e-06, "loss": 16.7608, "step": 250430 }, { "epoch": 0.5059046449334793, "grad_norm": 211.7120819091797, "learning_rate": 5.853111254584014e-06, "loss": 19.3896, "step": 250440 }, { "epoch": 0.5059248455661631, "grad_norm": 299.7850646972656, "learning_rate": 5.852767305176829e-06, "loss": 15.0662, "step": 250450 }, { "epoch": 0.505945046198847, "grad_norm": 503.6282958984375, "learning_rate": 5.852423351613359e-06, "loss": 26.4411, "step": 250460 }, { "epoch": 0.5059652468315308, "grad_norm": 141.0774383544922, 
"learning_rate": 5.852079393895281e-06, "loss": 12.8351, "step": 250470 }, { "epoch": 0.5059854474642146, "grad_norm": 458.46051025390625, "learning_rate": 5.85173543202427e-06, "loss": 23.6347, "step": 250480 }, { "epoch": 0.5060056480968984, "grad_norm": 17.644128799438477, "learning_rate": 5.851391466002008e-06, "loss": 21.8069, "step": 250490 }, { "epoch": 0.5060258487295822, "grad_norm": 280.13995361328125, "learning_rate": 5.851047495830163e-06, "loss": 23.8772, "step": 250500 }, { "epoch": 0.506046049362266, "grad_norm": 475.9883117675781, "learning_rate": 5.850703521510418e-06, "loss": 27.7783, "step": 250510 }, { "epoch": 0.5060662499949499, "grad_norm": 284.2987365722656, "learning_rate": 5.850359543044446e-06, "loss": 11.7047, "step": 250520 }, { "epoch": 0.5060864506276337, "grad_norm": 470.3309020996094, "learning_rate": 5.850015560433926e-06, "loss": 20.9905, "step": 250530 }, { "epoch": 0.5061066512603175, "grad_norm": 375.3161926269531, "learning_rate": 5.849671573680532e-06, "loss": 9.8238, "step": 250540 }, { "epoch": 0.5061268518930013, "grad_norm": 307.17529296875, "learning_rate": 5.849327582785943e-06, "loss": 12.0072, "step": 250550 }, { "epoch": 0.5061470525256851, "grad_norm": 270.5221862792969, "learning_rate": 5.848983587751833e-06, "loss": 24.3678, "step": 250560 }, { "epoch": 0.506167253158369, "grad_norm": 412.85595703125, "learning_rate": 5.848639588579881e-06, "loss": 16.4555, "step": 250570 }, { "epoch": 0.5061874537910528, "grad_norm": 292.0421142578125, "learning_rate": 5.848295585271764e-06, "loss": 22.1581, "step": 250580 }, { "epoch": 0.5062076544237365, "grad_norm": 634.596435546875, "learning_rate": 5.847951577829153e-06, "loss": 16.357, "step": 250590 }, { "epoch": 0.5062278550564203, "grad_norm": 366.3712463378906, "learning_rate": 5.847607566253732e-06, "loss": 19.6015, "step": 250600 }, { "epoch": 0.5062480556891041, "grad_norm": 319.39727783203125, "learning_rate": 5.847263550547174e-06, "loss": 9.9481, "step": 250610 }, { "epoch": 0.506268256321788, "grad_norm": 373.22998046875, "learning_rate": 5.8469195307111555e-06, "loss": 22.338, "step": 250620 }, { "epoch": 0.5062884569544718, "grad_norm": 552.7167358398438, "learning_rate": 5.846575506747355e-06, "loss": 22.6938, "step": 250630 }, { "epoch": 0.5063086575871556, "grad_norm": 81.49649810791016, "learning_rate": 5.846231478657447e-06, "loss": 11.8021, "step": 250640 }, { "epoch": 0.5063288582198394, "grad_norm": 186.39144897460938, "learning_rate": 5.8458874464431115e-06, "loss": 16.271, "step": 250650 }, { "epoch": 0.5063490588525232, "grad_norm": 485.9855041503906, "learning_rate": 5.845543410106021e-06, "loss": 27.9823, "step": 250660 }, { "epoch": 0.5063692594852071, "grad_norm": 348.76275634765625, "learning_rate": 5.845199369647856e-06, "loss": 19.1944, "step": 250670 }, { "epoch": 0.5063894601178909, "grad_norm": 426.27056884765625, "learning_rate": 5.84485532507029e-06, "loss": 10.4866, "step": 250680 }, { "epoch": 0.5064096607505747, "grad_norm": 452.5116271972656, "learning_rate": 5.844511276375003e-06, "loss": 19.0644, "step": 250690 }, { "epoch": 0.5064298613832585, "grad_norm": 496.2027893066406, "learning_rate": 5.844167223563669e-06, "loss": 15.054, "step": 250700 }, { "epoch": 0.5064500620159423, "grad_norm": 269.6355285644531, "learning_rate": 5.8438231666379685e-06, "loss": 12.0207, "step": 250710 }, { "epoch": 0.5064702626486262, "grad_norm": 226.2335662841797, "learning_rate": 5.843479105599576e-06, "loss": 22.7242, "step": 250720 }, { "epoch": 0.50649046328131, 
"grad_norm": 126.28277587890625, "learning_rate": 5.843135040450168e-06, "loss": 16.7569, "step": 250730 }, { "epoch": 0.5065106639139938, "grad_norm": 127.23039245605469, "learning_rate": 5.842790971191422e-06, "loss": 19.1778, "step": 250740 }, { "epoch": 0.5065308645466776, "grad_norm": 298.5974426269531, "learning_rate": 5.842446897825014e-06, "loss": 39.9161, "step": 250750 }, { "epoch": 0.5065510651793614, "grad_norm": 450.5945739746094, "learning_rate": 5.842102820352623e-06, "loss": 12.08, "step": 250760 }, { "epoch": 0.5065712658120453, "grad_norm": 257.14300537109375, "learning_rate": 5.841758738775923e-06, "loss": 13.8731, "step": 250770 }, { "epoch": 0.5065914664447291, "grad_norm": 373.70355224609375, "learning_rate": 5.841414653096597e-06, "loss": 25.917, "step": 250780 }, { "epoch": 0.5066116670774129, "grad_norm": 348.8895568847656, "learning_rate": 5.841070563316316e-06, "loss": 19.5707, "step": 250790 }, { "epoch": 0.5066318677100967, "grad_norm": 2.082367181777954, "learning_rate": 5.840726469436758e-06, "loss": 32.1478, "step": 250800 }, { "epoch": 0.5066520683427805, "grad_norm": 360.0582275390625, "learning_rate": 5.840382371459603e-06, "loss": 10.6786, "step": 250810 }, { "epoch": 0.5066722689754644, "grad_norm": 214.38682556152344, "learning_rate": 5.8400382693865255e-06, "loss": 19.9817, "step": 250820 }, { "epoch": 0.5066924696081482, "grad_norm": 155.58001708984375, "learning_rate": 5.839694163219203e-06, "loss": 12.8881, "step": 250830 }, { "epoch": 0.506712670240832, "grad_norm": 572.53662109375, "learning_rate": 5.839350052959313e-06, "loss": 26.189, "step": 250840 }, { "epoch": 0.5067328708735157, "grad_norm": 106.50894927978516, "learning_rate": 5.839005938608533e-06, "loss": 12.8005, "step": 250850 }, { "epoch": 0.5067530715061995, "grad_norm": 314.3960266113281, "learning_rate": 5.838661820168539e-06, "loss": 21.8497, "step": 250860 }, { "epoch": 0.5067732721388833, "grad_norm": 221.12440490722656, "learning_rate": 5.838317697641011e-06, "loss": 12.328, "step": 250870 }, { "epoch": 0.5067934727715672, "grad_norm": 362.97442626953125, "learning_rate": 5.837973571027621e-06, "loss": 29.0551, "step": 250880 }, { "epoch": 0.506813673404251, "grad_norm": 234.4754180908203, "learning_rate": 5.837629440330053e-06, "loss": 16.8788, "step": 250890 }, { "epoch": 0.5068338740369348, "grad_norm": 167.98399353027344, "learning_rate": 5.837285305549978e-06, "loss": 16.4452, "step": 250900 }, { "epoch": 0.5068540746696186, "grad_norm": 617.789306640625, "learning_rate": 5.836941166689077e-06, "loss": 25.1946, "step": 250910 }, { "epoch": 0.5068742753023024, "grad_norm": 176.49386596679688, "learning_rate": 5.836597023749028e-06, "loss": 6.8574, "step": 250920 }, { "epoch": 0.5068944759349863, "grad_norm": 235.6635284423828, "learning_rate": 5.836252876731503e-06, "loss": 20.3856, "step": 250930 }, { "epoch": 0.5069146765676701, "grad_norm": 480.59088134765625, "learning_rate": 5.835908725638186e-06, "loss": 26.9011, "step": 250940 }, { "epoch": 0.5069348772003539, "grad_norm": 165.0316619873047, "learning_rate": 5.83556457047075e-06, "loss": 20.5717, "step": 250950 }, { "epoch": 0.5069550778330377, "grad_norm": 48.574119567871094, "learning_rate": 5.835220411230873e-06, "loss": 23.1372, "step": 250960 }, { "epoch": 0.5069752784657215, "grad_norm": 476.2835693359375, "learning_rate": 5.834876247920233e-06, "loss": 15.4405, "step": 250970 }, { "epoch": 0.5069954790984054, "grad_norm": 425.82135009765625, "learning_rate": 5.83453208054051e-06, "loss": 19.0542, "step": 
250980 }, { "epoch": 0.5070156797310892, "grad_norm": 557.5946044921875, "learning_rate": 5.834187909093376e-06, "loss": 22.1702, "step": 250990 }, { "epoch": 0.507035880363773, "grad_norm": 140.038330078125, "learning_rate": 5.8338437335805124e-06, "loss": 13.9805, "step": 251000 }, { "epoch": 0.5070560809964568, "grad_norm": 126.53459930419922, "learning_rate": 5.833499554003596e-06, "loss": 16.044, "step": 251010 }, { "epoch": 0.5070762816291406, "grad_norm": 181.307861328125, "learning_rate": 5.833155370364302e-06, "loss": 13.4199, "step": 251020 }, { "epoch": 0.5070964822618245, "grad_norm": 483.58795166015625, "learning_rate": 5.832811182664312e-06, "loss": 11.5067, "step": 251030 }, { "epoch": 0.5071166828945083, "grad_norm": 498.8536071777344, "learning_rate": 5.832466990905299e-06, "loss": 15.1246, "step": 251040 }, { "epoch": 0.5071368835271921, "grad_norm": 667.5546264648438, "learning_rate": 5.8321227950889455e-06, "loss": 20.355, "step": 251050 }, { "epoch": 0.5071570841598759, "grad_norm": 239.9335174560547, "learning_rate": 5.8317785952169245e-06, "loss": 19.6082, "step": 251060 }, { "epoch": 0.5071772847925597, "grad_norm": 232.27671813964844, "learning_rate": 5.8314343912909165e-06, "loss": 15.5812, "step": 251070 }, { "epoch": 0.5071974854252436, "grad_norm": 221.61940002441406, "learning_rate": 5.831090183312599e-06, "loss": 9.0896, "step": 251080 }, { "epoch": 0.5072176860579274, "grad_norm": 337.8977966308594, "learning_rate": 5.830745971283646e-06, "loss": 28.5588, "step": 251090 }, { "epoch": 0.5072378866906111, "grad_norm": 1154.0655517578125, "learning_rate": 5.83040175520574e-06, "loss": 26.3353, "step": 251100 }, { "epoch": 0.5072580873232949, "grad_norm": 484.496826171875, "learning_rate": 5.8300575350805555e-06, "loss": 18.4101, "step": 251110 }, { "epoch": 0.5072782879559787, "grad_norm": 516.824462890625, "learning_rate": 5.8297133109097715e-06, "loss": 23.6727, "step": 251120 }, { "epoch": 0.5072984885886626, "grad_norm": 518.7554931640625, "learning_rate": 5.829369082695066e-06, "loss": 29.8526, "step": 251130 }, { "epoch": 0.5073186892213464, "grad_norm": 459.8927307128906, "learning_rate": 5.8290248504381165e-06, "loss": 18.8124, "step": 251140 }, { "epoch": 0.5073388898540302, "grad_norm": 227.15487670898438, "learning_rate": 5.828680614140599e-06, "loss": 15.377, "step": 251150 }, { "epoch": 0.507359090486714, "grad_norm": 309.8097229003906, "learning_rate": 5.8283363738041945e-06, "loss": 23.4304, "step": 251160 }, { "epoch": 0.5073792911193978, "grad_norm": 334.9891357421875, "learning_rate": 5.827992129430578e-06, "loss": 28.0611, "step": 251170 }, { "epoch": 0.5073994917520817, "grad_norm": 742.1702270507812, "learning_rate": 5.827647881021428e-06, "loss": 7.7474, "step": 251180 }, { "epoch": 0.5074196923847655, "grad_norm": 364.31622314453125, "learning_rate": 5.827303628578424e-06, "loss": 14.3056, "step": 251190 }, { "epoch": 0.5074398930174493, "grad_norm": 50.556453704833984, "learning_rate": 5.826959372103239e-06, "loss": 21.4795, "step": 251200 }, { "epoch": 0.5074600936501331, "grad_norm": 287.05853271484375, "learning_rate": 5.826615111597558e-06, "loss": 16.0893, "step": 251210 }, { "epoch": 0.5074802942828169, "grad_norm": 238.78404235839844, "learning_rate": 5.826270847063053e-06, "loss": 26.9163, "step": 251220 }, { "epoch": 0.5075004949155008, "grad_norm": 780.0631713867188, "learning_rate": 5.8259265785014054e-06, "loss": 27.5146, "step": 251230 }, { "epoch": 0.5075206955481846, "grad_norm": 262.5494689941406, "learning_rate": 
5.82558230591429e-06, "loss": 20.3833, "step": 251240 }, { "epoch": 0.5075408961808684, "grad_norm": 645.9493408203125, "learning_rate": 5.825238029303388e-06, "loss": 22.9718, "step": 251250 }, { "epoch": 0.5075610968135522, "grad_norm": 572.199462890625, "learning_rate": 5.824893748670377e-06, "loss": 16.7412, "step": 251260 }, { "epoch": 0.507581297446236, "grad_norm": 315.9741516113281, "learning_rate": 5.824549464016933e-06, "loss": 23.4883, "step": 251270 }, { "epoch": 0.5076014980789199, "grad_norm": 11.374568939208984, "learning_rate": 5.824205175344735e-06, "loss": 24.5944, "step": 251280 }, { "epoch": 0.5076216987116037, "grad_norm": 324.0670471191406, "learning_rate": 5.82386088265546e-06, "loss": 29.2769, "step": 251290 }, { "epoch": 0.5076418993442875, "grad_norm": 503.2557067871094, "learning_rate": 5.823516585950787e-06, "loss": 25.7352, "step": 251300 }, { "epoch": 0.5076620999769713, "grad_norm": 333.05328369140625, "learning_rate": 5.823172285232394e-06, "loss": 17.1377, "step": 251310 }, { "epoch": 0.5076823006096551, "grad_norm": 811.625732421875, "learning_rate": 5.822827980501962e-06, "loss": 21.4771, "step": 251320 }, { "epoch": 0.507702501242339, "grad_norm": 373.1739196777344, "learning_rate": 5.822483671761164e-06, "loss": 13.2281, "step": 251330 }, { "epoch": 0.5077227018750228, "grad_norm": 341.700439453125, "learning_rate": 5.82213935901168e-06, "loss": 11.4042, "step": 251340 }, { "epoch": 0.5077429025077066, "grad_norm": 473.21063232421875, "learning_rate": 5.821795042255189e-06, "loss": 22.6602, "step": 251350 }, { "epoch": 0.5077631031403903, "grad_norm": 268.0250244140625, "learning_rate": 5.8214507214933666e-06, "loss": 18.5216, "step": 251360 }, { "epoch": 0.5077833037730741, "grad_norm": 366.32745361328125, "learning_rate": 5.821106396727897e-06, "loss": 10.8505, "step": 251370 }, { "epoch": 0.5078035044057579, "grad_norm": 469.3739318847656, "learning_rate": 5.820762067960451e-06, "loss": 24.6283, "step": 251380 }, { "epoch": 0.5078237050384418, "grad_norm": 320.2749328613281, "learning_rate": 5.820417735192712e-06, "loss": 31.9227, "step": 251390 }, { "epoch": 0.5078439056711256, "grad_norm": 76.45555114746094, "learning_rate": 5.8200733984263556e-06, "loss": 22.3928, "step": 251400 }, { "epoch": 0.5078641063038094, "grad_norm": 402.581787109375, "learning_rate": 5.819729057663062e-06, "loss": 25.8906, "step": 251410 }, { "epoch": 0.5078843069364932, "grad_norm": 353.9494934082031, "learning_rate": 5.819384712904508e-06, "loss": 20.1779, "step": 251420 }, { "epoch": 0.507904507569177, "grad_norm": 234.92645263671875, "learning_rate": 5.819040364152372e-06, "loss": 24.8317, "step": 251430 }, { "epoch": 0.5079247082018609, "grad_norm": 440.35650634765625, "learning_rate": 5.8186960114083325e-06, "loss": 15.5205, "step": 251440 }, { "epoch": 0.5079449088345447, "grad_norm": 169.0201873779297, "learning_rate": 5.818351654674067e-06, "loss": 13.7567, "step": 251450 }, { "epoch": 0.5079651094672285, "grad_norm": 264.24188232421875, "learning_rate": 5.818007293951255e-06, "loss": 32.0262, "step": 251460 }, { "epoch": 0.5079853100999123, "grad_norm": 0.0, "learning_rate": 5.817662929241576e-06, "loss": 24.136, "step": 251470 }, { "epoch": 0.5080055107325961, "grad_norm": 789.7282104492188, "learning_rate": 5.817318560546708e-06, "loss": 42.3933, "step": 251480 }, { "epoch": 0.50802571136528, "grad_norm": 419.7528381347656, "learning_rate": 5.8169741878683265e-06, "loss": 13.2167, "step": 251490 }, { "epoch": 0.5080459119979638, "grad_norm": 
12.599907875061035, "learning_rate": 5.816629811208112e-06, "loss": 22.8365, "step": 251500 }, { "epoch": 0.5080661126306476, "grad_norm": 86.8816909790039, "learning_rate": 5.816285430567743e-06, "loss": 19.1378, "step": 251510 }, { "epoch": 0.5080863132633314, "grad_norm": 216.23777770996094, "learning_rate": 5.815941045948898e-06, "loss": 21.5719, "step": 251520 }, { "epoch": 0.5081065138960152, "grad_norm": 589.9947509765625, "learning_rate": 5.815596657353257e-06, "loss": 10.7729, "step": 251530 }, { "epoch": 0.5081267145286991, "grad_norm": 20.174942016601562, "learning_rate": 5.815252264782493e-06, "loss": 18.9093, "step": 251540 }, { "epoch": 0.5081469151613829, "grad_norm": 552.2474365234375, "learning_rate": 5.814907868238291e-06, "loss": 51.3108, "step": 251550 }, { "epoch": 0.5081671157940667, "grad_norm": 189.50640869140625, "learning_rate": 5.814563467722328e-06, "loss": 16.073, "step": 251560 }, { "epoch": 0.5081873164267505, "grad_norm": 664.8473510742188, "learning_rate": 5.8142190632362785e-06, "loss": 30.1821, "step": 251570 }, { "epoch": 0.5082075170594343, "grad_norm": 403.3890075683594, "learning_rate": 5.813874654781825e-06, "loss": 21.0588, "step": 251580 }, { "epoch": 0.5082277176921182, "grad_norm": 323.449951171875, "learning_rate": 5.813530242360647e-06, "loss": 15.1011, "step": 251590 }, { "epoch": 0.508247918324802, "grad_norm": 279.55914306640625, "learning_rate": 5.813185825974419e-06, "loss": 18.119, "step": 251600 }, { "epoch": 0.5082681189574858, "grad_norm": 26.502635955810547, "learning_rate": 5.812841405624823e-06, "loss": 14.0624, "step": 251610 }, { "epoch": 0.5082883195901695, "grad_norm": 614.7074584960938, "learning_rate": 5.812496981313536e-06, "loss": 22.1417, "step": 251620 }, { "epoch": 0.5083085202228533, "grad_norm": 539.9589233398438, "learning_rate": 5.8121525530422375e-06, "loss": 23.0432, "step": 251630 }, { "epoch": 0.5083287208555372, "grad_norm": 368.3820495605469, "learning_rate": 5.811808120812607e-06, "loss": 14.6848, "step": 251640 }, { "epoch": 0.508348921488221, "grad_norm": 301.2969665527344, "learning_rate": 5.811463684626319e-06, "loss": 16.6718, "step": 251650 }, { "epoch": 0.5083691221209048, "grad_norm": 349.03167724609375, "learning_rate": 5.8111192444850586e-06, "loss": 18.4352, "step": 251660 }, { "epoch": 0.5083893227535886, "grad_norm": 709.9803466796875, "learning_rate": 5.8107748003905e-06, "loss": 30.3211, "step": 251670 }, { "epoch": 0.5084095233862724, "grad_norm": 171.62179565429688, "learning_rate": 5.810430352344324e-06, "loss": 24.3387, "step": 251680 }, { "epoch": 0.5084297240189563, "grad_norm": 160.02499389648438, "learning_rate": 5.810085900348209e-06, "loss": 24.2539, "step": 251690 }, { "epoch": 0.5084499246516401, "grad_norm": 749.2161865234375, "learning_rate": 5.809741444403831e-06, "loss": 19.7885, "step": 251700 }, { "epoch": 0.5084701252843239, "grad_norm": 212.47677612304688, "learning_rate": 5.809396984512875e-06, "loss": 26.7362, "step": 251710 }, { "epoch": 0.5084903259170077, "grad_norm": 433.6282958984375, "learning_rate": 5.8090525206770145e-06, "loss": 14.2857, "step": 251720 }, { "epoch": 0.5085105265496915, "grad_norm": 226.4599609375, "learning_rate": 5.808708052897931e-06, "loss": 18.0523, "step": 251730 }, { "epoch": 0.5085307271823754, "grad_norm": 71.1861572265625, "learning_rate": 5.808363581177301e-06, "loss": 16.9545, "step": 251740 }, { "epoch": 0.5085509278150592, "grad_norm": 45.71283721923828, "learning_rate": 5.8080191055168064e-06, "loss": 22.1804, "step": 251750 }, { 
"epoch": 0.508571128447743, "grad_norm": 186.9651641845703, "learning_rate": 5.807674625918125e-06, "loss": 13.9291, "step": 251760 }, { "epoch": 0.5085913290804268, "grad_norm": 365.13323974609375, "learning_rate": 5.807330142382934e-06, "loss": 13.7237, "step": 251770 }, { "epoch": 0.5086115297131106, "grad_norm": 274.8147277832031, "learning_rate": 5.806985654912915e-06, "loss": 23.8939, "step": 251780 }, { "epoch": 0.5086317303457945, "grad_norm": 50.02983856201172, "learning_rate": 5.806641163509744e-06, "loss": 12.7504, "step": 251790 }, { "epoch": 0.5086519309784783, "grad_norm": 92.12311553955078, "learning_rate": 5.8062966681751046e-06, "loss": 9.1826, "step": 251800 }, { "epoch": 0.5086721316111621, "grad_norm": 304.03076171875, "learning_rate": 5.805952168910669e-06, "loss": 22.5051, "step": 251810 }, { "epoch": 0.5086923322438459, "grad_norm": 247.33364868164062, "learning_rate": 5.805607665718124e-06, "loss": 20.1043, "step": 251820 }, { "epoch": 0.5087125328765297, "grad_norm": 162.25067138671875, "learning_rate": 5.805263158599143e-06, "loss": 11.2727, "step": 251830 }, { "epoch": 0.5087327335092136, "grad_norm": 436.57501220703125, "learning_rate": 5.804918647555408e-06, "loss": 18.7904, "step": 251840 }, { "epoch": 0.5087529341418974, "grad_norm": 581.3582763671875, "learning_rate": 5.8045741325885965e-06, "loss": 20.1834, "step": 251850 }, { "epoch": 0.5087731347745812, "grad_norm": 701.9457397460938, "learning_rate": 5.804229613700389e-06, "loss": 37.8337, "step": 251860 }, { "epoch": 0.5087933354072649, "grad_norm": 313.239501953125, "learning_rate": 5.803885090892464e-06, "loss": 21.4028, "step": 251870 }, { "epoch": 0.5088135360399487, "grad_norm": 387.37603759765625, "learning_rate": 5.8035405641665e-06, "loss": 28.1262, "step": 251880 }, { "epoch": 0.5088337366726325, "grad_norm": 724.362548828125, "learning_rate": 5.803196033524176e-06, "loss": 19.9967, "step": 251890 }, { "epoch": 0.5088539373053164, "grad_norm": 1209.24462890625, "learning_rate": 5.802851498967173e-06, "loss": 34.1658, "step": 251900 }, { "epoch": 0.5088741379380002, "grad_norm": 819.9832153320312, "learning_rate": 5.802506960497168e-06, "loss": 33.4126, "step": 251910 }, { "epoch": 0.508894338570684, "grad_norm": 118.30169677734375, "learning_rate": 5.802162418115842e-06, "loss": 17.9593, "step": 251920 }, { "epoch": 0.5089145392033678, "grad_norm": 468.00677490234375, "learning_rate": 5.801817871824876e-06, "loss": 17.9862, "step": 251930 }, { "epoch": 0.5089347398360516, "grad_norm": 931.7262573242188, "learning_rate": 5.801473321625944e-06, "loss": 8.3095, "step": 251940 }, { "epoch": 0.5089549404687355, "grad_norm": 596.0464477539062, "learning_rate": 5.80112876752073e-06, "loss": 18.7461, "step": 251950 }, { "epoch": 0.5089751411014193, "grad_norm": 527.8135375976562, "learning_rate": 5.80078420951091e-06, "loss": 32.7817, "step": 251960 }, { "epoch": 0.5089953417341031, "grad_norm": 1024.0352783203125, "learning_rate": 5.800439647598165e-06, "loss": 24.7098, "step": 251970 }, { "epoch": 0.5090155423667869, "grad_norm": 235.0797882080078, "learning_rate": 5.800095081784176e-06, "loss": 25.3951, "step": 251980 }, { "epoch": 0.5090357429994707, "grad_norm": 352.7430419921875, "learning_rate": 5.799750512070618e-06, "loss": 16.6318, "step": 251990 }, { "epoch": 0.5090559436321546, "grad_norm": 589.234130859375, "learning_rate": 5.799405938459175e-06, "loss": 29.4818, "step": 252000 }, { "epoch": 0.5090761442648384, "grad_norm": 716.2567138671875, "learning_rate": 5.7990613609515235e-06, 
"loss": 19.7052, "step": 252010 }, { "epoch": 0.5090963448975222, "grad_norm": 107.27327728271484, "learning_rate": 5.798716779549344e-06, "loss": 31.656, "step": 252020 }, { "epoch": 0.509116545530206, "grad_norm": 171.72300720214844, "learning_rate": 5.798372194254317e-06, "loss": 25.5078, "step": 252030 }, { "epoch": 0.5091367461628898, "grad_norm": 158.5902557373047, "learning_rate": 5.7980276050681195e-06, "loss": 17.2632, "step": 252040 }, { "epoch": 0.5091569467955737, "grad_norm": 378.5718078613281, "learning_rate": 5.797683011992432e-06, "loss": 8.9759, "step": 252050 }, { "epoch": 0.5091771474282575, "grad_norm": 105.90921783447266, "learning_rate": 5.797338415028934e-06, "loss": 25.6245, "step": 252060 }, { "epoch": 0.5091973480609413, "grad_norm": 2.521341562271118, "learning_rate": 5.796993814179307e-06, "loss": 25.0027, "step": 252070 }, { "epoch": 0.5092175486936251, "grad_norm": 0.7935197353363037, "learning_rate": 5.796649209445227e-06, "loss": 16.7548, "step": 252080 }, { "epoch": 0.5092377493263089, "grad_norm": 747.4075927734375, "learning_rate": 5.7963046008283775e-06, "loss": 28.5457, "step": 252090 }, { "epoch": 0.5092579499589928, "grad_norm": 412.5313720703125, "learning_rate": 5.795959988330434e-06, "loss": 22.6423, "step": 252100 }, { "epoch": 0.5092781505916766, "grad_norm": 444.43450927734375, "learning_rate": 5.795615371953078e-06, "loss": 23.1515, "step": 252110 }, { "epoch": 0.5092983512243604, "grad_norm": 58.15255355834961, "learning_rate": 5.795270751697991e-06, "loss": 14.8879, "step": 252120 }, { "epoch": 0.5093185518570441, "grad_norm": 1216.7862548828125, "learning_rate": 5.794926127566849e-06, "loss": 15.4683, "step": 252130 }, { "epoch": 0.5093387524897279, "grad_norm": 250.36837768554688, "learning_rate": 5.794581499561335e-06, "loss": 22.2441, "step": 252140 }, { "epoch": 0.5093589531224118, "grad_norm": 504.7484130859375, "learning_rate": 5.794236867683125e-06, "loss": 17.2118, "step": 252150 }, { "epoch": 0.5093791537550956, "grad_norm": 375.27716064453125, "learning_rate": 5.793892231933903e-06, "loss": 18.3234, "step": 252160 }, { "epoch": 0.5093993543877794, "grad_norm": 766.3939208984375, "learning_rate": 5.793547592315345e-06, "loss": 45.5151, "step": 252170 }, { "epoch": 0.5094195550204632, "grad_norm": 1855.5390625, "learning_rate": 5.793202948829133e-06, "loss": 28.2413, "step": 252180 }, { "epoch": 0.509439755653147, "grad_norm": 433.4533386230469, "learning_rate": 5.792858301476946e-06, "loss": 21.4306, "step": 252190 }, { "epoch": 0.5094599562858309, "grad_norm": 436.69561767578125, "learning_rate": 5.792513650260465e-06, "loss": 18.5973, "step": 252200 }, { "epoch": 0.5094801569185147, "grad_norm": 900.4495849609375, "learning_rate": 5.792168995181366e-06, "loss": 30.9446, "step": 252210 }, { "epoch": 0.5095003575511985, "grad_norm": 286.23773193359375, "learning_rate": 5.791824336241334e-06, "loss": 8.588, "step": 252220 }, { "epoch": 0.5095205581838823, "grad_norm": 244.795166015625, "learning_rate": 5.791479673442044e-06, "loss": 34.9819, "step": 252230 }, { "epoch": 0.5095407588165661, "grad_norm": 282.7953186035156, "learning_rate": 5.791135006785179e-06, "loss": 10.0487, "step": 252240 }, { "epoch": 0.50956095944925, "grad_norm": 665.7476806640625, "learning_rate": 5.7907903362724195e-06, "loss": 22.644, "step": 252250 }, { "epoch": 0.5095811600819338, "grad_norm": 290.9110107421875, "learning_rate": 5.790445661905441e-06, "loss": 21.4393, "step": 252260 }, { "epoch": 0.5096013607146176, "grad_norm": 214.25326538085938, 
"learning_rate": 5.790100983685928e-06, "loss": 14.4459, "step": 252270 }, { "epoch": 0.5096215613473014, "grad_norm": 601.002197265625, "learning_rate": 5.789756301615558e-06, "loss": 15.1507, "step": 252280 }, { "epoch": 0.5096417619799852, "grad_norm": 554.1863403320312, "learning_rate": 5.7894116156960115e-06, "loss": 15.7812, "step": 252290 }, { "epoch": 0.509661962612669, "grad_norm": 393.3742370605469, "learning_rate": 5.78906692592897e-06, "loss": 16.9574, "step": 252300 }, { "epoch": 0.5096821632453529, "grad_norm": 228.7798614501953, "learning_rate": 5.788722232316109e-06, "loss": 20.7032, "step": 252310 }, { "epoch": 0.5097023638780367, "grad_norm": 166.04469299316406, "learning_rate": 5.7883775348591146e-06, "loss": 16.6758, "step": 252320 }, { "epoch": 0.5097225645107205, "grad_norm": 538.080322265625, "learning_rate": 5.788032833559661e-06, "loss": 25.5036, "step": 252330 }, { "epoch": 0.5097427651434043, "grad_norm": 511.0445556640625, "learning_rate": 5.787688128419433e-06, "loss": 19.0578, "step": 252340 }, { "epoch": 0.5097629657760882, "grad_norm": 162.63348388671875, "learning_rate": 5.787343419440108e-06, "loss": 10.8651, "step": 252350 }, { "epoch": 0.509783166408772, "grad_norm": 405.1139831542969, "learning_rate": 5.786998706623365e-06, "loss": 24.8399, "step": 252360 }, { "epoch": 0.5098033670414558, "grad_norm": 380.32696533203125, "learning_rate": 5.786653989970889e-06, "loss": 11.9572, "step": 252370 }, { "epoch": 0.5098235676741395, "grad_norm": 551.205078125, "learning_rate": 5.786309269484355e-06, "loss": 19.7167, "step": 252380 }, { "epoch": 0.5098437683068233, "grad_norm": 614.0591430664062, "learning_rate": 5.785964545165446e-06, "loss": 13.8383, "step": 252390 }, { "epoch": 0.5098639689395071, "grad_norm": 268.666748046875, "learning_rate": 5.78561981701584e-06, "loss": 14.0354, "step": 252400 }, { "epoch": 0.509884169572191, "grad_norm": 324.810546875, "learning_rate": 5.785275085037218e-06, "loss": 23.4903, "step": 252410 }, { "epoch": 0.5099043702048748, "grad_norm": 612.3726806640625, "learning_rate": 5.7849303492312605e-06, "loss": 26.5904, "step": 252420 }, { "epoch": 0.5099245708375586, "grad_norm": 768.1057739257812, "learning_rate": 5.784585609599649e-06, "loss": 21.4947, "step": 252430 }, { "epoch": 0.5099447714702424, "grad_norm": 147.74560546875, "learning_rate": 5.784240866144062e-06, "loss": 10.7797, "step": 252440 }, { "epoch": 0.5099649721029262, "grad_norm": 275.04443359375, "learning_rate": 5.783896118866179e-06, "loss": 17.4282, "step": 252450 }, { "epoch": 0.5099851727356101, "grad_norm": 835.8368530273438, "learning_rate": 5.783551367767683e-06, "loss": 27.2984, "step": 252460 }, { "epoch": 0.5100053733682939, "grad_norm": 524.8331909179688, "learning_rate": 5.783206612850251e-06, "loss": 17.2435, "step": 252470 }, { "epoch": 0.5100255740009777, "grad_norm": 152.20242309570312, "learning_rate": 5.782861854115567e-06, "loss": 19.86, "step": 252480 }, { "epoch": 0.5100457746336615, "grad_norm": 120.97262573242188, "learning_rate": 5.782517091565308e-06, "loss": 17.7812, "step": 252490 }, { "epoch": 0.5100659752663453, "grad_norm": 725.4302368164062, "learning_rate": 5.782172325201155e-06, "loss": 24.184, "step": 252500 }, { "epoch": 0.5100861758990292, "grad_norm": 344.2830810546875, "learning_rate": 5.78182755502479e-06, "loss": 12.7987, "step": 252510 }, { "epoch": 0.510106376531713, "grad_norm": 522.1859130859375, "learning_rate": 5.781482781037892e-06, "loss": 14.7735, "step": 252520 }, { "epoch": 0.5101265771643968, "grad_norm": 
416.50677490234375, "learning_rate": 5.781138003242141e-06, "loss": 24.5774, "step": 252530 }, { "epoch": 0.5101467777970806, "grad_norm": 499.1576843261719, "learning_rate": 5.780793221639219e-06, "loss": 17.9837, "step": 252540 }, { "epoch": 0.5101669784297644, "grad_norm": 476.3972473144531, "learning_rate": 5.780448436230805e-06, "loss": 11.0461, "step": 252550 }, { "epoch": 0.5101871790624483, "grad_norm": 1126.03125, "learning_rate": 5.7801036470185815e-06, "loss": 48.5763, "step": 252560 }, { "epoch": 0.5102073796951321, "grad_norm": 354.3380126953125, "learning_rate": 5.779758854004226e-06, "loss": 21.4074, "step": 252570 }, { "epoch": 0.5102275803278159, "grad_norm": 568.8021850585938, "learning_rate": 5.77941405718942e-06, "loss": 15.7251, "step": 252580 }, { "epoch": 0.5102477809604997, "grad_norm": 391.58837890625, "learning_rate": 5.779069256575846e-06, "loss": 34.3455, "step": 252590 }, { "epoch": 0.5102679815931835, "grad_norm": 257.9892883300781, "learning_rate": 5.778724452165181e-06, "loss": 15.345, "step": 252600 }, { "epoch": 0.5102881822258674, "grad_norm": 639.9202880859375, "learning_rate": 5.7783796439591085e-06, "loss": 21.1505, "step": 252610 }, { "epoch": 0.5103083828585512, "grad_norm": 66.07951354980469, "learning_rate": 5.778034831959308e-06, "loss": 21.2015, "step": 252620 }, { "epoch": 0.510328583491235, "grad_norm": 434.8056640625, "learning_rate": 5.77769001616746e-06, "loss": 34.4328, "step": 252630 }, { "epoch": 0.5103487841239187, "grad_norm": 177.5913543701172, "learning_rate": 5.777345196585247e-06, "loss": 26.5298, "step": 252640 }, { "epoch": 0.5103689847566025, "grad_norm": 686.2937622070312, "learning_rate": 5.777000373214345e-06, "loss": 18.0848, "step": 252650 }, { "epoch": 0.5103891853892863, "grad_norm": 237.0764923095703, "learning_rate": 5.776655546056439e-06, "loss": 14.7615, "step": 252660 }, { "epoch": 0.5104093860219702, "grad_norm": 405.82220458984375, "learning_rate": 5.776310715113207e-06, "loss": 25.7658, "step": 252670 }, { "epoch": 0.510429586654654, "grad_norm": 750.3215942382812, "learning_rate": 5.77596588038633e-06, "loss": 26.7581, "step": 252680 }, { "epoch": 0.5104497872873378, "grad_norm": 160.1605987548828, "learning_rate": 5.775621041877491e-06, "loss": 30.4518, "step": 252690 }, { "epoch": 0.5104699879200216, "grad_norm": 259.1350402832031, "learning_rate": 5.77527619958837e-06, "loss": 13.9909, "step": 252700 }, { "epoch": 0.5104901885527054, "grad_norm": 233.79591369628906, "learning_rate": 5.774931353520645e-06, "loss": 14.9685, "step": 252710 }, { "epoch": 0.5105103891853893, "grad_norm": 95.31790161132812, "learning_rate": 5.774586503676e-06, "loss": 20.0699, "step": 252720 }, { "epoch": 0.5105305898180731, "grad_norm": 421.1565246582031, "learning_rate": 5.774241650056114e-06, "loss": 28.0121, "step": 252730 }, { "epoch": 0.5105507904507569, "grad_norm": 364.62060546875, "learning_rate": 5.773896792662666e-06, "loss": 21.6366, "step": 252740 }, { "epoch": 0.5105709910834407, "grad_norm": 695.878173828125, "learning_rate": 5.773551931497342e-06, "loss": 20.6741, "step": 252750 }, { "epoch": 0.5105911917161245, "grad_norm": 794.6965942382812, "learning_rate": 5.773207066561817e-06, "loss": 23.7065, "step": 252760 }, { "epoch": 0.5106113923488084, "grad_norm": 835.0409545898438, "learning_rate": 5.772862197857776e-06, "loss": 25.0538, "step": 252770 }, { "epoch": 0.5106315929814922, "grad_norm": 234.1491241455078, "learning_rate": 5.772517325386898e-06, "loss": 6.7746, "step": 252780 }, { "epoch": 0.510651793614176, 
"grad_norm": 401.83001708984375, "learning_rate": 5.772172449150865e-06, "loss": 16.4155, "step": 252790 }, { "epoch": 0.5106719942468598, "grad_norm": 479.2789306640625, "learning_rate": 5.771827569151357e-06, "loss": 18.3182, "step": 252800 }, { "epoch": 0.5106921948795436, "grad_norm": 499.20068359375, "learning_rate": 5.771482685390053e-06, "loss": 18.7645, "step": 252810 }, { "epoch": 0.5107123955122275, "grad_norm": 444.3695373535156, "learning_rate": 5.7711377978686385e-06, "loss": 23.063, "step": 252820 }, { "epoch": 0.5107325961449113, "grad_norm": 402.5401916503906, "learning_rate": 5.770792906588791e-06, "loss": 24.9544, "step": 252830 }, { "epoch": 0.5107527967775951, "grad_norm": 206.49508666992188, "learning_rate": 5.770448011552192e-06, "loss": 13.1938, "step": 252840 }, { "epoch": 0.5107729974102789, "grad_norm": 178.63812255859375, "learning_rate": 5.770103112760523e-06, "loss": 19.3979, "step": 252850 }, { "epoch": 0.5107931980429627, "grad_norm": 398.7472229003906, "learning_rate": 5.769758210215466e-06, "loss": 14.4969, "step": 252860 }, { "epoch": 0.5108133986756466, "grad_norm": 195.69021606445312, "learning_rate": 5.7694133039186986e-06, "loss": 19.1735, "step": 252870 }, { "epoch": 0.5108335993083304, "grad_norm": 182.81033325195312, "learning_rate": 5.7690683938719065e-06, "loss": 18.1955, "step": 252880 }, { "epoch": 0.5108537999410141, "grad_norm": 49.81098175048828, "learning_rate": 5.7687234800767666e-06, "loss": 9.6713, "step": 252890 }, { "epoch": 0.5108740005736979, "grad_norm": 829.9020385742188, "learning_rate": 5.768378562534962e-06, "loss": 13.2165, "step": 252900 }, { "epoch": 0.5108942012063817, "grad_norm": 767.9072875976562, "learning_rate": 5.768033641248174e-06, "loss": 31.3949, "step": 252910 }, { "epoch": 0.5109144018390656, "grad_norm": 1046.8443603515625, "learning_rate": 5.767688716218083e-06, "loss": 15.0424, "step": 252920 }, { "epoch": 0.5109346024717494, "grad_norm": 657.3392333984375, "learning_rate": 5.76734378744637e-06, "loss": 18.826, "step": 252930 }, { "epoch": 0.5109548031044332, "grad_norm": 34.600929260253906, "learning_rate": 5.766998854934716e-06, "loss": 21.9367, "step": 252940 }, { "epoch": 0.510975003737117, "grad_norm": 368.00738525390625, "learning_rate": 5.766653918684803e-06, "loss": 20.0687, "step": 252950 }, { "epoch": 0.5109952043698008, "grad_norm": 178.34298706054688, "learning_rate": 5.766308978698313e-06, "loss": 14.0907, "step": 252960 }, { "epoch": 0.5110154050024847, "grad_norm": 337.9991455078125, "learning_rate": 5.765964034976924e-06, "loss": 21.5121, "step": 252970 }, { "epoch": 0.5110356056351685, "grad_norm": 248.5225372314453, "learning_rate": 5.765619087522322e-06, "loss": 14.3779, "step": 252980 }, { "epoch": 0.5110558062678523, "grad_norm": 1093.2027587890625, "learning_rate": 5.765274136336183e-06, "loss": 24.5636, "step": 252990 }, { "epoch": 0.5110760069005361, "grad_norm": 521.7022094726562, "learning_rate": 5.764929181420191e-06, "loss": 14.9611, "step": 253000 }, { "epoch": 0.5110962075332199, "grad_norm": 223.78298950195312, "learning_rate": 5.7645842227760274e-06, "loss": 24.0574, "step": 253010 }, { "epoch": 0.5111164081659038, "grad_norm": 751.35107421875, "learning_rate": 5.764239260405373e-06, "loss": 27.4194, "step": 253020 }, { "epoch": 0.5111366087985876, "grad_norm": 38.23499298095703, "learning_rate": 5.763894294309909e-06, "loss": 12.9641, "step": 253030 }, { "epoch": 0.5111568094312714, "grad_norm": 295.8575439453125, "learning_rate": 5.763549324491317e-06, "loss": 19.4094, "step": 
253040 }, { "epoch": 0.5111770100639552, "grad_norm": 838.633544921875, "learning_rate": 5.763204350951278e-06, "loss": 14.7185, "step": 253050 }, { "epoch": 0.511197210696639, "grad_norm": 187.49452209472656, "learning_rate": 5.762859373691473e-06, "loss": 21.8893, "step": 253060 }, { "epoch": 0.5112174113293229, "grad_norm": 103.93560028076172, "learning_rate": 5.7625143927135854e-06, "loss": 16.6399, "step": 253070 }, { "epoch": 0.5112376119620067, "grad_norm": 755.8477783203125, "learning_rate": 5.762169408019293e-06, "loss": 31.7384, "step": 253080 }, { "epoch": 0.5112578125946905, "grad_norm": 473.8900146484375, "learning_rate": 5.761824419610282e-06, "loss": 10.6826, "step": 253090 }, { "epoch": 0.5112780132273743, "grad_norm": 146.130126953125, "learning_rate": 5.761479427488229e-06, "loss": 13.3441, "step": 253100 }, { "epoch": 0.5112982138600581, "grad_norm": 576.4959716796875, "learning_rate": 5.761134431654819e-06, "loss": 27.9051, "step": 253110 }, { "epoch": 0.511318414492742, "grad_norm": 323.2041015625, "learning_rate": 5.760789432111731e-06, "loss": 19.4123, "step": 253120 }, { "epoch": 0.5113386151254258, "grad_norm": 352.3463134765625, "learning_rate": 5.760444428860648e-06, "loss": 20.8338, "step": 253130 }, { "epoch": 0.5113588157581096, "grad_norm": 374.3561706542969, "learning_rate": 5.760099421903253e-06, "loss": 23.0967, "step": 253140 }, { "epoch": 0.5113790163907933, "grad_norm": 342.5068664550781, "learning_rate": 5.7597544112412225e-06, "loss": 14.8995, "step": 253150 }, { "epoch": 0.5113992170234771, "grad_norm": 0.0, "learning_rate": 5.759409396876242e-06, "loss": 14.4945, "step": 253160 }, { "epoch": 0.511419417656161, "grad_norm": 573.2758178710938, "learning_rate": 5.759064378809993e-06, "loss": 22.1909, "step": 253170 }, { "epoch": 0.5114396182888448, "grad_norm": 178.08120727539062, "learning_rate": 5.758719357044157e-06, "loss": 25.4603, "step": 253180 }, { "epoch": 0.5114598189215286, "grad_norm": 398.0236511230469, "learning_rate": 5.758374331580412e-06, "loss": 21.4598, "step": 253190 }, { "epoch": 0.5114800195542124, "grad_norm": 313.60833740234375, "learning_rate": 5.7580293024204455e-06, "loss": 10.8735, "step": 253200 }, { "epoch": 0.5115002201868962, "grad_norm": 372.1860656738281, "learning_rate": 5.7576842695659344e-06, "loss": 9.1743, "step": 253210 }, { "epoch": 0.51152042081958, "grad_norm": 416.085693359375, "learning_rate": 5.757339233018563e-06, "loss": 23.3086, "step": 253220 }, { "epoch": 0.5115406214522639, "grad_norm": 387.45050048828125, "learning_rate": 5.756994192780011e-06, "loss": 19.3354, "step": 253230 }, { "epoch": 0.5115608220849477, "grad_norm": 243.62307739257812, "learning_rate": 5.756649148851962e-06, "loss": 24.3164, "step": 253240 }, { "epoch": 0.5115810227176315, "grad_norm": 541.02392578125, "learning_rate": 5.7563041012360975e-06, "loss": 23.0536, "step": 253250 }, { "epoch": 0.5116012233503153, "grad_norm": 83.83334350585938, "learning_rate": 5.7559590499340965e-06, "loss": 22.3992, "step": 253260 }, { "epoch": 0.5116214239829991, "grad_norm": 95.6950454711914, "learning_rate": 5.7556139949476445e-06, "loss": 14.3515, "step": 253270 }, { "epoch": 0.511641624615683, "grad_norm": 318.7217712402344, "learning_rate": 5.755268936278421e-06, "loss": 30.4365, "step": 253280 }, { "epoch": 0.5116618252483668, "grad_norm": 155.037841796875, "learning_rate": 5.754923873928108e-06, "loss": 22.2358, "step": 253290 }, { "epoch": 0.5116820258810506, "grad_norm": 143.9044647216797, "learning_rate": 5.7545788078983875e-06, "loss": 
16.5542, "step": 253300 }, { "epoch": 0.5117022265137344, "grad_norm": 230.00341796875, "learning_rate": 5.754233738190942e-06, "loss": 26.4446, "step": 253310 }, { "epoch": 0.5117224271464182, "grad_norm": 146.52598571777344, "learning_rate": 5.753888664807452e-06, "loss": 24.9038, "step": 253320 }, { "epoch": 0.5117426277791021, "grad_norm": 275.0461730957031, "learning_rate": 5.753543587749601e-06, "loss": 29.9561, "step": 253330 }, { "epoch": 0.5117628284117859, "grad_norm": 716.48974609375, "learning_rate": 5.753198507019068e-06, "loss": 44.3443, "step": 253340 }, { "epoch": 0.5117830290444697, "grad_norm": 387.1690979003906, "learning_rate": 5.752853422617539e-06, "loss": 15.3674, "step": 253350 }, { "epoch": 0.5118032296771535, "grad_norm": 387.6558837890625, "learning_rate": 5.752508334546695e-06, "loss": 16.1463, "step": 253360 }, { "epoch": 0.5118234303098373, "grad_norm": 194.35902404785156, "learning_rate": 5.7521632428082135e-06, "loss": 13.0076, "step": 253370 }, { "epoch": 0.5118436309425212, "grad_norm": 469.7440185546875, "learning_rate": 5.75181814740378e-06, "loss": 35.026, "step": 253380 }, { "epoch": 0.511863831575205, "grad_norm": 282.68035888671875, "learning_rate": 5.751473048335078e-06, "loss": 13.1574, "step": 253390 }, { "epoch": 0.5118840322078888, "grad_norm": 79.40476989746094, "learning_rate": 5.751127945603786e-06, "loss": 4.7272, "step": 253400 }, { "epoch": 0.5119042328405725, "grad_norm": 745.161376953125, "learning_rate": 5.750782839211588e-06, "loss": 19.8504, "step": 253410 }, { "epoch": 0.5119244334732563, "grad_norm": 469.4684753417969, "learning_rate": 5.750437729160165e-06, "loss": 15.7141, "step": 253420 }, { "epoch": 0.5119446341059402, "grad_norm": 262.9376220703125, "learning_rate": 5.7500926154512e-06, "loss": 9.5011, "step": 253430 }, { "epoch": 0.511964834738624, "grad_norm": 455.0301208496094, "learning_rate": 5.749747498086374e-06, "loss": 12.2369, "step": 253440 }, { "epoch": 0.5119850353713078, "grad_norm": 1525.0181884765625, "learning_rate": 5.7494023770673705e-06, "loss": 22.2308, "step": 253450 }, { "epoch": 0.5120052360039916, "grad_norm": 812.306640625, "learning_rate": 5.74905725239587e-06, "loss": 21.5862, "step": 253460 }, { "epoch": 0.5120254366366754, "grad_norm": 190.3341064453125, "learning_rate": 5.748712124073556e-06, "loss": 12.51, "step": 253470 }, { "epoch": 0.5120456372693593, "grad_norm": 274.8276062011719, "learning_rate": 5.74836699210211e-06, "loss": 18.3088, "step": 253480 }, { "epoch": 0.5120658379020431, "grad_norm": 347.3299865722656, "learning_rate": 5.748021856483212e-06, "loss": 13.7744, "step": 253490 }, { "epoch": 0.5120860385347269, "grad_norm": 299.65985107421875, "learning_rate": 5.747676717218549e-06, "loss": 15.5967, "step": 253500 }, { "epoch": 0.5121062391674107, "grad_norm": 403.5169677734375, "learning_rate": 5.747331574309798e-06, "loss": 18.5492, "step": 253510 }, { "epoch": 0.5121264398000945, "grad_norm": 1063.2816162109375, "learning_rate": 5.746986427758645e-06, "loss": 29.8813, "step": 253520 }, { "epoch": 0.5121466404327784, "grad_norm": 6.9237847328186035, "learning_rate": 5.74664127756677e-06, "loss": 24.8315, "step": 253530 }, { "epoch": 0.5121668410654622, "grad_norm": 442.2906494140625, "learning_rate": 5.746296123735857e-06, "loss": 34.304, "step": 253540 }, { "epoch": 0.512187041698146, "grad_norm": 857.4563598632812, "learning_rate": 5.745950966267586e-06, "loss": 16.9816, "step": 253550 }, { "epoch": 0.5122072423308298, "grad_norm": 299.3878173828125, "learning_rate": 
5.745605805163641e-06, "loss": 19.3526, "step": 253560 }, { "epoch": 0.5122274429635136, "grad_norm": 259.07257080078125, "learning_rate": 5.745260640425704e-06, "loss": 14.5288, "step": 253570 }, { "epoch": 0.5122476435961975, "grad_norm": 36.66910171508789, "learning_rate": 5.744915472055457e-06, "loss": 14.4111, "step": 253580 }, { "epoch": 0.5122678442288813, "grad_norm": 726.9290161132812, "learning_rate": 5.744570300054583e-06, "loss": 28.6098, "step": 253590 }, { "epoch": 0.5122880448615651, "grad_norm": 689.5191650390625, "learning_rate": 5.744225124424762e-06, "loss": 39.731, "step": 253600 }, { "epoch": 0.5123082454942489, "grad_norm": 485.571533203125, "learning_rate": 5.743879945167678e-06, "loss": 25.2861, "step": 253610 }, { "epoch": 0.5123284461269327, "grad_norm": 499.5935363769531, "learning_rate": 5.7435347622850146e-06, "loss": 20.1543, "step": 253620 }, { "epoch": 0.5123486467596166, "grad_norm": 41.875450134277344, "learning_rate": 5.743189575778452e-06, "loss": 11.8493, "step": 253630 }, { "epoch": 0.5123688473923004, "grad_norm": 240.07386779785156, "learning_rate": 5.742844385649674e-06, "loss": 19.8759, "step": 253640 }, { "epoch": 0.5123890480249842, "grad_norm": 681.5988159179688, "learning_rate": 5.742499191900364e-06, "loss": 21.1939, "step": 253650 }, { "epoch": 0.5124092486576679, "grad_norm": 275.4928894042969, "learning_rate": 5.7421539945322006e-06, "loss": 16.5793, "step": 253660 }, { "epoch": 0.5124294492903517, "grad_norm": 194.4099884033203, "learning_rate": 5.7418087935468706e-06, "loss": 16.2816, "step": 253670 }, { "epoch": 0.5124496499230355, "grad_norm": 355.28955078125, "learning_rate": 5.741463588946053e-06, "loss": 12.8818, "step": 253680 }, { "epoch": 0.5124698505557194, "grad_norm": 100.03331756591797, "learning_rate": 5.741118380731432e-06, "loss": 18.4986, "step": 253690 }, { "epoch": 0.5124900511884032, "grad_norm": 741.4827270507812, "learning_rate": 5.740773168904691e-06, "loss": 16.5638, "step": 253700 }, { "epoch": 0.512510251821087, "grad_norm": 311.86761474609375, "learning_rate": 5.74042795346751e-06, "loss": 15.7162, "step": 253710 }, { "epoch": 0.5125304524537708, "grad_norm": 492.0791931152344, "learning_rate": 5.740082734421574e-06, "loss": 19.1631, "step": 253720 }, { "epoch": 0.5125506530864546, "grad_norm": 187.37074279785156, "learning_rate": 5.7397375117685635e-06, "loss": 13.953, "step": 253730 }, { "epoch": 0.5125708537191385, "grad_norm": 188.18902587890625, "learning_rate": 5.739392285510162e-06, "loss": 29.3611, "step": 253740 }, { "epoch": 0.5125910543518223, "grad_norm": 133.94537353515625, "learning_rate": 5.7390470556480545e-06, "loss": 21.0016, "step": 253750 }, { "epoch": 0.5126112549845061, "grad_norm": 392.44189453125, "learning_rate": 5.7387018221839195e-06, "loss": 15.3782, "step": 253760 }, { "epoch": 0.5126314556171899, "grad_norm": 450.6122131347656, "learning_rate": 5.738356585119441e-06, "loss": 15.1867, "step": 253770 }, { "epoch": 0.5126516562498737, "grad_norm": 307.326416015625, "learning_rate": 5.738011344456302e-06, "loss": 20.9072, "step": 253780 }, { "epoch": 0.5126718568825576, "grad_norm": 113.2793197631836, "learning_rate": 5.737666100196188e-06, "loss": 9.1972, "step": 253790 }, { "epoch": 0.5126920575152414, "grad_norm": 57.68220901489258, "learning_rate": 5.737320852340776e-06, "loss": 19.2187, "step": 253800 }, { "epoch": 0.5127122581479252, "grad_norm": 539.6422729492188, "learning_rate": 5.736975600891752e-06, "loss": 25.7557, "step": 253810 }, { "epoch": 0.512732458780609, "grad_norm": 
309.9414978027344, "learning_rate": 5.7366303458507986e-06, "loss": 18.8251, "step": 253820 }, { "epoch": 0.5127526594132928, "grad_norm": 106.2264404296875, "learning_rate": 5.736285087219599e-06, "loss": 15.3974, "step": 253830 }, { "epoch": 0.5127728600459767, "grad_norm": 1031.8822021484375, "learning_rate": 5.7359398249998335e-06, "loss": 18.8748, "step": 253840 }, { "epoch": 0.5127930606786605, "grad_norm": 19.903709411621094, "learning_rate": 5.735594559193187e-06, "loss": 32.3088, "step": 253850 }, { "epoch": 0.5128132613113443, "grad_norm": 494.54180908203125, "learning_rate": 5.735249289801343e-06, "loss": 21.3223, "step": 253860 }, { "epoch": 0.5128334619440281, "grad_norm": 357.4226379394531, "learning_rate": 5.734904016825982e-06, "loss": 13.4894, "step": 253870 }, { "epoch": 0.5128536625767119, "grad_norm": 520.6104125976562, "learning_rate": 5.73455874026879e-06, "loss": 23.4609, "step": 253880 }, { "epoch": 0.5128738632093958, "grad_norm": 293.92706298828125, "learning_rate": 5.7342134601314445e-06, "loss": 18.8432, "step": 253890 }, { "epoch": 0.5128940638420796, "grad_norm": 383.59796142578125, "learning_rate": 5.733868176415633e-06, "loss": 28.6413, "step": 253900 }, { "epoch": 0.5129142644747634, "grad_norm": 313.1064758300781, "learning_rate": 5.733522889123038e-06, "loss": 14.5493, "step": 253910 }, { "epoch": 0.5129344651074471, "grad_norm": 289.10418701171875, "learning_rate": 5.733177598255341e-06, "loss": 36.0446, "step": 253920 }, { "epoch": 0.5129546657401309, "grad_norm": 383.3257141113281, "learning_rate": 5.732832303814225e-06, "loss": 14.9592, "step": 253930 }, { "epoch": 0.5129748663728148, "grad_norm": 341.5558166503906, "learning_rate": 5.7324870058013736e-06, "loss": 23.8471, "step": 253940 }, { "epoch": 0.5129950670054986, "grad_norm": 283.3092346191406, "learning_rate": 5.732141704218469e-06, "loss": 25.5903, "step": 253950 }, { "epoch": 0.5130152676381824, "grad_norm": 407.0079040527344, "learning_rate": 5.731796399067194e-06, "loss": 17.412, "step": 253960 }, { "epoch": 0.5130354682708662, "grad_norm": 158.69200134277344, "learning_rate": 5.731451090349234e-06, "loss": 22.5824, "step": 253970 }, { "epoch": 0.51305566890355, "grad_norm": 172.71466064453125, "learning_rate": 5.731105778066268e-06, "loss": 20.4837, "step": 253980 }, { "epoch": 0.5130758695362339, "grad_norm": 0.0, "learning_rate": 5.730760462219983e-06, "loss": 13.3426, "step": 253990 }, { "epoch": 0.5130960701689177, "grad_norm": 682.0313110351562, "learning_rate": 5.730415142812059e-06, "loss": 22.8439, "step": 254000 }, { "epoch": 0.5131162708016015, "grad_norm": 618.0068969726562, "learning_rate": 5.73006981984418e-06, "loss": 21.3711, "step": 254010 }, { "epoch": 0.5131364714342853, "grad_norm": 84.22988891601562, "learning_rate": 5.7297244933180306e-06, "loss": 12.3084, "step": 254020 }, { "epoch": 0.5131566720669691, "grad_norm": 698.24951171875, "learning_rate": 5.72937916323529e-06, "loss": 18.7768, "step": 254030 }, { "epoch": 0.513176872699653, "grad_norm": 535.3914184570312, "learning_rate": 5.729033829597646e-06, "loss": 22.168, "step": 254040 }, { "epoch": 0.5131970733323368, "grad_norm": 288.82470703125, "learning_rate": 5.728688492406778e-06, "loss": 10.6456, "step": 254050 }, { "epoch": 0.5132172739650206, "grad_norm": 397.6792297363281, "learning_rate": 5.728343151664371e-06, "loss": 11.5455, "step": 254060 }, { "epoch": 0.5132374745977044, "grad_norm": 196.13861083984375, "learning_rate": 5.727997807372109e-06, "loss": 24.8786, "step": 254070 }, { "epoch": 
0.5132576752303882, "grad_norm": 317.0704040527344, "learning_rate": 5.727652459531674e-06, "loss": 39.4602, "step": 254080 }, { "epoch": 0.513277875863072, "grad_norm": 300.1142883300781, "learning_rate": 5.727307108144749e-06, "loss": 16.143, "step": 254090 }, { "epoch": 0.5132980764957559, "grad_norm": 767.6620483398438, "learning_rate": 5.726961753213016e-06, "loss": 23.4947, "step": 254100 }, { "epoch": 0.5133182771284397, "grad_norm": 219.3327178955078, "learning_rate": 5.726616394738161e-06, "loss": 16.8684, "step": 254110 }, { "epoch": 0.5133384777611235, "grad_norm": 303.88433837890625, "learning_rate": 5.726271032721864e-06, "loss": 11.7734, "step": 254120 }, { "epoch": 0.5133586783938073, "grad_norm": 467.8100280761719, "learning_rate": 5.725925667165812e-06, "loss": 14.3304, "step": 254130 }, { "epoch": 0.5133788790264912, "grad_norm": 532.1683349609375, "learning_rate": 5.725580298071685e-06, "loss": 16.8608, "step": 254140 }, { "epoch": 0.513399079659175, "grad_norm": 19.614660263061523, "learning_rate": 5.725234925441169e-06, "loss": 21.0036, "step": 254150 }, { "epoch": 0.5134192802918588, "grad_norm": 589.5601196289062, "learning_rate": 5.724889549275945e-06, "loss": 21.3069, "step": 254160 }, { "epoch": 0.5134394809245425, "grad_norm": 658.7628784179688, "learning_rate": 5.724544169577697e-06, "loss": 20.0233, "step": 254170 }, { "epoch": 0.5134596815572263, "grad_norm": 92.69924926757812, "learning_rate": 5.72419878634811e-06, "loss": 17.4411, "step": 254180 }, { "epoch": 0.5134798821899101, "grad_norm": 798.9140014648438, "learning_rate": 5.7238533995888645e-06, "loss": 23.9981, "step": 254190 }, { "epoch": 0.513500082822594, "grad_norm": 697.9685668945312, "learning_rate": 5.723508009301646e-06, "loss": 24.8597, "step": 254200 }, { "epoch": 0.5135202834552778, "grad_norm": 155.75975036621094, "learning_rate": 5.723162615488137e-06, "loss": 16.6985, "step": 254210 }, { "epoch": 0.5135404840879616, "grad_norm": 227.90110778808594, "learning_rate": 5.722817218150021e-06, "loss": 14.4569, "step": 254220 }, { "epoch": 0.5135606847206454, "grad_norm": 166.38514709472656, "learning_rate": 5.722471817288982e-06, "loss": 11.5888, "step": 254230 }, { "epoch": 0.5135808853533292, "grad_norm": 557.6221923828125, "learning_rate": 5.722126412906703e-06, "loss": 19.4762, "step": 254240 }, { "epoch": 0.5136010859860131, "grad_norm": 299.7841491699219, "learning_rate": 5.721781005004866e-06, "loss": 23.5099, "step": 254250 }, { "epoch": 0.5136212866186969, "grad_norm": 454.24481201171875, "learning_rate": 5.721435593585158e-06, "loss": 22.7358, "step": 254260 }, { "epoch": 0.5136414872513807, "grad_norm": 813.3714599609375, "learning_rate": 5.72109017864926e-06, "loss": 23.3991, "step": 254270 }, { "epoch": 0.5136616878840645, "grad_norm": 79.51116943359375, "learning_rate": 5.720744760198855e-06, "loss": 25.27, "step": 254280 }, { "epoch": 0.5136818885167483, "grad_norm": 695.857177734375, "learning_rate": 5.720399338235628e-06, "loss": 23.4554, "step": 254290 }, { "epoch": 0.5137020891494322, "grad_norm": 651.18359375, "learning_rate": 5.720053912761261e-06, "loss": 27.6443, "step": 254300 }, { "epoch": 0.513722289782116, "grad_norm": 587.54345703125, "learning_rate": 5.719708483777441e-06, "loss": 17.0722, "step": 254310 }, { "epoch": 0.5137424904147998, "grad_norm": 564.2921752929688, "learning_rate": 5.719363051285847e-06, "loss": 27.6548, "step": 254320 }, { "epoch": 0.5137626910474836, "grad_norm": 407.90386962890625, "learning_rate": 5.719017615288165e-06, "loss": 29.3676, 
"step": 254330 }, { "epoch": 0.5137828916801674, "grad_norm": 158.3983612060547, "learning_rate": 5.718672175786078e-06, "loss": 11.9976, "step": 254340 }, { "epoch": 0.5138030923128513, "grad_norm": 33.34981155395508, "learning_rate": 5.718326732781271e-06, "loss": 17.7713, "step": 254350 }, { "epoch": 0.5138232929455351, "grad_norm": 328.87640380859375, "learning_rate": 5.7179812862754265e-06, "loss": 18.6988, "step": 254360 }, { "epoch": 0.5138434935782189, "grad_norm": 370.8194580078125, "learning_rate": 5.717635836270228e-06, "loss": 20.8561, "step": 254370 }, { "epoch": 0.5138636942109027, "grad_norm": 5.150504112243652, "learning_rate": 5.71729038276736e-06, "loss": 18.7786, "step": 254380 }, { "epoch": 0.5138838948435865, "grad_norm": 172.74937438964844, "learning_rate": 5.716944925768505e-06, "loss": 16.3021, "step": 254390 }, { "epoch": 0.5139040954762704, "grad_norm": 236.12741088867188, "learning_rate": 5.716599465275347e-06, "loss": 32.3623, "step": 254400 }, { "epoch": 0.5139242961089542, "grad_norm": 445.7738037109375, "learning_rate": 5.716254001289571e-06, "loss": 22.8093, "step": 254410 }, { "epoch": 0.513944496741638, "grad_norm": 161.113525390625, "learning_rate": 5.7159085338128595e-06, "loss": 9.8593, "step": 254420 }, { "epoch": 0.5139646973743217, "grad_norm": 244.25999450683594, "learning_rate": 5.7155630628468974e-06, "loss": 15.116, "step": 254430 }, { "epoch": 0.5139848980070055, "grad_norm": 507.08868408203125, "learning_rate": 5.715217588393367e-06, "loss": 27.5761, "step": 254440 }, { "epoch": 0.5140050986396894, "grad_norm": 587.4240112304688, "learning_rate": 5.714872110453952e-06, "loss": 25.517, "step": 254450 }, { "epoch": 0.5140252992723732, "grad_norm": 156.8381805419922, "learning_rate": 5.714526629030338e-06, "loss": 8.3755, "step": 254460 }, { "epoch": 0.514045499905057, "grad_norm": 501.0858154296875, "learning_rate": 5.714181144124209e-06, "loss": 12.8682, "step": 254470 }, { "epoch": 0.5140657005377408, "grad_norm": 896.33056640625, "learning_rate": 5.7138356557372444e-06, "loss": 22.6873, "step": 254480 }, { "epoch": 0.5140859011704246, "grad_norm": 536.6281127929688, "learning_rate": 5.713490163871135e-06, "loss": 27.9533, "step": 254490 }, { "epoch": 0.5141061018031085, "grad_norm": 204.81321716308594, "learning_rate": 5.7131446685275595e-06, "loss": 35.4812, "step": 254500 }, { "epoch": 0.5141263024357923, "grad_norm": 371.892822265625, "learning_rate": 5.712799169708203e-06, "loss": 18.543, "step": 254510 }, { "epoch": 0.5141465030684761, "grad_norm": 446.13824462890625, "learning_rate": 5.71245366741475e-06, "loss": 20.1133, "step": 254520 }, { "epoch": 0.5141667037011599, "grad_norm": 416.196533203125, "learning_rate": 5.712108161648885e-06, "loss": 37.1317, "step": 254530 }, { "epoch": 0.5141869043338437, "grad_norm": 200.76622009277344, "learning_rate": 5.7117626524122905e-06, "loss": 16.0216, "step": 254540 }, { "epoch": 0.5142071049665276, "grad_norm": 258.2995300292969, "learning_rate": 5.711417139706651e-06, "loss": 14.974, "step": 254550 }, { "epoch": 0.5142273055992114, "grad_norm": 272.2379455566406, "learning_rate": 5.711071623533651e-06, "loss": 25.0506, "step": 254560 }, { "epoch": 0.5142475062318952, "grad_norm": 580.2442626953125, "learning_rate": 5.710726103894974e-06, "loss": 17.8842, "step": 254570 }, { "epoch": 0.514267706864579, "grad_norm": 308.1773376464844, "learning_rate": 5.710380580792305e-06, "loss": 11.4507, "step": 254580 }, { "epoch": 0.5142879074972628, "grad_norm": 1430.5892333984375, "learning_rate": 
5.710035054227326e-06, "loss": 28.2847, "step": 254590 }, { "epoch": 0.5143081081299467, "grad_norm": 448.6597900390625, "learning_rate": 5.709689524201723e-06, "loss": 10.425, "step": 254600 }, { "epoch": 0.5143283087626305, "grad_norm": 723.5948486328125, "learning_rate": 5.709343990717179e-06, "loss": 14.4196, "step": 254610 }, { "epoch": 0.5143485093953143, "grad_norm": 1252.2452392578125, "learning_rate": 5.708998453775378e-06, "loss": 35.5113, "step": 254620 }, { "epoch": 0.5143687100279981, "grad_norm": 598.4287109375, "learning_rate": 5.708652913378005e-06, "loss": 18.4721, "step": 254630 }, { "epoch": 0.5143889106606819, "grad_norm": 101.59346008300781, "learning_rate": 5.7083073695267435e-06, "loss": 15.5913, "step": 254640 }, { "epoch": 0.5144091112933658, "grad_norm": 389.8990173339844, "learning_rate": 5.707961822223279e-06, "loss": 18.3069, "step": 254650 }, { "epoch": 0.5144293119260496, "grad_norm": 328.4405517578125, "learning_rate": 5.707616271469293e-06, "loss": 31.6081, "step": 254660 }, { "epoch": 0.5144495125587334, "grad_norm": 305.9642639160156, "learning_rate": 5.707270717266471e-06, "loss": 27.7813, "step": 254670 }, { "epoch": 0.5144697131914172, "grad_norm": 509.5992126464844, "learning_rate": 5.7069251596164975e-06, "loss": 23.0576, "step": 254680 }, { "epoch": 0.5144899138241009, "grad_norm": 334.6846008300781, "learning_rate": 5.706579598521058e-06, "loss": 11.9694, "step": 254690 }, { "epoch": 0.5145101144567847, "grad_norm": 301.08782958984375, "learning_rate": 5.706234033981835e-06, "loss": 18.8511, "step": 254700 }, { "epoch": 0.5145303150894686, "grad_norm": 750.560302734375, "learning_rate": 5.705888466000511e-06, "loss": 27.2115, "step": 254710 }, { "epoch": 0.5145505157221524, "grad_norm": 107.52172088623047, "learning_rate": 5.705542894578773e-06, "loss": 23.0592, "step": 254720 }, { "epoch": 0.5145707163548362, "grad_norm": 131.147216796875, "learning_rate": 5.705197319718304e-06, "loss": 42.1324, "step": 254730 }, { "epoch": 0.51459091698752, "grad_norm": 138.3438262939453, "learning_rate": 5.704851741420792e-06, "loss": 14.9189, "step": 254740 }, { "epoch": 0.5146111176202038, "grad_norm": 681.9591064453125, "learning_rate": 5.704506159687914e-06, "loss": 29.1809, "step": 254750 }, { "epoch": 0.5146313182528877, "grad_norm": 560.5260620117188, "learning_rate": 5.7041605745213605e-06, "loss": 41.0378, "step": 254760 }, { "epoch": 0.5146515188855715, "grad_norm": 606.7002563476562, "learning_rate": 5.703814985922813e-06, "loss": 19.3052, "step": 254770 }, { "epoch": 0.5146717195182553, "grad_norm": 437.7874755859375, "learning_rate": 5.703469393893957e-06, "loss": 18.2557, "step": 254780 }, { "epoch": 0.5146919201509391, "grad_norm": 73.53416442871094, "learning_rate": 5.7031237984364776e-06, "loss": 17.7876, "step": 254790 }, { "epoch": 0.5147121207836229, "grad_norm": 738.1157836914062, "learning_rate": 5.702778199552055e-06, "loss": 31.4386, "step": 254800 }, { "epoch": 0.5147323214163068, "grad_norm": 198.610107421875, "learning_rate": 5.7024325972423795e-06, "loss": 14.7151, "step": 254810 }, { "epoch": 0.5147525220489906, "grad_norm": 172.3389434814453, "learning_rate": 5.702086991509133e-06, "loss": 15.3419, "step": 254820 }, { "epoch": 0.5147727226816744, "grad_norm": 172.26531982421875, "learning_rate": 5.701741382353998e-06, "loss": 14.9749, "step": 254830 }, { "epoch": 0.5147929233143582, "grad_norm": 178.2786407470703, "learning_rate": 5.70139576977866e-06, "loss": 17.5487, "step": 254840 }, { "epoch": 0.514813123947042, "grad_norm": 
287.3568115234375, "learning_rate": 5.701050153784806e-06, "loss": 11.0949, "step": 254850 }, { "epoch": 0.5148333245797259, "grad_norm": 710.2113647460938, "learning_rate": 5.7007045343741176e-06, "loss": 31.9813, "step": 254860 }, { "epoch": 0.5148535252124097, "grad_norm": 251.64366149902344, "learning_rate": 5.70035891154828e-06, "loss": 27.172, "step": 254870 }, { "epoch": 0.5148737258450935, "grad_norm": 385.65277099609375, "learning_rate": 5.700013285308979e-06, "loss": 36.05, "step": 254880 }, { "epoch": 0.5148939264777773, "grad_norm": 385.455322265625, "learning_rate": 5.699667655657898e-06, "loss": 17.7484, "step": 254890 }, { "epoch": 0.5149141271104611, "grad_norm": 1162.52734375, "learning_rate": 5.6993220225967214e-06, "loss": 33.4321, "step": 254900 }, { "epoch": 0.514934327743145, "grad_norm": 420.1855163574219, "learning_rate": 5.698976386127133e-06, "loss": 17.1253, "step": 254910 }, { "epoch": 0.5149545283758288, "grad_norm": 322.5575256347656, "learning_rate": 5.69863074625082e-06, "loss": 10.0489, "step": 254920 }, { "epoch": 0.5149747290085126, "grad_norm": 43.054256439208984, "learning_rate": 5.6982851029694645e-06, "loss": 19.1232, "step": 254930 }, { "epoch": 0.5149949296411963, "grad_norm": 0.0, "learning_rate": 5.697939456284753e-06, "loss": 14.9083, "step": 254940 }, { "epoch": 0.5150151302738801, "grad_norm": 12.182699203491211, "learning_rate": 5.697593806198369e-06, "loss": 22.7151, "step": 254950 }, { "epoch": 0.515035330906564, "grad_norm": 2079.461669921875, "learning_rate": 5.697248152711997e-06, "loss": 34.4558, "step": 254960 }, { "epoch": 0.5150555315392478, "grad_norm": 351.67364501953125, "learning_rate": 5.696902495827323e-06, "loss": 21.6426, "step": 254970 }, { "epoch": 0.5150757321719316, "grad_norm": 834.896240234375, "learning_rate": 5.69655683554603e-06, "loss": 34.1588, "step": 254980 }, { "epoch": 0.5150959328046154, "grad_norm": 592.2312622070312, "learning_rate": 5.6962111718698035e-06, "loss": 13.6797, "step": 254990 }, { "epoch": 0.5151161334372992, "grad_norm": 207.23638916015625, "learning_rate": 5.695865504800328e-06, "loss": 16.1975, "step": 255000 }, { "epoch": 0.515136334069983, "grad_norm": 103.85392761230469, "learning_rate": 5.695519834339288e-06, "loss": 22.8707, "step": 255010 }, { "epoch": 0.5151565347026669, "grad_norm": 413.5932922363281, "learning_rate": 5.695174160488369e-06, "loss": 15.2407, "step": 255020 }, { "epoch": 0.5151767353353507, "grad_norm": 397.1807861328125, "learning_rate": 5.694828483249257e-06, "loss": 20.2696, "step": 255030 }, { "epoch": 0.5151969359680345, "grad_norm": 159.94862365722656, "learning_rate": 5.694482802623634e-06, "loss": 10.7502, "step": 255040 }, { "epoch": 0.5152171366007183, "grad_norm": 633.3711547851562, "learning_rate": 5.694137118613185e-06, "loss": 21.5194, "step": 255050 }, { "epoch": 0.5152373372334021, "grad_norm": 14.009786605834961, "learning_rate": 5.693791431219599e-06, "loss": 13.4572, "step": 255060 }, { "epoch": 0.515257537866086, "grad_norm": 236.19485473632812, "learning_rate": 5.693445740444554e-06, "loss": 24.037, "step": 255070 }, { "epoch": 0.5152777384987698, "grad_norm": 209.95321655273438, "learning_rate": 5.693100046289741e-06, "loss": 25.8267, "step": 255080 }, { "epoch": 0.5152979391314536, "grad_norm": 211.92384338378906, "learning_rate": 5.692754348756841e-06, "loss": 10.0126, "step": 255090 }, { "epoch": 0.5153181397641374, "grad_norm": 791.6719360351562, "learning_rate": 5.692408647847542e-06, "loss": 30.305, "step": 255100 }, { "epoch": 
0.5153383403968212, "grad_norm": 304.9013671875, "learning_rate": 5.692062943563525e-06, "loss": 17.5825, "step": 255110 }, { "epoch": 0.5153585410295051, "grad_norm": 293.64532470703125, "learning_rate": 5.691717235906479e-06, "loss": 18.0099, "step": 255120 }, { "epoch": 0.5153787416621889, "grad_norm": 357.52215576171875, "learning_rate": 5.691371524878087e-06, "loss": 16.9815, "step": 255130 }, { "epoch": 0.5153989422948727, "grad_norm": 311.4905090332031, "learning_rate": 5.6910258104800335e-06, "loss": 13.2764, "step": 255140 }, { "epoch": 0.5154191429275565, "grad_norm": 135.3758544921875, "learning_rate": 5.690680092714004e-06, "loss": 14.3897, "step": 255150 }, { "epoch": 0.5154393435602403, "grad_norm": 468.88287353515625, "learning_rate": 5.690334371581683e-06, "loss": 23.5022, "step": 255160 }, { "epoch": 0.5154595441929242, "grad_norm": 467.7669372558594, "learning_rate": 5.689988647084756e-06, "loss": 30.0204, "step": 255170 }, { "epoch": 0.515479744825608, "grad_norm": 1561.510009765625, "learning_rate": 5.6896429192249085e-06, "loss": 23.8157, "step": 255180 }, { "epoch": 0.5154999454582918, "grad_norm": 206.41876220703125, "learning_rate": 5.689297188003826e-06, "loss": 16.2573, "step": 255190 }, { "epoch": 0.5155201460909755, "grad_norm": 300.7821044921875, "learning_rate": 5.68895145342319e-06, "loss": 16.7788, "step": 255200 }, { "epoch": 0.5155403467236593, "grad_norm": 885.2864379882812, "learning_rate": 5.688605715484691e-06, "loss": 13.9834, "step": 255210 }, { "epoch": 0.5155605473563432, "grad_norm": 438.3362731933594, "learning_rate": 5.68825997419001e-06, "loss": 12.7685, "step": 255220 }, { "epoch": 0.515580747989027, "grad_norm": 236.73117065429688, "learning_rate": 5.687914229540833e-06, "loss": 19.0074, "step": 255230 }, { "epoch": 0.5156009486217108, "grad_norm": 315.9634094238281, "learning_rate": 5.6875684815388475e-06, "loss": 25.4186, "step": 255240 }, { "epoch": 0.5156211492543946, "grad_norm": 386.8504943847656, "learning_rate": 5.687222730185733e-06, "loss": 19.1703, "step": 255250 }, { "epoch": 0.5156413498870784, "grad_norm": 126.34518432617188, "learning_rate": 5.686876975483182e-06, "loss": 16.9827, "step": 255260 }, { "epoch": 0.5156615505197623, "grad_norm": 511.374267578125, "learning_rate": 5.686531217432873e-06, "loss": 30.4377, "step": 255270 }, { "epoch": 0.5156817511524461, "grad_norm": 253.56813049316406, "learning_rate": 5.686185456036496e-06, "loss": 10.2572, "step": 255280 }, { "epoch": 0.5157019517851299, "grad_norm": 184.04869079589844, "learning_rate": 5.685839691295734e-06, "loss": 14.3178, "step": 255290 }, { "epoch": 0.5157221524178137, "grad_norm": 416.50103759765625, "learning_rate": 5.685493923212273e-06, "loss": 26.5885, "step": 255300 }, { "epoch": 0.5157423530504975, "grad_norm": 392.1858215332031, "learning_rate": 5.685148151787796e-06, "loss": 11.0449, "step": 255310 }, { "epoch": 0.5157625536831814, "grad_norm": 315.25482177734375, "learning_rate": 5.684802377023991e-06, "loss": 18.649, "step": 255320 }, { "epoch": 0.5157827543158652, "grad_norm": 436.5318603515625, "learning_rate": 5.684456598922542e-06, "loss": 20.3775, "step": 255330 }, { "epoch": 0.515802954948549, "grad_norm": 343.0895690917969, "learning_rate": 5.684110817485135e-06, "loss": 19.2569, "step": 255340 }, { "epoch": 0.5158231555812328, "grad_norm": 271.3983459472656, "learning_rate": 5.683765032713455e-06, "loss": 17.9499, "step": 255350 }, { "epoch": 0.5158433562139166, "grad_norm": 192.4423065185547, "learning_rate": 5.683419244609185e-06, "loss": 
18.4982, "step": 255360 }, { "epoch": 0.5158635568466005, "grad_norm": 201.5713348388672, "learning_rate": 5.683073453174016e-06, "loss": 18.923, "step": 255370 }, { "epoch": 0.5158837574792843, "grad_norm": 523.779541015625, "learning_rate": 5.682727658409628e-06, "loss": 10.2711, "step": 255380 }, { "epoch": 0.5159039581119681, "grad_norm": 304.9748840332031, "learning_rate": 5.682381860317708e-06, "loss": 15.609, "step": 255390 }, { "epoch": 0.5159241587446519, "grad_norm": 253.09251403808594, "learning_rate": 5.682036058899942e-06, "loss": 22.8758, "step": 255400 }, { "epoch": 0.5159443593773357, "grad_norm": 494.39862060546875, "learning_rate": 5.681690254158015e-06, "loss": 20.438, "step": 255410 }, { "epoch": 0.5159645600100196, "grad_norm": 597.162353515625, "learning_rate": 5.681344446093613e-06, "loss": 33.4464, "step": 255420 }, { "epoch": 0.5159847606427034, "grad_norm": 52.22596740722656, "learning_rate": 5.680998634708419e-06, "loss": 23.1126, "step": 255430 }, { "epoch": 0.5160049612753872, "grad_norm": 234.77084350585938, "learning_rate": 5.6806528200041226e-06, "loss": 16.2024, "step": 255440 }, { "epoch": 0.5160251619080709, "grad_norm": 235.65931701660156, "learning_rate": 5.680307001982405e-06, "loss": 7.5566, "step": 255450 }, { "epoch": 0.5160453625407547, "grad_norm": 200.08096313476562, "learning_rate": 5.679961180644954e-06, "loss": 16.4467, "step": 255460 }, { "epoch": 0.5160655631734385, "grad_norm": 535.2581787109375, "learning_rate": 5.679615355993455e-06, "loss": 19.9863, "step": 255470 }, { "epoch": 0.5160857638061224, "grad_norm": 243.5124053955078, "learning_rate": 5.679269528029593e-06, "loss": 23.1248, "step": 255480 }, { "epoch": 0.5161059644388062, "grad_norm": 613.87939453125, "learning_rate": 5.678923696755054e-06, "loss": 19.6434, "step": 255490 }, { "epoch": 0.51612616507149, "grad_norm": 233.92079162597656, "learning_rate": 5.678577862171523e-06, "loss": 10.7584, "step": 255500 }, { "epoch": 0.5161463657041738, "grad_norm": 133.3798065185547, "learning_rate": 5.678232024280687e-06, "loss": 27.0341, "step": 255510 }, { "epoch": 0.5161665663368576, "grad_norm": 252.52610778808594, "learning_rate": 5.677886183084227e-06, "loss": 16.9974, "step": 255520 }, { "epoch": 0.5161867669695415, "grad_norm": 406.6451416015625, "learning_rate": 5.677540338583836e-06, "loss": 15.9542, "step": 255530 }, { "epoch": 0.5162069676022253, "grad_norm": 301.5099182128906, "learning_rate": 5.677194490781192e-06, "loss": 21.4673, "step": 255540 }, { "epoch": 0.5162271682349091, "grad_norm": 574.8076171875, "learning_rate": 5.676848639677987e-06, "loss": 29.9033, "step": 255550 }, { "epoch": 0.5162473688675929, "grad_norm": 0.0, "learning_rate": 5.6765027852759015e-06, "loss": 18.6978, "step": 255560 }, { "epoch": 0.5162675695002767, "grad_norm": 9.07258415222168, "learning_rate": 5.6761569275766246e-06, "loss": 11.8524, "step": 255570 }, { "epoch": 0.5162877701329606, "grad_norm": 361.3944396972656, "learning_rate": 5.675811066581842e-06, "loss": 19.791, "step": 255580 }, { "epoch": 0.5163079707656444, "grad_norm": 601.8685913085938, "learning_rate": 5.675465202293238e-06, "loss": 31.5645, "step": 255590 }, { "epoch": 0.5163281713983282, "grad_norm": 343.0535888671875, "learning_rate": 5.675119334712496e-06, "loss": 13.8398, "step": 255600 }, { "epoch": 0.516348372031012, "grad_norm": 982.5330810546875, "learning_rate": 5.674773463841306e-06, "loss": 23.0152, "step": 255610 }, { "epoch": 0.5163685726636958, "grad_norm": 190.50270080566406, "learning_rate": 
5.674427589681353e-06, "loss": 30.673, "step": 255620 }, { "epoch": 0.5163887732963797, "grad_norm": 3.095463275909424, "learning_rate": 5.674081712234319e-06, "loss": 23.7618, "step": 255630 }, { "epoch": 0.5164089739290635, "grad_norm": 694.7069091796875, "learning_rate": 5.6737358315018954e-06, "loss": 13.362, "step": 255640 }, { "epoch": 0.5164291745617473, "grad_norm": 39.145450592041016, "learning_rate": 5.673389947485763e-06, "loss": 24.3825, "step": 255650 }, { "epoch": 0.5164493751944311, "grad_norm": 281.1506042480469, "learning_rate": 5.673044060187612e-06, "loss": 13.3317, "step": 255660 }, { "epoch": 0.516469575827115, "grad_norm": 504.1978454589844, "learning_rate": 5.672698169609125e-06, "loss": 18.3809, "step": 255670 }, { "epoch": 0.5164897764597988, "grad_norm": 1009.9043579101562, "learning_rate": 5.672352275751986e-06, "loss": 21.8472, "step": 255680 }, { "epoch": 0.5165099770924826, "grad_norm": 262.5923767089844, "learning_rate": 5.672006378617887e-06, "loss": 13.7169, "step": 255690 }, { "epoch": 0.5165301777251664, "grad_norm": 264.2748718261719, "learning_rate": 5.671660478208508e-06, "loss": 15.8185, "step": 255700 }, { "epoch": 0.5165503783578501, "grad_norm": 118.52084350585938, "learning_rate": 5.671314574525539e-06, "loss": 35.465, "step": 255710 }, { "epoch": 0.5165705789905339, "grad_norm": 80.22938537597656, "learning_rate": 5.670968667570663e-06, "loss": 24.3935, "step": 255720 }, { "epoch": 0.5165907796232178, "grad_norm": 654.9724731445312, "learning_rate": 5.670622757345567e-06, "loss": 39.5951, "step": 255730 }, { "epoch": 0.5166109802559016, "grad_norm": 1020.5020141601562, "learning_rate": 5.670276843851939e-06, "loss": 21.9963, "step": 255740 }, { "epoch": 0.5166311808885854, "grad_norm": 456.8036804199219, "learning_rate": 5.6699309270914615e-06, "loss": 14.9634, "step": 255750 }, { "epoch": 0.5166513815212692, "grad_norm": 238.6225128173828, "learning_rate": 5.669585007065822e-06, "loss": 33.4033, "step": 255760 }, { "epoch": 0.516671582153953, "grad_norm": 328.130615234375, "learning_rate": 5.669239083776705e-06, "loss": 13.8304, "step": 255770 }, { "epoch": 0.5166917827866369, "grad_norm": 489.61151123046875, "learning_rate": 5.6688931572258e-06, "loss": 45.9713, "step": 255780 }, { "epoch": 0.5167119834193207, "grad_norm": 192.0399169921875, "learning_rate": 5.66854722741479e-06, "loss": 13.0071, "step": 255790 }, { "epoch": 0.5167321840520045, "grad_norm": 159.27377319335938, "learning_rate": 5.668201294345363e-06, "loss": 10.714, "step": 255800 }, { "epoch": 0.5167523846846883, "grad_norm": 150.52395629882812, "learning_rate": 5.667855358019203e-06, "loss": 34.7655, "step": 255810 }, { "epoch": 0.5167725853173721, "grad_norm": 546.414306640625, "learning_rate": 5.667509418437996e-06, "loss": 18.7668, "step": 255820 }, { "epoch": 0.516792785950056, "grad_norm": 1164.419677734375, "learning_rate": 5.66716347560343e-06, "loss": 19.0089, "step": 255830 }, { "epoch": 0.5168129865827398, "grad_norm": 578.8226928710938, "learning_rate": 5.66681752951719e-06, "loss": 11.6692, "step": 255840 }, { "epoch": 0.5168331872154236, "grad_norm": 773.8395385742188, "learning_rate": 5.666471580180963e-06, "loss": 20.3244, "step": 255850 }, { "epoch": 0.5168533878481074, "grad_norm": 224.7852020263672, "learning_rate": 5.666125627596433e-06, "loss": 26.6597, "step": 255860 }, { "epoch": 0.5168735884807912, "grad_norm": 71.54790496826172, "learning_rate": 5.665779671765289e-06, "loss": 14.9398, "step": 255870 }, { "epoch": 0.516893789113475, "grad_norm": 
738.0344848632812, "learning_rate": 5.665433712689214e-06, "loss": 24.7984, "step": 255880 }, { "epoch": 0.5169139897461589, "grad_norm": 74.12384033203125, "learning_rate": 5.665087750369898e-06, "loss": 14.5951, "step": 255890 }, { "epoch": 0.5169341903788427, "grad_norm": 331.020263671875, "learning_rate": 5.6647417848090225e-06, "loss": 18.2839, "step": 255900 }, { "epoch": 0.5169543910115265, "grad_norm": 277.72784423828125, "learning_rate": 5.664395816008277e-06, "loss": 17.7138, "step": 255910 }, { "epoch": 0.5169745916442103, "grad_norm": 527.2340087890625, "learning_rate": 5.664049843969348e-06, "loss": 14.3661, "step": 255920 }, { "epoch": 0.5169947922768942, "grad_norm": 615.870849609375, "learning_rate": 5.66370386869392e-06, "loss": 28.072, "step": 255930 }, { "epoch": 0.517014992909578, "grad_norm": 428.3338317871094, "learning_rate": 5.663357890183679e-06, "loss": 18.4579, "step": 255940 }, { "epoch": 0.5170351935422618, "grad_norm": 350.446533203125, "learning_rate": 5.6630119084403125e-06, "loss": 32.2803, "step": 255950 }, { "epoch": 0.5170553941749455, "grad_norm": 884.24853515625, "learning_rate": 5.662665923465508e-06, "loss": 39.4761, "step": 255960 }, { "epoch": 0.5170755948076293, "grad_norm": 3960.321044921875, "learning_rate": 5.662319935260947e-06, "loss": 31.6966, "step": 255970 }, { "epoch": 0.5170957954403131, "grad_norm": 258.8409729003906, "learning_rate": 5.661973943828321e-06, "loss": 19.2896, "step": 255980 }, { "epoch": 0.517115996072997, "grad_norm": 458.2825927734375, "learning_rate": 5.661627949169315e-06, "loss": 37.7257, "step": 255990 }, { "epoch": 0.5171361967056808, "grad_norm": 535.2298583984375, "learning_rate": 5.661281951285613e-06, "loss": 11.7949, "step": 256000 }, { "epoch": 0.5171563973383646, "grad_norm": 327.1224670410156, "learning_rate": 5.660935950178904e-06, "loss": 23.6848, "step": 256010 }, { "epoch": 0.5171765979710484, "grad_norm": 125.35787963867188, "learning_rate": 5.660589945850872e-06, "loss": 11.1664, "step": 256020 }, { "epoch": 0.5171967986037322, "grad_norm": 385.37615966796875, "learning_rate": 5.660243938303206e-06, "loss": 20.3847, "step": 256030 }, { "epoch": 0.5172169992364161, "grad_norm": 309.416259765625, "learning_rate": 5.659897927537591e-06, "loss": 15.0541, "step": 256040 }, { "epoch": 0.5172371998690999, "grad_norm": 349.4208679199219, "learning_rate": 5.659551913555713e-06, "loss": 15.7538, "step": 256050 }, { "epoch": 0.5172574005017837, "grad_norm": 311.5920104980469, "learning_rate": 5.659205896359259e-06, "loss": 18.7777, "step": 256060 }, { "epoch": 0.5172776011344675, "grad_norm": 543.2024536132812, "learning_rate": 5.658859875949916e-06, "loss": 22.1736, "step": 256070 }, { "epoch": 0.5172978017671513, "grad_norm": 397.7626953125, "learning_rate": 5.65851385232937e-06, "loss": 24.9859, "step": 256080 }, { "epoch": 0.5173180023998352, "grad_norm": 277.24700927734375, "learning_rate": 5.658167825499306e-06, "loss": 15.9078, "step": 256090 }, { "epoch": 0.517338203032519, "grad_norm": 455.8639221191406, "learning_rate": 5.657821795461413e-06, "loss": 21.7814, "step": 256100 }, { "epoch": 0.5173584036652028, "grad_norm": 209.8695068359375, "learning_rate": 5.657475762217376e-06, "loss": 21.7525, "step": 256110 }, { "epoch": 0.5173786042978866, "grad_norm": 203.76258850097656, "learning_rate": 5.657129725768883e-06, "loss": 18.5085, "step": 256120 }, { "epoch": 0.5173988049305704, "grad_norm": 13.486207008361816, "learning_rate": 5.656783686117617e-06, "loss": 10.8061, "step": 256130 }, { "epoch": 
0.5174190055632543, "grad_norm": 496.200927734375, "learning_rate": 5.656437643265269e-06, "loss": 27.5956, "step": 256140 }, { "epoch": 0.5174392061959381, "grad_norm": 521.7025756835938, "learning_rate": 5.656091597213523e-06, "loss": 20.9313, "step": 256150 }, { "epoch": 0.5174594068286219, "grad_norm": 312.62640380859375, "learning_rate": 5.655745547964067e-06, "loss": 13.854, "step": 256160 }, { "epoch": 0.5174796074613057, "grad_norm": 339.16986083984375, "learning_rate": 5.6553994955185846e-06, "loss": 54.9214, "step": 256170 }, { "epoch": 0.5174998080939895, "grad_norm": 616.1905517578125, "learning_rate": 5.655053439878766e-06, "loss": 28.8726, "step": 256180 }, { "epoch": 0.5175200087266734, "grad_norm": 497.0537414550781, "learning_rate": 5.654707381046296e-06, "loss": 18.8176, "step": 256190 }, { "epoch": 0.5175402093593572, "grad_norm": 904.5796508789062, "learning_rate": 5.654361319022862e-06, "loss": 18.2005, "step": 256200 }, { "epoch": 0.517560409992041, "grad_norm": 452.16131591796875, "learning_rate": 5.65401525381015e-06, "loss": 32.7201, "step": 256210 }, { "epoch": 0.5175806106247247, "grad_norm": 398.46832275390625, "learning_rate": 5.653669185409847e-06, "loss": 10.5908, "step": 256220 }, { "epoch": 0.5176008112574085, "grad_norm": 341.22216796875, "learning_rate": 5.653323113823639e-06, "loss": 18.9073, "step": 256230 }, { "epoch": 0.5176210118900924, "grad_norm": 2.6971940994262695, "learning_rate": 5.652977039053213e-06, "loss": 23.6222, "step": 256240 }, { "epoch": 0.5176412125227762, "grad_norm": 532.8876342773438, "learning_rate": 5.65263096110026e-06, "loss": 16.4939, "step": 256250 }, { "epoch": 0.51766141315546, "grad_norm": 805.7850952148438, "learning_rate": 5.652284879966459e-06, "loss": 17.0429, "step": 256260 }, { "epoch": 0.5176816137881438, "grad_norm": 589.1388549804688, "learning_rate": 5.651938795653501e-06, "loss": 18.2808, "step": 256270 }, { "epoch": 0.5177018144208276, "grad_norm": 146.6456756591797, "learning_rate": 5.651592708163074e-06, "loss": 10.7991, "step": 256280 }, { "epoch": 0.5177220150535115, "grad_norm": 174.4547576904297, "learning_rate": 5.651246617496861e-06, "loss": 17.1768, "step": 256290 }, { "epoch": 0.5177422156861953, "grad_norm": 425.0965270996094, "learning_rate": 5.650900523656553e-06, "loss": 13.5376, "step": 256300 }, { "epoch": 0.5177624163188791, "grad_norm": 299.1045227050781, "learning_rate": 5.6505544266438325e-06, "loss": 18.0883, "step": 256310 }, { "epoch": 0.5177826169515629, "grad_norm": 0.0, "learning_rate": 5.650208326460392e-06, "loss": 20.7979, "step": 256320 }, { "epoch": 0.5178028175842467, "grad_norm": 253.4033966064453, "learning_rate": 5.649862223107913e-06, "loss": 27.3647, "step": 256330 }, { "epoch": 0.5178230182169306, "grad_norm": 298.28375244140625, "learning_rate": 5.6495161165880826e-06, "loss": 12.8182, "step": 256340 }, { "epoch": 0.5178432188496144, "grad_norm": 451.9802551269531, "learning_rate": 5.649170006902592e-06, "loss": 26.2502, "step": 256350 }, { "epoch": 0.5178634194822982, "grad_norm": 655.0781860351562, "learning_rate": 5.6488238940531256e-06, "loss": 16.0616, "step": 256360 }, { "epoch": 0.517883620114982, "grad_norm": 150.7916259765625, "learning_rate": 5.648477778041369e-06, "loss": 17.0442, "step": 256370 }, { "epoch": 0.5179038207476658, "grad_norm": 264.4485168457031, "learning_rate": 5.6481316588690105e-06, "loss": 24.9096, "step": 256380 }, { "epoch": 0.5179240213803497, "grad_norm": 297.2204895019531, "learning_rate": 5.647785536537737e-06, "loss": 14.4927, "step": 
256390 }, { "epoch": 0.5179442220130335, "grad_norm": 497.6724548339844, "learning_rate": 5.647439411049235e-06, "loss": 20.867, "step": 256400 }, { "epoch": 0.5179644226457173, "grad_norm": 389.5704650878906, "learning_rate": 5.647093282405194e-06, "loss": 29.3018, "step": 256410 }, { "epoch": 0.5179846232784011, "grad_norm": 0.0, "learning_rate": 5.646747150607297e-06, "loss": 15.364, "step": 256420 }, { "epoch": 0.5180048239110849, "grad_norm": 279.0824890136719, "learning_rate": 5.646401015657232e-06, "loss": 14.5491, "step": 256430 }, { "epoch": 0.5180250245437688, "grad_norm": 250.67369079589844, "learning_rate": 5.646054877556688e-06, "loss": 17.5029, "step": 256440 }, { "epoch": 0.5180452251764526, "grad_norm": 55.99262237548828, "learning_rate": 5.6457087363073505e-06, "loss": 20.637, "step": 256450 }, { "epoch": 0.5180654258091364, "grad_norm": 783.3364868164062, "learning_rate": 5.645362591910908e-06, "loss": 21.0268, "step": 256460 }, { "epoch": 0.5180856264418202, "grad_norm": 461.7561950683594, "learning_rate": 5.645016444369045e-06, "loss": 25.593, "step": 256470 }, { "epoch": 0.5181058270745039, "grad_norm": 461.1914978027344, "learning_rate": 5.644670293683451e-06, "loss": 12.1642, "step": 256480 }, { "epoch": 0.5181260277071877, "grad_norm": 238.78421020507812, "learning_rate": 5.6443241398558115e-06, "loss": 18.0042, "step": 256490 }, { "epoch": 0.5181462283398716, "grad_norm": 340.0740051269531, "learning_rate": 5.643977982887815e-06, "loss": 20.3984, "step": 256500 }, { "epoch": 0.5181664289725554, "grad_norm": 660.90087890625, "learning_rate": 5.643631822781147e-06, "loss": 19.8156, "step": 256510 }, { "epoch": 0.5181866296052392, "grad_norm": 591.1057739257812, "learning_rate": 5.643285659537496e-06, "loss": 33.449, "step": 256520 }, { "epoch": 0.518206830237923, "grad_norm": 96.46746063232422, "learning_rate": 5.64293949315855e-06, "loss": 70.5526, "step": 256530 }, { "epoch": 0.5182270308706068, "grad_norm": 258.6355285644531, "learning_rate": 5.642593323645993e-06, "loss": 13.3898, "step": 256540 }, { "epoch": 0.5182472315032907, "grad_norm": 210.60028076171875, "learning_rate": 5.642247151001515e-06, "loss": 17.1673, "step": 256550 }, { "epoch": 0.5182674321359745, "grad_norm": 407.2357177734375, "learning_rate": 5.6419009752268015e-06, "loss": 14.4717, "step": 256560 }, { "epoch": 0.5182876327686583, "grad_norm": 554.6689453125, "learning_rate": 5.641554796323543e-06, "loss": 16.5293, "step": 256570 }, { "epoch": 0.5183078334013421, "grad_norm": 238.739501953125, "learning_rate": 5.641208614293421e-06, "loss": 9.929, "step": 256580 }, { "epoch": 0.5183280340340259, "grad_norm": 374.9056091308594, "learning_rate": 5.640862429138128e-06, "loss": 28.3125, "step": 256590 }, { "epoch": 0.5183482346667098, "grad_norm": 441.8193664550781, "learning_rate": 5.640516240859348e-06, "loss": 24.4329, "step": 256600 }, { "epoch": 0.5183684352993936, "grad_norm": 262.81146240234375, "learning_rate": 5.64017004945877e-06, "loss": 26.0223, "step": 256610 }, { "epoch": 0.5183886359320774, "grad_norm": 374.5070495605469, "learning_rate": 5.639823854938082e-06, "loss": 24.7278, "step": 256620 }, { "epoch": 0.5184088365647612, "grad_norm": 157.4990234375, "learning_rate": 5.639477657298968e-06, "loss": 9.6328, "step": 256630 }, { "epoch": 0.518429037197445, "grad_norm": 170.0662078857422, "learning_rate": 5.639131456543119e-06, "loss": 15.2021, "step": 256640 }, { "epoch": 0.5184492378301289, "grad_norm": 92.25371551513672, "learning_rate": 5.63878525267222e-06, "loss": 14.6738, 
"step": 256650 }, { "epoch": 0.5184694384628127, "grad_norm": 490.31964111328125, "learning_rate": 5.63843904568796e-06, "loss": 10.2476, "step": 256660 }, { "epoch": 0.5184896390954965, "grad_norm": 584.7594604492188, "learning_rate": 5.638092835592024e-06, "loss": 20.0524, "step": 256670 }, { "epoch": 0.5185098397281803, "grad_norm": 375.0758361816406, "learning_rate": 5.637746622386102e-06, "loss": 19.8257, "step": 256680 }, { "epoch": 0.5185300403608641, "grad_norm": 136.49530029296875, "learning_rate": 5.637400406071881e-06, "loss": 14.1598, "step": 256690 }, { "epoch": 0.518550240993548, "grad_norm": 609.0701293945312, "learning_rate": 5.6370541866510476e-06, "loss": 20.9267, "step": 256700 }, { "epoch": 0.5185704416262318, "grad_norm": 675.3168334960938, "learning_rate": 5.6367079641252874e-06, "loss": 20.4015, "step": 256710 }, { "epoch": 0.5185906422589156, "grad_norm": 60.84113693237305, "learning_rate": 5.636361738496291e-06, "loss": 9.445, "step": 256720 }, { "epoch": 0.5186108428915993, "grad_norm": 318.3699645996094, "learning_rate": 5.636015509765747e-06, "loss": 18.0882, "step": 256730 }, { "epoch": 0.5186310435242831, "grad_norm": 500.5754089355469, "learning_rate": 5.6356692779353365e-06, "loss": 21.8586, "step": 256740 }, { "epoch": 0.518651244156967, "grad_norm": 176.42054748535156, "learning_rate": 5.635323043006753e-06, "loss": 13.9599, "step": 256750 }, { "epoch": 0.5186714447896508, "grad_norm": 24.978870391845703, "learning_rate": 5.634976804981682e-06, "loss": 27.3057, "step": 256760 }, { "epoch": 0.5186916454223346, "grad_norm": 2007.6275634765625, "learning_rate": 5.634630563861811e-06, "loss": 40.0491, "step": 256770 }, { "epoch": 0.5187118460550184, "grad_norm": 627.5809326171875, "learning_rate": 5.634284319648827e-06, "loss": 43.1722, "step": 256780 }, { "epoch": 0.5187320466877022, "grad_norm": 529.4752197265625, "learning_rate": 5.633938072344419e-06, "loss": 34.4752, "step": 256790 }, { "epoch": 0.518752247320386, "grad_norm": 513.8336791992188, "learning_rate": 5.633591821950274e-06, "loss": 27.9681, "step": 256800 }, { "epoch": 0.5187724479530699, "grad_norm": 1000.4723510742188, "learning_rate": 5.633245568468079e-06, "loss": 41.3979, "step": 256810 }, { "epoch": 0.5187926485857537, "grad_norm": 423.302978515625, "learning_rate": 5.6328993118995215e-06, "loss": 27.1045, "step": 256820 }, { "epoch": 0.5188128492184375, "grad_norm": 366.0521240234375, "learning_rate": 5.632553052246289e-06, "loss": 11.7155, "step": 256830 }, { "epoch": 0.5188330498511213, "grad_norm": 580.5076293945312, "learning_rate": 5.6322067895100705e-06, "loss": 14.904, "step": 256840 }, { "epoch": 0.5188532504838052, "grad_norm": 498.6604919433594, "learning_rate": 5.631860523692553e-06, "loss": 16.6257, "step": 256850 }, { "epoch": 0.518873451116489, "grad_norm": 164.28042602539062, "learning_rate": 5.631514254795424e-06, "loss": 17.0511, "step": 256860 }, { "epoch": 0.5188936517491728, "grad_norm": 600.585693359375, "learning_rate": 5.6311679828203706e-06, "loss": 22.245, "step": 256870 }, { "epoch": 0.5189138523818566, "grad_norm": 828.4389038085938, "learning_rate": 5.630821707769081e-06, "loss": 16.5592, "step": 256880 }, { "epoch": 0.5189340530145404, "grad_norm": 26.64689064025879, "learning_rate": 5.630475429643244e-06, "loss": 21.9752, "step": 256890 }, { "epoch": 0.5189542536472243, "grad_norm": 557.02734375, "learning_rate": 5.630129148444543e-06, "loss": 10.7585, "step": 256900 }, { "epoch": 0.5189744542799081, "grad_norm": 202.89645385742188, "learning_rate": 
5.629782864174672e-06, "loss": 7.845, "step": 256910 }, { "epoch": 0.5189946549125919, "grad_norm": 397.0294494628906, "learning_rate": 5.629436576835315e-06, "loss": 18.1145, "step": 256920 }, { "epoch": 0.5190148555452757, "grad_norm": 602.2669677734375, "learning_rate": 5.6290902864281605e-06, "loss": 28.0946, "step": 256930 }, { "epoch": 0.5190350561779595, "grad_norm": 387.1973571777344, "learning_rate": 5.628743992954896e-06, "loss": 17.2151, "step": 256940 }, { "epoch": 0.5190552568106434, "grad_norm": 123.08863830566406, "learning_rate": 5.62839769641721e-06, "loss": 22.2356, "step": 256950 }, { "epoch": 0.5190754574433272, "grad_norm": 201.61253356933594, "learning_rate": 5.6280513968167895e-06, "loss": 23.8974, "step": 256960 }, { "epoch": 0.519095658076011, "grad_norm": 435.9090270996094, "learning_rate": 5.627705094155322e-06, "loss": 21.9942, "step": 256970 }, { "epoch": 0.5191158587086948, "grad_norm": 895.2130126953125, "learning_rate": 5.627358788434497e-06, "loss": 24.3738, "step": 256980 }, { "epoch": 0.5191360593413785, "grad_norm": 663.9862670898438, "learning_rate": 5.627012479656001e-06, "loss": 33.8452, "step": 256990 }, { "epoch": 0.5191562599740623, "grad_norm": 660.0105590820312, "learning_rate": 5.626666167821522e-06, "loss": 35.4031, "step": 257000 }, { "epoch": 0.5191764606067462, "grad_norm": 354.1170959472656, "learning_rate": 5.626319852932748e-06, "loss": 47.348, "step": 257010 }, { "epoch": 0.51919666123943, "grad_norm": 278.7491760253906, "learning_rate": 5.625973534991368e-06, "loss": 22.948, "step": 257020 }, { "epoch": 0.5192168618721138, "grad_norm": 220.43814086914062, "learning_rate": 5.625627213999067e-06, "loss": 16.3012, "step": 257030 }, { "epoch": 0.5192370625047976, "grad_norm": 676.5736694335938, "learning_rate": 5.6252808899575375e-06, "loss": 12.6801, "step": 257040 }, { "epoch": 0.5192572631374814, "grad_norm": 198.6671142578125, "learning_rate": 5.624934562868463e-06, "loss": 19.5471, "step": 257050 }, { "epoch": 0.5192774637701653, "grad_norm": 668.9926147460938, "learning_rate": 5.624588232733533e-06, "loss": 18.102, "step": 257060 }, { "epoch": 0.5192976644028491, "grad_norm": 286.4676818847656, "learning_rate": 5.624241899554437e-06, "loss": 18.2227, "step": 257070 }, { "epoch": 0.5193178650355329, "grad_norm": 109.82481384277344, "learning_rate": 5.62389556333286e-06, "loss": 22.1474, "step": 257080 }, { "epoch": 0.5193380656682167, "grad_norm": 107.15418243408203, "learning_rate": 5.623549224070494e-06, "loss": 21.2177, "step": 257090 }, { "epoch": 0.5193582663009005, "grad_norm": 214.92942810058594, "learning_rate": 5.623202881769023e-06, "loss": 13.6254, "step": 257100 }, { "epoch": 0.5193784669335844, "grad_norm": 227.6421356201172, "learning_rate": 5.622856536430137e-06, "loss": 11.4191, "step": 257110 }, { "epoch": 0.5193986675662682, "grad_norm": 725.64453125, "learning_rate": 5.622510188055523e-06, "loss": 27.8473, "step": 257120 }, { "epoch": 0.519418868198952, "grad_norm": 737.1107177734375, "learning_rate": 5.622163836646871e-06, "loss": 38.0646, "step": 257130 }, { "epoch": 0.5194390688316358, "grad_norm": 222.0409698486328, "learning_rate": 5.621817482205868e-06, "loss": 34.0105, "step": 257140 }, { "epoch": 0.5194592694643196, "grad_norm": 802.1209716796875, "learning_rate": 5.6214711247342015e-06, "loss": 18.7768, "step": 257150 }, { "epoch": 0.5194794700970035, "grad_norm": 191.45761108398438, "learning_rate": 5.621124764233561e-06, "loss": 29.7747, "step": 257160 }, { "epoch": 0.5194996707296873, "grad_norm": 
81.80042266845703, "learning_rate": 5.620778400705632e-06, "loss": 18.1627, "step": 257170 }, { "epoch": 0.5195198713623711, "grad_norm": 382.63897705078125, "learning_rate": 5.620432034152107e-06, "loss": 8.3434, "step": 257180 }, { "epoch": 0.5195400719950549, "grad_norm": 583.813720703125, "learning_rate": 5.620085664574668e-06, "loss": 20.5988, "step": 257190 }, { "epoch": 0.5195602726277387, "grad_norm": 727.8671264648438, "learning_rate": 5.6197392919750095e-06, "loss": 23.7689, "step": 257200 }, { "epoch": 0.5195804732604226, "grad_norm": 570.191162109375, "learning_rate": 5.619392916354815e-06, "loss": 23.2146, "step": 257210 }, { "epoch": 0.5196006738931064, "grad_norm": 453.07550048828125, "learning_rate": 5.619046537715776e-06, "loss": 19.9638, "step": 257220 }, { "epoch": 0.5196208745257902, "grad_norm": 162.64599609375, "learning_rate": 5.61870015605958e-06, "loss": 25.7242, "step": 257230 }, { "epoch": 0.5196410751584739, "grad_norm": 451.34259033203125, "learning_rate": 5.618353771387912e-06, "loss": 47.1894, "step": 257240 }, { "epoch": 0.5196612757911577, "grad_norm": 460.22637939453125, "learning_rate": 5.618007383702464e-06, "loss": 18.1238, "step": 257250 }, { "epoch": 0.5196814764238415, "grad_norm": 284.9989318847656, "learning_rate": 5.617660993004923e-06, "loss": 22.8709, "step": 257260 }, { "epoch": 0.5197016770565254, "grad_norm": 375.36834716796875, "learning_rate": 5.617314599296977e-06, "loss": 9.8315, "step": 257270 }, { "epoch": 0.5197218776892092, "grad_norm": 505.88555908203125, "learning_rate": 5.616968202580315e-06, "loss": 12.7147, "step": 257280 }, { "epoch": 0.519742078321893, "grad_norm": 341.1786804199219, "learning_rate": 5.6166218028566246e-06, "loss": 19.2148, "step": 257290 }, { "epoch": 0.5197622789545768, "grad_norm": 212.43516540527344, "learning_rate": 5.616275400127594e-06, "loss": 11.8919, "step": 257300 }, { "epoch": 0.5197824795872606, "grad_norm": 330.664306640625, "learning_rate": 5.615928994394913e-06, "loss": 17.9222, "step": 257310 }, { "epoch": 0.5198026802199445, "grad_norm": 320.563232421875, "learning_rate": 5.615582585660266e-06, "loss": 22.1219, "step": 257320 }, { "epoch": 0.5198228808526283, "grad_norm": 721.0665893554688, "learning_rate": 5.615236173925347e-06, "loss": 30.3213, "step": 257330 }, { "epoch": 0.5198430814853121, "grad_norm": 286.6113586425781, "learning_rate": 5.61488975919184e-06, "loss": 21.4331, "step": 257340 }, { "epoch": 0.5198632821179959, "grad_norm": 349.9152526855469, "learning_rate": 5.6145433414614345e-06, "loss": 17.5125, "step": 257350 }, { "epoch": 0.5198834827506797, "grad_norm": 214.19204711914062, "learning_rate": 5.614196920735822e-06, "loss": 14.9003, "step": 257360 }, { "epoch": 0.5199036833833636, "grad_norm": 443.8629150390625, "learning_rate": 5.613850497016687e-06, "loss": 26.6429, "step": 257370 }, { "epoch": 0.5199238840160474, "grad_norm": 262.8409423828125, "learning_rate": 5.613504070305717e-06, "loss": 12.8079, "step": 257380 }, { "epoch": 0.5199440846487312, "grad_norm": 423.7279357910156, "learning_rate": 5.613157640604605e-06, "loss": 34.1807, "step": 257390 }, { "epoch": 0.519964285281415, "grad_norm": 560.2799072265625, "learning_rate": 5.612811207915034e-06, "loss": 20.5849, "step": 257400 }, { "epoch": 0.5199844859140988, "grad_norm": 783.8386840820312, "learning_rate": 5.6124647722386996e-06, "loss": 32.4919, "step": 257410 }, { "epoch": 0.5200046865467827, "grad_norm": 269.1947326660156, "learning_rate": 5.612118333577283e-06, "loss": 27.8402, "step": 257420 }, { "epoch": 
0.5200248871794665, "grad_norm": 212.03346252441406, "learning_rate": 5.611771891932477e-06, "loss": 12.0659, "step": 257430 }, { "epoch": 0.5200450878121503, "grad_norm": 248.64772033691406, "learning_rate": 5.611425447305969e-06, "loss": 22.6025, "step": 257440 }, { "epoch": 0.5200652884448341, "grad_norm": 114.89862823486328, "learning_rate": 5.611078999699448e-06, "loss": 15.9903, "step": 257450 }, { "epoch": 0.520085489077518, "grad_norm": 453.12615966796875, "learning_rate": 5.6107325491146024e-06, "loss": 25.1079, "step": 257460 }, { "epoch": 0.5201056897102018, "grad_norm": 623.2423706054688, "learning_rate": 5.61038609555312e-06, "loss": 25.0207, "step": 257470 }, { "epoch": 0.5201258903428856, "grad_norm": 266.9727478027344, "learning_rate": 5.610039639016689e-06, "loss": 18.0565, "step": 257480 }, { "epoch": 0.5201460909755694, "grad_norm": 0.0, "learning_rate": 5.609693179506999e-06, "loss": 21.2829, "step": 257490 }, { "epoch": 0.5201662916082531, "grad_norm": 68.14176940917969, "learning_rate": 5.609346717025738e-06, "loss": 34.4678, "step": 257500 }, { "epoch": 0.5201864922409369, "grad_norm": 354.24346923828125, "learning_rate": 5.609000251574596e-06, "loss": 15.353, "step": 257510 }, { "epoch": 0.5202066928736208, "grad_norm": 228.03439331054688, "learning_rate": 5.60865378315526e-06, "loss": 22.66, "step": 257520 }, { "epoch": 0.5202268935063046, "grad_norm": 407.65802001953125, "learning_rate": 5.6083073117694186e-06, "loss": 16.2957, "step": 257530 }, { "epoch": 0.5202470941389884, "grad_norm": 619.2686767578125, "learning_rate": 5.607960837418763e-06, "loss": 17.9244, "step": 257540 }, { "epoch": 0.5202672947716722, "grad_norm": 528.3983764648438, "learning_rate": 5.6076143601049795e-06, "loss": 24.1809, "step": 257550 }, { "epoch": 0.520287495404356, "grad_norm": 326.4344177246094, "learning_rate": 5.607267879829757e-06, "loss": 15.3803, "step": 257560 }, { "epoch": 0.5203076960370399, "grad_norm": 7.018338680267334, "learning_rate": 5.606921396594785e-06, "loss": 22.5545, "step": 257570 }, { "epoch": 0.5203278966697237, "grad_norm": 53.98643112182617, "learning_rate": 5.60657491040175e-06, "loss": 47.3502, "step": 257580 }, { "epoch": 0.5203480973024075, "grad_norm": 408.90850830078125, "learning_rate": 5.606228421252344e-06, "loss": 22.2837, "step": 257590 }, { "epoch": 0.5203682979350913, "grad_norm": 629.2796630859375, "learning_rate": 5.605881929148254e-06, "loss": 19.7562, "step": 257600 }, { "epoch": 0.5203884985677751, "grad_norm": 234.08175659179688, "learning_rate": 5.605535434091168e-06, "loss": 16.3395, "step": 257610 }, { "epoch": 0.520408699200459, "grad_norm": 240.43798828125, "learning_rate": 5.605188936082776e-06, "loss": 13.7598, "step": 257620 }, { "epoch": 0.5204288998331428, "grad_norm": 423.3980712890625, "learning_rate": 5.604842435124769e-06, "loss": 20.0048, "step": 257630 }, { "epoch": 0.5204491004658266, "grad_norm": 493.5419616699219, "learning_rate": 5.604495931218831e-06, "loss": 16.8068, "step": 257640 }, { "epoch": 0.5204693010985104, "grad_norm": 586.6019287109375, "learning_rate": 5.604149424366653e-06, "loss": 25.5807, "step": 257650 }, { "epoch": 0.5204895017311942, "grad_norm": 421.7281494140625, "learning_rate": 5.603802914569924e-06, "loss": 13.6986, "step": 257660 }, { "epoch": 0.5205097023638781, "grad_norm": 160.74330139160156, "learning_rate": 5.603456401830333e-06, "loss": 21.5895, "step": 257670 }, { "epoch": 0.5205299029965619, "grad_norm": 458.26141357421875, "learning_rate": 5.60310988614957e-06, "loss": 19.0367, "step": 
257680 }, { "epoch": 0.5205501036292457, "grad_norm": 532.3012084960938, "learning_rate": 5.60276336752932e-06, "loss": 21.3122, "step": 257690 }, { "epoch": 0.5205703042619295, "grad_norm": 474.4313049316406, "learning_rate": 5.6024168459712765e-06, "loss": 13.374, "step": 257700 }, { "epoch": 0.5205905048946133, "grad_norm": 104.30779266357422, "learning_rate": 5.602070321477126e-06, "loss": 22.9658, "step": 257710 }, { "epoch": 0.5206107055272972, "grad_norm": 296.1563720703125, "learning_rate": 5.601723794048558e-06, "loss": 18.1674, "step": 257720 }, { "epoch": 0.520630906159981, "grad_norm": 171.5693817138672, "learning_rate": 5.601377263687262e-06, "loss": 22.7732, "step": 257730 }, { "epoch": 0.5206511067926648, "grad_norm": 214.3036651611328, "learning_rate": 5.601030730394923e-06, "loss": 29.8944, "step": 257740 }, { "epoch": 0.5206713074253486, "grad_norm": 532.0447998046875, "learning_rate": 5.600684194173236e-06, "loss": 12.5308, "step": 257750 }, { "epoch": 0.5206915080580323, "grad_norm": 768.5252075195312, "learning_rate": 5.600337655023887e-06, "loss": 39.9223, "step": 257760 }, { "epoch": 0.5207117086907161, "grad_norm": 529.4053344726562, "learning_rate": 5.599991112948564e-06, "loss": 15.0716, "step": 257770 }, { "epoch": 0.5207319093234, "grad_norm": 635.5668334960938, "learning_rate": 5.5996445679489566e-06, "loss": 20.5109, "step": 257780 }, { "epoch": 0.5207521099560838, "grad_norm": 27.236709594726562, "learning_rate": 5.599298020026757e-06, "loss": 24.8813, "step": 257790 }, { "epoch": 0.5207723105887676, "grad_norm": 298.06182861328125, "learning_rate": 5.598951469183649e-06, "loss": 23.0239, "step": 257800 }, { "epoch": 0.5207925112214514, "grad_norm": 210.44068908691406, "learning_rate": 5.598604915421324e-06, "loss": 12.4049, "step": 257810 }, { "epoch": 0.5208127118541352, "grad_norm": 376.73248291015625, "learning_rate": 5.598258358741472e-06, "loss": 13.9753, "step": 257820 }, { "epoch": 0.5208329124868191, "grad_norm": 513.9283447265625, "learning_rate": 5.597911799145781e-06, "loss": 19.6141, "step": 257830 }, { "epoch": 0.5208531131195029, "grad_norm": 832.7177734375, "learning_rate": 5.597565236635942e-06, "loss": 30.1937, "step": 257840 }, { "epoch": 0.5208733137521867, "grad_norm": 361.26300048828125, "learning_rate": 5.59721867121364e-06, "loss": 21.9525, "step": 257850 }, { "epoch": 0.5208935143848705, "grad_norm": 328.2436828613281, "learning_rate": 5.596872102880568e-06, "loss": 18.5472, "step": 257860 }, { "epoch": 0.5209137150175543, "grad_norm": 127.17230987548828, "learning_rate": 5.596525531638415e-06, "loss": 15.9013, "step": 257870 }, { "epoch": 0.5209339156502382, "grad_norm": 309.1257019042969, "learning_rate": 5.596178957488867e-06, "loss": 28.6147, "step": 257880 }, { "epoch": 0.520954116282922, "grad_norm": 60.98568344116211, "learning_rate": 5.595832380433616e-06, "loss": 9.7927, "step": 257890 }, { "epoch": 0.5209743169156058, "grad_norm": 233.31597900390625, "learning_rate": 5.59548580047435e-06, "loss": 19.158, "step": 257900 }, { "epoch": 0.5209945175482896, "grad_norm": 370.2422790527344, "learning_rate": 5.595139217612758e-06, "loss": 26.8181, "step": 257910 }, { "epoch": 0.5210147181809734, "grad_norm": 410.9178771972656, "learning_rate": 5.59479263185053e-06, "loss": 16.782, "step": 257920 }, { "epoch": 0.5210349188136573, "grad_norm": 408.39849853515625, "learning_rate": 5.594446043189355e-06, "loss": 15.3456, "step": 257930 }, { "epoch": 0.5210551194463411, "grad_norm": 505.7781066894531, "learning_rate": 
5.594099451630921e-06, "loss": 16.3654, "step": 257940 }, { "epoch": 0.5210753200790249, "grad_norm": 962.6716918945312, "learning_rate": 5.593752857176921e-06, "loss": 21.7308, "step": 257950 }, { "epoch": 0.5210955207117087, "grad_norm": 455.1202697753906, "learning_rate": 5.593406259829038e-06, "loss": 14.5062, "step": 257960 }, { "epoch": 0.5211157213443925, "grad_norm": 300.18402099609375, "learning_rate": 5.593059659588968e-06, "loss": 19.0189, "step": 257970 }, { "epoch": 0.5211359219770764, "grad_norm": 220.6542205810547, "learning_rate": 5.592713056458395e-06, "loss": 18.6487, "step": 257980 }, { "epoch": 0.5211561226097602, "grad_norm": 323.6950378417969, "learning_rate": 5.592366450439012e-06, "loss": 25.2693, "step": 257990 }, { "epoch": 0.521176323242444, "grad_norm": 262.5322570800781, "learning_rate": 5.592019841532507e-06, "loss": 13.7469, "step": 258000 }, { "epoch": 0.5211965238751277, "grad_norm": 505.59637451171875, "learning_rate": 5.591673229740566e-06, "loss": 23.9838, "step": 258010 }, { "epoch": 0.5212167245078115, "grad_norm": 244.86148071289062, "learning_rate": 5.591326615064885e-06, "loss": 17.9718, "step": 258020 }, { "epoch": 0.5212369251404954, "grad_norm": 1824.36767578125, "learning_rate": 5.590979997507146e-06, "loss": 37.4517, "step": 258030 }, { "epoch": 0.5212571257731792, "grad_norm": 532.5745239257812, "learning_rate": 5.590633377069046e-06, "loss": 17.079, "step": 258040 }, { "epoch": 0.521277326405863, "grad_norm": 624.6132202148438, "learning_rate": 5.590286753752269e-06, "loss": 18.441, "step": 258050 }, { "epoch": 0.5212975270385468, "grad_norm": 184.2586212158203, "learning_rate": 5.5899401275585064e-06, "loss": 17.4711, "step": 258060 }, { "epoch": 0.5213177276712306, "grad_norm": 7.946119785308838, "learning_rate": 5.5895934984894476e-06, "loss": 18.3772, "step": 258070 }, { "epoch": 0.5213379283039145, "grad_norm": 750.5645141601562, "learning_rate": 5.58924686654678e-06, "loss": 22.892, "step": 258080 }, { "epoch": 0.5213581289365983, "grad_norm": 154.44918823242188, "learning_rate": 5.588900231732196e-06, "loss": 16.0328, "step": 258090 }, { "epoch": 0.5213783295692821, "grad_norm": 418.48748779296875, "learning_rate": 5.588553594047382e-06, "loss": 19.5818, "step": 258100 }, { "epoch": 0.5213985302019659, "grad_norm": 445.0689392089844, "learning_rate": 5.5882069534940305e-06, "loss": 22.2834, "step": 258110 }, { "epoch": 0.5214187308346497, "grad_norm": 94.60790252685547, "learning_rate": 5.58786031007383e-06, "loss": 11.85, "step": 258120 }, { "epoch": 0.5214389314673336, "grad_norm": 231.06190490722656, "learning_rate": 5.5875136637884695e-06, "loss": 13.5067, "step": 258130 }, { "epoch": 0.5214591321000174, "grad_norm": 347.8343200683594, "learning_rate": 5.587167014639638e-06, "loss": 9.9655, "step": 258140 }, { "epoch": 0.5214793327327012, "grad_norm": 226.5662078857422, "learning_rate": 5.5868203626290266e-06, "loss": 23.8623, "step": 258150 }, { "epoch": 0.521499533365385, "grad_norm": 258.6246337890625, "learning_rate": 5.586473707758322e-06, "loss": 20.4882, "step": 258160 }, { "epoch": 0.5215197339980688, "grad_norm": 378.86688232421875, "learning_rate": 5.586127050029218e-06, "loss": 20.8964, "step": 258170 }, { "epoch": 0.5215399346307527, "grad_norm": 51.07099533081055, "learning_rate": 5.585780389443401e-06, "loss": 27.5344, "step": 258180 }, { "epoch": 0.5215601352634365, "grad_norm": 400.8778076171875, "learning_rate": 5.58543372600256e-06, "loss": 15.4608, "step": 258190 }, { "epoch": 0.5215803358961203, "grad_norm": 
472.527099609375, "learning_rate": 5.585087059708389e-06, "loss": 24.3184, "step": 258200 }, { "epoch": 0.5216005365288041, "grad_norm": 138.24754333496094, "learning_rate": 5.584740390562572e-06, "loss": 7.7216, "step": 258210 }, { "epoch": 0.5216207371614879, "grad_norm": 337.6424560546875, "learning_rate": 5.584393718566802e-06, "loss": 21.9638, "step": 258220 }, { "epoch": 0.5216409377941718, "grad_norm": 606.82763671875, "learning_rate": 5.584047043722768e-06, "loss": 36.9224, "step": 258230 }, { "epoch": 0.5216611384268556, "grad_norm": 577.6422729492188, "learning_rate": 5.5837003660321596e-06, "loss": 22.4856, "step": 258240 }, { "epoch": 0.5216813390595394, "grad_norm": 38.453495025634766, "learning_rate": 5.5833536854966665e-06, "loss": 9.996, "step": 258250 }, { "epoch": 0.5217015396922232, "grad_norm": 347.631103515625, "learning_rate": 5.5830070021179785e-06, "loss": 18.9333, "step": 258260 }, { "epoch": 0.5217217403249069, "grad_norm": 794.9881591796875, "learning_rate": 5.582660315897785e-06, "loss": 32.6643, "step": 258270 }, { "epoch": 0.5217419409575907, "grad_norm": 156.62416076660156, "learning_rate": 5.582313626837776e-06, "loss": 26.8639, "step": 258280 }, { "epoch": 0.5217621415902746, "grad_norm": 418.0146789550781, "learning_rate": 5.58196693493964e-06, "loss": 18.6138, "step": 258290 }, { "epoch": 0.5217823422229584, "grad_norm": 332.8797607421875, "learning_rate": 5.581620240205068e-06, "loss": 21.2172, "step": 258300 }, { "epoch": 0.5218025428556422, "grad_norm": 186.88479614257812, "learning_rate": 5.58127354263575e-06, "loss": 23.2312, "step": 258310 }, { "epoch": 0.521822743488326, "grad_norm": 295.6640319824219, "learning_rate": 5.580926842233375e-06, "loss": 14.8955, "step": 258320 }, { "epoch": 0.5218429441210098, "grad_norm": 284.3543395996094, "learning_rate": 5.580580138999633e-06, "loss": 25.2832, "step": 258330 }, { "epoch": 0.5218631447536937, "grad_norm": 323.582275390625, "learning_rate": 5.580233432936215e-06, "loss": 18.7778, "step": 258340 }, { "epoch": 0.5218833453863775, "grad_norm": 8.183572769165039, "learning_rate": 5.5798867240448075e-06, "loss": 13.8316, "step": 258350 }, { "epoch": 0.5219035460190613, "grad_norm": 312.5469970703125, "learning_rate": 5.579540012327103e-06, "loss": 15.7613, "step": 258360 }, { "epoch": 0.5219237466517451, "grad_norm": 963.30712890625, "learning_rate": 5.579193297784792e-06, "loss": 15.6505, "step": 258370 }, { "epoch": 0.5219439472844289, "grad_norm": 3.0414092540740967, "learning_rate": 5.578846580419562e-06, "loss": 15.5307, "step": 258380 }, { "epoch": 0.5219641479171128, "grad_norm": 202.86471557617188, "learning_rate": 5.578499860233104e-06, "loss": 23.9264, "step": 258390 }, { "epoch": 0.5219843485497966, "grad_norm": 354.16278076171875, "learning_rate": 5.578153137227109e-06, "loss": 10.0166, "step": 258400 }, { "epoch": 0.5220045491824804, "grad_norm": 265.00823974609375, "learning_rate": 5.577806411403265e-06, "loss": 5.5652, "step": 258410 }, { "epoch": 0.5220247498151642, "grad_norm": 117.40001678466797, "learning_rate": 5.577459682763262e-06, "loss": 16.3928, "step": 258420 }, { "epoch": 0.522044950447848, "grad_norm": 643.47607421875, "learning_rate": 5.577112951308792e-06, "loss": 29.5794, "step": 258430 }, { "epoch": 0.5220651510805319, "grad_norm": 251.23094177246094, "learning_rate": 5.576766217041541e-06, "loss": 15.4696, "step": 258440 }, { "epoch": 0.5220853517132157, "grad_norm": 486.6929626464844, "learning_rate": 5.576419479963204e-06, "loss": 24.7433, "step": 258450 }, { "epoch": 
0.5221055523458995, "grad_norm": 480.2086486816406, "learning_rate": 5.576072740075467e-06, "loss": 14.7398, "step": 258460 }, { "epoch": 0.5221257529785833, "grad_norm": 373.065673828125, "learning_rate": 5.575725997380023e-06, "loss": 25.3467, "step": 258470 }, { "epoch": 0.5221459536112671, "grad_norm": 414.2657165527344, "learning_rate": 5.575379251878558e-06, "loss": 23.7654, "step": 258480 }, { "epoch": 0.522166154243951, "grad_norm": 460.9728088378906, "learning_rate": 5.575032503572765e-06, "loss": 23.8758, "step": 258490 }, { "epoch": 0.5221863548766348, "grad_norm": 9.743806838989258, "learning_rate": 5.5746857524643335e-06, "loss": 9.0604, "step": 258500 }, { "epoch": 0.5222065555093186, "grad_norm": 460.9274597167969, "learning_rate": 5.5743389985549535e-06, "loss": 20.8826, "step": 258510 }, { "epoch": 0.5222267561420023, "grad_norm": 192.0853729248047, "learning_rate": 5.573992241846315e-06, "loss": 10.7161, "step": 258520 }, { "epoch": 0.5222469567746861, "grad_norm": 405.860595703125, "learning_rate": 5.573645482340107e-06, "loss": 17.9584, "step": 258530 }, { "epoch": 0.52226715740737, "grad_norm": 528.8161010742188, "learning_rate": 5.573298720038022e-06, "loss": 12.7832, "step": 258540 }, { "epoch": 0.5222873580400538, "grad_norm": 217.5623779296875, "learning_rate": 5.572951954941748e-06, "loss": 18.7989, "step": 258550 }, { "epoch": 0.5223075586727376, "grad_norm": 474.1935119628906, "learning_rate": 5.572605187052975e-06, "loss": 11.0172, "step": 258560 }, { "epoch": 0.5223277593054214, "grad_norm": 152.40489196777344, "learning_rate": 5.572258416373394e-06, "loss": 12.0545, "step": 258570 }, { "epoch": 0.5223479599381052, "grad_norm": 699.2028198242188, "learning_rate": 5.571911642904696e-06, "loss": 21.6052, "step": 258580 }, { "epoch": 0.522368160570789, "grad_norm": 181.78952026367188, "learning_rate": 5.571564866648569e-06, "loss": 12.0304, "step": 258590 }, { "epoch": 0.5223883612034729, "grad_norm": 129.2386016845703, "learning_rate": 5.5712180876067045e-06, "loss": 15.395, "step": 258600 }, { "epoch": 0.5224085618361567, "grad_norm": 459.8892822265625, "learning_rate": 5.570871305780793e-06, "loss": 18.8547, "step": 258610 }, { "epoch": 0.5224287624688405, "grad_norm": 175.90707397460938, "learning_rate": 5.570524521172523e-06, "loss": 13.9561, "step": 258620 }, { "epoch": 0.5224489631015243, "grad_norm": 388.4878234863281, "learning_rate": 5.570177733783586e-06, "loss": 19.4359, "step": 258630 }, { "epoch": 0.5224691637342082, "grad_norm": 270.1606140136719, "learning_rate": 5.56983094361567e-06, "loss": 9.4464, "step": 258640 }, { "epoch": 0.522489364366892, "grad_norm": 446.36431884765625, "learning_rate": 5.56948415067047e-06, "loss": 34.3768, "step": 258650 }, { "epoch": 0.5225095649995758, "grad_norm": 142.0673828125, "learning_rate": 5.569137354949672e-06, "loss": 23.0147, "step": 258660 }, { "epoch": 0.5225297656322596, "grad_norm": 418.1278076171875, "learning_rate": 5.568790556454967e-06, "loss": 15.567, "step": 258670 }, { "epoch": 0.5225499662649434, "grad_norm": 229.39422607421875, "learning_rate": 5.568443755188048e-06, "loss": 4.9313, "step": 258680 }, { "epoch": 0.5225701668976273, "grad_norm": 677.0360717773438, "learning_rate": 5.568096951150601e-06, "loss": 17.7067, "step": 258690 }, { "epoch": 0.5225903675303111, "grad_norm": 255.9961700439453, "learning_rate": 5.567750144344318e-06, "loss": 15.2524, "step": 258700 }, { "epoch": 0.5226105681629949, "grad_norm": 224.96157836914062, "learning_rate": 5.567403334770891e-06, "loss": 12.1423, 
"step": 258710 }, { "epoch": 0.5226307687956787, "grad_norm": 129.64767456054688, "learning_rate": 5.567056522432008e-06, "loss": 17.2798, "step": 258720 }, { "epoch": 0.5226509694283625, "grad_norm": 889.9985961914062, "learning_rate": 5.5667097073293605e-06, "loss": 34.7892, "step": 258730 }, { "epoch": 0.5226711700610464, "grad_norm": 9.282400131225586, "learning_rate": 5.56636288946464e-06, "loss": 24.7925, "step": 258740 }, { "epoch": 0.5226913706937302, "grad_norm": 600.868896484375, "learning_rate": 5.566016068839535e-06, "loss": 19.0472, "step": 258750 }, { "epoch": 0.522711571326414, "grad_norm": 686.1902465820312, "learning_rate": 5.565669245455735e-06, "loss": 15.5807, "step": 258760 }, { "epoch": 0.5227317719590978, "grad_norm": 508.9956359863281, "learning_rate": 5.565322419314933e-06, "loss": 23.0285, "step": 258770 }, { "epoch": 0.5227519725917815, "grad_norm": 360.8724060058594, "learning_rate": 5.564975590418816e-06, "loss": 36.6206, "step": 258780 }, { "epoch": 0.5227721732244653, "grad_norm": 632.7630004882812, "learning_rate": 5.564628758769079e-06, "loss": 30.5118, "step": 258790 }, { "epoch": 0.5227923738571492, "grad_norm": 651.6338500976562, "learning_rate": 5.5642819243674085e-06, "loss": 20.9828, "step": 258800 }, { "epoch": 0.522812574489833, "grad_norm": 803.3969116210938, "learning_rate": 5.563935087215497e-06, "loss": 20.3904, "step": 258810 }, { "epoch": 0.5228327751225168, "grad_norm": 385.5091247558594, "learning_rate": 5.563588247315035e-06, "loss": 23.2029, "step": 258820 }, { "epoch": 0.5228529757552006, "grad_norm": 12.902904510498047, "learning_rate": 5.563241404667711e-06, "loss": 12.9647, "step": 258830 }, { "epoch": 0.5228731763878844, "grad_norm": 383.08953857421875, "learning_rate": 5.562894559275216e-06, "loss": 23.1276, "step": 258840 }, { "epoch": 0.5228933770205683, "grad_norm": 623.5396118164062, "learning_rate": 5.562547711139243e-06, "loss": 16.249, "step": 258850 }, { "epoch": 0.5229135776532521, "grad_norm": 159.24526977539062, "learning_rate": 5.562200860261481e-06, "loss": 14.0956, "step": 258860 }, { "epoch": 0.5229337782859359, "grad_norm": 216.1454620361328, "learning_rate": 5.5618540066436174e-06, "loss": 7.0407, "step": 258870 }, { "epoch": 0.5229539789186197, "grad_norm": 645.8809204101562, "learning_rate": 5.561507150287347e-06, "loss": 24.7977, "step": 258880 }, { "epoch": 0.5229741795513035, "grad_norm": 380.7661437988281, "learning_rate": 5.56116029119436e-06, "loss": 30.2449, "step": 258890 }, { "epoch": 0.5229943801839874, "grad_norm": 387.68853759765625, "learning_rate": 5.560813429366345e-06, "loss": 19.7908, "step": 258900 }, { "epoch": 0.5230145808166712, "grad_norm": 1197.5845947265625, "learning_rate": 5.560466564804993e-06, "loss": 20.6971, "step": 258910 }, { "epoch": 0.523034781449355, "grad_norm": 276.0072021484375, "learning_rate": 5.560119697511995e-06, "loss": 10.2198, "step": 258920 }, { "epoch": 0.5230549820820388, "grad_norm": 137.66383361816406, "learning_rate": 5.559772827489042e-06, "loss": 24.7138, "step": 258930 }, { "epoch": 0.5230751827147226, "grad_norm": 4.750400543212891, "learning_rate": 5.559425954737824e-06, "loss": 13.2065, "step": 258940 }, { "epoch": 0.5230953833474065, "grad_norm": 300.6987609863281, "learning_rate": 5.559079079260032e-06, "loss": 15.55, "step": 258950 }, { "epoch": 0.5231155839800903, "grad_norm": 561.4017333984375, "learning_rate": 5.558732201057355e-06, "loss": 14.0307, "step": 258960 }, { "epoch": 0.5231357846127741, "grad_norm": 286.33770751953125, "learning_rate": 
5.558385320131487e-06, "loss": 18.4139, "step": 258970 }, { "epoch": 0.5231559852454579, "grad_norm": 183.73011779785156, "learning_rate": 5.558038436484116e-06, "loss": 19.1513, "step": 258980 }, { "epoch": 0.5231761858781417, "grad_norm": 427.0572509765625, "learning_rate": 5.5576915501169314e-06, "loss": 17.268, "step": 258990 }, { "epoch": 0.5231963865108256, "grad_norm": 202.74530029296875, "learning_rate": 5.557344661031628e-06, "loss": 9.5992, "step": 259000 }, { "epoch": 0.5232165871435094, "grad_norm": 387.3416748046875, "learning_rate": 5.556997769229893e-06, "loss": 25.6398, "step": 259010 }, { "epoch": 0.5232367877761932, "grad_norm": 444.55316162109375, "learning_rate": 5.556650874713421e-06, "loss": 39.484, "step": 259020 }, { "epoch": 0.5232569884088769, "grad_norm": 277.5933532714844, "learning_rate": 5.556303977483898e-06, "loss": 15.9852, "step": 259030 }, { "epoch": 0.5232771890415607, "grad_norm": 36.20888900756836, "learning_rate": 5.555957077543016e-06, "loss": 19.1169, "step": 259040 }, { "epoch": 0.5232973896742446, "grad_norm": 122.30401611328125, "learning_rate": 5.555610174892468e-06, "loss": 20.7532, "step": 259050 }, { "epoch": 0.5233175903069284, "grad_norm": 87.84234619140625, "learning_rate": 5.555263269533945e-06, "loss": 20.0165, "step": 259060 }, { "epoch": 0.5233377909396122, "grad_norm": 177.54856872558594, "learning_rate": 5.554916361469133e-06, "loss": 14.0952, "step": 259070 }, { "epoch": 0.523357991572296, "grad_norm": 609.1646728515625, "learning_rate": 5.554569450699727e-06, "loss": 13.2931, "step": 259080 }, { "epoch": 0.5233781922049798, "grad_norm": 630.5967407226562, "learning_rate": 5.554222537227417e-06, "loss": 27.3751, "step": 259090 }, { "epoch": 0.5233983928376637, "grad_norm": 144.98471069335938, "learning_rate": 5.553875621053893e-06, "loss": 25.0189, "step": 259100 }, { "epoch": 0.5234185934703475, "grad_norm": 248.9592742919922, "learning_rate": 5.553528702180848e-06, "loss": 10.8189, "step": 259110 }, { "epoch": 0.5234387941030313, "grad_norm": 407.3052673339844, "learning_rate": 5.55318178060997e-06, "loss": 19.5606, "step": 259120 }, { "epoch": 0.5234589947357151, "grad_norm": 116.8155746459961, "learning_rate": 5.5528348563429524e-06, "loss": 21.4337, "step": 259130 }, { "epoch": 0.5234791953683989, "grad_norm": 73.57362365722656, "learning_rate": 5.552487929381484e-06, "loss": 13.3468, "step": 259140 }, { "epoch": 0.5234993960010828, "grad_norm": 380.897216796875, "learning_rate": 5.552140999727256e-06, "loss": 16.9576, "step": 259150 }, { "epoch": 0.5235195966337666, "grad_norm": 63.59773635864258, "learning_rate": 5.551794067381959e-06, "loss": 28.4819, "step": 259160 }, { "epoch": 0.5235397972664504, "grad_norm": 531.4129638671875, "learning_rate": 5.551447132347286e-06, "loss": 18.2065, "step": 259170 }, { "epoch": 0.5235599978991342, "grad_norm": 217.3151092529297, "learning_rate": 5.551100194624925e-06, "loss": 21.4243, "step": 259180 }, { "epoch": 0.523580198531818, "grad_norm": 118.2372055053711, "learning_rate": 5.5507532542165706e-06, "loss": 22.6855, "step": 259190 }, { "epoch": 0.5236003991645019, "grad_norm": 262.572021484375, "learning_rate": 5.5504063111239116e-06, "loss": 15.9268, "step": 259200 }, { "epoch": 0.5236205997971857, "grad_norm": 266.8731384277344, "learning_rate": 5.550059365348638e-06, "loss": 11.6768, "step": 259210 }, { "epoch": 0.5236408004298695, "grad_norm": 476.862060546875, "learning_rate": 5.549712416892442e-06, "loss": 9.7514, "step": 259220 }, { "epoch": 0.5236610010625533, "grad_norm": 
726.2845458984375, "learning_rate": 5.549365465757013e-06, "loss": 34.2101, "step": 259230 }, { "epoch": 0.5236812016952371, "grad_norm": 332.2506103515625, "learning_rate": 5.549018511944046e-06, "loss": 21.7106, "step": 259240 }, { "epoch": 0.523701402327921, "grad_norm": 215.2087860107422, "learning_rate": 5.548671555455226e-06, "loss": 48.7787, "step": 259250 }, { "epoch": 0.5237216029606048, "grad_norm": 285.1471862792969, "learning_rate": 5.548324596292251e-06, "loss": 33.917, "step": 259260 }, { "epoch": 0.5237418035932886, "grad_norm": 445.18170166015625, "learning_rate": 5.547977634456806e-06, "loss": 28.387, "step": 259270 }, { "epoch": 0.5237620042259724, "grad_norm": 357.4407653808594, "learning_rate": 5.547630669950585e-06, "loss": 21.1768, "step": 259280 }, { "epoch": 0.5237822048586561, "grad_norm": 551.136474609375, "learning_rate": 5.547283702775279e-06, "loss": 20.7401, "step": 259290 }, { "epoch": 0.5238024054913399, "grad_norm": 64.77119445800781, "learning_rate": 5.546936732932578e-06, "loss": 8.634, "step": 259300 }, { "epoch": 0.5238226061240238, "grad_norm": 549.214111328125, "learning_rate": 5.546589760424175e-06, "loss": 24.5534, "step": 259310 }, { "epoch": 0.5238428067567076, "grad_norm": 286.0890808105469, "learning_rate": 5.5462427852517585e-06, "loss": 26.9847, "step": 259320 }, { "epoch": 0.5238630073893914, "grad_norm": 508.03515625, "learning_rate": 5.545895807417021e-06, "loss": 13.2195, "step": 259330 }, { "epoch": 0.5238832080220752, "grad_norm": 533.0213012695312, "learning_rate": 5.545548826921653e-06, "loss": 18.3663, "step": 259340 }, { "epoch": 0.523903408654759, "grad_norm": 323.7482604980469, "learning_rate": 5.545201843767348e-06, "loss": 19.4811, "step": 259350 }, { "epoch": 0.5239236092874429, "grad_norm": 187.0774383544922, "learning_rate": 5.544854857955795e-06, "loss": 17.8492, "step": 259360 }, { "epoch": 0.5239438099201267, "grad_norm": 166.46250915527344, "learning_rate": 5.544507869488684e-06, "loss": 14.3456, "step": 259370 }, { "epoch": 0.5239640105528105, "grad_norm": 171.3353729248047, "learning_rate": 5.544160878367709e-06, "loss": 25.5687, "step": 259380 }, { "epoch": 0.5239842111854943, "grad_norm": 277.5767517089844, "learning_rate": 5.543813884594559e-06, "loss": 15.5179, "step": 259390 }, { "epoch": 0.5240044118181781, "grad_norm": 541.5955200195312, "learning_rate": 5.543466888170927e-06, "loss": 30.6137, "step": 259400 }, { "epoch": 0.524024612450862, "grad_norm": 83.60375213623047, "learning_rate": 5.5431198890985014e-06, "loss": 5.8933, "step": 259410 }, { "epoch": 0.5240448130835458, "grad_norm": 878.072509765625, "learning_rate": 5.542772887378978e-06, "loss": 21.106, "step": 259420 }, { "epoch": 0.5240650137162296, "grad_norm": 395.443359375, "learning_rate": 5.5424258830140434e-06, "loss": 16.6393, "step": 259430 }, { "epoch": 0.5240852143489134, "grad_norm": 267.90826416015625, "learning_rate": 5.542078876005391e-06, "loss": 51.0356, "step": 259440 }, { "epoch": 0.5241054149815972, "grad_norm": 134.7003936767578, "learning_rate": 5.541731866354713e-06, "loss": 11.3166, "step": 259450 }, { "epoch": 0.5241256156142811, "grad_norm": 288.36785888671875, "learning_rate": 5.5413848540637e-06, "loss": 20.2249, "step": 259460 }, { "epoch": 0.5241458162469649, "grad_norm": 441.0600280761719, "learning_rate": 5.541037839134041e-06, "loss": 24.9278, "step": 259470 }, { "epoch": 0.5241660168796487, "grad_norm": 201.4093475341797, "learning_rate": 5.5406908215674306e-06, "loss": 14.75, "step": 259480 }, { "epoch": 
0.5241862175123325, "grad_norm": 244.89747619628906, "learning_rate": 5.5403438013655575e-06, "loss": 17.7876, "step": 259490 }, { "epoch": 0.5242064181450163, "grad_norm": 192.8192901611328, "learning_rate": 5.539996778530114e-06, "loss": 24.9327, "step": 259500 }, { "epoch": 0.5242266187777002, "grad_norm": 958.9153442382812, "learning_rate": 5.539649753062795e-06, "loss": 23.6456, "step": 259510 }, { "epoch": 0.524246819410384, "grad_norm": 782.2985229492188, "learning_rate": 5.5393027249652844e-06, "loss": 29.5384, "step": 259520 }, { "epoch": 0.5242670200430678, "grad_norm": 467.1959533691406, "learning_rate": 5.5389556942392794e-06, "loss": 24.0411, "step": 259530 }, { "epoch": 0.5242872206757516, "grad_norm": 102.59488677978516, "learning_rate": 5.538608660886471e-06, "loss": 14.9539, "step": 259540 }, { "epoch": 0.5243074213084353, "grad_norm": 43.54492950439453, "learning_rate": 5.5382616249085476e-06, "loss": 23.5561, "step": 259550 }, { "epoch": 0.5243276219411191, "grad_norm": 133.77264404296875, "learning_rate": 5.537914586307204e-06, "loss": 21.1789, "step": 259560 }, { "epoch": 0.524347822573803, "grad_norm": 668.717041015625, "learning_rate": 5.537567545084127e-06, "loss": 23.9103, "step": 259570 }, { "epoch": 0.5243680232064868, "grad_norm": 197.05282592773438, "learning_rate": 5.537220501241014e-06, "loss": 12.083, "step": 259580 }, { "epoch": 0.5243882238391706, "grad_norm": 219.2777862548828, "learning_rate": 5.536873454779552e-06, "loss": 20.6043, "step": 259590 }, { "epoch": 0.5244084244718544, "grad_norm": 630.7650756835938, "learning_rate": 5.536526405701433e-06, "loss": 14.3581, "step": 259600 }, { "epoch": 0.5244286251045382, "grad_norm": 358.7485046386719, "learning_rate": 5.536179354008351e-06, "loss": 7.4995, "step": 259610 }, { "epoch": 0.5244488257372221, "grad_norm": 27.89251708984375, "learning_rate": 5.5358322997019955e-06, "loss": 10.0689, "step": 259620 }, { "epoch": 0.5244690263699059, "grad_norm": 708.9737548828125, "learning_rate": 5.535485242784059e-06, "loss": 8.1251, "step": 259630 }, { "epoch": 0.5244892270025897, "grad_norm": 477.3925476074219, "learning_rate": 5.5351381832562316e-06, "loss": 30.8436, "step": 259640 }, { "epoch": 0.5245094276352735, "grad_norm": 79.54764556884766, "learning_rate": 5.534791121120205e-06, "loss": 34.1224, "step": 259650 }, { "epoch": 0.5245296282679573, "grad_norm": 58.21992492675781, "learning_rate": 5.534444056377671e-06, "loss": 34.8295, "step": 259660 }, { "epoch": 0.5245498289006412, "grad_norm": 249.1851806640625, "learning_rate": 5.534096989030324e-06, "loss": 20.0038, "step": 259670 }, { "epoch": 0.524570029533325, "grad_norm": 430.1670227050781, "learning_rate": 5.53374991907985e-06, "loss": 20.2447, "step": 259680 }, { "epoch": 0.5245902301660088, "grad_norm": 979.83203125, "learning_rate": 5.533402846527947e-06, "loss": 24.442, "step": 259690 }, { "epoch": 0.5246104307986926, "grad_norm": 292.3692321777344, "learning_rate": 5.5330557713763e-06, "loss": 10.8437, "step": 259700 }, { "epoch": 0.5246306314313764, "grad_norm": 372.9408874511719, "learning_rate": 5.532708693626605e-06, "loss": 19.4395, "step": 259710 }, { "epoch": 0.5246508320640603, "grad_norm": 218.0306396484375, "learning_rate": 5.5323616132805536e-06, "loss": 14.077, "step": 259720 }, { "epoch": 0.5246710326967441, "grad_norm": 14.43504810333252, "learning_rate": 5.532014530339834e-06, "loss": 35.8548, "step": 259730 }, { "epoch": 0.5246912333294279, "grad_norm": 429.4441223144531, "learning_rate": 5.531667444806142e-06, "loss": 17.55, 
"step": 259740 }, { "epoch": 0.5247114339621117, "grad_norm": 744.641845703125, "learning_rate": 5.5313203566811666e-06, "loss": 11.5287, "step": 259750 }, { "epoch": 0.5247316345947955, "grad_norm": 42.671661376953125, "learning_rate": 5.5309732659666e-06, "loss": 13.2541, "step": 259760 }, { "epoch": 0.5247518352274794, "grad_norm": 256.8294677734375, "learning_rate": 5.530626172664135e-06, "loss": 17.0344, "step": 259770 }, { "epoch": 0.5247720358601632, "grad_norm": 190.9846649169922, "learning_rate": 5.530279076775461e-06, "loss": 22.5209, "step": 259780 }, { "epoch": 0.524792236492847, "grad_norm": 273.3391418457031, "learning_rate": 5.529931978302272e-06, "loss": 21.7716, "step": 259790 }, { "epoch": 0.5248124371255307, "grad_norm": 238.27056884765625, "learning_rate": 5.52958487724626e-06, "loss": 22.222, "step": 259800 }, { "epoch": 0.5248326377582145, "grad_norm": 543.8564453125, "learning_rate": 5.529237773609114e-06, "loss": 20.6765, "step": 259810 }, { "epoch": 0.5248528383908984, "grad_norm": 168.21966552734375, "learning_rate": 5.528890667392527e-06, "loss": 48.8529, "step": 259820 }, { "epoch": 0.5248730390235822, "grad_norm": 560.0949096679688, "learning_rate": 5.528543558598193e-06, "loss": 15.7487, "step": 259830 }, { "epoch": 0.524893239656266, "grad_norm": 247.71060180664062, "learning_rate": 5.528196447227798e-06, "loss": 17.572, "step": 259840 }, { "epoch": 0.5249134402889498, "grad_norm": 71.76203918457031, "learning_rate": 5.527849333283042e-06, "loss": 24.7157, "step": 259850 }, { "epoch": 0.5249336409216336, "grad_norm": 907.2033081054688, "learning_rate": 5.527502216765609e-06, "loss": 17.4662, "step": 259860 }, { "epoch": 0.5249538415543175, "grad_norm": 84.12830352783203, "learning_rate": 5.527155097677196e-06, "loss": 19.072, "step": 259870 }, { "epoch": 0.5249740421870013, "grad_norm": 338.2107849121094, "learning_rate": 5.526807976019492e-06, "loss": 19.2565, "step": 259880 }, { "epoch": 0.5249942428196851, "grad_norm": 615.3682250976562, "learning_rate": 5.526460851794191e-06, "loss": 21.7621, "step": 259890 }, { "epoch": 0.5250144434523689, "grad_norm": 32.63579559326172, "learning_rate": 5.526113725002984e-06, "loss": 11.3412, "step": 259900 }, { "epoch": 0.5250346440850527, "grad_norm": 265.86065673828125, "learning_rate": 5.525766595647561e-06, "loss": 15.7407, "step": 259910 }, { "epoch": 0.5250548447177366, "grad_norm": 41.338722229003906, "learning_rate": 5.525419463729615e-06, "loss": 15.5702, "step": 259920 }, { "epoch": 0.5250750453504204, "grad_norm": 880.5347290039062, "learning_rate": 5.525072329250839e-06, "loss": 11.2259, "step": 259930 }, { "epoch": 0.5250952459831042, "grad_norm": 801.6827392578125, "learning_rate": 5.524725192212924e-06, "loss": 27.9179, "step": 259940 }, { "epoch": 0.525115446615788, "grad_norm": 504.3584289550781, "learning_rate": 5.524378052617563e-06, "loss": 18.1123, "step": 259950 }, { "epoch": 0.5251356472484718, "grad_norm": 2.5646278858184814, "learning_rate": 5.524030910466447e-06, "loss": 19.4226, "step": 259960 }, { "epoch": 0.5251558478811557, "grad_norm": 334.9571838378906, "learning_rate": 5.523683765761266e-06, "loss": 19.0106, "step": 259970 }, { "epoch": 0.5251760485138395, "grad_norm": 253.3269805908203, "learning_rate": 5.523336618503715e-06, "loss": 22.0745, "step": 259980 }, { "epoch": 0.5251962491465233, "grad_norm": 360.09344482421875, "learning_rate": 5.522989468695487e-06, "loss": 28.4839, "step": 259990 }, { "epoch": 0.5252164497792071, "grad_norm": 324.1053771972656, "learning_rate": 
5.522642316338268e-06, "loss": 19.7486, "step": 260000 }, { "epoch": 0.5252366504118909, "grad_norm": 876.195556640625, "learning_rate": 5.5222951614337564e-06, "loss": 26.3472, "step": 260010 }, { "epoch": 0.5252568510445748, "grad_norm": 337.7935791015625, "learning_rate": 5.521948003983639e-06, "loss": 12.4376, "step": 260020 }, { "epoch": 0.5252770516772586, "grad_norm": 852.6124267578125, "learning_rate": 5.521600843989613e-06, "loss": 22.2194, "step": 260030 }, { "epoch": 0.5252972523099424, "grad_norm": 80.01764678955078, "learning_rate": 5.521253681453366e-06, "loss": 20.5192, "step": 260040 }, { "epoch": 0.5253174529426262, "grad_norm": 462.2069091796875, "learning_rate": 5.520906516376592e-06, "loss": 10.2419, "step": 260050 }, { "epoch": 0.5253376535753099, "grad_norm": 166.80796813964844, "learning_rate": 5.520559348760984e-06, "loss": 25.0185, "step": 260060 }, { "epoch": 0.5253578542079937, "grad_norm": 49.324462890625, "learning_rate": 5.520212178608231e-06, "loss": 7.3639, "step": 260070 }, { "epoch": 0.5253780548406776, "grad_norm": 80.81123352050781, "learning_rate": 5.519865005920029e-06, "loss": 11.0352, "step": 260080 }, { "epoch": 0.5253982554733614, "grad_norm": 341.9402770996094, "learning_rate": 5.519517830698067e-06, "loss": 13.5035, "step": 260090 }, { "epoch": 0.5254184561060452, "grad_norm": 532.9110107421875, "learning_rate": 5.519170652944037e-06, "loss": 36.718, "step": 260100 }, { "epoch": 0.525438656738729, "grad_norm": 636.3525390625, "learning_rate": 5.518823472659634e-06, "loss": 20.4944, "step": 260110 }, { "epoch": 0.5254588573714128, "grad_norm": 248.55691528320312, "learning_rate": 5.518476289846548e-06, "loss": 15.9055, "step": 260120 }, { "epoch": 0.5254790580040967, "grad_norm": 428.7301025390625, "learning_rate": 5.518129104506471e-06, "loss": 18.6148, "step": 260130 }, { "epoch": 0.5254992586367805, "grad_norm": 366.0169372558594, "learning_rate": 5.5177819166410955e-06, "loss": 16.0438, "step": 260140 }, { "epoch": 0.5255194592694643, "grad_norm": 411.673583984375, "learning_rate": 5.517434726252113e-06, "loss": 11.1201, "step": 260150 }, { "epoch": 0.5255396599021481, "grad_norm": 130.8328094482422, "learning_rate": 5.5170875333412176e-06, "loss": 38.1429, "step": 260160 }, { "epoch": 0.525559860534832, "grad_norm": 114.20699310302734, "learning_rate": 5.516740337910101e-06, "loss": 8.5824, "step": 260170 }, { "epoch": 0.5255800611675158, "grad_norm": 404.4974060058594, "learning_rate": 5.516393139960452e-06, "loss": 20.9042, "step": 260180 }, { "epoch": 0.5256002618001996, "grad_norm": 707.0994262695312, "learning_rate": 5.516045939493968e-06, "loss": 25.061, "step": 260190 }, { "epoch": 0.5256204624328834, "grad_norm": 922.6636352539062, "learning_rate": 5.515698736512337e-06, "loss": 31.5711, "step": 260200 }, { "epoch": 0.5256406630655672, "grad_norm": 298.8223876953125, "learning_rate": 5.515351531017254e-06, "loss": 23.908, "step": 260210 }, { "epoch": 0.525660863698251, "grad_norm": 406.9949645996094, "learning_rate": 5.51500432301041e-06, "loss": 34.995, "step": 260220 }, { "epoch": 0.5256810643309349, "grad_norm": 162.56687927246094, "learning_rate": 5.514657112493497e-06, "loss": 19.2722, "step": 260230 }, { "epoch": 0.5257012649636187, "grad_norm": 178.6824951171875, "learning_rate": 5.514309899468209e-06, "loss": 25.4962, "step": 260240 }, { "epoch": 0.5257214655963025, "grad_norm": 518.660400390625, "learning_rate": 5.513962683936235e-06, "loss": 17.0572, "step": 260250 }, { "epoch": 0.5257416662289863, "grad_norm": 
370.029296875, "learning_rate": 5.51361546589927e-06, "loss": 26.6064, "step": 260260 }, { "epoch": 0.5257618668616701, "grad_norm": 435.8376770019531, "learning_rate": 5.513268245359005e-06, "loss": 15.1549, "step": 260270 }, { "epoch": 0.525782067494354, "grad_norm": 313.71844482421875, "learning_rate": 5.512921022317135e-06, "loss": 31.2322, "step": 260280 }, { "epoch": 0.5258022681270378, "grad_norm": 409.15087890625, "learning_rate": 5.512573796775347e-06, "loss": 23.1105, "step": 260290 }, { "epoch": 0.5258224687597216, "grad_norm": 168.183837890625, "learning_rate": 5.512226568735338e-06, "loss": 22.3686, "step": 260300 }, { "epoch": 0.5258426693924053, "grad_norm": 538.6890869140625, "learning_rate": 5.5118793381987985e-06, "loss": 22.7936, "step": 260310 }, { "epoch": 0.5258628700250891, "grad_norm": 240.4720916748047, "learning_rate": 5.511532105167422e-06, "loss": 12.3813, "step": 260320 }, { "epoch": 0.525883070657773, "grad_norm": 488.4220275878906, "learning_rate": 5.5111848696429005e-06, "loss": 6.9798, "step": 260330 }, { "epoch": 0.5259032712904568, "grad_norm": 302.3664855957031, "learning_rate": 5.510837631626923e-06, "loss": 14.1341, "step": 260340 }, { "epoch": 0.5259234719231406, "grad_norm": 546.3446655273438, "learning_rate": 5.510490391121188e-06, "loss": 18.4073, "step": 260350 }, { "epoch": 0.5259436725558244, "grad_norm": 316.3846740722656, "learning_rate": 5.510143148127384e-06, "loss": 13.9157, "step": 260360 }, { "epoch": 0.5259638731885082, "grad_norm": 372.5844421386719, "learning_rate": 5.509795902647203e-06, "loss": 21.7814, "step": 260370 }, { "epoch": 0.525984073821192, "grad_norm": 738.1446533203125, "learning_rate": 5.509448654682339e-06, "loss": 26.4201, "step": 260380 }, { "epoch": 0.5260042744538759, "grad_norm": 143.0221710205078, "learning_rate": 5.509101404234485e-06, "loss": 11.4723, "step": 260390 }, { "epoch": 0.5260244750865597, "grad_norm": 206.6165008544922, "learning_rate": 5.508754151305332e-06, "loss": 23.3635, "step": 260400 }, { "epoch": 0.5260446757192435, "grad_norm": 785.65087890625, "learning_rate": 5.508406895896573e-06, "loss": 21.6582, "step": 260410 }, { "epoch": 0.5260648763519273, "grad_norm": 122.45549011230469, "learning_rate": 5.5080596380099e-06, "loss": 14.9553, "step": 260420 }, { "epoch": 0.5260850769846112, "grad_norm": 499.3188781738281, "learning_rate": 5.507712377647006e-06, "loss": 11.4883, "step": 260430 }, { "epoch": 0.526105277617295, "grad_norm": 263.1232604980469, "learning_rate": 5.507365114809585e-06, "loss": 27.1334, "step": 260440 }, { "epoch": 0.5261254782499788, "grad_norm": 584.4727172851562, "learning_rate": 5.507017849499326e-06, "loss": 14.8529, "step": 260450 }, { "epoch": 0.5261456788826626, "grad_norm": 383.7892761230469, "learning_rate": 5.506670581717925e-06, "loss": 18.6287, "step": 260460 }, { "epoch": 0.5261658795153464, "grad_norm": 479.3489685058594, "learning_rate": 5.506323311467071e-06, "loss": 6.2042, "step": 260470 }, { "epoch": 0.5261860801480303, "grad_norm": 282.7895812988281, "learning_rate": 5.5059760387484595e-06, "loss": 13.2479, "step": 260480 }, { "epoch": 0.5262062807807141, "grad_norm": 495.27392578125, "learning_rate": 5.505628763563783e-06, "loss": 23.3407, "step": 260490 }, { "epoch": 0.5262264814133979, "grad_norm": 222.0509033203125, "learning_rate": 5.505281485914732e-06, "loss": 27.4989, "step": 260500 }, { "epoch": 0.5262466820460817, "grad_norm": 963.912353515625, "learning_rate": 5.504934205803002e-06, "loss": 25.8154, "step": 260510 }, { "epoch": 
0.5262668826787655, "grad_norm": 527.7839965820312, "learning_rate": 5.504586923230283e-06, "loss": 13.4463, "step": 260520 }, { "epoch": 0.5262870833114494, "grad_norm": 256.66802978515625, "learning_rate": 5.504239638198267e-06, "loss": 21.2495, "step": 260530 }, { "epoch": 0.5263072839441332, "grad_norm": 241.9513397216797, "learning_rate": 5.503892350708651e-06, "loss": 17.1914, "step": 260540 }, { "epoch": 0.526327484576817, "grad_norm": 326.9222106933594, "learning_rate": 5.503545060763123e-06, "loss": 34.5534, "step": 260550 }, { "epoch": 0.5263476852095008, "grad_norm": 357.99700927734375, "learning_rate": 5.503197768363378e-06, "loss": 14.2177, "step": 260560 }, { "epoch": 0.5263678858421845, "grad_norm": 246.52980041503906, "learning_rate": 5.502850473511108e-06, "loss": 12.4622, "step": 260570 }, { "epoch": 0.5263880864748683, "grad_norm": 480.41668701171875, "learning_rate": 5.502503176208006e-06, "loss": 25.3514, "step": 260580 }, { "epoch": 0.5264082871075522, "grad_norm": 269.68865966796875, "learning_rate": 5.502155876455764e-06, "loss": 25.3879, "step": 260590 }, { "epoch": 0.526428487740236, "grad_norm": 408.2362060546875, "learning_rate": 5.5018085742560745e-06, "loss": 16.3543, "step": 260600 }, { "epoch": 0.5264486883729198, "grad_norm": 249.12245178222656, "learning_rate": 5.501461269610632e-06, "loss": 13.5725, "step": 260610 }, { "epoch": 0.5264688890056036, "grad_norm": 438.4444580078125, "learning_rate": 5.501113962521129e-06, "loss": 20.8684, "step": 260620 }, { "epoch": 0.5264890896382874, "grad_norm": 0.0, "learning_rate": 5.5007666529892545e-06, "loss": 24.0455, "step": 260630 }, { "epoch": 0.5265092902709713, "grad_norm": 277.3448181152344, "learning_rate": 5.500419341016707e-06, "loss": 14.3256, "step": 260640 }, { "epoch": 0.5265294909036551, "grad_norm": 425.6750793457031, "learning_rate": 5.500072026605175e-06, "loss": 21.7922, "step": 260650 }, { "epoch": 0.5265496915363389, "grad_norm": 398.42236328125, "learning_rate": 5.499724709756352e-06, "loss": 12.7472, "step": 260660 }, { "epoch": 0.5265698921690227, "grad_norm": 651.5570068359375, "learning_rate": 5.499377390471933e-06, "loss": 27.2713, "step": 260670 }, { "epoch": 0.5265900928017065, "grad_norm": 364.0670166015625, "learning_rate": 5.4990300687536065e-06, "loss": 12.4383, "step": 260680 }, { "epoch": 0.5266102934343904, "grad_norm": 425.282470703125, "learning_rate": 5.498682744603071e-06, "loss": 13.0307, "step": 260690 }, { "epoch": 0.5266304940670742, "grad_norm": 311.4023132324219, "learning_rate": 5.498335418022015e-06, "loss": 20.9397, "step": 260700 }, { "epoch": 0.526650694699758, "grad_norm": 159.4119415283203, "learning_rate": 5.497988089012132e-06, "loss": 15.087, "step": 260710 }, { "epoch": 0.5266708953324418, "grad_norm": 601.800048828125, "learning_rate": 5.497640757575116e-06, "loss": 26.5291, "step": 260720 }, { "epoch": 0.5266910959651256, "grad_norm": 203.097900390625, "learning_rate": 5.497293423712661e-06, "loss": 24.5891, "step": 260730 }, { "epoch": 0.5267112965978095, "grad_norm": 162.74554443359375, "learning_rate": 5.4969460874264555e-06, "loss": 8.3716, "step": 260740 }, { "epoch": 0.5267314972304933, "grad_norm": 526.2352294921875, "learning_rate": 5.496598748718196e-06, "loss": 14.3457, "step": 260750 }, { "epoch": 0.5267516978631771, "grad_norm": 764.6087036132812, "learning_rate": 5.4962514075895746e-06, "loss": 20.3772, "step": 260760 }, { "epoch": 0.5267718984958609, "grad_norm": 541.0398559570312, "learning_rate": 5.4959040640422836e-06, "loss": 22.6953, 
"step": 260770 }, { "epoch": 0.5267920991285447, "grad_norm": 414.8677978515625, "learning_rate": 5.495556718078017e-06, "loss": 25.4641, "step": 260780 }, { "epoch": 0.5268122997612286, "grad_norm": 350.560791015625, "learning_rate": 5.495209369698466e-06, "loss": 16.316, "step": 260790 }, { "epoch": 0.5268325003939124, "grad_norm": 316.64569091796875, "learning_rate": 5.4948620189053255e-06, "loss": 17.0801, "step": 260800 }, { "epoch": 0.5268527010265962, "grad_norm": 488.3839111328125, "learning_rate": 5.494514665700288e-06, "loss": 12.9622, "step": 260810 }, { "epoch": 0.52687290165928, "grad_norm": 972.8881225585938, "learning_rate": 5.494167310085045e-06, "loss": 28.7659, "step": 260820 }, { "epoch": 0.5268931022919637, "grad_norm": 395.9073181152344, "learning_rate": 5.49381995206129e-06, "loss": 56.1014, "step": 260830 }, { "epoch": 0.5269133029246476, "grad_norm": 301.20037841796875, "learning_rate": 5.493472591630717e-06, "loss": 16.386, "step": 260840 }, { "epoch": 0.5269335035573314, "grad_norm": 263.2437744140625, "learning_rate": 5.49312522879502e-06, "loss": 24.0984, "step": 260850 }, { "epoch": 0.5269537041900152, "grad_norm": 208.31399536132812, "learning_rate": 5.492777863555889e-06, "loss": 20.7022, "step": 260860 }, { "epoch": 0.526973904822699, "grad_norm": 1261.545654296875, "learning_rate": 5.492430495915018e-06, "loss": 18.0993, "step": 260870 }, { "epoch": 0.5269941054553828, "grad_norm": 612.3999633789062, "learning_rate": 5.4920831258741016e-06, "loss": 18.8504, "step": 260880 }, { "epoch": 0.5270143060880667, "grad_norm": 126.61519622802734, "learning_rate": 5.491735753434832e-06, "loss": 18.0525, "step": 260890 }, { "epoch": 0.5270345067207505, "grad_norm": 538.5910034179688, "learning_rate": 5.491388378598899e-06, "loss": 14.6739, "step": 260900 }, { "epoch": 0.5270547073534343, "grad_norm": 222.38272094726562, "learning_rate": 5.4910410013680015e-06, "loss": 27.0445, "step": 260910 }, { "epoch": 0.5270749079861181, "grad_norm": 426.7323303222656, "learning_rate": 5.490693621743829e-06, "loss": 22.8191, "step": 260920 }, { "epoch": 0.5270951086188019, "grad_norm": 346.5004577636719, "learning_rate": 5.490346239728076e-06, "loss": 29.5733, "step": 260930 }, { "epoch": 0.5271153092514858, "grad_norm": 237.61834716796875, "learning_rate": 5.489998855322435e-06, "loss": 15.4577, "step": 260940 }, { "epoch": 0.5271355098841696, "grad_norm": 492.9292907714844, "learning_rate": 5.489651468528596e-06, "loss": 17.7796, "step": 260950 }, { "epoch": 0.5271557105168534, "grad_norm": 382.273681640625, "learning_rate": 5.489304079348259e-06, "loss": 17.7072, "step": 260960 }, { "epoch": 0.5271759111495372, "grad_norm": 1783.85107421875, "learning_rate": 5.488956687783111e-06, "loss": 35.1095, "step": 260970 }, { "epoch": 0.527196111782221, "grad_norm": 1896.2325439453125, "learning_rate": 5.4886092938348475e-06, "loss": 25.3209, "step": 260980 }, { "epoch": 0.5272163124149049, "grad_norm": 309.3804931640625, "learning_rate": 5.488261897505163e-06, "loss": 12.4335, "step": 260990 }, { "epoch": 0.5272365130475887, "grad_norm": 187.5780487060547, "learning_rate": 5.487914498795748e-06, "loss": 22.6779, "step": 261000 }, { "epoch": 0.5272567136802725, "grad_norm": 206.28851318359375, "learning_rate": 5.487567097708298e-06, "loss": 8.0016, "step": 261010 }, { "epoch": 0.5272769143129563, "grad_norm": 516.361572265625, "learning_rate": 5.487219694244505e-06, "loss": 25.4137, "step": 261020 }, { "epoch": 0.5272971149456401, "grad_norm": 166.94139099121094, "learning_rate": 
5.48687228840606e-06, "loss": 21.2136, "step": 261030 }, { "epoch": 0.527317315578324, "grad_norm": 1025.854736328125, "learning_rate": 5.48652488019466e-06, "loss": 45.8827, "step": 261040 }, { "epoch": 0.5273375162110078, "grad_norm": 244.59588623046875, "learning_rate": 5.486177469611999e-06, "loss": 21.5262, "step": 261050 }, { "epoch": 0.5273577168436916, "grad_norm": 22.894620895385742, "learning_rate": 5.485830056659763e-06, "loss": 19.5141, "step": 261060 }, { "epoch": 0.5273779174763754, "grad_norm": 74.53453826904297, "learning_rate": 5.4854826413396546e-06, "loss": 8.8525, "step": 261070 }, { "epoch": 0.5273981181090591, "grad_norm": 1197.507080078125, "learning_rate": 5.485135223653362e-06, "loss": 25.0771, "step": 261080 }, { "epoch": 0.5274183187417429, "grad_norm": 365.26715087890625, "learning_rate": 5.484787803602577e-06, "loss": 18.7501, "step": 261090 }, { "epoch": 0.5274385193744268, "grad_norm": 415.0979309082031, "learning_rate": 5.484440381188997e-06, "loss": 22.1903, "step": 261100 }, { "epoch": 0.5274587200071106, "grad_norm": 46.778907775878906, "learning_rate": 5.484092956414312e-06, "loss": 16.4155, "step": 261110 }, { "epoch": 0.5274789206397944, "grad_norm": 499.7535400390625, "learning_rate": 5.483745529280219e-06, "loss": 18.5447, "step": 261120 }, { "epoch": 0.5274991212724782, "grad_norm": 426.1343994140625, "learning_rate": 5.4833980997884054e-06, "loss": 12.4086, "step": 261130 }, { "epoch": 0.527519321905162, "grad_norm": 275.7474060058594, "learning_rate": 5.483050667940571e-06, "loss": 15.0397, "step": 261140 }, { "epoch": 0.5275395225378459, "grad_norm": 797.1040649414062, "learning_rate": 5.482703233738405e-06, "loss": 31.8337, "step": 261150 }, { "epoch": 0.5275597231705297, "grad_norm": 308.7997131347656, "learning_rate": 5.482355797183602e-06, "loss": 37.3068, "step": 261160 }, { "epoch": 0.5275799238032135, "grad_norm": 614.94189453125, "learning_rate": 5.482008358277855e-06, "loss": 22.907, "step": 261170 }, { "epoch": 0.5276001244358973, "grad_norm": 459.3724060058594, "learning_rate": 5.48166091702286e-06, "loss": 18.8272, "step": 261180 }, { "epoch": 0.5276203250685811, "grad_norm": 352.71551513671875, "learning_rate": 5.481313473420306e-06, "loss": 28.507, "step": 261190 }, { "epoch": 0.527640525701265, "grad_norm": 658.2681274414062, "learning_rate": 5.480966027471889e-06, "loss": 19.0675, "step": 261200 }, { "epoch": 0.5276607263339488, "grad_norm": 430.00823974609375, "learning_rate": 5.480618579179301e-06, "loss": 29.2889, "step": 261210 }, { "epoch": 0.5276809269666326, "grad_norm": 448.3904113769531, "learning_rate": 5.4802711285442375e-06, "loss": 17.5373, "step": 261220 }, { "epoch": 0.5277011275993164, "grad_norm": 440.13897705078125, "learning_rate": 5.4799236755683916e-06, "loss": 28.9601, "step": 261230 }, { "epoch": 0.5277213282320002, "grad_norm": 285.37274169921875, "learning_rate": 5.479576220253453e-06, "loss": 20.8815, "step": 261240 }, { "epoch": 0.5277415288646841, "grad_norm": 361.54547119140625, "learning_rate": 5.4792287626011206e-06, "loss": 25.0595, "step": 261250 }, { "epoch": 0.5277617294973679, "grad_norm": 581.8213500976562, "learning_rate": 5.478881302613085e-06, "loss": 13.6178, "step": 261260 }, { "epoch": 0.5277819301300517, "grad_norm": 423.68377685546875, "learning_rate": 5.478533840291039e-06, "loss": 24.4431, "step": 261270 }, { "epoch": 0.5278021307627355, "grad_norm": 391.1540222167969, "learning_rate": 5.478186375636678e-06, "loss": 23.6289, "step": 261280 }, { "epoch": 0.5278223313954193, 
"grad_norm": 696.769287109375, "learning_rate": 5.477838908651694e-06, "loss": 26.5889, "step": 261290 }, { "epoch": 0.5278425320281032, "grad_norm": 509.2430725097656, "learning_rate": 5.477491439337782e-06, "loss": 17.4735, "step": 261300 }, { "epoch": 0.527862732660787, "grad_norm": 263.2110290527344, "learning_rate": 5.477143967696634e-06, "loss": 12.5045, "step": 261310 }, { "epoch": 0.5278829332934708, "grad_norm": 552.67626953125, "learning_rate": 5.476796493729943e-06, "loss": 25.14, "step": 261320 }, { "epoch": 0.5279031339261546, "grad_norm": 209.8045196533203, "learning_rate": 5.476449017439406e-06, "loss": 25.4521, "step": 261330 }, { "epoch": 0.5279233345588383, "grad_norm": 316.1282653808594, "learning_rate": 5.476101538826714e-06, "loss": 18.1899, "step": 261340 }, { "epoch": 0.5279435351915222, "grad_norm": 735.6625366210938, "learning_rate": 5.4757540578935595e-06, "loss": 17.8253, "step": 261350 }, { "epoch": 0.527963735824206, "grad_norm": 435.79742431640625, "learning_rate": 5.475406574641637e-06, "loss": 32.5203, "step": 261360 }, { "epoch": 0.5279839364568898, "grad_norm": 236.00254821777344, "learning_rate": 5.475059089072642e-06, "loss": 25.0577, "step": 261370 }, { "epoch": 0.5280041370895736, "grad_norm": 498.2556457519531, "learning_rate": 5.474711601188266e-06, "loss": 20.142, "step": 261380 }, { "epoch": 0.5280243377222574, "grad_norm": 131.3717803955078, "learning_rate": 5.4743641109902045e-06, "loss": 16.5378, "step": 261390 }, { "epoch": 0.5280445383549413, "grad_norm": 601.6570434570312, "learning_rate": 5.474016618480147e-06, "loss": 12.4973, "step": 261400 }, { "epoch": 0.5280647389876251, "grad_norm": 404.3240051269531, "learning_rate": 5.473669123659793e-06, "loss": 27.5379, "step": 261410 }, { "epoch": 0.5280849396203089, "grad_norm": 400.5625, "learning_rate": 5.4733216265308305e-06, "loss": 8.8741, "step": 261420 }, { "epoch": 0.5281051402529927, "grad_norm": 200.19439697265625, "learning_rate": 5.472974127094957e-06, "loss": 21.7069, "step": 261430 }, { "epoch": 0.5281253408856765, "grad_norm": 34.03194808959961, "learning_rate": 5.472626625353865e-06, "loss": 25.374, "step": 261440 }, { "epoch": 0.5281455415183604, "grad_norm": 265.70245361328125, "learning_rate": 5.472279121309248e-06, "loss": 13.8763, "step": 261450 }, { "epoch": 0.5281657421510442, "grad_norm": 331.0539245605469, "learning_rate": 5.471931614962802e-06, "loss": 17.9479, "step": 261460 }, { "epoch": 0.528185942783728, "grad_norm": 254.81101989746094, "learning_rate": 5.471584106316216e-06, "loss": 12.9582, "step": 261470 }, { "epoch": 0.5282061434164118, "grad_norm": 425.1376647949219, "learning_rate": 5.471236595371187e-06, "loss": 17.6356, "step": 261480 }, { "epoch": 0.5282263440490956, "grad_norm": 202.1089324951172, "learning_rate": 5.470889082129407e-06, "loss": 27.5508, "step": 261490 }, { "epoch": 0.5282465446817795, "grad_norm": 197.38787841796875, "learning_rate": 5.470541566592573e-06, "loss": 11.6215, "step": 261500 }, { "epoch": 0.5282667453144633, "grad_norm": 307.8194580078125, "learning_rate": 5.470194048762374e-06, "loss": 11.7671, "step": 261510 }, { "epoch": 0.5282869459471471, "grad_norm": 186.99232482910156, "learning_rate": 5.469846528640508e-06, "loss": 7.1621, "step": 261520 }, { "epoch": 0.5283071465798309, "grad_norm": 593.8322143554688, "learning_rate": 5.469499006228666e-06, "loss": 16.4775, "step": 261530 }, { "epoch": 0.5283273472125147, "grad_norm": 398.6197814941406, "learning_rate": 5.469151481528543e-06, "loss": 17.6969, "step": 261540 }, { 
"epoch": 0.5283475478451986, "grad_norm": 964.5626220703125, "learning_rate": 5.468803954541834e-06, "loss": 36.3001, "step": 261550 }, { "epoch": 0.5283677484778824, "grad_norm": 590.163330078125, "learning_rate": 5.468456425270229e-06, "loss": 36.6744, "step": 261560 }, { "epoch": 0.5283879491105662, "grad_norm": 377.348388671875, "learning_rate": 5.468108893715426e-06, "loss": 20.7374, "step": 261570 }, { "epoch": 0.52840814974325, "grad_norm": 678.54541015625, "learning_rate": 5.467761359879116e-06, "loss": 18.3416, "step": 261580 }, { "epoch": 0.5284283503759337, "grad_norm": 401.6737365722656, "learning_rate": 5.467413823762994e-06, "loss": 16.5677, "step": 261590 }, { "epoch": 0.5284485510086175, "grad_norm": 260.65948486328125, "learning_rate": 5.467066285368754e-06, "loss": 28.8671, "step": 261600 }, { "epoch": 0.5284687516413014, "grad_norm": 184.55581665039062, "learning_rate": 5.466718744698089e-06, "loss": 12.4206, "step": 261610 }, { "epoch": 0.5284889522739852, "grad_norm": 484.190185546875, "learning_rate": 5.4663712017526946e-06, "loss": 21.1198, "step": 261620 }, { "epoch": 0.528509152906669, "grad_norm": 331.16400146484375, "learning_rate": 5.466023656534263e-06, "loss": 30.1253, "step": 261630 }, { "epoch": 0.5285293535393528, "grad_norm": 388.0387268066406, "learning_rate": 5.4656761090444875e-06, "loss": 11.3414, "step": 261640 }, { "epoch": 0.5285495541720366, "grad_norm": 256.4833679199219, "learning_rate": 5.465328559285064e-06, "loss": 16.1876, "step": 261650 }, { "epoch": 0.5285697548047205, "grad_norm": 202.82069396972656, "learning_rate": 5.464981007257686e-06, "loss": 13.6507, "step": 261660 }, { "epoch": 0.5285899554374043, "grad_norm": 369.3669738769531, "learning_rate": 5.4646334529640454e-06, "loss": 21.4978, "step": 261670 }, { "epoch": 0.5286101560700881, "grad_norm": 597.6946411132812, "learning_rate": 5.46428589640584e-06, "loss": 47.2797, "step": 261680 }, { "epoch": 0.5286303567027719, "grad_norm": 237.5253448486328, "learning_rate": 5.46393833758476e-06, "loss": 22.3166, "step": 261690 }, { "epoch": 0.5286505573354557, "grad_norm": 517.5831909179688, "learning_rate": 5.463590776502501e-06, "loss": 10.3808, "step": 261700 }, { "epoch": 0.5286707579681396, "grad_norm": 714.017333984375, "learning_rate": 5.463243213160758e-06, "loss": 24.9592, "step": 261710 }, { "epoch": 0.5286909586008234, "grad_norm": 165.38265991210938, "learning_rate": 5.462895647561222e-06, "loss": 11.7884, "step": 261720 }, { "epoch": 0.5287111592335072, "grad_norm": 323.59521484375, "learning_rate": 5.46254807970559e-06, "loss": 20.3679, "step": 261730 }, { "epoch": 0.528731359866191, "grad_norm": 516.4874267578125, "learning_rate": 5.462200509595553e-06, "loss": 14.5833, "step": 261740 }, { "epoch": 0.5287515604988748, "grad_norm": 424.514892578125, "learning_rate": 5.461852937232809e-06, "loss": 22.5686, "step": 261750 }, { "epoch": 0.5287717611315587, "grad_norm": 448.6566467285156, "learning_rate": 5.461505362619048e-06, "loss": 14.5051, "step": 261760 }, { "epoch": 0.5287919617642425, "grad_norm": 690.8274536132812, "learning_rate": 5.4611577857559676e-06, "loss": 24.0594, "step": 261770 }, { "epoch": 0.5288121623969263, "grad_norm": 214.0674591064453, "learning_rate": 5.460810206645258e-06, "loss": 9.9378, "step": 261780 }, { "epoch": 0.5288323630296101, "grad_norm": 643.51513671875, "learning_rate": 5.460462625288617e-06, "loss": 25.3458, "step": 261790 }, { "epoch": 0.5288525636622939, "grad_norm": 1006.0845336914062, "learning_rate": 5.460115041687737e-06, "loss": 
22.4337, "step": 261800 }, { "epoch": 0.5288727642949778, "grad_norm": 6.621699333190918, "learning_rate": 5.4597674558443114e-06, "loss": 27.015, "step": 261810 }, { "epoch": 0.5288929649276616, "grad_norm": 489.2602844238281, "learning_rate": 5.459419867760034e-06, "loss": 21.4121, "step": 261820 }, { "epoch": 0.5289131655603454, "grad_norm": 0.0008042035042308271, "learning_rate": 5.4590722774366015e-06, "loss": 21.7596, "step": 261830 }, { "epoch": 0.5289333661930292, "grad_norm": 517.4910888671875, "learning_rate": 5.458724684875707e-06, "loss": 23.4646, "step": 261840 }, { "epoch": 0.5289535668257129, "grad_norm": 464.22216796875, "learning_rate": 5.4583770900790415e-06, "loss": 20.7891, "step": 261850 }, { "epoch": 0.5289737674583967, "grad_norm": 373.5762023925781, "learning_rate": 5.458029493048303e-06, "loss": 12.5113, "step": 261860 }, { "epoch": 0.5289939680910806, "grad_norm": 287.6373596191406, "learning_rate": 5.457681893785185e-06, "loss": 15.555, "step": 261870 }, { "epoch": 0.5290141687237644, "grad_norm": 267.95806884765625, "learning_rate": 5.45733429229138e-06, "loss": 12.9552, "step": 261880 }, { "epoch": 0.5290343693564482, "grad_norm": 231.57301330566406, "learning_rate": 5.456986688568584e-06, "loss": 23.6357, "step": 261890 }, { "epoch": 0.529054569989132, "grad_norm": 631.8321533203125, "learning_rate": 5.456639082618489e-06, "loss": 15.179, "step": 261900 }, { "epoch": 0.5290747706218158, "grad_norm": 250.34884643554688, "learning_rate": 5.456291474442792e-06, "loss": 27.8501, "step": 261910 }, { "epoch": 0.5290949712544997, "grad_norm": 242.3703155517578, "learning_rate": 5.455943864043185e-06, "loss": 14.8319, "step": 261920 }, { "epoch": 0.5291151718871835, "grad_norm": 227.57235717773438, "learning_rate": 5.4555962514213624e-06, "loss": 33.865, "step": 261930 }, { "epoch": 0.5291353725198673, "grad_norm": 166.92816162109375, "learning_rate": 5.4552486365790196e-06, "loss": 13.2772, "step": 261940 }, { "epoch": 0.5291555731525511, "grad_norm": 155.92852783203125, "learning_rate": 5.454901019517851e-06, "loss": 14.4723, "step": 261950 }, { "epoch": 0.529175773785235, "grad_norm": 267.2330017089844, "learning_rate": 5.454553400239548e-06, "loss": 9.4315, "step": 261960 }, { "epoch": 0.5291959744179188, "grad_norm": 326.3690490722656, "learning_rate": 5.454205778745808e-06, "loss": 11.2432, "step": 261970 }, { "epoch": 0.5292161750506026, "grad_norm": 183.72789001464844, "learning_rate": 5.453858155038324e-06, "loss": 15.6313, "step": 261980 }, { "epoch": 0.5292363756832864, "grad_norm": 476.95166015625, "learning_rate": 5.45351052911879e-06, "loss": 23.1629, "step": 261990 }, { "epoch": 0.5292565763159702, "grad_norm": 203.19126892089844, "learning_rate": 5.453162900988902e-06, "loss": 23.9593, "step": 262000 }, { "epoch": 0.529276776948654, "grad_norm": 998.18310546875, "learning_rate": 5.452815270650351e-06, "loss": 27.4498, "step": 262010 }, { "epoch": 0.5292969775813379, "grad_norm": 193.37799072265625, "learning_rate": 5.452467638104834e-06, "loss": 21.1561, "step": 262020 }, { "epoch": 0.5293171782140217, "grad_norm": 324.4336853027344, "learning_rate": 5.452120003354046e-06, "loss": 23.6304, "step": 262030 }, { "epoch": 0.5293373788467055, "grad_norm": 529.0345458984375, "learning_rate": 5.451772366399678e-06, "loss": 31.1582, "step": 262040 }, { "epoch": 0.5293575794793893, "grad_norm": 330.73876953125, "learning_rate": 5.451424727243428e-06, "loss": 19.3234, "step": 262050 }, { "epoch": 0.5293777801120731, "grad_norm": 322.2069091796875, 
"learning_rate": 5.451077085886987e-06, "loss": 28.1488, "step": 262060 }, { "epoch": 0.529397980744757, "grad_norm": 409.0740051269531, "learning_rate": 5.450729442332052e-06, "loss": 24.9972, "step": 262070 }, { "epoch": 0.5294181813774408, "grad_norm": 347.320068359375, "learning_rate": 5.450381796580317e-06, "loss": 17.1953, "step": 262080 }, { "epoch": 0.5294383820101246, "grad_norm": 373.0260925292969, "learning_rate": 5.450034148633474e-06, "loss": 19.227, "step": 262090 }, { "epoch": 0.5294585826428083, "grad_norm": 339.3687438964844, "learning_rate": 5.449686498493219e-06, "loss": 17.1814, "step": 262100 }, { "epoch": 0.5294787832754921, "grad_norm": 316.0090637207031, "learning_rate": 5.449338846161248e-06, "loss": 14.4609, "step": 262110 }, { "epoch": 0.529498983908176, "grad_norm": 207.0604705810547, "learning_rate": 5.448991191639254e-06, "loss": 13.3553, "step": 262120 }, { "epoch": 0.5295191845408598, "grad_norm": 262.06134033203125, "learning_rate": 5.448643534928931e-06, "loss": 27.3413, "step": 262130 }, { "epoch": 0.5295393851735436, "grad_norm": 409.28448486328125, "learning_rate": 5.448295876031974e-06, "loss": 26.6529, "step": 262140 }, { "epoch": 0.5295595858062274, "grad_norm": 401.1116638183594, "learning_rate": 5.447948214950078e-06, "loss": 12.4856, "step": 262150 }, { "epoch": 0.5295797864389112, "grad_norm": 510.2715759277344, "learning_rate": 5.447600551684936e-06, "loss": 21.6855, "step": 262160 }, { "epoch": 0.5295999870715951, "grad_norm": 93.42176818847656, "learning_rate": 5.4472528862382415e-06, "loss": 28.3929, "step": 262170 }, { "epoch": 0.5296201877042789, "grad_norm": 619.77001953125, "learning_rate": 5.446905218611694e-06, "loss": 19.0133, "step": 262180 }, { "epoch": 0.5296403883369627, "grad_norm": 194.4291534423828, "learning_rate": 5.4465575488069795e-06, "loss": 22.0499, "step": 262190 }, { "epoch": 0.5296605889696465, "grad_norm": 246.71034240722656, "learning_rate": 5.446209876825803e-06, "loss": 10.5823, "step": 262200 }, { "epoch": 0.5296807896023303, "grad_norm": 1150.928955078125, "learning_rate": 5.445862202669851e-06, "loss": 25.7024, "step": 262210 }, { "epoch": 0.5297009902350142, "grad_norm": 501.6942443847656, "learning_rate": 5.445514526340822e-06, "loss": 14.3524, "step": 262220 }, { "epoch": 0.529721190867698, "grad_norm": 231.6753387451172, "learning_rate": 5.445166847840409e-06, "loss": 32.6382, "step": 262230 }, { "epoch": 0.5297413915003818, "grad_norm": 387.2635192871094, "learning_rate": 5.444819167170306e-06, "loss": 33.5837, "step": 262240 }, { "epoch": 0.5297615921330656, "grad_norm": 553.984619140625, "learning_rate": 5.4444714843322085e-06, "loss": 20.1767, "step": 262250 }, { "epoch": 0.5297817927657494, "grad_norm": 254.49600219726562, "learning_rate": 5.444123799327811e-06, "loss": 20.5725, "step": 262260 }, { "epoch": 0.5298019933984333, "grad_norm": 629.6550903320312, "learning_rate": 5.443776112158808e-06, "loss": 22.7081, "step": 262270 }, { "epoch": 0.5298221940311171, "grad_norm": 13.064043998718262, "learning_rate": 5.443428422826893e-06, "loss": 27.7504, "step": 262280 }, { "epoch": 0.5298423946638009, "grad_norm": 439.01837158203125, "learning_rate": 5.443080731333764e-06, "loss": 24.9362, "step": 262290 }, { "epoch": 0.5298625952964847, "grad_norm": 493.9464111328125, "learning_rate": 5.442733037681112e-06, "loss": 14.7247, "step": 262300 }, { "epoch": 0.5298827959291685, "grad_norm": 275.8974914550781, "learning_rate": 5.442385341870633e-06, "loss": 23.168, "step": 262310 }, { "epoch": 
0.5299029965618524, "grad_norm": 209.71031188964844, "learning_rate": 5.442037643904022e-06, "loss": 8.5229, "step": 262320 }, { "epoch": 0.5299231971945362, "grad_norm": 402.67315673828125, "learning_rate": 5.4416899437829705e-06, "loss": 19.1335, "step": 262330 }, { "epoch": 0.52994339782722, "grad_norm": 658.8362426757812, "learning_rate": 5.441342241509179e-06, "loss": 34.262, "step": 262340 }, { "epoch": 0.5299635984599038, "grad_norm": 238.89524841308594, "learning_rate": 5.440994537084337e-06, "loss": 17.8649, "step": 262350 }, { "epoch": 0.5299837990925875, "grad_norm": 655.3569946289062, "learning_rate": 5.440646830510142e-06, "loss": 19.8445, "step": 262360 }, { "epoch": 0.5300039997252713, "grad_norm": 324.5028991699219, "learning_rate": 5.440299121788289e-06, "loss": 12.9395, "step": 262370 }, { "epoch": 0.5300242003579552, "grad_norm": 69.92562866210938, "learning_rate": 5.439951410920469e-06, "loss": 18.4332, "step": 262380 }, { "epoch": 0.530044400990639, "grad_norm": 413.9827575683594, "learning_rate": 5.439603697908381e-06, "loss": 38.8664, "step": 262390 }, { "epoch": 0.5300646016233228, "grad_norm": 273.46185302734375, "learning_rate": 5.439255982753717e-06, "loss": 11.9836, "step": 262400 }, { "epoch": 0.5300848022560066, "grad_norm": 353.564208984375, "learning_rate": 5.438908265458172e-06, "loss": 25.7608, "step": 262410 }, { "epoch": 0.5301050028886904, "grad_norm": 431.2625427246094, "learning_rate": 5.438560546023442e-06, "loss": 17.5037, "step": 262420 }, { "epoch": 0.5301252035213743, "grad_norm": 985.5978393554688, "learning_rate": 5.438212824451221e-06, "loss": 26.1163, "step": 262430 }, { "epoch": 0.5301454041540581, "grad_norm": 507.8580017089844, "learning_rate": 5.437865100743205e-06, "loss": 31.0964, "step": 262440 }, { "epoch": 0.5301656047867419, "grad_norm": 264.5030517578125, "learning_rate": 5.437517374901087e-06, "loss": 28.6044, "step": 262450 }, { "epoch": 0.5301858054194257, "grad_norm": 303.4159851074219, "learning_rate": 5.437169646926561e-06, "loss": 21.8809, "step": 262460 }, { "epoch": 0.5302060060521095, "grad_norm": 243.92320251464844, "learning_rate": 5.436821916821325e-06, "loss": 18.0218, "step": 262470 }, { "epoch": 0.5302262066847934, "grad_norm": 497.0818176269531, "learning_rate": 5.436474184587071e-06, "loss": 24.2977, "step": 262480 }, { "epoch": 0.5302464073174772, "grad_norm": 230.01681518554688, "learning_rate": 5.436126450225495e-06, "loss": 13.5655, "step": 262490 }, { "epoch": 0.530266607950161, "grad_norm": 855.636962890625, "learning_rate": 5.435778713738292e-06, "loss": 18.769, "step": 262500 }, { "epoch": 0.5302868085828448, "grad_norm": 65.8647689819336, "learning_rate": 5.435430975127155e-06, "loss": 11.7633, "step": 262510 }, { "epoch": 0.5303070092155286, "grad_norm": 525.977294921875, "learning_rate": 5.435083234393782e-06, "loss": 10.8237, "step": 262520 }, { "epoch": 0.5303272098482125, "grad_norm": 0.0, "learning_rate": 5.434735491539866e-06, "loss": 19.7727, "step": 262530 }, { "epoch": 0.5303474104808963, "grad_norm": 242.5271453857422, "learning_rate": 5.4343877465671e-06, "loss": 19.3758, "step": 262540 }, { "epoch": 0.5303676111135801, "grad_norm": 199.46829223632812, "learning_rate": 5.434039999477182e-06, "loss": 22.1765, "step": 262550 }, { "epoch": 0.5303878117462639, "grad_norm": 486.7574462890625, "learning_rate": 5.433692250271806e-06, "loss": 25.3172, "step": 262560 }, { "epoch": 0.5304080123789477, "grad_norm": 491.2746887207031, "learning_rate": 5.433344498952666e-06, "loss": 17.1765, "step": 262570 
}, { "epoch": 0.5304282130116316, "grad_norm": 347.827392578125, "learning_rate": 5.432996745521458e-06, "loss": 13.0126, "step": 262580 }, { "epoch": 0.5304484136443154, "grad_norm": 342.2633972167969, "learning_rate": 5.4326489899798765e-06, "loss": 26.3243, "step": 262590 }, { "epoch": 0.5304686142769992, "grad_norm": 442.13446044921875, "learning_rate": 5.432301232329615e-06, "loss": 18.282, "step": 262600 }, { "epoch": 0.530488814909683, "grad_norm": 99.82096099853516, "learning_rate": 5.431953472572372e-06, "loss": 16.625, "step": 262610 }, { "epoch": 0.5305090155423667, "grad_norm": 167.40602111816406, "learning_rate": 5.431605710709838e-06, "loss": 23.094, "step": 262620 }, { "epoch": 0.5305292161750506, "grad_norm": 457.5943603515625, "learning_rate": 5.431257946743711e-06, "loss": 15.3913, "step": 262630 }, { "epoch": 0.5305494168077344, "grad_norm": 643.6044311523438, "learning_rate": 5.430910180675685e-06, "loss": 16.8985, "step": 262640 }, { "epoch": 0.5305696174404182, "grad_norm": 727.259521484375, "learning_rate": 5.430562412507454e-06, "loss": 21.613, "step": 262650 }, { "epoch": 0.530589818073102, "grad_norm": 164.78611755371094, "learning_rate": 5.430214642240716e-06, "loss": 17.1354, "step": 262660 }, { "epoch": 0.5306100187057858, "grad_norm": 858.7236328125, "learning_rate": 5.429866869877163e-06, "loss": 18.4539, "step": 262670 }, { "epoch": 0.5306302193384697, "grad_norm": 365.9755859375, "learning_rate": 5.429519095418492e-06, "loss": 12.9498, "step": 262680 }, { "epoch": 0.5306504199711535, "grad_norm": 76.51131439208984, "learning_rate": 5.429171318866395e-06, "loss": 31.2644, "step": 262690 }, { "epoch": 0.5306706206038373, "grad_norm": 505.0215148925781, "learning_rate": 5.42882354022257e-06, "loss": 16.3202, "step": 262700 }, { "epoch": 0.5306908212365211, "grad_norm": 508.2402038574219, "learning_rate": 5.428475759488711e-06, "loss": 29.6478, "step": 262710 }, { "epoch": 0.5307110218692049, "grad_norm": 233.74713134765625, "learning_rate": 5.428127976666513e-06, "loss": 12.7844, "step": 262720 }, { "epoch": 0.5307312225018888, "grad_norm": 324.79144287109375, "learning_rate": 5.4277801917576724e-06, "loss": 22.4773, "step": 262730 }, { "epoch": 0.5307514231345726, "grad_norm": 248.5419921875, "learning_rate": 5.427432404763882e-06, "loss": 17.6601, "step": 262740 }, { "epoch": 0.5307716237672564, "grad_norm": 476.61553955078125, "learning_rate": 5.4270846156868386e-06, "loss": 25.1193, "step": 262750 }, { "epoch": 0.5307918243999402, "grad_norm": 211.3850860595703, "learning_rate": 5.426736824528236e-06, "loss": 15.8085, "step": 262760 }, { "epoch": 0.530812025032624, "grad_norm": 348.0032653808594, "learning_rate": 5.426389031289771e-06, "loss": 21.6775, "step": 262770 }, { "epoch": 0.5308322256653079, "grad_norm": 217.60696411132812, "learning_rate": 5.426041235973134e-06, "loss": 10.3254, "step": 262780 }, { "epoch": 0.5308524262979917, "grad_norm": 178.12786865234375, "learning_rate": 5.4256934385800275e-06, "loss": 20.2644, "step": 262790 }, { "epoch": 0.5308726269306755, "grad_norm": 539.5770263671875, "learning_rate": 5.425345639112141e-06, "loss": 15.3722, "step": 262800 }, { "epoch": 0.5308928275633593, "grad_norm": 329.5556335449219, "learning_rate": 5.424997837571172e-06, "loss": 19.8378, "step": 262810 }, { "epoch": 0.5309130281960431, "grad_norm": 478.9924621582031, "learning_rate": 5.4246500339588144e-06, "loss": 16.7398, "step": 262820 }, { "epoch": 0.530933228828727, "grad_norm": 320.1447448730469, "learning_rate": 5.4243022282767645e-06, 
"loss": 15.0234, "step": 262830 }, { "epoch": 0.5309534294614108, "grad_norm": 396.3629150390625, "learning_rate": 5.4239544205267185e-06, "loss": 26.5332, "step": 262840 }, { "epoch": 0.5309736300940946, "grad_norm": 552.6524047851562, "learning_rate": 5.423606610710368e-06, "loss": 24.3753, "step": 262850 }, { "epoch": 0.5309938307267784, "grad_norm": 164.86257934570312, "learning_rate": 5.4232587988294105e-06, "loss": 21.6404, "step": 262860 }, { "epoch": 0.5310140313594621, "grad_norm": 496.59881591796875, "learning_rate": 5.422910984885542e-06, "loss": 26.8352, "step": 262870 }, { "epoch": 0.5310342319921459, "grad_norm": 390.3055114746094, "learning_rate": 5.422563168880456e-06, "loss": 21.1653, "step": 262880 }, { "epoch": 0.5310544326248298, "grad_norm": 309.1153564453125, "learning_rate": 5.422215350815848e-06, "loss": 16.7842, "step": 262890 }, { "epoch": 0.5310746332575136, "grad_norm": 361.0472106933594, "learning_rate": 5.4218675306934145e-06, "loss": 16.1422, "step": 262900 }, { "epoch": 0.5310948338901974, "grad_norm": 293.56005859375, "learning_rate": 5.42151970851485e-06, "loss": 25.3251, "step": 262910 }, { "epoch": 0.5311150345228812, "grad_norm": 444.24945068359375, "learning_rate": 5.4211718842818485e-06, "loss": 10.4506, "step": 262920 }, { "epoch": 0.531135235155565, "grad_norm": 623.7088623046875, "learning_rate": 5.420824057996107e-06, "loss": 13.4804, "step": 262930 }, { "epoch": 0.5311554357882489, "grad_norm": 291.3700866699219, "learning_rate": 5.420476229659319e-06, "loss": 19.6258, "step": 262940 }, { "epoch": 0.5311756364209327, "grad_norm": 892.9168090820312, "learning_rate": 5.420128399273183e-06, "loss": 25.5505, "step": 262950 }, { "epoch": 0.5311958370536165, "grad_norm": 189.8000946044922, "learning_rate": 5.419780566839389e-06, "loss": 16.807, "step": 262960 }, { "epoch": 0.5312160376863003, "grad_norm": 361.9689025878906, "learning_rate": 5.419432732359637e-06, "loss": 11.4353, "step": 262970 }, { "epoch": 0.5312362383189841, "grad_norm": 395.75323486328125, "learning_rate": 5.419084895835621e-06, "loss": 14.5804, "step": 262980 }, { "epoch": 0.531256438951668, "grad_norm": 632.162841796875, "learning_rate": 5.418737057269037e-06, "loss": 27.1474, "step": 262990 }, { "epoch": 0.5312766395843518, "grad_norm": 764.3357543945312, "learning_rate": 5.41838921666158e-06, "loss": 24.8534, "step": 263000 }, { "epoch": 0.5312968402170356, "grad_norm": 414.3321838378906, "learning_rate": 5.418041374014942e-06, "loss": 23.2771, "step": 263010 }, { "epoch": 0.5313170408497194, "grad_norm": 606.6253051757812, "learning_rate": 5.417693529330822e-06, "loss": 19.7729, "step": 263020 }, { "epoch": 0.5313372414824032, "grad_norm": 281.4236755371094, "learning_rate": 5.417345682610914e-06, "loss": 19.4735, "step": 263030 }, { "epoch": 0.5313574421150871, "grad_norm": 360.08984375, "learning_rate": 5.416997833856914e-06, "loss": 30.3656, "step": 263040 }, { "epoch": 0.5313776427477709, "grad_norm": 422.86907958984375, "learning_rate": 5.416649983070518e-06, "loss": 13.8087, "step": 263050 }, { "epoch": 0.5313978433804547, "grad_norm": 122.8550033569336, "learning_rate": 5.4163021302534204e-06, "loss": 29.5725, "step": 263060 }, { "epoch": 0.5314180440131385, "grad_norm": 382.35406494140625, "learning_rate": 5.415954275407316e-06, "loss": 15.5037, "step": 263070 }, { "epoch": 0.5314382446458223, "grad_norm": 160.0564727783203, "learning_rate": 5.415606418533901e-06, "loss": 21.0315, "step": 263080 }, { "epoch": 0.5314584452785062, "grad_norm": 151.24624633789062, 
"learning_rate": 5.4152585596348704e-06, "loss": 25.3234, "step": 263090 }, { "epoch": 0.53147864591119, "grad_norm": 480.6485595703125, "learning_rate": 5.41491069871192e-06, "loss": 10.9988, "step": 263100 }, { "epoch": 0.5314988465438738, "grad_norm": 193.90916442871094, "learning_rate": 5.414562835766747e-06, "loss": 30.0569, "step": 263110 }, { "epoch": 0.5315190471765576, "grad_norm": 253.57791137695312, "learning_rate": 5.414214970801041e-06, "loss": 24.2024, "step": 263120 }, { "epoch": 0.5315392478092413, "grad_norm": 846.8713989257812, "learning_rate": 5.413867103816506e-06, "loss": 21.1907, "step": 263130 }, { "epoch": 0.5315594484419252, "grad_norm": 164.007568359375, "learning_rate": 5.413519234814831e-06, "loss": 26.9846, "step": 263140 }, { "epoch": 0.531579649074609, "grad_norm": 461.7021179199219, "learning_rate": 5.413171363797713e-06, "loss": 12.8953, "step": 263150 }, { "epoch": 0.5315998497072928, "grad_norm": 934.0023193359375, "learning_rate": 5.412823490766849e-06, "loss": 25.0997, "step": 263160 }, { "epoch": 0.5316200503399766, "grad_norm": 208.72476196289062, "learning_rate": 5.412475615723931e-06, "loss": 17.7574, "step": 263170 }, { "epoch": 0.5316402509726604, "grad_norm": 161.92298889160156, "learning_rate": 5.41212773867066e-06, "loss": 30.3754, "step": 263180 }, { "epoch": 0.5316604516053443, "grad_norm": 382.603759765625, "learning_rate": 5.4117798596087265e-06, "loss": 29.2349, "step": 263190 }, { "epoch": 0.5316806522380281, "grad_norm": 222.22035217285156, "learning_rate": 5.411431978539829e-06, "loss": 23.2413, "step": 263200 }, { "epoch": 0.5317008528707119, "grad_norm": 378.6025695800781, "learning_rate": 5.411084095465661e-06, "loss": 25.6467, "step": 263210 }, { "epoch": 0.5317210535033957, "grad_norm": 756.3626098632812, "learning_rate": 5.41073621038792e-06, "loss": 21.5958, "step": 263220 }, { "epoch": 0.5317412541360795, "grad_norm": 386.68011474609375, "learning_rate": 5.410388323308299e-06, "loss": 22.2969, "step": 263230 }, { "epoch": 0.5317614547687634, "grad_norm": 219.22743225097656, "learning_rate": 5.410040434228496e-06, "loss": 10.8826, "step": 263240 }, { "epoch": 0.5317816554014472, "grad_norm": 13.215092658996582, "learning_rate": 5.409692543150206e-06, "loss": 16.3198, "step": 263250 }, { "epoch": 0.531801856034131, "grad_norm": 567.9088134765625, "learning_rate": 5.409344650075123e-06, "loss": 25.1716, "step": 263260 }, { "epoch": 0.5318220566668148, "grad_norm": 360.5703125, "learning_rate": 5.4089967550049445e-06, "loss": 12.8228, "step": 263270 }, { "epoch": 0.5318422572994986, "grad_norm": 446.908935546875, "learning_rate": 5.408648857941365e-06, "loss": 17.5523, "step": 263280 }, { "epoch": 0.5318624579321825, "grad_norm": 444.71173095703125, "learning_rate": 5.408300958886083e-06, "loss": 21.1386, "step": 263290 }, { "epoch": 0.5318826585648663, "grad_norm": 288.6525573730469, "learning_rate": 5.4079530578407895e-06, "loss": 13.9982, "step": 263300 }, { "epoch": 0.5319028591975501, "grad_norm": 433.6891174316406, "learning_rate": 5.407605154807182e-06, "loss": 11.7726, "step": 263310 }, { "epoch": 0.5319230598302339, "grad_norm": 534.1845092773438, "learning_rate": 5.4072572497869556e-06, "loss": 15.9662, "step": 263320 }, { "epoch": 0.5319432604629177, "grad_norm": 350.7384948730469, "learning_rate": 5.406909342781809e-06, "loss": 25.5752, "step": 263330 }, { "epoch": 0.5319634610956016, "grad_norm": 232.4453125, "learning_rate": 5.406561433793435e-06, "loss": 28.6806, "step": 263340 }, { "epoch": 0.5319836617282854, 
"grad_norm": 550.2820434570312, "learning_rate": 5.406213522823529e-06, "loss": 15.2013, "step": 263350 }, { "epoch": 0.5320038623609692, "grad_norm": 417.4783630371094, "learning_rate": 5.4058656098737885e-06, "loss": 34.7693, "step": 263360 }, { "epoch": 0.532024062993653, "grad_norm": 53.68265914916992, "learning_rate": 5.405517694945907e-06, "loss": 26.0981, "step": 263370 }, { "epoch": 0.5320442636263367, "grad_norm": 616.639404296875, "learning_rate": 5.405169778041583e-06, "loss": 11.6597, "step": 263380 }, { "epoch": 0.5320644642590205, "grad_norm": 471.22039794921875, "learning_rate": 5.404821859162509e-06, "loss": 19.9855, "step": 263390 }, { "epoch": 0.5320846648917044, "grad_norm": 598.6522827148438, "learning_rate": 5.404473938310384e-06, "loss": 18.3524, "step": 263400 }, { "epoch": 0.5321048655243882, "grad_norm": 20.47283363342285, "learning_rate": 5.404126015486901e-06, "loss": 20.8549, "step": 263410 }, { "epoch": 0.532125066157072, "grad_norm": 283.7949523925781, "learning_rate": 5.403778090693758e-06, "loss": 21.7683, "step": 263420 }, { "epoch": 0.5321452667897558, "grad_norm": 192.6545867919922, "learning_rate": 5.403430163932648e-06, "loss": 32.1624, "step": 263430 }, { "epoch": 0.5321654674224396, "grad_norm": 189.44528198242188, "learning_rate": 5.403082235205269e-06, "loss": 14.1211, "step": 263440 }, { "epoch": 0.5321856680551235, "grad_norm": 684.8155517578125, "learning_rate": 5.402734304513316e-06, "loss": 22.6836, "step": 263450 }, { "epoch": 0.5322058686878073, "grad_norm": 173.18606567382812, "learning_rate": 5.402386371858486e-06, "loss": 22.5028, "step": 263460 }, { "epoch": 0.5322260693204911, "grad_norm": 170.42735290527344, "learning_rate": 5.402038437242471e-06, "loss": 21.4767, "step": 263470 }, { "epoch": 0.5322462699531749, "grad_norm": 345.851318359375, "learning_rate": 5.401690500666972e-06, "loss": 8.1845, "step": 263480 }, { "epoch": 0.5322664705858587, "grad_norm": 36.30461502075195, "learning_rate": 5.401342562133682e-06, "loss": 17.9214, "step": 263490 }, { "epoch": 0.5322866712185426, "grad_norm": 216.7414093017578, "learning_rate": 5.400994621644294e-06, "loss": 9.2869, "step": 263500 }, { "epoch": 0.5323068718512264, "grad_norm": 907.1314086914062, "learning_rate": 5.4006466792005105e-06, "loss": 21.1397, "step": 263510 }, { "epoch": 0.5323270724839102, "grad_norm": 406.8268737792969, "learning_rate": 5.400298734804023e-06, "loss": 20.9543, "step": 263520 }, { "epoch": 0.532347273116594, "grad_norm": 215.2248077392578, "learning_rate": 5.399950788456526e-06, "loss": 28.9349, "step": 263530 }, { "epoch": 0.5323674737492778, "grad_norm": 479.4017639160156, "learning_rate": 5.39960284015972e-06, "loss": 21.6751, "step": 263540 }, { "epoch": 0.5323876743819617, "grad_norm": 192.6239776611328, "learning_rate": 5.399254889915296e-06, "loss": 24.9698, "step": 263550 }, { "epoch": 0.5324078750146455, "grad_norm": 377.7315368652344, "learning_rate": 5.398906937724954e-06, "loss": 15.3041, "step": 263560 }, { "epoch": 0.5324280756473293, "grad_norm": 252.1598358154297, "learning_rate": 5.398558983590385e-06, "loss": 21.2734, "step": 263570 }, { "epoch": 0.5324482762800131, "grad_norm": 87.29096984863281, "learning_rate": 5.398211027513291e-06, "loss": 27.8237, "step": 263580 }, { "epoch": 0.5324684769126969, "grad_norm": 158.6113739013672, "learning_rate": 5.397863069495364e-06, "loss": 19.541, "step": 263590 }, { "epoch": 0.5324886775453808, "grad_norm": 502.4717712402344, "learning_rate": 5.3975151095383e-06, "loss": 35.3605, "step": 263600 }, { 
"epoch": 0.5325088781780646, "grad_norm": 255.55706787109375, "learning_rate": 5.397167147643796e-06, "loss": 14.7615, "step": 263610 }, { "epoch": 0.5325290788107484, "grad_norm": 555.1712036132812, "learning_rate": 5.396819183813547e-06, "loss": 21.8285, "step": 263620 }, { "epoch": 0.5325492794434322, "grad_norm": 501.9200744628906, "learning_rate": 5.396471218049249e-06, "loss": 9.4004, "step": 263630 }, { "epoch": 0.5325694800761159, "grad_norm": 322.0678405761719, "learning_rate": 5.3961232503526e-06, "loss": 16.2422, "step": 263640 }, { "epoch": 0.5325896807087998, "grad_norm": 29.05496597290039, "learning_rate": 5.3957752807252925e-06, "loss": 12.9038, "step": 263650 }, { "epoch": 0.5326098813414836, "grad_norm": 376.70440673828125, "learning_rate": 5.3954273091690245e-06, "loss": 18.4567, "step": 263660 }, { "epoch": 0.5326300819741674, "grad_norm": 471.65032958984375, "learning_rate": 5.395079335685494e-06, "loss": 18.5832, "step": 263670 }, { "epoch": 0.5326502826068512, "grad_norm": 666.2213745117188, "learning_rate": 5.394731360276393e-06, "loss": 23.8404, "step": 263680 }, { "epoch": 0.532670483239535, "grad_norm": 320.22027587890625, "learning_rate": 5.394383382943419e-06, "loss": 12.4747, "step": 263690 }, { "epoch": 0.5326906838722189, "grad_norm": 524.0413208007812, "learning_rate": 5.394035403688268e-06, "loss": 19.0076, "step": 263700 }, { "epoch": 0.5327108845049027, "grad_norm": 131.5553436279297, "learning_rate": 5.393687422512637e-06, "loss": 16.9744, "step": 263710 }, { "epoch": 0.5327310851375865, "grad_norm": 190.25286865234375, "learning_rate": 5.393339439418222e-06, "loss": 25.3414, "step": 263720 }, { "epoch": 0.5327512857702703, "grad_norm": 732.912353515625, "learning_rate": 5.392991454406716e-06, "loss": 16.244, "step": 263730 }, { "epoch": 0.5327714864029541, "grad_norm": 690.7264404296875, "learning_rate": 5.39264346747982e-06, "loss": 20.082, "step": 263740 }, { "epoch": 0.532791687035638, "grad_norm": 474.1722412109375, "learning_rate": 5.392295478639226e-06, "loss": 28.8244, "step": 263750 }, { "epoch": 0.5328118876683218, "grad_norm": 269.3618469238281, "learning_rate": 5.391947487886631e-06, "loss": 15.4086, "step": 263760 }, { "epoch": 0.5328320883010056, "grad_norm": 628.8046875, "learning_rate": 5.391599495223732e-06, "loss": 19.8448, "step": 263770 }, { "epoch": 0.5328522889336894, "grad_norm": 449.0356750488281, "learning_rate": 5.391251500652224e-06, "loss": 16.8209, "step": 263780 }, { "epoch": 0.5328724895663732, "grad_norm": 577.2578735351562, "learning_rate": 5.390903504173805e-06, "loss": 10.4767, "step": 263790 }, { "epoch": 0.532892690199057, "grad_norm": 622.21875, "learning_rate": 5.390555505790168e-06, "loss": 20.1476, "step": 263800 }, { "epoch": 0.5329128908317409, "grad_norm": 592.1292114257812, "learning_rate": 5.390207505503012e-06, "loss": 14.5329, "step": 263810 }, { "epoch": 0.5329330914644247, "grad_norm": 421.0754699707031, "learning_rate": 5.389859503314031e-06, "loss": 10.4641, "step": 263820 }, { "epoch": 0.5329532920971085, "grad_norm": 457.61810302734375, "learning_rate": 5.389511499224925e-06, "loss": 26.411, "step": 263830 }, { "epoch": 0.5329734927297923, "grad_norm": 511.234619140625, "learning_rate": 5.389163493237382e-06, "loss": 14.4797, "step": 263840 }, { "epoch": 0.5329936933624762, "grad_norm": 1096.11767578125, "learning_rate": 5.388815485353109e-06, "loss": 38.672, "step": 263850 }, { "epoch": 0.53301389399516, "grad_norm": 571.6307983398438, "learning_rate": 5.388467475573792e-06, "loss": 25.8154, "step": 
263860 }, { "epoch": 0.5330340946278438, "grad_norm": 466.66357421875, "learning_rate": 5.388119463901134e-06, "loss": 17.001, "step": 263870 }, { "epoch": 0.5330542952605276, "grad_norm": 517.427978515625, "learning_rate": 5.3877714503368285e-06, "loss": 11.6967, "step": 263880 }, { "epoch": 0.5330744958932114, "grad_norm": 994.4615478515625, "learning_rate": 5.387423434882571e-06, "loss": 30.1973, "step": 263890 }, { "epoch": 0.5330946965258951, "grad_norm": 829.72314453125, "learning_rate": 5.3870754175400595e-06, "loss": 28.2975, "step": 263900 }, { "epoch": 0.533114897158579, "grad_norm": 346.5272521972656, "learning_rate": 5.386727398310989e-06, "loss": 26.1818, "step": 263910 }, { "epoch": 0.5331350977912628, "grad_norm": 585.033203125, "learning_rate": 5.386379377197056e-06, "loss": 18.1059, "step": 263920 }, { "epoch": 0.5331552984239466, "grad_norm": 358.1483154296875, "learning_rate": 5.386031354199956e-06, "loss": 19.7494, "step": 263930 }, { "epoch": 0.5331754990566304, "grad_norm": 246.29762268066406, "learning_rate": 5.385683329321387e-06, "loss": 23.5754, "step": 263940 }, { "epoch": 0.5331956996893142, "grad_norm": 184.22647094726562, "learning_rate": 5.385335302563046e-06, "loss": 20.0688, "step": 263950 }, { "epoch": 0.5332159003219981, "grad_norm": 810.5045776367188, "learning_rate": 5.384987273926625e-06, "loss": 13.1073, "step": 263960 }, { "epoch": 0.5332361009546819, "grad_norm": 677.8699951171875, "learning_rate": 5.384639243413824e-06, "loss": 30.2363, "step": 263970 }, { "epoch": 0.5332563015873657, "grad_norm": 516.2007446289062, "learning_rate": 5.384291211026337e-06, "loss": 17.3284, "step": 263980 }, { "epoch": 0.5332765022200495, "grad_norm": 1023.1954345703125, "learning_rate": 5.383943176765862e-06, "loss": 18.264, "step": 263990 }, { "epoch": 0.5332967028527333, "grad_norm": 2461.611083984375, "learning_rate": 5.383595140634093e-06, "loss": 28.0451, "step": 264000 }, { "epoch": 0.5333169034854172, "grad_norm": 215.49591064453125, "learning_rate": 5.383247102632731e-06, "loss": 16.7719, "step": 264010 }, { "epoch": 0.533337104118101, "grad_norm": 626.0396118164062, "learning_rate": 5.3828990627634655e-06, "loss": 27.2502, "step": 264020 }, { "epoch": 0.5333573047507848, "grad_norm": 960.50927734375, "learning_rate": 5.382551021027999e-06, "loss": 17.597, "step": 264030 }, { "epoch": 0.5333775053834686, "grad_norm": 663.5912475585938, "learning_rate": 5.382202977428025e-06, "loss": 22.6198, "step": 264040 }, { "epoch": 0.5333977060161524, "grad_norm": 67.04728698730469, "learning_rate": 5.381854931965238e-06, "loss": 18.9294, "step": 264050 }, { "epoch": 0.5334179066488363, "grad_norm": 393.4915771484375, "learning_rate": 5.381506884641339e-06, "loss": 13.7701, "step": 264060 }, { "epoch": 0.5334381072815201, "grad_norm": 151.0877227783203, "learning_rate": 5.381158835458019e-06, "loss": 18.0856, "step": 264070 }, { "epoch": 0.5334583079142039, "grad_norm": 547.76318359375, "learning_rate": 5.380810784416979e-06, "loss": 16.9888, "step": 264080 }, { "epoch": 0.5334785085468877, "grad_norm": 132.0612335205078, "learning_rate": 5.380462731519912e-06, "loss": 10.7681, "step": 264090 }, { "epoch": 0.5334987091795715, "grad_norm": 51.655364990234375, "learning_rate": 5.380114676768516e-06, "loss": 14.4731, "step": 264100 }, { "epoch": 0.5335189098122554, "grad_norm": 601.3800048828125, "learning_rate": 5.379766620164488e-06, "loss": 15.6159, "step": 264110 }, { "epoch": 0.5335391104449392, "grad_norm": 146.5826416015625, "learning_rate": 5.379418561709524e-06, 
"loss": 22.4184, "step": 264120 }, { "epoch": 0.533559311077623, "grad_norm": 796.7298583984375, "learning_rate": 5.37907050140532e-06, "loss": 17.6309, "step": 264130 }, { "epoch": 0.5335795117103068, "grad_norm": 187.71275329589844, "learning_rate": 5.378722439253571e-06, "loss": 15.502, "step": 264140 }, { "epoch": 0.5335997123429905, "grad_norm": 731.462646484375, "learning_rate": 5.378374375255977e-06, "loss": 25.3865, "step": 264150 }, { "epoch": 0.5336199129756743, "grad_norm": 299.503662109375, "learning_rate": 5.378026309414229e-06, "loss": 22.9609, "step": 264160 }, { "epoch": 0.5336401136083582, "grad_norm": 481.3091125488281, "learning_rate": 5.377678241730029e-06, "loss": 29.2946, "step": 264170 }, { "epoch": 0.533660314241042, "grad_norm": 677.627685546875, "learning_rate": 5.377330172205068e-06, "loss": 22.5868, "step": 264180 }, { "epoch": 0.5336805148737258, "grad_norm": 163.53543090820312, "learning_rate": 5.37698210084105e-06, "loss": 16.8451, "step": 264190 }, { "epoch": 0.5337007155064096, "grad_norm": 795.53125, "learning_rate": 5.376634027639664e-06, "loss": 39.883, "step": 264200 }, { "epoch": 0.5337209161390934, "grad_norm": 380.3072204589844, "learning_rate": 5.37628595260261e-06, "loss": 25.8065, "step": 264210 }, { "epoch": 0.5337411167717773, "grad_norm": 0.0, "learning_rate": 5.375937875731585e-06, "loss": 11.8602, "step": 264220 }, { "epoch": 0.5337613174044611, "grad_norm": 466.2163391113281, "learning_rate": 5.375589797028282e-06, "loss": 6.8212, "step": 264230 }, { "epoch": 0.5337815180371449, "grad_norm": 132.32325744628906, "learning_rate": 5.375241716494403e-06, "loss": 22.9862, "step": 264240 }, { "epoch": 0.5338017186698287, "grad_norm": 620.8463134765625, "learning_rate": 5.3748936341316395e-06, "loss": 14.3129, "step": 264250 }, { "epoch": 0.5338219193025125, "grad_norm": 738.5233154296875, "learning_rate": 5.37454554994169e-06, "loss": 26.2885, "step": 264260 }, { "epoch": 0.5338421199351964, "grad_norm": 753.781494140625, "learning_rate": 5.374197463926251e-06, "loss": 21.0322, "step": 264270 }, { "epoch": 0.5338623205678802, "grad_norm": 271.4347839355469, "learning_rate": 5.37384937608702e-06, "loss": 14.2221, "step": 264280 }, { "epoch": 0.533882521200564, "grad_norm": 267.5508117675781, "learning_rate": 5.373501286425691e-06, "loss": 17.9659, "step": 264290 }, { "epoch": 0.5339027218332478, "grad_norm": 663.1692504882812, "learning_rate": 5.373153194943962e-06, "loss": 24.0293, "step": 264300 }, { "epoch": 0.5339229224659316, "grad_norm": 359.5267333984375, "learning_rate": 5.37280510164353e-06, "loss": 32.0395, "step": 264310 }, { "epoch": 0.5339431230986155, "grad_norm": 239.31358337402344, "learning_rate": 5.37245700652609e-06, "loss": 16.0137, "step": 264320 }, { "epoch": 0.5339633237312993, "grad_norm": 512.493408203125, "learning_rate": 5.372108909593343e-06, "loss": 23.8659, "step": 264330 }, { "epoch": 0.5339835243639831, "grad_norm": 398.17938232421875, "learning_rate": 5.371760810846979e-06, "loss": 11.4735, "step": 264340 }, { "epoch": 0.5340037249966669, "grad_norm": 352.3731384277344, "learning_rate": 5.371412710288701e-06, "loss": 18.8082, "step": 264350 }, { "epoch": 0.5340239256293507, "grad_norm": 93.62528228759766, "learning_rate": 5.3710646079202e-06, "loss": 14.97, "step": 264360 }, { "epoch": 0.5340441262620346, "grad_norm": 362.8443603515625, "learning_rate": 5.370716503743175e-06, "loss": 35.2391, "step": 264370 }, { "epoch": 0.5340643268947184, "grad_norm": 905.8558349609375, "learning_rate": 5.370368397759324e-06, 
"loss": 44.9503, "step": 264380 }, { "epoch": 0.5340845275274022, "grad_norm": 132.63365173339844, "learning_rate": 5.370020289970341e-06, "loss": 32.7998, "step": 264390 }, { "epoch": 0.534104728160086, "grad_norm": 348.46185302734375, "learning_rate": 5.3696721803779265e-06, "loss": 25.7999, "step": 264400 }, { "epoch": 0.5341249287927697, "grad_norm": 138.4418487548828, "learning_rate": 5.369324068983772e-06, "loss": 14.2852, "step": 264410 }, { "epoch": 0.5341451294254536, "grad_norm": 157.70175170898438, "learning_rate": 5.368975955789577e-06, "loss": 18.0189, "step": 264420 }, { "epoch": 0.5341653300581374, "grad_norm": 368.96221923828125, "learning_rate": 5.368627840797039e-06, "loss": 17.9959, "step": 264430 }, { "epoch": 0.5341855306908212, "grad_norm": 186.53025817871094, "learning_rate": 5.368279724007854e-06, "loss": 18.3524, "step": 264440 }, { "epoch": 0.534205731323505, "grad_norm": 403.6632385253906, "learning_rate": 5.3679316054237165e-06, "loss": 42.2809, "step": 264450 }, { "epoch": 0.5342259319561888, "grad_norm": 3.657899856567383, "learning_rate": 5.367583485046327e-06, "loss": 29.1559, "step": 264460 }, { "epoch": 0.5342461325888727, "grad_norm": 186.83163452148438, "learning_rate": 5.367235362877379e-06, "loss": 9.2051, "step": 264470 }, { "epoch": 0.5342663332215565, "grad_norm": 41.42416000366211, "learning_rate": 5.366887238918571e-06, "loss": 15.1823, "step": 264480 }, { "epoch": 0.5342865338542403, "grad_norm": 75.92926788330078, "learning_rate": 5.3665391131716e-06, "loss": 18.3596, "step": 264490 }, { "epoch": 0.5343067344869241, "grad_norm": 676.78662109375, "learning_rate": 5.366190985638159e-06, "loss": 47.9756, "step": 264500 }, { "epoch": 0.5343269351196079, "grad_norm": 94.7362060546875, "learning_rate": 5.36584285631995e-06, "loss": 10.0836, "step": 264510 }, { "epoch": 0.5343471357522918, "grad_norm": 174.0113525390625, "learning_rate": 5.365494725218667e-06, "loss": 16.7035, "step": 264520 }, { "epoch": 0.5343673363849756, "grad_norm": 222.8553924560547, "learning_rate": 5.3651465923360045e-06, "loss": 34.4077, "step": 264530 }, { "epoch": 0.5343875370176594, "grad_norm": 285.4593200683594, "learning_rate": 5.3647984576736645e-06, "loss": 17.6323, "step": 264540 }, { "epoch": 0.5344077376503432, "grad_norm": 414.40814208984375, "learning_rate": 5.3644503212333395e-06, "loss": 14.8091, "step": 264550 }, { "epoch": 0.534427938283027, "grad_norm": 125.51368713378906, "learning_rate": 5.36410218301673e-06, "loss": 23.6634, "step": 264560 }, { "epoch": 0.5344481389157109, "grad_norm": 270.2915344238281, "learning_rate": 5.363754043025528e-06, "loss": 18.8809, "step": 264570 }, { "epoch": 0.5344683395483947, "grad_norm": 235.06610107421875, "learning_rate": 5.3634059012614345e-06, "loss": 7.6422, "step": 264580 }, { "epoch": 0.5344885401810785, "grad_norm": 2.9614596366882324, "learning_rate": 5.363057757726145e-06, "loss": 12.5189, "step": 264590 }, { "epoch": 0.5345087408137623, "grad_norm": 199.17440795898438, "learning_rate": 5.362709612421355e-06, "loss": 22.2044, "step": 264600 }, { "epoch": 0.5345289414464461, "grad_norm": 224.73255920410156, "learning_rate": 5.362361465348762e-06, "loss": 22.9273, "step": 264610 }, { "epoch": 0.53454914207913, "grad_norm": 516.4451293945312, "learning_rate": 5.3620133165100656e-06, "loss": 25.1774, "step": 264620 }, { "epoch": 0.5345693427118138, "grad_norm": 359.0810241699219, "learning_rate": 5.3616651659069576e-06, "loss": 27.5257, "step": 264630 }, { "epoch": 0.5345895433444976, "grad_norm": 6.806414604187012, 
"learning_rate": 5.3613170135411384e-06, "loss": 13.3821, "step": 264640 }, { "epoch": 0.5346097439771814, "grad_norm": 545.328125, "learning_rate": 5.360968859414305e-06, "loss": 30.1279, "step": 264650 }, { "epoch": 0.5346299446098651, "grad_norm": 198.06240844726562, "learning_rate": 5.36062070352815e-06, "loss": 16.2758, "step": 264660 }, { "epoch": 0.534650145242549, "grad_norm": 197.97659301757812, "learning_rate": 5.360272545884376e-06, "loss": 17.6192, "step": 264670 }, { "epoch": 0.5346703458752328, "grad_norm": 284.88006591796875, "learning_rate": 5.359924386484676e-06, "loss": 32.0421, "step": 264680 }, { "epoch": 0.5346905465079166, "grad_norm": 350.51611328125, "learning_rate": 5.35957622533075e-06, "loss": 26.1766, "step": 264690 }, { "epoch": 0.5347107471406004, "grad_norm": 242.34136962890625, "learning_rate": 5.359228062424292e-06, "loss": 13.5947, "step": 264700 }, { "epoch": 0.5347309477732842, "grad_norm": 189.86558532714844, "learning_rate": 5.358879897767e-06, "loss": 14.2171, "step": 264710 }, { "epoch": 0.534751148405968, "grad_norm": 526.793701171875, "learning_rate": 5.358531731360571e-06, "loss": 10.9611, "step": 264720 }, { "epoch": 0.5347713490386519, "grad_norm": 241.5596466064453, "learning_rate": 5.358183563206703e-06, "loss": 17.8949, "step": 264730 }, { "epoch": 0.5347915496713357, "grad_norm": 396.1840515136719, "learning_rate": 5.357835393307089e-06, "loss": 14.8254, "step": 264740 }, { "epoch": 0.5348117503040195, "grad_norm": 981.4369506835938, "learning_rate": 5.35748722166343e-06, "loss": 38.9434, "step": 264750 }, { "epoch": 0.5348319509367033, "grad_norm": 362.9976806640625, "learning_rate": 5.357139048277422e-06, "loss": 13.3401, "step": 264760 }, { "epoch": 0.5348521515693871, "grad_norm": 249.92623901367188, "learning_rate": 5.356790873150761e-06, "loss": 14.2971, "step": 264770 }, { "epoch": 0.534872352202071, "grad_norm": 123.45661163330078, "learning_rate": 5.356442696285146e-06, "loss": 8.8225, "step": 264780 }, { "epoch": 0.5348925528347548, "grad_norm": 342.8233337402344, "learning_rate": 5.3560945176822695e-06, "loss": 25.6532, "step": 264790 }, { "epoch": 0.5349127534674386, "grad_norm": 267.54119873046875, "learning_rate": 5.355746337343835e-06, "loss": 12.2637, "step": 264800 }, { "epoch": 0.5349329541001224, "grad_norm": 144.90191650390625, "learning_rate": 5.355398155271535e-06, "loss": 11.5702, "step": 264810 }, { "epoch": 0.5349531547328062, "grad_norm": 128.60110473632812, "learning_rate": 5.355049971467066e-06, "loss": 10.0458, "step": 264820 }, { "epoch": 0.5349733553654901, "grad_norm": 270.35791015625, "learning_rate": 5.354701785932129e-06, "loss": 17.4736, "step": 264830 }, { "epoch": 0.5349935559981739, "grad_norm": 367.4037780761719, "learning_rate": 5.354353598668416e-06, "loss": 24.0634, "step": 264840 }, { "epoch": 0.5350137566308577, "grad_norm": 223.6572723388672, "learning_rate": 5.354005409677628e-06, "loss": 33.9743, "step": 264850 }, { "epoch": 0.5350339572635415, "grad_norm": 302.42987060546875, "learning_rate": 5.35365721896146e-06, "loss": 14.558, "step": 264860 }, { "epoch": 0.5350541578962253, "grad_norm": 418.0154113769531, "learning_rate": 5.353309026521609e-06, "loss": 22.6505, "step": 264870 }, { "epoch": 0.5350743585289092, "grad_norm": 835.4775390625, "learning_rate": 5.3529608323597735e-06, "loss": 41.2067, "step": 264880 }, { "epoch": 0.535094559161593, "grad_norm": 608.3998413085938, "learning_rate": 5.352612636477651e-06, "loss": 27.9021, "step": 264890 }, { "epoch": 0.5351147597942768, "grad_norm": 
211.52325439453125, "learning_rate": 5.352264438876935e-06, "loss": 29.2275, "step": 264900 }, { "epoch": 0.5351349604269606, "grad_norm": 251.80059814453125, "learning_rate": 5.351916239559326e-06, "loss": 5.9152, "step": 264910 }, { "epoch": 0.5351551610596443, "grad_norm": 632.3524169921875, "learning_rate": 5.35156803852652e-06, "loss": 26.1784, "step": 264920 }, { "epoch": 0.5351753616923282, "grad_norm": 812.4949340820312, "learning_rate": 5.351219835780213e-06, "loss": 17.2401, "step": 264930 }, { "epoch": 0.535195562325012, "grad_norm": 150.9505615234375, "learning_rate": 5.3508716313221054e-06, "loss": 12.239, "step": 264940 }, { "epoch": 0.5352157629576958, "grad_norm": 980.598388671875, "learning_rate": 5.3505234251538885e-06, "loss": 28.4981, "step": 264950 }, { "epoch": 0.5352359635903796, "grad_norm": 192.49562072753906, "learning_rate": 5.3501752172772655e-06, "loss": 10.0218, "step": 264960 }, { "epoch": 0.5352561642230634, "grad_norm": 170.17161560058594, "learning_rate": 5.34982700769393e-06, "loss": 8.1467, "step": 264970 }, { "epoch": 0.5352763648557473, "grad_norm": 513.2670288085938, "learning_rate": 5.3494787964055805e-06, "loss": 27.5369, "step": 264980 }, { "epoch": 0.5352965654884311, "grad_norm": 569.3101806640625, "learning_rate": 5.349130583413915e-06, "loss": 11.8051, "step": 264990 }, { "epoch": 0.5353167661211149, "grad_norm": 567.8678588867188, "learning_rate": 5.348782368720627e-06, "loss": 14.2739, "step": 265000 }, { "epoch": 0.5353369667537987, "grad_norm": 437.3683166503906, "learning_rate": 5.348434152327418e-06, "loss": 15.0816, "step": 265010 }, { "epoch": 0.5353571673864825, "grad_norm": 764.6904296875, "learning_rate": 5.348085934235981e-06, "loss": 40.8282, "step": 265020 }, { "epoch": 0.5353773680191664, "grad_norm": 804.3911743164062, "learning_rate": 5.347737714448017e-06, "loss": 24.8363, "step": 265030 }, { "epoch": 0.5353975686518502, "grad_norm": 68.34645080566406, "learning_rate": 5.347389492965221e-06, "loss": 9.1033, "step": 265040 }, { "epoch": 0.535417769284534, "grad_norm": 336.69256591796875, "learning_rate": 5.347041269789293e-06, "loss": 22.3642, "step": 265050 }, { "epoch": 0.5354379699172178, "grad_norm": 390.03448486328125, "learning_rate": 5.346693044921925e-06, "loss": 16.6374, "step": 265060 }, { "epoch": 0.5354581705499016, "grad_norm": 650.5511474609375, "learning_rate": 5.3463448183648185e-06, "loss": 13.1788, "step": 265070 }, { "epoch": 0.5354783711825855, "grad_norm": 311.6661071777344, "learning_rate": 5.345996590119668e-06, "loss": 22.0306, "step": 265080 }, { "epoch": 0.5354985718152693, "grad_norm": 418.624267578125, "learning_rate": 5.345648360188173e-06, "loss": 20.1837, "step": 265090 }, { "epoch": 0.5355187724479531, "grad_norm": 565.4595336914062, "learning_rate": 5.345300128572031e-06, "loss": 19.1615, "step": 265100 }, { "epoch": 0.5355389730806369, "grad_norm": 297.052978515625, "learning_rate": 5.344951895272935e-06, "loss": 21.3977, "step": 265110 }, { "epoch": 0.5355591737133207, "grad_norm": 360.16717529296875, "learning_rate": 5.344603660292588e-06, "loss": 11.1924, "step": 265120 }, { "epoch": 0.5355793743460046, "grad_norm": 843.7276611328125, "learning_rate": 5.344255423632684e-06, "loss": 28.0311, "step": 265130 }, { "epoch": 0.5355995749786884, "grad_norm": 568.5252685546875, "learning_rate": 5.34390718529492e-06, "loss": 26.0348, "step": 265140 }, { "epoch": 0.5356197756113722, "grad_norm": 220.79872131347656, "learning_rate": 5.343558945280994e-06, "loss": 8.2561, "step": 265150 }, { "epoch": 
0.535639976244056, "grad_norm": 349.0790710449219, "learning_rate": 5.343210703592604e-06, "loss": 19.9893, "step": 265160 }, { "epoch": 0.5356601768767397, "grad_norm": 677.7298583984375, "learning_rate": 5.342862460231448e-06, "loss": 18.7261, "step": 265170 }, { "epoch": 0.5356803775094235, "grad_norm": 477.18865966796875, "learning_rate": 5.34251421519922e-06, "loss": 33.3211, "step": 265180 }, { "epoch": 0.5357005781421074, "grad_norm": 416.8053283691406, "learning_rate": 5.3421659684976205e-06, "loss": 17.2127, "step": 265190 }, { "epoch": 0.5357207787747912, "grad_norm": 132.3507843017578, "learning_rate": 5.341817720128344e-06, "loss": 15.1434, "step": 265200 }, { "epoch": 0.535740979407475, "grad_norm": 1010.3002319335938, "learning_rate": 5.341469470093091e-06, "loss": 21.6635, "step": 265210 }, { "epoch": 0.5357611800401588, "grad_norm": 529.0772094726562, "learning_rate": 5.341121218393555e-06, "loss": 13.5647, "step": 265220 }, { "epoch": 0.5357813806728426, "grad_norm": 189.75848388671875, "learning_rate": 5.340772965031439e-06, "loss": 11.9191, "step": 265230 }, { "epoch": 0.5358015813055265, "grad_norm": 48.30891036987305, "learning_rate": 5.340424710008434e-06, "loss": 19.925, "step": 265240 }, { "epoch": 0.5358217819382103, "grad_norm": 179.47984313964844, "learning_rate": 5.3400764533262415e-06, "loss": 26.0865, "step": 265250 }, { "epoch": 0.5358419825708941, "grad_norm": 579.6912841796875, "learning_rate": 5.339728194986559e-06, "loss": 17.8074, "step": 265260 }, { "epoch": 0.5358621832035779, "grad_norm": 443.2523498535156, "learning_rate": 5.339379934991079e-06, "loss": 24.461, "step": 265270 }, { "epoch": 0.5358823838362617, "grad_norm": 655.2186279296875, "learning_rate": 5.339031673341505e-06, "loss": 20.1031, "step": 265280 }, { "epoch": 0.5359025844689456, "grad_norm": 189.1371307373047, "learning_rate": 5.338683410039529e-06, "loss": 23.1255, "step": 265290 }, { "epoch": 0.5359227851016294, "grad_norm": 243.77195739746094, "learning_rate": 5.338335145086855e-06, "loss": 17.1901, "step": 265300 }, { "epoch": 0.5359429857343132, "grad_norm": 265.5787353515625, "learning_rate": 5.337986878485174e-06, "loss": 23.4474, "step": 265310 }, { "epoch": 0.535963186366997, "grad_norm": 517.0961303710938, "learning_rate": 5.337638610236186e-06, "loss": 13.078, "step": 265320 }, { "epoch": 0.5359833869996808, "grad_norm": 487.6114501953125, "learning_rate": 5.3372903403415896e-06, "loss": 12.1002, "step": 265330 }, { "epoch": 0.5360035876323647, "grad_norm": 82.22420501708984, "learning_rate": 5.336942068803081e-06, "loss": 12.6738, "step": 265340 }, { "epoch": 0.5360237882650485, "grad_norm": 509.4190673828125, "learning_rate": 5.336593795622357e-06, "loss": 14.7771, "step": 265350 }, { "epoch": 0.5360439888977323, "grad_norm": 313.20086669921875, "learning_rate": 5.336245520801115e-06, "loss": 12.552, "step": 265360 }, { "epoch": 0.5360641895304161, "grad_norm": 1148.6875, "learning_rate": 5.335897244341054e-06, "loss": 34.6896, "step": 265370 }, { "epoch": 0.5360843901630999, "grad_norm": 690.5263671875, "learning_rate": 5.33554896624387e-06, "loss": 18.4221, "step": 265380 }, { "epoch": 0.5361045907957838, "grad_norm": 207.52447509765625, "learning_rate": 5.335200686511262e-06, "loss": 10.7602, "step": 265390 }, { "epoch": 0.5361247914284676, "grad_norm": 132.81065368652344, "learning_rate": 5.334852405144926e-06, "loss": 13.416, "step": 265400 }, { "epoch": 0.5361449920611514, "grad_norm": 482.5021667480469, "learning_rate": 5.3345041221465586e-06, "loss": 16.366, 
"step": 265410 }, { "epoch": 0.5361651926938352, "grad_norm": 504.22662353515625, "learning_rate": 5.33415583751786e-06, "loss": 27.1793, "step": 265420 }, { "epoch": 0.5361853933265189, "grad_norm": 259.0811462402344, "learning_rate": 5.333807551260526e-06, "loss": 21.8486, "step": 265430 }, { "epoch": 0.5362055939592028, "grad_norm": 437.5867919921875, "learning_rate": 5.333459263376256e-06, "loss": 11.8019, "step": 265440 }, { "epoch": 0.5362257945918866, "grad_norm": 261.80950927734375, "learning_rate": 5.333110973866743e-06, "loss": 22.4439, "step": 265450 }, { "epoch": 0.5362459952245704, "grad_norm": 339.9731750488281, "learning_rate": 5.3327626827336906e-06, "loss": 18.0988, "step": 265460 }, { "epoch": 0.5362661958572542, "grad_norm": 6.128870487213135, "learning_rate": 5.332414389978792e-06, "loss": 13.528, "step": 265470 }, { "epoch": 0.536286396489938, "grad_norm": 386.41497802734375, "learning_rate": 5.332066095603745e-06, "loss": 26.8685, "step": 265480 }, { "epoch": 0.5363065971226219, "grad_norm": 173.89610290527344, "learning_rate": 5.33171779961025e-06, "loss": 12.9385, "step": 265490 }, { "epoch": 0.5363267977553057, "grad_norm": 117.55145263671875, "learning_rate": 5.3313695020000026e-06, "loss": 34.7294, "step": 265500 }, { "epoch": 0.5363469983879895, "grad_norm": 658.5884399414062, "learning_rate": 5.331021202774699e-06, "loss": 16.3639, "step": 265510 }, { "epoch": 0.5363671990206733, "grad_norm": 597.7453002929688, "learning_rate": 5.330672901936038e-06, "loss": 17.0038, "step": 265520 }, { "epoch": 0.5363873996533571, "grad_norm": 363.2785339355469, "learning_rate": 5.330324599485718e-06, "loss": 16.9144, "step": 265530 }, { "epoch": 0.536407600286041, "grad_norm": 480.3471984863281, "learning_rate": 5.329976295425437e-06, "loss": 7.5248, "step": 265540 }, { "epoch": 0.5364278009187248, "grad_norm": 242.0186767578125, "learning_rate": 5.32962798975689e-06, "loss": 9.2614, "step": 265550 }, { "epoch": 0.5364480015514086, "grad_norm": 327.3664855957031, "learning_rate": 5.329279682481776e-06, "loss": 11.1113, "step": 265560 }, { "epoch": 0.5364682021840924, "grad_norm": 95.38156127929688, "learning_rate": 5.328931373601794e-06, "loss": 15.4326, "step": 265570 }, { "epoch": 0.5364884028167762, "grad_norm": 186.45181274414062, "learning_rate": 5.328583063118641e-06, "loss": 21.2168, "step": 265580 }, { "epoch": 0.53650860344946, "grad_norm": 360.18255615234375, "learning_rate": 5.328234751034011e-06, "loss": 18.9164, "step": 265590 }, { "epoch": 0.5365288040821439, "grad_norm": 646.5363159179688, "learning_rate": 5.327886437349609e-06, "loss": 19.7649, "step": 265600 }, { "epoch": 0.5365490047148277, "grad_norm": 1159.917236328125, "learning_rate": 5.327538122067124e-06, "loss": 19.1672, "step": 265610 }, { "epoch": 0.5365692053475115, "grad_norm": 368.328369140625, "learning_rate": 5.327189805188261e-06, "loss": 20.3391, "step": 265620 }, { "epoch": 0.5365894059801953, "grad_norm": 289.69073486328125, "learning_rate": 5.326841486714713e-06, "loss": 21.6539, "step": 265630 }, { "epoch": 0.5366096066128792, "grad_norm": 823.12158203125, "learning_rate": 5.326493166648179e-06, "loss": 27.7374, "step": 265640 }, { "epoch": 0.536629807245563, "grad_norm": 100.33020782470703, "learning_rate": 5.326144844990357e-06, "loss": 26.8109, "step": 265650 }, { "epoch": 0.5366500078782468, "grad_norm": 919.996337890625, "learning_rate": 5.3257965217429465e-06, "loss": 14.6398, "step": 265660 }, { "epoch": 0.5366702085109306, "grad_norm": 547.6209716796875, "learning_rate": 
5.325448196907642e-06, "loss": 20.7509, "step": 265670 }, { "epoch": 0.5366904091436144, "grad_norm": 66.95198822021484, "learning_rate": 5.3250998704861425e-06, "loss": 17.0755, "step": 265680 }, { "epoch": 0.5367106097762981, "grad_norm": 407.3754577636719, "learning_rate": 5.324751542480144e-06, "loss": 23.6862, "step": 265690 }, { "epoch": 0.536730810408982, "grad_norm": 219.86447143554688, "learning_rate": 5.3244032128913485e-06, "loss": 23.5132, "step": 265700 }, { "epoch": 0.5367510110416658, "grad_norm": 139.1474151611328, "learning_rate": 5.32405488172145e-06, "loss": 24.8371, "step": 265710 }, { "epoch": 0.5367712116743496, "grad_norm": 184.0261993408203, "learning_rate": 5.3237065489721465e-06, "loss": 19.828, "step": 265720 }, { "epoch": 0.5367914123070334, "grad_norm": 397.114013671875, "learning_rate": 5.3233582146451375e-06, "loss": 36.484, "step": 265730 }, { "epoch": 0.5368116129397172, "grad_norm": 935.712890625, "learning_rate": 5.323009878742119e-06, "loss": 18.6447, "step": 265740 }, { "epoch": 0.5368318135724011, "grad_norm": 311.7372741699219, "learning_rate": 5.322661541264791e-06, "loss": 31.576, "step": 265750 }, { "epoch": 0.5368520142050849, "grad_norm": 748.21142578125, "learning_rate": 5.322313202214848e-06, "loss": 28.1338, "step": 265760 }, { "epoch": 0.5368722148377687, "grad_norm": 348.9870300292969, "learning_rate": 5.32196486159399e-06, "loss": 12.2605, "step": 265770 }, { "epoch": 0.5368924154704525, "grad_norm": 197.72886657714844, "learning_rate": 5.321616519403916e-06, "loss": 20.8514, "step": 265780 }, { "epoch": 0.5369126161031363, "grad_norm": 5.432241439819336, "learning_rate": 5.3212681756463205e-06, "loss": 22.8081, "step": 265790 }, { "epoch": 0.5369328167358202, "grad_norm": 96.3584213256836, "learning_rate": 5.320919830322903e-06, "loss": 13.6813, "step": 265800 }, { "epoch": 0.536953017368504, "grad_norm": 199.02757263183594, "learning_rate": 5.320571483435362e-06, "loss": 17.0379, "step": 265810 }, { "epoch": 0.5369732180011878, "grad_norm": 302.49151611328125, "learning_rate": 5.320223134985393e-06, "loss": 17.1833, "step": 265820 }, { "epoch": 0.5369934186338716, "grad_norm": 193.92271423339844, "learning_rate": 5.319874784974696e-06, "loss": 14.019, "step": 265830 }, { "epoch": 0.5370136192665554, "grad_norm": 475.6900329589844, "learning_rate": 5.319526433404969e-06, "loss": 20.0408, "step": 265840 }, { "epoch": 0.5370338198992393, "grad_norm": 348.04736328125, "learning_rate": 5.319178080277908e-06, "loss": 15.6812, "step": 265850 }, { "epoch": 0.5370540205319231, "grad_norm": 248.185302734375, "learning_rate": 5.318829725595212e-06, "loss": 6.4697, "step": 265860 }, { "epoch": 0.5370742211646069, "grad_norm": 264.96893310546875, "learning_rate": 5.318481369358579e-06, "loss": 10.0228, "step": 265870 }, { "epoch": 0.5370944217972907, "grad_norm": 385.7796630859375, "learning_rate": 5.318133011569704e-06, "loss": 22.5801, "step": 265880 }, { "epoch": 0.5371146224299745, "grad_norm": 699.6243286132812, "learning_rate": 5.31778465223029e-06, "loss": 26.2126, "step": 265890 }, { "epoch": 0.5371348230626584, "grad_norm": 434.1804504394531, "learning_rate": 5.317436291342031e-06, "loss": 23.8011, "step": 265900 }, { "epoch": 0.5371550236953422, "grad_norm": 483.005615234375, "learning_rate": 5.3170879289066265e-06, "loss": 23.9942, "step": 265910 }, { "epoch": 0.537175224328026, "grad_norm": 495.5999450683594, "learning_rate": 5.316739564925773e-06, "loss": 16.2944, "step": 265920 }, { "epoch": 0.5371954249607098, "grad_norm": 
310.24444580078125, "learning_rate": 5.3163911994011705e-06, "loss": 45.5477, "step": 265930 }, { "epoch": 0.5372156255933935, "grad_norm": 141.1289520263672, "learning_rate": 5.316042832334516e-06, "loss": 11.9408, "step": 265940 }, { "epoch": 0.5372358262260774, "grad_norm": 497.3875427246094, "learning_rate": 5.315694463727506e-06, "loss": 17.9419, "step": 265950 }, { "epoch": 0.5372560268587612, "grad_norm": 814.2578735351562, "learning_rate": 5.3153460935818405e-06, "loss": 22.0574, "step": 265960 }, { "epoch": 0.537276227491445, "grad_norm": 87.76453399658203, "learning_rate": 5.314997721899214e-06, "loss": 19.425, "step": 265970 }, { "epoch": 0.5372964281241288, "grad_norm": 379.69281005859375, "learning_rate": 5.3146493486813285e-06, "loss": 35.2839, "step": 265980 }, { "epoch": 0.5373166287568126, "grad_norm": 507.013671875, "learning_rate": 5.31430097392988e-06, "loss": 24.9944, "step": 265990 }, { "epoch": 0.5373368293894965, "grad_norm": 222.95751953125, "learning_rate": 5.3139525976465675e-06, "loss": 21.1369, "step": 266000 }, { "epoch": 0.5373570300221803, "grad_norm": 295.1164245605469, "learning_rate": 5.313604219833087e-06, "loss": 18.3363, "step": 266010 }, { "epoch": 0.5373772306548641, "grad_norm": 584.6785278320312, "learning_rate": 5.313255840491138e-06, "loss": 24.1707, "step": 266020 }, { "epoch": 0.5373974312875479, "grad_norm": 1128.349365234375, "learning_rate": 5.312907459622418e-06, "loss": 39.631, "step": 266030 }, { "epoch": 0.5374176319202317, "grad_norm": 30.17184829711914, "learning_rate": 5.3125590772286255e-06, "loss": 23.6821, "step": 266040 }, { "epoch": 0.5374378325529156, "grad_norm": 455.755615234375, "learning_rate": 5.312210693311458e-06, "loss": 28.8897, "step": 266050 }, { "epoch": 0.5374580331855994, "grad_norm": 327.5693664550781, "learning_rate": 5.311862307872611e-06, "loss": 30.3681, "step": 266060 }, { "epoch": 0.5374782338182832, "grad_norm": 410.9100646972656, "learning_rate": 5.311513920913789e-06, "loss": 15.8005, "step": 266070 }, { "epoch": 0.537498434450967, "grad_norm": 311.3714599609375, "learning_rate": 5.311165532436683e-06, "loss": 28.2955, "step": 266080 }, { "epoch": 0.5375186350836508, "grad_norm": 184.61801147460938, "learning_rate": 5.310817142442995e-06, "loss": 9.4785, "step": 266090 }, { "epoch": 0.5375388357163347, "grad_norm": 302.0754699707031, "learning_rate": 5.310468750934421e-06, "loss": 12.7787, "step": 266100 }, { "epoch": 0.5375590363490185, "grad_norm": 229.22055053710938, "learning_rate": 5.310120357912661e-06, "loss": 16.4678, "step": 266110 }, { "epoch": 0.5375792369817023, "grad_norm": 55.7581787109375, "learning_rate": 5.309771963379412e-06, "loss": 19.0601, "step": 266120 }, { "epoch": 0.5375994376143861, "grad_norm": 637.02734375, "learning_rate": 5.309423567336371e-06, "loss": 21.7023, "step": 266130 }, { "epoch": 0.5376196382470699, "grad_norm": 1429.7181396484375, "learning_rate": 5.309075169785238e-06, "loss": 47.7239, "step": 266140 }, { "epoch": 0.5376398388797538, "grad_norm": 313.2904968261719, "learning_rate": 5.30872677072771e-06, "loss": 12.5589, "step": 266150 }, { "epoch": 0.5376600395124376, "grad_norm": 704.8517456054688, "learning_rate": 5.308378370165486e-06, "loss": 36.1345, "step": 266160 }, { "epoch": 0.5376802401451214, "grad_norm": 615.5545043945312, "learning_rate": 5.308029968100261e-06, "loss": 17.5777, "step": 266170 }, { "epoch": 0.5377004407778052, "grad_norm": 398.2856140136719, "learning_rate": 5.307681564533736e-06, "loss": 30.5398, "step": 266180 }, { "epoch": 
0.537720641410489, "grad_norm": 374.3896179199219, "learning_rate": 5.307333159467609e-06, "loss": 14.7331, "step": 266190 }, { "epoch": 0.5377408420431727, "grad_norm": 242.48318481445312, "learning_rate": 5.306984752903578e-06, "loss": 21.0312, "step": 266200 }, { "epoch": 0.5377610426758566, "grad_norm": 848.95849609375, "learning_rate": 5.3066363448433414e-06, "loss": 15.3074, "step": 266210 }, { "epoch": 0.5377812433085404, "grad_norm": 334.0947265625, "learning_rate": 5.306287935288593e-06, "loss": 31.8886, "step": 266220 }, { "epoch": 0.5378014439412242, "grad_norm": 628.0471801757812, "learning_rate": 5.305939524241037e-06, "loss": 22.4649, "step": 266230 }, { "epoch": 0.537821644573908, "grad_norm": 388.18621826171875, "learning_rate": 5.305591111702368e-06, "loss": 35.5274, "step": 266240 }, { "epoch": 0.5378418452065918, "grad_norm": 321.6109619140625, "learning_rate": 5.3052426976742855e-06, "loss": 14.3846, "step": 266250 }, { "epoch": 0.5378620458392757, "grad_norm": 4.319701671600342, "learning_rate": 5.304894282158486e-06, "loss": 13.5861, "step": 266260 }, { "epoch": 0.5378822464719595, "grad_norm": 72.18042755126953, "learning_rate": 5.304545865156669e-06, "loss": 22.5068, "step": 266270 }, { "epoch": 0.5379024471046433, "grad_norm": 151.73434448242188, "learning_rate": 5.3041974466705335e-06, "loss": 24.1643, "step": 266280 }, { "epoch": 0.5379226477373271, "grad_norm": 489.5429992675781, "learning_rate": 5.303849026701776e-06, "loss": 15.1565, "step": 266290 }, { "epoch": 0.5379428483700109, "grad_norm": 852.3153076171875, "learning_rate": 5.303500605252095e-06, "loss": 18.5286, "step": 266300 }, { "epoch": 0.5379630490026948, "grad_norm": 391.2786865234375, "learning_rate": 5.303152182323189e-06, "loss": 14.128, "step": 266310 }, { "epoch": 0.5379832496353786, "grad_norm": 1297.095947265625, "learning_rate": 5.302803757916757e-06, "loss": 22.1098, "step": 266320 }, { "epoch": 0.5380034502680624, "grad_norm": 126.69831085205078, "learning_rate": 5.302455332034494e-06, "loss": 20.9619, "step": 266330 }, { "epoch": 0.5380236509007462, "grad_norm": 473.3225402832031, "learning_rate": 5.3021069046781025e-06, "loss": 18.761, "step": 266340 }, { "epoch": 0.53804385153343, "grad_norm": 582.2522583007812, "learning_rate": 5.301758475849278e-06, "loss": 10.9951, "step": 266350 }, { "epoch": 0.5380640521661139, "grad_norm": 348.973876953125, "learning_rate": 5.301410045549719e-06, "loss": 22.3628, "step": 266360 }, { "epoch": 0.5380842527987977, "grad_norm": 660.4752807617188, "learning_rate": 5.301061613781123e-06, "loss": 17.2147, "step": 266370 }, { "epoch": 0.5381044534314815, "grad_norm": 382.2339782714844, "learning_rate": 5.300713180545189e-06, "loss": 17.9901, "step": 266380 }, { "epoch": 0.5381246540641653, "grad_norm": 213.0685577392578, "learning_rate": 5.300364745843618e-06, "loss": 20.0158, "step": 266390 }, { "epoch": 0.5381448546968491, "grad_norm": 420.6485290527344, "learning_rate": 5.300016309678104e-06, "loss": 24.067, "step": 266400 }, { "epoch": 0.538165055329533, "grad_norm": 139.35597229003906, "learning_rate": 5.299667872050348e-06, "loss": 45.067, "step": 266410 }, { "epoch": 0.5381852559622168, "grad_norm": 586.6095581054688, "learning_rate": 5.299319432962046e-06, "loss": 22.0159, "step": 266420 }, { "epoch": 0.5382054565949006, "grad_norm": 281.5572814941406, "learning_rate": 5.298970992414897e-06, "loss": 17.244, "step": 266430 }, { "epoch": 0.5382256572275844, "grad_norm": 395.41571044921875, "learning_rate": 5.2986225504106e-06, "loss": 12.9337, 
"step": 266440 }, { "epoch": 0.5382458578602681, "grad_norm": 293.61102294921875, "learning_rate": 5.298274106950855e-06, "loss": 14.9702, "step": 266450 }, { "epoch": 0.538266058492952, "grad_norm": 419.4900817871094, "learning_rate": 5.297925662037356e-06, "loss": 6.5225, "step": 266460 }, { "epoch": 0.5382862591256358, "grad_norm": 711.2694091796875, "learning_rate": 5.297577215671803e-06, "loss": 17.3594, "step": 266470 }, { "epoch": 0.5383064597583196, "grad_norm": 271.6884460449219, "learning_rate": 5.297228767855898e-06, "loss": 15.5889, "step": 266480 }, { "epoch": 0.5383266603910034, "grad_norm": 25.23544692993164, "learning_rate": 5.296880318591331e-06, "loss": 32.5112, "step": 266490 }, { "epoch": 0.5383468610236872, "grad_norm": 230.8773193359375, "learning_rate": 5.296531867879809e-06, "loss": 11.6164, "step": 266500 }, { "epoch": 0.538367061656371, "grad_norm": 308.32122802734375, "learning_rate": 5.296183415723024e-06, "loss": 21.5435, "step": 266510 }, { "epoch": 0.5383872622890549, "grad_norm": 521.9244384765625, "learning_rate": 5.29583496212268e-06, "loss": 19.3713, "step": 266520 }, { "epoch": 0.5384074629217387, "grad_norm": 428.1097717285156, "learning_rate": 5.2954865070804705e-06, "loss": 11.197, "step": 266530 }, { "epoch": 0.5384276635544225, "grad_norm": 313.6487731933594, "learning_rate": 5.295138050598097e-06, "loss": 17.1205, "step": 266540 }, { "epoch": 0.5384478641871063, "grad_norm": 500.1407470703125, "learning_rate": 5.294789592677255e-06, "loss": 19.4713, "step": 266550 }, { "epoch": 0.5384680648197901, "grad_norm": 61.38530349731445, "learning_rate": 5.2944411333196445e-06, "loss": 12.1264, "step": 266560 }, { "epoch": 0.538488265452474, "grad_norm": 93.75505065917969, "learning_rate": 5.294092672526963e-06, "loss": 18.3813, "step": 266570 }, { "epoch": 0.5385084660851578, "grad_norm": 318.5310363769531, "learning_rate": 5.293744210300911e-06, "loss": 13.4829, "step": 266580 }, { "epoch": 0.5385286667178416, "grad_norm": 590.1112060546875, "learning_rate": 5.293395746643184e-06, "loss": 17.2738, "step": 266590 }, { "epoch": 0.5385488673505254, "grad_norm": 513.4954223632812, "learning_rate": 5.293047281555482e-06, "loss": 32.3895, "step": 266600 }, { "epoch": 0.5385690679832092, "grad_norm": 461.12060546875, "learning_rate": 5.292698815039505e-06, "loss": 12.3369, "step": 266610 }, { "epoch": 0.5385892686158931, "grad_norm": 617.8843994140625, "learning_rate": 5.292350347096949e-06, "loss": 15.8319, "step": 266620 }, { "epoch": 0.5386094692485769, "grad_norm": 57.038421630859375, "learning_rate": 5.29200187772951e-06, "loss": 13.7819, "step": 266630 }, { "epoch": 0.5386296698812607, "grad_norm": 592.28466796875, "learning_rate": 5.291653406938892e-06, "loss": 24.1666, "step": 266640 }, { "epoch": 0.5386498705139445, "grad_norm": 262.87371826171875, "learning_rate": 5.291304934726789e-06, "loss": 18.2219, "step": 266650 }, { "epoch": 0.5386700711466283, "grad_norm": 327.9078369140625, "learning_rate": 5.290956461094903e-06, "loss": 25.113, "step": 266660 }, { "epoch": 0.5386902717793122, "grad_norm": 344.01123046875, "learning_rate": 5.290607986044928e-06, "loss": 43.9331, "step": 266670 }, { "epoch": 0.538710472411996, "grad_norm": 587.4368896484375, "learning_rate": 5.290259509578567e-06, "loss": 19.13, "step": 266680 }, { "epoch": 0.5387306730446798, "grad_norm": 274.8553466796875, "learning_rate": 5.289911031697515e-06, "loss": 13.857, "step": 266690 }, { "epoch": 0.5387508736773636, "grad_norm": 161.65403747558594, "learning_rate": 
5.289562552403472e-06, "loss": 15.1486, "step": 266700 }, { "epoch": 0.5387710743100473, "grad_norm": 561.6729736328125, "learning_rate": 5.289214071698138e-06, "loss": 13.5816, "step": 266710 }, { "epoch": 0.5387912749427312, "grad_norm": 635.9703369140625, "learning_rate": 5.2888655895832075e-06, "loss": 27.2917, "step": 266720 }, { "epoch": 0.538811475575415, "grad_norm": 506.6813659667969, "learning_rate": 5.288517106060383e-06, "loss": 13.8341, "step": 266730 }, { "epoch": 0.5388316762080988, "grad_norm": 993.2285766601562, "learning_rate": 5.288168621131359e-06, "loss": 21.6998, "step": 266740 }, { "epoch": 0.5388518768407826, "grad_norm": 94.2969741821289, "learning_rate": 5.287820134797837e-06, "loss": 14.8746, "step": 266750 }, { "epoch": 0.5388720774734664, "grad_norm": 326.3339538574219, "learning_rate": 5.287471647061515e-06, "loss": 18.561, "step": 266760 }, { "epoch": 0.5388922781061503, "grad_norm": 454.34283447265625, "learning_rate": 5.2871231579240916e-06, "loss": 14.6379, "step": 266770 }, { "epoch": 0.5389124787388341, "grad_norm": 159.68142700195312, "learning_rate": 5.286774667387262e-06, "loss": 18.4233, "step": 266780 }, { "epoch": 0.5389326793715179, "grad_norm": 461.82659912109375, "learning_rate": 5.28642617545273e-06, "loss": 12.2659, "step": 266790 }, { "epoch": 0.5389528800042017, "grad_norm": 282.8290710449219, "learning_rate": 5.2860776821221915e-06, "loss": 9.7265, "step": 266800 }, { "epoch": 0.5389730806368855, "grad_norm": 1727.9700927734375, "learning_rate": 5.285729187397344e-06, "loss": 27.368, "step": 266810 }, { "epoch": 0.5389932812695694, "grad_norm": 12628.2509765625, "learning_rate": 5.285380691279889e-06, "loss": 27.0251, "step": 266820 }, { "epoch": 0.5390134819022532, "grad_norm": 397.4987487792969, "learning_rate": 5.2850321937715195e-06, "loss": 8.3365, "step": 266830 }, { "epoch": 0.539033682534937, "grad_norm": 37.92121505737305, "learning_rate": 5.284683694873941e-06, "loss": 16.2793, "step": 266840 }, { "epoch": 0.5390538831676208, "grad_norm": 811.298095703125, "learning_rate": 5.284335194588848e-06, "loss": 32.7642, "step": 266850 }, { "epoch": 0.5390740838003046, "grad_norm": 731.6546020507812, "learning_rate": 5.28398669291794e-06, "loss": 26.945, "step": 266860 }, { "epoch": 0.5390942844329885, "grad_norm": 415.3759460449219, "learning_rate": 5.283638189862914e-06, "loss": 21.0374, "step": 266870 }, { "epoch": 0.5391144850656723, "grad_norm": 83.38080596923828, "learning_rate": 5.28328968542547e-06, "loss": 17.559, "step": 266880 }, { "epoch": 0.5391346856983561, "grad_norm": 6.075273036956787, "learning_rate": 5.2829411796073085e-06, "loss": 16.2683, "step": 266890 }, { "epoch": 0.5391548863310399, "grad_norm": 128.59246826171875, "learning_rate": 5.282592672410124e-06, "loss": 12.9341, "step": 266900 }, { "epoch": 0.5391750869637237, "grad_norm": 199.89334106445312, "learning_rate": 5.282244163835617e-06, "loss": 18.6665, "step": 266910 }, { "epoch": 0.5391952875964076, "grad_norm": 18.035985946655273, "learning_rate": 5.281895653885486e-06, "loss": 36.1795, "step": 266920 }, { "epoch": 0.5392154882290914, "grad_norm": 919.3248901367188, "learning_rate": 5.2815471425614315e-06, "loss": 24.2861, "step": 266930 }, { "epoch": 0.5392356888617752, "grad_norm": 145.3681182861328, "learning_rate": 5.281198629865149e-06, "loss": 23.8972, "step": 266940 }, { "epoch": 0.539255889494459, "grad_norm": 48.641456604003906, "learning_rate": 5.28085011579834e-06, "loss": 26.0535, "step": 266950 }, { "epoch": 0.5392760901271428, "grad_norm": 
470.5023498535156, "learning_rate": 5.2805016003627e-06, "loss": 19.4136, "step": 266960 }, { "epoch": 0.5392962907598265, "grad_norm": 451.8315734863281, "learning_rate": 5.2801530835599295e-06, "loss": 18.7958, "step": 266970 }, { "epoch": 0.5393164913925104, "grad_norm": 201.1184844970703, "learning_rate": 5.2798045653917275e-06, "loss": 8.757, "step": 266980 }, { "epoch": 0.5393366920251942, "grad_norm": 233.8871307373047, "learning_rate": 5.27945604585979e-06, "loss": 18.3737, "step": 266990 }, { "epoch": 0.539356892657878, "grad_norm": 332.1441955566406, "learning_rate": 5.27910752496582e-06, "loss": 14.6293, "step": 267000 }, { "epoch": 0.5393770932905618, "grad_norm": 530.1821899414062, "learning_rate": 5.278759002711513e-06, "loss": 26.4881, "step": 267010 }, { "epoch": 0.5393972939232456, "grad_norm": 262.960693359375, "learning_rate": 5.278410479098568e-06, "loss": 29.0102, "step": 267020 }, { "epoch": 0.5394174945559295, "grad_norm": 546.9447021484375, "learning_rate": 5.278061954128684e-06, "loss": 24.2532, "step": 267030 }, { "epoch": 0.5394376951886133, "grad_norm": 283.64508056640625, "learning_rate": 5.27771342780356e-06, "loss": 20.5688, "step": 267040 }, { "epoch": 0.5394578958212971, "grad_norm": 308.6816101074219, "learning_rate": 5.2773649001248965e-06, "loss": 19.5089, "step": 267050 }, { "epoch": 0.5394780964539809, "grad_norm": 499.01544189453125, "learning_rate": 5.277016371094388e-06, "loss": 13.3796, "step": 267060 }, { "epoch": 0.5394982970866647, "grad_norm": 1139.388427734375, "learning_rate": 5.276667840713735e-06, "loss": 19.0707, "step": 267070 }, { "epoch": 0.5395184977193486, "grad_norm": 236.742431640625, "learning_rate": 5.276319308984637e-06, "loss": 39.8516, "step": 267080 }, { "epoch": 0.5395386983520324, "grad_norm": 527.2527465820312, "learning_rate": 5.275970775908793e-06, "loss": 16.3725, "step": 267090 }, { "epoch": 0.5395588989847162, "grad_norm": 308.12030029296875, "learning_rate": 5.275622241487899e-06, "loss": 17.6134, "step": 267100 }, { "epoch": 0.5395790996174, "grad_norm": 303.1805725097656, "learning_rate": 5.275273705723657e-06, "loss": 7.6083, "step": 267110 }, { "epoch": 0.5395993002500838, "grad_norm": 1047.5565185546875, "learning_rate": 5.274925168617763e-06, "loss": 31.721, "step": 267120 }, { "epoch": 0.5396195008827677, "grad_norm": 816.3365478515625, "learning_rate": 5.274576630171919e-06, "loss": 25.0034, "step": 267130 }, { "epoch": 0.5396397015154515, "grad_norm": 320.99114990234375, "learning_rate": 5.274228090387821e-06, "loss": 30.0734, "step": 267140 }, { "epoch": 0.5396599021481353, "grad_norm": 137.37452697753906, "learning_rate": 5.273879549267168e-06, "loss": 24.587, "step": 267150 }, { "epoch": 0.5396801027808191, "grad_norm": 1203.2591552734375, "learning_rate": 5.2735310068116605e-06, "loss": 33.0592, "step": 267160 }, { "epoch": 0.539700303413503, "grad_norm": 397.1142578125, "learning_rate": 5.2731824630229955e-06, "loss": 26.8125, "step": 267170 }, { "epoch": 0.5397205040461868, "grad_norm": 462.3182067871094, "learning_rate": 5.272833917902872e-06, "loss": 22.8828, "step": 267180 }, { "epoch": 0.5397407046788706, "grad_norm": 593.2162475585938, "learning_rate": 5.27248537145299e-06, "loss": 18.0968, "step": 267190 }, { "epoch": 0.5397609053115544, "grad_norm": 804.9915161132812, "learning_rate": 5.272136823675046e-06, "loss": 33.5634, "step": 267200 }, { "epoch": 0.5397811059442382, "grad_norm": 453.7289123535156, "learning_rate": 5.271788274570741e-06, "loss": 11.2804, "step": 267210 }, { "epoch": 
0.5398013065769219, "grad_norm": 412.3597717285156, "learning_rate": 5.2714397241417736e-06, "loss": 14.8988, "step": 267220 }, { "epoch": 0.5398215072096058, "grad_norm": 485.5625915527344, "learning_rate": 5.271091172389841e-06, "loss": 23.9658, "step": 267230 }, { "epoch": 0.5398417078422896, "grad_norm": 442.7439270019531, "learning_rate": 5.270742619316642e-06, "loss": 26.0454, "step": 267240 }, { "epoch": 0.5398619084749734, "grad_norm": 297.7681579589844, "learning_rate": 5.270394064923878e-06, "loss": 17.0849, "step": 267250 }, { "epoch": 0.5398821091076572, "grad_norm": 182.657958984375, "learning_rate": 5.270045509213244e-06, "loss": 17.654, "step": 267260 }, { "epoch": 0.539902309740341, "grad_norm": 176.88636779785156, "learning_rate": 5.2696969521864435e-06, "loss": 22.5983, "step": 267270 }, { "epoch": 0.5399225103730249, "grad_norm": 457.3097839355469, "learning_rate": 5.2693483938451705e-06, "loss": 12.3397, "step": 267280 }, { "epoch": 0.5399427110057087, "grad_norm": 396.48687744140625, "learning_rate": 5.268999834191128e-06, "loss": 18.101, "step": 267290 }, { "epoch": 0.5399629116383925, "grad_norm": 133.23741149902344, "learning_rate": 5.268651273226011e-06, "loss": 16.3219, "step": 267300 }, { "epoch": 0.5399831122710763, "grad_norm": 331.26904296875, "learning_rate": 5.268302710951522e-06, "loss": 16.586, "step": 267310 }, { "epoch": 0.5400033129037601, "grad_norm": 266.48883056640625, "learning_rate": 5.267954147369359e-06, "loss": 44.3651, "step": 267320 }, { "epoch": 0.540023513536444, "grad_norm": 179.49365234375, "learning_rate": 5.267605582481216e-06, "loss": 36.2442, "step": 267330 }, { "epoch": 0.5400437141691278, "grad_norm": 267.8640441894531, "learning_rate": 5.2672570162887996e-06, "loss": 33.6776, "step": 267340 }, { "epoch": 0.5400639148018116, "grad_norm": 326.7994079589844, "learning_rate": 5.2669084487938025e-06, "loss": 16.1556, "step": 267350 }, { "epoch": 0.5400841154344954, "grad_norm": 774.1517333984375, "learning_rate": 5.266559879997928e-06, "loss": 28.1933, "step": 267360 }, { "epoch": 0.5401043160671792, "grad_norm": 384.9989318847656, "learning_rate": 5.266211309902871e-06, "loss": 11.1729, "step": 267370 }, { "epoch": 0.540124516699863, "grad_norm": 370.0032653808594, "learning_rate": 5.265862738510335e-06, "loss": 24.5614, "step": 267380 }, { "epoch": 0.5401447173325469, "grad_norm": 230.2034454345703, "learning_rate": 5.265514165822014e-06, "loss": 18.2394, "step": 267390 }, { "epoch": 0.5401649179652307, "grad_norm": 593.0401000976562, "learning_rate": 5.26516559183961e-06, "loss": 22.137, "step": 267400 }, { "epoch": 0.5401851185979145, "grad_norm": 378.4549560546875, "learning_rate": 5.26481701656482e-06, "loss": 14.3919, "step": 267410 }, { "epoch": 0.5402053192305983, "grad_norm": 608.0015869140625, "learning_rate": 5.264468439999345e-06, "loss": 14.4138, "step": 267420 }, { "epoch": 0.5402255198632822, "grad_norm": 188.7982940673828, "learning_rate": 5.2641198621448845e-06, "loss": 24.5316, "step": 267430 }, { "epoch": 0.540245720495966, "grad_norm": 646.892822265625, "learning_rate": 5.263771283003133e-06, "loss": 30.7849, "step": 267440 }, { "epoch": 0.5402659211286498, "grad_norm": 83.54496765136719, "learning_rate": 5.263422702575793e-06, "loss": 17.6118, "step": 267450 }, { "epoch": 0.5402861217613336, "grad_norm": 192.852783203125, "learning_rate": 5.263074120864564e-06, "loss": 15.532, "step": 267460 }, { "epoch": 0.5403063223940174, "grad_norm": 295.820556640625, "learning_rate": 5.2627255378711414e-06, "loss": 16.4569, 
"step": 267470 }, { "epoch": 0.5403265230267011, "grad_norm": 378.82110595703125, "learning_rate": 5.262376953597228e-06, "loss": 11.037, "step": 267480 }, { "epoch": 0.540346723659385, "grad_norm": 229.91859436035156, "learning_rate": 5.26202836804452e-06, "loss": 10.3388, "step": 267490 }, { "epoch": 0.5403669242920688, "grad_norm": 309.54864501953125, "learning_rate": 5.2616797812147205e-06, "loss": 26.4148, "step": 267500 }, { "epoch": 0.5403871249247526, "grad_norm": 227.2258758544922, "learning_rate": 5.261331193109524e-06, "loss": 14.9826, "step": 267510 }, { "epoch": 0.5404073255574364, "grad_norm": 405.8484191894531, "learning_rate": 5.260982603730629e-06, "loss": 32.9173, "step": 267520 }, { "epoch": 0.5404275261901202, "grad_norm": 359.85107421875, "learning_rate": 5.260634013079738e-06, "loss": 29.7109, "step": 267530 }, { "epoch": 0.5404477268228041, "grad_norm": 280.190673828125, "learning_rate": 5.260285421158548e-06, "loss": 13.0215, "step": 267540 }, { "epoch": 0.5404679274554879, "grad_norm": 438.93182373046875, "learning_rate": 5.259936827968758e-06, "loss": 15.9954, "step": 267550 }, { "epoch": 0.5404881280881717, "grad_norm": 421.66094970703125, "learning_rate": 5.25958823351207e-06, "loss": 14.2632, "step": 267560 }, { "epoch": 0.5405083287208555, "grad_norm": 798.4306640625, "learning_rate": 5.259239637790178e-06, "loss": 26.1816, "step": 267570 }, { "epoch": 0.5405285293535393, "grad_norm": 48.887691497802734, "learning_rate": 5.258891040804783e-06, "loss": 15.2979, "step": 267580 }, { "epoch": 0.5405487299862232, "grad_norm": 1093.5631103515625, "learning_rate": 5.258542442557586e-06, "loss": 40.7765, "step": 267590 }, { "epoch": 0.540568930618907, "grad_norm": 53.55421447753906, "learning_rate": 5.258193843050283e-06, "loss": 23.1957, "step": 267600 }, { "epoch": 0.5405891312515908, "grad_norm": 135.53494262695312, "learning_rate": 5.257845242284576e-06, "loss": 20.9141, "step": 267610 }, { "epoch": 0.5406093318842746, "grad_norm": 1050.6634521484375, "learning_rate": 5.2574966402621615e-06, "loss": 30.1993, "step": 267620 }, { "epoch": 0.5406295325169584, "grad_norm": 712.6954345703125, "learning_rate": 5.25714803698474e-06, "loss": 21.2461, "step": 267630 }, { "epoch": 0.5406497331496423, "grad_norm": 475.6708679199219, "learning_rate": 5.25679943245401e-06, "loss": 19.1392, "step": 267640 }, { "epoch": 0.5406699337823261, "grad_norm": 401.18487548828125, "learning_rate": 5.256450826671671e-06, "loss": 18.4018, "step": 267650 }, { "epoch": 0.5406901344150099, "grad_norm": 669.2423095703125, "learning_rate": 5.256102219639423e-06, "loss": 23.2288, "step": 267660 }, { "epoch": 0.5407103350476937, "grad_norm": 739.7628784179688, "learning_rate": 5.2557536113589625e-06, "loss": 18.7592, "step": 267670 }, { "epoch": 0.5407305356803775, "grad_norm": 198.78236389160156, "learning_rate": 5.25540500183199e-06, "loss": 22.221, "step": 267680 }, { "epoch": 0.5407507363130614, "grad_norm": 156.10227966308594, "learning_rate": 5.2550563910602035e-06, "loss": 12.436, "step": 267690 }, { "epoch": 0.5407709369457452, "grad_norm": 215.703369140625, "learning_rate": 5.254707779045305e-06, "loss": 18.5177, "step": 267700 }, { "epoch": 0.540791137578429, "grad_norm": 460.5707702636719, "learning_rate": 5.25435916578899e-06, "loss": 21.5672, "step": 267710 }, { "epoch": 0.5408113382111128, "grad_norm": 178.6480255126953, "learning_rate": 5.254010551292961e-06, "loss": 25.5017, "step": 267720 }, { "epoch": 0.5408315388437965, "grad_norm": 196.70570373535156, "learning_rate": 
5.253661935558914e-06, "loss": 15.2379, "step": 267730 }, { "epoch": 0.5408517394764804, "grad_norm": 361.7843017578125, "learning_rate": 5.25331331858855e-06, "loss": 11.7066, "step": 267740 }, { "epoch": 0.5408719401091642, "grad_norm": 279.7679748535156, "learning_rate": 5.252964700383567e-06, "loss": 13.664, "step": 267750 }, { "epoch": 0.540892140741848, "grad_norm": 196.13653564453125, "learning_rate": 5.252616080945665e-06, "loss": 14.2319, "step": 267760 }, { "epoch": 0.5409123413745318, "grad_norm": 273.5278015136719, "learning_rate": 5.252267460276544e-06, "loss": 33.3677, "step": 267770 }, { "epoch": 0.5409325420072156, "grad_norm": 220.14305114746094, "learning_rate": 5.2519188383779e-06, "loss": 11.0212, "step": 267780 }, { "epoch": 0.5409527426398995, "grad_norm": 286.4742431640625, "learning_rate": 5.251570215251436e-06, "loss": 22.6878, "step": 267790 }, { "epoch": 0.5409729432725833, "grad_norm": 8.339970588684082, "learning_rate": 5.251221590898848e-06, "loss": 22.2532, "step": 267800 }, { "epoch": 0.5409931439052671, "grad_norm": 316.2557067871094, "learning_rate": 5.250872965321837e-06, "loss": 18.4021, "step": 267810 }, { "epoch": 0.5410133445379509, "grad_norm": 287.31121826171875, "learning_rate": 5.250524338522102e-06, "loss": 13.7786, "step": 267820 }, { "epoch": 0.5410335451706347, "grad_norm": 74.9324722290039, "learning_rate": 5.250175710501342e-06, "loss": 9.5435, "step": 267830 }, { "epoch": 0.5410537458033186, "grad_norm": 336.6729736328125, "learning_rate": 5.249827081261255e-06, "loss": 15.7134, "step": 267840 }, { "epoch": 0.5410739464360024, "grad_norm": 230.1107635498047, "learning_rate": 5.249478450803541e-06, "loss": 19.7327, "step": 267850 }, { "epoch": 0.5410941470686862, "grad_norm": 105.64854431152344, "learning_rate": 5.2491298191298986e-06, "loss": 22.1532, "step": 267860 }, { "epoch": 0.54111434770137, "grad_norm": 268.0977783203125, "learning_rate": 5.248781186242029e-06, "loss": 12.7907, "step": 267870 }, { "epoch": 0.5411345483340538, "grad_norm": 396.26824951171875, "learning_rate": 5.2484325521416315e-06, "loss": 12.5745, "step": 267880 }, { "epoch": 0.5411547489667377, "grad_norm": 264.67877197265625, "learning_rate": 5.2480839168304e-06, "loss": 13.2185, "step": 267890 }, { "epoch": 0.5411749495994215, "grad_norm": 726.4971313476562, "learning_rate": 5.247735280310041e-06, "loss": 14.9952, "step": 267900 }, { "epoch": 0.5411951502321053, "grad_norm": 745.1004638671875, "learning_rate": 5.247386642582248e-06, "loss": 19.5732, "step": 267910 }, { "epoch": 0.5412153508647891, "grad_norm": 595.3374633789062, "learning_rate": 5.2470380036487245e-06, "loss": 16.2793, "step": 267920 }, { "epoch": 0.5412355514974729, "grad_norm": 361.1235046386719, "learning_rate": 5.246689363511167e-06, "loss": 25.6698, "step": 267930 }, { "epoch": 0.5412557521301568, "grad_norm": 419.58831787109375, "learning_rate": 5.2463407221712745e-06, "loss": 19.84, "step": 267940 }, { "epoch": 0.5412759527628406, "grad_norm": 489.6854553222656, "learning_rate": 5.245992079630748e-06, "loss": 18.5201, "step": 267950 }, { "epoch": 0.5412961533955244, "grad_norm": 530.859130859375, "learning_rate": 5.2456434358912865e-06, "loss": 26.3349, "step": 267960 }, { "epoch": 0.5413163540282082, "grad_norm": 123.85330200195312, "learning_rate": 5.245294790954587e-06, "loss": 16.3806, "step": 267970 }, { "epoch": 0.541336554660892, "grad_norm": 94.74688720703125, "learning_rate": 5.244946144822351e-06, "loss": 10.811, "step": 267980 }, { "epoch": 0.5413567552935757, "grad_norm": 
700.8999633789062, "learning_rate": 5.24459749749628e-06, "loss": 18.7394, "step": 267990 }, { "epoch": 0.5413769559262596, "grad_norm": 251.6261444091797, "learning_rate": 5.244248848978067e-06, "loss": 11.436, "step": 268000 }, { "epoch": 0.5413971565589434, "grad_norm": 291.4370422363281, "learning_rate": 5.243900199269416e-06, "loss": 17.3246, "step": 268010 }, { "epoch": 0.5414173571916272, "grad_norm": 607.9581909179688, "learning_rate": 5.2435515483720246e-06, "loss": 22.6848, "step": 268020 }, { "epoch": 0.541437557824311, "grad_norm": 525.0802612304688, "learning_rate": 5.243202896287593e-06, "loss": 22.731, "step": 268030 }, { "epoch": 0.5414577584569948, "grad_norm": 36.72077941894531, "learning_rate": 5.242854243017821e-06, "loss": 14.782, "step": 268040 }, { "epoch": 0.5414779590896787, "grad_norm": 349.0224914550781, "learning_rate": 5.242505588564404e-06, "loss": 7.2446, "step": 268050 }, { "epoch": 0.5414981597223625, "grad_norm": 381.7718200683594, "learning_rate": 5.2421569329290465e-06, "loss": 8.3921, "step": 268060 }, { "epoch": 0.5415183603550463, "grad_norm": 159.91815185546875, "learning_rate": 5.241808276113445e-06, "loss": 22.856, "step": 268070 }, { "epoch": 0.5415385609877301, "grad_norm": 399.8495178222656, "learning_rate": 5.241459618119299e-06, "loss": 27.867, "step": 268080 }, { "epoch": 0.5415587616204139, "grad_norm": 467.8490905761719, "learning_rate": 5.241110958948307e-06, "loss": 15.6234, "step": 268090 }, { "epoch": 0.5415789622530978, "grad_norm": 747.2193603515625, "learning_rate": 5.240762298602171e-06, "loss": 16.2672, "step": 268100 }, { "epoch": 0.5415991628857816, "grad_norm": 251.5902099609375, "learning_rate": 5.240413637082588e-06, "loss": 23.726, "step": 268110 }, { "epoch": 0.5416193635184654, "grad_norm": 357.5133056640625, "learning_rate": 5.240064974391259e-06, "loss": 22.1624, "step": 268120 }, { "epoch": 0.5416395641511492, "grad_norm": 354.6746826171875, "learning_rate": 5.239716310529882e-06, "loss": 18.5596, "step": 268130 }, { "epoch": 0.541659764783833, "grad_norm": 55.200035095214844, "learning_rate": 5.2393676455001565e-06, "loss": 31.4963, "step": 268140 }, { "epoch": 0.5416799654165169, "grad_norm": 192.11386108398438, "learning_rate": 5.239018979303784e-06, "loss": 33.9058, "step": 268150 }, { "epoch": 0.5417001660492007, "grad_norm": 1041.11865234375, "learning_rate": 5.238670311942459e-06, "loss": 17.9892, "step": 268160 }, { "epoch": 0.5417203666818845, "grad_norm": 320.5815734863281, "learning_rate": 5.2383216434178856e-06, "loss": 11.3272, "step": 268170 }, { "epoch": 0.5417405673145683, "grad_norm": 223.17514038085938, "learning_rate": 5.237972973731761e-06, "loss": 34.2814, "step": 268180 }, { "epoch": 0.5417607679472521, "grad_norm": 267.6535949707031, "learning_rate": 5.237624302885785e-06, "loss": 32.6544, "step": 268190 }, { "epoch": 0.541780968579936, "grad_norm": 145.8458251953125, "learning_rate": 5.237275630881658e-06, "loss": 13.8566, "step": 268200 }, { "epoch": 0.5418011692126198, "grad_norm": 121.94860076904297, "learning_rate": 5.236926957721075e-06, "loss": 20.9379, "step": 268210 }, { "epoch": 0.5418213698453036, "grad_norm": 485.9193420410156, "learning_rate": 5.236578283405742e-06, "loss": 16.1732, "step": 268220 }, { "epoch": 0.5418415704779874, "grad_norm": 321.4118957519531, "learning_rate": 5.236229607937354e-06, "loss": 18.9713, "step": 268230 }, { "epoch": 0.5418617711106711, "grad_norm": 611.24609375, "learning_rate": 5.235880931317612e-06, "loss": 29.5335, "step": 268240 }, { "epoch": 
0.541881971743355, "grad_norm": 239.5470733642578, "learning_rate": 5.235532253548213e-06, "loss": 20.4106, "step": 268250 }, { "epoch": 0.5419021723760388, "grad_norm": 388.25921630859375, "learning_rate": 5.235183574630861e-06, "loss": 15.0666, "step": 268260 }, { "epoch": 0.5419223730087226, "grad_norm": 419.8719482421875, "learning_rate": 5.234834894567252e-06, "loss": 32.829, "step": 268270 }, { "epoch": 0.5419425736414064, "grad_norm": 182.15264892578125, "learning_rate": 5.2344862133590855e-06, "loss": 14.6446, "step": 268280 }, { "epoch": 0.5419627742740902, "grad_norm": 431.7532653808594, "learning_rate": 5.234137531008062e-06, "loss": 13.4429, "step": 268290 }, { "epoch": 0.541982974906774, "grad_norm": 26.060903549194336, "learning_rate": 5.233788847515882e-06, "loss": 29.4722, "step": 268300 }, { "epoch": 0.5420031755394579, "grad_norm": 404.68316650390625, "learning_rate": 5.233440162884241e-06, "loss": 24.2382, "step": 268310 }, { "epoch": 0.5420233761721417, "grad_norm": 320.2318420410156, "learning_rate": 5.233091477114842e-06, "loss": 27.1324, "step": 268320 }, { "epoch": 0.5420435768048255, "grad_norm": 369.7477722167969, "learning_rate": 5.232742790209384e-06, "loss": 16.1336, "step": 268330 }, { "epoch": 0.5420637774375093, "grad_norm": 546.682861328125, "learning_rate": 5.232394102169566e-06, "loss": 29.3535, "step": 268340 }, { "epoch": 0.5420839780701932, "grad_norm": 316.20721435546875, "learning_rate": 5.2320454129970866e-06, "loss": 15.8043, "step": 268350 }, { "epoch": 0.542104178702877, "grad_norm": 466.38140869140625, "learning_rate": 5.2316967226936454e-06, "loss": 19.0468, "step": 268360 }, { "epoch": 0.5421243793355608, "grad_norm": 338.31561279296875, "learning_rate": 5.231348031260943e-06, "loss": 13.6088, "step": 268370 }, { "epoch": 0.5421445799682446, "grad_norm": 194.09835815429688, "learning_rate": 5.2309993387006795e-06, "loss": 21.1841, "step": 268380 }, { "epoch": 0.5421647806009284, "grad_norm": 562.6314697265625, "learning_rate": 5.230650645014551e-06, "loss": 16.7676, "step": 268390 }, { "epoch": 0.5421849812336123, "grad_norm": 541.9793090820312, "learning_rate": 5.230301950204261e-06, "loss": 23.2916, "step": 268400 }, { "epoch": 0.5422051818662961, "grad_norm": 569.9012451171875, "learning_rate": 5.229953254271507e-06, "loss": 25.0741, "step": 268410 }, { "epoch": 0.5422253824989799, "grad_norm": 446.9424133300781, "learning_rate": 5.229604557217988e-06, "loss": 12.487, "step": 268420 }, { "epoch": 0.5422455831316637, "grad_norm": 567.8118286132812, "learning_rate": 5.229255859045405e-06, "loss": 16.18, "step": 268430 }, { "epoch": 0.5422657837643475, "grad_norm": 532.9462280273438, "learning_rate": 5.228907159755457e-06, "loss": 23.2328, "step": 268440 }, { "epoch": 0.5422859843970314, "grad_norm": 241.0742950439453, "learning_rate": 5.228558459349844e-06, "loss": 17.8243, "step": 268450 }, { "epoch": 0.5423061850297152, "grad_norm": 225.08526611328125, "learning_rate": 5.2282097578302624e-06, "loss": 21.8836, "step": 268460 }, { "epoch": 0.542326385662399, "grad_norm": 14.749529838562012, "learning_rate": 5.227861055198415e-06, "loss": 15.4618, "step": 268470 }, { "epoch": 0.5423465862950828, "grad_norm": 668.3131103515625, "learning_rate": 5.227512351456001e-06, "loss": 14.1384, "step": 268480 }, { "epoch": 0.5423667869277666, "grad_norm": 226.07180786132812, "learning_rate": 5.227163646604721e-06, "loss": 19.7496, "step": 268490 }, { "epoch": 0.5423869875604503, "grad_norm": 608.514892578125, "learning_rate": 5.226814940646268e-06, 
"loss": 21.0938, "step": 268500 }, { "epoch": 0.5424071881931342, "grad_norm": 132.5753631591797, "learning_rate": 5.226466233582351e-06, "loss": 14.3611, "step": 268510 }, { "epoch": 0.542427388825818, "grad_norm": 197.1700439453125, "learning_rate": 5.226117525414663e-06, "loss": 27.8339, "step": 268520 }, { "epoch": 0.5424475894585018, "grad_norm": 395.619873046875, "learning_rate": 5.225768816144907e-06, "loss": 12.5512, "step": 268530 }, { "epoch": 0.5424677900911856, "grad_norm": 291.5622253417969, "learning_rate": 5.225420105774781e-06, "loss": 18.111, "step": 268540 }, { "epoch": 0.5424879907238694, "grad_norm": 297.4586486816406, "learning_rate": 5.2250713943059826e-06, "loss": 27.0939, "step": 268550 }, { "epoch": 0.5425081913565533, "grad_norm": 330.82550048828125, "learning_rate": 5.224722681740217e-06, "loss": 21.5205, "step": 268560 }, { "epoch": 0.5425283919892371, "grad_norm": 186.67616271972656, "learning_rate": 5.224373968079177e-06, "loss": 11.1447, "step": 268570 }, { "epoch": 0.5425485926219209, "grad_norm": 267.33770751953125, "learning_rate": 5.224025253324567e-06, "loss": 17.7968, "step": 268580 }, { "epoch": 0.5425687932546047, "grad_norm": 28.95694923400879, "learning_rate": 5.223676537478085e-06, "loss": 14.0016, "step": 268590 }, { "epoch": 0.5425889938872885, "grad_norm": 443.6894836425781, "learning_rate": 5.223327820541432e-06, "loss": 13.041, "step": 268600 }, { "epoch": 0.5426091945199724, "grad_norm": 370.8968505859375, "learning_rate": 5.222979102516304e-06, "loss": 12.6683, "step": 268610 }, { "epoch": 0.5426293951526562, "grad_norm": 374.7540588378906, "learning_rate": 5.2226303834044036e-06, "loss": 11.2769, "step": 268620 }, { "epoch": 0.54264959578534, "grad_norm": 565.6763916015625, "learning_rate": 5.22228166320743e-06, "loss": 16.972, "step": 268630 }, { "epoch": 0.5426697964180238, "grad_norm": 792.1552124023438, "learning_rate": 5.2219329419270825e-06, "loss": 7.2791, "step": 268640 }, { "epoch": 0.5426899970507076, "grad_norm": 504.08355712890625, "learning_rate": 5.221584219565061e-06, "loss": 20.0575, "step": 268650 }, { "epoch": 0.5427101976833915, "grad_norm": 303.4017639160156, "learning_rate": 5.221235496123064e-06, "loss": 21.0303, "step": 268660 }, { "epoch": 0.5427303983160753, "grad_norm": 837.9013671875, "learning_rate": 5.220886771602793e-06, "loss": 10.3367, "step": 268670 }, { "epoch": 0.5427505989487591, "grad_norm": 13.7284517288208, "learning_rate": 5.2205380460059466e-06, "loss": 14.3829, "step": 268680 }, { "epoch": 0.5427707995814429, "grad_norm": 620.3934326171875, "learning_rate": 5.2201893193342234e-06, "loss": 16.6033, "step": 268690 }, { "epoch": 0.5427910002141267, "grad_norm": 307.4211120605469, "learning_rate": 5.219840591589325e-06, "loss": 15.6857, "step": 268700 }, { "epoch": 0.5428112008468106, "grad_norm": 263.71844482421875, "learning_rate": 5.21949186277295e-06, "loss": 17.0162, "step": 268710 }, { "epoch": 0.5428314014794944, "grad_norm": 777.85791015625, "learning_rate": 5.219143132886799e-06, "loss": 31.5894, "step": 268720 }, { "epoch": 0.5428516021121782, "grad_norm": 820.7977294921875, "learning_rate": 5.218794401932571e-06, "loss": 22.0244, "step": 268730 }, { "epoch": 0.542871802744862, "grad_norm": 204.06298828125, "learning_rate": 5.218445669911964e-06, "loss": 11.2491, "step": 268740 }, { "epoch": 0.5428920033775458, "grad_norm": 285.7690124511719, "learning_rate": 5.218096936826681e-06, "loss": 28.146, "step": 268750 }, { "epoch": 0.5429122040102295, "grad_norm": 364.461181640625, "learning_rate": 
5.21774820267842e-06, "loss": 18.1274, "step": 268760 }, { "epoch": 0.5429324046429134, "grad_norm": 359.6499938964844, "learning_rate": 5.2173994674688786e-06, "loss": 13.214, "step": 268770 }, { "epoch": 0.5429526052755972, "grad_norm": 197.79251098632812, "learning_rate": 5.2170507311997605e-06, "loss": 27.0702, "step": 268780 }, { "epoch": 0.542972805908281, "grad_norm": 292.5074768066406, "learning_rate": 5.216701993872763e-06, "loss": 26.8686, "step": 268790 }, { "epoch": 0.5429930065409648, "grad_norm": 40.50156021118164, "learning_rate": 5.216353255489586e-06, "loss": 16.3967, "step": 268800 }, { "epoch": 0.5430132071736486, "grad_norm": 1.1966899633407593, "learning_rate": 5.21600451605193e-06, "loss": 8.233, "step": 268810 }, { "epoch": 0.5430334078063325, "grad_norm": 549.1659545898438, "learning_rate": 5.215655775561493e-06, "loss": 19.3528, "step": 268820 }, { "epoch": 0.5430536084390163, "grad_norm": 483.72149658203125, "learning_rate": 5.215307034019977e-06, "loss": 14.8691, "step": 268830 }, { "epoch": 0.5430738090717001, "grad_norm": 265.9928283691406, "learning_rate": 5.214958291429079e-06, "loss": 17.7766, "step": 268840 }, { "epoch": 0.5430940097043839, "grad_norm": 193.4778289794922, "learning_rate": 5.214609547790504e-06, "loss": 22.8305, "step": 268850 }, { "epoch": 0.5431142103370677, "grad_norm": 284.76556396484375, "learning_rate": 5.214260803105945e-06, "loss": 25.5298, "step": 268860 }, { "epoch": 0.5431344109697516, "grad_norm": 406.9671936035156, "learning_rate": 5.213912057377105e-06, "loss": 19.4018, "step": 268870 }, { "epoch": 0.5431546116024354, "grad_norm": 384.6708679199219, "learning_rate": 5.213563310605686e-06, "loss": 10.0793, "step": 268880 }, { "epoch": 0.5431748122351192, "grad_norm": 449.76385498046875, "learning_rate": 5.213214562793383e-06, "loss": 24.3866, "step": 268890 }, { "epoch": 0.543195012867803, "grad_norm": 897.1937255859375, "learning_rate": 5.212865813941899e-06, "loss": 26.1392, "step": 268900 }, { "epoch": 0.5432152135004868, "grad_norm": 268.6221923828125, "learning_rate": 5.2125170640529325e-06, "loss": 17.073, "step": 268910 }, { "epoch": 0.5432354141331707, "grad_norm": 9.538450241088867, "learning_rate": 5.212168313128183e-06, "loss": 14.7929, "step": 268920 }, { "epoch": 0.5432556147658545, "grad_norm": 189.12136840820312, "learning_rate": 5.2118195611693515e-06, "loss": 23.0357, "step": 268930 }, { "epoch": 0.5432758153985383, "grad_norm": 558.6755981445312, "learning_rate": 5.211470808178137e-06, "loss": 17.7658, "step": 268940 }, { "epoch": 0.5432960160312221, "grad_norm": 303.1871032714844, "learning_rate": 5.21112205415624e-06, "loss": 12.9415, "step": 268950 }, { "epoch": 0.543316216663906, "grad_norm": 241.51431274414062, "learning_rate": 5.210773299105358e-06, "loss": 18.8631, "step": 268960 }, { "epoch": 0.5433364172965898, "grad_norm": 963.1439208984375, "learning_rate": 5.210424543027195e-06, "loss": 28.2425, "step": 268970 }, { "epoch": 0.5433566179292736, "grad_norm": 170.69017028808594, "learning_rate": 5.210075785923446e-06, "loss": 13.1179, "step": 268980 }, { "epoch": 0.5433768185619574, "grad_norm": 865.9637451171875, "learning_rate": 5.209727027795816e-06, "loss": 24.2814, "step": 268990 }, { "epoch": 0.5433970191946412, "grad_norm": 271.4307861328125, "learning_rate": 5.209378268645998e-06, "loss": 31.9874, "step": 269000 }, { "epoch": 0.5434172198273249, "grad_norm": 139.47418212890625, "learning_rate": 5.209029508475699e-06, "loss": 17.2439, "step": 269010 }, { "epoch": 0.5434374204600088, "grad_norm": 
471.4844665527344, "learning_rate": 5.208680747286614e-06, "loss": 16.2661, "step": 269020 }, { "epoch": 0.5434576210926926, "grad_norm": 422.0306701660156, "learning_rate": 5.2083319850804445e-06, "loss": 17.746, "step": 269030 }, { "epoch": 0.5434778217253764, "grad_norm": 225.77320861816406, "learning_rate": 5.20798322185889e-06, "loss": 18.9828, "step": 269040 }, { "epoch": 0.5434980223580602, "grad_norm": 206.78524780273438, "learning_rate": 5.207634457623652e-06, "loss": 15.9055, "step": 269050 }, { "epoch": 0.543518222990744, "grad_norm": 135.02735900878906, "learning_rate": 5.207285692376427e-06, "loss": 14.0669, "step": 269060 }, { "epoch": 0.5435384236234279, "grad_norm": 956.4881591796875, "learning_rate": 5.206936926118917e-06, "loss": 38.2816, "step": 269070 }, { "epoch": 0.5435586242561117, "grad_norm": 196.66917419433594, "learning_rate": 5.206588158852822e-06, "loss": 11.8373, "step": 269080 }, { "epoch": 0.5435788248887955, "grad_norm": 289.5069274902344, "learning_rate": 5.206239390579842e-06, "loss": 8.7243, "step": 269090 }, { "epoch": 0.5435990255214793, "grad_norm": 179.99478149414062, "learning_rate": 5.205890621301676e-06, "loss": 11.5368, "step": 269100 }, { "epoch": 0.5436192261541631, "grad_norm": 248.57186889648438, "learning_rate": 5.205541851020022e-06, "loss": 11.7105, "step": 269110 }, { "epoch": 0.543639426786847, "grad_norm": 318.1573486328125, "learning_rate": 5.205193079736584e-06, "loss": 20.7421, "step": 269120 }, { "epoch": 0.5436596274195308, "grad_norm": 1006.7952270507812, "learning_rate": 5.204844307453059e-06, "loss": 25.0289, "step": 269130 }, { "epoch": 0.5436798280522146, "grad_norm": 522.5182495117188, "learning_rate": 5.204495534171148e-06, "loss": 16.6759, "step": 269140 }, { "epoch": 0.5437000286848984, "grad_norm": 107.71540832519531, "learning_rate": 5.204146759892551e-06, "loss": 23.3373, "step": 269150 }, { "epoch": 0.5437202293175822, "grad_norm": 685.9620971679688, "learning_rate": 5.2037979846189655e-06, "loss": 12.0589, "step": 269160 }, { "epoch": 0.5437404299502661, "grad_norm": 2979.21728515625, "learning_rate": 5.203449208352096e-06, "loss": 28.0118, "step": 269170 }, { "epoch": 0.5437606305829499, "grad_norm": 626.6129150390625, "learning_rate": 5.203100431093638e-06, "loss": 23.7391, "step": 269180 }, { "epoch": 0.5437808312156337, "grad_norm": 168.13368225097656, "learning_rate": 5.202751652845294e-06, "loss": 20.9722, "step": 269190 }, { "epoch": 0.5438010318483175, "grad_norm": 336.0380554199219, "learning_rate": 5.202402873608763e-06, "loss": 38.7637, "step": 269200 }, { "epoch": 0.5438212324810013, "grad_norm": 485.0022888183594, "learning_rate": 5.2020540933857455e-06, "loss": 27.5945, "step": 269210 }, { "epoch": 0.5438414331136852, "grad_norm": 364.8822021484375, "learning_rate": 5.201705312177939e-06, "loss": 12.1223, "step": 269220 }, { "epoch": 0.543861633746369, "grad_norm": 606.0297241210938, "learning_rate": 5.2013565299870475e-06, "loss": 18.6942, "step": 269230 }, { "epoch": 0.5438818343790528, "grad_norm": 76.48331451416016, "learning_rate": 5.201007746814767e-06, "loss": 16.9994, "step": 269240 }, { "epoch": 0.5439020350117366, "grad_norm": 617.9075317382812, "learning_rate": 5.200658962662799e-06, "loss": 9.8852, "step": 269250 }, { "epoch": 0.5439222356444204, "grad_norm": 290.7354736328125, "learning_rate": 5.2003101775328455e-06, "loss": 22.7082, "step": 269260 }, { "epoch": 0.5439424362771041, "grad_norm": 322.8319396972656, "learning_rate": 5.199961391426601e-06, "loss": 22.3802, "step": 269270 }, { 
"epoch": 0.543962636909788, "grad_norm": 250.70684814453125, "learning_rate": 5.199612604345773e-06, "loss": 12.3198, "step": 269280 }, { "epoch": 0.5439828375424718, "grad_norm": 176.34652709960938, "learning_rate": 5.199263816292055e-06, "loss": 15.4487, "step": 269290 }, { "epoch": 0.5440030381751556, "grad_norm": 1224.541015625, "learning_rate": 5.19891502726715e-06, "loss": 32.8499, "step": 269300 }, { "epoch": 0.5440232388078394, "grad_norm": 406.5760498046875, "learning_rate": 5.198566237272757e-06, "loss": 17.3707, "step": 269310 }, { "epoch": 0.5440434394405232, "grad_norm": 221.11219787597656, "learning_rate": 5.198217446310576e-06, "loss": 10.185, "step": 269320 }, { "epoch": 0.5440636400732071, "grad_norm": 830.6646118164062, "learning_rate": 5.197868654382307e-06, "loss": 34.4302, "step": 269330 }, { "epoch": 0.5440838407058909, "grad_norm": 111.73726654052734, "learning_rate": 5.197519861489652e-06, "loss": 17.7608, "step": 269340 }, { "epoch": 0.5441040413385747, "grad_norm": 180.2215576171875, "learning_rate": 5.197171067634307e-06, "loss": 18.8368, "step": 269350 }, { "epoch": 0.5441242419712585, "grad_norm": 1184.9173583984375, "learning_rate": 5.196822272817975e-06, "loss": 36.22, "step": 269360 }, { "epoch": 0.5441444426039423, "grad_norm": 233.44515991210938, "learning_rate": 5.196473477042355e-06, "loss": 24.8944, "step": 269370 }, { "epoch": 0.5441646432366262, "grad_norm": 394.85345458984375, "learning_rate": 5.196124680309148e-06, "loss": 18.5167, "step": 269380 }, { "epoch": 0.54418484386931, "grad_norm": 300.61065673828125, "learning_rate": 5.1957758826200525e-06, "loss": 31.084, "step": 269390 }, { "epoch": 0.5442050445019938, "grad_norm": 208.00262451171875, "learning_rate": 5.195427083976768e-06, "loss": 12.3589, "step": 269400 }, { "epoch": 0.5442252451346776, "grad_norm": 1135.732666015625, "learning_rate": 5.195078284380996e-06, "loss": 37.1302, "step": 269410 }, { "epoch": 0.5442454457673614, "grad_norm": 279.19293212890625, "learning_rate": 5.194729483834438e-06, "loss": 23.9411, "step": 269420 }, { "epoch": 0.5442656464000453, "grad_norm": 68.35921478271484, "learning_rate": 5.19438068233879e-06, "loss": 20.2188, "step": 269430 }, { "epoch": 0.5442858470327291, "grad_norm": 842.069091796875, "learning_rate": 5.194031879895756e-06, "loss": 19.3563, "step": 269440 }, { "epoch": 0.5443060476654129, "grad_norm": 885.3028564453125, "learning_rate": 5.193683076507031e-06, "loss": 20.23, "step": 269450 }, { "epoch": 0.5443262482980967, "grad_norm": 429.1953125, "learning_rate": 5.193334272174321e-06, "loss": 29.8051, "step": 269460 }, { "epoch": 0.5443464489307805, "grad_norm": 422.0041809082031, "learning_rate": 5.192985466899323e-06, "loss": 23.2527, "step": 269470 }, { "epoch": 0.5443666495634644, "grad_norm": 176.17156982421875, "learning_rate": 5.1926366606837365e-06, "loss": 23.1212, "step": 269480 }, { "epoch": 0.5443868501961482, "grad_norm": 359.56170654296875, "learning_rate": 5.192287853529263e-06, "loss": 21.667, "step": 269490 }, { "epoch": 0.544407050828832, "grad_norm": 124.85405731201172, "learning_rate": 5.1919390454376e-06, "loss": 16.2355, "step": 269500 }, { "epoch": 0.5444272514615158, "grad_norm": 139.08444213867188, "learning_rate": 5.191590236410451e-06, "loss": 21.6483, "step": 269510 }, { "epoch": 0.5444474520941995, "grad_norm": 316.3241882324219, "learning_rate": 5.191241426449513e-06, "loss": 18.8722, "step": 269520 }, { "epoch": 0.5444676527268834, "grad_norm": 696.6727905273438, "learning_rate": 5.1908926155564885e-06, "loss": 
30.8763, "step": 269530 }, { "epoch": 0.5444878533595672, "grad_norm": 32.10171127319336, "learning_rate": 5.190543803733077e-06, "loss": 16.6814, "step": 269540 }, { "epoch": 0.544508053992251, "grad_norm": 391.8594055175781, "learning_rate": 5.190194990980979e-06, "loss": 14.8044, "step": 269550 }, { "epoch": 0.5445282546249348, "grad_norm": 607.99755859375, "learning_rate": 5.189846177301892e-06, "loss": 27.7045, "step": 269560 }, { "epoch": 0.5445484552576186, "grad_norm": 142.82421875, "learning_rate": 5.189497362697518e-06, "loss": 15.7861, "step": 269570 }, { "epoch": 0.5445686558903025, "grad_norm": 652.6636352539062, "learning_rate": 5.189148547169558e-06, "loss": 24.9412, "step": 269580 }, { "epoch": 0.5445888565229863, "grad_norm": 799.7808227539062, "learning_rate": 5.188799730719708e-06, "loss": 32.4974, "step": 269590 }, { "epoch": 0.5446090571556701, "grad_norm": 546.4911499023438, "learning_rate": 5.188450913349674e-06, "loss": 20.361, "step": 269600 }, { "epoch": 0.5446292577883539, "grad_norm": 428.3880615234375, "learning_rate": 5.188102095061151e-06, "loss": 22.8714, "step": 269610 }, { "epoch": 0.5446494584210377, "grad_norm": 172.1936798095703, "learning_rate": 5.187753275855843e-06, "loss": 22.8166, "step": 269620 }, { "epoch": 0.5446696590537216, "grad_norm": 164.95391845703125, "learning_rate": 5.187404455735448e-06, "loss": 25.7966, "step": 269630 }, { "epoch": 0.5446898596864054, "grad_norm": 339.9266357421875, "learning_rate": 5.187055634701664e-06, "loss": 19.976, "step": 269640 }, { "epoch": 0.5447100603190892, "grad_norm": 284.510986328125, "learning_rate": 5.186706812756197e-06, "loss": 20.0402, "step": 269650 }, { "epoch": 0.544730260951773, "grad_norm": 332.6054382324219, "learning_rate": 5.1863579899007424e-06, "loss": 15.5388, "step": 269660 }, { "epoch": 0.5447504615844568, "grad_norm": 222.80596923828125, "learning_rate": 5.186009166137e-06, "loss": 14.0953, "step": 269670 }, { "epoch": 0.5447706622171407, "grad_norm": 301.080078125, "learning_rate": 5.185660341466673e-06, "loss": 14.633, "step": 269680 }, { "epoch": 0.5447908628498245, "grad_norm": 150.59100341796875, "learning_rate": 5.1853115158914595e-06, "loss": 9.0634, "step": 269690 }, { "epoch": 0.5448110634825083, "grad_norm": 7.729396820068359, "learning_rate": 5.18496268941306e-06, "loss": 11.2065, "step": 269700 }, { "epoch": 0.5448312641151921, "grad_norm": 382.56707763671875, "learning_rate": 5.1846138620331766e-06, "loss": 11.5221, "step": 269710 }, { "epoch": 0.5448514647478759, "grad_norm": 199.4328155517578, "learning_rate": 5.184265033753506e-06, "loss": 35.3915, "step": 269720 }, { "epoch": 0.5448716653805598, "grad_norm": 404.68121337890625, "learning_rate": 5.18391620457575e-06, "loss": 26.1202, "step": 269730 }, { "epoch": 0.5448918660132436, "grad_norm": 141.96356201171875, "learning_rate": 5.183567374501608e-06, "loss": 20.8223, "step": 269740 }, { "epoch": 0.5449120666459274, "grad_norm": 345.89385986328125, "learning_rate": 5.183218543532782e-06, "loss": 9.901, "step": 269750 }, { "epoch": 0.5449322672786112, "grad_norm": 273.4797058105469, "learning_rate": 5.182869711670971e-06, "loss": 17.5253, "step": 269760 }, { "epoch": 0.544952467911295, "grad_norm": 746.3378295898438, "learning_rate": 5.182520878917874e-06, "loss": 24.2267, "step": 269770 }, { "epoch": 0.5449726685439787, "grad_norm": 607.471435546875, "learning_rate": 5.1821720452751945e-06, "loss": 28.7323, "step": 269780 }, { "epoch": 0.5449928691766626, "grad_norm": 164.28695678710938, "learning_rate": 
5.181823210744629e-06, "loss": 25.6031, "step": 269790 }, { "epoch": 0.5450130698093464, "grad_norm": 321.724609375, "learning_rate": 5.18147437532788e-06, "loss": 15.5116, "step": 269800 }, { "epoch": 0.5450332704420302, "grad_norm": 240.0693359375, "learning_rate": 5.181125539026646e-06, "loss": 14.679, "step": 269810 }, { "epoch": 0.545053471074714, "grad_norm": 276.6699523925781, "learning_rate": 5.180776701842629e-06, "loss": 6.2979, "step": 269820 }, { "epoch": 0.5450736717073978, "grad_norm": 476.1277770996094, "learning_rate": 5.180427863777528e-06, "loss": 21.9404, "step": 269830 }, { "epoch": 0.5450938723400817, "grad_norm": 945.3489990234375, "learning_rate": 5.180079024833043e-06, "loss": 16.7016, "step": 269840 }, { "epoch": 0.5451140729727655, "grad_norm": 468.953125, "learning_rate": 5.179730185010875e-06, "loss": 23.8215, "step": 269850 }, { "epoch": 0.5451342736054493, "grad_norm": 0.0, "learning_rate": 5.179381344312724e-06, "loss": 16.315, "step": 269860 }, { "epoch": 0.5451544742381331, "grad_norm": 127.08235931396484, "learning_rate": 5.179032502740291e-06, "loss": 20.8154, "step": 269870 }, { "epoch": 0.5451746748708169, "grad_norm": 334.3542175292969, "learning_rate": 5.178683660295273e-06, "loss": 24.2088, "step": 269880 }, { "epoch": 0.5451948755035008, "grad_norm": 500.1344909667969, "learning_rate": 5.178334816979374e-06, "loss": 20.7751, "step": 269890 }, { "epoch": 0.5452150761361846, "grad_norm": 343.8796691894531, "learning_rate": 5.177985972794293e-06, "loss": 20.277, "step": 269900 }, { "epoch": 0.5452352767688684, "grad_norm": 111.69133758544922, "learning_rate": 5.177637127741729e-06, "loss": 21.7036, "step": 269910 }, { "epoch": 0.5452554774015522, "grad_norm": 1137.2344970703125, "learning_rate": 5.177288281823385e-06, "loss": 34.2687, "step": 269920 }, { "epoch": 0.545275678034236, "grad_norm": 1319.4700927734375, "learning_rate": 5.176939435040958e-06, "loss": 34.8743, "step": 269930 }, { "epoch": 0.5452958786669199, "grad_norm": 269.3182067871094, "learning_rate": 5.17659058739615e-06, "loss": 24.8758, "step": 269940 }, { "epoch": 0.5453160792996037, "grad_norm": 627.1061401367188, "learning_rate": 5.17624173889066e-06, "loss": 19.9231, "step": 269950 }, { "epoch": 0.5453362799322875, "grad_norm": 83.94268035888672, "learning_rate": 5.175892889526189e-06, "loss": 13.5603, "step": 269960 }, { "epoch": 0.5453564805649713, "grad_norm": 182.0495147705078, "learning_rate": 5.175544039304439e-06, "loss": 18.6815, "step": 269970 }, { "epoch": 0.5453766811976551, "grad_norm": 352.86236572265625, "learning_rate": 5.175195188227108e-06, "loss": 29.8947, "step": 269980 }, { "epoch": 0.545396881830339, "grad_norm": 341.9924011230469, "learning_rate": 5.174846336295897e-06, "loss": 22.6534, "step": 269990 }, { "epoch": 0.5454170824630228, "grad_norm": 563.7020874023438, "learning_rate": 5.174497483512506e-06, "loss": 15.0685, "step": 270000 }, { "epoch": 0.5454372830957066, "grad_norm": 606.5368041992188, "learning_rate": 5.174148629878635e-06, "loss": 31.138, "step": 270010 }, { "epoch": 0.5454574837283904, "grad_norm": 295.58697509765625, "learning_rate": 5.1737997753959846e-06, "loss": 30.6939, "step": 270020 }, { "epoch": 0.5454776843610742, "grad_norm": 554.0674438476562, "learning_rate": 5.173450920066256e-06, "loss": 38.5939, "step": 270030 }, { "epoch": 0.545497884993758, "grad_norm": 462.5925598144531, "learning_rate": 5.173102063891148e-06, "loss": 14.4654, "step": 270040 }, { "epoch": 0.5455180856264418, "grad_norm": 452.27740478515625, 
"learning_rate": 5.172753206872363e-06, "loss": 24.4104, "step": 270050 }, { "epoch": 0.5455382862591256, "grad_norm": 256.5585632324219, "learning_rate": 5.172404349011599e-06, "loss": 21.2853, "step": 270060 }, { "epoch": 0.5455584868918094, "grad_norm": 289.0415344238281, "learning_rate": 5.172055490310555e-06, "loss": 22.3098, "step": 270070 }, { "epoch": 0.5455786875244932, "grad_norm": 336.8205871582031, "learning_rate": 5.171706630770935e-06, "loss": 14.1378, "step": 270080 }, { "epoch": 0.545598888157177, "grad_norm": 336.18194580078125, "learning_rate": 5.171357770394439e-06, "loss": 14.374, "step": 270090 }, { "epoch": 0.5456190887898609, "grad_norm": 344.4145812988281, "learning_rate": 5.171008909182765e-06, "loss": 19.6497, "step": 270100 }, { "epoch": 0.5456392894225447, "grad_norm": 236.5043182373047, "learning_rate": 5.170660047137613e-06, "loss": 28.6461, "step": 270110 }, { "epoch": 0.5456594900552285, "grad_norm": 1888.2957763671875, "learning_rate": 5.1703111842606864e-06, "loss": 15.333, "step": 270120 }, { "epoch": 0.5456796906879123, "grad_norm": 143.0895233154297, "learning_rate": 5.169962320553683e-06, "loss": 17.241, "step": 270130 }, { "epoch": 0.5456998913205962, "grad_norm": 0.0, "learning_rate": 5.1696134560183045e-06, "loss": 12.0756, "step": 270140 }, { "epoch": 0.54572009195328, "grad_norm": 269.81671142578125, "learning_rate": 5.169264590656249e-06, "loss": 17.0617, "step": 270150 }, { "epoch": 0.5457402925859638, "grad_norm": 704.2405395507812, "learning_rate": 5.16891572446922e-06, "loss": 23.0752, "step": 270160 }, { "epoch": 0.5457604932186476, "grad_norm": 98.35591125488281, "learning_rate": 5.168566857458917e-06, "loss": 11.9061, "step": 270170 }, { "epoch": 0.5457806938513314, "grad_norm": 455.7306213378906, "learning_rate": 5.168217989627037e-06, "loss": 16.8541, "step": 270180 }, { "epoch": 0.5458008944840153, "grad_norm": 530.9266357421875, "learning_rate": 5.1678691209752855e-06, "loss": 16.3598, "step": 270190 }, { "epoch": 0.5458210951166991, "grad_norm": 88.47686767578125, "learning_rate": 5.167520251505358e-06, "loss": 8.2374, "step": 270200 }, { "epoch": 0.5458412957493829, "grad_norm": 567.403076171875, "learning_rate": 5.1671713812189585e-06, "loss": 14.3183, "step": 270210 }, { "epoch": 0.5458614963820667, "grad_norm": 599.0480346679688, "learning_rate": 5.166822510117785e-06, "loss": 28.233, "step": 270220 }, { "epoch": 0.5458816970147505, "grad_norm": 457.58868408203125, "learning_rate": 5.166473638203539e-06, "loss": 21.6233, "step": 270230 }, { "epoch": 0.5459018976474344, "grad_norm": 813.5020751953125, "learning_rate": 5.166124765477923e-06, "loss": 33.3464, "step": 270240 }, { "epoch": 0.5459220982801182, "grad_norm": 288.6170959472656, "learning_rate": 5.165775891942631e-06, "loss": 24.956, "step": 270250 }, { "epoch": 0.545942298912802, "grad_norm": 282.595703125, "learning_rate": 5.165427017599371e-06, "loss": 25.055, "step": 270260 }, { "epoch": 0.5459624995454858, "grad_norm": 388.81134033203125, "learning_rate": 5.1650781424498385e-06, "loss": 20.2176, "step": 270270 }, { "epoch": 0.5459827001781696, "grad_norm": 460.6811218261719, "learning_rate": 5.164729266495735e-06, "loss": 55.1902, "step": 270280 }, { "epoch": 0.5460029008108533, "grad_norm": 466.9342956542969, "learning_rate": 5.16438038973876e-06, "loss": 37.4311, "step": 270290 }, { "epoch": 0.5460231014435372, "grad_norm": 1034.878662109375, "learning_rate": 5.164031512180616e-06, "loss": 23.7974, "step": 270300 }, { "epoch": 0.546043302076221, "grad_norm": 
187.3511199951172, "learning_rate": 5.163682633823003e-06, "loss": 14.4381, "step": 270310 }, { "epoch": 0.5460635027089048, "grad_norm": 494.9722900390625, "learning_rate": 5.16333375466762e-06, "loss": 22.032, "step": 270320 }, { "epoch": 0.5460837033415886, "grad_norm": 14.278849601745605, "learning_rate": 5.162984874716168e-06, "loss": 26.7451, "step": 270330 }, { "epoch": 0.5461039039742724, "grad_norm": 902.59521484375, "learning_rate": 5.162635993970347e-06, "loss": 17.8312, "step": 270340 }, { "epoch": 0.5461241046069563, "grad_norm": 305.71771240234375, "learning_rate": 5.162287112431858e-06, "loss": 12.0388, "step": 270350 }, { "epoch": 0.5461443052396401, "grad_norm": 253.86412048339844, "learning_rate": 5.1619382301024025e-06, "loss": 16.5757, "step": 270360 }, { "epoch": 0.5461645058723239, "grad_norm": 260.5821228027344, "learning_rate": 5.16158934698368e-06, "loss": 15.3401, "step": 270370 }, { "epoch": 0.5461847065050077, "grad_norm": 229.546875, "learning_rate": 5.161240463077387e-06, "loss": 25.9571, "step": 270380 }, { "epoch": 0.5462049071376915, "grad_norm": 382.9422607421875, "learning_rate": 5.160891578385232e-06, "loss": 13.2796, "step": 270390 }, { "epoch": 0.5462251077703754, "grad_norm": 537.4158325195312, "learning_rate": 5.160542692908909e-06, "loss": 20.2076, "step": 270400 }, { "epoch": 0.5462453084030592, "grad_norm": 308.6058349609375, "learning_rate": 5.16019380665012e-06, "loss": 8.9912, "step": 270410 }, { "epoch": 0.546265509035743, "grad_norm": 621.6725463867188, "learning_rate": 5.159844919610566e-06, "loss": 25.2461, "step": 270420 }, { "epoch": 0.5462857096684268, "grad_norm": 643.1211547851562, "learning_rate": 5.159496031791947e-06, "loss": 37.0699, "step": 270430 }, { "epoch": 0.5463059103011106, "grad_norm": 235.88206481933594, "learning_rate": 5.159147143195965e-06, "loss": 14.9954, "step": 270440 }, { "epoch": 0.5463261109337945, "grad_norm": 227.0337371826172, "learning_rate": 5.158798253824319e-06, "loss": 13.3651, "step": 270450 }, { "epoch": 0.5463463115664783, "grad_norm": 0.0, "learning_rate": 5.158449363678708e-06, "loss": 25.9175, "step": 270460 }, { "epoch": 0.5463665121991621, "grad_norm": 153.15687561035156, "learning_rate": 5.1581004727608345e-06, "loss": 6.5073, "step": 270470 }, { "epoch": 0.5463867128318459, "grad_norm": 160.54690551757812, "learning_rate": 5.1577515810724e-06, "loss": 18.2212, "step": 270480 }, { "epoch": 0.5464069134645297, "grad_norm": 191.25054931640625, "learning_rate": 5.1574026886151005e-06, "loss": 29.0367, "step": 270490 }, { "epoch": 0.5464271140972136, "grad_norm": 418.3304748535156, "learning_rate": 5.157053795390642e-06, "loss": 22.2465, "step": 270500 }, { "epoch": 0.5464473147298974, "grad_norm": 260.75054931640625, "learning_rate": 5.156704901400722e-06, "loss": 23.1032, "step": 270510 }, { "epoch": 0.5464675153625812, "grad_norm": 302.0773620605469, "learning_rate": 5.156356006647041e-06, "loss": 10.0188, "step": 270520 }, { "epoch": 0.546487715995265, "grad_norm": 492.5730895996094, "learning_rate": 5.156007111131301e-06, "loss": 12.8026, "step": 270530 }, { "epoch": 0.5465079166279488, "grad_norm": 380.69659423828125, "learning_rate": 5.155658214855197e-06, "loss": 11.8795, "step": 270540 }, { "epoch": 0.5465281172606326, "grad_norm": 1181.2581787109375, "learning_rate": 5.155309317820438e-06, "loss": 15.8361, "step": 270550 }, { "epoch": 0.5465483178933164, "grad_norm": 564.9685668945312, "learning_rate": 5.154960420028718e-06, "loss": 17.8807, "step": 270560 }, { "epoch": 0.5465685185260002, 
"grad_norm": 392.8375549316406, "learning_rate": 5.154611521481742e-06, "loss": 16.7477, "step": 270570 }, { "epoch": 0.546588719158684, "grad_norm": 645.0961303710938, "learning_rate": 5.154262622181205e-06, "loss": 25.7439, "step": 270580 }, { "epoch": 0.5466089197913678, "grad_norm": 239.34262084960938, "learning_rate": 5.153913722128813e-06, "loss": 36.698, "step": 270590 }, { "epoch": 0.5466291204240517, "grad_norm": 254.29592895507812, "learning_rate": 5.153564821326265e-06, "loss": 13.964, "step": 270600 }, { "epoch": 0.5466493210567355, "grad_norm": 125.31671142578125, "learning_rate": 5.153215919775259e-06, "loss": 12.2003, "step": 270610 }, { "epoch": 0.5466695216894193, "grad_norm": 204.45960998535156, "learning_rate": 5.1528670174774965e-06, "loss": 20.8455, "step": 270620 }, { "epoch": 0.5466897223221031, "grad_norm": 798.116943359375, "learning_rate": 5.15251811443468e-06, "loss": 24.2493, "step": 270630 }, { "epoch": 0.5467099229547869, "grad_norm": 1514.6075439453125, "learning_rate": 5.152169210648509e-06, "loss": 20.5565, "step": 270640 }, { "epoch": 0.5467301235874708, "grad_norm": 501.8855285644531, "learning_rate": 5.151820306120682e-06, "loss": 14.8503, "step": 270650 }, { "epoch": 0.5467503242201546, "grad_norm": 257.0301513671875, "learning_rate": 5.151471400852903e-06, "loss": 43.7242, "step": 270660 }, { "epoch": 0.5467705248528384, "grad_norm": 514.08349609375, "learning_rate": 5.15112249484687e-06, "loss": 17.7152, "step": 270670 }, { "epoch": 0.5467907254855222, "grad_norm": 753.3203125, "learning_rate": 5.150773588104284e-06, "loss": 24.4477, "step": 270680 }, { "epoch": 0.546810926118206, "grad_norm": 124.55496978759766, "learning_rate": 5.150424680626846e-06, "loss": 23.9681, "step": 270690 }, { "epoch": 0.5468311267508899, "grad_norm": 154.38717651367188, "learning_rate": 5.150075772416256e-06, "loss": 21.9183, "step": 270700 }, { "epoch": 0.5468513273835737, "grad_norm": 116.17897033691406, "learning_rate": 5.149726863474217e-06, "loss": 10.05, "step": 270710 }, { "epoch": 0.5468715280162575, "grad_norm": 372.535888671875, "learning_rate": 5.149377953802426e-06, "loss": 27.1142, "step": 270720 }, { "epoch": 0.5468917286489413, "grad_norm": 397.5634765625, "learning_rate": 5.149029043402584e-06, "loss": 14.8103, "step": 270730 }, { "epoch": 0.5469119292816251, "grad_norm": 167.58265686035156, "learning_rate": 5.1486801322763935e-06, "loss": 9.2214, "step": 270740 }, { "epoch": 0.546932129914309, "grad_norm": 442.3593444824219, "learning_rate": 5.148331220425554e-06, "loss": 24.5757, "step": 270750 }, { "epoch": 0.5469523305469928, "grad_norm": 278.6236267089844, "learning_rate": 5.147982307851766e-06, "loss": 23.799, "step": 270760 }, { "epoch": 0.5469725311796766, "grad_norm": 266.60107421875, "learning_rate": 5.147633394556731e-06, "loss": 19.6006, "step": 270770 }, { "epoch": 0.5469927318123604, "grad_norm": 565.9557495117188, "learning_rate": 5.147284480542149e-06, "loss": 27.3513, "step": 270780 }, { "epoch": 0.5470129324450442, "grad_norm": 403.841064453125, "learning_rate": 5.1469355658097186e-06, "loss": 36.6437, "step": 270790 }, { "epoch": 0.5470331330777279, "grad_norm": 257.75042724609375, "learning_rate": 5.146586650361143e-06, "loss": 11.5962, "step": 270800 }, { "epoch": 0.5470533337104118, "grad_norm": 434.3193054199219, "learning_rate": 5.146237734198121e-06, "loss": 24.7431, "step": 270810 }, { "epoch": 0.5470735343430956, "grad_norm": 113.63842010498047, "learning_rate": 5.145888817322355e-06, "loss": 5.8752, "step": 270820 }, { "epoch": 
0.5470937349757794, "grad_norm": 305.0410461425781, "learning_rate": 5.145539899735543e-06, "loss": 14.7208, "step": 270830 }, { "epoch": 0.5471139356084632, "grad_norm": 384.3434143066406, "learning_rate": 5.1451909814393895e-06, "loss": 10.4676, "step": 270840 }, { "epoch": 0.547134136241147, "grad_norm": 13.002974510192871, "learning_rate": 5.144842062435591e-06, "loss": 21.7042, "step": 270850 }, { "epoch": 0.5471543368738309, "grad_norm": 137.16197204589844, "learning_rate": 5.144493142725851e-06, "loss": 21.789, "step": 270860 }, { "epoch": 0.5471745375065147, "grad_norm": 8.841368675231934, "learning_rate": 5.144144222311868e-06, "loss": 10.6037, "step": 270870 }, { "epoch": 0.5471947381391985, "grad_norm": 352.5816345214844, "learning_rate": 5.143795301195343e-06, "loss": 17.2547, "step": 270880 }, { "epoch": 0.5472149387718823, "grad_norm": 1420.1578369140625, "learning_rate": 5.1434463793779795e-06, "loss": 13.2471, "step": 270890 }, { "epoch": 0.5472351394045661, "grad_norm": 334.9108581542969, "learning_rate": 5.143097456861474e-06, "loss": 39.9207, "step": 270900 }, { "epoch": 0.54725534003725, "grad_norm": 1152.33251953125, "learning_rate": 5.14274853364753e-06, "loss": 35.0431, "step": 270910 }, { "epoch": 0.5472755406699338, "grad_norm": 319.8374938964844, "learning_rate": 5.142399609737846e-06, "loss": 13.7766, "step": 270920 }, { "epoch": 0.5472957413026176, "grad_norm": 570.3477172851562, "learning_rate": 5.142050685134124e-06, "loss": 9.5086, "step": 270930 }, { "epoch": 0.5473159419353014, "grad_norm": 504.3333740234375, "learning_rate": 5.141701759838065e-06, "loss": 31.4781, "step": 270940 }, { "epoch": 0.5473361425679852, "grad_norm": 715.6351928710938, "learning_rate": 5.141352833851367e-06, "loss": 22.7826, "step": 270950 }, { "epoch": 0.5473563432006691, "grad_norm": 93.32371520996094, "learning_rate": 5.141003907175733e-06, "loss": 14.7414, "step": 270960 }, { "epoch": 0.5473765438333529, "grad_norm": 496.8685607910156, "learning_rate": 5.140654979812864e-06, "loss": 37.5867, "step": 270970 }, { "epoch": 0.5473967444660367, "grad_norm": 646.5079956054688, "learning_rate": 5.140306051764459e-06, "loss": 16.9507, "step": 270980 }, { "epoch": 0.5474169450987205, "grad_norm": 478.1387023925781, "learning_rate": 5.139957123032217e-06, "loss": 22.7514, "step": 270990 }, { "epoch": 0.5474371457314043, "grad_norm": 152.84176635742188, "learning_rate": 5.139608193617846e-06, "loss": 13.7706, "step": 271000 }, { "epoch": 0.5474573463640882, "grad_norm": 361.98858642578125, "learning_rate": 5.139259263523038e-06, "loss": 12.1614, "step": 271010 }, { "epoch": 0.547477546996772, "grad_norm": 924.8574829101562, "learning_rate": 5.138910332749499e-06, "loss": 17.7725, "step": 271020 }, { "epoch": 0.5474977476294558, "grad_norm": 611.1624145507812, "learning_rate": 5.138561401298926e-06, "loss": 23.9968, "step": 271030 }, { "epoch": 0.5475179482621396, "grad_norm": 111.28363800048828, "learning_rate": 5.138212469173022e-06, "loss": 23.7445, "step": 271040 }, { "epoch": 0.5475381488948234, "grad_norm": 335.9480285644531, "learning_rate": 5.1378635363734884e-06, "loss": 11.0445, "step": 271050 }, { "epoch": 0.5475583495275071, "grad_norm": 195.91561889648438, "learning_rate": 5.137514602902024e-06, "loss": 17.046, "step": 271060 }, { "epoch": 0.547578550160191, "grad_norm": 205.09426879882812, "learning_rate": 5.13716566876033e-06, "loss": 27.2901, "step": 271070 }, { "epoch": 0.5475987507928748, "grad_norm": 89.74513244628906, "learning_rate": 5.136816733950108e-06, "loss": 
20.4734, "step": 271080 }, { "epoch": 0.5476189514255586, "grad_norm": 491.3321838378906, "learning_rate": 5.136467798473057e-06, "loss": 11.6179, "step": 271090 }, { "epoch": 0.5476391520582424, "grad_norm": 633.1895751953125, "learning_rate": 5.136118862330876e-06, "loss": 34.9297, "step": 271100 }, { "epoch": 0.5476593526909262, "grad_norm": 216.28421020507812, "learning_rate": 5.135769925525272e-06, "loss": 16.7076, "step": 271110 }, { "epoch": 0.5476795533236101, "grad_norm": 574.0361328125, "learning_rate": 5.135420988057941e-06, "loss": 15.4687, "step": 271120 }, { "epoch": 0.5476997539562939, "grad_norm": 326.55908203125, "learning_rate": 5.135072049930584e-06, "loss": 20.5921, "step": 271130 }, { "epoch": 0.5477199545889777, "grad_norm": 225.47398376464844, "learning_rate": 5.1347231111449034e-06, "loss": 17.2656, "step": 271140 }, { "epoch": 0.5477401552216615, "grad_norm": 268.95947265625, "learning_rate": 5.134374171702596e-06, "loss": 20.3391, "step": 271150 }, { "epoch": 0.5477603558543453, "grad_norm": 530.3201904296875, "learning_rate": 5.1340252316053686e-06, "loss": 15.5422, "step": 271160 }, { "epoch": 0.5477805564870292, "grad_norm": 383.7457580566406, "learning_rate": 5.133676290854915e-06, "loss": 24.9986, "step": 271170 }, { "epoch": 0.547800757119713, "grad_norm": 243.0459442138672, "learning_rate": 5.133327349452941e-06, "loss": 16.5114, "step": 271180 }, { "epoch": 0.5478209577523968, "grad_norm": 416.4145812988281, "learning_rate": 5.1329784074011454e-06, "loss": 22.5556, "step": 271190 }, { "epoch": 0.5478411583850806, "grad_norm": 550.4816284179688, "learning_rate": 5.13262946470123e-06, "loss": 23.3909, "step": 271200 }, { "epoch": 0.5478613590177644, "grad_norm": 329.26409912109375, "learning_rate": 5.132280521354896e-06, "loss": 45.6847, "step": 271210 }, { "epoch": 0.5478815596504483, "grad_norm": 51.97325134277344, "learning_rate": 5.13193157736384e-06, "loss": 8.4784, "step": 271220 }, { "epoch": 0.5479017602831321, "grad_norm": 607.20849609375, "learning_rate": 5.131582632729766e-06, "loss": 17.7073, "step": 271230 }, { "epoch": 0.5479219609158159, "grad_norm": 425.3113098144531, "learning_rate": 5.131233687454375e-06, "loss": 20.6337, "step": 271240 }, { "epoch": 0.5479421615484997, "grad_norm": 841.8544921875, "learning_rate": 5.130884741539367e-06, "loss": 25.0148, "step": 271250 }, { "epoch": 0.5479623621811835, "grad_norm": 463.42962646484375, "learning_rate": 5.130535794986441e-06, "loss": 16.8616, "step": 271260 }, { "epoch": 0.5479825628138674, "grad_norm": 357.28094482421875, "learning_rate": 5.130186847797302e-06, "loss": 22.4319, "step": 271270 }, { "epoch": 0.5480027634465512, "grad_norm": 64.43353271484375, "learning_rate": 5.1298378999736465e-06, "loss": 10.2836, "step": 271280 }, { "epoch": 0.548022964079235, "grad_norm": 245.54632568359375, "learning_rate": 5.129488951517176e-06, "loss": 14.4729, "step": 271290 }, { "epoch": 0.5480431647119188, "grad_norm": 460.65496826171875, "learning_rate": 5.1291400024295946e-06, "loss": 15.8487, "step": 271300 }, { "epoch": 0.5480633653446025, "grad_norm": 472.4771423339844, "learning_rate": 5.128791052712597e-06, "loss": 28.0166, "step": 271310 }, { "epoch": 0.5480835659772864, "grad_norm": 399.5858154296875, "learning_rate": 5.128442102367891e-06, "loss": 20.5258, "step": 271320 }, { "epoch": 0.5481037666099702, "grad_norm": 112.1563491821289, "learning_rate": 5.128093151397172e-06, "loss": 11.7805, "step": 271330 }, { "epoch": 0.548123967242654, "grad_norm": 25.99222183227539, "learning_rate": 
5.127744199802143e-06, "loss": 17.4676, "step": 271340 }, { "epoch": 0.5481441678753378, "grad_norm": 237.99264526367188, "learning_rate": 5.127395247584503e-06, "loss": 14.9504, "step": 271350 }, { "epoch": 0.5481643685080216, "grad_norm": 364.6976013183594, "learning_rate": 5.127046294745955e-06, "loss": 13.0167, "step": 271360 }, { "epoch": 0.5481845691407055, "grad_norm": 189.3727264404297, "learning_rate": 5.126697341288198e-06, "loss": 16.8966, "step": 271370 }, { "epoch": 0.5482047697733893, "grad_norm": 285.57855224609375, "learning_rate": 5.126348387212935e-06, "loss": 20.2623, "step": 271380 }, { "epoch": 0.5482249704060731, "grad_norm": 0.8439492583274841, "learning_rate": 5.125999432521864e-06, "loss": 12.5823, "step": 271390 }, { "epoch": 0.5482451710387569, "grad_norm": 262.3326416015625, "learning_rate": 5.1256504772166885e-06, "loss": 17.7855, "step": 271400 }, { "epoch": 0.5482653716714407, "grad_norm": 997.421875, "learning_rate": 5.125301521299107e-06, "loss": 27.1073, "step": 271410 }, { "epoch": 0.5482855723041246, "grad_norm": 450.3413391113281, "learning_rate": 5.12495256477082e-06, "loss": 16.3724, "step": 271420 }, { "epoch": 0.5483057729368084, "grad_norm": 209.2963409423828, "learning_rate": 5.124603607633532e-06, "loss": 26.7913, "step": 271430 }, { "epoch": 0.5483259735694922, "grad_norm": 647.6463012695312, "learning_rate": 5.124254649888938e-06, "loss": 22.9064, "step": 271440 }, { "epoch": 0.548346174202176, "grad_norm": 273.53656005859375, "learning_rate": 5.123905691538744e-06, "loss": 20.3962, "step": 271450 }, { "epoch": 0.5483663748348598, "grad_norm": 306.15765380859375, "learning_rate": 5.123556732584648e-06, "loss": 7.9716, "step": 271460 }, { "epoch": 0.5483865754675437, "grad_norm": 337.2430419921875, "learning_rate": 5.1232077730283515e-06, "loss": 32.7838, "step": 271470 }, { "epoch": 0.5484067761002275, "grad_norm": 330.0291748046875, "learning_rate": 5.122858812871555e-06, "loss": 38.0643, "step": 271480 }, { "epoch": 0.5484269767329113, "grad_norm": 496.5209655761719, "learning_rate": 5.122509852115959e-06, "loss": 19.4325, "step": 271490 }, { "epoch": 0.5484471773655951, "grad_norm": 211.59396362304688, "learning_rate": 5.1221608907632665e-06, "loss": 10.1448, "step": 271500 }, { "epoch": 0.5484673779982789, "grad_norm": 427.5717468261719, "learning_rate": 5.121811928815176e-06, "loss": 21.4249, "step": 271510 }, { "epoch": 0.5484875786309628, "grad_norm": 429.03070068359375, "learning_rate": 5.121462966273388e-06, "loss": 10.5538, "step": 271520 }, { "epoch": 0.5485077792636466, "grad_norm": 0.0, "learning_rate": 5.121114003139605e-06, "loss": 29.7853, "step": 271530 }, { "epoch": 0.5485279798963304, "grad_norm": 381.1743469238281, "learning_rate": 5.120765039415528e-06, "loss": 24.2726, "step": 271540 }, { "epoch": 0.5485481805290142, "grad_norm": 305.34710693359375, "learning_rate": 5.120416075102855e-06, "loss": 13.1915, "step": 271550 }, { "epoch": 0.548568381161698, "grad_norm": 125.90432739257812, "learning_rate": 5.120067110203289e-06, "loss": 27.9554, "step": 271560 }, { "epoch": 0.5485885817943817, "grad_norm": 97.01104736328125, "learning_rate": 5.119718144718532e-06, "loss": 20.3684, "step": 271570 }, { "epoch": 0.5486087824270656, "grad_norm": 538.3457641601562, "learning_rate": 5.119369178650282e-06, "loss": 29.191, "step": 271580 }, { "epoch": 0.5486289830597494, "grad_norm": 475.2385559082031, "learning_rate": 5.119020212000242e-06, "loss": 31.225, "step": 271590 }, { "epoch": 0.5486491836924332, "grad_norm": 
417.1386413574219, "learning_rate": 5.118671244770111e-06, "loss": 21.5984, "step": 271600 }, { "epoch": 0.548669384325117, "grad_norm": 390.43096923828125, "learning_rate": 5.118322276961591e-06, "loss": 10.422, "step": 271610 }, { "epoch": 0.5486895849578008, "grad_norm": 91.09558868408203, "learning_rate": 5.117973308576383e-06, "loss": 39.9538, "step": 271620 }, { "epoch": 0.5487097855904847, "grad_norm": 272.7152099609375, "learning_rate": 5.117624339616186e-06, "loss": 18.8097, "step": 271630 }, { "epoch": 0.5487299862231685, "grad_norm": 36.199092864990234, "learning_rate": 5.117275370082704e-06, "loss": 19.1957, "step": 271640 }, { "epoch": 0.5487501868558523, "grad_norm": 124.73023223876953, "learning_rate": 5.116926399977634e-06, "loss": 17.8846, "step": 271650 }, { "epoch": 0.5487703874885361, "grad_norm": 499.0604553222656, "learning_rate": 5.116577429302682e-06, "loss": 8.9388, "step": 271660 }, { "epoch": 0.54879058812122, "grad_norm": 598.5208129882812, "learning_rate": 5.116228458059544e-06, "loss": 27.804, "step": 271670 }, { "epoch": 0.5488107887539038, "grad_norm": 224.67706298828125, "learning_rate": 5.115879486249922e-06, "loss": 12.2586, "step": 271680 }, { "epoch": 0.5488309893865876, "grad_norm": 588.0548095703125, "learning_rate": 5.115530513875517e-06, "loss": 13.8001, "step": 271690 }, { "epoch": 0.5488511900192714, "grad_norm": 401.2864685058594, "learning_rate": 5.115181540938032e-06, "loss": 16.7872, "step": 271700 }, { "epoch": 0.5488713906519552, "grad_norm": 765.8482666015625, "learning_rate": 5.114832567439164e-06, "loss": 27.1772, "step": 271710 }, { "epoch": 0.548891591284639, "grad_norm": 650.2271728515625, "learning_rate": 5.114483593380619e-06, "loss": 27.4372, "step": 271720 }, { "epoch": 0.5489117919173229, "grad_norm": 141.1337890625, "learning_rate": 5.114134618764093e-06, "loss": 15.951, "step": 271730 }, { "epoch": 0.5489319925500067, "grad_norm": 351.6592712402344, "learning_rate": 5.113785643591289e-06, "loss": 15.2401, "step": 271740 }, { "epoch": 0.5489521931826905, "grad_norm": 182.3080596923828, "learning_rate": 5.113436667863908e-06, "loss": 25.1616, "step": 271750 }, { "epoch": 0.5489723938153743, "grad_norm": 205.26272583007812, "learning_rate": 5.1130876915836495e-06, "loss": 6.5299, "step": 271760 }, { "epoch": 0.5489925944480581, "grad_norm": 0.0, "learning_rate": 5.112738714752216e-06, "loss": 18.4389, "step": 271770 }, { "epoch": 0.549012795080742, "grad_norm": 237.01702880859375, "learning_rate": 5.112389737371307e-06, "loss": 16.0729, "step": 271780 }, { "epoch": 0.5490329957134258, "grad_norm": 600.59033203125, "learning_rate": 5.112040759442624e-06, "loss": 18.2496, "step": 271790 }, { "epoch": 0.5490531963461096, "grad_norm": 106.196044921875, "learning_rate": 5.111691780967869e-06, "loss": 9.5061, "step": 271800 }, { "epoch": 0.5490733969787934, "grad_norm": 149.70947265625, "learning_rate": 5.111342801948741e-06, "loss": 11.8095, "step": 271810 }, { "epoch": 0.5490935976114772, "grad_norm": 280.45989990234375, "learning_rate": 5.110993822386943e-06, "loss": 34.7241, "step": 271820 }, { "epoch": 0.549113798244161, "grad_norm": 474.2350769042969, "learning_rate": 5.110644842284173e-06, "loss": 20.4751, "step": 271830 }, { "epoch": 0.5491339988768448, "grad_norm": 517.3894653320312, "learning_rate": 5.110295861642134e-06, "loss": 17.7995, "step": 271840 }, { "epoch": 0.5491541995095286, "grad_norm": 361.8367919921875, "learning_rate": 5.109946880462526e-06, "loss": 11.9078, "step": 271850 }, { "epoch": 0.5491744001422124, 
"grad_norm": 56.79434585571289, "learning_rate": 5.109597898747052e-06, "loss": 19.3067, "step": 271860 }, { "epoch": 0.5491946007748962, "grad_norm": 246.1206512451172, "learning_rate": 5.109248916497408e-06, "loss": 26.395, "step": 271870 }, { "epoch": 0.54921480140758, "grad_norm": 264.8724365234375, "learning_rate": 5.1088999337153015e-06, "loss": 11.9957, "step": 271880 }, { "epoch": 0.5492350020402639, "grad_norm": 530.26123046875, "learning_rate": 5.108550950402427e-06, "loss": 14.8299, "step": 271890 }, { "epoch": 0.5492552026729477, "grad_norm": 167.43325805664062, "learning_rate": 5.1082019665604895e-06, "loss": 17.903, "step": 271900 }, { "epoch": 0.5492754033056315, "grad_norm": 412.4073791503906, "learning_rate": 5.1078529821911895e-06, "loss": 29.7963, "step": 271910 }, { "epoch": 0.5492956039383153, "grad_norm": 336.5175476074219, "learning_rate": 5.107503997296225e-06, "loss": 27.0349, "step": 271920 }, { "epoch": 0.5493158045709992, "grad_norm": 312.0638427734375, "learning_rate": 5.107155011877302e-06, "loss": 19.7038, "step": 271930 }, { "epoch": 0.549336005203683, "grad_norm": 752.3568725585938, "learning_rate": 5.1068060259361155e-06, "loss": 15.015, "step": 271940 }, { "epoch": 0.5493562058363668, "grad_norm": 413.9532470703125, "learning_rate": 5.1064570394743705e-06, "loss": 19.5474, "step": 271950 }, { "epoch": 0.5493764064690506, "grad_norm": 341.1914367675781, "learning_rate": 5.106108052493768e-06, "loss": 14.9049, "step": 271960 }, { "epoch": 0.5493966071017344, "grad_norm": 18.22888946533203, "learning_rate": 5.105759064996007e-06, "loss": 11.7169, "step": 271970 }, { "epoch": 0.5494168077344183, "grad_norm": 1162.8382568359375, "learning_rate": 5.105410076982789e-06, "loss": 33.184, "step": 271980 }, { "epoch": 0.5494370083671021, "grad_norm": 1.4117162227630615, "learning_rate": 5.105061088455815e-06, "loss": 28.6601, "step": 271990 }, { "epoch": 0.5494572089997859, "grad_norm": 412.9360046386719, "learning_rate": 5.1047120994167855e-06, "loss": 14.7263, "step": 272000 }, { "epoch": 0.5494774096324697, "grad_norm": 227.7907257080078, "learning_rate": 5.104363109867403e-06, "loss": 16.2703, "step": 272010 }, { "epoch": 0.5494976102651535, "grad_norm": 109.08214569091797, "learning_rate": 5.104014119809366e-06, "loss": 14.6951, "step": 272020 }, { "epoch": 0.5495178108978374, "grad_norm": 335.3134765625, "learning_rate": 5.1036651292443774e-06, "loss": 23.7036, "step": 272030 }, { "epoch": 0.5495380115305212, "grad_norm": 922.094970703125, "learning_rate": 5.103316138174139e-06, "loss": 11.4573, "step": 272040 }, { "epoch": 0.549558212163205, "grad_norm": 224.463134765625, "learning_rate": 5.102967146600348e-06, "loss": 18.6878, "step": 272050 }, { "epoch": 0.5495784127958888, "grad_norm": 398.591796875, "learning_rate": 5.102618154524709e-06, "loss": 24.6213, "step": 272060 }, { "epoch": 0.5495986134285726, "grad_norm": 433.3297424316406, "learning_rate": 5.1022691619489205e-06, "loss": 14.8695, "step": 272070 }, { "epoch": 0.5496188140612563, "grad_norm": 447.2281494140625, "learning_rate": 5.101920168874686e-06, "loss": 23.7672, "step": 272080 }, { "epoch": 0.5496390146939402, "grad_norm": 301.21612548828125, "learning_rate": 5.101571175303704e-06, "loss": 22.6396, "step": 272090 }, { "epoch": 0.549659215326624, "grad_norm": 260.4903259277344, "learning_rate": 5.101222181237676e-06, "loss": 23.6789, "step": 272100 }, { "epoch": 0.5496794159593078, "grad_norm": 305.85595703125, "learning_rate": 5.1008731866783045e-06, "loss": 21.5858, "step": 272110 }, { 
"epoch": 0.5496996165919916, "grad_norm": 766.408935546875, "learning_rate": 5.100524191627289e-06, "loss": 18.3037, "step": 272120 }, { "epoch": 0.5497198172246754, "grad_norm": 489.1642150878906, "learning_rate": 5.10017519608633e-06, "loss": 19.8543, "step": 272130 }, { "epoch": 0.5497400178573593, "grad_norm": 459.5745544433594, "learning_rate": 5.09982620005713e-06, "loss": 14.109, "step": 272140 }, { "epoch": 0.5497602184900431, "grad_norm": 449.01348876953125, "learning_rate": 5.09947720354139e-06, "loss": 16.2363, "step": 272150 }, { "epoch": 0.5497804191227269, "grad_norm": 355.3873291015625, "learning_rate": 5.0991282065408086e-06, "loss": 31.8173, "step": 272160 }, { "epoch": 0.5498006197554107, "grad_norm": 507.5166320800781, "learning_rate": 5.098779209057089e-06, "loss": 17.5008, "step": 272170 }, { "epoch": 0.5498208203880945, "grad_norm": 612.6504516601562, "learning_rate": 5.098430211091931e-06, "loss": 27.119, "step": 272180 }, { "epoch": 0.5498410210207784, "grad_norm": 195.0165557861328, "learning_rate": 5.098081212647036e-06, "loss": 11.7921, "step": 272190 }, { "epoch": 0.5498612216534622, "grad_norm": 9.930233001708984, "learning_rate": 5.097732213724107e-06, "loss": 13.5304, "step": 272200 }, { "epoch": 0.549881422286146, "grad_norm": 826.8731689453125, "learning_rate": 5.0973832143248405e-06, "loss": 21.6225, "step": 272210 }, { "epoch": 0.5499016229188298, "grad_norm": 240.11874389648438, "learning_rate": 5.097034214450941e-06, "loss": 18.5178, "step": 272220 }, { "epoch": 0.5499218235515136, "grad_norm": 293.3193054199219, "learning_rate": 5.096685214104109e-06, "loss": 28.7017, "step": 272230 }, { "epoch": 0.5499420241841975, "grad_norm": 376.0853271484375, "learning_rate": 5.096336213286044e-06, "loss": 35.3474, "step": 272240 }, { "epoch": 0.5499622248168813, "grad_norm": 399.8958435058594, "learning_rate": 5.09598721199845e-06, "loss": 15.9976, "step": 272250 }, { "epoch": 0.5499824254495651, "grad_norm": 211.1976318359375, "learning_rate": 5.095638210243023e-06, "loss": 10.4221, "step": 272260 }, { "epoch": 0.5500026260822489, "grad_norm": 426.4907531738281, "learning_rate": 5.09528920802147e-06, "loss": 17.1838, "step": 272270 }, { "epoch": 0.5500228267149327, "grad_norm": 147.65066528320312, "learning_rate": 5.094940205335487e-06, "loss": 11.8268, "step": 272280 }, { "epoch": 0.5500430273476166, "grad_norm": 263.3006896972656, "learning_rate": 5.094591202186777e-06, "loss": 28.0874, "step": 272290 }, { "epoch": 0.5500632279803004, "grad_norm": 771.3316040039062, "learning_rate": 5.0942421985770415e-06, "loss": 12.1778, "step": 272300 }, { "epoch": 0.5500834286129842, "grad_norm": 194.57643127441406, "learning_rate": 5.093893194507982e-06, "loss": 25.5668, "step": 272310 }, { "epoch": 0.550103629245668, "grad_norm": 8.136611938476562, "learning_rate": 5.093544189981297e-06, "loss": 13.8155, "step": 272320 }, { "epoch": 0.5501238298783518, "grad_norm": 263.6270446777344, "learning_rate": 5.093195184998689e-06, "loss": 14.6481, "step": 272330 }, { "epoch": 0.5501440305110356, "grad_norm": 264.63226318359375, "learning_rate": 5.092846179561859e-06, "loss": 12.843, "step": 272340 }, { "epoch": 0.5501642311437194, "grad_norm": 507.2126770019531, "learning_rate": 5.092497173672508e-06, "loss": 19.0892, "step": 272350 }, { "epoch": 0.5501844317764032, "grad_norm": 703.0762329101562, "learning_rate": 5.092148167332338e-06, "loss": 27.9065, "step": 272360 }, { "epoch": 0.550204632409087, "grad_norm": 303.1719055175781, "learning_rate": 5.091799160543047e-06, "loss": 
18.8407, "step": 272370 }, { "epoch": 0.5502248330417708, "grad_norm": 279.81689453125, "learning_rate": 5.09145015330634e-06, "loss": 18.0741, "step": 272380 }, { "epoch": 0.5502450336744547, "grad_norm": 334.1409912109375, "learning_rate": 5.091101145623916e-06, "loss": 18.1141, "step": 272390 }, { "epoch": 0.5502652343071385, "grad_norm": 299.89886474609375, "learning_rate": 5.090752137497474e-06, "loss": 21.4631, "step": 272400 }, { "epoch": 0.5502854349398223, "grad_norm": 216.7633056640625, "learning_rate": 5.090403128928719e-06, "loss": 12.8604, "step": 272410 }, { "epoch": 0.5503056355725061, "grad_norm": 591.6594848632812, "learning_rate": 5.090054119919349e-06, "loss": 35.0272, "step": 272420 }, { "epoch": 0.5503258362051899, "grad_norm": 1284.27978515625, "learning_rate": 5.089705110471068e-06, "loss": 20.0657, "step": 272430 }, { "epoch": 0.5503460368378738, "grad_norm": 298.3394775390625, "learning_rate": 5.089356100585574e-06, "loss": 9.1207, "step": 272440 }, { "epoch": 0.5503662374705576, "grad_norm": 102.35218811035156, "learning_rate": 5.089007090264568e-06, "loss": 12.6346, "step": 272450 }, { "epoch": 0.5503864381032414, "grad_norm": 466.7763366699219, "learning_rate": 5.088658079509754e-06, "loss": 18.0275, "step": 272460 }, { "epoch": 0.5504066387359252, "grad_norm": 719.829833984375, "learning_rate": 5.08830906832283e-06, "loss": 21.3638, "step": 272470 }, { "epoch": 0.550426839368609, "grad_norm": 798.9398193359375, "learning_rate": 5.087960056705499e-06, "loss": 35.3024, "step": 272480 }, { "epoch": 0.5504470400012929, "grad_norm": 353.4775695800781, "learning_rate": 5.087611044659462e-06, "loss": 14.3152, "step": 272490 }, { "epoch": 0.5504672406339767, "grad_norm": 469.1107177734375, "learning_rate": 5.087262032186418e-06, "loss": 21.2768, "step": 272500 }, { "epoch": 0.5504874412666605, "grad_norm": 538.6112060546875, "learning_rate": 5.08691301928807e-06, "loss": 13.7717, "step": 272510 }, { "epoch": 0.5505076418993443, "grad_norm": 96.44287872314453, "learning_rate": 5.08656400596612e-06, "loss": 21.8078, "step": 272520 }, { "epoch": 0.5505278425320281, "grad_norm": 480.8155517578125, "learning_rate": 5.086214992222265e-06, "loss": 28.9269, "step": 272530 }, { "epoch": 0.550548043164712, "grad_norm": 1.9340639114379883, "learning_rate": 5.08586597805821e-06, "loss": 6.4889, "step": 272540 }, { "epoch": 0.5505682437973958, "grad_norm": 289.463623046875, "learning_rate": 5.085516963475653e-06, "loss": 25.3641, "step": 272550 }, { "epoch": 0.5505884444300796, "grad_norm": 544.4713134765625, "learning_rate": 5.0851679484762996e-06, "loss": 16.4804, "step": 272560 }, { "epoch": 0.5506086450627634, "grad_norm": 432.995849609375, "learning_rate": 5.084818933061846e-06, "loss": 21.1848, "step": 272570 }, { "epoch": 0.5506288456954472, "grad_norm": 239.32839965820312, "learning_rate": 5.084469917233995e-06, "loss": 18.6366, "step": 272580 }, { "epoch": 0.5506490463281309, "grad_norm": 328.63525390625, "learning_rate": 5.084120900994449e-06, "loss": 19.7848, "step": 272590 }, { "epoch": 0.5506692469608148, "grad_norm": 429.8453674316406, "learning_rate": 5.083771884344908e-06, "loss": 35.6967, "step": 272600 }, { "epoch": 0.5506894475934986, "grad_norm": 256.73309326171875, "learning_rate": 5.0834228672870725e-06, "loss": 20.9563, "step": 272610 }, { "epoch": 0.5507096482261824, "grad_norm": 314.0640869140625, "learning_rate": 5.083073849822643e-06, "loss": 36.8181, "step": 272620 }, { "epoch": 0.5507298488588662, "grad_norm": 316.7877197265625, "learning_rate": 
5.0827248319533225e-06, "loss": 32.7444, "step": 272630 }, { "epoch": 0.55075004949155, "grad_norm": 16.720890045166016, "learning_rate": 5.082375813680811e-06, "loss": 11.1595, "step": 272640 }, { "epoch": 0.5507702501242339, "grad_norm": 28.173297882080078, "learning_rate": 5.0820267950068115e-06, "loss": 15.0024, "step": 272650 }, { "epoch": 0.5507904507569177, "grad_norm": 62.392906188964844, "learning_rate": 5.0816777759330215e-06, "loss": 14.6509, "step": 272660 }, { "epoch": 0.5508106513896015, "grad_norm": 285.71002197265625, "learning_rate": 5.081328756461145e-06, "loss": 14.8924, "step": 272670 }, { "epoch": 0.5508308520222853, "grad_norm": 183.81626892089844, "learning_rate": 5.08097973659288e-06, "loss": 20.9146, "step": 272680 }, { "epoch": 0.5508510526549691, "grad_norm": 0.0, "learning_rate": 5.0806307163299305e-06, "loss": 10.7393, "step": 272690 }, { "epoch": 0.550871253287653, "grad_norm": 542.828125, "learning_rate": 5.080281695673999e-06, "loss": 16.5757, "step": 272700 }, { "epoch": 0.5508914539203368, "grad_norm": 549.6185302734375, "learning_rate": 5.079932674626781e-06, "loss": 10.9822, "step": 272710 }, { "epoch": 0.5509116545530206, "grad_norm": 942.9983520507812, "learning_rate": 5.079583653189982e-06, "loss": 14.7968, "step": 272720 }, { "epoch": 0.5509318551857044, "grad_norm": 0.18729591369628906, "learning_rate": 5.079234631365303e-06, "loss": 15.978, "step": 272730 }, { "epoch": 0.5509520558183882, "grad_norm": 160.91488647460938, "learning_rate": 5.0788856091544425e-06, "loss": 17.7453, "step": 272740 }, { "epoch": 0.5509722564510721, "grad_norm": 187.86961364746094, "learning_rate": 5.078536586559104e-06, "loss": 12.1295, "step": 272750 }, { "epoch": 0.5509924570837559, "grad_norm": 461.9878234863281, "learning_rate": 5.078187563580988e-06, "loss": 26.6588, "step": 272760 }, { "epoch": 0.5510126577164397, "grad_norm": 771.72705078125, "learning_rate": 5.077838540221794e-06, "loss": 28.8553, "step": 272770 }, { "epoch": 0.5510328583491235, "grad_norm": 229.1396026611328, "learning_rate": 5.077489516483225e-06, "loss": 56.1018, "step": 272780 }, { "epoch": 0.5510530589818073, "grad_norm": 297.3424072265625, "learning_rate": 5.077140492366982e-06, "loss": 16.4137, "step": 272790 }, { "epoch": 0.5510732596144912, "grad_norm": 705.1866455078125, "learning_rate": 5.0767914678747655e-06, "loss": 24.9397, "step": 272800 }, { "epoch": 0.551093460247175, "grad_norm": 580.1013793945312, "learning_rate": 5.076442443008277e-06, "loss": 16.4608, "step": 272810 }, { "epoch": 0.5511136608798588, "grad_norm": 289.314453125, "learning_rate": 5.076093417769216e-06, "loss": 15.8894, "step": 272820 }, { "epoch": 0.5511338615125426, "grad_norm": 588.11474609375, "learning_rate": 5.075744392159285e-06, "loss": 16.0408, "step": 272830 }, { "epoch": 0.5511540621452264, "grad_norm": 664.2628784179688, "learning_rate": 5.075395366180186e-06, "loss": 22.328, "step": 272840 }, { "epoch": 0.5511742627779102, "grad_norm": 30.495628356933594, "learning_rate": 5.0750463398336195e-06, "loss": 46.8985, "step": 272850 }, { "epoch": 0.551194463410594, "grad_norm": 509.7676086425781, "learning_rate": 5.074697313121286e-06, "loss": 26.6984, "step": 272860 }, { "epoch": 0.5512146640432778, "grad_norm": 735.7946166992188, "learning_rate": 5.074348286044884e-06, "loss": 33.8265, "step": 272870 }, { "epoch": 0.5512348646759616, "grad_norm": 172.11744689941406, "learning_rate": 5.073999258606121e-06, "loss": 9.6947, "step": 272880 }, { "epoch": 0.5512550653086454, "grad_norm": 338.67047119140625, 
"learning_rate": 5.073650230806693e-06, "loss": 19.2771, "step": 272890 }, { "epoch": 0.5512752659413293, "grad_norm": 427.8309326171875, "learning_rate": 5.073301202648304e-06, "loss": 16.5368, "step": 272900 }, { "epoch": 0.5512954665740131, "grad_norm": 640.8612670898438, "learning_rate": 5.072952174132653e-06, "loss": 21.4062, "step": 272910 }, { "epoch": 0.5513156672066969, "grad_norm": 132.24171447753906, "learning_rate": 5.072603145261442e-06, "loss": 11.2991, "step": 272920 }, { "epoch": 0.5513358678393807, "grad_norm": 178.75592041015625, "learning_rate": 5.072254116036372e-06, "loss": 11.4653, "step": 272930 }, { "epoch": 0.5513560684720645, "grad_norm": 96.27967071533203, "learning_rate": 5.071905086459145e-06, "loss": 25.1724, "step": 272940 }, { "epoch": 0.5513762691047484, "grad_norm": 99.01346588134766, "learning_rate": 5.0715560565314595e-06, "loss": 21.9778, "step": 272950 }, { "epoch": 0.5513964697374322, "grad_norm": 502.8134765625, "learning_rate": 5.071207026255019e-06, "loss": 12.7288, "step": 272960 }, { "epoch": 0.551416670370116, "grad_norm": 272.5439453125, "learning_rate": 5.070857995631526e-06, "loss": 7.4142, "step": 272970 }, { "epoch": 0.5514368710027998, "grad_norm": 695.0064086914062, "learning_rate": 5.070508964662677e-06, "loss": 38.9627, "step": 272980 }, { "epoch": 0.5514570716354836, "grad_norm": 1581.58984375, "learning_rate": 5.070159933350178e-06, "loss": 15.3666, "step": 272990 }, { "epoch": 0.5514772722681675, "grad_norm": 410.3154296875, "learning_rate": 5.069810901695727e-06, "loss": 26.9396, "step": 273000 }, { "epoch": 0.5514974729008513, "grad_norm": 3.71242356300354, "learning_rate": 5.0694618697010265e-06, "loss": 19.1724, "step": 273010 }, { "epoch": 0.5515176735335351, "grad_norm": 614.3184814453125, "learning_rate": 5.069112837367777e-06, "loss": 17.708, "step": 273020 }, { "epoch": 0.5515378741662189, "grad_norm": 496.86505126953125, "learning_rate": 5.068763804697679e-06, "loss": 29.1489, "step": 273030 }, { "epoch": 0.5515580747989027, "grad_norm": 333.1636657714844, "learning_rate": 5.068414771692437e-06, "loss": 18.1164, "step": 273040 }, { "epoch": 0.5515782754315866, "grad_norm": 1166.0439453125, "learning_rate": 5.068065738353748e-06, "loss": 34.7047, "step": 273050 }, { "epoch": 0.5515984760642704, "grad_norm": 240.95263671875, "learning_rate": 5.067716704683315e-06, "loss": 14.7328, "step": 273060 }, { "epoch": 0.5516186766969542, "grad_norm": 405.3726501464844, "learning_rate": 5.067367670682839e-06, "loss": 21.5972, "step": 273070 }, { "epoch": 0.551638877329638, "grad_norm": 255.45797729492188, "learning_rate": 5.067018636354021e-06, "loss": 11.9275, "step": 273080 }, { "epoch": 0.5516590779623218, "grad_norm": 273.974365234375, "learning_rate": 5.066669601698562e-06, "loss": 13.6709, "step": 273090 }, { "epoch": 0.5516792785950057, "grad_norm": 1166.1033935546875, "learning_rate": 5.066320566718165e-06, "loss": 25.2127, "step": 273100 }, { "epoch": 0.5516994792276894, "grad_norm": 423.2397155761719, "learning_rate": 5.065971531414528e-06, "loss": 19.6114, "step": 273110 }, { "epoch": 0.5517196798603732, "grad_norm": 349.9311828613281, "learning_rate": 5.0656224957893545e-06, "loss": 19.3992, "step": 273120 }, { "epoch": 0.551739880493057, "grad_norm": 718.978759765625, "learning_rate": 5.065273459844345e-06, "loss": 15.1803, "step": 273130 }, { "epoch": 0.5517600811257408, "grad_norm": 319.8184509277344, "learning_rate": 5.0649244235812e-06, "loss": 18.1024, "step": 273140 }, { "epoch": 0.5517802817584246, "grad_norm": 0.0, 
"learning_rate": 5.0645753870016224e-06, "loss": 16.6912, "step": 273150 }, { "epoch": 0.5518004823911085, "grad_norm": 1063.666015625, "learning_rate": 5.0642263501073096e-06, "loss": 24.3302, "step": 273160 }, { "epoch": 0.5518206830237923, "grad_norm": 451.7352600097656, "learning_rate": 5.063877312899969e-06, "loss": 23.0781, "step": 273170 }, { "epoch": 0.5518408836564761, "grad_norm": 255.57518005371094, "learning_rate": 5.063528275381295e-06, "loss": 15.9749, "step": 273180 }, { "epoch": 0.5518610842891599, "grad_norm": 34.647613525390625, "learning_rate": 5.063179237552993e-06, "loss": 16.2164, "step": 273190 }, { "epoch": 0.5518812849218437, "grad_norm": 861.7901000976562, "learning_rate": 5.062830199416764e-06, "loss": 24.4547, "step": 273200 }, { "epoch": 0.5519014855545276, "grad_norm": 538.9898681640625, "learning_rate": 5.062481160974308e-06, "loss": 15.3167, "step": 273210 }, { "epoch": 0.5519216861872114, "grad_norm": 351.23944091796875, "learning_rate": 5.0621321222273255e-06, "loss": 32.0002, "step": 273220 }, { "epoch": 0.5519418868198952, "grad_norm": 411.3854675292969, "learning_rate": 5.061783083177519e-06, "loss": 12.7911, "step": 273230 }, { "epoch": 0.551962087452579, "grad_norm": 592.5401611328125, "learning_rate": 5.061434043826588e-06, "loss": 19.5976, "step": 273240 }, { "epoch": 0.5519822880852628, "grad_norm": 609.3246459960938, "learning_rate": 5.061085004176236e-06, "loss": 19.3405, "step": 273250 }, { "epoch": 0.5520024887179467, "grad_norm": 154.5178985595703, "learning_rate": 5.060735964228164e-06, "loss": 10.1593, "step": 273260 }, { "epoch": 0.5520226893506305, "grad_norm": 155.55648803710938, "learning_rate": 5.060386923984071e-06, "loss": 13.301, "step": 273270 }, { "epoch": 0.5520428899833143, "grad_norm": 1033.5396728515625, "learning_rate": 5.06003788344566e-06, "loss": 33.5247, "step": 273280 }, { "epoch": 0.5520630906159981, "grad_norm": 265.4301452636719, "learning_rate": 5.0596888426146325e-06, "loss": 13.5706, "step": 273290 }, { "epoch": 0.5520832912486819, "grad_norm": 465.9914855957031, "learning_rate": 5.059339801492687e-06, "loss": 24.8596, "step": 273300 }, { "epoch": 0.5521034918813658, "grad_norm": 377.5484924316406, "learning_rate": 5.058990760081528e-06, "loss": 20.5792, "step": 273310 }, { "epoch": 0.5521236925140496, "grad_norm": 484.415283203125, "learning_rate": 5.058641718382853e-06, "loss": 32.0516, "step": 273320 }, { "epoch": 0.5521438931467334, "grad_norm": 103.9054183959961, "learning_rate": 5.058292676398368e-06, "loss": 12.6073, "step": 273330 }, { "epoch": 0.5521640937794172, "grad_norm": 226.50808715820312, "learning_rate": 5.0579436341297705e-06, "loss": 14.3633, "step": 273340 }, { "epoch": 0.552184294412101, "grad_norm": 573.47412109375, "learning_rate": 5.0575945915787616e-06, "loss": 18.9658, "step": 273350 }, { "epoch": 0.5522044950447847, "grad_norm": 885.1104125976562, "learning_rate": 5.057245548747045e-06, "loss": 26.1932, "step": 273360 }, { "epoch": 0.5522246956774686, "grad_norm": 221.2861785888672, "learning_rate": 5.056896505636322e-06, "loss": 15.6163, "step": 273370 }, { "epoch": 0.5522448963101524, "grad_norm": 258.8567199707031, "learning_rate": 5.05654746224829e-06, "loss": 25.2411, "step": 273380 }, { "epoch": 0.5522650969428362, "grad_norm": 690.5338745117188, "learning_rate": 5.056198418584653e-06, "loss": 28.2111, "step": 273390 }, { "epoch": 0.55228529757552, "grad_norm": 625.1259155273438, "learning_rate": 5.055849374647112e-06, "loss": 23.1402, "step": 273400 }, { "epoch": 0.5523054982082038, 
"grad_norm": 353.44970703125, "learning_rate": 5.0555003304373674e-06, "loss": 26.7578, "step": 273410 }, { "epoch": 0.5523256988408877, "grad_norm": 520.411865234375, "learning_rate": 5.0551512859571215e-06, "loss": 13.3652, "step": 273420 }, { "epoch": 0.5523458994735715, "grad_norm": 476.323486328125, "learning_rate": 5.054802241208075e-06, "loss": 38.6315, "step": 273430 }, { "epoch": 0.5523661001062553, "grad_norm": 390.4078674316406, "learning_rate": 5.054453196191929e-06, "loss": 13.9013, "step": 273440 }, { "epoch": 0.5523863007389391, "grad_norm": 366.37664794921875, "learning_rate": 5.054104150910384e-06, "loss": 17.8417, "step": 273450 }, { "epoch": 0.552406501371623, "grad_norm": 337.0237731933594, "learning_rate": 5.053755105365142e-06, "loss": 24.9817, "step": 273460 }, { "epoch": 0.5524267020043068, "grad_norm": 534.3334350585938, "learning_rate": 5.053406059557906e-06, "loss": 21.2075, "step": 273470 }, { "epoch": 0.5524469026369906, "grad_norm": 381.9807434082031, "learning_rate": 5.0530570134903725e-06, "loss": 20.7716, "step": 273480 }, { "epoch": 0.5524671032696744, "grad_norm": 287.4529724121094, "learning_rate": 5.052707967164248e-06, "loss": 13.157, "step": 273490 }, { "epoch": 0.5524873039023582, "grad_norm": 414.7626647949219, "learning_rate": 5.05235892058123e-06, "loss": 26.3657, "step": 273500 }, { "epoch": 0.552507504535042, "grad_norm": 446.5314636230469, "learning_rate": 5.052009873743021e-06, "loss": 17.5753, "step": 273510 }, { "epoch": 0.5525277051677259, "grad_norm": 54.65743637084961, "learning_rate": 5.051660826651322e-06, "loss": 24.5452, "step": 273520 }, { "epoch": 0.5525479058004097, "grad_norm": 271.3385925292969, "learning_rate": 5.0513117793078345e-06, "loss": 17.0786, "step": 273530 }, { "epoch": 0.5525681064330935, "grad_norm": 163.66925048828125, "learning_rate": 5.0509627317142606e-06, "loss": 14.266, "step": 273540 }, { "epoch": 0.5525883070657773, "grad_norm": 467.8886413574219, "learning_rate": 5.050613683872299e-06, "loss": 21.9826, "step": 273550 }, { "epoch": 0.5526085076984611, "grad_norm": 150.468505859375, "learning_rate": 5.050264635783654e-06, "loss": 11.6281, "step": 273560 }, { "epoch": 0.552628708331145, "grad_norm": 307.3399658203125, "learning_rate": 5.049915587450024e-06, "loss": 18.0145, "step": 273570 }, { "epoch": 0.5526489089638288, "grad_norm": 926.0619506835938, "learning_rate": 5.049566538873113e-06, "loss": 27.2354, "step": 273580 }, { "epoch": 0.5526691095965126, "grad_norm": 92.09886932373047, "learning_rate": 5.049217490054619e-06, "loss": 16.0031, "step": 273590 }, { "epoch": 0.5526893102291964, "grad_norm": 300.13995361328125, "learning_rate": 5.048868440996246e-06, "loss": 20.6436, "step": 273600 }, { "epoch": 0.5527095108618802, "grad_norm": 410.94781494140625, "learning_rate": 5.048519391699693e-06, "loss": 20.6964, "step": 273610 }, { "epoch": 0.552729711494564, "grad_norm": 257.89715576171875, "learning_rate": 5.048170342166664e-06, "loss": 16.4832, "step": 273620 }, { "epoch": 0.5527499121272478, "grad_norm": 194.0541534423828, "learning_rate": 5.0478212923988576e-06, "loss": 13.7152, "step": 273630 }, { "epoch": 0.5527701127599316, "grad_norm": 584.8863525390625, "learning_rate": 5.047472242397976e-06, "loss": 16.2681, "step": 273640 }, { "epoch": 0.5527903133926154, "grad_norm": 461.9921875, "learning_rate": 5.047123192165722e-06, "loss": 10.395, "step": 273650 }, { "epoch": 0.5528105140252992, "grad_norm": 571.4125366210938, "learning_rate": 5.046774141703793e-06, "loss": 25.3463, "step": 273660 }, { 
"epoch": 0.5528307146579831, "grad_norm": 6.0866923332214355, "learning_rate": 5.046425091013893e-06, "loss": 13.3849, "step": 273670 }, { "epoch": 0.5528509152906669, "grad_norm": 218.32177734375, "learning_rate": 5.046076040097722e-06, "loss": 27.7339, "step": 273680 }, { "epoch": 0.5528711159233507, "grad_norm": 613.5769653320312, "learning_rate": 5.045726988956984e-06, "loss": 17.5133, "step": 273690 }, { "epoch": 0.5528913165560345, "grad_norm": 693.943603515625, "learning_rate": 5.045377937593376e-06, "loss": 26.6492, "step": 273700 }, { "epoch": 0.5529115171887183, "grad_norm": 213.2667999267578, "learning_rate": 5.045028886008605e-06, "loss": 10.0386, "step": 273710 }, { "epoch": 0.5529317178214022, "grad_norm": 581.2164916992188, "learning_rate": 5.044679834204366e-06, "loss": 27.7785, "step": 273720 }, { "epoch": 0.552951918454086, "grad_norm": 290.66510009765625, "learning_rate": 5.044330782182363e-06, "loss": 13.7657, "step": 273730 }, { "epoch": 0.5529721190867698, "grad_norm": 529.0045166015625, "learning_rate": 5.043981729944298e-06, "loss": 24.8721, "step": 273740 }, { "epoch": 0.5529923197194536, "grad_norm": 259.0770263671875, "learning_rate": 5.04363267749187e-06, "loss": 13.4246, "step": 273750 }, { "epoch": 0.5530125203521374, "grad_norm": 870.479736328125, "learning_rate": 5.043283624826783e-06, "loss": 21.902, "step": 273760 }, { "epoch": 0.5530327209848213, "grad_norm": 241.07386779785156, "learning_rate": 5.042934571950735e-06, "loss": 31.6515, "step": 273770 }, { "epoch": 0.5530529216175051, "grad_norm": 159.55506896972656, "learning_rate": 5.042585518865431e-06, "loss": 15.1274, "step": 273780 }, { "epoch": 0.5530731222501889, "grad_norm": 244.0486297607422, "learning_rate": 5.04223646557257e-06, "loss": 10.7543, "step": 273790 }, { "epoch": 0.5530933228828727, "grad_norm": 417.79833984375, "learning_rate": 5.041887412073853e-06, "loss": 22.1971, "step": 273800 }, { "epoch": 0.5531135235155565, "grad_norm": 55.917091369628906, "learning_rate": 5.041538358370983e-06, "loss": 17.2982, "step": 273810 }, { "epoch": 0.5531337241482404, "grad_norm": 294.291259765625, "learning_rate": 5.04118930446566e-06, "loss": 29.3356, "step": 273820 }, { "epoch": 0.5531539247809242, "grad_norm": 225.97354125976562, "learning_rate": 5.040840250359584e-06, "loss": 22.751, "step": 273830 }, { "epoch": 0.553174125413608, "grad_norm": 387.2437744140625, "learning_rate": 5.0404911960544575e-06, "loss": 8.9484, "step": 273840 }, { "epoch": 0.5531943260462918, "grad_norm": 531.6309814453125, "learning_rate": 5.040142141551982e-06, "loss": 15.1858, "step": 273850 }, { "epoch": 0.5532145266789756, "grad_norm": 321.93206787109375, "learning_rate": 5.039793086853859e-06, "loss": 14.421, "step": 273860 }, { "epoch": 0.5532347273116593, "grad_norm": 742.7291259765625, "learning_rate": 5.039444031961791e-06, "loss": 21.1888, "step": 273870 }, { "epoch": 0.5532549279443432, "grad_norm": 496.53289794921875, "learning_rate": 5.0390949768774755e-06, "loss": 20.8615, "step": 273880 }, { "epoch": 0.553275128577027, "grad_norm": 305.88897705078125, "learning_rate": 5.038745921602617e-06, "loss": 28.2547, "step": 273890 }, { "epoch": 0.5532953292097108, "grad_norm": 366.7085266113281, "learning_rate": 5.038396866138915e-06, "loss": 20.9414, "step": 273900 }, { "epoch": 0.5533155298423946, "grad_norm": 681.21435546875, "learning_rate": 5.03804781048807e-06, "loss": 16.4326, "step": 273910 }, { "epoch": 0.5533357304750784, "grad_norm": 278.194580078125, "learning_rate": 5.037698754651786e-06, "loss": 
26.1326, "step": 273920 }, { "epoch": 0.5533559311077623, "grad_norm": 89.72135925292969, "learning_rate": 5.037349698631762e-06, "loss": 22.8615, "step": 273930 }, { "epoch": 0.5533761317404461, "grad_norm": 130.1783447265625, "learning_rate": 5.037000642429701e-06, "loss": 14.1529, "step": 273940 }, { "epoch": 0.5533963323731299, "grad_norm": 12.04598617553711, "learning_rate": 5.036651586047303e-06, "loss": 18.4062, "step": 273950 }, { "epoch": 0.5534165330058137, "grad_norm": 390.1788330078125, "learning_rate": 5.03630252948627e-06, "loss": 11.0696, "step": 273960 }, { "epoch": 0.5534367336384975, "grad_norm": 823.8593139648438, "learning_rate": 5.035953472748304e-06, "loss": 24.8164, "step": 273970 }, { "epoch": 0.5534569342711814, "grad_norm": 702.7938842773438, "learning_rate": 5.035604415835102e-06, "loss": 27.215, "step": 273980 }, { "epoch": 0.5534771349038652, "grad_norm": 1129.494873046875, "learning_rate": 5.035255358748371e-06, "loss": 27.6904, "step": 273990 }, { "epoch": 0.553497335536549, "grad_norm": 90.69853973388672, "learning_rate": 5.034906301489808e-06, "loss": 24.6617, "step": 274000 }, { "epoch": 0.5535175361692328, "grad_norm": 323.79534912109375, "learning_rate": 5.034557244061117e-06, "loss": 26.2408, "step": 274010 }, { "epoch": 0.5535377368019166, "grad_norm": 139.48471069335938, "learning_rate": 5.034208186463998e-06, "loss": 16.7441, "step": 274020 }, { "epoch": 0.5535579374346005, "grad_norm": 331.9467468261719, "learning_rate": 5.0338591287001525e-06, "loss": 15.302, "step": 274030 }, { "epoch": 0.5535781380672843, "grad_norm": 451.3541564941406, "learning_rate": 5.03351007077128e-06, "loss": 18.568, "step": 274040 }, { "epoch": 0.5535983386999681, "grad_norm": 647.1574096679688, "learning_rate": 5.033161012679087e-06, "loss": 16.6301, "step": 274050 }, { "epoch": 0.5536185393326519, "grad_norm": 532.1934204101562, "learning_rate": 5.032811954425268e-06, "loss": 18.1645, "step": 274060 }, { "epoch": 0.5536387399653357, "grad_norm": 246.70433044433594, "learning_rate": 5.0324628960115296e-06, "loss": 9.0928, "step": 274070 }, { "epoch": 0.5536589405980196, "grad_norm": 290.0281982421875, "learning_rate": 5.032113837439571e-06, "loss": 11.8767, "step": 274080 }, { "epoch": 0.5536791412307034, "grad_norm": 175.98838806152344, "learning_rate": 5.0317647787110915e-06, "loss": 24.4881, "step": 274090 }, { "epoch": 0.5536993418633872, "grad_norm": 110.80126953125, "learning_rate": 5.031415719827796e-06, "loss": 21.7746, "step": 274100 }, { "epoch": 0.553719542496071, "grad_norm": 564.2908935546875, "learning_rate": 5.031066660791383e-06, "loss": 21.3942, "step": 274110 }, { "epoch": 0.5537397431287548, "grad_norm": 557.5830688476562, "learning_rate": 5.030717601603556e-06, "loss": 15.1739, "step": 274120 }, { "epoch": 0.5537599437614386, "grad_norm": 420.3285827636719, "learning_rate": 5.030368542266013e-06, "loss": 22.6166, "step": 274130 }, { "epoch": 0.5537801443941224, "grad_norm": 441.185302734375, "learning_rate": 5.030019482780459e-06, "loss": 17.6675, "step": 274140 }, { "epoch": 0.5538003450268062, "grad_norm": 785.4033203125, "learning_rate": 5.029670423148595e-06, "loss": 22.174, "step": 274150 }, { "epoch": 0.55382054565949, "grad_norm": 628.7343139648438, "learning_rate": 5.029321363372119e-06, "loss": 12.8308, "step": 274160 }, { "epoch": 0.5538407462921738, "grad_norm": 450.0870056152344, "learning_rate": 5.0289723034527345e-06, "loss": 14.8841, "step": 274170 }, { "epoch": 0.5538609469248577, "grad_norm": 740.5001831054688, "learning_rate": 
5.028623243392143e-06, "loss": 18.4416, "step": 274180 }, { "epoch": 0.5538811475575415, "grad_norm": 549.3732299804688, "learning_rate": 5.028274183192046e-06, "loss": 43.1109, "step": 274190 }, { "epoch": 0.5539013481902253, "grad_norm": 375.171630859375, "learning_rate": 5.027925122854141e-06, "loss": 14.3481, "step": 274200 }, { "epoch": 0.5539215488229091, "grad_norm": 410.18707275390625, "learning_rate": 5.027576062380136e-06, "loss": 20.8517, "step": 274210 }, { "epoch": 0.5539417494555929, "grad_norm": 440.8715515136719, "learning_rate": 5.027227001771727e-06, "loss": 7.7379, "step": 274220 }, { "epoch": 0.5539619500882768, "grad_norm": 636.6222534179688, "learning_rate": 5.0268779410306164e-06, "loss": 30.4021, "step": 274230 }, { "epoch": 0.5539821507209606, "grad_norm": 162.743896484375, "learning_rate": 5.026528880158508e-06, "loss": 20.3886, "step": 274240 }, { "epoch": 0.5540023513536444, "grad_norm": 97.67532348632812, "learning_rate": 5.0261798191570975e-06, "loss": 25.955, "step": 274250 }, { "epoch": 0.5540225519863282, "grad_norm": 479.6761779785156, "learning_rate": 5.025830758028093e-06, "loss": 21.1345, "step": 274260 }, { "epoch": 0.554042752619012, "grad_norm": 252.00277709960938, "learning_rate": 5.025481696773191e-06, "loss": 11.5059, "step": 274270 }, { "epoch": 0.5540629532516959, "grad_norm": 524.0341796875, "learning_rate": 5.025132635394095e-06, "loss": 19.7833, "step": 274280 }, { "epoch": 0.5540831538843797, "grad_norm": 1358.397705078125, "learning_rate": 5.024783573892505e-06, "loss": 31.1891, "step": 274290 }, { "epoch": 0.5541033545170635, "grad_norm": 768.6133422851562, "learning_rate": 5.024434512270123e-06, "loss": 14.8123, "step": 274300 }, { "epoch": 0.5541235551497473, "grad_norm": 195.52130126953125, "learning_rate": 5.0240854505286505e-06, "loss": 9.175, "step": 274310 }, { "epoch": 0.5541437557824311, "grad_norm": 318.7700500488281, "learning_rate": 5.023736388669789e-06, "loss": 17.8853, "step": 274320 }, { "epoch": 0.554163956415115, "grad_norm": 462.0083312988281, "learning_rate": 5.023387326695238e-06, "loss": 10.3964, "step": 274330 }, { "epoch": 0.5541841570477988, "grad_norm": 183.35916137695312, "learning_rate": 5.0230382646067e-06, "loss": 14.3003, "step": 274340 }, { "epoch": 0.5542043576804826, "grad_norm": 666.4971923828125, "learning_rate": 5.0226892024058785e-06, "loss": 30.8109, "step": 274350 }, { "epoch": 0.5542245583131664, "grad_norm": 497.9892883300781, "learning_rate": 5.022340140094469e-06, "loss": 19.2232, "step": 274360 }, { "epoch": 0.5542447589458502, "grad_norm": 148.6495361328125, "learning_rate": 5.021991077674179e-06, "loss": 11.1378, "step": 274370 }, { "epoch": 0.5542649595785339, "grad_norm": 497.6520080566406, "learning_rate": 5.021642015146705e-06, "loss": 25.8033, "step": 274380 }, { "epoch": 0.5542851602112178, "grad_norm": 372.19580078125, "learning_rate": 5.021292952513752e-06, "loss": 22.642, "step": 274390 }, { "epoch": 0.5543053608439016, "grad_norm": 480.487548828125, "learning_rate": 5.0209438897770205e-06, "loss": 19.5264, "step": 274400 }, { "epoch": 0.5543255614765854, "grad_norm": 492.4881896972656, "learning_rate": 5.020594826938209e-06, "loss": 31.9544, "step": 274410 }, { "epoch": 0.5543457621092692, "grad_norm": 452.8685302734375, "learning_rate": 5.020245763999024e-06, "loss": 25.9853, "step": 274420 }, { "epoch": 0.554365962741953, "grad_norm": 177.11929321289062, "learning_rate": 5.01989670096116e-06, "loss": 25.5444, "step": 274430 }, { "epoch": 0.5543861633746369, "grad_norm": 
328.0756530761719, "learning_rate": 5.019547637826323e-06, "loss": 23.5047, "step": 274440 }, { "epoch": 0.5544063640073207, "grad_norm": 124.34886169433594, "learning_rate": 5.019198574596213e-06, "loss": 13.553, "step": 274450 }, { "epoch": 0.5544265646400045, "grad_norm": 7.3490891456604, "learning_rate": 5.018849511272532e-06, "loss": 29.7773, "step": 274460 }, { "epoch": 0.5544467652726883, "grad_norm": 428.7235107421875, "learning_rate": 5.018500447856981e-06, "loss": 21.9885, "step": 274470 }, { "epoch": 0.5544669659053721, "grad_norm": 249.6378936767578, "learning_rate": 5.0181513843512615e-06, "loss": 10.3065, "step": 274480 }, { "epoch": 0.554487166538056, "grad_norm": 142.67721557617188, "learning_rate": 5.017802320757073e-06, "loss": 27.0052, "step": 274490 }, { "epoch": 0.5545073671707398, "grad_norm": 707.7395629882812, "learning_rate": 5.0174532570761194e-06, "loss": 23.4318, "step": 274500 }, { "epoch": 0.5545275678034236, "grad_norm": 369.94677734375, "learning_rate": 5.0171041933101e-06, "loss": 32.0048, "step": 274510 }, { "epoch": 0.5545477684361074, "grad_norm": 453.3552551269531, "learning_rate": 5.016755129460717e-06, "loss": 26.6822, "step": 274520 }, { "epoch": 0.5545679690687912, "grad_norm": 341.4087219238281, "learning_rate": 5.016406065529672e-06, "loss": 23.0827, "step": 274530 }, { "epoch": 0.5545881697014751, "grad_norm": 361.6666259765625, "learning_rate": 5.016057001518664e-06, "loss": 19.9923, "step": 274540 }, { "epoch": 0.5546083703341589, "grad_norm": 421.46746826171875, "learning_rate": 5.015707937429398e-06, "loss": 16.2056, "step": 274550 }, { "epoch": 0.5546285709668427, "grad_norm": 103.21510314941406, "learning_rate": 5.0153588732635734e-06, "loss": 34.3507, "step": 274560 }, { "epoch": 0.5546487715995265, "grad_norm": 150.9375457763672, "learning_rate": 5.015009809022891e-06, "loss": 31.226, "step": 274570 }, { "epoch": 0.5546689722322103, "grad_norm": 227.6450653076172, "learning_rate": 5.014660744709053e-06, "loss": 16.8979, "step": 274580 }, { "epoch": 0.5546891728648942, "grad_norm": 578.1546630859375, "learning_rate": 5.014311680323759e-06, "loss": 26.6818, "step": 274590 }, { "epoch": 0.554709373497578, "grad_norm": 266.2040100097656, "learning_rate": 5.013962615868714e-06, "loss": 24.6263, "step": 274600 }, { "epoch": 0.5547295741302618, "grad_norm": 110.05225372314453, "learning_rate": 5.013613551345614e-06, "loss": 8.7034, "step": 274610 }, { "epoch": 0.5547497747629456, "grad_norm": 286.9217834472656, "learning_rate": 5.013264486756165e-06, "loss": 16.5623, "step": 274620 }, { "epoch": 0.5547699753956294, "grad_norm": 160.9605255126953, "learning_rate": 5.012915422102066e-06, "loss": 12.3764, "step": 274630 }, { "epoch": 0.5547901760283132, "grad_norm": 672.0164794921875, "learning_rate": 5.0125663573850204e-06, "loss": 17.3915, "step": 274640 }, { "epoch": 0.554810376660997, "grad_norm": 245.0459747314453, "learning_rate": 5.012217292606726e-06, "loss": 22.9495, "step": 274650 }, { "epoch": 0.5548305772936808, "grad_norm": 501.7677001953125, "learning_rate": 5.011868227768886e-06, "loss": 19.9008, "step": 274660 }, { "epoch": 0.5548507779263646, "grad_norm": 456.3265686035156, "learning_rate": 5.011519162873202e-06, "loss": 18.3497, "step": 274670 }, { "epoch": 0.5548709785590484, "grad_norm": 305.5513610839844, "learning_rate": 5.011170097921375e-06, "loss": 10.6182, "step": 274680 }, { "epoch": 0.5548911791917323, "grad_norm": 485.4612731933594, "learning_rate": 5.010821032915108e-06, "loss": 15.1355, "step": 274690 }, { "epoch": 
0.5549113798244161, "grad_norm": 632.5755615234375, "learning_rate": 5.010471967856096e-06, "loss": 18.6826, "step": 274700 }, { "epoch": 0.5549315804570999, "grad_norm": 838.0838012695312, "learning_rate": 5.01012290274605e-06, "loss": 17.3757, "step": 274710 }, { "epoch": 0.5549517810897837, "grad_norm": 577.13720703125, "learning_rate": 5.009773837586663e-06, "loss": 11.8671, "step": 274720 }, { "epoch": 0.5549719817224675, "grad_norm": 638.5238647460938, "learning_rate": 5.0094247723796405e-06, "loss": 17.7085, "step": 274730 }, { "epoch": 0.5549921823551514, "grad_norm": 226.6175079345703, "learning_rate": 5.009075707126684e-06, "loss": 17.44, "step": 274740 }, { "epoch": 0.5550123829878352, "grad_norm": 471.9786071777344, "learning_rate": 5.008726641829492e-06, "loss": 32.2502, "step": 274750 }, { "epoch": 0.555032583620519, "grad_norm": 15.962459564208984, "learning_rate": 5.008377576489769e-06, "loss": 12.847, "step": 274760 }, { "epoch": 0.5550527842532028, "grad_norm": 634.9119873046875, "learning_rate": 5.008028511109213e-06, "loss": 14.3255, "step": 274770 }, { "epoch": 0.5550729848858866, "grad_norm": 528.1943359375, "learning_rate": 5.007679445689527e-06, "loss": 9.3628, "step": 274780 }, { "epoch": 0.5550931855185705, "grad_norm": 354.98638916015625, "learning_rate": 5.007330380232414e-06, "loss": 22.5814, "step": 274790 }, { "epoch": 0.5551133861512543, "grad_norm": 649.9990844726562, "learning_rate": 5.006981314739573e-06, "loss": 12.6906, "step": 274800 }, { "epoch": 0.5551335867839381, "grad_norm": 425.6797790527344, "learning_rate": 5.0066322492127036e-06, "loss": 38.1626, "step": 274810 }, { "epoch": 0.5551537874166219, "grad_norm": 430.2084655761719, "learning_rate": 5.006283183653513e-06, "loss": 19.2571, "step": 274820 }, { "epoch": 0.5551739880493057, "grad_norm": 637.6443481445312, "learning_rate": 5.005934118063697e-06, "loss": 28.1729, "step": 274830 }, { "epoch": 0.5551941886819896, "grad_norm": 421.87628173828125, "learning_rate": 5.005585052444959e-06, "loss": 17.778, "step": 274840 }, { "epoch": 0.5552143893146734, "grad_norm": 79.63214111328125, "learning_rate": 5.005235986799001e-06, "loss": 27.9943, "step": 274850 }, { "epoch": 0.5552345899473572, "grad_norm": 305.18768310546875, "learning_rate": 5.004886921127521e-06, "loss": 22.6701, "step": 274860 }, { "epoch": 0.555254790580041, "grad_norm": 535.249755859375, "learning_rate": 5.0045378554322256e-06, "loss": 27.5421, "step": 274870 }, { "epoch": 0.5552749912127248, "grad_norm": 519.9200439453125, "learning_rate": 5.004188789714811e-06, "loss": 15.89, "step": 274880 }, { "epoch": 0.5552951918454087, "grad_norm": 445.910888671875, "learning_rate": 5.003839723976982e-06, "loss": 14.5666, "step": 274890 }, { "epoch": 0.5553153924780924, "grad_norm": 36.32236862182617, "learning_rate": 5.003490658220438e-06, "loss": 20.8325, "step": 274900 }, { "epoch": 0.5553355931107762, "grad_norm": 99.9104995727539, "learning_rate": 5.0031415924468816e-06, "loss": 11.8601, "step": 274910 }, { "epoch": 0.55535579374346, "grad_norm": 668.4713134765625, "learning_rate": 5.002792526658015e-06, "loss": 22.1278, "step": 274920 }, { "epoch": 0.5553759943761438, "grad_norm": 766.052978515625, "learning_rate": 5.002443460855535e-06, "loss": 36.3304, "step": 274930 }, { "epoch": 0.5553961950088276, "grad_norm": 734.5594482421875, "learning_rate": 5.002094395041147e-06, "loss": 12.9675, "step": 274940 }, { "epoch": 0.5554163956415115, "grad_norm": 328.8623962402344, "learning_rate": 5.001745329216551e-06, "loss": 12.0246, "step": 
274950 }, { "epoch": 0.5554365962741953, "grad_norm": 1398.352783203125, "learning_rate": 5.00139626338345e-06, "loss": 25.3666, "step": 274960 }, { "epoch": 0.5554567969068791, "grad_norm": 183.2097930908203, "learning_rate": 5.00104719754354e-06, "loss": 34.8507, "step": 274970 }, { "epoch": 0.5554769975395629, "grad_norm": 325.44854736328125, "learning_rate": 5.000698131698531e-06, "loss": 26.734, "step": 274980 }, { "epoch": 0.5554971981722467, "grad_norm": 1094.00341796875, "learning_rate": 5.000349065850117e-06, "loss": 30.343, "step": 274990 }, { "epoch": 0.5555173988049306, "grad_norm": 314.4059753417969, "learning_rate": 5e-06, "loss": 26.2702, "step": 275000 }, { "epoch": 0.5555375994376144, "grad_norm": 176.5780792236328, "learning_rate": 4.999650934149885e-06, "loss": 6.8526, "step": 275010 }, { "epoch": 0.5555578000702982, "grad_norm": 613.3285522460938, "learning_rate": 4.999301868301472e-06, "loss": 20.9954, "step": 275020 }, { "epoch": 0.555578000702982, "grad_norm": 305.3586730957031, "learning_rate": 4.9989528024564606e-06, "loss": 15.1626, "step": 275030 }, { "epoch": 0.5555982013356658, "grad_norm": 89.95189666748047, "learning_rate": 4.998603736616552e-06, "loss": 21.0642, "step": 275040 }, { "epoch": 0.5556184019683497, "grad_norm": 638.3482055664062, "learning_rate": 4.9982546707834514e-06, "loss": 12.8903, "step": 275050 }, { "epoch": 0.5556386026010335, "grad_norm": 201.15760803222656, "learning_rate": 4.9979056049588545e-06, "loss": 16.1875, "step": 275060 }, { "epoch": 0.5556588032337173, "grad_norm": 589.5166625976562, "learning_rate": 4.997556539144467e-06, "loss": 19.4037, "step": 275070 }, { "epoch": 0.5556790038664011, "grad_norm": 234.26467895507812, "learning_rate": 4.9972074733419875e-06, "loss": 18.1829, "step": 275080 }, { "epoch": 0.5556992044990849, "grad_norm": 190.3571014404297, "learning_rate": 4.996858407553119e-06, "loss": 14.9323, "step": 275090 }, { "epoch": 0.5557194051317688, "grad_norm": 422.9874267578125, "learning_rate": 4.996509341779563e-06, "loss": 29.7398, "step": 275100 }, { "epoch": 0.5557396057644526, "grad_norm": 604.4100341796875, "learning_rate": 4.996160276023018e-06, "loss": 43.8679, "step": 275110 }, { "epoch": 0.5557598063971364, "grad_norm": 86.82774353027344, "learning_rate": 4.99581121028519e-06, "loss": 22.9737, "step": 275120 }, { "epoch": 0.5557800070298202, "grad_norm": 414.9309387207031, "learning_rate": 4.995462144567776e-06, "loss": 15.1246, "step": 275130 }, { "epoch": 0.555800207662504, "grad_norm": 579.406005859375, "learning_rate": 4.9951130788724796e-06, "loss": 13.6963, "step": 275140 }, { "epoch": 0.5558204082951878, "grad_norm": 509.14105224609375, "learning_rate": 4.994764013201002e-06, "loss": 10.8033, "step": 275150 }, { "epoch": 0.5558406089278716, "grad_norm": 303.1459655761719, "learning_rate": 4.994414947555043e-06, "loss": 26.2788, "step": 275160 }, { "epoch": 0.5558608095605554, "grad_norm": 543.0546264648438, "learning_rate": 4.994065881936305e-06, "loss": 30.235, "step": 275170 }, { "epoch": 0.5558810101932392, "grad_norm": 205.7716064453125, "learning_rate": 4.99371681634649e-06, "loss": 22.751, "step": 275180 }, { "epoch": 0.555901210825923, "grad_norm": 149.52926635742188, "learning_rate": 4.993367750787297e-06, "loss": 14.7283, "step": 275190 }, { "epoch": 0.5559214114586069, "grad_norm": 280.9228210449219, "learning_rate": 4.993018685260428e-06, "loss": 16.4838, "step": 275200 }, { "epoch": 0.5559416120912907, "grad_norm": 195.95802307128906, "learning_rate": 4.992669619767589e-06, "loss": 
11.5435, "step": 275210 }, { "epoch": 0.5559618127239745, "grad_norm": 6.11033296585083, "learning_rate": 4.992320554310474e-06, "loss": 47.3396, "step": 275220 }, { "epoch": 0.5559820133566583, "grad_norm": 446.1274719238281, "learning_rate": 4.991971488890789e-06, "loss": 31.3487, "step": 275230 }, { "epoch": 0.5560022139893421, "grad_norm": 178.6339569091797, "learning_rate": 4.991622423510233e-06, "loss": 31.0977, "step": 275240 }, { "epoch": 0.556022414622026, "grad_norm": 376.4140930175781, "learning_rate": 4.9912733581705095e-06, "loss": 8.8311, "step": 275250 }, { "epoch": 0.5560426152547098, "grad_norm": 331.22418212890625, "learning_rate": 4.9909242928733185e-06, "loss": 25.8522, "step": 275260 }, { "epoch": 0.5560628158873936, "grad_norm": 475.052001953125, "learning_rate": 4.990575227620359e-06, "loss": 29.3616, "step": 275270 }, { "epoch": 0.5560830165200774, "grad_norm": 559.9166870117188, "learning_rate": 4.990226162413338e-06, "loss": 30.6506, "step": 275280 }, { "epoch": 0.5561032171527612, "grad_norm": 911.1737060546875, "learning_rate": 4.989877097253952e-06, "loss": 18.0968, "step": 275290 }, { "epoch": 0.556123417785445, "grad_norm": 772.8670043945312, "learning_rate": 4.9895280321439036e-06, "loss": 9.2231, "step": 275300 }, { "epoch": 0.5561436184181289, "grad_norm": 642.33984375, "learning_rate": 4.989178967084896e-06, "loss": 19.8402, "step": 275310 }, { "epoch": 0.5561638190508127, "grad_norm": 7.4433393478393555, "learning_rate": 4.9888299020786265e-06, "loss": 19.86, "step": 275320 }, { "epoch": 0.5561840196834965, "grad_norm": 671.8960571289062, "learning_rate": 4.9884808371268e-06, "loss": 32.5698, "step": 275330 }, { "epoch": 0.5562042203161803, "grad_norm": 198.35899353027344, "learning_rate": 4.9881317722311165e-06, "loss": 5.8491, "step": 275340 }, { "epoch": 0.5562244209488642, "grad_norm": 410.0425720214844, "learning_rate": 4.987782707393276e-06, "loss": 13.0426, "step": 275350 }, { "epoch": 0.556244621581548, "grad_norm": 564.2764892578125, "learning_rate": 4.987433642614981e-06, "loss": 20.9403, "step": 275360 }, { "epoch": 0.5562648222142318, "grad_norm": 2.9762794971466064, "learning_rate": 4.987084577897936e-06, "loss": 14.003, "step": 275370 }, { "epoch": 0.5562850228469156, "grad_norm": 101.92252349853516, "learning_rate": 4.986735513243836e-06, "loss": 8.4082, "step": 275380 }, { "epoch": 0.5563052234795994, "grad_norm": 227.08737182617188, "learning_rate": 4.9863864486543865e-06, "loss": 24.8647, "step": 275390 }, { "epoch": 0.5563254241122833, "grad_norm": 573.6603393554688, "learning_rate": 4.986037384131288e-06, "loss": 12.8151, "step": 275400 }, { "epoch": 0.556345624744967, "grad_norm": 730.9862060546875, "learning_rate": 4.985688319676242e-06, "loss": 21.6598, "step": 275410 }, { "epoch": 0.5563658253776508, "grad_norm": 222.87841796875, "learning_rate": 4.985339255290948e-06, "loss": 9.5083, "step": 275420 }, { "epoch": 0.5563860260103346, "grad_norm": 336.2375183105469, "learning_rate": 4.98499019097711e-06, "loss": 12.9924, "step": 275430 }, { "epoch": 0.5564062266430184, "grad_norm": 220.37144470214844, "learning_rate": 4.984641126736428e-06, "loss": 15.1812, "step": 275440 }, { "epoch": 0.5564264272757022, "grad_norm": 594.5784301757812, "learning_rate": 4.984292062570603e-06, "loss": 33.5213, "step": 275450 }, { "epoch": 0.5564466279083861, "grad_norm": 480.54833984375, "learning_rate": 4.983942998481336e-06, "loss": 16.8189, "step": 275460 }, { "epoch": 0.5564668285410699, "grad_norm": 331.3450012207031, "learning_rate": 
4.9835939344703305e-06, "loss": 23.6895, "step": 275470 }, { "epoch": 0.5564870291737537, "grad_norm": 141.87034606933594, "learning_rate": 4.983244870539284e-06, "loss": 20.68, "step": 275480 }, { "epoch": 0.5565072298064375, "grad_norm": 6.501047134399414, "learning_rate": 4.9828958066899e-06, "loss": 7.0658, "step": 275490 }, { "epoch": 0.5565274304391213, "grad_norm": 368.3595275878906, "learning_rate": 4.982546742923883e-06, "loss": 13.4994, "step": 275500 }, { "epoch": 0.5565476310718052, "grad_norm": 343.9040222167969, "learning_rate": 4.9821976792429274e-06, "loss": 11.4451, "step": 275510 }, { "epoch": 0.556567831704489, "grad_norm": 151.44241333007812, "learning_rate": 4.981848615648739e-06, "loss": 22.9241, "step": 275520 }, { "epoch": 0.5565880323371728, "grad_norm": 292.4028015136719, "learning_rate": 4.9814995521430195e-06, "loss": 20.1823, "step": 275530 }, { "epoch": 0.5566082329698566, "grad_norm": 405.0984802246094, "learning_rate": 4.981150488727469e-06, "loss": 11.9487, "step": 275540 }, { "epoch": 0.5566284336025404, "grad_norm": 322.7618103027344, "learning_rate": 4.980801425403788e-06, "loss": 14.5687, "step": 275550 }, { "epoch": 0.5566486342352243, "grad_norm": 59.802757263183594, "learning_rate": 4.980452362173676e-06, "loss": 18.1742, "step": 275560 }, { "epoch": 0.5566688348679081, "grad_norm": 389.64208984375, "learning_rate": 4.980103299038842e-06, "loss": 12.1644, "step": 275570 }, { "epoch": 0.5566890355005919, "grad_norm": 1275.4359130859375, "learning_rate": 4.979754236000978e-06, "loss": 24.2242, "step": 275580 }, { "epoch": 0.5567092361332757, "grad_norm": 934.800537109375, "learning_rate": 4.979405173061791e-06, "loss": 14.1845, "step": 275590 }, { "epoch": 0.5567294367659595, "grad_norm": 309.9809265136719, "learning_rate": 4.979056110222982e-06, "loss": 19.1958, "step": 275600 }, { "epoch": 0.5567496373986434, "grad_norm": 421.5162048339844, "learning_rate": 4.978707047486249e-06, "loss": 21.8696, "step": 275610 }, { "epoch": 0.5567698380313272, "grad_norm": 500.7240905761719, "learning_rate": 4.978357984853296e-06, "loss": 15.3565, "step": 275620 }, { "epoch": 0.556790038664011, "grad_norm": 199.2356414794922, "learning_rate": 4.9780089223258235e-06, "loss": 18.4397, "step": 275630 }, { "epoch": 0.5568102392966948, "grad_norm": 0.5291028022766113, "learning_rate": 4.977659859905532e-06, "loss": 26.18, "step": 275640 }, { "epoch": 0.5568304399293786, "grad_norm": 240.7754669189453, "learning_rate": 4.977310797594124e-06, "loss": 11.6249, "step": 275650 }, { "epoch": 0.5568506405620623, "grad_norm": 207.90576171875, "learning_rate": 4.9769617353933025e-06, "loss": 14.2674, "step": 275660 }, { "epoch": 0.5568708411947462, "grad_norm": 35.586524963378906, "learning_rate": 4.976612673304764e-06, "loss": 19.3714, "step": 275670 }, { "epoch": 0.55689104182743, "grad_norm": 288.2771301269531, "learning_rate": 4.976263611330213e-06, "loss": 16.8436, "step": 275680 }, { "epoch": 0.5569112424601138, "grad_norm": 541.0880126953125, "learning_rate": 4.97591454947135e-06, "loss": 9.6839, "step": 275690 }, { "epoch": 0.5569314430927976, "grad_norm": 368.6847229003906, "learning_rate": 4.975565487729879e-06, "loss": 13.0142, "step": 275700 }, { "epoch": 0.5569516437254814, "grad_norm": 535.9522705078125, "learning_rate": 4.9752164261074964e-06, "loss": 34.5483, "step": 275710 }, { "epoch": 0.5569718443581653, "grad_norm": 0.9300443530082703, "learning_rate": 4.974867364605906e-06, "loss": 14.6588, "step": 275720 }, { "epoch": 0.5569920449908491, "grad_norm": 
629.6343994140625, "learning_rate": 4.97451830322681e-06, "loss": 27.1088, "step": 275730 }, { "epoch": 0.5570122456235329, "grad_norm": 686.4780883789062, "learning_rate": 4.974169241971908e-06, "loss": 21.9865, "step": 275740 }, { "epoch": 0.5570324462562167, "grad_norm": 3.323115825653076, "learning_rate": 4.9738201808429025e-06, "loss": 12.1139, "step": 275750 }, { "epoch": 0.5570526468889005, "grad_norm": 518.5536499023438, "learning_rate": 4.973471119841495e-06, "loss": 25.2883, "step": 275760 }, { "epoch": 0.5570728475215844, "grad_norm": 698.150634765625, "learning_rate": 4.973122058969384e-06, "loss": 22.5602, "step": 275770 }, { "epoch": 0.5570930481542682, "grad_norm": 344.7100524902344, "learning_rate": 4.972772998228274e-06, "loss": 15.8895, "step": 275780 }, { "epoch": 0.557113248786952, "grad_norm": 664.1873779296875, "learning_rate": 4.972423937619866e-06, "loss": 22.1436, "step": 275790 }, { "epoch": 0.5571334494196358, "grad_norm": 299.5687561035156, "learning_rate": 4.9720748771458595e-06, "loss": 12.1201, "step": 275800 }, { "epoch": 0.5571536500523196, "grad_norm": 193.34686279296875, "learning_rate": 4.971725816807956e-06, "loss": 19.3396, "step": 275810 }, { "epoch": 0.5571738506850035, "grad_norm": 323.8533935546875, "learning_rate": 4.97137675660786e-06, "loss": 20.3575, "step": 275820 }, { "epoch": 0.5571940513176873, "grad_norm": 595.7634887695312, "learning_rate": 4.971027696547266e-06, "loss": 29.8513, "step": 275830 }, { "epoch": 0.5572142519503711, "grad_norm": 399.06573486328125, "learning_rate": 4.970678636627882e-06, "loss": 24.1719, "step": 275840 }, { "epoch": 0.5572344525830549, "grad_norm": 719.6656494140625, "learning_rate": 4.970329576851406e-06, "loss": 15.5013, "step": 275850 }, { "epoch": 0.5572546532157387, "grad_norm": 171.09657287597656, "learning_rate": 4.969980517219542e-06, "loss": 10.368, "step": 275860 }, { "epoch": 0.5572748538484226, "grad_norm": 934.2931518554688, "learning_rate": 4.969631457733988e-06, "loss": 27.2345, "step": 275870 }, { "epoch": 0.5572950544811064, "grad_norm": 237.9071807861328, "learning_rate": 4.969282398396445e-06, "loss": 13.1042, "step": 275880 }, { "epoch": 0.5573152551137902, "grad_norm": 238.2025146484375, "learning_rate": 4.96893333920862e-06, "loss": 17.233, "step": 275890 }, { "epoch": 0.557335455746474, "grad_norm": 214.6839141845703, "learning_rate": 4.968584280172206e-06, "loss": 18.8641, "step": 275900 }, { "epoch": 0.5573556563791578, "grad_norm": 265.20068359375, "learning_rate": 4.968235221288909e-06, "loss": 15.945, "step": 275910 }, { "epoch": 0.5573758570118416, "grad_norm": 274.2738952636719, "learning_rate": 4.967886162560432e-06, "loss": 30.9454, "step": 275920 }, { "epoch": 0.5573960576445254, "grad_norm": 226.4269561767578, "learning_rate": 4.967537103988472e-06, "loss": 33.7239, "step": 275930 }, { "epoch": 0.5574162582772092, "grad_norm": 726.7615966796875, "learning_rate": 4.967188045574733e-06, "loss": 22.764, "step": 275940 }, { "epoch": 0.557436458909893, "grad_norm": 358.86590576171875, "learning_rate": 4.966838987320916e-06, "loss": 19.9171, "step": 275950 }, { "epoch": 0.5574566595425768, "grad_norm": 449.6456298828125, "learning_rate": 4.966489929228721e-06, "loss": 29.3903, "step": 275960 }, { "epoch": 0.5574768601752607, "grad_norm": 632.8236694335938, "learning_rate": 4.966140871299849e-06, "loss": 17.1162, "step": 275970 }, { "epoch": 0.5574970608079445, "grad_norm": 301.8818054199219, "learning_rate": 4.965791813536004e-06, "loss": 21.3535, "step": 275980 }, { "epoch": 
0.5575172614406283, "grad_norm": 653.9390869140625, "learning_rate": 4.9654427559388845e-06, "loss": 14.3596, "step": 275990 }, { "epoch": 0.5575374620733121, "grad_norm": 340.1526794433594, "learning_rate": 4.965093698510192e-06, "loss": 13.6781, "step": 276000 }, { "epoch": 0.5575576627059959, "grad_norm": 508.2944030761719, "learning_rate": 4.96474464125163e-06, "loss": 13.1891, "step": 276010 }, { "epoch": 0.5575778633386798, "grad_norm": 469.45013427734375, "learning_rate": 4.964395584164899e-06, "loss": 17.2308, "step": 276020 }, { "epoch": 0.5575980639713636, "grad_norm": 320.333251953125, "learning_rate": 4.964046527251698e-06, "loss": 34.6382, "step": 276030 }, { "epoch": 0.5576182646040474, "grad_norm": 855.4724731445312, "learning_rate": 4.9636974705137305e-06, "loss": 19.0854, "step": 276040 }, { "epoch": 0.5576384652367312, "grad_norm": 845.752685546875, "learning_rate": 4.9633484139526975e-06, "loss": 17.8679, "step": 276050 }, { "epoch": 0.557658665869415, "grad_norm": 450.273193359375, "learning_rate": 4.9629993575702995e-06, "loss": 31.8959, "step": 276060 }, { "epoch": 0.5576788665020989, "grad_norm": 152.7506103515625, "learning_rate": 4.962650301368238e-06, "loss": 11.4191, "step": 276070 }, { "epoch": 0.5576990671347827, "grad_norm": 248.6024169921875, "learning_rate": 4.962301245348215e-06, "loss": 17.2046, "step": 276080 }, { "epoch": 0.5577192677674665, "grad_norm": 265.6996154785156, "learning_rate": 4.961952189511932e-06, "loss": 17.0613, "step": 276090 }, { "epoch": 0.5577394684001503, "grad_norm": 490.7113037109375, "learning_rate": 4.961603133861086e-06, "loss": 20.1365, "step": 276100 }, { "epoch": 0.5577596690328341, "grad_norm": 58.4522590637207, "learning_rate": 4.961254078397386e-06, "loss": 19.8307, "step": 276110 }, { "epoch": 0.557779869665518, "grad_norm": 89.830078125, "learning_rate": 4.960905023122526e-06, "loss": 17.4171, "step": 276120 }, { "epoch": 0.5578000702982018, "grad_norm": 278.5671691894531, "learning_rate": 4.9605559680382104e-06, "loss": 17.8109, "step": 276130 }, { "epoch": 0.5578202709308856, "grad_norm": 647.3381958007812, "learning_rate": 4.960206913146141e-06, "loss": 18.2097, "step": 276140 }, { "epoch": 0.5578404715635694, "grad_norm": 448.3829345703125, "learning_rate": 4.9598578584480186e-06, "loss": 7.8238, "step": 276150 }, { "epoch": 0.5578606721962532, "grad_norm": 86.89173889160156, "learning_rate": 4.959508803945543e-06, "loss": 11.8763, "step": 276160 }, { "epoch": 0.5578808728289371, "grad_norm": 96.09246063232422, "learning_rate": 4.9591597496404165e-06, "loss": 9.3863, "step": 276170 }, { "epoch": 0.5579010734616208, "grad_norm": 163.861572265625, "learning_rate": 4.958810695534343e-06, "loss": 23.1588, "step": 276180 }, { "epoch": 0.5579212740943046, "grad_norm": 256.9462585449219, "learning_rate": 4.958461641629018e-06, "loss": 16.4793, "step": 276190 }, { "epoch": 0.5579414747269884, "grad_norm": 177.65811157226562, "learning_rate": 4.9581125879261476e-06, "loss": 11.4484, "step": 276200 }, { "epoch": 0.5579616753596722, "grad_norm": 376.4410705566406, "learning_rate": 4.957763534427431e-06, "loss": 21.685, "step": 276210 }, { "epoch": 0.557981875992356, "grad_norm": 385.52325439453125, "learning_rate": 4.9574144811345695e-06, "loss": 10.8762, "step": 276220 }, { "epoch": 0.5580020766250399, "grad_norm": 700.00830078125, "learning_rate": 4.957065428049265e-06, "loss": 15.0065, "step": 276230 }, { "epoch": 0.5580222772577237, "grad_norm": 247.28700256347656, "learning_rate": 4.956716375173219e-06, "loss": 27.583, 
"step": 276240 }, { "epoch": 0.5580424778904075, "grad_norm": 265.1885681152344, "learning_rate": 4.956367322508131e-06, "loss": 11.7517, "step": 276250 }, { "epoch": 0.5580626785230913, "grad_norm": 252.61227416992188, "learning_rate": 4.956018270055703e-06, "loss": 23.381, "step": 276260 }, { "epoch": 0.5580828791557751, "grad_norm": 0.002575975377112627, "learning_rate": 4.9556692178176395e-06, "loss": 16.0502, "step": 276270 }, { "epoch": 0.558103079788459, "grad_norm": 363.0325927734375, "learning_rate": 4.955320165795636e-06, "loss": 33.9509, "step": 276280 }, { "epoch": 0.5581232804211428, "grad_norm": 158.7438507080078, "learning_rate": 4.954971113991397e-06, "loss": 14.7082, "step": 276290 }, { "epoch": 0.5581434810538266, "grad_norm": 11.163829803466797, "learning_rate": 4.954622062406623e-06, "loss": 23.315, "step": 276300 }, { "epoch": 0.5581636816865104, "grad_norm": 406.20037841796875, "learning_rate": 4.954273011043018e-06, "loss": 24.7558, "step": 276310 }, { "epoch": 0.5581838823191942, "grad_norm": 180.91114807128906, "learning_rate": 4.9539239599022784e-06, "loss": 19.4117, "step": 276320 }, { "epoch": 0.5582040829518781, "grad_norm": 461.32830810546875, "learning_rate": 4.953574908986108e-06, "loss": 21.8708, "step": 276330 }, { "epoch": 0.5582242835845619, "grad_norm": 499.2826843261719, "learning_rate": 4.95322585829621e-06, "loss": 24.8518, "step": 276340 }, { "epoch": 0.5582444842172457, "grad_norm": 267.67010498046875, "learning_rate": 4.952876807834281e-06, "loss": 27.8653, "step": 276350 }, { "epoch": 0.5582646848499295, "grad_norm": 232.80372619628906, "learning_rate": 4.952527757602025e-06, "loss": 18.1488, "step": 276360 }, { "epoch": 0.5582848854826133, "grad_norm": 518.7470703125, "learning_rate": 4.952178707601144e-06, "loss": 55.594, "step": 276370 }, { "epoch": 0.5583050861152972, "grad_norm": 667.466552734375, "learning_rate": 4.951829657833337e-06, "loss": 21.389, "step": 276380 }, { "epoch": 0.558325286747981, "grad_norm": 367.1182556152344, "learning_rate": 4.951480608300308e-06, "loss": 13.6146, "step": 276390 }, { "epoch": 0.5583454873806648, "grad_norm": 34.47004699707031, "learning_rate": 4.951131559003756e-06, "loss": 15.0139, "step": 276400 }, { "epoch": 0.5583656880133486, "grad_norm": 748.808837890625, "learning_rate": 4.950782509945383e-06, "loss": 20.2013, "step": 276410 }, { "epoch": 0.5583858886460324, "grad_norm": 411.05963134765625, "learning_rate": 4.950433461126888e-06, "loss": 17.0833, "step": 276420 }, { "epoch": 0.5584060892787162, "grad_norm": 304.01220703125, "learning_rate": 4.950084412549978e-06, "loss": 13.3847, "step": 276430 }, { "epoch": 0.5584262899114, "grad_norm": 4.686367034912109, "learning_rate": 4.949735364216348e-06, "loss": 14.4116, "step": 276440 }, { "epoch": 0.5584464905440838, "grad_norm": 303.236083984375, "learning_rate": 4.9493863161277016e-06, "loss": 13.1932, "step": 276450 }, { "epoch": 0.5584666911767676, "grad_norm": 244.71414184570312, "learning_rate": 4.949037268285741e-06, "loss": 35.4463, "step": 276460 }, { "epoch": 0.5584868918094514, "grad_norm": 418.2349548339844, "learning_rate": 4.948688220692167e-06, "loss": 25.0785, "step": 276470 }, { "epoch": 0.5585070924421353, "grad_norm": 287.80889892578125, "learning_rate": 4.94833917334868e-06, "loss": 13.6992, "step": 276480 }, { "epoch": 0.5585272930748191, "grad_norm": 324.98583984375, "learning_rate": 4.9479901262569795e-06, "loss": 45.5813, "step": 276490 }, { "epoch": 0.5585474937075029, "grad_norm": 103.70023345947266, "learning_rate": 
4.9476410794187726e-06, "loss": 18.5455, "step": 276500 }, { "epoch": 0.5585676943401867, "grad_norm": 165.24594116210938, "learning_rate": 4.947292032835754e-06, "loss": 18.9912, "step": 276510 }, { "epoch": 0.5585878949728705, "grad_norm": 549.1682739257812, "learning_rate": 4.946942986509628e-06, "loss": 20.0053, "step": 276520 }, { "epoch": 0.5586080956055544, "grad_norm": 333.8079833984375, "learning_rate": 4.946593940442097e-06, "loss": 17.1418, "step": 276530 }, { "epoch": 0.5586282962382382, "grad_norm": 62.44570541381836, "learning_rate": 4.9462448946348594e-06, "loss": 12.6096, "step": 276540 }, { "epoch": 0.558648496870922, "grad_norm": 1.0580861568450928, "learning_rate": 4.945895849089618e-06, "loss": 23.9356, "step": 276550 }, { "epoch": 0.5586686975036058, "grad_norm": 361.0372314453125, "learning_rate": 4.945546803808074e-06, "loss": 18.6434, "step": 276560 }, { "epoch": 0.5586888981362896, "grad_norm": 661.345947265625, "learning_rate": 4.945197758791928e-06, "loss": 23.0392, "step": 276570 }, { "epoch": 0.5587090987689735, "grad_norm": 276.2427978515625, "learning_rate": 4.944848714042879e-06, "loss": 18.4101, "step": 276580 }, { "epoch": 0.5587292994016573, "grad_norm": 280.89178466796875, "learning_rate": 4.9444996695626325e-06, "loss": 9.9693, "step": 276590 }, { "epoch": 0.5587495000343411, "grad_norm": 143.61856079101562, "learning_rate": 4.94415062535289e-06, "loss": 18.7723, "step": 276600 }, { "epoch": 0.5587697006670249, "grad_norm": 1065.444091796875, "learning_rate": 4.943801581415348e-06, "loss": 14.0857, "step": 276610 }, { "epoch": 0.5587899012997087, "grad_norm": 334.22528076171875, "learning_rate": 4.9434525377517115e-06, "loss": 28.4729, "step": 276620 }, { "epoch": 0.5588101019323926, "grad_norm": 42.48712921142578, "learning_rate": 4.9431034943636816e-06, "loss": 25.1483, "step": 276630 }, { "epoch": 0.5588303025650764, "grad_norm": 203.1193084716797, "learning_rate": 4.942754451252957e-06, "loss": 21.2341, "step": 276640 }, { "epoch": 0.5588505031977602, "grad_norm": 11.171894073486328, "learning_rate": 4.942405408421238e-06, "loss": 11.2092, "step": 276650 }, { "epoch": 0.558870703830444, "grad_norm": 395.5190124511719, "learning_rate": 4.942056365870231e-06, "loss": 8.646, "step": 276660 }, { "epoch": 0.5588909044631278, "grad_norm": 689.1996459960938, "learning_rate": 4.941707323601633e-06, "loss": 24.7655, "step": 276670 }, { "epoch": 0.5589111050958117, "grad_norm": 23.772083282470703, "learning_rate": 4.941358281617148e-06, "loss": 23.8002, "step": 276680 }, { "epoch": 0.5589313057284954, "grad_norm": 328.78912353515625, "learning_rate": 4.941009239918474e-06, "loss": 25.579, "step": 276690 }, { "epoch": 0.5589515063611792, "grad_norm": 803.1546630859375, "learning_rate": 4.940660198507315e-06, "loss": 18.9472, "step": 276700 }, { "epoch": 0.558971706993863, "grad_norm": 106.0699234008789, "learning_rate": 4.940311157385369e-06, "loss": 16.5392, "step": 276710 }, { "epoch": 0.5589919076265468, "grad_norm": 462.4819030761719, "learning_rate": 4.939962116554343e-06, "loss": 20.0266, "step": 276720 }, { "epoch": 0.5590121082592306, "grad_norm": 595.452880859375, "learning_rate": 4.93961307601593e-06, "loss": 31.1329, "step": 276730 }, { "epoch": 0.5590323088919145, "grad_norm": 165.49835205078125, "learning_rate": 4.939264035771837e-06, "loss": 12.3925, "step": 276740 }, { "epoch": 0.5590525095245983, "grad_norm": 550.9774169921875, "learning_rate": 4.938914995823764e-06, "loss": 22.1677, "step": 276750 }, { "epoch": 0.5590727101572821, "grad_norm": 
835.67333984375, "learning_rate": 4.938565956173413e-06, "loss": 23.3062, "step": 276760 }, { "epoch": 0.5590929107899659, "grad_norm": 139.3406219482422, "learning_rate": 4.938216916822483e-06, "loss": 15.5264, "step": 276770 }, { "epoch": 0.5591131114226497, "grad_norm": 130.74205017089844, "learning_rate": 4.937867877772675e-06, "loss": 15.202, "step": 276780 }, { "epoch": 0.5591333120553336, "grad_norm": 568.3181762695312, "learning_rate": 4.937518839025695e-06, "loss": 14.5231, "step": 276790 }, { "epoch": 0.5591535126880174, "grad_norm": 90.9675064086914, "learning_rate": 4.937169800583237e-06, "loss": 12.635, "step": 276800 }, { "epoch": 0.5591737133207012, "grad_norm": 604.9907836914062, "learning_rate": 4.936820762447007e-06, "loss": 18.2224, "step": 276810 }, { "epoch": 0.559193913953385, "grad_norm": 195.16026306152344, "learning_rate": 4.936471724618706e-06, "loss": 12.2022, "step": 276820 }, { "epoch": 0.5592141145860688, "grad_norm": 270.56549072265625, "learning_rate": 4.936122687100034e-06, "loss": 21.3985, "step": 276830 }, { "epoch": 0.5592343152187527, "grad_norm": 221.6443634033203, "learning_rate": 4.93577364989269e-06, "loss": 25.6469, "step": 276840 }, { "epoch": 0.5592545158514365, "grad_norm": 395.1712951660156, "learning_rate": 4.93542461299838e-06, "loss": 13.1065, "step": 276850 }, { "epoch": 0.5592747164841203, "grad_norm": 390.8291320800781, "learning_rate": 4.935075576418802e-06, "loss": 15.2034, "step": 276860 }, { "epoch": 0.5592949171168041, "grad_norm": 516.2682495117188, "learning_rate": 4.934726540155656e-06, "loss": 27.2103, "step": 276870 }, { "epoch": 0.5593151177494879, "grad_norm": 246.35308837890625, "learning_rate": 4.934377504210648e-06, "loss": 24.024, "step": 276880 }, { "epoch": 0.5593353183821718, "grad_norm": 823.3765869140625, "learning_rate": 4.934028468585473e-06, "loss": 15.8606, "step": 276890 }, { "epoch": 0.5593555190148556, "grad_norm": 365.7604064941406, "learning_rate": 4.933679433281837e-06, "loss": 17.7518, "step": 276900 }, { "epoch": 0.5593757196475394, "grad_norm": 2235.00390625, "learning_rate": 4.933330398301438e-06, "loss": 32.2481, "step": 276910 }, { "epoch": 0.5593959202802232, "grad_norm": 180.20101928710938, "learning_rate": 4.932981363645981e-06, "loss": 26.0842, "step": 276920 }, { "epoch": 0.559416120912907, "grad_norm": 416.2943420410156, "learning_rate": 4.932632329317162e-06, "loss": 18.6779, "step": 276930 }, { "epoch": 0.5594363215455908, "grad_norm": 325.4767150878906, "learning_rate": 4.9322832953166856e-06, "loss": 18.2107, "step": 276940 }, { "epoch": 0.5594565221782746, "grad_norm": 626.8479614257812, "learning_rate": 4.931934261646255e-06, "loss": 24.3436, "step": 276950 }, { "epoch": 0.5594767228109584, "grad_norm": 3.538747549057007, "learning_rate": 4.931585228307564e-06, "loss": 17.349, "step": 276960 }, { "epoch": 0.5594969234436422, "grad_norm": 71.02916717529297, "learning_rate": 4.931236195302321e-06, "loss": 20.873, "step": 276970 }, { "epoch": 0.559517124076326, "grad_norm": 1171.236328125, "learning_rate": 4.930887162632225e-06, "loss": 20.8846, "step": 276980 }, { "epoch": 0.5595373247090099, "grad_norm": 430.44439697265625, "learning_rate": 4.930538130298975e-06, "loss": 31.7646, "step": 276990 }, { "epoch": 0.5595575253416937, "grad_norm": 498.4757995605469, "learning_rate": 4.9301890983042744e-06, "loss": 25.1662, "step": 277000 }, { "epoch": 0.5595777259743775, "grad_norm": 421.6228332519531, "learning_rate": 4.929840066649824e-06, "loss": 20.3946, "step": 277010 }, { "epoch": 
0.5595979266070613, "grad_norm": 22.71751594543457, "learning_rate": 4.929491035337325e-06, "loss": 19.0553, "step": 277020 }, { "epoch": 0.5596181272397451, "grad_norm": 301.5302429199219, "learning_rate": 4.929142004368475e-06, "loss": 15.6127, "step": 277030 }, { "epoch": 0.559638327872429, "grad_norm": 460.103759765625, "learning_rate": 4.928792973744983e-06, "loss": 39.9705, "step": 277040 }, { "epoch": 0.5596585285051128, "grad_norm": 742.2890014648438, "learning_rate": 4.928443943468541e-06, "loss": 16.4028, "step": 277050 }, { "epoch": 0.5596787291377966, "grad_norm": 540.8504638671875, "learning_rate": 4.928094913540857e-06, "loss": 11.9522, "step": 277060 }, { "epoch": 0.5596989297704804, "grad_norm": 491.7070617675781, "learning_rate": 4.927745883963629e-06, "loss": 17.2544, "step": 277070 }, { "epoch": 0.5597191304031642, "grad_norm": 322.5917663574219, "learning_rate": 4.92739685473856e-06, "loss": 20.6291, "step": 277080 }, { "epoch": 0.559739331035848, "grad_norm": 276.1647033691406, "learning_rate": 4.927047825867349e-06, "loss": 18.5095, "step": 277090 }, { "epoch": 0.5597595316685319, "grad_norm": 3.7427709102630615, "learning_rate": 4.926698797351697e-06, "loss": 17.7034, "step": 277100 }, { "epoch": 0.5597797323012157, "grad_norm": 286.2646789550781, "learning_rate": 4.926349769193308e-06, "loss": 11.4961, "step": 277110 }, { "epoch": 0.5597999329338995, "grad_norm": 735.1262817382812, "learning_rate": 4.92600074139388e-06, "loss": 16.7976, "step": 277120 }, { "epoch": 0.5598201335665833, "grad_norm": 621.32177734375, "learning_rate": 4.925651713955115e-06, "loss": 26.1611, "step": 277130 }, { "epoch": 0.5598403341992672, "grad_norm": 364.3940734863281, "learning_rate": 4.925302686878717e-06, "loss": 25.4851, "step": 277140 }, { "epoch": 0.559860534831951, "grad_norm": 224.42904663085938, "learning_rate": 4.924953660166383e-06, "loss": 23.8722, "step": 277150 }, { "epoch": 0.5598807354646348, "grad_norm": 360.2662048339844, "learning_rate": 4.924604633819815e-06, "loss": 16.7067, "step": 277160 }, { "epoch": 0.5599009360973186, "grad_norm": 286.9324951171875, "learning_rate": 4.924255607840717e-06, "loss": 21.0849, "step": 277170 }, { "epoch": 0.5599211367300024, "grad_norm": 224.43296813964844, "learning_rate": 4.923906582230786e-06, "loss": 21.4258, "step": 277180 }, { "epoch": 0.5599413373626863, "grad_norm": 276.378173828125, "learning_rate": 4.923557556991724e-06, "loss": 11.3986, "step": 277190 }, { "epoch": 0.55996153799537, "grad_norm": 205.25302124023438, "learning_rate": 4.923208532125235e-06, "loss": 23.9816, "step": 277200 }, { "epoch": 0.5599817386280538, "grad_norm": 420.8791809082031, "learning_rate": 4.9228595076330196e-06, "loss": 10.7947, "step": 277210 }, { "epoch": 0.5600019392607376, "grad_norm": 702.0499877929688, "learning_rate": 4.9225104835167755e-06, "loss": 12.4634, "step": 277220 }, { "epoch": 0.5600221398934214, "grad_norm": 382.42437744140625, "learning_rate": 4.9221614597782066e-06, "loss": 18.7814, "step": 277230 }, { "epoch": 0.5600423405261052, "grad_norm": 213.75582885742188, "learning_rate": 4.921812436419014e-06, "loss": 21.6486, "step": 277240 }, { "epoch": 0.5600625411587891, "grad_norm": 0.0, "learning_rate": 4.921463413440898e-06, "loss": 25.8586, "step": 277250 }, { "epoch": 0.5600827417914729, "grad_norm": 214.06646728515625, "learning_rate": 4.9211143908455575e-06, "loss": 22.5008, "step": 277260 }, { "epoch": 0.5601029424241567, "grad_norm": 183.02252197265625, "learning_rate": 4.920765368634699e-06, "loss": 10.4723, "step": 
277270 }, { "epoch": 0.5601231430568405, "grad_norm": 505.80792236328125, "learning_rate": 4.920416346810019e-06, "loss": 7.7827, "step": 277280 }, { "epoch": 0.5601433436895243, "grad_norm": 811.605712890625, "learning_rate": 4.920067325373219e-06, "loss": 33.468, "step": 277290 }, { "epoch": 0.5601635443222082, "grad_norm": 243.12509155273438, "learning_rate": 4.9197183043260035e-06, "loss": 18.1433, "step": 277300 }, { "epoch": 0.560183744954892, "grad_norm": 90.96365356445312, "learning_rate": 4.91936928367007e-06, "loss": 21.1801, "step": 277310 }, { "epoch": 0.5602039455875758, "grad_norm": 127.96955871582031, "learning_rate": 4.919020263407121e-06, "loss": 16.3724, "step": 277320 }, { "epoch": 0.5602241462202596, "grad_norm": 180.5815887451172, "learning_rate": 4.918671243538859e-06, "loss": 12.0421, "step": 277330 }, { "epoch": 0.5602443468529434, "grad_norm": 673.2286987304688, "learning_rate": 4.91832222406698e-06, "loss": 24.3858, "step": 277340 }, { "epoch": 0.5602645474856273, "grad_norm": 712.7254028320312, "learning_rate": 4.91797320499319e-06, "loss": 16.976, "step": 277350 }, { "epoch": 0.5602847481183111, "grad_norm": 106.54489135742188, "learning_rate": 4.9176241863191895e-06, "loss": 20.2608, "step": 277360 }, { "epoch": 0.5603049487509949, "grad_norm": 595.0263671875, "learning_rate": 4.917275168046678e-06, "loss": 26.9587, "step": 277370 }, { "epoch": 0.5603251493836787, "grad_norm": 177.71038818359375, "learning_rate": 4.916926150177358e-06, "loss": 12.8446, "step": 277380 }, { "epoch": 0.5603453500163625, "grad_norm": 499.8451232910156, "learning_rate": 4.916577132712929e-06, "loss": 20.6954, "step": 277390 }, { "epoch": 0.5603655506490464, "grad_norm": 453.36749267578125, "learning_rate": 4.9162281156550945e-06, "loss": 27.0135, "step": 277400 }, { "epoch": 0.5603857512817302, "grad_norm": 66.6358871459961, "learning_rate": 4.915879099005552e-06, "loss": 15.8422, "step": 277410 }, { "epoch": 0.560405951914414, "grad_norm": 292.084716796875, "learning_rate": 4.915530082766005e-06, "loss": 14.657, "step": 277420 }, { "epoch": 0.5604261525470978, "grad_norm": 387.52532958984375, "learning_rate": 4.915181066938156e-06, "loss": 20.5576, "step": 277430 }, { "epoch": 0.5604463531797816, "grad_norm": 121.14635467529297, "learning_rate": 4.914832051523702e-06, "loss": 11.5561, "step": 277440 }, { "epoch": 0.5604665538124654, "grad_norm": 164.2471160888672, "learning_rate": 4.9144830365243464e-06, "loss": 20.0493, "step": 277450 }, { "epoch": 0.5604867544451492, "grad_norm": 688.5679931640625, "learning_rate": 4.914134021941792e-06, "loss": 21.2318, "step": 277460 }, { "epoch": 0.560506955077833, "grad_norm": 433.1025695800781, "learning_rate": 4.913785007777737e-06, "loss": 13.295, "step": 277470 }, { "epoch": 0.5605271557105168, "grad_norm": 65.97478485107422, "learning_rate": 4.9134359940338815e-06, "loss": 22.3415, "step": 277480 }, { "epoch": 0.5605473563432006, "grad_norm": 436.15618896484375, "learning_rate": 4.913086980711932e-06, "loss": 31.5212, "step": 277490 }, { "epoch": 0.5605675569758845, "grad_norm": 299.2603454589844, "learning_rate": 4.9127379678135825e-06, "loss": 19.6428, "step": 277500 }, { "epoch": 0.5605877576085683, "grad_norm": 734.9041748046875, "learning_rate": 4.91238895534054e-06, "loss": 9.5273, "step": 277510 }, { "epoch": 0.5606079582412521, "grad_norm": 139.4139862060547, "learning_rate": 4.912039943294502e-06, "loss": 15.6401, "step": 277520 }, { "epoch": 0.5606281588739359, "grad_norm": 113.31924438476562, "learning_rate": 
4.911690931677172e-06, "loss": 14.8608, "step": 277530 }, { "epoch": 0.5606483595066197, "grad_norm": 70.83113861083984, "learning_rate": 4.911341920490248e-06, "loss": 21.0946, "step": 277540 }, { "epoch": 0.5606685601393036, "grad_norm": 169.0532989501953, "learning_rate": 4.910992909735432e-06, "loss": 30.2231, "step": 277550 }, { "epoch": 0.5606887607719874, "grad_norm": 214.66018676757812, "learning_rate": 4.910643899414429e-06, "loss": 13.8178, "step": 277560 }, { "epoch": 0.5607089614046712, "grad_norm": 409.4646301269531, "learning_rate": 4.910294889528934e-06, "loss": 14.5599, "step": 277570 }, { "epoch": 0.560729162037355, "grad_norm": 110.76598358154297, "learning_rate": 4.909945880080651e-06, "loss": 13.367, "step": 277580 }, { "epoch": 0.5607493626700388, "grad_norm": 1096.98486328125, "learning_rate": 4.909596871071283e-06, "loss": 18.3253, "step": 277590 }, { "epoch": 0.5607695633027227, "grad_norm": 201.20767211914062, "learning_rate": 4.9092478625025266e-06, "loss": 7.0634, "step": 277600 }, { "epoch": 0.5607897639354065, "grad_norm": 285.1210632324219, "learning_rate": 4.908898854376086e-06, "loss": 13.3978, "step": 277610 }, { "epoch": 0.5608099645680903, "grad_norm": 126.02528381347656, "learning_rate": 4.908549846693662e-06, "loss": 8.4953, "step": 277620 }, { "epoch": 0.5608301652007741, "grad_norm": 615.0642700195312, "learning_rate": 4.908200839456955e-06, "loss": 12.3349, "step": 277630 }, { "epoch": 0.5608503658334579, "grad_norm": 328.4018859863281, "learning_rate": 4.907851832667663e-06, "loss": 13.5078, "step": 277640 }, { "epoch": 0.5608705664661418, "grad_norm": 209.73092651367188, "learning_rate": 4.9075028263274925e-06, "loss": 10.9013, "step": 277650 }, { "epoch": 0.5608907670988256, "grad_norm": 306.17877197265625, "learning_rate": 4.907153820438142e-06, "loss": 12.0092, "step": 277660 }, { "epoch": 0.5609109677315094, "grad_norm": 981.385009765625, "learning_rate": 4.9068048150013124e-06, "loss": 24.4804, "step": 277670 }, { "epoch": 0.5609311683641932, "grad_norm": 285.9813537597656, "learning_rate": 4.906455810018705e-06, "loss": 14.1561, "step": 277680 }, { "epoch": 0.560951368996877, "grad_norm": 416.0923767089844, "learning_rate": 4.906106805492021e-06, "loss": 22.5247, "step": 277690 }, { "epoch": 0.5609715696295609, "grad_norm": 894.4718627929688, "learning_rate": 4.90575780142296e-06, "loss": 22.7162, "step": 277700 }, { "epoch": 0.5609917702622446, "grad_norm": 330.90423583984375, "learning_rate": 4.905408797813223e-06, "loss": 27.7434, "step": 277710 }, { "epoch": 0.5610119708949284, "grad_norm": 1.8141493797302246, "learning_rate": 4.9050597946645155e-06, "loss": 24.424, "step": 277720 }, { "epoch": 0.5610321715276122, "grad_norm": 0.0, "learning_rate": 4.904710791978532e-06, "loss": 15.6168, "step": 277730 }, { "epoch": 0.561052372160296, "grad_norm": 156.7672119140625, "learning_rate": 4.9043617897569775e-06, "loss": 17.4672, "step": 277740 }, { "epoch": 0.5610725727929798, "grad_norm": 321.03082275390625, "learning_rate": 4.904012788001553e-06, "loss": 17.8866, "step": 277750 }, { "epoch": 0.5610927734256637, "grad_norm": 1058.6097412109375, "learning_rate": 4.903663786713957e-06, "loss": 29.5709, "step": 277760 }, { "epoch": 0.5611129740583475, "grad_norm": 305.7610778808594, "learning_rate": 4.903314785895893e-06, "loss": 27.1025, "step": 277770 }, { "epoch": 0.5611331746910313, "grad_norm": 905.561767578125, "learning_rate": 4.902965785549061e-06, "loss": 24.3868, "step": 277780 }, { "epoch": 0.5611533753237151, "grad_norm": 
126.15748596191406, "learning_rate": 4.902616785675162e-06, "loss": 22.0782, "step": 277790 }, { "epoch": 0.5611735759563989, "grad_norm": 414.2947082519531, "learning_rate": 4.902267786275895e-06, "loss": 30.3595, "step": 277800 }, { "epoch": 0.5611937765890828, "grad_norm": 775.2684326171875, "learning_rate": 4.901918787352965e-06, "loss": 21.3826, "step": 277810 }, { "epoch": 0.5612139772217666, "grad_norm": 314.74493408203125, "learning_rate": 4.901569788908071e-06, "loss": 15.4112, "step": 277820 }, { "epoch": 0.5612341778544504, "grad_norm": 272.0699768066406, "learning_rate": 4.901220790942913e-06, "loss": 24.3775, "step": 277830 }, { "epoch": 0.5612543784871342, "grad_norm": 484.6834411621094, "learning_rate": 4.900871793459193e-06, "loss": 25.3064, "step": 277840 }, { "epoch": 0.561274579119818, "grad_norm": 310.4993591308594, "learning_rate": 4.900522796458613e-06, "loss": 12.8083, "step": 277850 }, { "epoch": 0.5612947797525019, "grad_norm": 277.1334228515625, "learning_rate": 4.900173799942873e-06, "loss": 25.7227, "step": 277860 }, { "epoch": 0.5613149803851857, "grad_norm": 506.7071533203125, "learning_rate": 4.89982480391367e-06, "loss": 14.2547, "step": 277870 }, { "epoch": 0.5613351810178695, "grad_norm": 345.21087646484375, "learning_rate": 4.899475808372714e-06, "loss": 17.3943, "step": 277880 }, { "epoch": 0.5613553816505533, "grad_norm": 498.5858459472656, "learning_rate": 4.899126813321697e-06, "loss": 21.8743, "step": 277890 }, { "epoch": 0.5613755822832371, "grad_norm": 39.76124954223633, "learning_rate": 4.898777818762325e-06, "loss": 17.1321, "step": 277900 }, { "epoch": 0.561395782915921, "grad_norm": 258.9658508300781, "learning_rate": 4.898428824696298e-06, "loss": 26.9842, "step": 277910 }, { "epoch": 0.5614159835486048, "grad_norm": 106.4733657836914, "learning_rate": 4.898079831125316e-06, "loss": 16.924, "step": 277920 }, { "epoch": 0.5614361841812886, "grad_norm": 326.1387023925781, "learning_rate": 4.897730838051081e-06, "loss": 28.6345, "step": 277930 }, { "epoch": 0.5614563848139724, "grad_norm": 521.5694580078125, "learning_rate": 4.897381845475294e-06, "loss": 26.2371, "step": 277940 }, { "epoch": 0.5614765854466562, "grad_norm": 156.76898193359375, "learning_rate": 4.897032853399653e-06, "loss": 20.5586, "step": 277950 }, { "epoch": 0.5614967860793401, "grad_norm": 0.0, "learning_rate": 4.896683861825863e-06, "loss": 12.6179, "step": 277960 }, { "epoch": 0.5615169867120238, "grad_norm": 380.5687561035156, "learning_rate": 4.896334870755623e-06, "loss": 20.4136, "step": 277970 }, { "epoch": 0.5615371873447076, "grad_norm": 407.02862548828125, "learning_rate": 4.895985880190636e-06, "loss": 13.2282, "step": 277980 }, { "epoch": 0.5615573879773914, "grad_norm": 336.0501708984375, "learning_rate": 4.895636890132599e-06, "loss": 21.0171, "step": 277990 }, { "epoch": 0.5615775886100752, "grad_norm": 1520.871826171875, "learning_rate": 4.895287900583216e-06, "loss": 35.0917, "step": 278000 }, { "epoch": 0.561597789242759, "grad_norm": 282.1485900878906, "learning_rate": 4.894938911544188e-06, "loss": 22.7634, "step": 278010 }, { "epoch": 0.5616179898754429, "grad_norm": 99.08549499511719, "learning_rate": 4.894589923017212e-06, "loss": 16.0685, "step": 278020 }, { "epoch": 0.5616381905081267, "grad_norm": 393.707763671875, "learning_rate": 4.894240935003994e-06, "loss": 15.8697, "step": 278030 }, { "epoch": 0.5616583911408105, "grad_norm": 433.3358459472656, "learning_rate": 4.893891947506234e-06, "loss": 15.7811, "step": 278040 }, { "epoch": 
0.5616785917734943, "grad_norm": 338.52593994140625, "learning_rate": 4.89354296052563e-06, "loss": 24.2855, "step": 278050 }, { "epoch": 0.5616987924061781, "grad_norm": 190.8038330078125, "learning_rate": 4.893193974063885e-06, "loss": 22.1841, "step": 278060 }, { "epoch": 0.561718993038862, "grad_norm": 635.9793701171875, "learning_rate": 4.892844988122701e-06, "loss": 37.8201, "step": 278070 }, { "epoch": 0.5617391936715458, "grad_norm": 258.3083801269531, "learning_rate": 4.892496002703777e-06, "loss": 19.9726, "step": 278080 }, { "epoch": 0.5617593943042296, "grad_norm": 263.74127197265625, "learning_rate": 4.892147017808812e-06, "loss": 37.1322, "step": 278090 }, { "epoch": 0.5617795949369134, "grad_norm": 375.145751953125, "learning_rate": 4.891798033439511e-06, "loss": 16.6032, "step": 278100 }, { "epoch": 0.5617997955695972, "grad_norm": 161.28228759765625, "learning_rate": 4.891449049597574e-06, "loss": 18.2481, "step": 278110 }, { "epoch": 0.5618199962022811, "grad_norm": 18.511276245117188, "learning_rate": 4.891100066284701e-06, "loss": 12.4458, "step": 278120 }, { "epoch": 0.5618401968349649, "grad_norm": 229.6265411376953, "learning_rate": 4.8907510835025924e-06, "loss": 17.1244, "step": 278130 }, { "epoch": 0.5618603974676487, "grad_norm": 318.438232421875, "learning_rate": 4.890402101252951e-06, "loss": 18.0329, "step": 278140 }, { "epoch": 0.5618805981003325, "grad_norm": 344.5833740234375, "learning_rate": 4.890053119537475e-06, "loss": 13.6882, "step": 278150 }, { "epoch": 0.5619007987330163, "grad_norm": 100.07310485839844, "learning_rate": 4.889704138357867e-06, "loss": 25.8497, "step": 278160 }, { "epoch": 0.5619209993657002, "grad_norm": 2.145982265472412, "learning_rate": 4.889355157715829e-06, "loss": 24.2324, "step": 278170 }, { "epoch": 0.561941199998384, "grad_norm": 0.07046766579151154, "learning_rate": 4.889006177613059e-06, "loss": 23.4975, "step": 278180 }, { "epoch": 0.5619614006310678, "grad_norm": 305.05572509765625, "learning_rate": 4.888657198051259e-06, "loss": 23.9864, "step": 278190 }, { "epoch": 0.5619816012637516, "grad_norm": 247.42340087890625, "learning_rate": 4.888308219032133e-06, "loss": 19.8828, "step": 278200 }, { "epoch": 0.5620018018964354, "grad_norm": 354.2450256347656, "learning_rate": 4.8879592405573765e-06, "loss": 22.2688, "step": 278210 }, { "epoch": 0.5620220025291192, "grad_norm": 305.7784423828125, "learning_rate": 4.887610262628694e-06, "loss": 15.7137, "step": 278220 }, { "epoch": 0.562042203161803, "grad_norm": 240.77565002441406, "learning_rate": 4.887261285247787e-06, "loss": 10.2753, "step": 278230 }, { "epoch": 0.5620624037944868, "grad_norm": 394.3060607910156, "learning_rate": 4.886912308416353e-06, "loss": 43.582, "step": 278240 }, { "epoch": 0.5620826044271706, "grad_norm": 282.2279052734375, "learning_rate": 4.886563332136093e-06, "loss": 15.0963, "step": 278250 }, { "epoch": 0.5621028050598544, "grad_norm": 500.75189208984375, "learning_rate": 4.886214356408712e-06, "loss": 12.8432, "step": 278260 }, { "epoch": 0.5621230056925383, "grad_norm": 333.8148193359375, "learning_rate": 4.885865381235909e-06, "loss": 13.4417, "step": 278270 }, { "epoch": 0.5621432063252221, "grad_norm": 383.7184143066406, "learning_rate": 4.885516406619383e-06, "loss": 33.8934, "step": 278280 }, { "epoch": 0.5621634069579059, "grad_norm": 510.25048828125, "learning_rate": 4.885167432560836e-06, "loss": 16.0545, "step": 278290 }, { "epoch": 0.5621836075905897, "grad_norm": 259.1756591796875, "learning_rate": 4.88481845906197e-06, "loss": 
28.3703, "step": 278300 }, { "epoch": 0.5622038082232735, "grad_norm": 1500.63330078125, "learning_rate": 4.884469486124484e-06, "loss": 29.4633, "step": 278310 }, { "epoch": 0.5622240088559574, "grad_norm": 85.11979675292969, "learning_rate": 4.884120513750079e-06, "loss": 18.508, "step": 278320 }, { "epoch": 0.5622442094886412, "grad_norm": 594.4029541015625, "learning_rate": 4.8837715419404596e-06, "loss": 25.4804, "step": 278330 }, { "epoch": 0.562264410121325, "grad_norm": 234.77423095703125, "learning_rate": 4.88342257069732e-06, "loss": 12.2237, "step": 278340 }, { "epoch": 0.5622846107540088, "grad_norm": 266.2846984863281, "learning_rate": 4.883073600022366e-06, "loss": 17.3074, "step": 278350 }, { "epoch": 0.5623048113866926, "grad_norm": 210.6982879638672, "learning_rate": 4.882724629917298e-06, "loss": 20.9359, "step": 278360 }, { "epoch": 0.5623250120193765, "grad_norm": 568.3202514648438, "learning_rate": 4.8823756603838155e-06, "loss": 16.2269, "step": 278370 }, { "epoch": 0.5623452126520603, "grad_norm": 443.9132080078125, "learning_rate": 4.882026691423619e-06, "loss": 28.8183, "step": 278380 }, { "epoch": 0.5623654132847441, "grad_norm": 409.15472412109375, "learning_rate": 4.881677723038411e-06, "loss": 19.5304, "step": 278390 }, { "epoch": 0.5623856139174279, "grad_norm": 562.7970581054688, "learning_rate": 4.881328755229892e-06, "loss": 31.7977, "step": 278400 }, { "epoch": 0.5624058145501117, "grad_norm": 449.59893798828125, "learning_rate": 4.88097978799976e-06, "loss": 18.6794, "step": 278410 }, { "epoch": 0.5624260151827956, "grad_norm": 170.77944946289062, "learning_rate": 4.880630821349718e-06, "loss": 19.7776, "step": 278420 }, { "epoch": 0.5624462158154794, "grad_norm": 317.1785888671875, "learning_rate": 4.8802818552814695e-06, "loss": 15.5262, "step": 278430 }, { "epoch": 0.5624664164481632, "grad_norm": 501.2215270996094, "learning_rate": 4.879932889796711e-06, "loss": 17.5528, "step": 278440 }, { "epoch": 0.562486617080847, "grad_norm": 905.4297485351562, "learning_rate": 4.879583924897146e-06, "loss": 24.2411, "step": 278450 }, { "epoch": 0.5625068177135308, "grad_norm": 734.2091674804688, "learning_rate": 4.879234960584474e-06, "loss": 24.3813, "step": 278460 }, { "epoch": 0.5625270183462147, "grad_norm": 492.3178405761719, "learning_rate": 4.878885996860396e-06, "loss": 20.8955, "step": 278470 }, { "epoch": 0.5625472189788984, "grad_norm": 371.9866027832031, "learning_rate": 4.878537033726612e-06, "loss": 17.3136, "step": 278480 }, { "epoch": 0.5625674196115822, "grad_norm": 158.58082580566406, "learning_rate": 4.878188071184827e-06, "loss": 23.8322, "step": 278490 }, { "epoch": 0.562587620244266, "grad_norm": 367.6337890625, "learning_rate": 4.877839109236735e-06, "loss": 21.9475, "step": 278500 }, { "epoch": 0.5626078208769498, "grad_norm": 480.400146484375, "learning_rate": 4.877490147884042e-06, "loss": 16.6314, "step": 278510 }, { "epoch": 0.5626280215096336, "grad_norm": 152.57212829589844, "learning_rate": 4.8771411871284465e-06, "loss": 32.986, "step": 278520 }, { "epoch": 0.5626482221423175, "grad_norm": 217.05926513671875, "learning_rate": 4.87679222697165e-06, "loss": 14.5209, "step": 278530 }, { "epoch": 0.5626684227750013, "grad_norm": 402.0018615722656, "learning_rate": 4.8764432674153536e-06, "loss": 21.1349, "step": 278540 }, { "epoch": 0.5626886234076851, "grad_norm": 287.79718017578125, "learning_rate": 4.8760943084612585e-06, "loss": 10.3715, "step": 278550 }, { "epoch": 0.5627088240403689, "grad_norm": 391.60687255859375, 
"learning_rate": 4.875745350111064e-06, "loss": 16.3361, "step": 278560 }, { "epoch": 0.5627290246730527, "grad_norm": 976.3838500976562, "learning_rate": 4.87539639236647e-06, "loss": 24.7614, "step": 278570 }, { "epoch": 0.5627492253057366, "grad_norm": 523.6411743164062, "learning_rate": 4.87504743522918e-06, "loss": 26.72, "step": 278580 }, { "epoch": 0.5627694259384204, "grad_norm": 453.2245178222656, "learning_rate": 4.874698478700895e-06, "loss": 20.8388, "step": 278590 }, { "epoch": 0.5627896265711042, "grad_norm": 410.42626953125, "learning_rate": 4.874349522783313e-06, "loss": 18.1273, "step": 278600 }, { "epoch": 0.562809827203788, "grad_norm": 468.30401611328125, "learning_rate": 4.874000567478137e-06, "loss": 19.8073, "step": 278610 }, { "epoch": 0.5628300278364718, "grad_norm": 615.7197265625, "learning_rate": 4.873651612787067e-06, "loss": 24.4914, "step": 278620 }, { "epoch": 0.5628502284691557, "grad_norm": 515.7312622070312, "learning_rate": 4.873302658711803e-06, "loss": 14.5848, "step": 278630 }, { "epoch": 0.5628704291018395, "grad_norm": 380.0971374511719, "learning_rate": 4.872953705254045e-06, "loss": 12.4876, "step": 278640 }, { "epoch": 0.5628906297345233, "grad_norm": 178.8086700439453, "learning_rate": 4.8726047524154985e-06, "loss": 18.3994, "step": 278650 }, { "epoch": 0.5629108303672071, "grad_norm": 145.70703125, "learning_rate": 4.872255800197859e-06, "loss": 21.4966, "step": 278660 }, { "epoch": 0.562931030999891, "grad_norm": 119.83448028564453, "learning_rate": 4.87190684860283e-06, "loss": 17.6204, "step": 278670 }, { "epoch": 0.5629512316325748, "grad_norm": 247.00778198242188, "learning_rate": 4.871557897632111e-06, "loss": 27.0825, "step": 278680 }, { "epoch": 0.5629714322652586, "grad_norm": 358.8352355957031, "learning_rate": 4.871208947287404e-06, "loss": 37.2915, "step": 278690 }, { "epoch": 0.5629916328979424, "grad_norm": 531.917724609375, "learning_rate": 4.870859997570407e-06, "loss": 18.6128, "step": 278700 }, { "epoch": 0.5630118335306262, "grad_norm": 585.2196044921875, "learning_rate": 4.870511048482824e-06, "loss": 36.4132, "step": 278710 }, { "epoch": 0.56303203416331, "grad_norm": 78.5827865600586, "learning_rate": 4.870162100026355e-06, "loss": 15.3775, "step": 278720 }, { "epoch": 0.5630522347959938, "grad_norm": 515.3905029296875, "learning_rate": 4.8698131522027e-06, "loss": 18.907, "step": 278730 }, { "epoch": 0.5630724354286776, "grad_norm": 257.6161804199219, "learning_rate": 4.8694642050135595e-06, "loss": 21.4215, "step": 278740 }, { "epoch": 0.5630926360613614, "grad_norm": 313.5485534667969, "learning_rate": 4.869115258460636e-06, "loss": 17.9498, "step": 278750 }, { "epoch": 0.5631128366940452, "grad_norm": 545.9793701171875, "learning_rate": 4.868766312545627e-06, "loss": 31.018, "step": 278760 }, { "epoch": 0.563133037326729, "grad_norm": 31.352577209472656, "learning_rate": 4.868417367270234e-06, "loss": 24.2385, "step": 278770 }, { "epoch": 0.5631532379594129, "grad_norm": 362.8998107910156, "learning_rate": 4.8680684226361624e-06, "loss": 27.9904, "step": 278780 }, { "epoch": 0.5631734385920967, "grad_norm": 70.08490753173828, "learning_rate": 4.867719478645106e-06, "loss": 16.635, "step": 278790 }, { "epoch": 0.5631936392247805, "grad_norm": 120.2506103515625, "learning_rate": 4.86737053529877e-06, "loss": 15.7078, "step": 278800 }, { "epoch": 0.5632138398574643, "grad_norm": 279.32183837890625, "learning_rate": 4.867021592598855e-06, "loss": 12.1191, "step": 278810 }, { "epoch": 0.5632340404901481, "grad_norm": 
367.0658264160156, "learning_rate": 4.86667265054706e-06, "loss": 20.4642, "step": 278820 }, { "epoch": 0.563254241122832, "grad_norm": 447.9247131347656, "learning_rate": 4.8663237091450856e-06, "loss": 23.8312, "step": 278830 }, { "epoch": 0.5632744417555158, "grad_norm": 526.6144409179688, "learning_rate": 4.865974768394635e-06, "loss": 27.8641, "step": 278840 }, { "epoch": 0.5632946423881996, "grad_norm": 343.0456848144531, "learning_rate": 4.865625828297405e-06, "loss": 13.2943, "step": 278850 }, { "epoch": 0.5633148430208834, "grad_norm": 204.62393188476562, "learning_rate": 4.865276888855098e-06, "loss": 23.9033, "step": 278860 }, { "epoch": 0.5633350436535672, "grad_norm": 225.6749725341797, "learning_rate": 4.864927950069417e-06, "loss": 22.7242, "step": 278870 }, { "epoch": 0.5633552442862511, "grad_norm": 289.04107666015625, "learning_rate": 4.864579011942061e-06, "loss": 13.1412, "step": 278880 }, { "epoch": 0.5633754449189349, "grad_norm": 335.34326171875, "learning_rate": 4.8642300744747285e-06, "loss": 21.6426, "step": 278890 }, { "epoch": 0.5633956455516187, "grad_norm": 569.2777709960938, "learning_rate": 4.863881137669123e-06, "loss": 18.1175, "step": 278900 }, { "epoch": 0.5634158461843025, "grad_norm": 293.57550048828125, "learning_rate": 4.8635322015269455e-06, "loss": 19.5456, "step": 278910 }, { "epoch": 0.5634360468169863, "grad_norm": 422.96697998046875, "learning_rate": 4.863183266049895e-06, "loss": 19.1662, "step": 278920 }, { "epoch": 0.5634562474496702, "grad_norm": 603.3621215820312, "learning_rate": 4.86283433123967e-06, "loss": 22.3891, "step": 278930 }, { "epoch": 0.563476448082354, "grad_norm": 676.2031860351562, "learning_rate": 4.862485397097979e-06, "loss": 14.0476, "step": 278940 }, { "epoch": 0.5634966487150378, "grad_norm": 577.7756958007812, "learning_rate": 4.862136463626512e-06, "loss": 11.8324, "step": 278950 }, { "epoch": 0.5635168493477216, "grad_norm": 428.1227111816406, "learning_rate": 4.861787530826979e-06, "loss": 17.5698, "step": 278960 }, { "epoch": 0.5635370499804054, "grad_norm": 298.8414001464844, "learning_rate": 4.861438598701076e-06, "loss": 26.2628, "step": 278970 }, { "epoch": 0.5635572506130893, "grad_norm": 201.48023986816406, "learning_rate": 4.861089667250504e-06, "loss": 18.7578, "step": 278980 }, { "epoch": 0.563577451245773, "grad_norm": 233.4277801513672, "learning_rate": 4.860740736476963e-06, "loss": 21.6329, "step": 278990 }, { "epoch": 0.5635976518784568, "grad_norm": 428.1015930175781, "learning_rate": 4.860391806382157e-06, "loss": 13.8313, "step": 279000 }, { "epoch": 0.5636178525111406, "grad_norm": 135.4143829345703, "learning_rate": 4.860042876967784e-06, "loss": 13.5498, "step": 279010 }, { "epoch": 0.5636380531438244, "grad_norm": 326.22918701171875, "learning_rate": 4.859693948235542e-06, "loss": 15.7906, "step": 279020 }, { "epoch": 0.5636582537765082, "grad_norm": 168.1468048095703, "learning_rate": 4.859345020187137e-06, "loss": 14.452, "step": 279030 }, { "epoch": 0.5636784544091921, "grad_norm": 822.0842895507812, "learning_rate": 4.858996092824268e-06, "loss": 16.4693, "step": 279040 }, { "epoch": 0.5636986550418759, "grad_norm": 126.21709442138672, "learning_rate": 4.8586471661486345e-06, "loss": 19.1546, "step": 279050 }, { "epoch": 0.5637188556745597, "grad_norm": 154.8107147216797, "learning_rate": 4.8582982401619376e-06, "loss": 11.1282, "step": 279060 }, { "epoch": 0.5637390563072435, "grad_norm": 609.3305053710938, "learning_rate": 4.857949314865878e-06, "loss": 20.3392, "step": 279070 }, { 
"epoch": 0.5637592569399273, "grad_norm": 270.771240234375, "learning_rate": 4.857600390262156e-06, "loss": 27.7497, "step": 279080 }, { "epoch": 0.5637794575726112, "grad_norm": 537.5477905273438, "learning_rate": 4.857251466352471e-06, "loss": 17.6501, "step": 279090 }, { "epoch": 0.563799658205295, "grad_norm": 469.29638671875, "learning_rate": 4.856902543138528e-06, "loss": 19.0969, "step": 279100 }, { "epoch": 0.5638198588379788, "grad_norm": 727.9530639648438, "learning_rate": 4.856553620622021e-06, "loss": 21.7398, "step": 279110 }, { "epoch": 0.5638400594706626, "grad_norm": 252.12364196777344, "learning_rate": 4.856204698804656e-06, "loss": 17.4326, "step": 279120 }, { "epoch": 0.5638602601033464, "grad_norm": 537.7473754882812, "learning_rate": 4.855855777688133e-06, "loss": 13.5666, "step": 279130 }, { "epoch": 0.5638804607360303, "grad_norm": 379.7203674316406, "learning_rate": 4.85550685727415e-06, "loss": 26.9287, "step": 279140 }, { "epoch": 0.5639006613687141, "grad_norm": 372.1243896484375, "learning_rate": 4.85515793756441e-06, "loss": 12.531, "step": 279150 }, { "epoch": 0.5639208620013979, "grad_norm": 740.4132690429688, "learning_rate": 4.854809018560611e-06, "loss": 28.5175, "step": 279160 }, { "epoch": 0.5639410626340817, "grad_norm": 252.4897003173828, "learning_rate": 4.8544601002644585e-06, "loss": 19.9668, "step": 279170 }, { "epoch": 0.5639612632667655, "grad_norm": 380.94866943359375, "learning_rate": 4.854111182677646e-06, "loss": 18.2015, "step": 279180 }, { "epoch": 0.5639814638994494, "grad_norm": 175.6231689453125, "learning_rate": 4.85376226580188e-06, "loss": 14.1915, "step": 279190 }, { "epoch": 0.5640016645321332, "grad_norm": 573.9922485351562, "learning_rate": 4.853413349638859e-06, "loss": 18.1083, "step": 279200 }, { "epoch": 0.564021865164817, "grad_norm": 517.6943969726562, "learning_rate": 4.853064434190283e-06, "loss": 21.1547, "step": 279210 }, { "epoch": 0.5640420657975008, "grad_norm": 392.81610107421875, "learning_rate": 4.852715519457854e-06, "loss": 26.4975, "step": 279220 }, { "epoch": 0.5640622664301846, "grad_norm": 33.941307067871094, "learning_rate": 4.852366605443271e-06, "loss": 13.3732, "step": 279230 }, { "epoch": 0.5640824670628685, "grad_norm": 250.2884979248047, "learning_rate": 4.8520176921482355e-06, "loss": 16.7073, "step": 279240 }, { "epoch": 0.5641026676955522, "grad_norm": 1069.0157470703125, "learning_rate": 4.851668779574446e-06, "loss": 18.5817, "step": 279250 }, { "epoch": 0.564122868328236, "grad_norm": 283.9867248535156, "learning_rate": 4.851319867723607e-06, "loss": 21.8473, "step": 279260 }, { "epoch": 0.5641430689609198, "grad_norm": 201.53427124023438, "learning_rate": 4.8509709565974165e-06, "loss": 21.4699, "step": 279270 }, { "epoch": 0.5641632695936036, "grad_norm": 250.0788116455078, "learning_rate": 4.850622046197576e-06, "loss": 25.5267, "step": 279280 }, { "epoch": 0.5641834702262875, "grad_norm": 391.1429443359375, "learning_rate": 4.8502731365257855e-06, "loss": 22.5211, "step": 279290 }, { "epoch": 0.5642036708589713, "grad_norm": 571.8225708007812, "learning_rate": 4.8499242275837444e-06, "loss": 20.4421, "step": 279300 }, { "epoch": 0.5642238714916551, "grad_norm": 620.9213256835938, "learning_rate": 4.8495753193731545e-06, "loss": 37.9211, "step": 279310 }, { "epoch": 0.5642440721243389, "grad_norm": 709.5044555664062, "learning_rate": 4.8492264118957165e-06, "loss": 34.7678, "step": 279320 }, { "epoch": 0.5642642727570227, "grad_norm": 393.5628967285156, "learning_rate": 4.848877505153131e-06, 
"loss": 29.5212, "step": 279330 }, { "epoch": 0.5642844733897066, "grad_norm": 674.4771728515625, "learning_rate": 4.848528599147098e-06, "loss": 24.2638, "step": 279340 }, { "epoch": 0.5643046740223904, "grad_norm": 303.4005126953125, "learning_rate": 4.848179693879319e-06, "loss": 23.9594, "step": 279350 }, { "epoch": 0.5643248746550742, "grad_norm": 322.4669494628906, "learning_rate": 4.8478307893514934e-06, "loss": 18.0247, "step": 279360 }, { "epoch": 0.564345075287758, "grad_norm": 294.3116455078125, "learning_rate": 4.847481885565322e-06, "loss": 12.8049, "step": 279370 }, { "epoch": 0.5643652759204418, "grad_norm": 332.1798400878906, "learning_rate": 4.8471329825225034e-06, "loss": 16.6716, "step": 279380 }, { "epoch": 0.5643854765531257, "grad_norm": 308.9256896972656, "learning_rate": 4.846784080224744e-06, "loss": 16.0941, "step": 279390 }, { "epoch": 0.5644056771858095, "grad_norm": 822.2352294921875, "learning_rate": 4.846435178673737e-06, "loss": 27.9769, "step": 279400 }, { "epoch": 0.5644258778184933, "grad_norm": 526.4793090820312, "learning_rate": 4.846086277871188e-06, "loss": 23.6909, "step": 279410 }, { "epoch": 0.5644460784511771, "grad_norm": 481.90557861328125, "learning_rate": 4.845737377818796e-06, "loss": 18.722, "step": 279420 }, { "epoch": 0.5644662790838609, "grad_norm": 430.7373352050781, "learning_rate": 4.845388478518261e-06, "loss": 43.4882, "step": 279430 }, { "epoch": 0.5644864797165448, "grad_norm": 389.912841796875, "learning_rate": 4.845039579971283e-06, "loss": 35.5991, "step": 279440 }, { "epoch": 0.5645066803492286, "grad_norm": 473.7503967285156, "learning_rate": 4.8446906821795645e-06, "loss": 21.2011, "step": 279450 }, { "epoch": 0.5645268809819124, "grad_norm": 570.1041870117188, "learning_rate": 4.8443417851448035e-06, "loss": 11.7235, "step": 279460 }, { "epoch": 0.5645470816145962, "grad_norm": 639.3972778320312, "learning_rate": 4.843992888868702e-06, "loss": 17.078, "step": 279470 }, { "epoch": 0.56456728224728, "grad_norm": 812.7796630859375, "learning_rate": 4.84364399335296e-06, "loss": 33.3612, "step": 279480 }, { "epoch": 0.5645874828799639, "grad_norm": 702.8096313476562, "learning_rate": 4.84329509859928e-06, "loss": 21.8659, "step": 279490 }, { "epoch": 0.5646076835126476, "grad_norm": 184.54876708984375, "learning_rate": 4.842946204609359e-06, "loss": 18.5785, "step": 279500 }, { "epoch": 0.5646278841453314, "grad_norm": 437.0586853027344, "learning_rate": 4.8425973113848995e-06, "loss": 34.0539, "step": 279510 }, { "epoch": 0.5646480847780152, "grad_norm": 413.0774230957031, "learning_rate": 4.842248418927603e-06, "loss": 37.4086, "step": 279520 }, { "epoch": 0.564668285410699, "grad_norm": 742.9720458984375, "learning_rate": 4.841899527239166e-06, "loss": 29.4849, "step": 279530 }, { "epoch": 0.5646884860433828, "grad_norm": 554.9951171875, "learning_rate": 4.841550636321292e-06, "loss": 20.8777, "step": 279540 }, { "epoch": 0.5647086866760667, "grad_norm": 443.943359375, "learning_rate": 4.8412017461756845e-06, "loss": 33.688, "step": 279550 }, { "epoch": 0.5647288873087505, "grad_norm": 480.1344909667969, "learning_rate": 4.8408528568040365e-06, "loss": 21.1558, "step": 279560 }, { "epoch": 0.5647490879414343, "grad_norm": 206.64242553710938, "learning_rate": 4.840503968208054e-06, "loss": 12.8486, "step": 279570 }, { "epoch": 0.5647692885741181, "grad_norm": 297.1890563964844, "learning_rate": 4.840155080389436e-06, "loss": 17.4767, "step": 279580 }, { "epoch": 0.5647894892068019, "grad_norm": 501.24017333984375, 
"learning_rate": 4.839806193349882e-06, "loss": 17.387, "step": 279590 }, { "epoch": 0.5648096898394858, "grad_norm": 59.65628433227539, "learning_rate": 4.839457307091093e-06, "loss": 26.266, "step": 279600 }, { "epoch": 0.5648298904721696, "grad_norm": 78.96497344970703, "learning_rate": 4.839108421614771e-06, "loss": 10.7241, "step": 279610 }, { "epoch": 0.5648500911048534, "grad_norm": 249.42201232910156, "learning_rate": 4.838759536922614e-06, "loss": 35.6987, "step": 279620 }, { "epoch": 0.5648702917375372, "grad_norm": 475.94378662109375, "learning_rate": 4.838410653016322e-06, "loss": 19.9952, "step": 279630 }, { "epoch": 0.564890492370221, "grad_norm": 576.3741455078125, "learning_rate": 4.838061769897598e-06, "loss": 40.5024, "step": 279640 }, { "epoch": 0.5649106930029049, "grad_norm": 413.41949462890625, "learning_rate": 4.837712887568143e-06, "loss": 24.0064, "step": 279650 }, { "epoch": 0.5649308936355887, "grad_norm": 250.33079528808594, "learning_rate": 4.837364006029654e-06, "loss": 13.1016, "step": 279660 }, { "epoch": 0.5649510942682725, "grad_norm": 585.5135498046875, "learning_rate": 4.837015125283833e-06, "loss": 29.7956, "step": 279670 }, { "epoch": 0.5649712949009563, "grad_norm": 368.90618896484375, "learning_rate": 4.8366662453323826e-06, "loss": 24.4503, "step": 279680 }, { "epoch": 0.5649914955336401, "grad_norm": 1304.8232421875, "learning_rate": 4.836317366176999e-06, "loss": 24.6257, "step": 279690 }, { "epoch": 0.565011696166324, "grad_norm": 768.2684936523438, "learning_rate": 4.835968487819384e-06, "loss": 20.2988, "step": 279700 }, { "epoch": 0.5650318967990078, "grad_norm": 307.398681640625, "learning_rate": 4.835619610261242e-06, "loss": 28.9655, "step": 279710 }, { "epoch": 0.5650520974316916, "grad_norm": 331.45989990234375, "learning_rate": 4.835270733504267e-06, "loss": 15.687, "step": 279720 }, { "epoch": 0.5650722980643754, "grad_norm": 525.4197387695312, "learning_rate": 4.834921857550163e-06, "loss": 18.0182, "step": 279730 }, { "epoch": 0.5650924986970592, "grad_norm": 243.6503143310547, "learning_rate": 4.834572982400631e-06, "loss": 14.2919, "step": 279740 }, { "epoch": 0.5651126993297431, "grad_norm": 314.42059326171875, "learning_rate": 4.8342241080573696e-06, "loss": 19.056, "step": 279750 }, { "epoch": 0.5651328999624268, "grad_norm": 404.8274841308594, "learning_rate": 4.83387523452208e-06, "loss": 26.8217, "step": 279760 }, { "epoch": 0.5651531005951106, "grad_norm": 466.4205322265625, "learning_rate": 4.833526361796461e-06, "loss": 21.2137, "step": 279770 }, { "epoch": 0.5651733012277944, "grad_norm": 380.7063293457031, "learning_rate": 4.833177489882217e-06, "loss": 23.7955, "step": 279780 }, { "epoch": 0.5651935018604782, "grad_norm": 0.0, "learning_rate": 4.832828618781042e-06, "loss": 13.6185, "step": 279790 }, { "epoch": 0.565213702493162, "grad_norm": 358.6909484863281, "learning_rate": 4.832479748494643e-06, "loss": 25.1073, "step": 279800 }, { "epoch": 0.5652339031258459, "grad_norm": 370.5196838378906, "learning_rate": 4.832130879024717e-06, "loss": 24.2912, "step": 279810 }, { "epoch": 0.5652541037585297, "grad_norm": 373.6588439941406, "learning_rate": 4.831782010372964e-06, "loss": 10.702, "step": 279820 }, { "epoch": 0.5652743043912135, "grad_norm": 731.8858032226562, "learning_rate": 4.831433142541086e-06, "loss": 20.3273, "step": 279830 }, { "epoch": 0.5652945050238973, "grad_norm": 3.847933053970337, "learning_rate": 4.831084275530782e-06, "loss": 20.619, "step": 279840 }, { "epoch": 0.5653147056565812, "grad_norm": 
339.99896240234375, "learning_rate": 4.830735409343752e-06, "loss": 22.4393, "step": 279850 }, { "epoch": 0.565334906289265, "grad_norm": 606.836181640625, "learning_rate": 4.830386543981696e-06, "loss": 26.6426, "step": 279860 }, { "epoch": 0.5653551069219488, "grad_norm": 641.5547485351562, "learning_rate": 4.83003767944632e-06, "loss": 13.8716, "step": 279870 }, { "epoch": 0.5653753075546326, "grad_norm": 105.29841613769531, "learning_rate": 4.829688815739315e-06, "loss": 12.9752, "step": 279880 }, { "epoch": 0.5653955081873164, "grad_norm": 622.8025512695312, "learning_rate": 4.829339952862388e-06, "loss": 19.8063, "step": 279890 }, { "epoch": 0.5654157088200003, "grad_norm": 310.1514587402344, "learning_rate": 4.828991090817238e-06, "loss": 18.3451, "step": 279900 }, { "epoch": 0.5654359094526841, "grad_norm": 161.7609405517578, "learning_rate": 4.828642229605564e-06, "loss": 24.5835, "step": 279910 }, { "epoch": 0.5654561100853679, "grad_norm": 311.6127624511719, "learning_rate": 4.8282933692290665e-06, "loss": 17.4492, "step": 279920 }, { "epoch": 0.5654763107180517, "grad_norm": 33.28404235839844, "learning_rate": 4.827944509689445e-06, "loss": 9.8755, "step": 279930 }, { "epoch": 0.5654965113507355, "grad_norm": 298.7069396972656, "learning_rate": 4.827595650988404e-06, "loss": 27.2258, "step": 279940 }, { "epoch": 0.5655167119834194, "grad_norm": 593.8300170898438, "learning_rate": 4.827246793127639e-06, "loss": 12.0075, "step": 279950 }, { "epoch": 0.5655369126161032, "grad_norm": 344.9347839355469, "learning_rate": 4.826897936108853e-06, "loss": 16.5854, "step": 279960 }, { "epoch": 0.565557113248787, "grad_norm": 676.9986572265625, "learning_rate": 4.8265490799337455e-06, "loss": 21.8863, "step": 279970 }, { "epoch": 0.5655773138814708, "grad_norm": 195.52239990234375, "learning_rate": 4.826200224604017e-06, "loss": 7.0834, "step": 279980 }, { "epoch": 0.5655975145141546, "grad_norm": 499.9918212890625, "learning_rate": 4.8258513701213665e-06, "loss": 10.8261, "step": 279990 }, { "epoch": 0.5656177151468385, "grad_norm": 863.3130493164062, "learning_rate": 4.825502516487497e-06, "loss": 17.5498, "step": 280000 }, { "epoch": 0.5656379157795222, "grad_norm": 390.77508544921875, "learning_rate": 4.825153663704104e-06, "loss": 22.8388, "step": 280010 }, { "epoch": 0.565658116412206, "grad_norm": 553.9664916992188, "learning_rate": 4.824804811772893e-06, "loss": 26.1581, "step": 280020 }, { "epoch": 0.5656783170448898, "grad_norm": 293.2061767578125, "learning_rate": 4.824455960695563e-06, "loss": 24.5048, "step": 280030 }, { "epoch": 0.5656985176775736, "grad_norm": 359.7368469238281, "learning_rate": 4.8241071104738115e-06, "loss": 22.6865, "step": 280040 }, { "epoch": 0.5657187183102574, "grad_norm": 460.0821533203125, "learning_rate": 4.823758261109341e-06, "loss": 10.5031, "step": 280050 }, { "epoch": 0.5657389189429413, "grad_norm": 309.7868347167969, "learning_rate": 4.8234094126038524e-06, "loss": 17.6452, "step": 280060 }, { "epoch": 0.5657591195756251, "grad_norm": 226.4683074951172, "learning_rate": 4.823060564959045e-06, "loss": 19.3593, "step": 280070 }, { "epoch": 0.5657793202083089, "grad_norm": 187.0656280517578, "learning_rate": 4.8227117181766165e-06, "loss": 14.3262, "step": 280080 }, { "epoch": 0.5657995208409927, "grad_norm": 770.1342163085938, "learning_rate": 4.82236287225827e-06, "loss": 28.2982, "step": 280090 }, { "epoch": 0.5658197214736765, "grad_norm": 872.9797973632812, "learning_rate": 4.822014027205708e-06, "loss": 18.9361, "step": 280100 }, { 
"epoch": 0.5658399221063604, "grad_norm": 205.1911163330078, "learning_rate": 4.8216651830206265e-06, "loss": 21.1727, "step": 280110 }, { "epoch": 0.5658601227390442, "grad_norm": 499.60150146484375, "learning_rate": 4.821316339704727e-06, "loss": 20.7421, "step": 280120 }, { "epoch": 0.565880323371728, "grad_norm": 568.2881469726562, "learning_rate": 4.8209674972597116e-06, "loss": 29.0031, "step": 280130 }, { "epoch": 0.5659005240044118, "grad_norm": 166.3893280029297, "learning_rate": 4.820618655687277e-06, "loss": 22.0419, "step": 280140 }, { "epoch": 0.5659207246370956, "grad_norm": 367.2247314453125, "learning_rate": 4.820269814989125e-06, "loss": 19.7562, "step": 280150 }, { "epoch": 0.5659409252697795, "grad_norm": 209.6517791748047, "learning_rate": 4.819920975166959e-06, "loss": 25.7543, "step": 280160 }, { "epoch": 0.5659611259024633, "grad_norm": 76.78960418701172, "learning_rate": 4.8195721362224725e-06, "loss": 17.7162, "step": 280170 }, { "epoch": 0.5659813265351471, "grad_norm": 594.5751342773438, "learning_rate": 4.819223298157372e-06, "loss": 22.5247, "step": 280180 }, { "epoch": 0.5660015271678309, "grad_norm": 412.4549255371094, "learning_rate": 4.818874460973356e-06, "loss": 22.4813, "step": 280190 }, { "epoch": 0.5660217278005147, "grad_norm": 313.1060791015625, "learning_rate": 4.818525624672122e-06, "loss": 19.6513, "step": 280200 }, { "epoch": 0.5660419284331986, "grad_norm": 293.21575927734375, "learning_rate": 4.818176789255372e-06, "loss": 10.3151, "step": 280210 }, { "epoch": 0.5660621290658824, "grad_norm": 129.5390167236328, "learning_rate": 4.8178279547248055e-06, "loss": 17.854, "step": 280220 }, { "epoch": 0.5660823296985662, "grad_norm": 384.1737060546875, "learning_rate": 4.817479121082127e-06, "loss": 31.3266, "step": 280230 }, { "epoch": 0.56610253033125, "grad_norm": 148.68045043945312, "learning_rate": 4.81713028832903e-06, "loss": 15.6833, "step": 280240 }, { "epoch": 0.5661227309639338, "grad_norm": 267.49066162109375, "learning_rate": 4.816781456467218e-06, "loss": 14.2361, "step": 280250 }, { "epoch": 0.5661429315966177, "grad_norm": 537.7645263671875, "learning_rate": 4.816432625498394e-06, "loss": 20.0404, "step": 280260 }, { "epoch": 0.5661631322293014, "grad_norm": 0.0, "learning_rate": 4.816083795424252e-06, "loss": 47.0537, "step": 280270 }, { "epoch": 0.5661833328619852, "grad_norm": 413.2496643066406, "learning_rate": 4.815734966246496e-06, "loss": 26.7662, "step": 280280 }, { "epoch": 0.566203533494669, "grad_norm": 724.4093017578125, "learning_rate": 4.815386137966827e-06, "loss": 28.9771, "step": 280290 }, { "epoch": 0.5662237341273528, "grad_norm": 181.0941619873047, "learning_rate": 4.815037310586941e-06, "loss": 22.6037, "step": 280300 }, { "epoch": 0.5662439347600366, "grad_norm": 541.9688720703125, "learning_rate": 4.8146884841085405e-06, "loss": 21.288, "step": 280310 }, { "epoch": 0.5662641353927205, "grad_norm": 0.7081328630447388, "learning_rate": 4.8143396585333295e-06, "loss": 14.5067, "step": 280320 }, { "epoch": 0.5662843360254043, "grad_norm": 337.2891845703125, "learning_rate": 4.813990833863001e-06, "loss": 26.5572, "step": 280330 }, { "epoch": 0.5663045366580881, "grad_norm": 287.2080993652344, "learning_rate": 4.81364201009926e-06, "loss": 30.8338, "step": 280340 }, { "epoch": 0.5663247372907719, "grad_norm": 611.9329223632812, "learning_rate": 4.813293187243806e-06, "loss": 16.5224, "step": 280350 }, { "epoch": 0.5663449379234557, "grad_norm": 452.2556457519531, "learning_rate": 4.812944365298337e-06, "loss": 
14.7404, "step": 280360 }, { "epoch": 0.5663651385561396, "grad_norm": 48.49687576293945, "learning_rate": 4.812595544264554e-06, "loss": 22.9897, "step": 280370 }, { "epoch": 0.5663853391888234, "grad_norm": 390.6477355957031, "learning_rate": 4.812246724144158e-06, "loss": 27.3596, "step": 280380 }, { "epoch": 0.5664055398215072, "grad_norm": 498.2398681640625, "learning_rate": 4.811897904938851e-06, "loss": 32.31, "step": 280390 }, { "epoch": 0.566425740454191, "grad_norm": 742.808837890625, "learning_rate": 4.811549086650327e-06, "loss": 19.7711, "step": 280400 }, { "epoch": 0.5664459410868748, "grad_norm": 235.90054321289062, "learning_rate": 4.811200269280292e-06, "loss": 12.1947, "step": 280410 }, { "epoch": 0.5664661417195587, "grad_norm": 596.4269409179688, "learning_rate": 4.810851452830445e-06, "loss": 15.5925, "step": 280420 }, { "epoch": 0.5664863423522425, "grad_norm": 466.2594909667969, "learning_rate": 4.810502637302484e-06, "loss": 18.6616, "step": 280430 }, { "epoch": 0.5665065429849263, "grad_norm": 294.703369140625, "learning_rate": 4.81015382269811e-06, "loss": 17.7578, "step": 280440 }, { "epoch": 0.5665267436176101, "grad_norm": 164.2449951171875, "learning_rate": 4.809805009019024e-06, "loss": 9.3957, "step": 280450 }, { "epoch": 0.566546944250294, "grad_norm": 396.9536437988281, "learning_rate": 4.809456196266925e-06, "loss": 30.4809, "step": 280460 }, { "epoch": 0.5665671448829778, "grad_norm": 332.344970703125, "learning_rate": 4.809107384443511e-06, "loss": 14.1402, "step": 280470 }, { "epoch": 0.5665873455156616, "grad_norm": 486.4132385253906, "learning_rate": 4.808758573550488e-06, "loss": 16.9073, "step": 280480 }, { "epoch": 0.5666075461483454, "grad_norm": 511.4957275390625, "learning_rate": 4.8084097635895505e-06, "loss": 18.6391, "step": 280490 }, { "epoch": 0.5666277467810292, "grad_norm": 71.69532012939453, "learning_rate": 4.8080609545624004e-06, "loss": 14.7074, "step": 280500 }, { "epoch": 0.566647947413713, "grad_norm": 246.12234497070312, "learning_rate": 4.80771214647074e-06, "loss": 28.4342, "step": 280510 }, { "epoch": 0.5666681480463968, "grad_norm": 143.98562622070312, "learning_rate": 4.807363339316265e-06, "loss": 19.2224, "step": 280520 }, { "epoch": 0.5666883486790806, "grad_norm": 533.887939453125, "learning_rate": 4.807014533100679e-06, "loss": 22.2298, "step": 280530 }, { "epoch": 0.5667085493117644, "grad_norm": 434.1194152832031, "learning_rate": 4.806665727825679e-06, "loss": 15.4137, "step": 280540 }, { "epoch": 0.5667287499444482, "grad_norm": 296.2685241699219, "learning_rate": 4.80631692349297e-06, "loss": 17.5248, "step": 280550 }, { "epoch": 0.566748950577132, "grad_norm": 915.760498046875, "learning_rate": 4.805968120104246e-06, "loss": 21.6545, "step": 280560 }, { "epoch": 0.5667691512098159, "grad_norm": 649.8720092773438, "learning_rate": 4.805619317661211e-06, "loss": 15.8103, "step": 280570 }, { "epoch": 0.5667893518424997, "grad_norm": 542.402587890625, "learning_rate": 4.805270516165564e-06, "loss": 18.8665, "step": 280580 }, { "epoch": 0.5668095524751835, "grad_norm": 480.7325744628906, "learning_rate": 4.8049217156190045e-06, "loss": 20.6515, "step": 280590 }, { "epoch": 0.5668297531078673, "grad_norm": 130.50900268554688, "learning_rate": 4.8045729160232326e-06, "loss": 34.2715, "step": 280600 }, { "epoch": 0.5668499537405511, "grad_norm": 19.832996368408203, "learning_rate": 4.80422411737995e-06, "loss": 13.7181, "step": 280610 }, { "epoch": 0.566870154373235, "grad_norm": 164.57254028320312, "learning_rate": 
4.8038753196908534e-06, "loss": 13.625, "step": 280620 }, { "epoch": 0.5668903550059188, "grad_norm": 194.63401794433594, "learning_rate": 4.803526522957645e-06, "loss": 11.2362, "step": 280630 }, { "epoch": 0.5669105556386026, "grad_norm": 37.46737289428711, "learning_rate": 4.803177727182026e-06, "loss": 13.6499, "step": 280640 }, { "epoch": 0.5669307562712864, "grad_norm": 197.3994140625, "learning_rate": 4.802828932365694e-06, "loss": 27.7821, "step": 280650 }, { "epoch": 0.5669509569039702, "grad_norm": 2664.021728515625, "learning_rate": 4.80248013851035e-06, "loss": 20.306, "step": 280660 }, { "epoch": 0.5669711575366541, "grad_norm": 307.2100524902344, "learning_rate": 4.802131345617694e-06, "loss": 17.2772, "step": 280670 }, { "epoch": 0.5669913581693379, "grad_norm": 358.0711975097656, "learning_rate": 4.801782553689426e-06, "loss": 25.9146, "step": 280680 }, { "epoch": 0.5670115588020217, "grad_norm": 716.7332153320312, "learning_rate": 4.801433762727244e-06, "loss": 48.5615, "step": 280690 }, { "epoch": 0.5670317594347055, "grad_norm": 282.84881591796875, "learning_rate": 4.801084972732851e-06, "loss": 14.6979, "step": 280700 }, { "epoch": 0.5670519600673893, "grad_norm": 430.576904296875, "learning_rate": 4.800736183707946e-06, "loss": 14.4485, "step": 280710 }, { "epoch": 0.5670721607000732, "grad_norm": 459.9219665527344, "learning_rate": 4.800387395654229e-06, "loss": 30.1554, "step": 280720 }, { "epoch": 0.567092361332757, "grad_norm": 556.7384643554688, "learning_rate": 4.800038608573398e-06, "loss": 13.7043, "step": 280730 }, { "epoch": 0.5671125619654408, "grad_norm": 298.2059326171875, "learning_rate": 4.799689822467157e-06, "loss": 27.1181, "step": 280740 }, { "epoch": 0.5671327625981246, "grad_norm": 600.5845947265625, "learning_rate": 4.799341037337203e-06, "loss": 38.7656, "step": 280750 }, { "epoch": 0.5671529632308084, "grad_norm": 496.095947265625, "learning_rate": 4.798992253185233e-06, "loss": 16.733, "step": 280760 }, { "epoch": 0.5671731638634923, "grad_norm": 310.8692321777344, "learning_rate": 4.798643470012956e-06, "loss": 9.6334, "step": 280770 }, { "epoch": 0.567193364496176, "grad_norm": 613.90771484375, "learning_rate": 4.798294687822062e-06, "loss": 19.7655, "step": 280780 }, { "epoch": 0.5672135651288598, "grad_norm": 235.32797241210938, "learning_rate": 4.797945906614256e-06, "loss": 35.1255, "step": 280790 }, { "epoch": 0.5672337657615436, "grad_norm": 342.994384765625, "learning_rate": 4.797597126391238e-06, "loss": 15.036, "step": 280800 }, { "epoch": 0.5672539663942274, "grad_norm": 156.72019958496094, "learning_rate": 4.797248347154707e-06, "loss": 27.67, "step": 280810 }, { "epoch": 0.5672741670269112, "grad_norm": 219.47348022460938, "learning_rate": 4.796899568906363e-06, "loss": 13.3814, "step": 280820 }, { "epoch": 0.5672943676595951, "grad_norm": 582.8139038085938, "learning_rate": 4.796550791647905e-06, "loss": 24.4508, "step": 280830 }, { "epoch": 0.5673145682922789, "grad_norm": 143.56533813476562, "learning_rate": 4.796202015381035e-06, "loss": 20.3075, "step": 280840 }, { "epoch": 0.5673347689249627, "grad_norm": 216.61070251464844, "learning_rate": 4.79585324010745e-06, "loss": 9.2895, "step": 280850 }, { "epoch": 0.5673549695576465, "grad_norm": 441.1408386230469, "learning_rate": 4.795504465828853e-06, "loss": 24.3529, "step": 280860 }, { "epoch": 0.5673751701903303, "grad_norm": 434.54815673828125, "learning_rate": 4.795155692546942e-06, "loss": 21.3057, "step": 280870 }, { "epoch": 0.5673953708230142, "grad_norm": 
436.3892517089844, "learning_rate": 4.794806920263417e-06, "loss": 29.8058, "step": 280880 }, { "epoch": 0.567415571455698, "grad_norm": 492.61260986328125, "learning_rate": 4.794458148979979e-06, "loss": 23.2764, "step": 280890 }, { "epoch": 0.5674357720883818, "grad_norm": 469.8958740234375, "learning_rate": 4.794109378698327e-06, "loss": 15.7702, "step": 280900 }, { "epoch": 0.5674559727210656, "grad_norm": 716.3184814453125, "learning_rate": 4.793760609420161e-06, "loss": 25.6189, "step": 280910 }, { "epoch": 0.5674761733537494, "grad_norm": 133.90565490722656, "learning_rate": 4.7934118411471785e-06, "loss": 25.8989, "step": 280920 }, { "epoch": 0.5674963739864333, "grad_norm": 1263.489501953125, "learning_rate": 4.7930630738810855e-06, "loss": 41.5443, "step": 280930 }, { "epoch": 0.5675165746191171, "grad_norm": 505.8876037597656, "learning_rate": 4.792714307623574e-06, "loss": 29.7477, "step": 280940 }, { "epoch": 0.5675367752518009, "grad_norm": 155.53802490234375, "learning_rate": 4.79236554237635e-06, "loss": 15.5484, "step": 280950 }, { "epoch": 0.5675569758844847, "grad_norm": 138.1282958984375, "learning_rate": 4.792016778141111e-06, "loss": 18.8946, "step": 280960 }, { "epoch": 0.5675771765171685, "grad_norm": 397.5463562011719, "learning_rate": 4.791668014919557e-06, "loss": 21.5662, "step": 280970 }, { "epoch": 0.5675973771498524, "grad_norm": 464.1717529296875, "learning_rate": 4.791319252713388e-06, "loss": 16.8356, "step": 280980 }, { "epoch": 0.5676175777825362, "grad_norm": 216.24668884277344, "learning_rate": 4.790970491524302e-06, "loss": 15.9672, "step": 280990 }, { "epoch": 0.56763777841522, "grad_norm": 5.927821636199951, "learning_rate": 4.7906217313540035e-06, "loss": 8.7603, "step": 281000 }, { "epoch": 0.5676579790479038, "grad_norm": 86.56169891357422, "learning_rate": 4.790272972204186e-06, "loss": 15.9588, "step": 281010 }, { "epoch": 0.5676781796805876, "grad_norm": 398.86572265625, "learning_rate": 4.789924214076554e-06, "loss": 28.5181, "step": 281020 }, { "epoch": 0.5676983803132715, "grad_norm": 143.67971801757812, "learning_rate": 4.789575456972807e-06, "loss": 14.2371, "step": 281030 }, { "epoch": 0.5677185809459552, "grad_norm": 327.8136291503906, "learning_rate": 4.789226700894643e-06, "loss": 16.7905, "step": 281040 }, { "epoch": 0.567738781578639, "grad_norm": 320.13824462890625, "learning_rate": 4.788877945843762e-06, "loss": 15.8231, "step": 281050 }, { "epoch": 0.5677589822113228, "grad_norm": 341.2049560546875, "learning_rate": 4.7885291918218656e-06, "loss": 15.138, "step": 281060 }, { "epoch": 0.5677791828440066, "grad_norm": 213.2560272216797, "learning_rate": 4.788180438830651e-06, "loss": 26.9224, "step": 281070 }, { "epoch": 0.5677993834766905, "grad_norm": 1243.47119140625, "learning_rate": 4.787831686871818e-06, "loss": 16.1275, "step": 281080 }, { "epoch": 0.5678195841093743, "grad_norm": 551.6944580078125, "learning_rate": 4.787482935947071e-06, "loss": 18.4467, "step": 281090 }, { "epoch": 0.5678397847420581, "grad_norm": 283.828369140625, "learning_rate": 4.787134186058103e-06, "loss": 24.1669, "step": 281100 }, { "epoch": 0.5678599853747419, "grad_norm": 422.85626220703125, "learning_rate": 4.7867854372066185e-06, "loss": 22.0561, "step": 281110 }, { "epoch": 0.5678801860074257, "grad_norm": 400.2016906738281, "learning_rate": 4.786436689394317e-06, "loss": 14.1001, "step": 281120 }, { "epoch": 0.5679003866401096, "grad_norm": 658.47705078125, "learning_rate": 4.7860879426228965e-06, "loss": 22.3099, "step": 281130 }, { "epoch": 
0.5679205872727934, "grad_norm": 725.5134887695312, "learning_rate": 4.785739196894056e-06, "loss": 18.1255, "step": 281140 }, { "epoch": 0.5679407879054772, "grad_norm": 195.5977783203125, "learning_rate": 4.785390452209497e-06, "loss": 19.2654, "step": 281150 }, { "epoch": 0.567960988538161, "grad_norm": 75.21509552001953, "learning_rate": 4.785041708570921e-06, "loss": 15.7326, "step": 281160 }, { "epoch": 0.5679811891708448, "grad_norm": 1675.8681640625, "learning_rate": 4.784692965980024e-06, "loss": 21.5999, "step": 281170 }, { "epoch": 0.5680013898035287, "grad_norm": 433.5064392089844, "learning_rate": 4.784344224438508e-06, "loss": 16.2804, "step": 281180 }, { "epoch": 0.5680215904362125, "grad_norm": 276.25213623046875, "learning_rate": 4.783995483948072e-06, "loss": 13.0544, "step": 281190 }, { "epoch": 0.5680417910688963, "grad_norm": 760.7417602539062, "learning_rate": 4.783646744510416e-06, "loss": 18.5853, "step": 281200 }, { "epoch": 0.5680619917015801, "grad_norm": 549.2852172851562, "learning_rate": 4.783298006127238e-06, "loss": 20.926, "step": 281210 }, { "epoch": 0.5680821923342639, "grad_norm": 409.4819641113281, "learning_rate": 4.782949268800242e-06, "loss": 21.3589, "step": 281220 }, { "epoch": 0.5681023929669478, "grad_norm": 357.6761169433594, "learning_rate": 4.782600532531123e-06, "loss": 16.9343, "step": 281230 }, { "epoch": 0.5681225935996316, "grad_norm": 330.9826354980469, "learning_rate": 4.7822517973215814e-06, "loss": 14.5164, "step": 281240 }, { "epoch": 0.5681427942323154, "grad_norm": 616.1460571289062, "learning_rate": 4.781903063173321e-06, "loss": 15.6828, "step": 281250 }, { "epoch": 0.5681629948649992, "grad_norm": 366.4429016113281, "learning_rate": 4.7815543300880374e-06, "loss": 11.1462, "step": 281260 }, { "epoch": 0.568183195497683, "grad_norm": 842.9706420898438, "learning_rate": 4.781205598067431e-06, "loss": 20.5096, "step": 281270 }, { "epoch": 0.5682033961303669, "grad_norm": 202.7994384765625, "learning_rate": 4.780856867113202e-06, "loss": 18.9489, "step": 281280 }, { "epoch": 0.5682235967630506, "grad_norm": 269.7091979980469, "learning_rate": 4.780508137227052e-06, "loss": 28.9723, "step": 281290 }, { "epoch": 0.5682437973957344, "grad_norm": 323.88671875, "learning_rate": 4.780159408410677e-06, "loss": 21.4513, "step": 281300 }, { "epoch": 0.5682639980284182, "grad_norm": 199.1850128173828, "learning_rate": 4.7798106806657765e-06, "loss": 12.8795, "step": 281310 }, { "epoch": 0.568284198661102, "grad_norm": 8.439977645874023, "learning_rate": 4.779461953994055e-06, "loss": 14.439, "step": 281320 }, { "epoch": 0.5683043992937858, "grad_norm": 114.3587875366211, "learning_rate": 4.779113228397208e-06, "loss": 17.9183, "step": 281330 }, { "epoch": 0.5683245999264697, "grad_norm": 220.39505004882812, "learning_rate": 4.778764503876937e-06, "loss": 19.0702, "step": 281340 }, { "epoch": 0.5683448005591535, "grad_norm": 538.3729248046875, "learning_rate": 4.778415780434941e-06, "loss": 49.2227, "step": 281350 }, { "epoch": 0.5683650011918373, "grad_norm": 142.07130432128906, "learning_rate": 4.778067058072919e-06, "loss": 18.075, "step": 281360 }, { "epoch": 0.5683852018245211, "grad_norm": 186.54107666015625, "learning_rate": 4.777718336792571e-06, "loss": 17.1097, "step": 281370 }, { "epoch": 0.568405402457205, "grad_norm": 175.35775756835938, "learning_rate": 4.777369616595599e-06, "loss": 12.8044, "step": 281380 }, { "epoch": 0.5684256030898888, "grad_norm": 773.0756225585938, "learning_rate": 4.777020897483697e-06, "loss": 19.8385, 
"step": 281390 }, { "epoch": 0.5684458037225726, "grad_norm": 546.4215698242188, "learning_rate": 4.7766721794585704e-06, "loss": 12.1978, "step": 281400 }, { "epoch": 0.5684660043552564, "grad_norm": 453.91693115234375, "learning_rate": 4.776323462521916e-06, "loss": 22.5422, "step": 281410 }, { "epoch": 0.5684862049879402, "grad_norm": 571.7416381835938, "learning_rate": 4.775974746675434e-06, "loss": 17.7625, "step": 281420 }, { "epoch": 0.568506405620624, "grad_norm": 144.67437744140625, "learning_rate": 4.7756260319208245e-06, "loss": 18.1891, "step": 281430 }, { "epoch": 0.5685266062533079, "grad_norm": 463.4716796875, "learning_rate": 4.775277318259784e-06, "loss": 10.107, "step": 281440 }, { "epoch": 0.5685468068859917, "grad_norm": 156.15028381347656, "learning_rate": 4.774928605694018e-06, "loss": 16.9089, "step": 281450 }, { "epoch": 0.5685670075186755, "grad_norm": 113.29301452636719, "learning_rate": 4.77457989422522e-06, "loss": 19.2779, "step": 281460 }, { "epoch": 0.5685872081513593, "grad_norm": 875.8801879882812, "learning_rate": 4.774231183855093e-06, "loss": 17.1479, "step": 281470 }, { "epoch": 0.5686074087840431, "grad_norm": 354.21478271484375, "learning_rate": 4.773882474585338e-06, "loss": 13.1078, "step": 281480 }, { "epoch": 0.568627609416727, "grad_norm": 400.21356201171875, "learning_rate": 4.77353376641765e-06, "loss": 20.4264, "step": 281490 }, { "epoch": 0.5686478100494108, "grad_norm": 648.59423828125, "learning_rate": 4.7731850593537316e-06, "loss": 15.6605, "step": 281500 }, { "epoch": 0.5686680106820946, "grad_norm": 213.05715942382812, "learning_rate": 4.772836353395283e-06, "loss": 8.2071, "step": 281510 }, { "epoch": 0.5686882113147784, "grad_norm": 500.738525390625, "learning_rate": 4.772487648544e-06, "loss": 20.6662, "step": 281520 }, { "epoch": 0.5687084119474622, "grad_norm": 439.78668212890625, "learning_rate": 4.772138944801585e-06, "loss": 19.7219, "step": 281530 }, { "epoch": 0.5687286125801461, "grad_norm": 164.27357482910156, "learning_rate": 4.77179024216974e-06, "loss": 15.299, "step": 281540 }, { "epoch": 0.5687488132128298, "grad_norm": 575.8212280273438, "learning_rate": 4.771441540650158e-06, "loss": 31.0574, "step": 281550 }, { "epoch": 0.5687690138455136, "grad_norm": 17.1769962310791, "learning_rate": 4.771092840244544e-06, "loss": 8.2188, "step": 281560 }, { "epoch": 0.5687892144781974, "grad_norm": 346.69927978515625, "learning_rate": 4.770744140954596e-06, "loss": 18.1759, "step": 281570 }, { "epoch": 0.5688094151108812, "grad_norm": 0.0, "learning_rate": 4.7703954427820125e-06, "loss": 22.863, "step": 281580 }, { "epoch": 0.568829615743565, "grad_norm": 687.7879638671875, "learning_rate": 4.770046745728494e-06, "loss": 17.0569, "step": 281590 }, { "epoch": 0.5688498163762489, "grad_norm": 317.03082275390625, "learning_rate": 4.769698049795739e-06, "loss": 23.1847, "step": 281600 }, { "epoch": 0.5688700170089327, "grad_norm": 509.2730712890625, "learning_rate": 4.76934935498545e-06, "loss": 24.0008, "step": 281610 }, { "epoch": 0.5688902176416165, "grad_norm": 474.55657958984375, "learning_rate": 4.769000661299322e-06, "loss": 31.9968, "step": 281620 }, { "epoch": 0.5689104182743003, "grad_norm": 48.3017463684082, "learning_rate": 4.768651968739057e-06, "loss": 12.2753, "step": 281630 }, { "epoch": 0.5689306189069842, "grad_norm": 276.9859924316406, "learning_rate": 4.768303277306356e-06, "loss": 23.6071, "step": 281640 }, { "epoch": 0.568950819539668, "grad_norm": 175.224609375, "learning_rate": 4.767954587002915e-06, "loss": 
22.392, "step": 281650 }, { "epoch": 0.5689710201723518, "grad_norm": 144.96896362304688, "learning_rate": 4.767605897830436e-06, "loss": 19.9227, "step": 281660 }, { "epoch": 0.5689912208050356, "grad_norm": 155.78317260742188, "learning_rate": 4.767257209790618e-06, "loss": 21.4134, "step": 281670 }, { "epoch": 0.5690114214377194, "grad_norm": 313.7227478027344, "learning_rate": 4.76690852288516e-06, "loss": 14.5007, "step": 281680 }, { "epoch": 0.5690316220704033, "grad_norm": 100.6632308959961, "learning_rate": 4.76655983711576e-06, "loss": 19.0345, "step": 281690 }, { "epoch": 0.5690518227030871, "grad_norm": 377.2840270996094, "learning_rate": 4.766211152484122e-06, "loss": 16.0773, "step": 281700 }, { "epoch": 0.5690720233357709, "grad_norm": 110.20166015625, "learning_rate": 4.765862468991939e-06, "loss": 19.0861, "step": 281710 }, { "epoch": 0.5690922239684547, "grad_norm": 404.0787658691406, "learning_rate": 4.765513786640915e-06, "loss": 27.0663, "step": 281720 }, { "epoch": 0.5691124246011385, "grad_norm": 369.5503845214844, "learning_rate": 4.765165105432749e-06, "loss": 22.2442, "step": 281730 }, { "epoch": 0.5691326252338224, "grad_norm": 85.97356414794922, "learning_rate": 4.764816425369141e-06, "loss": 17.9469, "step": 281740 }, { "epoch": 0.5691528258665062, "grad_norm": 0.6718572974205017, "learning_rate": 4.7644677464517874e-06, "loss": 16.9593, "step": 281750 }, { "epoch": 0.56917302649919, "grad_norm": 539.9227905273438, "learning_rate": 4.764119068682389e-06, "loss": 34.1517, "step": 281760 }, { "epoch": 0.5691932271318738, "grad_norm": 237.00498962402344, "learning_rate": 4.7637703920626484e-06, "loss": 30.3294, "step": 281770 }, { "epoch": 0.5692134277645576, "grad_norm": 412.45269775390625, "learning_rate": 4.76342171659426e-06, "loss": 20.29, "step": 281780 }, { "epoch": 0.5692336283972415, "grad_norm": 191.51370239257812, "learning_rate": 4.763073042278925e-06, "loss": 12.3758, "step": 281790 }, { "epoch": 0.5692538290299252, "grad_norm": 286.8011169433594, "learning_rate": 4.762724369118346e-06, "loss": 16.6205, "step": 281800 }, { "epoch": 0.569274029662609, "grad_norm": 269.1073303222656, "learning_rate": 4.762375697114217e-06, "loss": 19.3239, "step": 281810 }, { "epoch": 0.5692942302952928, "grad_norm": 294.68017578125, "learning_rate": 4.762027026268241e-06, "loss": 36.4113, "step": 281820 }, { "epoch": 0.5693144309279766, "grad_norm": 270.39190673828125, "learning_rate": 4.761678356582117e-06, "loss": 30.8563, "step": 281830 }, { "epoch": 0.5693346315606604, "grad_norm": 402.2464599609375, "learning_rate": 4.761329688057543e-06, "loss": 53.8486, "step": 281840 }, { "epoch": 0.5693548321933443, "grad_norm": 305.43365478515625, "learning_rate": 4.760981020696218e-06, "loss": 18.5423, "step": 281850 }, { "epoch": 0.5693750328260281, "grad_norm": 868.0171508789062, "learning_rate": 4.760632354499846e-06, "loss": 34.0763, "step": 281860 }, { "epoch": 0.5693952334587119, "grad_norm": 412.5047607421875, "learning_rate": 4.760283689470119e-06, "loss": 22.9232, "step": 281870 }, { "epoch": 0.5694154340913957, "grad_norm": 247.03213500976562, "learning_rate": 4.759935025608742e-06, "loss": 15.8073, "step": 281880 }, { "epoch": 0.5694356347240795, "grad_norm": 423.51239013671875, "learning_rate": 4.7595863629174125e-06, "loss": 18.0722, "step": 281890 }, { "epoch": 0.5694558353567634, "grad_norm": 303.29095458984375, "learning_rate": 4.759237701397831e-06, "loss": 20.4888, "step": 281900 }, { "epoch": 0.5694760359894472, "grad_norm": 536.0755004882812, 
"learning_rate": 4.758889041051694e-06, "loss": 25.8428, "step": 281910 }, { "epoch": 0.569496236622131, "grad_norm": 746.490478515625, "learning_rate": 4.758540381880702e-06, "loss": 18.181, "step": 281920 }, { "epoch": 0.5695164372548148, "grad_norm": 3515.61279296875, "learning_rate": 4.7581917238865565e-06, "loss": 54.3617, "step": 281930 }, { "epoch": 0.5695366378874986, "grad_norm": 229.06857299804688, "learning_rate": 4.757843067070955e-06, "loss": 28.6551, "step": 281940 }, { "epoch": 0.5695568385201825, "grad_norm": 844.1669921875, "learning_rate": 4.757494411435597e-06, "loss": 45.6698, "step": 281950 }, { "epoch": 0.5695770391528663, "grad_norm": 357.9671325683594, "learning_rate": 4.757145756982182e-06, "loss": 14.9378, "step": 281960 }, { "epoch": 0.5695972397855501, "grad_norm": 726.5892333984375, "learning_rate": 4.756797103712409e-06, "loss": 17.4079, "step": 281970 }, { "epoch": 0.5696174404182339, "grad_norm": 243.23487854003906, "learning_rate": 4.756448451627977e-06, "loss": 14.5127, "step": 281980 }, { "epoch": 0.5696376410509177, "grad_norm": 648.876708984375, "learning_rate": 4.7560998007305865e-06, "loss": 9.6891, "step": 281990 }, { "epoch": 0.5696578416836016, "grad_norm": 268.9858093261719, "learning_rate": 4.755751151021934e-06, "loss": 30.9187, "step": 282000 }, { "epoch": 0.5696780423162854, "grad_norm": 164.58192443847656, "learning_rate": 4.755402502503722e-06, "loss": 19.8471, "step": 282010 }, { "epoch": 0.5696982429489692, "grad_norm": 496.41510009765625, "learning_rate": 4.7550538551776495e-06, "loss": 17.9169, "step": 282020 }, { "epoch": 0.569718443581653, "grad_norm": 682.2836303710938, "learning_rate": 4.754705209045414e-06, "loss": 27.1735, "step": 282030 }, { "epoch": 0.5697386442143368, "grad_norm": 565.455078125, "learning_rate": 4.754356564108715e-06, "loss": 25.299, "step": 282040 }, { "epoch": 0.5697588448470207, "grad_norm": 469.9396057128906, "learning_rate": 4.754007920369252e-06, "loss": 21.7952, "step": 282050 }, { "epoch": 0.5697790454797044, "grad_norm": 777.315185546875, "learning_rate": 4.753659277828727e-06, "loss": 28.935, "step": 282060 }, { "epoch": 0.5697992461123882, "grad_norm": 533.95068359375, "learning_rate": 4.7533106364888345e-06, "loss": 13.9896, "step": 282070 }, { "epoch": 0.569819446745072, "grad_norm": 189.7462158203125, "learning_rate": 4.752961996351276e-06, "loss": 10.9761, "step": 282080 }, { "epoch": 0.5698396473777558, "grad_norm": 422.6891784667969, "learning_rate": 4.7526133574177525e-06, "loss": 20.301, "step": 282090 }, { "epoch": 0.5698598480104397, "grad_norm": 877.2344970703125, "learning_rate": 4.752264719689961e-06, "loss": 28.6998, "step": 282100 }, { "epoch": 0.5698800486431235, "grad_norm": 604.5916137695312, "learning_rate": 4.7519160831696e-06, "loss": 25.9339, "step": 282110 }, { "epoch": 0.5699002492758073, "grad_norm": 416.35302734375, "learning_rate": 4.751567447858372e-06, "loss": 19.9025, "step": 282120 }, { "epoch": 0.5699204499084911, "grad_norm": 733.4224243164062, "learning_rate": 4.751218813757972e-06, "loss": 28.5293, "step": 282130 }, { "epoch": 0.5699406505411749, "grad_norm": 273.4183654785156, "learning_rate": 4.7508701808701006e-06, "loss": 21.8609, "step": 282140 }, { "epoch": 0.5699608511738588, "grad_norm": 388.1825866699219, "learning_rate": 4.7505215491964615e-06, "loss": 9.5101, "step": 282150 }, { "epoch": 0.5699810518065426, "grad_norm": 242.81985473632812, "learning_rate": 4.750172918738747e-06, "loss": 24.2439, "step": 282160 }, { "epoch": 0.5700012524392264, "grad_norm": 
210.24472045898438, "learning_rate": 4.7498242894986595e-06, "loss": 17.8979, "step": 282170 }, { "epoch": 0.5700214530719102, "grad_norm": 287.41278076171875, "learning_rate": 4.7494756614779e-06, "loss": 39.2889, "step": 282180 }, { "epoch": 0.570041653704594, "grad_norm": 527.2347412109375, "learning_rate": 4.749127034678165e-06, "loss": 18.7753, "step": 282190 }, { "epoch": 0.5700618543372779, "grad_norm": 317.0581970214844, "learning_rate": 4.748778409101153e-06, "loss": 18.5149, "step": 282200 }, { "epoch": 0.5700820549699617, "grad_norm": 689.727783203125, "learning_rate": 4.748429784748564e-06, "loss": 22.1526, "step": 282210 }, { "epoch": 0.5701022556026455, "grad_norm": 229.11322021484375, "learning_rate": 4.748081161622101e-06, "loss": 10.3614, "step": 282220 }, { "epoch": 0.5701224562353293, "grad_norm": 315.3077392578125, "learning_rate": 4.7477325397234575e-06, "loss": 16.2357, "step": 282230 }, { "epoch": 0.5701426568680131, "grad_norm": 1760.7344970703125, "learning_rate": 4.747383919054335e-06, "loss": 33.4112, "step": 282240 }, { "epoch": 0.570162857500697, "grad_norm": 266.5099792480469, "learning_rate": 4.747035299616434e-06, "loss": 15.6812, "step": 282250 }, { "epoch": 0.5701830581333808, "grad_norm": 192.255615234375, "learning_rate": 4.746686681411451e-06, "loss": 19.3875, "step": 282260 }, { "epoch": 0.5702032587660646, "grad_norm": 709.2426147460938, "learning_rate": 4.746338064441087e-06, "loss": 37.0994, "step": 282270 }, { "epoch": 0.5702234593987484, "grad_norm": 465.7525634765625, "learning_rate": 4.745989448707042e-06, "loss": 22.1241, "step": 282280 }, { "epoch": 0.5702436600314322, "grad_norm": 225.25108337402344, "learning_rate": 4.745640834211012e-06, "loss": 11.6983, "step": 282290 }, { "epoch": 0.570263860664116, "grad_norm": 559.8445434570312, "learning_rate": 4.745292220954696e-06, "loss": 24.5407, "step": 282300 }, { "epoch": 0.5702840612967999, "grad_norm": 53.93816375732422, "learning_rate": 4.744943608939799e-06, "loss": 20.1797, "step": 282310 }, { "epoch": 0.5703042619294836, "grad_norm": 362.0687561035156, "learning_rate": 4.744594998168012e-06, "loss": 16.7693, "step": 282320 }, { "epoch": 0.5703244625621674, "grad_norm": 396.4376525878906, "learning_rate": 4.744246388641039e-06, "loss": 17.0657, "step": 282330 }, { "epoch": 0.5703446631948512, "grad_norm": 371.0090026855469, "learning_rate": 4.743897780360578e-06, "loss": 11.6621, "step": 282340 }, { "epoch": 0.570364863827535, "grad_norm": 321.3207702636719, "learning_rate": 4.74354917332833e-06, "loss": 19.1614, "step": 282350 }, { "epoch": 0.5703850644602189, "grad_norm": 195.78021240234375, "learning_rate": 4.7432005675459905e-06, "loss": 28.7699, "step": 282360 }, { "epoch": 0.5704052650929027, "grad_norm": 183.8314971923828, "learning_rate": 4.74285196301526e-06, "loss": 29.7161, "step": 282370 }, { "epoch": 0.5704254657255865, "grad_norm": 764.7041015625, "learning_rate": 4.742503359737841e-06, "loss": 17.1614, "step": 282380 }, { "epoch": 0.5704456663582703, "grad_norm": 172.70347595214844, "learning_rate": 4.742154757715425e-06, "loss": 20.2722, "step": 282390 }, { "epoch": 0.5704658669909541, "grad_norm": 276.43487548828125, "learning_rate": 4.741806156949718e-06, "loss": 12.9694, "step": 282400 }, { "epoch": 0.570486067623638, "grad_norm": 163.83872985839844, "learning_rate": 4.741457557442416e-06, "loss": 18.2952, "step": 282410 }, { "epoch": 0.5705062682563218, "grad_norm": 459.07470703125, "learning_rate": 4.7411089591952184e-06, "loss": 38.4178, "step": 282420 }, { "epoch": 
0.5705264688890056, "grad_norm": 609.1342163085938, "learning_rate": 4.740760362209824e-06, "loss": 25.564, "step": 282430 }, { "epoch": 0.5705466695216894, "grad_norm": 382.3327331542969, "learning_rate": 4.740411766487933e-06, "loss": 16.1625, "step": 282440 }, { "epoch": 0.5705668701543732, "grad_norm": 377.52508544921875, "learning_rate": 4.740063172031243e-06, "loss": 37.4605, "step": 282450 }, { "epoch": 0.5705870707870571, "grad_norm": 220.13027954101562, "learning_rate": 4.7397145788414525e-06, "loss": 20.6798, "step": 282460 }, { "epoch": 0.5706072714197409, "grad_norm": 680.699951171875, "learning_rate": 4.739365986920265e-06, "loss": 16.0136, "step": 282470 }, { "epoch": 0.5706274720524247, "grad_norm": 482.6812438964844, "learning_rate": 4.7390173962693724e-06, "loss": 21.8054, "step": 282480 }, { "epoch": 0.5706476726851085, "grad_norm": 221.7867889404297, "learning_rate": 4.738668806890479e-06, "loss": 9.6261, "step": 282490 }, { "epoch": 0.5706678733177923, "grad_norm": 340.18707275390625, "learning_rate": 4.738320218785281e-06, "loss": 20.3762, "step": 282500 }, { "epoch": 0.5706880739504762, "grad_norm": 853.8125610351562, "learning_rate": 4.737971631955481e-06, "loss": 23.8096, "step": 282510 }, { "epoch": 0.57070827458316, "grad_norm": 961.3689575195312, "learning_rate": 4.737623046402774e-06, "loss": 17.6552, "step": 282520 }, { "epoch": 0.5707284752158438, "grad_norm": 388.4830017089844, "learning_rate": 4.737274462128858e-06, "loss": 26.1711, "step": 282530 }, { "epoch": 0.5707486758485276, "grad_norm": 307.6900939941406, "learning_rate": 4.736925879135439e-06, "loss": 20.3948, "step": 282540 }, { "epoch": 0.5707688764812114, "grad_norm": 420.995361328125, "learning_rate": 4.7365772974242075e-06, "loss": 17.5211, "step": 282550 }, { "epoch": 0.5707890771138953, "grad_norm": 434.5019836425781, "learning_rate": 4.736228716996868e-06, "loss": 8.8662, "step": 282560 }, { "epoch": 0.570809277746579, "grad_norm": 259.2095947265625, "learning_rate": 4.735880137855118e-06, "loss": 18.553, "step": 282570 }, { "epoch": 0.5708294783792628, "grad_norm": 473.3949279785156, "learning_rate": 4.735531560000656e-06, "loss": 29.2533, "step": 282580 }, { "epoch": 0.5708496790119466, "grad_norm": 432.2046203613281, "learning_rate": 4.735182983435181e-06, "loss": 36.4894, "step": 282590 }, { "epoch": 0.5708698796446304, "grad_norm": 220.06646728515625, "learning_rate": 4.734834408160393e-06, "loss": 27.4091, "step": 282600 }, { "epoch": 0.5708900802773142, "grad_norm": 65.85558319091797, "learning_rate": 4.734485834177987e-06, "loss": 15.7926, "step": 282610 }, { "epoch": 0.5709102809099981, "grad_norm": 346.2181091308594, "learning_rate": 4.734137261489667e-06, "loss": 14.9316, "step": 282620 }, { "epoch": 0.5709304815426819, "grad_norm": 592.6093139648438, "learning_rate": 4.73378869009713e-06, "loss": 27.6009, "step": 282630 }, { "epoch": 0.5709506821753657, "grad_norm": 948.87451171875, "learning_rate": 4.7334401200020745e-06, "loss": 24.6394, "step": 282640 }, { "epoch": 0.5709708828080495, "grad_norm": 336.6229248046875, "learning_rate": 4.733091551206198e-06, "loss": 11.5142, "step": 282650 }, { "epoch": 0.5709910834407333, "grad_norm": 371.5719909667969, "learning_rate": 4.732742983711202e-06, "loss": 23.0044, "step": 282660 }, { "epoch": 0.5710112840734172, "grad_norm": 586.419189453125, "learning_rate": 4.732394417518785e-06, "loss": 22.4501, "step": 282670 }, { "epoch": 0.571031484706101, "grad_norm": 234.36026000976562, "learning_rate": 4.732045852630644e-06, "loss": 18.4327, 
"step": 282680 }, { "epoch": 0.5710516853387848, "grad_norm": 165.2674102783203, "learning_rate": 4.731697289048479e-06, "loss": 19.9626, "step": 282690 }, { "epoch": 0.5710718859714686, "grad_norm": 162.3771514892578, "learning_rate": 4.73134872677399e-06, "loss": 18.7244, "step": 282700 }, { "epoch": 0.5710920866041524, "grad_norm": 232.7933349609375, "learning_rate": 4.731000165808874e-06, "loss": 13.7459, "step": 282710 }, { "epoch": 0.5711122872368363, "grad_norm": 664.8682250976562, "learning_rate": 4.73065160615483e-06, "loss": 14.6749, "step": 282720 }, { "epoch": 0.5711324878695201, "grad_norm": 262.6925964355469, "learning_rate": 4.730303047813559e-06, "loss": 25.8547, "step": 282730 }, { "epoch": 0.5711526885022039, "grad_norm": 0.0, "learning_rate": 4.7299544907867576e-06, "loss": 16.9246, "step": 282740 }, { "epoch": 0.5711728891348877, "grad_norm": 468.6184997558594, "learning_rate": 4.729605935076123e-06, "loss": 10.2398, "step": 282750 }, { "epoch": 0.5711930897675715, "grad_norm": 613.7074584960938, "learning_rate": 4.7292573806833605e-06, "loss": 32.9369, "step": 282760 }, { "epoch": 0.5712132904002554, "grad_norm": 359.0036315917969, "learning_rate": 4.7289088276101616e-06, "loss": 18.4079, "step": 282770 }, { "epoch": 0.5712334910329392, "grad_norm": 253.33212280273438, "learning_rate": 4.728560275858228e-06, "loss": 19.6505, "step": 282780 }, { "epoch": 0.571253691665623, "grad_norm": 139.0804443359375, "learning_rate": 4.72821172542926e-06, "loss": 10.7074, "step": 282790 }, { "epoch": 0.5712738922983068, "grad_norm": 402.0624084472656, "learning_rate": 4.727863176324955e-06, "loss": 14.0952, "step": 282800 }, { "epoch": 0.5712940929309906, "grad_norm": 665.03759765625, "learning_rate": 4.7275146285470115e-06, "loss": 25.4496, "step": 282810 }, { "epoch": 0.5713142935636745, "grad_norm": 224.83116149902344, "learning_rate": 4.727166082097127e-06, "loss": 15.2676, "step": 282820 }, { "epoch": 0.5713344941963582, "grad_norm": 315.6308288574219, "learning_rate": 4.726817536977006e-06, "loss": 31.0143, "step": 282830 }, { "epoch": 0.571354694829042, "grad_norm": 403.99566650390625, "learning_rate": 4.72646899318834e-06, "loss": 24.1681, "step": 282840 }, { "epoch": 0.5713748954617258, "grad_norm": 239.3922576904297, "learning_rate": 4.7261204507328315e-06, "loss": 21.9767, "step": 282850 }, { "epoch": 0.5713950960944096, "grad_norm": 193.40237426757812, "learning_rate": 4.72577190961218e-06, "loss": 13.7396, "step": 282860 }, { "epoch": 0.5714152967270935, "grad_norm": 274.01324462890625, "learning_rate": 4.725423369828082e-06, "loss": 21.2507, "step": 282870 }, { "epoch": 0.5714354973597773, "grad_norm": 77.75462341308594, "learning_rate": 4.725074831382237e-06, "loss": 11.1334, "step": 282880 }, { "epoch": 0.5714556979924611, "grad_norm": 210.43345642089844, "learning_rate": 4.724726294276345e-06, "loss": 34.2454, "step": 282890 }, { "epoch": 0.5714758986251449, "grad_norm": 884.965576171875, "learning_rate": 4.7243777585121034e-06, "loss": 36.7404, "step": 282900 }, { "epoch": 0.5714960992578287, "grad_norm": 313.2972717285156, "learning_rate": 4.724029224091209e-06, "loss": 25.3529, "step": 282910 }, { "epoch": 0.5715162998905126, "grad_norm": 246.2036590576172, "learning_rate": 4.723680691015366e-06, "loss": 19.2682, "step": 282920 }, { "epoch": 0.5715365005231964, "grad_norm": 337.3580322265625, "learning_rate": 4.723332159286267e-06, "loss": 13.7533, "step": 282930 }, { "epoch": 0.5715567011558802, "grad_norm": 198.6982879638672, "learning_rate": 
4.722983628905614e-06, "loss": 13.4708, "step": 282940 }, { "epoch": 0.571576901788564, "grad_norm": 232.9688262939453, "learning_rate": 4.722635099875106e-06, "loss": 13.4651, "step": 282950 }, { "epoch": 0.5715971024212478, "grad_norm": 689.5285034179688, "learning_rate": 4.722286572196441e-06, "loss": 25.4789, "step": 282960 }, { "epoch": 0.5716173030539317, "grad_norm": 894.1478881835938, "learning_rate": 4.721938045871317e-06, "loss": 10.2879, "step": 282970 }, { "epoch": 0.5716375036866155, "grad_norm": 856.7816162109375, "learning_rate": 4.721589520901433e-06, "loss": 27.1754, "step": 282980 }, { "epoch": 0.5716577043192993, "grad_norm": 550.2301025390625, "learning_rate": 4.7212409972884894e-06, "loss": 16.1218, "step": 282990 }, { "epoch": 0.5716779049519831, "grad_norm": 111.42938232421875, "learning_rate": 4.720892475034181e-06, "loss": 9.3968, "step": 283000 }, { "epoch": 0.5716981055846669, "grad_norm": 697.5478515625, "learning_rate": 4.72054395414021e-06, "loss": 23.3174, "step": 283010 }, { "epoch": 0.5717183062173508, "grad_norm": 221.56004333496094, "learning_rate": 4.720195434608275e-06, "loss": 13.4679, "step": 283020 }, { "epoch": 0.5717385068500346, "grad_norm": 376.4030456542969, "learning_rate": 4.719846916440072e-06, "loss": 15.4297, "step": 283030 }, { "epoch": 0.5717587074827184, "grad_norm": 407.6830749511719, "learning_rate": 4.719498399637302e-06, "loss": 13.9991, "step": 283040 }, { "epoch": 0.5717789081154022, "grad_norm": 186.31411743164062, "learning_rate": 4.719149884201664e-06, "loss": 28.5582, "step": 283050 }, { "epoch": 0.571799108748086, "grad_norm": 179.6485595703125, "learning_rate": 4.718801370134853e-06, "loss": 11.7174, "step": 283060 }, { "epoch": 0.5718193093807699, "grad_norm": 168.99423217773438, "learning_rate": 4.718452857438569e-06, "loss": 11.1836, "step": 283070 }, { "epoch": 0.5718395100134536, "grad_norm": 640.58447265625, "learning_rate": 4.7181043461145155e-06, "loss": 27.7388, "step": 283080 }, { "epoch": 0.5718597106461374, "grad_norm": 445.20245361328125, "learning_rate": 4.717755836164384e-06, "loss": 18.8104, "step": 283090 }, { "epoch": 0.5718799112788212, "grad_norm": 398.60247802734375, "learning_rate": 4.717407327589878e-06, "loss": 15.9926, "step": 283100 }, { "epoch": 0.571900111911505, "grad_norm": 410.79620361328125, "learning_rate": 4.717058820392694e-06, "loss": 27.9072, "step": 283110 }, { "epoch": 0.5719203125441888, "grad_norm": 378.79998779296875, "learning_rate": 4.7167103145745315e-06, "loss": 10.7875, "step": 283120 }, { "epoch": 0.5719405131768727, "grad_norm": 306.0965881347656, "learning_rate": 4.716361810137088e-06, "loss": 11.5585, "step": 283130 }, { "epoch": 0.5719607138095565, "grad_norm": 703.5479125976562, "learning_rate": 4.716013307082061e-06, "loss": 20.6412, "step": 283140 }, { "epoch": 0.5719809144422403, "grad_norm": 440.7583923339844, "learning_rate": 4.715664805411155e-06, "loss": 14.8568, "step": 283150 }, { "epoch": 0.5720011150749241, "grad_norm": 126.62924194335938, "learning_rate": 4.715316305126059e-06, "loss": 25.2764, "step": 283160 }, { "epoch": 0.572021315707608, "grad_norm": 433.77154541015625, "learning_rate": 4.71496780622848e-06, "loss": 14.7368, "step": 283170 }, { "epoch": 0.5720415163402918, "grad_norm": 335.3080139160156, "learning_rate": 4.714619308720113e-06, "loss": 15.2308, "step": 283180 }, { "epoch": 0.5720617169729756, "grad_norm": 654.994873046875, "learning_rate": 4.714270812602657e-06, "loss": 20.2485, "step": 283190 }, { "epoch": 0.5720819176056594, "grad_norm": 
383.84527587890625, "learning_rate": 4.71392231787781e-06, "loss": 23.0565, "step": 283200 }, { "epoch": 0.5721021182383432, "grad_norm": 302.2643127441406, "learning_rate": 4.713573824547271e-06, "loss": 15.9064, "step": 283210 }, { "epoch": 0.572122318871027, "grad_norm": 136.98361206054688, "learning_rate": 4.7132253326127394e-06, "loss": 31.1406, "step": 283220 }, { "epoch": 0.5721425195037109, "grad_norm": 314.59521484375, "learning_rate": 4.71287684207591e-06, "loss": 13.0349, "step": 283230 }, { "epoch": 0.5721627201363947, "grad_norm": 258.1735534667969, "learning_rate": 4.712528352938487e-06, "loss": 17.8948, "step": 283240 }, { "epoch": 0.5721829207690785, "grad_norm": 230.92201232910156, "learning_rate": 4.712179865202164e-06, "loss": 19.5708, "step": 283250 }, { "epoch": 0.5722031214017623, "grad_norm": 1058.202392578125, "learning_rate": 4.711831378868643e-06, "loss": 34.6557, "step": 283260 }, { "epoch": 0.5722233220344461, "grad_norm": 283.6180725097656, "learning_rate": 4.71148289393962e-06, "loss": 24.73, "step": 283270 }, { "epoch": 0.57224352266713, "grad_norm": 104.200927734375, "learning_rate": 4.711134410416794e-06, "loss": 13.046, "step": 283280 }, { "epoch": 0.5722637232998138, "grad_norm": 419.1370544433594, "learning_rate": 4.7107859283018635e-06, "loss": 21.7297, "step": 283290 }, { "epoch": 0.5722839239324976, "grad_norm": 206.23863220214844, "learning_rate": 4.710437447596528e-06, "loss": 26.745, "step": 283300 }, { "epoch": 0.5723041245651814, "grad_norm": 258.88409423828125, "learning_rate": 4.710088968302486e-06, "loss": 18.8854, "step": 283310 }, { "epoch": 0.5723243251978652, "grad_norm": 283.68267822265625, "learning_rate": 4.709740490421435e-06, "loss": 11.6669, "step": 283320 }, { "epoch": 0.5723445258305491, "grad_norm": 254.8634490966797, "learning_rate": 4.709392013955073e-06, "loss": 32.1802, "step": 283330 }, { "epoch": 0.5723647264632328, "grad_norm": 380.2320861816406, "learning_rate": 4.7090435389051e-06, "loss": 16.0333, "step": 283340 }, { "epoch": 0.5723849270959166, "grad_norm": 524.1536254882812, "learning_rate": 4.708695065273213e-06, "loss": 19.2232, "step": 283350 }, { "epoch": 0.5724051277286004, "grad_norm": 1058.6983642578125, "learning_rate": 4.708346593061109e-06, "loss": 21.6366, "step": 283360 }, { "epoch": 0.5724253283612842, "grad_norm": 252.1746826171875, "learning_rate": 4.707998122270493e-06, "loss": 17.1793, "step": 283370 }, { "epoch": 0.5724455289939681, "grad_norm": 162.8192596435547, "learning_rate": 4.707649652903054e-06, "loss": 20.0504, "step": 283380 }, { "epoch": 0.5724657296266519, "grad_norm": 43.742671966552734, "learning_rate": 4.707301184960496e-06, "loss": 25.1147, "step": 283390 }, { "epoch": 0.5724859302593357, "grad_norm": 3088.93408203125, "learning_rate": 4.706952718444518e-06, "loss": 30.3981, "step": 283400 }, { "epoch": 0.5725061308920195, "grad_norm": 237.17019653320312, "learning_rate": 4.706604253356817e-06, "loss": 18.2808, "step": 283410 }, { "epoch": 0.5725263315247033, "grad_norm": 874.8134155273438, "learning_rate": 4.70625578969909e-06, "loss": 29.7182, "step": 283420 }, { "epoch": 0.5725465321573872, "grad_norm": 278.7037353515625, "learning_rate": 4.705907327473036e-06, "loss": 28.5297, "step": 283430 }, { "epoch": 0.572566732790071, "grad_norm": 385.6778564453125, "learning_rate": 4.705558866680357e-06, "loss": 50.21, "step": 283440 }, { "epoch": 0.5725869334227548, "grad_norm": 407.6443786621094, "learning_rate": 4.705210407322746e-06, "loss": 27.3507, "step": 283450 }, { "epoch": 
0.5726071340554386, "grad_norm": 931.3094482421875, "learning_rate": 4.704861949401904e-06, "loss": 23.4207, "step": 283460 }, { "epoch": 0.5726273346881224, "grad_norm": 274.1264343261719, "learning_rate": 4.70451349291953e-06, "loss": 25.8156, "step": 283470 }, { "epoch": 0.5726475353208063, "grad_norm": 49.78269958496094, "learning_rate": 4.704165037877321e-06, "loss": 10.8104, "step": 283480 }, { "epoch": 0.5726677359534901, "grad_norm": 151.04638671875, "learning_rate": 4.703816584276975e-06, "loss": 16.2919, "step": 283490 }, { "epoch": 0.5726879365861739, "grad_norm": 215.10507202148438, "learning_rate": 4.703468132120193e-06, "loss": 28.8384, "step": 283500 }, { "epoch": 0.5727081372188577, "grad_norm": 266.309814453125, "learning_rate": 4.70311968140867e-06, "loss": 22.1145, "step": 283510 }, { "epoch": 0.5727283378515415, "grad_norm": 493.8695983886719, "learning_rate": 4.702771232144104e-06, "loss": 25.2268, "step": 283520 }, { "epoch": 0.5727485384842254, "grad_norm": 559.64794921875, "learning_rate": 4.702422784328199e-06, "loss": 18.1718, "step": 283530 }, { "epoch": 0.5727687391169092, "grad_norm": 842.6926879882812, "learning_rate": 4.702074337962645e-06, "loss": 29.7846, "step": 283540 }, { "epoch": 0.572788939749593, "grad_norm": 142.5811767578125, "learning_rate": 4.7017258930491474e-06, "loss": 18.9788, "step": 283550 }, { "epoch": 0.5728091403822768, "grad_norm": 325.9900207519531, "learning_rate": 4.7013774495894e-06, "loss": 27.6884, "step": 283560 }, { "epoch": 0.5728293410149606, "grad_norm": 693.3881225585938, "learning_rate": 4.7010290075851035e-06, "loss": 19.1565, "step": 283570 }, { "epoch": 0.5728495416476445, "grad_norm": 379.7054443359375, "learning_rate": 4.700680567037956e-06, "loss": 16.461, "step": 283580 }, { "epoch": 0.5728697422803282, "grad_norm": 783.3622436523438, "learning_rate": 4.7003321279496526e-06, "loss": 14.4221, "step": 283590 }, { "epoch": 0.572889942913012, "grad_norm": 840.8717651367188, "learning_rate": 4.699983690321898e-06, "loss": 30.6254, "step": 283600 }, { "epoch": 0.5729101435456958, "grad_norm": 136.46791076660156, "learning_rate": 4.699635254156383e-06, "loss": 14.5199, "step": 283610 }, { "epoch": 0.5729303441783796, "grad_norm": 459.1830139160156, "learning_rate": 4.69928681945481e-06, "loss": 24.2669, "step": 283620 }, { "epoch": 0.5729505448110634, "grad_norm": 317.6564025878906, "learning_rate": 4.6989383862188785e-06, "loss": 25.7679, "step": 283630 }, { "epoch": 0.5729707454437473, "grad_norm": 109.60838317871094, "learning_rate": 4.6985899544502835e-06, "loss": 14.57, "step": 283640 }, { "epoch": 0.5729909460764311, "grad_norm": 291.0938720703125, "learning_rate": 4.698241524150724e-06, "loss": 26.9732, "step": 283650 }, { "epoch": 0.5730111467091149, "grad_norm": 954.2431640625, "learning_rate": 4.6978930953219e-06, "loss": 27.9935, "step": 283660 }, { "epoch": 0.5730313473417987, "grad_norm": 355.62835693359375, "learning_rate": 4.697544667965507e-06, "loss": 24.8484, "step": 283670 }, { "epoch": 0.5730515479744825, "grad_norm": 367.4694519042969, "learning_rate": 4.697196242083245e-06, "loss": 23.9496, "step": 283680 }, { "epoch": 0.5730717486071664, "grad_norm": 160.22972106933594, "learning_rate": 4.696847817676814e-06, "loss": 10.2788, "step": 283690 }, { "epoch": 0.5730919492398502, "grad_norm": 408.6793518066406, "learning_rate": 4.696499394747906e-06, "loss": 14.0879, "step": 283700 }, { "epoch": 0.573112149872534, "grad_norm": 317.40509033203125, "learning_rate": 4.696150973298225e-06, "loss": 17.0443, 
"step": 283710 }, { "epoch": 0.5731323505052178, "grad_norm": 310.83990478515625, "learning_rate": 4.695802553329467e-06, "loss": 24.7235, "step": 283720 }, { "epoch": 0.5731525511379016, "grad_norm": 422.622802734375, "learning_rate": 4.695454134843332e-06, "loss": 11.7616, "step": 283730 }, { "epoch": 0.5731727517705855, "grad_norm": 276.022216796875, "learning_rate": 4.695105717841516e-06, "loss": 25.9616, "step": 283740 }, { "epoch": 0.5731929524032693, "grad_norm": 247.97230529785156, "learning_rate": 4.694757302325715e-06, "loss": 17.5643, "step": 283750 }, { "epoch": 0.5732131530359531, "grad_norm": 1246.0626220703125, "learning_rate": 4.694408888297635e-06, "loss": 12.6615, "step": 283760 }, { "epoch": 0.5732333536686369, "grad_norm": 1299.63720703125, "learning_rate": 4.694060475758964e-06, "loss": 31.702, "step": 283770 }, { "epoch": 0.5732535543013207, "grad_norm": 8.856138229370117, "learning_rate": 4.693712064711408e-06, "loss": 11.0963, "step": 283780 }, { "epoch": 0.5732737549340046, "grad_norm": 234.0096893310547, "learning_rate": 4.693363655156662e-06, "loss": 13.3781, "step": 283790 }, { "epoch": 0.5732939555666884, "grad_norm": 263.8318176269531, "learning_rate": 4.693015247096423e-06, "loss": 7.937, "step": 283800 }, { "epoch": 0.5733141561993722, "grad_norm": 318.6791076660156, "learning_rate": 4.6926668405323915e-06, "loss": 17.5293, "step": 283810 }, { "epoch": 0.573334356832056, "grad_norm": 537.685302734375, "learning_rate": 4.692318435466265e-06, "loss": 23.3625, "step": 283820 }, { "epoch": 0.5733545574647398, "grad_norm": 351.2520751953125, "learning_rate": 4.691970031899741e-06, "loss": 19.2957, "step": 283830 }, { "epoch": 0.5733747580974237, "grad_norm": 209.622802734375, "learning_rate": 4.691621629834516e-06, "loss": 9.8297, "step": 283840 }, { "epoch": 0.5733949587301074, "grad_norm": 186.2348175048828, "learning_rate": 4.691273229272291e-06, "loss": 18.9344, "step": 283850 }, { "epoch": 0.5734151593627912, "grad_norm": 190.50189208984375, "learning_rate": 4.6909248302147634e-06, "loss": 21.8252, "step": 283860 }, { "epoch": 0.573435359995475, "grad_norm": 211.6445770263672, "learning_rate": 4.69057643266363e-06, "loss": 13.5997, "step": 283870 }, { "epoch": 0.5734555606281588, "grad_norm": 669.6118774414062, "learning_rate": 4.69022803662059e-06, "loss": 32.8732, "step": 283880 }, { "epoch": 0.5734757612608427, "grad_norm": 353.7624206542969, "learning_rate": 4.689879642087341e-06, "loss": 16.5534, "step": 283890 }, { "epoch": 0.5734959618935265, "grad_norm": 102.54301452636719, "learning_rate": 4.689531249065581e-06, "loss": 18.7183, "step": 283900 }, { "epoch": 0.5735161625262103, "grad_norm": 212.87945556640625, "learning_rate": 4.689182857557006e-06, "loss": 15.7912, "step": 283910 }, { "epoch": 0.5735363631588941, "grad_norm": 412.1329040527344, "learning_rate": 4.688834467563318e-06, "loss": 21.5479, "step": 283920 }, { "epoch": 0.5735565637915779, "grad_norm": 547.8233642578125, "learning_rate": 4.688486079086213e-06, "loss": 14.5297, "step": 283930 }, { "epoch": 0.5735767644242618, "grad_norm": 75.20598602294922, "learning_rate": 4.688137692127389e-06, "loss": 15.2088, "step": 283940 }, { "epoch": 0.5735969650569456, "grad_norm": 256.3761291503906, "learning_rate": 4.687789306688544e-06, "loss": 20.6092, "step": 283950 }, { "epoch": 0.5736171656896294, "grad_norm": 375.7027893066406, "learning_rate": 4.687440922771376e-06, "loss": 11.2912, "step": 283960 }, { "epoch": 0.5736373663223132, "grad_norm": 363.12255859375, "learning_rate": 
4.687092540377583e-06, "loss": 22.0384, "step": 283970 }, { "epoch": 0.573657566954997, "grad_norm": 455.80316162109375, "learning_rate": 4.686744159508864e-06, "loss": 10.5865, "step": 283980 }, { "epoch": 0.5736777675876809, "grad_norm": 338.5058288574219, "learning_rate": 4.686395780166914e-06, "loss": 9.4401, "step": 283990 }, { "epoch": 0.5736979682203647, "grad_norm": 506.3396911621094, "learning_rate": 4.686047402353433e-06, "loss": 20.7148, "step": 284000 }, { "epoch": 0.5737181688530485, "grad_norm": 266.2140808105469, "learning_rate": 4.68569902607012e-06, "loss": 10.6317, "step": 284010 }, { "epoch": 0.5737383694857323, "grad_norm": 436.3377380371094, "learning_rate": 4.685350651318672e-06, "loss": 26.013, "step": 284020 }, { "epoch": 0.5737585701184161, "grad_norm": 99.87852478027344, "learning_rate": 4.685002278100787e-06, "loss": 22.1473, "step": 284030 }, { "epoch": 0.5737787707511, "grad_norm": 399.6739196777344, "learning_rate": 4.68465390641816e-06, "loss": 20.2335, "step": 284040 }, { "epoch": 0.5737989713837838, "grad_norm": 523.8590698242188, "learning_rate": 4.684305536272496e-06, "loss": 18.9442, "step": 284050 }, { "epoch": 0.5738191720164676, "grad_norm": 330.4162902832031, "learning_rate": 4.683957167665485e-06, "loss": 32.5483, "step": 284060 }, { "epoch": 0.5738393726491514, "grad_norm": 979.9481201171875, "learning_rate": 4.6836088005988295e-06, "loss": 20.9171, "step": 284070 }, { "epoch": 0.5738595732818352, "grad_norm": 64.82814025878906, "learning_rate": 4.6832604350742275e-06, "loss": 22.665, "step": 284080 }, { "epoch": 0.573879773914519, "grad_norm": 995.423828125, "learning_rate": 4.682912071093374e-06, "loss": 33.3612, "step": 284090 }, { "epoch": 0.5738999745472029, "grad_norm": 253.1041259765625, "learning_rate": 4.68256370865797e-06, "loss": 21.2609, "step": 284100 }, { "epoch": 0.5739201751798866, "grad_norm": 514.6492309570312, "learning_rate": 4.682215347769712e-06, "loss": 26.7161, "step": 284110 }, { "epoch": 0.5739403758125704, "grad_norm": 598.346923828125, "learning_rate": 4.681866988430297e-06, "loss": 18.6298, "step": 284120 }, { "epoch": 0.5739605764452542, "grad_norm": 174.0736083984375, "learning_rate": 4.6815186306414225e-06, "loss": 12.6335, "step": 284130 }, { "epoch": 0.573980777077938, "grad_norm": 762.2974853515625, "learning_rate": 4.681170274404791e-06, "loss": 22.1227, "step": 284140 }, { "epoch": 0.5740009777106219, "grad_norm": 496.7584228515625, "learning_rate": 4.680821919722094e-06, "loss": 13.5093, "step": 284150 }, { "epoch": 0.5740211783433057, "grad_norm": 402.07781982421875, "learning_rate": 4.6804735665950325e-06, "loss": 24.4496, "step": 284160 }, { "epoch": 0.5740413789759895, "grad_norm": 425.4755554199219, "learning_rate": 4.680125215025304e-06, "loss": 16.626, "step": 284170 }, { "epoch": 0.5740615796086733, "grad_norm": 625.1243286132812, "learning_rate": 4.679776865014609e-06, "loss": 40.6312, "step": 284180 }, { "epoch": 0.5740817802413571, "grad_norm": 578.5704345703125, "learning_rate": 4.67942851656464e-06, "loss": 31.0527, "step": 284190 }, { "epoch": 0.574101980874041, "grad_norm": 300.3113708496094, "learning_rate": 4.679080169677097e-06, "loss": 8.6096, "step": 284200 }, { "epoch": 0.5741221815067248, "grad_norm": 106.82528686523438, "learning_rate": 4.678731824353682e-06, "loss": 25.4715, "step": 284210 }, { "epoch": 0.5741423821394086, "grad_norm": 465.44378662109375, "learning_rate": 4.678383480596085e-06, "loss": 17.6224, "step": 284220 }, { "epoch": 0.5741625827720924, "grad_norm": 
702.7250366210938, "learning_rate": 4.678035138406009e-06, "loss": 46.881, "step": 284230 }, { "epoch": 0.5741827834047762, "grad_norm": 499.0555114746094, "learning_rate": 4.677686797785153e-06, "loss": 23.8855, "step": 284240 }, { "epoch": 0.5742029840374601, "grad_norm": 381.1941833496094, "learning_rate": 4.677338458735211e-06, "loss": 40.2674, "step": 284250 }, { "epoch": 0.5742231846701439, "grad_norm": 339.23687744140625, "learning_rate": 4.676990121257882e-06, "loss": 18.1712, "step": 284260 }, { "epoch": 0.5742433853028277, "grad_norm": 263.4681091308594, "learning_rate": 4.676641785354865e-06, "loss": 15.9104, "step": 284270 }, { "epoch": 0.5742635859355115, "grad_norm": 641.75732421875, "learning_rate": 4.676293451027855e-06, "loss": 16.1626, "step": 284280 }, { "epoch": 0.5742837865681953, "grad_norm": 232.75022888183594, "learning_rate": 4.675945118278552e-06, "loss": 22.6068, "step": 284290 }, { "epoch": 0.5743039872008792, "grad_norm": 133.041259765625, "learning_rate": 4.675596787108652e-06, "loss": 15.6812, "step": 284300 }, { "epoch": 0.574324187833563, "grad_norm": 406.1040954589844, "learning_rate": 4.675248457519857e-06, "loss": 28.2724, "step": 284310 }, { "epoch": 0.5743443884662468, "grad_norm": 268.32012939453125, "learning_rate": 4.67490012951386e-06, "loss": 16.0275, "step": 284320 }, { "epoch": 0.5743645890989306, "grad_norm": 570.830810546875, "learning_rate": 4.6745518030923595e-06, "loss": 16.8502, "step": 284330 }, { "epoch": 0.5743847897316144, "grad_norm": 233.2513427734375, "learning_rate": 4.674203478257055e-06, "loss": 20.7955, "step": 284340 }, { "epoch": 0.5744049903642983, "grad_norm": 257.6399230957031, "learning_rate": 4.673855155009644e-06, "loss": 19.863, "step": 284350 }, { "epoch": 0.574425190996982, "grad_norm": 264.8829345703125, "learning_rate": 4.673506833351821e-06, "loss": 23.7191, "step": 284360 }, { "epoch": 0.5744453916296658, "grad_norm": 985.7921752929688, "learning_rate": 4.673158513285289e-06, "loss": 32.1464, "step": 284370 }, { "epoch": 0.5744655922623496, "grad_norm": 295.1639709472656, "learning_rate": 4.67281019481174e-06, "loss": 20.751, "step": 284380 }, { "epoch": 0.5744857928950334, "grad_norm": 330.4938659667969, "learning_rate": 4.672461877932877e-06, "loss": 14.6958, "step": 284390 }, { "epoch": 0.5745059935277173, "grad_norm": 92.31692504882812, "learning_rate": 4.672113562650394e-06, "loss": 15.6347, "step": 284400 }, { "epoch": 0.5745261941604011, "grad_norm": 381.076904296875, "learning_rate": 4.6717652489659894e-06, "loss": 22.0171, "step": 284410 }, { "epoch": 0.5745463947930849, "grad_norm": 922.604248046875, "learning_rate": 4.671416936881361e-06, "loss": 20.3584, "step": 284420 }, { "epoch": 0.5745665954257687, "grad_norm": 203.6077117919922, "learning_rate": 4.671068626398208e-06, "loss": 30.4045, "step": 284430 }, { "epoch": 0.5745867960584525, "grad_norm": 357.0528564453125, "learning_rate": 4.670720317518226e-06, "loss": 7.9743, "step": 284440 }, { "epoch": 0.5746069966911364, "grad_norm": 5.510529041290283, "learning_rate": 4.670372010243111e-06, "loss": 27.2087, "step": 284450 }, { "epoch": 0.5746271973238202, "grad_norm": 436.4128112792969, "learning_rate": 4.670023704574564e-06, "loss": 12.2783, "step": 284460 }, { "epoch": 0.574647397956504, "grad_norm": 172.4686737060547, "learning_rate": 4.669675400514283e-06, "loss": 25.0014, "step": 284470 }, { "epoch": 0.5746675985891878, "grad_norm": 24.989734649658203, "learning_rate": 4.669327098063963e-06, "loss": 19.5955, "step": 284480 }, { "epoch": 
0.5746877992218716, "grad_norm": 398.2413024902344, "learning_rate": 4.668978797225303e-06, "loss": 22.0749, "step": 284490 }, { "epoch": 0.5747079998545555, "grad_norm": 725.6436157226562, "learning_rate": 4.668630498000001e-06, "loss": 41.6987, "step": 284500 }, { "epoch": 0.5747282004872393, "grad_norm": 56.92924880981445, "learning_rate": 4.668282200389752e-06, "loss": 16.7594, "step": 284510 }, { "epoch": 0.5747484011199231, "grad_norm": 60.30400848388672, "learning_rate": 4.667933904396255e-06, "loss": 20.4393, "step": 284520 }, { "epoch": 0.5747686017526069, "grad_norm": 287.49615478515625, "learning_rate": 4.667585610021211e-06, "loss": 15.5899, "step": 284530 }, { "epoch": 0.5747888023852907, "grad_norm": 302.23297119140625, "learning_rate": 4.667237317266311e-06, "loss": 23.3494, "step": 284540 }, { "epoch": 0.5748090030179746, "grad_norm": 1191.14111328125, "learning_rate": 4.666889026133257e-06, "loss": 34.3193, "step": 284550 }, { "epoch": 0.5748292036506584, "grad_norm": 481.72100830078125, "learning_rate": 4.666540736623746e-06, "loss": 20.3894, "step": 284560 }, { "epoch": 0.5748494042833422, "grad_norm": 23486.73046875, "learning_rate": 4.666192448739475e-06, "loss": 34.2088, "step": 284570 }, { "epoch": 0.574869604916026, "grad_norm": 408.9329528808594, "learning_rate": 4.665844162482141e-06, "loss": 10.6384, "step": 284580 }, { "epoch": 0.5748898055487098, "grad_norm": 731.5371704101562, "learning_rate": 4.665495877853443e-06, "loss": 27.1174, "step": 284590 }, { "epoch": 0.5749100061813937, "grad_norm": 747.6769409179688, "learning_rate": 4.6651475948550765e-06, "loss": 25.1141, "step": 284600 }, { "epoch": 0.5749302068140775, "grad_norm": 386.1933898925781, "learning_rate": 4.66479931348874e-06, "loss": 16.0697, "step": 284610 }, { "epoch": 0.5749504074467612, "grad_norm": 645.2978515625, "learning_rate": 4.664451033756131e-06, "loss": 11.5531, "step": 284620 }, { "epoch": 0.574970608079445, "grad_norm": 1509.620361328125, "learning_rate": 4.664102755658948e-06, "loss": 22.9517, "step": 284630 }, { "epoch": 0.5749908087121288, "grad_norm": 352.56793212890625, "learning_rate": 4.663754479198887e-06, "loss": 11.9502, "step": 284640 }, { "epoch": 0.5750110093448126, "grad_norm": 480.3440246582031, "learning_rate": 4.663406204377645e-06, "loss": 17.9299, "step": 284650 }, { "epoch": 0.5750312099774965, "grad_norm": 376.5037841796875, "learning_rate": 4.663057931196922e-06, "loss": 27.2972, "step": 284660 }, { "epoch": 0.5750514106101803, "grad_norm": 502.1021423339844, "learning_rate": 4.662709659658411e-06, "loss": 20.9237, "step": 284670 }, { "epoch": 0.5750716112428641, "grad_norm": 236.69732666015625, "learning_rate": 4.6623613897638146e-06, "loss": 32.1217, "step": 284680 }, { "epoch": 0.5750918118755479, "grad_norm": 2.2966716289520264, "learning_rate": 4.6620131215148275e-06, "loss": 13.1908, "step": 284690 }, { "epoch": 0.5751120125082317, "grad_norm": 326.837158203125, "learning_rate": 4.661664854913147e-06, "loss": 14.4557, "step": 284700 }, { "epoch": 0.5751322131409156, "grad_norm": 546.8901977539062, "learning_rate": 4.661316589960471e-06, "loss": 10.5499, "step": 284710 }, { "epoch": 0.5751524137735994, "grad_norm": 292.4002990722656, "learning_rate": 4.660968326658497e-06, "loss": 16.1762, "step": 284720 }, { "epoch": 0.5751726144062832, "grad_norm": 18.51688003540039, "learning_rate": 4.660620065008923e-06, "loss": 18.8795, "step": 284730 }, { "epoch": 0.575192815038967, "grad_norm": 591.8967895507812, "learning_rate": 4.6602718050134435e-06, "loss": 
22.2219, "step": 284740 }, { "epoch": 0.5752130156716508, "grad_norm": 146.34230041503906, "learning_rate": 4.659923546673761e-06, "loss": 38.448, "step": 284750 }, { "epoch": 0.5752332163043347, "grad_norm": 65.420166015625, "learning_rate": 4.659575289991567e-06, "loss": 21.2529, "step": 284760 }, { "epoch": 0.5752534169370185, "grad_norm": 187.43064880371094, "learning_rate": 4.659227034968563e-06, "loss": 31.3825, "step": 284770 }, { "epoch": 0.5752736175697023, "grad_norm": 240.3877410888672, "learning_rate": 4.658878781606445e-06, "loss": 12.2182, "step": 284780 }, { "epoch": 0.5752938182023861, "grad_norm": 404.1460266113281, "learning_rate": 4.658530529906911e-06, "loss": 26.9092, "step": 284790 }, { "epoch": 0.5753140188350699, "grad_norm": 446.2254638671875, "learning_rate": 4.658182279871657e-06, "loss": 13.1347, "step": 284800 }, { "epoch": 0.5753342194677538, "grad_norm": 307.27301025390625, "learning_rate": 4.65783403150238e-06, "loss": 18.2271, "step": 284810 }, { "epoch": 0.5753544201004376, "grad_norm": 97.33065032958984, "learning_rate": 4.657485784800782e-06, "loss": 14.324, "step": 284820 }, { "epoch": 0.5753746207331214, "grad_norm": 219.03695678710938, "learning_rate": 4.657137539768553e-06, "loss": 24.3588, "step": 284830 }, { "epoch": 0.5753948213658052, "grad_norm": 416.7481384277344, "learning_rate": 4.656789296407396e-06, "loss": 14.1049, "step": 284840 }, { "epoch": 0.575415021998489, "grad_norm": 98.80854797363281, "learning_rate": 4.656441054719007e-06, "loss": 16.7309, "step": 284850 }, { "epoch": 0.5754352226311729, "grad_norm": 305.9570007324219, "learning_rate": 4.656092814705082e-06, "loss": 23.5086, "step": 284860 }, { "epoch": 0.5754554232638566, "grad_norm": 123.20638275146484, "learning_rate": 4.655744576367318e-06, "loss": 10.871, "step": 284870 }, { "epoch": 0.5754756238965404, "grad_norm": 298.4538879394531, "learning_rate": 4.655396339707414e-06, "loss": 8.1463, "step": 284880 }, { "epoch": 0.5754958245292242, "grad_norm": 255.8907470703125, "learning_rate": 4.655048104727066e-06, "loss": 5.6172, "step": 284890 }, { "epoch": 0.575516025161908, "grad_norm": 600.5504150390625, "learning_rate": 4.654699871427972e-06, "loss": 34.4467, "step": 284900 }, { "epoch": 0.5755362257945918, "grad_norm": 317.0341491699219, "learning_rate": 4.654351639811828e-06, "loss": 17.3721, "step": 284910 }, { "epoch": 0.5755564264272757, "grad_norm": 424.71966552734375, "learning_rate": 4.654003409880333e-06, "loss": 17.8341, "step": 284920 }, { "epoch": 0.5755766270599595, "grad_norm": 28.263832092285156, "learning_rate": 4.653655181635184e-06, "loss": 18.1012, "step": 284930 }, { "epoch": 0.5755968276926433, "grad_norm": 538.7200317382812, "learning_rate": 4.653306955078077e-06, "loss": 19.7967, "step": 284940 }, { "epoch": 0.5756170283253271, "grad_norm": 451.8140869140625, "learning_rate": 4.652958730210711e-06, "loss": 11.1338, "step": 284950 }, { "epoch": 0.575637228958011, "grad_norm": 193.7926025390625, "learning_rate": 4.65261050703478e-06, "loss": 11.4366, "step": 284960 }, { "epoch": 0.5756574295906948, "grad_norm": 140.07032775878906, "learning_rate": 4.652262285551983e-06, "loss": 22.7315, "step": 284970 }, { "epoch": 0.5756776302233786, "grad_norm": 301.08514404296875, "learning_rate": 4.651914065764021e-06, "loss": 14.3331, "step": 284980 }, { "epoch": 0.5756978308560624, "grad_norm": 198.33978271484375, "learning_rate": 4.651565847672584e-06, "loss": 12.0796, "step": 284990 }, { "epoch": 0.5757180314887462, "grad_norm": 460.6780700683594, "learning_rate": 
4.651217631279374e-06, "loss": 20.4087, "step": 285000 }, { "epoch": 0.57573823212143, "grad_norm": 490.967041015625, "learning_rate": 4.650869416586088e-06, "loss": 13.374, "step": 285010 }, { "epoch": 0.5757584327541139, "grad_norm": 320.7071228027344, "learning_rate": 4.650521203594421e-06, "loss": 9.587, "step": 285020 }, { "epoch": 0.5757786333867977, "grad_norm": 60.833412170410156, "learning_rate": 4.6501729923060705e-06, "loss": 19.382, "step": 285030 }, { "epoch": 0.5757988340194815, "grad_norm": 319.47808837890625, "learning_rate": 4.649824782722737e-06, "loss": 12.8303, "step": 285040 }, { "epoch": 0.5758190346521653, "grad_norm": 768.2241821289062, "learning_rate": 4.649476574846113e-06, "loss": 17.567, "step": 285050 }, { "epoch": 0.5758392352848491, "grad_norm": 686.4849853515625, "learning_rate": 4.649128368677896e-06, "loss": 28.091, "step": 285060 }, { "epoch": 0.575859435917533, "grad_norm": 17.46307945251465, "learning_rate": 4.648780164219787e-06, "loss": 31.0867, "step": 285070 }, { "epoch": 0.5758796365502168, "grad_norm": 828.7020263671875, "learning_rate": 4.648431961473482e-06, "loss": 17.7138, "step": 285080 }, { "epoch": 0.5758998371829006, "grad_norm": 192.98765563964844, "learning_rate": 4.648083760440676e-06, "loss": 9.0035, "step": 285090 }, { "epoch": 0.5759200378155844, "grad_norm": 580.1175537109375, "learning_rate": 4.6477355611230655e-06, "loss": 20.5532, "step": 285100 }, { "epoch": 0.5759402384482682, "grad_norm": 309.5498962402344, "learning_rate": 4.6473873635223514e-06, "loss": 27.1804, "step": 285110 }, { "epoch": 0.5759604390809521, "grad_norm": 6.215914726257324, "learning_rate": 4.647039167640227e-06, "loss": 22.8904, "step": 285120 }, { "epoch": 0.5759806397136358, "grad_norm": 251.83738708496094, "learning_rate": 4.646690973478391e-06, "loss": 18.3718, "step": 285130 }, { "epoch": 0.5760008403463196, "grad_norm": 225.23223876953125, "learning_rate": 4.6463427810385425e-06, "loss": 13.746, "step": 285140 }, { "epoch": 0.5760210409790034, "grad_norm": 242.44158935546875, "learning_rate": 4.645994590322373e-06, "loss": 31.7429, "step": 285150 }, { "epoch": 0.5760412416116872, "grad_norm": 86.13539123535156, "learning_rate": 4.645646401331585e-06, "loss": 10.4489, "step": 285160 }, { "epoch": 0.5760614422443711, "grad_norm": 189.4027557373047, "learning_rate": 4.6452982140678735e-06, "loss": 16.5795, "step": 285170 }, { "epoch": 0.5760816428770549, "grad_norm": 614.1434326171875, "learning_rate": 4.644950028532935e-06, "loss": 30.6802, "step": 285180 }, { "epoch": 0.5761018435097387, "grad_norm": 612.4061889648438, "learning_rate": 4.644601844728467e-06, "loss": 22.207, "step": 285190 }, { "epoch": 0.5761220441424225, "grad_norm": 458.0311584472656, "learning_rate": 4.644253662656167e-06, "loss": 24.1358, "step": 285200 }, { "epoch": 0.5761422447751063, "grad_norm": 276.1941223144531, "learning_rate": 4.643905482317731e-06, "loss": 12.9366, "step": 285210 }, { "epoch": 0.5761624454077902, "grad_norm": 282.3110656738281, "learning_rate": 4.643557303714855e-06, "loss": 17.2801, "step": 285220 }, { "epoch": 0.576182646040474, "grad_norm": 489.2680358886719, "learning_rate": 4.643209126849239e-06, "loss": 19.5753, "step": 285230 }, { "epoch": 0.5762028466731578, "grad_norm": 351.1014709472656, "learning_rate": 4.64286095172258e-06, "loss": 22.7033, "step": 285240 }, { "epoch": 0.5762230473058416, "grad_norm": 1463.812255859375, "learning_rate": 4.642512778336571e-06, "loss": 31.2228, "step": 285250 }, { "epoch": 0.5762432479385254, "grad_norm": 
221.68878173828125, "learning_rate": 4.642164606692912e-06, "loss": 17.4044, "step": 285260 }, { "epoch": 0.5762634485712093, "grad_norm": 183.8222198486328, "learning_rate": 4.641816436793301e-06, "loss": 13.5053, "step": 285270 }, { "epoch": 0.5762836492038931, "grad_norm": 571.3402709960938, "learning_rate": 4.64146826863943e-06, "loss": 32.581, "step": 285280 }, { "epoch": 0.5763038498365769, "grad_norm": 667.9365844726562, "learning_rate": 4.641120102233001e-06, "loss": 18.8078, "step": 285290 }, { "epoch": 0.5763240504692607, "grad_norm": 479.35076904296875, "learning_rate": 4.6407719375757095e-06, "loss": 16.2684, "step": 285300 }, { "epoch": 0.5763442511019445, "grad_norm": 166.7450408935547, "learning_rate": 4.6404237746692514e-06, "loss": 20.5289, "step": 285310 }, { "epoch": 0.5763644517346284, "grad_norm": 466.5275573730469, "learning_rate": 4.640075613515324e-06, "loss": 12.6233, "step": 285320 }, { "epoch": 0.5763846523673122, "grad_norm": 399.5809631347656, "learning_rate": 4.639727454115626e-06, "loss": 13.901, "step": 285330 }, { "epoch": 0.576404852999996, "grad_norm": 781.8657836914062, "learning_rate": 4.639379296471851e-06, "loss": 27.9794, "step": 285340 }, { "epoch": 0.5764250536326798, "grad_norm": 514.377197265625, "learning_rate": 4.639031140585697e-06, "loss": 12.9701, "step": 285350 }, { "epoch": 0.5764452542653636, "grad_norm": 309.2298889160156, "learning_rate": 4.638682986458862e-06, "loss": 23.8775, "step": 285360 }, { "epoch": 0.5764654548980475, "grad_norm": 578.560302734375, "learning_rate": 4.638334834093044e-06, "loss": 24.1145, "step": 285370 }, { "epoch": 0.5764856555307313, "grad_norm": 559.6069946289062, "learning_rate": 4.637986683489937e-06, "loss": 18.4905, "step": 285380 }, { "epoch": 0.576505856163415, "grad_norm": 204.30908203125, "learning_rate": 4.637638534651238e-06, "loss": 16.427, "step": 285390 }, { "epoch": 0.5765260567960988, "grad_norm": 588.1658325195312, "learning_rate": 4.637290387578647e-06, "loss": 32.3938, "step": 285400 }, { "epoch": 0.5765462574287826, "grad_norm": 392.609619140625, "learning_rate": 4.636942242273857e-06, "loss": 22.5501, "step": 285410 }, { "epoch": 0.5765664580614664, "grad_norm": 246.3218536376953, "learning_rate": 4.6365940987385655e-06, "loss": 26.3835, "step": 285420 }, { "epoch": 0.5765866586941503, "grad_norm": 23.963226318359375, "learning_rate": 4.636245956974474e-06, "loss": 11.035, "step": 285430 }, { "epoch": 0.5766068593268341, "grad_norm": 346.2037658691406, "learning_rate": 4.635897816983272e-06, "loss": 22.7347, "step": 285440 }, { "epoch": 0.5766270599595179, "grad_norm": 189.39111328125, "learning_rate": 4.635549678766661e-06, "loss": 11.7879, "step": 285450 }, { "epoch": 0.5766472605922017, "grad_norm": 18.474414825439453, "learning_rate": 4.635201542326337e-06, "loss": 18.6868, "step": 285460 }, { "epoch": 0.5766674612248855, "grad_norm": 460.7426452636719, "learning_rate": 4.634853407663996e-06, "loss": 22.0784, "step": 285470 }, { "epoch": 0.5766876618575694, "grad_norm": 597.2728881835938, "learning_rate": 4.634505274781336e-06, "loss": 11.4606, "step": 285480 }, { "epoch": 0.5767078624902532, "grad_norm": 94.71599578857422, "learning_rate": 4.634157143680053e-06, "loss": 8.5866, "step": 285490 }, { "epoch": 0.576728063122937, "grad_norm": 1044.5379638671875, "learning_rate": 4.6338090143618435e-06, "loss": 24.0246, "step": 285500 }, { "epoch": 0.5767482637556208, "grad_norm": 116.58110809326172, "learning_rate": 4.633460886828402e-06, "loss": 9.3799, "step": 285510 }, { "epoch": 
0.5767684643883046, "grad_norm": 587.9881591796875, "learning_rate": 4.63311276108143e-06, "loss": 22.7472, "step": 285520 }, { "epoch": 0.5767886650209885, "grad_norm": 17.695222854614258, "learning_rate": 4.632764637122622e-06, "loss": 18.6941, "step": 285530 }, { "epoch": 0.5768088656536723, "grad_norm": 28.241188049316406, "learning_rate": 4.632416514953675e-06, "loss": 22.2359, "step": 285540 }, { "epoch": 0.5768290662863561, "grad_norm": 439.5326232910156, "learning_rate": 4.6320683945762835e-06, "loss": 20.7384, "step": 285550 }, { "epoch": 0.5768492669190399, "grad_norm": 828.4425048828125, "learning_rate": 4.631720275992148e-06, "loss": 25.1309, "step": 285560 }, { "epoch": 0.5768694675517237, "grad_norm": 534.795654296875, "learning_rate": 4.631372159202962e-06, "loss": 24.1091, "step": 285570 }, { "epoch": 0.5768896681844076, "grad_norm": 459.3776550292969, "learning_rate": 4.631024044210422e-06, "loss": 26.7064, "step": 285580 }, { "epoch": 0.5769098688170914, "grad_norm": 611.1201782226562, "learning_rate": 4.6306759310162304e-06, "loss": 12.0516, "step": 285590 }, { "epoch": 0.5769300694497752, "grad_norm": 586.6641235351562, "learning_rate": 4.630327819622076e-06, "loss": 40.366, "step": 285600 }, { "epoch": 0.576950270082459, "grad_norm": 601.6295166015625, "learning_rate": 4.62997971002966e-06, "loss": 24.1361, "step": 285610 }, { "epoch": 0.5769704707151428, "grad_norm": 614.7229614257812, "learning_rate": 4.629631602240678e-06, "loss": 29.7239, "step": 285620 }, { "epoch": 0.5769906713478267, "grad_norm": 255.7998504638672, "learning_rate": 4.6292834962568265e-06, "loss": 27.0257, "step": 285630 }, { "epoch": 0.5770108719805104, "grad_norm": 165.79042053222656, "learning_rate": 4.628935392079802e-06, "loss": 13.3021, "step": 285640 }, { "epoch": 0.5770310726131942, "grad_norm": 214.94522094726562, "learning_rate": 4.628587289711303e-06, "loss": 18.4219, "step": 285650 }, { "epoch": 0.577051273245878, "grad_norm": 175.484375, "learning_rate": 4.628239189153023e-06, "loss": 13.9871, "step": 285660 }, { "epoch": 0.5770714738785618, "grad_norm": 3.3779449462890625, "learning_rate": 4.627891090406659e-06, "loss": 4.6265, "step": 285670 }, { "epoch": 0.5770916745112457, "grad_norm": 274.63739013671875, "learning_rate": 4.627542993473909e-06, "loss": 23.1313, "step": 285680 }, { "epoch": 0.5771118751439295, "grad_norm": 465.6402587890625, "learning_rate": 4.6271948983564715e-06, "loss": 35.0637, "step": 285690 }, { "epoch": 0.5771320757766133, "grad_norm": 532.4801635742188, "learning_rate": 4.6268468050560394e-06, "loss": 16.0424, "step": 285700 }, { "epoch": 0.5771522764092971, "grad_norm": 193.82769775390625, "learning_rate": 4.626498713574311e-06, "loss": 18.8277, "step": 285710 }, { "epoch": 0.5771724770419809, "grad_norm": 428.383056640625, "learning_rate": 4.626150623912983e-06, "loss": 24.673, "step": 285720 }, { "epoch": 0.5771926776746648, "grad_norm": 482.5599060058594, "learning_rate": 4.625802536073751e-06, "loss": 17.7467, "step": 285730 }, { "epoch": 0.5772128783073486, "grad_norm": 221.01895141601562, "learning_rate": 4.625454450058311e-06, "loss": 24.9587, "step": 285740 }, { "epoch": 0.5772330789400324, "grad_norm": 355.8588562011719, "learning_rate": 4.625106365868363e-06, "loss": 23.7542, "step": 285750 }, { "epoch": 0.5772532795727162, "grad_norm": 243.97454833984375, "learning_rate": 4.624758283505599e-06, "loss": 22.4432, "step": 285760 }, { "epoch": 0.5772734802054, "grad_norm": 331.9516296386719, "learning_rate": 4.624410202971718e-06, "loss": 14.9428, 
"step": 285770 }, { "epoch": 0.5772936808380839, "grad_norm": 206.70016479492188, "learning_rate": 4.624062124268418e-06, "loss": 23.7267, "step": 285780 }, { "epoch": 0.5773138814707677, "grad_norm": 22.560590744018555, "learning_rate": 4.6237140473973916e-06, "loss": 21.4815, "step": 285790 }, { "epoch": 0.5773340821034515, "grad_norm": 655.5744018554688, "learning_rate": 4.6233659723603374e-06, "loss": 31.4316, "step": 285800 }, { "epoch": 0.5773542827361353, "grad_norm": 320.5361328125, "learning_rate": 4.623017899158953e-06, "loss": 17.9883, "step": 285810 }, { "epoch": 0.5773744833688191, "grad_norm": 694.0796508789062, "learning_rate": 4.6226698277949325e-06, "loss": 27.7262, "step": 285820 }, { "epoch": 0.577394684001503, "grad_norm": 558.8837280273438, "learning_rate": 4.622321758269972e-06, "loss": 14.9198, "step": 285830 }, { "epoch": 0.5774148846341868, "grad_norm": 376.5775451660156, "learning_rate": 4.621973690585772e-06, "loss": 19.4645, "step": 285840 }, { "epoch": 0.5774350852668706, "grad_norm": 444.7864685058594, "learning_rate": 4.621625624744026e-06, "loss": 21.8486, "step": 285850 }, { "epoch": 0.5774552858995544, "grad_norm": 542.1842041015625, "learning_rate": 4.62127756074643e-06, "loss": 20.4572, "step": 285860 }, { "epoch": 0.5774754865322382, "grad_norm": 693.6360473632812, "learning_rate": 4.620929498594682e-06, "loss": 18.3903, "step": 285870 }, { "epoch": 0.5774956871649221, "grad_norm": 454.9926452636719, "learning_rate": 4.620581438290478e-06, "loss": 18.1475, "step": 285880 }, { "epoch": 0.5775158877976059, "grad_norm": 4.223319053649902, "learning_rate": 4.620233379835513e-06, "loss": 14.8415, "step": 285890 }, { "epoch": 0.5775360884302896, "grad_norm": 841.5548706054688, "learning_rate": 4.619885323231484e-06, "loss": 26.8061, "step": 285900 }, { "epoch": 0.5775562890629734, "grad_norm": 322.03594970703125, "learning_rate": 4.6195372684800895e-06, "loss": 13.2405, "step": 285910 }, { "epoch": 0.5775764896956572, "grad_norm": 643.8477783203125, "learning_rate": 4.619189215583023e-06, "loss": 31.1178, "step": 285920 }, { "epoch": 0.577596690328341, "grad_norm": 263.5108642578125, "learning_rate": 4.618841164541982e-06, "loss": 19.3314, "step": 285930 }, { "epoch": 0.5776168909610249, "grad_norm": 479.68951416015625, "learning_rate": 4.618493115358665e-06, "loss": 13.9931, "step": 285940 }, { "epoch": 0.5776370915937087, "grad_norm": 262.92724609375, "learning_rate": 4.618145068034764e-06, "loss": 10.7394, "step": 285950 }, { "epoch": 0.5776572922263925, "grad_norm": 26.95539665222168, "learning_rate": 4.617797022571977e-06, "loss": 14.5507, "step": 285960 }, { "epoch": 0.5776774928590763, "grad_norm": 127.87303924560547, "learning_rate": 4.617448978972002e-06, "loss": 13.9312, "step": 285970 }, { "epoch": 0.5776976934917601, "grad_norm": 687.3388061523438, "learning_rate": 4.617100937236535e-06, "loss": 18.8998, "step": 285980 }, { "epoch": 0.577717894124444, "grad_norm": 141.27928161621094, "learning_rate": 4.616752897367271e-06, "loss": 25.3384, "step": 285990 }, { "epoch": 0.5777380947571278, "grad_norm": 382.4380798339844, "learning_rate": 4.6164048593659076e-06, "loss": 16.0115, "step": 286000 }, { "epoch": 0.5777582953898116, "grad_norm": 662.46435546875, "learning_rate": 4.6160568232341406e-06, "loss": 16.4892, "step": 286010 }, { "epoch": 0.5777784960224954, "grad_norm": 140.6461639404297, "learning_rate": 4.615708788973664e-06, "loss": 17.4467, "step": 286020 }, { "epoch": 0.5777986966551792, "grad_norm": 255.77389526367188, "learning_rate": 
4.615360756586177e-06, "loss": 33.3595, "step": 286030 }, { "epoch": 0.5778188972878631, "grad_norm": 159.06700134277344, "learning_rate": 4.615012726073376e-06, "loss": 15.472, "step": 286040 }, { "epoch": 0.5778390979205469, "grad_norm": 284.42010498046875, "learning_rate": 4.614664697436956e-06, "loss": 14.2701, "step": 286050 }, { "epoch": 0.5778592985532307, "grad_norm": 103.40221405029297, "learning_rate": 4.614316670678612e-06, "loss": 20.7587, "step": 286060 }, { "epoch": 0.5778794991859145, "grad_norm": 351.7590637207031, "learning_rate": 4.6139686458000445e-06, "loss": 15.2206, "step": 286070 }, { "epoch": 0.5778996998185983, "grad_norm": 218.58926391601562, "learning_rate": 4.613620622802945e-06, "loss": 11.3273, "step": 286080 }, { "epoch": 0.5779199004512822, "grad_norm": 235.08702087402344, "learning_rate": 4.6132726016890126e-06, "loss": 10.5108, "step": 286090 }, { "epoch": 0.577940101083966, "grad_norm": 94.96534729003906, "learning_rate": 4.612924582459943e-06, "loss": 30.6132, "step": 286100 }, { "epoch": 0.5779603017166498, "grad_norm": 678.427978515625, "learning_rate": 4.612576565117431e-06, "loss": 18.7635, "step": 286110 }, { "epoch": 0.5779805023493336, "grad_norm": 223.06146240234375, "learning_rate": 4.612228549663173e-06, "loss": 24.9446, "step": 286120 }, { "epoch": 0.5780007029820174, "grad_norm": 698.78271484375, "learning_rate": 4.611880536098867e-06, "loss": 21.3133, "step": 286130 }, { "epoch": 0.5780209036147013, "grad_norm": 59.89653778076172, "learning_rate": 4.61153252442621e-06, "loss": 11.58, "step": 286140 }, { "epoch": 0.578041104247385, "grad_norm": 148.0355682373047, "learning_rate": 4.611184514646894e-06, "loss": 21.3908, "step": 286150 }, { "epoch": 0.5780613048800688, "grad_norm": 400.5846862792969, "learning_rate": 4.610836506762618e-06, "loss": 13.8255, "step": 286160 }, { "epoch": 0.5780815055127526, "grad_norm": 523.7686767578125, "learning_rate": 4.610488500775078e-06, "loss": 17.843, "step": 286170 }, { "epoch": 0.5781017061454364, "grad_norm": 54.488616943359375, "learning_rate": 4.61014049668597e-06, "loss": 17.2355, "step": 286180 }, { "epoch": 0.5781219067781203, "grad_norm": 592.7142944335938, "learning_rate": 4.6097924944969885e-06, "loss": 38.8723, "step": 286190 }, { "epoch": 0.5781421074108041, "grad_norm": 207.0294952392578, "learning_rate": 4.609444494209834e-06, "loss": 22.1777, "step": 286200 }, { "epoch": 0.5781623080434879, "grad_norm": 550.2642211914062, "learning_rate": 4.609096495826196e-06, "loss": 16.6159, "step": 286210 }, { "epoch": 0.5781825086761717, "grad_norm": 147.8919219970703, "learning_rate": 4.608748499347777e-06, "loss": 13.4256, "step": 286220 }, { "epoch": 0.5782027093088555, "grad_norm": 397.0547180175781, "learning_rate": 4.60840050477627e-06, "loss": 9.2584, "step": 286230 }, { "epoch": 0.5782229099415394, "grad_norm": 258.3592224121094, "learning_rate": 4.608052512113371e-06, "loss": 20.8878, "step": 286240 }, { "epoch": 0.5782431105742232, "grad_norm": 446.7528991699219, "learning_rate": 4.6077045213607765e-06, "loss": 11.6692, "step": 286250 }, { "epoch": 0.578263311206907, "grad_norm": 441.7554626464844, "learning_rate": 4.607356532520183e-06, "loss": 22.9813, "step": 286260 }, { "epoch": 0.5782835118395908, "grad_norm": 88.1390380859375, "learning_rate": 4.607008545593286e-06, "loss": 20.0238, "step": 286270 }, { "epoch": 0.5783037124722746, "grad_norm": 168.27078247070312, "learning_rate": 4.606660560581779e-06, "loss": 16.4193, "step": 286280 }, { "epoch": 0.5783239131049585, "grad_norm": 
100.44886779785156, "learning_rate": 4.606312577487364e-06, "loss": 21.3463, "step": 286290 }, { "epoch": 0.5783441137376423, "grad_norm": 1153.3206787109375, "learning_rate": 4.605964596311733e-06, "loss": 20.3138, "step": 286300 }, { "epoch": 0.5783643143703261, "grad_norm": 283.0211486816406, "learning_rate": 4.605616617056583e-06, "loss": 14.5252, "step": 286310 }, { "epoch": 0.5783845150030099, "grad_norm": 268.0848693847656, "learning_rate": 4.6052686397236084e-06, "loss": 13.064, "step": 286320 }, { "epoch": 0.5784047156356937, "grad_norm": 270.8878479003906, "learning_rate": 4.6049206643145086e-06, "loss": 11.1924, "step": 286330 }, { "epoch": 0.5784249162683776, "grad_norm": 821.0260620117188, "learning_rate": 4.604572690830976e-06, "loss": 17.9596, "step": 286340 }, { "epoch": 0.5784451169010614, "grad_norm": 460.11639404296875, "learning_rate": 4.604224719274708e-06, "loss": 23.2671, "step": 286350 }, { "epoch": 0.5784653175337452, "grad_norm": 198.3529510498047, "learning_rate": 4.603876749647404e-06, "loss": 71.356, "step": 286360 }, { "epoch": 0.578485518166429, "grad_norm": 3.7742393016815186, "learning_rate": 4.6035287819507515e-06, "loss": 26.5519, "step": 286370 }, { "epoch": 0.5785057187991128, "grad_norm": 253.92433166503906, "learning_rate": 4.603180816186454e-06, "loss": 14.5308, "step": 286380 }, { "epoch": 0.5785259194317967, "grad_norm": 309.96221923828125, "learning_rate": 4.6028328523562065e-06, "loss": 16.8102, "step": 286390 }, { "epoch": 0.5785461200644805, "grad_norm": 395.74951171875, "learning_rate": 4.602484890461702e-06, "loss": 15.503, "step": 286400 }, { "epoch": 0.5785663206971642, "grad_norm": 298.9871826171875, "learning_rate": 4.602136930504638e-06, "loss": 25.5999, "step": 286410 }, { "epoch": 0.578586521329848, "grad_norm": 534.5841674804688, "learning_rate": 4.601788972486709e-06, "loss": 21.1018, "step": 286420 }, { "epoch": 0.5786067219625318, "grad_norm": 840.9351806640625, "learning_rate": 4.601441016409616e-06, "loss": 10.8312, "step": 286430 }, { "epoch": 0.5786269225952156, "grad_norm": 394.81683349609375, "learning_rate": 4.601093062275048e-06, "loss": 10.7939, "step": 286440 }, { "epoch": 0.5786471232278995, "grad_norm": 309.9267578125, "learning_rate": 4.600745110084704e-06, "loss": 23.2984, "step": 286450 }, { "epoch": 0.5786673238605833, "grad_norm": 318.5095520019531, "learning_rate": 4.6003971598402825e-06, "loss": 25.5717, "step": 286460 }, { "epoch": 0.5786875244932671, "grad_norm": 342.0323486328125, "learning_rate": 4.600049211543475e-06, "loss": 31.5346, "step": 286470 }, { "epoch": 0.5787077251259509, "grad_norm": 1021.0853881835938, "learning_rate": 4.599701265195979e-06, "loss": 25.0337, "step": 286480 }, { "epoch": 0.5787279257586347, "grad_norm": 0.24047352373600006, "learning_rate": 4.599353320799492e-06, "loss": 20.3372, "step": 286490 }, { "epoch": 0.5787481263913186, "grad_norm": 63.22924041748047, "learning_rate": 4.5990053783557066e-06, "loss": 24.6879, "step": 286500 }, { "epoch": 0.5787683270240024, "grad_norm": 89.17341613769531, "learning_rate": 4.598657437866319e-06, "loss": 17.7072, "step": 286510 }, { "epoch": 0.5787885276566862, "grad_norm": 154.88485717773438, "learning_rate": 4.59830949933303e-06, "loss": 15.3738, "step": 286520 }, { "epoch": 0.57880872828937, "grad_norm": 178.2100372314453, "learning_rate": 4.5979615627575295e-06, "loss": 21.1074, "step": 286530 }, { "epoch": 0.5788289289220538, "grad_norm": 273.7059326171875, "learning_rate": 4.597613628141516e-06, "loss": 23.362, "step": 286540 }, { 
"epoch": 0.5788491295547377, "grad_norm": 335.8993835449219, "learning_rate": 4.597265695486685e-06, "loss": 25.632, "step": 286550 }, { "epoch": 0.5788693301874215, "grad_norm": 106.1709976196289, "learning_rate": 4.5969177647947325e-06, "loss": 19.213, "step": 286560 }, { "epoch": 0.5788895308201053, "grad_norm": 277.79095458984375, "learning_rate": 4.596569836067353e-06, "loss": 16.6344, "step": 286570 }, { "epoch": 0.5789097314527891, "grad_norm": 273.9773254394531, "learning_rate": 4.596221909306243e-06, "loss": 17.7803, "step": 286580 }, { "epoch": 0.5789299320854729, "grad_norm": 536.6856079101562, "learning_rate": 4.5958739845131e-06, "loss": 19.9283, "step": 286590 }, { "epoch": 0.5789501327181568, "grad_norm": 151.8507537841797, "learning_rate": 4.595526061689617e-06, "loss": 20.9777, "step": 286600 }, { "epoch": 0.5789703333508406, "grad_norm": 438.26495361328125, "learning_rate": 4.5951781408374915e-06, "loss": 31.4876, "step": 286610 }, { "epoch": 0.5789905339835244, "grad_norm": 323.227294921875, "learning_rate": 4.594830221958419e-06, "loss": 26.477, "step": 286620 }, { "epoch": 0.5790107346162082, "grad_norm": 35.54415512084961, "learning_rate": 4.594482305054094e-06, "loss": 15.607, "step": 286630 }, { "epoch": 0.579030935248892, "grad_norm": 283.94781494140625, "learning_rate": 4.594134390126213e-06, "loss": 20.376, "step": 286640 }, { "epoch": 0.5790511358815759, "grad_norm": 536.4644775390625, "learning_rate": 4.593786477176473e-06, "loss": 15.0391, "step": 286650 }, { "epoch": 0.5790713365142596, "grad_norm": 164.15357971191406, "learning_rate": 4.593438566206567e-06, "loss": 20.2518, "step": 286660 }, { "epoch": 0.5790915371469434, "grad_norm": 388.2125244140625, "learning_rate": 4.593090657218192e-06, "loss": 20.0896, "step": 286670 }, { "epoch": 0.5791117377796272, "grad_norm": 200.10169982910156, "learning_rate": 4.592742750213045e-06, "loss": 30.201, "step": 286680 }, { "epoch": 0.579131938412311, "grad_norm": 227.21434020996094, "learning_rate": 4.59239484519282e-06, "loss": 14.6815, "step": 286690 }, { "epoch": 0.5791521390449949, "grad_norm": 13.020706176757812, "learning_rate": 4.592046942159213e-06, "loss": 15.0803, "step": 286700 }, { "epoch": 0.5791723396776787, "grad_norm": 376.193115234375, "learning_rate": 4.591699041113921e-06, "loss": 19.5756, "step": 286710 }, { "epoch": 0.5791925403103625, "grad_norm": 580.8711547851562, "learning_rate": 4.5913511420586365e-06, "loss": 33.743, "step": 286720 }, { "epoch": 0.5792127409430463, "grad_norm": 304.92724609375, "learning_rate": 4.591003244995056e-06, "loss": 20.8417, "step": 286730 }, { "epoch": 0.5792329415757301, "grad_norm": 239.2734375, "learning_rate": 4.5906553499248775e-06, "loss": 25.4103, "step": 286740 }, { "epoch": 0.579253142208414, "grad_norm": 142.53900146484375, "learning_rate": 4.590307456849797e-06, "loss": 12.3575, "step": 286750 }, { "epoch": 0.5792733428410978, "grad_norm": 168.9327392578125, "learning_rate": 4.589959565771505e-06, "loss": 15.8229, "step": 286760 }, { "epoch": 0.5792935434737816, "grad_norm": 490.1933898925781, "learning_rate": 4.589611676691702e-06, "loss": 8.6005, "step": 286770 }, { "epoch": 0.5793137441064654, "grad_norm": 188.1273651123047, "learning_rate": 4.589263789612083e-06, "loss": 32.1084, "step": 286780 }, { "epoch": 0.5793339447391492, "grad_norm": 163.9735870361328, "learning_rate": 4.588915904534341e-06, "loss": 8.0317, "step": 286790 }, { "epoch": 0.579354145371833, "grad_norm": 319.4701843261719, "learning_rate": 4.588568021460172e-06, "loss": 18.7378, 
"step": 286800 }, { "epoch": 0.5793743460045169, "grad_norm": 392.4018859863281, "learning_rate": 4.588220140391276e-06, "loss": 33.772, "step": 286810 }, { "epoch": 0.5793945466372007, "grad_norm": 518.6945190429688, "learning_rate": 4.5878722613293415e-06, "loss": 21.3204, "step": 286820 }, { "epoch": 0.5794147472698845, "grad_norm": 408.5188293457031, "learning_rate": 4.5875243842760684e-06, "loss": 14.1288, "step": 286830 }, { "epoch": 0.5794349479025683, "grad_norm": 463.9983215332031, "learning_rate": 4.5871765092331536e-06, "loss": 26.961, "step": 286840 }, { "epoch": 0.5794551485352522, "grad_norm": 567.8639526367188, "learning_rate": 4.586828636202288e-06, "loss": 18.6884, "step": 286850 }, { "epoch": 0.579475349167936, "grad_norm": 175.72988891601562, "learning_rate": 4.58648076518517e-06, "loss": 21.585, "step": 286860 }, { "epoch": 0.5794955498006198, "grad_norm": 314.5470275878906, "learning_rate": 4.586132896183494e-06, "loss": 13.5188, "step": 286870 }, { "epoch": 0.5795157504333036, "grad_norm": 173.61602783203125, "learning_rate": 4.5857850291989596e-06, "loss": 21.5312, "step": 286880 }, { "epoch": 0.5795359510659874, "grad_norm": 800.304931640625, "learning_rate": 4.585437164233255e-06, "loss": 217.2249, "step": 286890 }, { "epoch": 0.5795561516986713, "grad_norm": 161.47119140625, "learning_rate": 4.5850893012880806e-06, "loss": 11.6351, "step": 286900 }, { "epoch": 0.5795763523313551, "grad_norm": 493.5769348144531, "learning_rate": 4.584741440365131e-06, "loss": 22.1845, "step": 286910 }, { "epoch": 0.5795965529640388, "grad_norm": 124.64498901367188, "learning_rate": 4.5843935814661e-06, "loss": 23.2055, "step": 286920 }, { "epoch": 0.5796167535967226, "grad_norm": 223.63775634765625, "learning_rate": 4.584045724592686e-06, "loss": 10.2217, "step": 286930 }, { "epoch": 0.5796369542294064, "grad_norm": 1177.7420654296875, "learning_rate": 4.583697869746582e-06, "loss": 30.2963, "step": 286940 }, { "epoch": 0.5796571548620902, "grad_norm": 45.36520004272461, "learning_rate": 4.583350016929484e-06, "loss": 16.0916, "step": 286950 }, { "epoch": 0.5796773554947741, "grad_norm": 557.9664306640625, "learning_rate": 4.583002166143086e-06, "loss": 12.6396, "step": 286960 }, { "epoch": 0.5796975561274579, "grad_norm": 278.6199951171875, "learning_rate": 4.582654317389088e-06, "loss": 7.6749, "step": 286970 }, { "epoch": 0.5797177567601417, "grad_norm": 443.8434143066406, "learning_rate": 4.58230647066918e-06, "loss": 19.511, "step": 286980 }, { "epoch": 0.5797379573928255, "grad_norm": 422.25677490234375, "learning_rate": 4.581958625985059e-06, "loss": 17.3441, "step": 286990 }, { "epoch": 0.5797581580255093, "grad_norm": 204.35519409179688, "learning_rate": 4.581610783338424e-06, "loss": 22.0155, "step": 287000 }, { "epoch": 0.5797783586581932, "grad_norm": 589.8109741210938, "learning_rate": 4.581262942730965e-06, "loss": 13.1869, "step": 287010 }, { "epoch": 0.579798559290877, "grad_norm": 406.0547180175781, "learning_rate": 4.58091510416438e-06, "loss": 15.0325, "step": 287020 }, { "epoch": 0.5798187599235608, "grad_norm": 63.33355712890625, "learning_rate": 4.580567267640363e-06, "loss": 18.0086, "step": 287030 }, { "epoch": 0.5798389605562446, "grad_norm": 195.82664489746094, "learning_rate": 4.580219433160613e-06, "loss": 50.8753, "step": 287040 }, { "epoch": 0.5798591611889284, "grad_norm": 33.13059997558594, "learning_rate": 4.579871600726819e-06, "loss": 26.4659, "step": 287050 }, { "epoch": 0.5798793618216123, "grad_norm": 666.5596923828125, "learning_rate": 
4.579523770340681e-06, "loss": 18.8621, "step": 287060 }, { "epoch": 0.5798995624542961, "grad_norm": 581.1525268554688, "learning_rate": 4.579175942003895e-06, "loss": 21.7768, "step": 287070 }, { "epoch": 0.5799197630869799, "grad_norm": 287.9134521484375, "learning_rate": 4.578828115718153e-06, "loss": 11.986, "step": 287080 }, { "epoch": 0.5799399637196637, "grad_norm": 562.1785278320312, "learning_rate": 4.578480291485153e-06, "loss": 24.1375, "step": 287090 }, { "epoch": 0.5799601643523475, "grad_norm": 201.13356018066406, "learning_rate": 4.578132469306588e-06, "loss": 17.8536, "step": 287100 }, { "epoch": 0.5799803649850314, "grad_norm": 63.46379089355469, "learning_rate": 4.5777846491841536e-06, "loss": 21.7501, "step": 287110 }, { "epoch": 0.5800005656177152, "grad_norm": 523.4032592773438, "learning_rate": 4.577436831119545e-06, "loss": 17.5555, "step": 287120 }, { "epoch": 0.580020766250399, "grad_norm": 2.2471923828125, "learning_rate": 4.577089015114461e-06, "loss": 14.3536, "step": 287130 }, { "epoch": 0.5800409668830828, "grad_norm": 1016.3574829101562, "learning_rate": 4.57674120117059e-06, "loss": 17.8618, "step": 287140 }, { "epoch": 0.5800611675157666, "grad_norm": 332.4660339355469, "learning_rate": 4.576393389289633e-06, "loss": 12.7511, "step": 287150 }, { "epoch": 0.5800813681484505, "grad_norm": 174.55853271484375, "learning_rate": 4.576045579473284e-06, "loss": 22.5814, "step": 287160 }, { "epoch": 0.5801015687811343, "grad_norm": 28.36602020263672, "learning_rate": 4.575697771723236e-06, "loss": 10.2393, "step": 287170 }, { "epoch": 0.580121769413818, "grad_norm": 220.09515380859375, "learning_rate": 4.575349966041187e-06, "loss": 11.1455, "step": 287180 }, { "epoch": 0.5801419700465018, "grad_norm": 174.09661865234375, "learning_rate": 4.5750021624288285e-06, "loss": 6.7785, "step": 287190 }, { "epoch": 0.5801621706791856, "grad_norm": 443.2445373535156, "learning_rate": 4.57465436088786e-06, "loss": 23.8112, "step": 287200 }, { "epoch": 0.5801823713118694, "grad_norm": 245.59463500976562, "learning_rate": 4.574306561419974e-06, "loss": 18.2209, "step": 287210 }, { "epoch": 0.5802025719445533, "grad_norm": 409.2039794921875, "learning_rate": 4.573958764026866e-06, "loss": 10.2005, "step": 287220 }, { "epoch": 0.5802227725772371, "grad_norm": 924.8493041992188, "learning_rate": 4.573610968710233e-06, "loss": 19.8324, "step": 287230 }, { "epoch": 0.5802429732099209, "grad_norm": 490.92413330078125, "learning_rate": 4.573263175471766e-06, "loss": 16.8571, "step": 287240 }, { "epoch": 0.5802631738426047, "grad_norm": 110.32278442382812, "learning_rate": 4.572915384313163e-06, "loss": 9.8044, "step": 287250 }, { "epoch": 0.5802833744752885, "grad_norm": 151.6855926513672, "learning_rate": 4.57256759523612e-06, "loss": 22.8395, "step": 287260 }, { "epoch": 0.5803035751079724, "grad_norm": 229.5697784423828, "learning_rate": 4.572219808242328e-06, "loss": 20.2441, "step": 287270 }, { "epoch": 0.5803237757406562, "grad_norm": 383.2900390625, "learning_rate": 4.571872023333487e-06, "loss": 20.9785, "step": 287280 }, { "epoch": 0.58034397637334, "grad_norm": 570.6531982421875, "learning_rate": 4.57152424051129e-06, "loss": 22.2628, "step": 287290 }, { "epoch": 0.5803641770060238, "grad_norm": 89.79441833496094, "learning_rate": 4.571176459777431e-06, "loss": 37.244, "step": 287300 }, { "epoch": 0.5803843776387076, "grad_norm": 169.4888153076172, "learning_rate": 4.570828681133606e-06, "loss": 13.9649, "step": 287310 }, { "epoch": 0.5804045782713915, "grad_norm": 
468.6986389160156, "learning_rate": 4.570480904581511e-06, "loss": 27.3959, "step": 287320 }, { "epoch": 0.5804247789040753, "grad_norm": 386.35137939453125, "learning_rate": 4.5701331301228395e-06, "loss": 23.3206, "step": 287330 }, { "epoch": 0.5804449795367591, "grad_norm": 387.7290344238281, "learning_rate": 4.5697853577592846e-06, "loss": 18.9909, "step": 287340 }, { "epoch": 0.5804651801694429, "grad_norm": 516.852294921875, "learning_rate": 4.569437587492545e-06, "loss": 12.2166, "step": 287350 }, { "epoch": 0.5804853808021267, "grad_norm": 359.33599853515625, "learning_rate": 4.569089819324317e-06, "loss": 9.5905, "step": 287360 }, { "epoch": 0.5805055814348106, "grad_norm": 562.2782592773438, "learning_rate": 4.56874205325629e-06, "loss": 17.7842, "step": 287370 }, { "epoch": 0.5805257820674944, "grad_norm": 362.6568908691406, "learning_rate": 4.568394289290163e-06, "loss": 21.974, "step": 287380 }, { "epoch": 0.5805459827001782, "grad_norm": 225.65406799316406, "learning_rate": 4.5680465274276306e-06, "loss": 17.6372, "step": 287390 }, { "epoch": 0.580566183332862, "grad_norm": 454.0522766113281, "learning_rate": 4.5676987676703865e-06, "loss": 18.4502, "step": 287400 }, { "epoch": 0.5805863839655458, "grad_norm": 207.7526397705078, "learning_rate": 4.567351010020124e-06, "loss": 14.4471, "step": 287410 }, { "epoch": 0.5806065845982297, "grad_norm": 397.4646301269531, "learning_rate": 4.567003254478545e-06, "loss": 19.5973, "step": 287420 }, { "epoch": 0.5806267852309134, "grad_norm": 484.93377685546875, "learning_rate": 4.566655501047335e-06, "loss": 22.8856, "step": 287430 }, { "epoch": 0.5806469858635972, "grad_norm": 415.3368225097656, "learning_rate": 4.566307749728195e-06, "loss": 20.3493, "step": 287440 }, { "epoch": 0.580667186496281, "grad_norm": 691.5158081054688, "learning_rate": 4.56596000052282e-06, "loss": 20.2267, "step": 287450 }, { "epoch": 0.5806873871289648, "grad_norm": 280.9125671386719, "learning_rate": 4.565612253432902e-06, "loss": 15.8731, "step": 287460 }, { "epoch": 0.5807075877616487, "grad_norm": 175.01219177246094, "learning_rate": 4.565264508460137e-06, "loss": 10.2099, "step": 287470 }, { "epoch": 0.5807277883943325, "grad_norm": 394.6832275390625, "learning_rate": 4.564916765606218e-06, "loss": 15.352, "step": 287480 }, { "epoch": 0.5807479890270163, "grad_norm": 269.8906555175781, "learning_rate": 4.564569024872846e-06, "loss": 17.4512, "step": 287490 }, { "epoch": 0.5807681896597001, "grad_norm": 192.99142456054688, "learning_rate": 4.564221286261709e-06, "loss": 7.287, "step": 287500 }, { "epoch": 0.5807883902923839, "grad_norm": 382.4043273925781, "learning_rate": 4.563873549774506e-06, "loss": 20.0747, "step": 287510 }, { "epoch": 0.5808085909250678, "grad_norm": 133.63954162597656, "learning_rate": 4.56352581541293e-06, "loss": 27.828, "step": 287520 }, { "epoch": 0.5808287915577516, "grad_norm": 407.80230712890625, "learning_rate": 4.5631780831786765e-06, "loss": 22.2352, "step": 287530 }, { "epoch": 0.5808489921904354, "grad_norm": 592.959228515625, "learning_rate": 4.5628303530734395e-06, "loss": 22.7711, "step": 287540 }, { "epoch": 0.5808691928231192, "grad_norm": 159.25987243652344, "learning_rate": 4.5624826250989156e-06, "loss": 15.9744, "step": 287550 }, { "epoch": 0.580889393455803, "grad_norm": 18.627595901489258, "learning_rate": 4.562134899256797e-06, "loss": 28.6998, "step": 287560 }, { "epoch": 0.5809095940884869, "grad_norm": 481.6170959472656, "learning_rate": 4.561787175548779e-06, "loss": 16.4343, "step": 287570 }, { 
"epoch": 0.5809297947211707, "grad_norm": 274.38763427734375, "learning_rate": 4.56143945397656e-06, "loss": 14.786, "step": 287580 }, { "epoch": 0.5809499953538545, "grad_norm": 249.79217529296875, "learning_rate": 4.5610917345418285e-06, "loss": 12.4017, "step": 287590 }, { "epoch": 0.5809701959865383, "grad_norm": 113.56183624267578, "learning_rate": 4.560744017246284e-06, "loss": 17.7107, "step": 287600 }, { "epoch": 0.5809903966192221, "grad_norm": 106.08311462402344, "learning_rate": 4.560396302091622e-06, "loss": 24.6664, "step": 287610 }, { "epoch": 0.581010597251906, "grad_norm": 107.45292663574219, "learning_rate": 4.5600485890795325e-06, "loss": 11.5848, "step": 287620 }, { "epoch": 0.5810307978845898, "grad_norm": 274.9022216796875, "learning_rate": 4.559700878211714e-06, "loss": 14.5965, "step": 287630 }, { "epoch": 0.5810509985172736, "grad_norm": 259.85986328125, "learning_rate": 4.5593531694898576e-06, "loss": 21.7642, "step": 287640 }, { "epoch": 0.5810711991499574, "grad_norm": 212.4558563232422, "learning_rate": 4.559005462915665e-06, "loss": 22.2325, "step": 287650 }, { "epoch": 0.5810913997826412, "grad_norm": 446.8583068847656, "learning_rate": 4.558657758490822e-06, "loss": 14.455, "step": 287660 }, { "epoch": 0.5811116004153251, "grad_norm": 626.045654296875, "learning_rate": 4.5583100562170294e-06, "loss": 12.9183, "step": 287670 }, { "epoch": 0.5811318010480089, "grad_norm": 509.44427490234375, "learning_rate": 4.55796235609598e-06, "loss": 11.8783, "step": 287680 }, { "epoch": 0.5811520016806926, "grad_norm": 395.339599609375, "learning_rate": 4.557614658129369e-06, "loss": 9.2559, "step": 287690 }, { "epoch": 0.5811722023133764, "grad_norm": 326.5661926269531, "learning_rate": 4.557266962318889e-06, "loss": 24.1077, "step": 287700 }, { "epoch": 0.5811924029460602, "grad_norm": 43.446617126464844, "learning_rate": 4.556919268666238e-06, "loss": 19.6863, "step": 287710 }, { "epoch": 0.581212603578744, "grad_norm": 296.4700927734375, "learning_rate": 4.5565715771731075e-06, "loss": 31.4844, "step": 287720 }, { "epoch": 0.5812328042114279, "grad_norm": 282.0885009765625, "learning_rate": 4.556223887841192e-06, "loss": 21.5665, "step": 287730 }, { "epoch": 0.5812530048441117, "grad_norm": 263.99066162109375, "learning_rate": 4.555876200672192e-06, "loss": 18.8875, "step": 287740 }, { "epoch": 0.5812732054767955, "grad_norm": 2.8462512493133545, "learning_rate": 4.555528515667793e-06, "loss": 12.3651, "step": 287750 }, { "epoch": 0.5812934061094793, "grad_norm": 164.52963256835938, "learning_rate": 4.555180832829695e-06, "loss": 19.4629, "step": 287760 }, { "epoch": 0.5813136067421631, "grad_norm": 493.3425598144531, "learning_rate": 4.554833152159594e-06, "loss": 14.9375, "step": 287770 }, { "epoch": 0.581333807374847, "grad_norm": 169.95790100097656, "learning_rate": 4.55448547365918e-06, "loss": 10.8374, "step": 287780 }, { "epoch": 0.5813540080075308, "grad_norm": 167.28880310058594, "learning_rate": 4.5541377973301505e-06, "loss": 19.0744, "step": 287790 }, { "epoch": 0.5813742086402146, "grad_norm": 216.5238494873047, "learning_rate": 4.553790123174198e-06, "loss": 22.8251, "step": 287800 }, { "epoch": 0.5813944092728984, "grad_norm": 540.6886596679688, "learning_rate": 4.553442451193021e-06, "loss": 17.8551, "step": 287810 }, { "epoch": 0.5814146099055822, "grad_norm": 124.38551330566406, "learning_rate": 4.553094781388309e-06, "loss": 18.4553, "step": 287820 }, { "epoch": 0.5814348105382661, "grad_norm": 391.7790222167969, "learning_rate": 
4.552747113761759e-06, "loss": 10.7357, "step": 287830 }, { "epoch": 0.5814550111709499, "grad_norm": 499.1982727050781, "learning_rate": 4.552399448315067e-06, "loss": 22.711, "step": 287840 }, { "epoch": 0.5814752118036337, "grad_norm": 557.7574462890625, "learning_rate": 4.552051785049925e-06, "loss": 22.8761, "step": 287850 }, { "epoch": 0.5814954124363175, "grad_norm": 20.266773223876953, "learning_rate": 4.551704123968027e-06, "loss": 12.2276, "step": 287860 }, { "epoch": 0.5815156130690013, "grad_norm": 0.0, "learning_rate": 4.55135646507107e-06, "loss": 23.068, "step": 287870 }, { "epoch": 0.5815358137016852, "grad_norm": 70.69975280761719, "learning_rate": 4.551008808360747e-06, "loss": 26.6899, "step": 287880 }, { "epoch": 0.581556014334369, "grad_norm": 253.72727966308594, "learning_rate": 4.550661153838752e-06, "loss": 19.0773, "step": 287890 }, { "epoch": 0.5815762149670528, "grad_norm": 669.1590576171875, "learning_rate": 4.5503135015067815e-06, "loss": 19.9383, "step": 287900 }, { "epoch": 0.5815964155997366, "grad_norm": 689.8755493164062, "learning_rate": 4.549965851366528e-06, "loss": 18.6397, "step": 287910 }, { "epoch": 0.5816166162324204, "grad_norm": 631.7351684570312, "learning_rate": 4.549618203419684e-06, "loss": 14.6648, "step": 287920 }, { "epoch": 0.5816368168651043, "grad_norm": 458.5159606933594, "learning_rate": 4.549270557667949e-06, "loss": 19.0287, "step": 287930 }, { "epoch": 0.581657017497788, "grad_norm": 1231.04248046875, "learning_rate": 4.548922914113014e-06, "loss": 43.0836, "step": 287940 }, { "epoch": 0.5816772181304718, "grad_norm": 546.4039306640625, "learning_rate": 4.548575272756573e-06, "loss": 13.7789, "step": 287950 }, { "epoch": 0.5816974187631556, "grad_norm": 531.2095947265625, "learning_rate": 4.548227633600322e-06, "loss": 17.3354, "step": 287960 }, { "epoch": 0.5817176193958394, "grad_norm": 574.9794921875, "learning_rate": 4.547879996645956e-06, "loss": 18.906, "step": 287970 }, { "epoch": 0.5817378200285233, "grad_norm": 114.48976135253906, "learning_rate": 4.5475323618951665e-06, "loss": 20.1184, "step": 287980 }, { "epoch": 0.5817580206612071, "grad_norm": 164.88328552246094, "learning_rate": 4.5471847293496495e-06, "loss": 23.6874, "step": 287990 }, { "epoch": 0.5817782212938909, "grad_norm": 20.963428497314453, "learning_rate": 4.546837099011101e-06, "loss": 12.7755, "step": 288000 }, { "epoch": 0.5817984219265747, "grad_norm": 276.94244384765625, "learning_rate": 4.546489470881211e-06, "loss": 16.2343, "step": 288010 }, { "epoch": 0.5818186225592585, "grad_norm": 284.3495788574219, "learning_rate": 4.5461418449616765e-06, "loss": 19.776, "step": 288020 }, { "epoch": 0.5818388231919424, "grad_norm": 25.324033737182617, "learning_rate": 4.5457942212541944e-06, "loss": 27.6506, "step": 288030 }, { "epoch": 0.5818590238246262, "grad_norm": 87.18405151367188, "learning_rate": 4.545446599760453e-06, "loss": 16.6801, "step": 288040 }, { "epoch": 0.58187922445731, "grad_norm": 823.1707763671875, "learning_rate": 4.545098980482151e-06, "loss": 19.9332, "step": 288050 }, { "epoch": 0.5818994250899938, "grad_norm": 916.0725708007812, "learning_rate": 4.544751363420981e-06, "loss": 27.003, "step": 288060 }, { "epoch": 0.5819196257226776, "grad_norm": 371.8467102050781, "learning_rate": 4.544403748578638e-06, "loss": 24.9305, "step": 288070 }, { "epoch": 0.5819398263553615, "grad_norm": 406.9341125488281, "learning_rate": 4.544056135956816e-06, "loss": 19.5344, "step": 288080 }, { "epoch": 0.5819600269880453, "grad_norm": 
1053.5054931640625, "learning_rate": 4.543708525557208e-06, "loss": 48.0804, "step": 288090 }, { "epoch": 0.5819802276207291, "grad_norm": 192.28451538085938, "learning_rate": 4.543360917381512e-06, "loss": 12.5592, "step": 288100 }, { "epoch": 0.5820004282534129, "grad_norm": 68.4184799194336, "learning_rate": 4.543013311431417e-06, "loss": 12.5787, "step": 288110 }, { "epoch": 0.5820206288860967, "grad_norm": 327.2054443359375, "learning_rate": 4.54266570770862e-06, "loss": 16.7678, "step": 288120 }, { "epoch": 0.5820408295187806, "grad_norm": 734.5392456054688, "learning_rate": 4.542318106214817e-06, "loss": 23.1994, "step": 288130 }, { "epoch": 0.5820610301514644, "grad_norm": 542.3958129882812, "learning_rate": 4.541970506951698e-06, "loss": 22.6746, "step": 288140 }, { "epoch": 0.5820812307841482, "grad_norm": 24.169677734375, "learning_rate": 4.541622909920959e-06, "loss": 14.9233, "step": 288150 }, { "epoch": 0.582101431416832, "grad_norm": 4.579701900482178, "learning_rate": 4.541275315124296e-06, "loss": 11.0592, "step": 288160 }, { "epoch": 0.5821216320495158, "grad_norm": 453.038818359375, "learning_rate": 4.5409277225634e-06, "loss": 13.6834, "step": 288170 }, { "epoch": 0.5821418326821997, "grad_norm": 152.3386688232422, "learning_rate": 4.540580132239966e-06, "loss": 20.8467, "step": 288180 }, { "epoch": 0.5821620333148835, "grad_norm": 571.427001953125, "learning_rate": 4.540232544155692e-06, "loss": 27.3594, "step": 288190 }, { "epoch": 0.5821822339475672, "grad_norm": 588.6444091796875, "learning_rate": 4.539884958312265e-06, "loss": 22.54, "step": 288200 }, { "epoch": 0.582202434580251, "grad_norm": 333.3024597167969, "learning_rate": 4.539537374711384e-06, "loss": 19.5617, "step": 288210 }, { "epoch": 0.5822226352129348, "grad_norm": 237.90054321289062, "learning_rate": 4.5391897933547436e-06, "loss": 22.8537, "step": 288220 }, { "epoch": 0.5822428358456186, "grad_norm": 604.9053344726562, "learning_rate": 4.538842214244035e-06, "loss": 22.4265, "step": 288230 }, { "epoch": 0.5822630364783025, "grad_norm": 503.3699035644531, "learning_rate": 4.538494637380953e-06, "loss": 22.6053, "step": 288240 }, { "epoch": 0.5822832371109863, "grad_norm": 430.1613464355469, "learning_rate": 4.538147062767191e-06, "loss": 26.2458, "step": 288250 }, { "epoch": 0.5823034377436701, "grad_norm": 5.554739475250244, "learning_rate": 4.5377994904044485e-06, "loss": 21.6245, "step": 288260 }, { "epoch": 0.5823236383763539, "grad_norm": 442.7223815917969, "learning_rate": 4.537451920294411e-06, "loss": 24.7883, "step": 288270 }, { "epoch": 0.5823438390090377, "grad_norm": 566.8401489257812, "learning_rate": 4.537104352438779e-06, "loss": 17.7039, "step": 288280 }, { "epoch": 0.5823640396417216, "grad_norm": 266.1036071777344, "learning_rate": 4.5367567868392445e-06, "loss": 13.9091, "step": 288290 }, { "epoch": 0.5823842402744054, "grad_norm": 478.8221740722656, "learning_rate": 4.5364092234975e-06, "loss": 13.9773, "step": 288300 }, { "epoch": 0.5824044409070892, "grad_norm": 693.6759033203125, "learning_rate": 4.536061662415241e-06, "loss": 46.1397, "step": 288310 }, { "epoch": 0.582424641539773, "grad_norm": 367.02276611328125, "learning_rate": 4.535714103594162e-06, "loss": 14.7058, "step": 288320 }, { "epoch": 0.5824448421724568, "grad_norm": 294.8173828125, "learning_rate": 4.535366547035955e-06, "loss": 7.4283, "step": 288330 }, { "epoch": 0.5824650428051407, "grad_norm": 1459.068603515625, "learning_rate": 4.535018992742315e-06, "loss": 33.764, "step": 288340 }, { "epoch": 
0.5824852434378245, "grad_norm": 231.80996704101562, "learning_rate": 4.534671440714939e-06, "loss": 22.8392, "step": 288350 }, { "epoch": 0.5825054440705083, "grad_norm": 917.2021484375, "learning_rate": 4.534323890955514e-06, "loss": 27.3177, "step": 288360 }, { "epoch": 0.5825256447031921, "grad_norm": 411.9390869140625, "learning_rate": 4.533976343465739e-06, "loss": 21.1837, "step": 288370 }, { "epoch": 0.582545845335876, "grad_norm": 304.41455078125, "learning_rate": 4.533628798247308e-06, "loss": 17.4472, "step": 288380 }, { "epoch": 0.5825660459685598, "grad_norm": 485.7433166503906, "learning_rate": 4.533281255301913e-06, "loss": 28.0418, "step": 288390 }, { "epoch": 0.5825862466012436, "grad_norm": 526.515625, "learning_rate": 4.532933714631248e-06, "loss": 14.768, "step": 288400 }, { "epoch": 0.5826064472339274, "grad_norm": 335.7763366699219, "learning_rate": 4.532586176237007e-06, "loss": 15.7195, "step": 288410 }, { "epoch": 0.5826266478666112, "grad_norm": 452.3872375488281, "learning_rate": 4.532238640120887e-06, "loss": 23.946, "step": 288420 }, { "epoch": 0.582646848499295, "grad_norm": 144.83799743652344, "learning_rate": 4.531891106284576e-06, "loss": 17.1629, "step": 288430 }, { "epoch": 0.5826670491319789, "grad_norm": 40.88713836669922, "learning_rate": 4.531543574729772e-06, "loss": 32.2347, "step": 288440 }, { "epoch": 0.5826872497646627, "grad_norm": 157.28497314453125, "learning_rate": 4.5311960454581685e-06, "loss": 22.038, "step": 288450 }, { "epoch": 0.5827074503973464, "grad_norm": 490.5456237792969, "learning_rate": 4.5308485184714585e-06, "loss": 16.3475, "step": 288460 }, { "epoch": 0.5827276510300302, "grad_norm": 391.0112609863281, "learning_rate": 4.530500993771335e-06, "loss": 15.6698, "step": 288470 }, { "epoch": 0.582747851662714, "grad_norm": 42.42919921875, "learning_rate": 4.530153471359495e-06, "loss": 14.5029, "step": 288480 }, { "epoch": 0.5827680522953979, "grad_norm": 241.38282775878906, "learning_rate": 4.529805951237628e-06, "loss": 14.4093, "step": 288490 }, { "epoch": 0.5827882529280817, "grad_norm": 162.1904754638672, "learning_rate": 4.529458433407429e-06, "loss": 15.7063, "step": 288500 }, { "epoch": 0.5828084535607655, "grad_norm": 615.3388061523438, "learning_rate": 4.529110917870594e-06, "loss": 27.1512, "step": 288510 }, { "epoch": 0.5828286541934493, "grad_norm": 329.695068359375, "learning_rate": 4.528763404628815e-06, "loss": 37.7019, "step": 288520 }, { "epoch": 0.5828488548261331, "grad_norm": 411.4642639160156, "learning_rate": 4.528415893683785e-06, "loss": 17.7521, "step": 288530 }, { "epoch": 0.582869055458817, "grad_norm": 225.48716735839844, "learning_rate": 4.5280683850372e-06, "loss": 8.8208, "step": 288540 }, { "epoch": 0.5828892560915008, "grad_norm": 522.101806640625, "learning_rate": 4.527720878690752e-06, "loss": 26.4603, "step": 288550 }, { "epoch": 0.5829094567241846, "grad_norm": 274.3569030761719, "learning_rate": 4.527373374646136e-06, "loss": 21.8242, "step": 288560 }, { "epoch": 0.5829296573568684, "grad_norm": 226.85150146484375, "learning_rate": 4.527025872905043e-06, "loss": 11.6747, "step": 288570 }, { "epoch": 0.5829498579895522, "grad_norm": 649.5722045898438, "learning_rate": 4.52667837346917e-06, "loss": 20.3602, "step": 288580 }, { "epoch": 0.582970058622236, "grad_norm": 627.9529418945312, "learning_rate": 4.526330876340209e-06, "loss": 20.5397, "step": 288590 }, { "epoch": 0.5829902592549199, "grad_norm": 178.1346893310547, "learning_rate": 4.525983381519853e-06, "loss": 14.0624, "step": 288600 
}, { "epoch": 0.5830104598876037, "grad_norm": 721.8845825195312, "learning_rate": 4.525635889009798e-06, "loss": 26.133, "step": 288610 }, { "epoch": 0.5830306605202875, "grad_norm": 629.3155517578125, "learning_rate": 4.5252883988117356e-06, "loss": 21.6051, "step": 288620 }, { "epoch": 0.5830508611529713, "grad_norm": 549.1826171875, "learning_rate": 4.524940910927359e-06, "loss": 26.5076, "step": 288630 }, { "epoch": 0.5830710617856552, "grad_norm": 245.66929626464844, "learning_rate": 4.524593425358364e-06, "loss": 11.9352, "step": 288640 }, { "epoch": 0.583091262418339, "grad_norm": 463.8624267578125, "learning_rate": 4.524245942106442e-06, "loss": 21.8307, "step": 288650 }, { "epoch": 0.5831114630510228, "grad_norm": 347.1028747558594, "learning_rate": 4.523898461173288e-06, "loss": 6.4501, "step": 288660 }, { "epoch": 0.5831316636837066, "grad_norm": 693.39306640625, "learning_rate": 4.5235509825605965e-06, "loss": 21.0688, "step": 288670 }, { "epoch": 0.5831518643163904, "grad_norm": 626.076904296875, "learning_rate": 4.523203506270058e-06, "loss": 17.7032, "step": 288680 }, { "epoch": 0.5831720649490743, "grad_norm": 303.54803466796875, "learning_rate": 4.5228560323033675e-06, "loss": 18.9164, "step": 288690 }, { "epoch": 0.5831922655817581, "grad_norm": 255.87606811523438, "learning_rate": 4.522508560662219e-06, "loss": 19.003, "step": 288700 }, { "epoch": 0.5832124662144418, "grad_norm": 217.08895874023438, "learning_rate": 4.522161091348308e-06, "loss": 18.2368, "step": 288710 }, { "epoch": 0.5832326668471256, "grad_norm": 529.8825073242188, "learning_rate": 4.521813624363323e-06, "loss": 25.8109, "step": 288720 }, { "epoch": 0.5832528674798094, "grad_norm": 520.8623657226562, "learning_rate": 4.521466159708962e-06, "loss": 21.7238, "step": 288730 }, { "epoch": 0.5832730681124932, "grad_norm": 29.747051239013672, "learning_rate": 4.521118697386917e-06, "loss": 11.2049, "step": 288740 }, { "epoch": 0.5832932687451771, "grad_norm": 346.5938720703125, "learning_rate": 4.52077123739888e-06, "loss": 22.0653, "step": 288750 }, { "epoch": 0.5833134693778609, "grad_norm": 179.41348266601562, "learning_rate": 4.520423779746547e-06, "loss": 19.6645, "step": 288760 }, { "epoch": 0.5833336700105447, "grad_norm": 683.7769165039062, "learning_rate": 4.520076324431612e-06, "loss": 26.248, "step": 288770 }, { "epoch": 0.5833538706432285, "grad_norm": 198.7277374267578, "learning_rate": 4.519728871455764e-06, "loss": 17.4634, "step": 288780 }, { "epoch": 0.5833740712759123, "grad_norm": 60.829261779785156, "learning_rate": 4.519381420820699e-06, "loss": 25.8267, "step": 288790 }, { "epoch": 0.5833942719085962, "grad_norm": 98.24775695800781, "learning_rate": 4.519033972528114e-06, "loss": 21.8371, "step": 288800 }, { "epoch": 0.58341447254128, "grad_norm": 700.4127197265625, "learning_rate": 4.518686526579695e-06, "loss": 14.5168, "step": 288810 }, { "epoch": 0.5834346731739638, "grad_norm": 361.0285339355469, "learning_rate": 4.518339082977142e-06, "loss": 23.4959, "step": 288820 }, { "epoch": 0.5834548738066476, "grad_norm": 611.7372436523438, "learning_rate": 4.517991641722146e-06, "loss": 12.2447, "step": 288830 }, { "epoch": 0.5834750744393314, "grad_norm": 477.4300842285156, "learning_rate": 4.517644202816399e-06, "loss": 20.9765, "step": 288840 }, { "epoch": 0.5834952750720153, "grad_norm": 469.504638671875, "learning_rate": 4.517296766261596e-06, "loss": 14.6185, "step": 288850 }, { "epoch": 0.5835154757046991, "grad_norm": 377.9756164550781, "learning_rate": 4.516949332059429e-06, 
"loss": 9.1162, "step": 288860 }, { "epoch": 0.5835356763373829, "grad_norm": 477.607421875, "learning_rate": 4.516601900211595e-06, "loss": 27.8192, "step": 288870 }, { "epoch": 0.5835558769700667, "grad_norm": 136.77139282226562, "learning_rate": 4.516254470719783e-06, "loss": 15.3713, "step": 288880 }, { "epoch": 0.5835760776027505, "grad_norm": 562.5999755859375, "learning_rate": 4.515907043585688e-06, "loss": 16.258, "step": 288890 }, { "epoch": 0.5835962782354344, "grad_norm": 3.4296677112579346, "learning_rate": 4.5155596188110055e-06, "loss": 19.372, "step": 288900 }, { "epoch": 0.5836164788681182, "grad_norm": 393.48583984375, "learning_rate": 4.515212196397424e-06, "loss": 13.7755, "step": 288910 }, { "epoch": 0.583636679500802, "grad_norm": 251.51904296875, "learning_rate": 4.5148647763466405e-06, "loss": 21.0673, "step": 288920 }, { "epoch": 0.5836568801334858, "grad_norm": 1052.9322509765625, "learning_rate": 4.514517358660347e-06, "loss": 16.6626, "step": 288930 }, { "epoch": 0.5836770807661696, "grad_norm": 484.47119140625, "learning_rate": 4.514169943340238e-06, "loss": 19.6845, "step": 288940 }, { "epoch": 0.5836972813988535, "grad_norm": 55.82490921020508, "learning_rate": 4.513822530388004e-06, "loss": 25.6817, "step": 288950 }, { "epoch": 0.5837174820315373, "grad_norm": 392.26385498046875, "learning_rate": 4.513475119805342e-06, "loss": 16.0411, "step": 288960 }, { "epoch": 0.583737682664221, "grad_norm": 221.4032745361328, "learning_rate": 4.513127711593941e-06, "loss": 9.7543, "step": 288970 }, { "epoch": 0.5837578832969048, "grad_norm": 289.62725830078125, "learning_rate": 4.512780305755498e-06, "loss": 17.1563, "step": 288980 }, { "epoch": 0.5837780839295886, "grad_norm": 84.89144897460938, "learning_rate": 4.512432902291703e-06, "loss": 30.9123, "step": 288990 }, { "epoch": 0.5837982845622725, "grad_norm": 87.54905700683594, "learning_rate": 4.512085501204254e-06, "loss": 14.9372, "step": 289000 }, { "epoch": 0.5838184851949563, "grad_norm": 118.93350982666016, "learning_rate": 4.511738102494839e-06, "loss": 13.1498, "step": 289010 }, { "epoch": 0.5838386858276401, "grad_norm": 316.2390441894531, "learning_rate": 4.5113907061651524e-06, "loss": 22.6011, "step": 289020 }, { "epoch": 0.5838588864603239, "grad_norm": 306.5341796875, "learning_rate": 4.511043312216891e-06, "loss": 26.2065, "step": 289030 }, { "epoch": 0.5838790870930077, "grad_norm": 165.6309356689453, "learning_rate": 4.510695920651742e-06, "loss": 27.4762, "step": 289040 }, { "epoch": 0.5838992877256916, "grad_norm": 160.45919799804688, "learning_rate": 4.510348531471403e-06, "loss": 16.4532, "step": 289050 }, { "epoch": 0.5839194883583754, "grad_norm": 64.6205062866211, "learning_rate": 4.510001144677568e-06, "loss": 9.6832, "step": 289060 }, { "epoch": 0.5839396889910592, "grad_norm": 452.0450439453125, "learning_rate": 4.509653760271926e-06, "loss": 12.1438, "step": 289070 }, { "epoch": 0.583959889623743, "grad_norm": 19.967851638793945, "learning_rate": 4.509306378256172e-06, "loss": 13.3504, "step": 289080 }, { "epoch": 0.5839800902564268, "grad_norm": 254.1773223876953, "learning_rate": 4.508958998632e-06, "loss": 23.1505, "step": 289090 }, { "epoch": 0.5840002908891107, "grad_norm": 863.7036743164062, "learning_rate": 4.508611621401102e-06, "loss": 23.4529, "step": 289100 }, { "epoch": 0.5840204915217945, "grad_norm": 303.6961669921875, "learning_rate": 4.50826424656517e-06, "loss": 15.552, "step": 289110 }, { "epoch": 0.5840406921544783, "grad_norm": 358.7690734863281, "learning_rate": 
4.507916874125902e-06, "loss": 22.2946, "step": 289120 }, { "epoch": 0.5840608927871621, "grad_norm": 330.37896728515625, "learning_rate": 4.507569504084983e-06, "loss": 28.6702, "step": 289130 }, { "epoch": 0.5840810934198459, "grad_norm": 5.9064483642578125, "learning_rate": 4.5072221364441126e-06, "loss": 18.6341, "step": 289140 }, { "epoch": 0.5841012940525298, "grad_norm": 214.5717315673828, "learning_rate": 4.506874771204981e-06, "loss": 15.4846, "step": 289150 }, { "epoch": 0.5841214946852136, "grad_norm": 230.72467041015625, "learning_rate": 4.506527408369285e-06, "loss": 17.9754, "step": 289160 }, { "epoch": 0.5841416953178974, "grad_norm": 510.48687744140625, "learning_rate": 4.506180047938711e-06, "loss": 37.1512, "step": 289170 }, { "epoch": 0.5841618959505812, "grad_norm": 689.6371459960938, "learning_rate": 4.505832689914956e-06, "loss": 22.063, "step": 289180 }, { "epoch": 0.584182096583265, "grad_norm": 243.24209594726562, "learning_rate": 4.505485334299714e-06, "loss": 12.5306, "step": 289190 }, { "epoch": 0.5842022972159489, "grad_norm": 547.1610717773438, "learning_rate": 4.505137981094675e-06, "loss": 8.9335, "step": 289200 }, { "epoch": 0.5842224978486327, "grad_norm": 285.6500549316406, "learning_rate": 4.504790630301535e-06, "loss": 19.7256, "step": 289210 }, { "epoch": 0.5842426984813164, "grad_norm": 699.3668823242188, "learning_rate": 4.504443281921985e-06, "loss": 16.4456, "step": 289220 }, { "epoch": 0.5842628991140002, "grad_norm": 481.00897216796875, "learning_rate": 4.504095935957718e-06, "loss": 18.3304, "step": 289230 }, { "epoch": 0.584283099746684, "grad_norm": 563.0565185546875, "learning_rate": 4.503748592410427e-06, "loss": 18.334, "step": 289240 }, { "epoch": 0.5843033003793678, "grad_norm": 357.5160827636719, "learning_rate": 4.5034012512818065e-06, "loss": 22.5118, "step": 289250 }, { "epoch": 0.5843235010120517, "grad_norm": 362.5657958984375, "learning_rate": 4.503053912573545e-06, "loss": 10.575, "step": 289260 }, { "epoch": 0.5843437016447355, "grad_norm": 168.14981079101562, "learning_rate": 4.502706576287341e-06, "loss": 16.7396, "step": 289270 }, { "epoch": 0.5843639022774193, "grad_norm": 244.01336669921875, "learning_rate": 4.502359242424885e-06, "loss": 25.2524, "step": 289280 }, { "epoch": 0.5843841029101031, "grad_norm": 512.7816772460938, "learning_rate": 4.502011910987869e-06, "loss": 16.5723, "step": 289290 }, { "epoch": 0.5844043035427869, "grad_norm": 598.6182861328125, "learning_rate": 4.5016645819779865e-06, "loss": 27.588, "step": 289300 }, { "epoch": 0.5844245041754708, "grad_norm": 686.1741943359375, "learning_rate": 4.501317255396931e-06, "loss": 14.1701, "step": 289310 }, { "epoch": 0.5844447048081546, "grad_norm": 635.1903076171875, "learning_rate": 4.500969931246394e-06, "loss": 24.9359, "step": 289320 }, { "epoch": 0.5844649054408384, "grad_norm": 277.31646728515625, "learning_rate": 4.500622609528068e-06, "loss": 13.0281, "step": 289330 }, { "epoch": 0.5844851060735222, "grad_norm": 461.67437744140625, "learning_rate": 4.500275290243648e-06, "loss": 14.4205, "step": 289340 }, { "epoch": 0.584505306706206, "grad_norm": 1250.0423583984375, "learning_rate": 4.499927973394826e-06, "loss": 15.3964, "step": 289350 }, { "epoch": 0.5845255073388899, "grad_norm": 604.5083618164062, "learning_rate": 4.499580658983294e-06, "loss": 20.9392, "step": 289360 }, { "epoch": 0.5845457079715737, "grad_norm": 0.01670226640999317, "learning_rate": 4.4992333470107455e-06, "loss": 31.5604, "step": 289370 }, { "epoch": 0.5845659086042575, 
"grad_norm": 291.5645446777344, "learning_rate": 4.498886037478874e-06, "loss": 16.1465, "step": 289380 }, { "epoch": 0.5845861092369413, "grad_norm": 126.17826080322266, "learning_rate": 4.498538730389369e-06, "loss": 13.0477, "step": 289390 }, { "epoch": 0.5846063098696251, "grad_norm": 501.94659423828125, "learning_rate": 4.4981914257439254e-06, "loss": 24.4841, "step": 289400 }, { "epoch": 0.584626510502309, "grad_norm": 9.489340782165527, "learning_rate": 4.497844123544239e-06, "loss": 14.4267, "step": 289410 }, { "epoch": 0.5846467111349928, "grad_norm": 153.04071044921875, "learning_rate": 4.497496823791996e-06, "loss": 28.7787, "step": 289420 }, { "epoch": 0.5846669117676766, "grad_norm": 164.69967651367188, "learning_rate": 4.497149526488893e-06, "loss": 16.5493, "step": 289430 }, { "epoch": 0.5846871124003604, "grad_norm": 139.81759643554688, "learning_rate": 4.496802231636624e-06, "loss": 13.9025, "step": 289440 }, { "epoch": 0.5847073130330442, "grad_norm": 48.29279327392578, "learning_rate": 4.496454939236879e-06, "loss": 12.715, "step": 289450 }, { "epoch": 0.5847275136657281, "grad_norm": 1061.7513427734375, "learning_rate": 4.496107649291351e-06, "loss": 24.7893, "step": 289460 }, { "epoch": 0.5847477142984119, "grad_norm": 402.9743347167969, "learning_rate": 4.495760361801732e-06, "loss": 25.3875, "step": 289470 }, { "epoch": 0.5847679149310956, "grad_norm": 202.3970184326172, "learning_rate": 4.49541307676972e-06, "loss": 19.6393, "step": 289480 }, { "epoch": 0.5847881155637794, "grad_norm": 492.6229553222656, "learning_rate": 4.495065794196999e-06, "loss": 21.0966, "step": 289490 }, { "epoch": 0.5848083161964632, "grad_norm": 111.33905029296875, "learning_rate": 4.494718514085269e-06, "loss": 18.6436, "step": 289500 }, { "epoch": 0.584828516829147, "grad_norm": 302.810546875, "learning_rate": 4.494371236436219e-06, "loss": 18.3843, "step": 289510 }, { "epoch": 0.5848487174618309, "grad_norm": 494.04827880859375, "learning_rate": 4.494023961251542e-06, "loss": 16.7611, "step": 289520 }, { "epoch": 0.5848689180945147, "grad_norm": 309.8783874511719, "learning_rate": 4.49367668853293e-06, "loss": 20.4386, "step": 289530 }, { "epoch": 0.5848891187271985, "grad_norm": 378.0588073730469, "learning_rate": 4.4933294182820785e-06, "loss": 20.1889, "step": 289540 }, { "epoch": 0.5849093193598823, "grad_norm": 633.7971801757812, "learning_rate": 4.492982150500677e-06, "loss": 17.3576, "step": 289550 }, { "epoch": 0.5849295199925661, "grad_norm": 259.9631652832031, "learning_rate": 4.492634885190417e-06, "loss": 21.1418, "step": 289560 }, { "epoch": 0.58494972062525, "grad_norm": 342.33038330078125, "learning_rate": 4.492287622352996e-06, "loss": 15.3021, "step": 289570 }, { "epoch": 0.5849699212579338, "grad_norm": 365.4823913574219, "learning_rate": 4.491940361990101e-06, "loss": 9.4205, "step": 289580 }, { "epoch": 0.5849901218906176, "grad_norm": 412.4847412109375, "learning_rate": 4.4915931041034285e-06, "loss": 16.2116, "step": 289590 }, { "epoch": 0.5850103225233014, "grad_norm": 399.02093505859375, "learning_rate": 4.491245848694669e-06, "loss": 24.1776, "step": 289600 }, { "epoch": 0.5850305231559852, "grad_norm": 664.1265869140625, "learning_rate": 4.490898595765517e-06, "loss": 27.871, "step": 289610 }, { "epoch": 0.5850507237886691, "grad_norm": 686.6106567382812, "learning_rate": 4.490551345317662e-06, "loss": 13.0017, "step": 289620 }, { "epoch": 0.5850709244213529, "grad_norm": 591.8042602539062, "learning_rate": 4.4902040973527974e-06, "loss": 14.9969, "step": 289630 
}, { "epoch": 0.5850911250540367, "grad_norm": 510.093994140625, "learning_rate": 4.489856851872619e-06, "loss": 32.5532, "step": 289640 }, { "epoch": 0.5851113256867205, "grad_norm": 156.0966796875, "learning_rate": 4.489509608878813e-06, "loss": 12.641, "step": 289650 }, { "epoch": 0.5851315263194043, "grad_norm": 688.0595092773438, "learning_rate": 4.4891623683730765e-06, "loss": 21.0297, "step": 289660 }, { "epoch": 0.5851517269520882, "grad_norm": 425.06640625, "learning_rate": 4.488815130357103e-06, "loss": 34.7271, "step": 289670 }, { "epoch": 0.585171927584772, "grad_norm": 227.1132049560547, "learning_rate": 4.48846789483258e-06, "loss": 19.1965, "step": 289680 }, { "epoch": 0.5851921282174558, "grad_norm": 185.37522888183594, "learning_rate": 4.488120661801202e-06, "loss": 13.6779, "step": 289690 }, { "epoch": 0.5852123288501396, "grad_norm": 764.2107543945312, "learning_rate": 4.487773431264664e-06, "loss": 19.2322, "step": 289700 }, { "epoch": 0.5852325294828234, "grad_norm": 267.73468017578125, "learning_rate": 4.487426203224655e-06, "loss": 14.0345, "step": 289710 }, { "epoch": 0.5852527301155073, "grad_norm": 557.2090454101562, "learning_rate": 4.487078977682867e-06, "loss": 16.3052, "step": 289720 }, { "epoch": 0.585272930748191, "grad_norm": 371.2828674316406, "learning_rate": 4.486731754640997e-06, "loss": 17.3501, "step": 289730 }, { "epoch": 0.5852931313808748, "grad_norm": 417.99871826171875, "learning_rate": 4.486384534100732e-06, "loss": 27.2229, "step": 289740 }, { "epoch": 0.5853133320135586, "grad_norm": 197.7482147216797, "learning_rate": 4.4860373160637665e-06, "loss": 18.6305, "step": 289750 }, { "epoch": 0.5853335326462424, "grad_norm": 634.205078125, "learning_rate": 4.485690100531793e-06, "loss": 38.9959, "step": 289760 }, { "epoch": 0.5853537332789263, "grad_norm": 21.159589767456055, "learning_rate": 4.485342887506505e-06, "loss": 7.3589, "step": 289770 }, { "epoch": 0.5853739339116101, "grad_norm": 416.6664123535156, "learning_rate": 4.484995676989592e-06, "loss": 26.0226, "step": 289780 }, { "epoch": 0.5853941345442939, "grad_norm": 187.21580505371094, "learning_rate": 4.4846484689827465e-06, "loss": 22.066, "step": 289790 }, { "epoch": 0.5854143351769777, "grad_norm": 403.3133544921875, "learning_rate": 4.484301263487664e-06, "loss": 39.342, "step": 289800 }, { "epoch": 0.5854345358096615, "grad_norm": 192.5809783935547, "learning_rate": 4.483954060506033e-06, "loss": 24.9163, "step": 289810 }, { "epoch": 0.5854547364423454, "grad_norm": 529.3194580078125, "learning_rate": 4.4836068600395484e-06, "loss": 11.6182, "step": 289820 }, { "epoch": 0.5854749370750292, "grad_norm": 221.81625366210938, "learning_rate": 4.483259662089902e-06, "loss": 19.4294, "step": 289830 }, { "epoch": 0.585495137707713, "grad_norm": 710.7893676757812, "learning_rate": 4.482912466658784e-06, "loss": 17.4546, "step": 289840 }, { "epoch": 0.5855153383403968, "grad_norm": 870.0003662109375, "learning_rate": 4.482565273747888e-06, "loss": 15.259, "step": 289850 }, { "epoch": 0.5855355389730806, "grad_norm": 112.72933959960938, "learning_rate": 4.482218083358907e-06, "loss": 31.1874, "step": 289860 }, { "epoch": 0.5855557396057645, "grad_norm": 295.20269775390625, "learning_rate": 4.481870895493531e-06, "loss": 17.5055, "step": 289870 }, { "epoch": 0.5855759402384483, "grad_norm": 875.2280883789062, "learning_rate": 4.481523710153454e-06, "loss": 27.2396, "step": 289880 }, { "epoch": 0.5855961408711321, "grad_norm": 524.6953125, "learning_rate": 4.481176527340368e-06, "loss": 
23.6339, "step": 289890 }, { "epoch": 0.5856163415038159, "grad_norm": 267.17681884765625, "learning_rate": 4.4808293470559645e-06, "loss": 12.8049, "step": 289900 }, { "epoch": 0.5856365421364997, "grad_norm": 495.7286682128906, "learning_rate": 4.480482169301935e-06, "loss": 19.0436, "step": 289910 }, { "epoch": 0.5856567427691836, "grad_norm": 181.76971435546875, "learning_rate": 4.480134994079973e-06, "loss": 12.6426, "step": 289920 }, { "epoch": 0.5856769434018674, "grad_norm": 967.9786987304688, "learning_rate": 4.479787821391771e-06, "loss": 21.7154, "step": 289930 }, { "epoch": 0.5856971440345512, "grad_norm": 180.23681640625, "learning_rate": 4.4794406512390175e-06, "loss": 14.4039, "step": 289940 }, { "epoch": 0.585717344667235, "grad_norm": 91.26536560058594, "learning_rate": 4.479093483623409e-06, "loss": 16.9906, "step": 289950 }, { "epoch": 0.5857375452999188, "grad_norm": 179.8864288330078, "learning_rate": 4.478746318546636e-06, "loss": 14.3518, "step": 289960 }, { "epoch": 0.5857577459326027, "grad_norm": 1979.79150390625, "learning_rate": 4.478399156010389e-06, "loss": 26.318, "step": 289970 }, { "epoch": 0.5857779465652865, "grad_norm": 107.35133361816406, "learning_rate": 4.478051996016362e-06, "loss": 8.2717, "step": 289980 }, { "epoch": 0.5857981471979702, "grad_norm": 308.7791748046875, "learning_rate": 4.477704838566246e-06, "loss": 14.4244, "step": 289990 }, { "epoch": 0.585818347830654, "grad_norm": 445.58343505859375, "learning_rate": 4.477357683661734e-06, "loss": 26.5464, "step": 290000 }, { "epoch": 0.5858385484633378, "grad_norm": 211.74684143066406, "learning_rate": 4.477010531304515e-06, "loss": 21.5916, "step": 290010 }, { "epoch": 0.5858587490960216, "grad_norm": 361.0027770996094, "learning_rate": 4.476663381496287e-06, "loss": 20.5326, "step": 290020 }, { "epoch": 0.5858789497287055, "grad_norm": 46.908023834228516, "learning_rate": 4.476316234238735e-06, "loss": 13.4577, "step": 290030 }, { "epoch": 0.5858991503613893, "grad_norm": 577.0363159179688, "learning_rate": 4.4759690895335545e-06, "loss": 19.1926, "step": 290040 }, { "epoch": 0.5859193509940731, "grad_norm": 274.8571472167969, "learning_rate": 4.475621947382438e-06, "loss": 15.9146, "step": 290050 }, { "epoch": 0.5859395516267569, "grad_norm": 319.69970703125, "learning_rate": 4.475274807787077e-06, "loss": 18.9863, "step": 290060 }, { "epoch": 0.5859597522594407, "grad_norm": 284.3815002441406, "learning_rate": 4.474927670749162e-06, "loss": 23.6112, "step": 290070 }, { "epoch": 0.5859799528921246, "grad_norm": 355.4840087890625, "learning_rate": 4.474580536270385e-06, "loss": 24.4852, "step": 290080 }, { "epoch": 0.5860001535248084, "grad_norm": 139.454833984375, "learning_rate": 4.474233404352442e-06, "loss": 15.0361, "step": 290090 }, { "epoch": 0.5860203541574922, "grad_norm": 235.73052978515625, "learning_rate": 4.473886274997018e-06, "loss": 20.6806, "step": 290100 }, { "epoch": 0.586040554790176, "grad_norm": 212.15199279785156, "learning_rate": 4.47353914820581e-06, "loss": 15.8834, "step": 290110 }, { "epoch": 0.5860607554228598, "grad_norm": 295.6542053222656, "learning_rate": 4.473192023980509e-06, "loss": 32.0004, "step": 290120 }, { "epoch": 0.5860809560555437, "grad_norm": 457.3767395019531, "learning_rate": 4.472844902322805e-06, "loss": 15.9072, "step": 290130 }, { "epoch": 0.5861011566882275, "grad_norm": 84.76927947998047, "learning_rate": 4.472497783234392e-06, "loss": 11.4173, "step": 290140 }, { "epoch": 0.5861213573209113, "grad_norm": 74.38208770751953, 
"learning_rate": 4.472150666716961e-06, "loss": 51.078, "step": 290150 }, { "epoch": 0.5861415579535951, "grad_norm": 386.08428955078125, "learning_rate": 4.471803552772203e-06, "loss": 18.1884, "step": 290160 }, { "epoch": 0.586161758586279, "grad_norm": 429.62066650390625, "learning_rate": 4.471456441401809e-06, "loss": 26.5948, "step": 290170 }, { "epoch": 0.5861819592189628, "grad_norm": 357.4833679199219, "learning_rate": 4.471109332607475e-06, "loss": 22.8235, "step": 290180 }, { "epoch": 0.5862021598516466, "grad_norm": 412.8485412597656, "learning_rate": 4.4707622263908875e-06, "loss": 10.7813, "step": 290190 }, { "epoch": 0.5862223604843304, "grad_norm": 303.6448974609375, "learning_rate": 4.470415122753742e-06, "loss": 26.0047, "step": 290200 }, { "epoch": 0.5862425611170142, "grad_norm": 502.8114013671875, "learning_rate": 4.470068021697728e-06, "loss": 17.8887, "step": 290210 }, { "epoch": 0.586262761749698, "grad_norm": 531.0615844726562, "learning_rate": 4.4697209232245395e-06, "loss": 23.2769, "step": 290220 }, { "epoch": 0.5862829623823819, "grad_norm": 398.9246826171875, "learning_rate": 4.469373827335866e-06, "loss": 21.4093, "step": 290230 }, { "epoch": 0.5863031630150657, "grad_norm": 65.7341537475586, "learning_rate": 4.4690267340334e-06, "loss": 22.8751, "step": 290240 }, { "epoch": 0.5863233636477494, "grad_norm": 404.8388671875, "learning_rate": 4.468679643318836e-06, "loss": 25.5584, "step": 290250 }, { "epoch": 0.5863435642804332, "grad_norm": 1449.1175537109375, "learning_rate": 4.468332555193859e-06, "loss": 28.4466, "step": 290260 }, { "epoch": 0.586363764913117, "grad_norm": 658.9000854492188, "learning_rate": 4.467985469660166e-06, "loss": 26.6578, "step": 290270 }, { "epoch": 0.5863839655458009, "grad_norm": 603.8819580078125, "learning_rate": 4.467638386719448e-06, "loss": 23.7443, "step": 290280 }, { "epoch": 0.5864041661784847, "grad_norm": 437.9130859375, "learning_rate": 4.467291306373396e-06, "loss": 16.2615, "step": 290290 }, { "epoch": 0.5864243668111685, "grad_norm": 521.2614135742188, "learning_rate": 4.466944228623701e-06, "loss": 33.7539, "step": 290300 }, { "epoch": 0.5864445674438523, "grad_norm": 266.4351806640625, "learning_rate": 4.466597153472056e-06, "loss": 26.3424, "step": 290310 }, { "epoch": 0.5864647680765361, "grad_norm": 18.054241180419922, "learning_rate": 4.4662500809201515e-06, "loss": 17.4335, "step": 290320 }, { "epoch": 0.58648496870922, "grad_norm": 486.8254699707031, "learning_rate": 4.465903010969677e-06, "loss": 29.748, "step": 290330 }, { "epoch": 0.5865051693419038, "grad_norm": 391.8296813964844, "learning_rate": 4.46555594362233e-06, "loss": 22.5543, "step": 290340 }, { "epoch": 0.5865253699745876, "grad_norm": 401.99688720703125, "learning_rate": 4.4652088788797965e-06, "loss": 26.4605, "step": 290350 }, { "epoch": 0.5865455706072714, "grad_norm": 324.543212890625, "learning_rate": 4.46486181674377e-06, "loss": 14.6501, "step": 290360 }, { "epoch": 0.5865657712399552, "grad_norm": 355.5074157714844, "learning_rate": 4.464514757215943e-06, "loss": 12.5329, "step": 290370 }, { "epoch": 0.5865859718726391, "grad_norm": 328.8937072753906, "learning_rate": 4.464167700298006e-06, "loss": 17.8105, "step": 290380 }, { "epoch": 0.5866061725053229, "grad_norm": 86.53602600097656, "learning_rate": 4.463820645991651e-06, "loss": 17.9668, "step": 290390 }, { "epoch": 0.5866263731380067, "grad_norm": 529.2399291992188, "learning_rate": 4.463473594298567e-06, "loss": 29.0053, "step": 290400 }, { "epoch": 0.5866465737706905, 
"grad_norm": 433.610107421875, "learning_rate": 4.463126545220451e-06, "loss": 14.7375, "step": 290410 }, { "epoch": 0.5866667744033743, "grad_norm": 68.43505096435547, "learning_rate": 4.462779498758988e-06, "loss": 28.9777, "step": 290420 }, { "epoch": 0.5866869750360582, "grad_norm": 524.79833984375, "learning_rate": 4.462432454915873e-06, "loss": 7.0757, "step": 290430 }, { "epoch": 0.586707175668742, "grad_norm": 273.8827209472656, "learning_rate": 4.4620854136928e-06, "loss": 13.9228, "step": 290440 }, { "epoch": 0.5867273763014258, "grad_norm": 232.83982849121094, "learning_rate": 4.461738375091454e-06, "loss": 26.8315, "step": 290450 }, { "epoch": 0.5867475769341096, "grad_norm": 306.6068420410156, "learning_rate": 4.461391339113531e-06, "loss": 12.5558, "step": 290460 }, { "epoch": 0.5867677775667934, "grad_norm": 587.6408081054688, "learning_rate": 4.461044305760722e-06, "loss": 29.6375, "step": 290470 }, { "epoch": 0.5867879781994773, "grad_norm": 544.8992309570312, "learning_rate": 4.460697275034717e-06, "loss": 24.7492, "step": 290480 }, { "epoch": 0.5868081788321611, "grad_norm": 377.0224609375, "learning_rate": 4.460350246937207e-06, "loss": 22.4234, "step": 290490 }, { "epoch": 0.5868283794648448, "grad_norm": 430.548583984375, "learning_rate": 4.460003221469886e-06, "loss": 18.8128, "step": 290500 }, { "epoch": 0.5868485800975286, "grad_norm": 229.59872436523438, "learning_rate": 4.459656198634444e-06, "loss": 17.5641, "step": 290510 }, { "epoch": 0.5868687807302124, "grad_norm": 103.71757507324219, "learning_rate": 4.459309178432571e-06, "loss": 20.7203, "step": 290520 }, { "epoch": 0.5868889813628962, "grad_norm": 616.0745239257812, "learning_rate": 4.458962160865961e-06, "loss": 54.3132, "step": 290530 }, { "epoch": 0.5869091819955801, "grad_norm": 600.0247192382812, "learning_rate": 4.458615145936303e-06, "loss": 17.3017, "step": 290540 }, { "epoch": 0.5869293826282639, "grad_norm": 744.3265991210938, "learning_rate": 4.458268133645289e-06, "loss": 11.4757, "step": 290550 }, { "epoch": 0.5869495832609477, "grad_norm": 552.5853271484375, "learning_rate": 4.457921123994609e-06, "loss": 16.1665, "step": 290560 }, { "epoch": 0.5869697838936315, "grad_norm": 561.9412231445312, "learning_rate": 4.457574116985958e-06, "loss": 22.5984, "step": 290570 }, { "epoch": 0.5869899845263153, "grad_norm": 507.02099609375, "learning_rate": 4.457227112621024e-06, "loss": 16.5404, "step": 290580 }, { "epoch": 0.5870101851589992, "grad_norm": 151.58840942382812, "learning_rate": 4.456880110901499e-06, "loss": 15.3226, "step": 290590 }, { "epoch": 0.587030385791683, "grad_norm": 186.79566955566406, "learning_rate": 4.456533111829076e-06, "loss": 25.297, "step": 290600 }, { "epoch": 0.5870505864243668, "grad_norm": 442.0946350097656, "learning_rate": 4.456186115405443e-06, "loss": 15.7763, "step": 290610 }, { "epoch": 0.5870707870570506, "grad_norm": 608.88330078125, "learning_rate": 4.455839121632292e-06, "loss": 25.6646, "step": 290620 }, { "epoch": 0.5870909876897344, "grad_norm": 107.75144958496094, "learning_rate": 4.455492130511318e-06, "loss": 14.3466, "step": 290630 }, { "epoch": 0.5871111883224183, "grad_norm": 391.6009826660156, "learning_rate": 4.455145142044207e-06, "loss": 19.8789, "step": 290640 }, { "epoch": 0.5871313889551021, "grad_norm": 410.3229675292969, "learning_rate": 4.4547981562326535e-06, "loss": 14.6501, "step": 290650 }, { "epoch": 0.5871515895877859, "grad_norm": 876.847412109375, "learning_rate": 4.454451173078347e-06, "loss": 18.3208, "step": 290660 }, { 
"epoch": 0.5871717902204697, "grad_norm": 202.0698699951172, "learning_rate": 4.454104192582981e-06, "loss": 20.384, "step": 290670 }, { "epoch": 0.5871919908531535, "grad_norm": 489.487060546875, "learning_rate": 4.453757214748243e-06, "loss": 33.0625, "step": 290680 }, { "epoch": 0.5872121914858374, "grad_norm": 452.5086975097656, "learning_rate": 4.453410239575826e-06, "loss": 24.4016, "step": 290690 }, { "epoch": 0.5872323921185212, "grad_norm": 161.07870483398438, "learning_rate": 4.453063267067424e-06, "loss": 13.2982, "step": 290700 }, { "epoch": 0.587252592751205, "grad_norm": 187.7038116455078, "learning_rate": 4.452716297224722e-06, "loss": 18.8365, "step": 290710 }, { "epoch": 0.5872727933838888, "grad_norm": 412.16259765625, "learning_rate": 4.452369330049415e-06, "loss": 20.687, "step": 290720 }, { "epoch": 0.5872929940165726, "grad_norm": 195.6519317626953, "learning_rate": 4.452022365543195e-06, "loss": 12.3374, "step": 290730 }, { "epoch": 0.5873131946492565, "grad_norm": 726.9053955078125, "learning_rate": 4.451675403707751e-06, "loss": 20.9327, "step": 290740 }, { "epoch": 0.5873333952819403, "grad_norm": 136.5218505859375, "learning_rate": 4.451328444544774e-06, "loss": 28.0344, "step": 290750 }, { "epoch": 0.587353595914624, "grad_norm": 264.0720520019531, "learning_rate": 4.450981488055957e-06, "loss": 10.633, "step": 290760 }, { "epoch": 0.5873737965473078, "grad_norm": 170.5759735107422, "learning_rate": 4.450634534242989e-06, "loss": 15.1688, "step": 290770 }, { "epoch": 0.5873939971799916, "grad_norm": 142.72483825683594, "learning_rate": 4.4502875831075596e-06, "loss": 15.1651, "step": 290780 }, { "epoch": 0.5874141978126755, "grad_norm": 121.17604064941406, "learning_rate": 4.449940634651365e-06, "loss": 20.2002, "step": 290790 }, { "epoch": 0.5874343984453593, "grad_norm": 239.14881896972656, "learning_rate": 4.44959368887609e-06, "loss": 21.0542, "step": 290800 }, { "epoch": 0.5874545990780431, "grad_norm": 786.7036743164062, "learning_rate": 4.44924674578343e-06, "loss": 18.8186, "step": 290810 }, { "epoch": 0.5874747997107269, "grad_norm": 751.5238037109375, "learning_rate": 4.4488998053750746e-06, "loss": 29.3956, "step": 290820 }, { "epoch": 0.5874950003434107, "grad_norm": 50.34823226928711, "learning_rate": 4.448552867652715e-06, "loss": 15.6865, "step": 290830 }, { "epoch": 0.5875152009760946, "grad_norm": 153.85646057128906, "learning_rate": 4.448205932618042e-06, "loss": 11.5163, "step": 290840 }, { "epoch": 0.5875354016087784, "grad_norm": 656.3759155273438, "learning_rate": 4.447859000272744e-06, "loss": 18.2736, "step": 290850 }, { "epoch": 0.5875556022414622, "grad_norm": 509.01837158203125, "learning_rate": 4.447512070618519e-06, "loss": 25.3698, "step": 290860 }, { "epoch": 0.587575802874146, "grad_norm": 397.4849548339844, "learning_rate": 4.447165143657049e-06, "loss": 15.6323, "step": 290870 }, { "epoch": 0.5875960035068298, "grad_norm": 450.42822265625, "learning_rate": 4.44681821939003e-06, "loss": 21.6473, "step": 290880 }, { "epoch": 0.5876162041395137, "grad_norm": 460.9064025878906, "learning_rate": 4.446471297819154e-06, "loss": 18.155, "step": 290890 }, { "epoch": 0.5876364047721975, "grad_norm": 444.8457946777344, "learning_rate": 4.446124378946108e-06, "loss": 15.3499, "step": 290900 }, { "epoch": 0.5876566054048813, "grad_norm": 137.37344360351562, "learning_rate": 4.4457774627725835e-06, "loss": 9.8823, "step": 290910 }, { "epoch": 0.5876768060375651, "grad_norm": 1177.26171875, "learning_rate": 4.4454305493002744e-06, "loss": 
17.1093, "step": 290920 }, { "epoch": 0.5876970066702489, "grad_norm": 467.1009216308594, "learning_rate": 4.44508363853087e-06, "loss": 17.3269, "step": 290930 }, { "epoch": 0.5877172073029328, "grad_norm": 14.7145414352417, "learning_rate": 4.444736730466057e-06, "loss": 14.337, "step": 290940 }, { "epoch": 0.5877374079356166, "grad_norm": 623.8619995117188, "learning_rate": 4.444389825107534e-06, "loss": 28.5236, "step": 290950 }, { "epoch": 0.5877576085683004, "grad_norm": 848.1859130859375, "learning_rate": 4.444042922456985e-06, "loss": 22.265, "step": 290960 }, { "epoch": 0.5877778092009842, "grad_norm": 206.10719299316406, "learning_rate": 4.4436960225161045e-06, "loss": 28.2873, "step": 290970 }, { "epoch": 0.587798009833668, "grad_norm": 235.267578125, "learning_rate": 4.443349125286581e-06, "loss": 21.8742, "step": 290980 }, { "epoch": 0.5878182104663519, "grad_norm": 253.72393798828125, "learning_rate": 4.443002230770108e-06, "loss": 14.245, "step": 290990 }, { "epoch": 0.5878384110990357, "grad_norm": 59.76152420043945, "learning_rate": 4.442655338968373e-06, "loss": 22.391, "step": 291000 }, { "epoch": 0.5878586117317194, "grad_norm": 252.1258087158203, "learning_rate": 4.4423084498830685e-06, "loss": 31.2413, "step": 291010 }, { "epoch": 0.5878788123644032, "grad_norm": 429.10491943359375, "learning_rate": 4.4419615635158875e-06, "loss": 18.1277, "step": 291020 }, { "epoch": 0.587899012997087, "grad_norm": 486.5559997558594, "learning_rate": 4.441614679868514e-06, "loss": 21.1466, "step": 291030 }, { "epoch": 0.5879192136297708, "grad_norm": 324.23699951171875, "learning_rate": 4.441267798942646e-06, "loss": 13.5959, "step": 291040 }, { "epoch": 0.5879394142624547, "grad_norm": 464.66375732421875, "learning_rate": 4.44092092073997e-06, "loss": 11.8075, "step": 291050 }, { "epoch": 0.5879596148951385, "grad_norm": 18.357135772705078, "learning_rate": 4.440574045262178e-06, "loss": 14.8909, "step": 291060 }, { "epoch": 0.5879798155278223, "grad_norm": 305.482666015625, "learning_rate": 4.440227172510959e-06, "loss": 19.1991, "step": 291070 }, { "epoch": 0.5880000161605061, "grad_norm": 248.35919189453125, "learning_rate": 4.439880302488007e-06, "loss": 12.7908, "step": 291080 }, { "epoch": 0.5880202167931899, "grad_norm": 480.1448059082031, "learning_rate": 4.439533435195009e-06, "loss": 10.5695, "step": 291090 }, { "epoch": 0.5880404174258738, "grad_norm": 265.6449279785156, "learning_rate": 4.439186570633656e-06, "loss": 9.9222, "step": 291100 }, { "epoch": 0.5880606180585576, "grad_norm": 328.6371154785156, "learning_rate": 4.43883970880564e-06, "loss": 19.673, "step": 291110 }, { "epoch": 0.5880808186912414, "grad_norm": 1000.6197509765625, "learning_rate": 4.4384928497126534e-06, "loss": 31.4514, "step": 291120 }, { "epoch": 0.5881010193239252, "grad_norm": 771.0360717773438, "learning_rate": 4.438145993356383e-06, "loss": 19.6446, "step": 291130 }, { "epoch": 0.588121219956609, "grad_norm": 199.8487548828125, "learning_rate": 4.437799139738521e-06, "loss": 15.8613, "step": 291140 }, { "epoch": 0.5881414205892929, "grad_norm": 322.72802734375, "learning_rate": 4.437452288860759e-06, "loss": 14.5427, "step": 291150 }, { "epoch": 0.5881616212219767, "grad_norm": 19.49073600769043, "learning_rate": 4.437105440724785e-06, "loss": 10.3116, "step": 291160 }, { "epoch": 0.5881818218546605, "grad_norm": 501.9320068359375, "learning_rate": 4.43675859533229e-06, "loss": 12.4192, "step": 291170 }, { "epoch": 0.5882020224873443, "grad_norm": 114.68348693847656, "learning_rate": 
4.4364117526849674e-06, "loss": 14.7263, "step": 291180 }, { "epoch": 0.5882222231200281, "grad_norm": 254.6109161376953, "learning_rate": 4.436064912784504e-06, "loss": 26.2108, "step": 291190 }, { "epoch": 0.588242423752712, "grad_norm": 255.48748779296875, "learning_rate": 4.4357180756325915e-06, "loss": 30.6562, "step": 291200 }, { "epoch": 0.5882626243853958, "grad_norm": 339.65087890625, "learning_rate": 4.435371241230923e-06, "loss": 19.6818, "step": 291210 }, { "epoch": 0.5882828250180796, "grad_norm": 574.1737670898438, "learning_rate": 4.435024409581185e-06, "loss": 10.0981, "step": 291220 }, { "epoch": 0.5883030256507634, "grad_norm": 927.2265625, "learning_rate": 4.434677580685069e-06, "loss": 27.3333, "step": 291230 }, { "epoch": 0.5883232262834472, "grad_norm": 205.8227081298828, "learning_rate": 4.434330754544267e-06, "loss": 9.6747, "step": 291240 }, { "epoch": 0.5883434269161311, "grad_norm": 529.600341796875, "learning_rate": 4.4339839311604675e-06, "loss": 33.645, "step": 291250 }, { "epoch": 0.5883636275488149, "grad_norm": 451.12298583984375, "learning_rate": 4.433637110535361e-06, "loss": 18.1124, "step": 291260 }, { "epoch": 0.5883838281814986, "grad_norm": 38.88941955566406, "learning_rate": 4.4332902926706395e-06, "loss": 11.802, "step": 291270 }, { "epoch": 0.5884040288141824, "grad_norm": 319.8966979980469, "learning_rate": 4.432943477567993e-06, "loss": 18.3529, "step": 291280 }, { "epoch": 0.5884242294468662, "grad_norm": 122.94017028808594, "learning_rate": 4.43259666522911e-06, "loss": 27.3233, "step": 291290 }, { "epoch": 0.58844443007955, "grad_norm": 464.04132080078125, "learning_rate": 4.432249855655681e-06, "loss": 17.2269, "step": 291300 }, { "epoch": 0.5884646307122339, "grad_norm": 419.5864562988281, "learning_rate": 4.431903048849402e-06, "loss": 22.1468, "step": 291310 }, { "epoch": 0.5884848313449177, "grad_norm": 610.0906982421875, "learning_rate": 4.431556244811954e-06, "loss": 24.5321, "step": 291320 }, { "epoch": 0.5885050319776015, "grad_norm": 608.2277221679688, "learning_rate": 4.431209443545033e-06, "loss": 22.2584, "step": 291330 }, { "epoch": 0.5885252326102853, "grad_norm": 363.5028076171875, "learning_rate": 4.43086264505033e-06, "loss": 18.7677, "step": 291340 }, { "epoch": 0.5885454332429692, "grad_norm": 67.64398956298828, "learning_rate": 4.430515849329532e-06, "loss": 14.4721, "step": 291350 }, { "epoch": 0.588565633875653, "grad_norm": 184.47952270507812, "learning_rate": 4.43016905638433e-06, "loss": 18.4786, "step": 291360 }, { "epoch": 0.5885858345083368, "grad_norm": 323.0634460449219, "learning_rate": 4.429822266216417e-06, "loss": 12.0939, "step": 291370 }, { "epoch": 0.5886060351410206, "grad_norm": 24.33133316040039, "learning_rate": 4.42947547882748e-06, "loss": 23.6053, "step": 291380 }, { "epoch": 0.5886262357737044, "grad_norm": 321.9494323730469, "learning_rate": 4.4291286942192085e-06, "loss": 23.0632, "step": 291390 }, { "epoch": 0.5886464364063883, "grad_norm": 142.2558135986328, "learning_rate": 4.428781912393299e-06, "loss": 17.5378, "step": 291400 }, { "epoch": 0.5886666370390721, "grad_norm": 976.93359375, "learning_rate": 4.4284351333514315e-06, "loss": 20.9709, "step": 291410 }, { "epoch": 0.5886868376717559, "grad_norm": 308.3500061035156, "learning_rate": 4.428088357095306e-06, "loss": 21.0857, "step": 291420 }, { "epoch": 0.5887070383044397, "grad_norm": 209.01194763183594, "learning_rate": 4.427741583626607e-06, "loss": 19.3746, "step": 291430 }, { "epoch": 0.5887272389371235, "grad_norm": 
293.47027587890625, "learning_rate": 4.4273948129470264e-06, "loss": 26.9354, "step": 291440 }, { "epoch": 0.5887474395698074, "grad_norm": 370.9302673339844, "learning_rate": 4.427048045058254e-06, "loss": 41.0488, "step": 291450 }, { "epoch": 0.5887676402024912, "grad_norm": 210.16143798828125, "learning_rate": 4.426701279961978e-06, "loss": 27.4176, "step": 291460 }, { "epoch": 0.588787840835175, "grad_norm": 277.9696044921875, "learning_rate": 4.426354517659894e-06, "loss": 26.8509, "step": 291470 }, { "epoch": 0.5888080414678588, "grad_norm": 1105.115234375, "learning_rate": 4.426007758153686e-06, "loss": 17.7384, "step": 291480 }, { "epoch": 0.5888282421005426, "grad_norm": 39.77167510986328, "learning_rate": 4.4256610014450465e-06, "loss": 8.5918, "step": 291490 }, { "epoch": 0.5888484427332265, "grad_norm": 216.14683532714844, "learning_rate": 4.425314247535668e-06, "loss": 27.5087, "step": 291500 }, { "epoch": 0.5888686433659103, "grad_norm": 747.41552734375, "learning_rate": 4.4249674964272365e-06, "loss": 20.6558, "step": 291510 }, { "epoch": 0.5888888439985941, "grad_norm": 413.9656982421875, "learning_rate": 4.424620748121443e-06, "loss": 9.3693, "step": 291520 }, { "epoch": 0.5889090446312778, "grad_norm": 226.15438842773438, "learning_rate": 4.42427400261998e-06, "loss": 15.4582, "step": 291530 }, { "epoch": 0.5889292452639616, "grad_norm": 177.61599731445312, "learning_rate": 4.423927259924535e-06, "loss": 13.591, "step": 291540 }, { "epoch": 0.5889494458966454, "grad_norm": 375.0335693359375, "learning_rate": 4.423580520036797e-06, "loss": 16.3396, "step": 291550 }, { "epoch": 0.5889696465293293, "grad_norm": 354.91650390625, "learning_rate": 4.423233782958459e-06, "loss": 20.1236, "step": 291560 }, { "epoch": 0.5889898471620131, "grad_norm": 685.5487060546875, "learning_rate": 4.42288704869121e-06, "loss": 22.85, "step": 291570 }, { "epoch": 0.5890100477946969, "grad_norm": 615.638671875, "learning_rate": 4.422540317236739e-06, "loss": 32.7813, "step": 291580 }, { "epoch": 0.5890302484273807, "grad_norm": 283.7459411621094, "learning_rate": 4.422193588596736e-06, "loss": 16.7828, "step": 291590 }, { "epoch": 0.5890504490600645, "grad_norm": 325.37493896484375, "learning_rate": 4.4218468627728935e-06, "loss": 19.9842, "step": 291600 }, { "epoch": 0.5890706496927484, "grad_norm": 72.0322265625, "learning_rate": 4.421500139766897e-06, "loss": 14.3139, "step": 291610 }, { "epoch": 0.5890908503254322, "grad_norm": 136.69898986816406, "learning_rate": 4.4211534195804385e-06, "loss": 13.7135, "step": 291620 }, { "epoch": 0.589111050958116, "grad_norm": 398.2770080566406, "learning_rate": 4.420806702215211e-06, "loss": 16.5902, "step": 291630 }, { "epoch": 0.5891312515907998, "grad_norm": 61.03514862060547, "learning_rate": 4.4204599876728975e-06, "loss": 38.1336, "step": 291640 }, { "epoch": 0.5891514522234836, "grad_norm": 430.0029296875, "learning_rate": 4.420113275955193e-06, "loss": 23.032, "step": 291650 }, { "epoch": 0.5891716528561675, "grad_norm": 668.921630859375, "learning_rate": 4.419766567063788e-06, "loss": 22.493, "step": 291660 }, { "epoch": 0.5891918534888513, "grad_norm": 414.0310974121094, "learning_rate": 4.419419861000369e-06, "loss": 21.329, "step": 291670 }, { "epoch": 0.5892120541215351, "grad_norm": 328.0341491699219, "learning_rate": 4.419073157766626e-06, "loss": 22.7492, "step": 291680 }, { "epoch": 0.5892322547542189, "grad_norm": 271.34423828125, "learning_rate": 4.418726457364252e-06, "loss": 15.9894, "step": 291690 }, { "epoch": 0.5892524553869027, 
"grad_norm": 133.44625854492188, "learning_rate": 4.418379759794934e-06, "loss": 20.1859, "step": 291700 }, { "epoch": 0.5892726560195866, "grad_norm": 729.990966796875, "learning_rate": 4.418033065060361e-06, "loss": 29.3806, "step": 291710 }, { "epoch": 0.5892928566522704, "grad_norm": 140.03456115722656, "learning_rate": 4.417686373162225e-06, "loss": 12.5217, "step": 291720 }, { "epoch": 0.5893130572849542, "grad_norm": 374.0782775878906, "learning_rate": 4.417339684102217e-06, "loss": 26.505, "step": 291730 }, { "epoch": 0.589333257917638, "grad_norm": 452.8331604003906, "learning_rate": 4.416992997882023e-06, "loss": 16.9886, "step": 291740 }, { "epoch": 0.5893534585503218, "grad_norm": 295.45562744140625, "learning_rate": 4.416646314503334e-06, "loss": 22.808, "step": 291750 }, { "epoch": 0.5893736591830057, "grad_norm": 583.7633056640625, "learning_rate": 4.416299633967842e-06, "loss": 29.0666, "step": 291760 }, { "epoch": 0.5893938598156895, "grad_norm": 690.9365234375, "learning_rate": 4.415952956277234e-06, "loss": 17.5846, "step": 291770 }, { "epoch": 0.5894140604483732, "grad_norm": 227.87814331054688, "learning_rate": 4.415606281433199e-06, "loss": 32.6798, "step": 291780 }, { "epoch": 0.589434261081057, "grad_norm": 232.24734497070312, "learning_rate": 4.415259609437431e-06, "loss": 14.3116, "step": 291790 }, { "epoch": 0.5894544617137408, "grad_norm": 382.23486328125, "learning_rate": 4.414912940291614e-06, "loss": 22.9699, "step": 291800 }, { "epoch": 0.5894746623464246, "grad_norm": 479.1570129394531, "learning_rate": 4.414566273997441e-06, "loss": 22.8322, "step": 291810 }, { "epoch": 0.5894948629791085, "grad_norm": 260.9040832519531, "learning_rate": 4.414219610556601e-06, "loss": 16.0159, "step": 291820 }, { "epoch": 0.5895150636117923, "grad_norm": 351.2622985839844, "learning_rate": 4.413872949970785e-06, "loss": 13.9373, "step": 291830 }, { "epoch": 0.5895352642444761, "grad_norm": 338.8872985839844, "learning_rate": 4.413526292241679e-06, "loss": 16.4922, "step": 291840 }, { "epoch": 0.5895554648771599, "grad_norm": 218.44630432128906, "learning_rate": 4.413179637370977e-06, "loss": 16.8497, "step": 291850 }, { "epoch": 0.5895756655098437, "grad_norm": 327.5431823730469, "learning_rate": 4.412832985360363e-06, "loss": 26.4961, "step": 291860 }, { "epoch": 0.5895958661425276, "grad_norm": 166.36444091796875, "learning_rate": 4.412486336211531e-06, "loss": 8.5061, "step": 291870 }, { "epoch": 0.5896160667752114, "grad_norm": 440.8267517089844, "learning_rate": 4.412139689926171e-06, "loss": 28.1508, "step": 291880 }, { "epoch": 0.5896362674078952, "grad_norm": 405.8255310058594, "learning_rate": 4.41179304650597e-06, "loss": 8.3289, "step": 291890 }, { "epoch": 0.589656468040579, "grad_norm": 469.4025573730469, "learning_rate": 4.4114464059526185e-06, "loss": 13.9428, "step": 291900 }, { "epoch": 0.5896766686732628, "grad_norm": 341.0938415527344, "learning_rate": 4.4110997682678056e-06, "loss": 11.3571, "step": 291910 }, { "epoch": 0.5896968693059467, "grad_norm": 256.1963806152344, "learning_rate": 4.410753133453222e-06, "loss": 8.6076, "step": 291920 }, { "epoch": 0.5897170699386305, "grad_norm": 234.28823852539062, "learning_rate": 4.410406501510554e-06, "loss": 34.8677, "step": 291930 }, { "epoch": 0.5897372705713143, "grad_norm": 345.32879638671875, "learning_rate": 4.410059872441494e-06, "loss": 22.7719, "step": 291940 }, { "epoch": 0.5897574712039981, "grad_norm": 347.39068603515625, "learning_rate": 4.409713246247732e-06, "loss": 20.8243, "step": 291950 }, 
{ "epoch": 0.589777671836682, "grad_norm": 381.36431884765625, "learning_rate": 4.409366622930955e-06, "loss": 18.0616, "step": 291960 }, { "epoch": 0.5897978724693658, "grad_norm": 341.75372314453125, "learning_rate": 4.409020002492854e-06, "loss": 14.8532, "step": 291970 }, { "epoch": 0.5898180731020496, "grad_norm": 1875.607177734375, "learning_rate": 4.4086733849351174e-06, "loss": 29.2385, "step": 291980 }, { "epoch": 0.5898382737347334, "grad_norm": 302.335693359375, "learning_rate": 4.408326770259435e-06, "loss": 12.9311, "step": 291990 }, { "epoch": 0.5898584743674172, "grad_norm": 287.7569274902344, "learning_rate": 4.4079801584674955e-06, "loss": 20.1383, "step": 292000 }, { "epoch": 0.589878675000101, "grad_norm": 484.9185791015625, "learning_rate": 4.407633549560991e-06, "loss": 18.7982, "step": 292010 }, { "epoch": 0.5898988756327849, "grad_norm": 401.7894287109375, "learning_rate": 4.407286943541606e-06, "loss": 14.3355, "step": 292020 }, { "epoch": 0.5899190762654687, "grad_norm": 468.5331115722656, "learning_rate": 4.406940340411034e-06, "loss": 25.4369, "step": 292030 }, { "epoch": 0.5899392768981524, "grad_norm": 231.7653350830078, "learning_rate": 4.406593740170963e-06, "loss": 12.8763, "step": 292040 }, { "epoch": 0.5899594775308362, "grad_norm": 547.8223876953125, "learning_rate": 4.406247142823082e-06, "loss": 14.7608, "step": 292050 }, { "epoch": 0.58997967816352, "grad_norm": 387.529541015625, "learning_rate": 4.4059005483690805e-06, "loss": 23.933, "step": 292060 }, { "epoch": 0.5899998787962039, "grad_norm": 193.5162353515625, "learning_rate": 4.405553956810646e-06, "loss": 21.2242, "step": 292070 }, { "epoch": 0.5900200794288877, "grad_norm": 779.8995361328125, "learning_rate": 4.405207368149472e-06, "loss": 23.684, "step": 292080 }, { "epoch": 0.5900402800615715, "grad_norm": 418.9433288574219, "learning_rate": 4.404860782387243e-06, "loss": 32.1124, "step": 292090 }, { "epoch": 0.5900604806942553, "grad_norm": 167.64556884765625, "learning_rate": 4.404514199525651e-06, "loss": 18.7874, "step": 292100 }, { "epoch": 0.5900806813269391, "grad_norm": 239.8409423828125, "learning_rate": 4.404167619566386e-06, "loss": 18.2346, "step": 292110 }, { "epoch": 0.590100881959623, "grad_norm": 13.00391674041748, "learning_rate": 4.403821042511135e-06, "loss": 16.5054, "step": 292120 }, { "epoch": 0.5901210825923068, "grad_norm": 2.531355142593384, "learning_rate": 4.403474468361587e-06, "loss": 17.1285, "step": 292130 }, { "epoch": 0.5901412832249906, "grad_norm": 568.1395874023438, "learning_rate": 4.4031278971194335e-06, "loss": 19.5606, "step": 292140 }, { "epoch": 0.5901614838576744, "grad_norm": 248.3144989013672, "learning_rate": 4.402781328786361e-06, "loss": 10.4688, "step": 292150 }, { "epoch": 0.5901816844903582, "grad_norm": 65.69276428222656, "learning_rate": 4.402434763364059e-06, "loss": 22.1509, "step": 292160 }, { "epoch": 0.5902018851230421, "grad_norm": 328.47430419921875, "learning_rate": 4.4020882008542185e-06, "loss": 16.2797, "step": 292170 }, { "epoch": 0.5902220857557259, "grad_norm": 91.8713150024414, "learning_rate": 4.401741641258529e-06, "loss": 28.9528, "step": 292180 }, { "epoch": 0.5902422863884097, "grad_norm": 318.346435546875, "learning_rate": 4.401395084578677e-06, "loss": 22.6256, "step": 292190 }, { "epoch": 0.5902624870210935, "grad_norm": 142.5501251220703, "learning_rate": 4.401048530816353e-06, "loss": 14.0909, "step": 292200 }, { "epoch": 0.5902826876537773, "grad_norm": 382.1142272949219, "learning_rate": 4.4007019799732465e-06, 
"loss": 20.4528, "step": 292210 }, { "epoch": 0.5903028882864612, "grad_norm": 438.4935302734375, "learning_rate": 4.400355432051044e-06, "loss": 24.4151, "step": 292220 }, { "epoch": 0.590323088919145, "grad_norm": 346.56231689453125, "learning_rate": 4.400008887051437e-06, "loss": 32.0562, "step": 292230 }, { "epoch": 0.5903432895518288, "grad_norm": 263.6671447753906, "learning_rate": 4.399662344976116e-06, "loss": 12.4963, "step": 292240 }, { "epoch": 0.5903634901845126, "grad_norm": 294.6814880371094, "learning_rate": 4.399315805826765e-06, "loss": 15.2193, "step": 292250 }, { "epoch": 0.5903836908171964, "grad_norm": 688.6610717773438, "learning_rate": 4.398969269605077e-06, "loss": 15.6875, "step": 292260 }, { "epoch": 0.5904038914498803, "grad_norm": 625.572021484375, "learning_rate": 4.398622736312741e-06, "loss": 37.7061, "step": 292270 }, { "epoch": 0.5904240920825641, "grad_norm": 464.1097412109375, "learning_rate": 4.398276205951443e-06, "loss": 26.6318, "step": 292280 }, { "epoch": 0.5904442927152478, "grad_norm": 963.5963134765625, "learning_rate": 4.3979296785228744e-06, "loss": 15.4111, "step": 292290 }, { "epoch": 0.5904644933479316, "grad_norm": 259.0113830566406, "learning_rate": 4.397583154028725e-06, "loss": 15.5287, "step": 292300 }, { "epoch": 0.5904846939806154, "grad_norm": 517.06005859375, "learning_rate": 4.397236632470681e-06, "loss": 22.2305, "step": 292310 }, { "epoch": 0.5905048946132992, "grad_norm": 461.43206787109375, "learning_rate": 4.3968901138504315e-06, "loss": 14.02, "step": 292320 }, { "epoch": 0.5905250952459831, "grad_norm": 482.19598388671875, "learning_rate": 4.396543598169667e-06, "loss": 12.3937, "step": 292330 }, { "epoch": 0.5905452958786669, "grad_norm": 509.9044189453125, "learning_rate": 4.3961970854300774e-06, "loss": 22.6853, "step": 292340 }, { "epoch": 0.5905654965113507, "grad_norm": 570.2136840820312, "learning_rate": 4.395850575633348e-06, "loss": 19.9869, "step": 292350 }, { "epoch": 0.5905856971440345, "grad_norm": 501.50726318359375, "learning_rate": 4.395504068781171e-06, "loss": 26.7932, "step": 292360 }, { "epoch": 0.5906058977767183, "grad_norm": 539.0986328125, "learning_rate": 4.3951575648752346e-06, "loss": 28.5295, "step": 292370 }, { "epoch": 0.5906260984094022, "grad_norm": 197.9573211669922, "learning_rate": 4.394811063917225e-06, "loss": 24.7447, "step": 292380 }, { "epoch": 0.590646299042086, "grad_norm": 216.0475616455078, "learning_rate": 4.394464565908832e-06, "loss": 15.1393, "step": 292390 }, { "epoch": 0.5906664996747698, "grad_norm": 563.8461303710938, "learning_rate": 4.394118070851749e-06, "loss": 34.5846, "step": 292400 }, { "epoch": 0.5906867003074536, "grad_norm": 1089.698486328125, "learning_rate": 4.3937715787476576e-06, "loss": 14.3999, "step": 292410 }, { "epoch": 0.5907069009401374, "grad_norm": 134.1151580810547, "learning_rate": 4.393425089598251e-06, "loss": 17.3636, "step": 292420 }, { "epoch": 0.5907271015728213, "grad_norm": 266.62481689453125, "learning_rate": 4.393078603405218e-06, "loss": 25.2376, "step": 292430 }, { "epoch": 0.5907473022055051, "grad_norm": 279.544189453125, "learning_rate": 4.392732120170245e-06, "loss": 15.8477, "step": 292440 }, { "epoch": 0.5907675028381889, "grad_norm": 489.8971252441406, "learning_rate": 4.392385639895022e-06, "loss": 18.6997, "step": 292450 }, { "epoch": 0.5907877034708727, "grad_norm": 110.44843292236328, "learning_rate": 4.392039162581239e-06, "loss": 10.3079, "step": 292460 }, { "epoch": 0.5908079041035565, "grad_norm": 130.7806396484375, 
"learning_rate": 4.391692688230583e-06, "loss": 17.9064, "step": 292470 }, { "epoch": 0.5908281047362404, "grad_norm": 449.1365966796875, "learning_rate": 4.391346216844741e-06, "loss": 18.5721, "step": 292480 }, { "epoch": 0.5908483053689242, "grad_norm": 174.7170867919922, "learning_rate": 4.390999748425405e-06, "loss": 17.5537, "step": 292490 }, { "epoch": 0.590868506001608, "grad_norm": 488.6687927246094, "learning_rate": 4.390653282974264e-06, "loss": 15.9013, "step": 292500 }, { "epoch": 0.5908887066342918, "grad_norm": 643.6644287109375, "learning_rate": 4.390306820493003e-06, "loss": 34.1992, "step": 292510 }, { "epoch": 0.5909089072669756, "grad_norm": 484.48309326171875, "learning_rate": 4.389960360983313e-06, "loss": 26.0471, "step": 292520 }, { "epoch": 0.5909291078996595, "grad_norm": 364.2028503417969, "learning_rate": 4.3896139044468835e-06, "loss": 10.2862, "step": 292530 }, { "epoch": 0.5909493085323433, "grad_norm": 269.10894775390625, "learning_rate": 4.389267450885399e-06, "loss": 9.8183, "step": 292540 }, { "epoch": 0.590969509165027, "grad_norm": 162.6874542236328, "learning_rate": 4.388921000300553e-06, "loss": 18.3012, "step": 292550 }, { "epoch": 0.5909897097977108, "grad_norm": 451.3988342285156, "learning_rate": 4.388574552694032e-06, "loss": 23.3555, "step": 292560 }, { "epoch": 0.5910099104303946, "grad_norm": 204.1512451171875, "learning_rate": 4.3882281080675234e-06, "loss": 12.2363, "step": 292570 }, { "epoch": 0.5910301110630785, "grad_norm": 255.6993865966797, "learning_rate": 4.387881666422718e-06, "loss": 13.7673, "step": 292580 }, { "epoch": 0.5910503116957623, "grad_norm": 717.266357421875, "learning_rate": 4.387535227761303e-06, "loss": 23.6473, "step": 292590 }, { "epoch": 0.5910705123284461, "grad_norm": 508.82684326171875, "learning_rate": 4.387188792084967e-06, "loss": 27.529, "step": 292600 }, { "epoch": 0.5910907129611299, "grad_norm": 211.74488830566406, "learning_rate": 4.386842359395396e-06, "loss": 15.0736, "step": 292610 }, { "epoch": 0.5911109135938137, "grad_norm": 214.7193603515625, "learning_rate": 4.3864959296942835e-06, "loss": 27.5348, "step": 292620 }, { "epoch": 0.5911311142264976, "grad_norm": 900.5426025390625, "learning_rate": 4.386149502983316e-06, "loss": 29.8566, "step": 292630 }, { "epoch": 0.5911513148591814, "grad_norm": 853.7367553710938, "learning_rate": 4.38580307926418e-06, "loss": 34.2588, "step": 292640 }, { "epoch": 0.5911715154918652, "grad_norm": 826.5645751953125, "learning_rate": 4.385456658538565e-06, "loss": 28.2878, "step": 292650 }, { "epoch": 0.591191716124549, "grad_norm": 290.5691833496094, "learning_rate": 4.385110240808161e-06, "loss": 19.5628, "step": 292660 }, { "epoch": 0.5912119167572328, "grad_norm": 705.2667236328125, "learning_rate": 4.384763826074655e-06, "loss": 16.3966, "step": 292670 }, { "epoch": 0.5912321173899167, "grad_norm": 506.11907958984375, "learning_rate": 4.384417414339734e-06, "loss": 13.8883, "step": 292680 }, { "epoch": 0.5912523180226005, "grad_norm": 436.683837890625, "learning_rate": 4.38407100560509e-06, "loss": 15.7811, "step": 292690 }, { "epoch": 0.5912725186552843, "grad_norm": 288.6622009277344, "learning_rate": 4.383724599872407e-06, "loss": 24.5138, "step": 292700 }, { "epoch": 0.5912927192879681, "grad_norm": 307.366455078125, "learning_rate": 4.383378197143376e-06, "loss": 28.7145, "step": 292710 }, { "epoch": 0.5913129199206519, "grad_norm": 119.44100189208984, "learning_rate": 4.3830317974196864e-06, "loss": 12.5694, "step": 292720 }, { "epoch": 
0.5913331205533358, "grad_norm": 551.1475219726562, "learning_rate": 4.382685400703024e-06, "loss": 16.1562, "step": 292730 }, { "epoch": 0.5913533211860196, "grad_norm": 293.0324401855469, "learning_rate": 4.382339006995078e-06, "loss": 25.7829, "step": 292740 }, { "epoch": 0.5913735218187034, "grad_norm": 155.7109832763672, "learning_rate": 4.381992616297538e-06, "loss": 24.7547, "step": 292750 }, { "epoch": 0.5913937224513872, "grad_norm": 279.54583740234375, "learning_rate": 4.38164622861209e-06, "loss": 20.3726, "step": 292760 }, { "epoch": 0.591413923084071, "grad_norm": 325.4188537597656, "learning_rate": 4.381299843940421e-06, "loss": 19.2596, "step": 292770 }, { "epoch": 0.5914341237167549, "grad_norm": 233.92567443847656, "learning_rate": 4.3809534622842245e-06, "loss": 34.5578, "step": 292780 }, { "epoch": 0.5914543243494387, "grad_norm": 247.04754638671875, "learning_rate": 4.380607083645185e-06, "loss": 19.3653, "step": 292790 }, { "epoch": 0.5914745249821224, "grad_norm": 258.3706970214844, "learning_rate": 4.380260708024991e-06, "loss": 18.3587, "step": 292800 }, { "epoch": 0.5914947256148062, "grad_norm": 388.7332458496094, "learning_rate": 4.379914335425332e-06, "loss": 17.6423, "step": 292810 }, { "epoch": 0.59151492624749, "grad_norm": 640.5714721679688, "learning_rate": 4.379567965847896e-06, "loss": 15.7696, "step": 292820 }, { "epoch": 0.5915351268801738, "grad_norm": 389.6690979003906, "learning_rate": 4.379221599294369e-06, "loss": 28.9675, "step": 292830 }, { "epoch": 0.5915553275128577, "grad_norm": 354.93170166015625, "learning_rate": 4.37887523576644e-06, "loss": 21.2728, "step": 292840 }, { "epoch": 0.5915755281455415, "grad_norm": 502.6838684082031, "learning_rate": 4.378528875265801e-06, "loss": 23.8819, "step": 292850 }, { "epoch": 0.5915957287782253, "grad_norm": 413.7265930175781, "learning_rate": 4.378182517794133e-06, "loss": 12.2479, "step": 292860 }, { "epoch": 0.5916159294109091, "grad_norm": 2.227445125579834, "learning_rate": 4.3778361633531296e-06, "loss": 15.8987, "step": 292870 }, { "epoch": 0.591636130043593, "grad_norm": 563.5757446289062, "learning_rate": 4.377489811944478e-06, "loss": 27.6906, "step": 292880 }, { "epoch": 0.5916563306762768, "grad_norm": 156.43028259277344, "learning_rate": 4.377143463569865e-06, "loss": 24.4495, "step": 292890 }, { "epoch": 0.5916765313089606, "grad_norm": 467.7649230957031, "learning_rate": 4.376797118230978e-06, "loss": 17.7552, "step": 292900 }, { "epoch": 0.5916967319416444, "grad_norm": 18.85779571533203, "learning_rate": 4.37645077592951e-06, "loss": 13.7122, "step": 292910 }, { "epoch": 0.5917169325743282, "grad_norm": 344.03594970703125, "learning_rate": 4.376104436667142e-06, "loss": 17.1261, "step": 292920 }, { "epoch": 0.591737133207012, "grad_norm": 419.68511962890625, "learning_rate": 4.375758100445564e-06, "loss": 23.6224, "step": 292930 }, { "epoch": 0.5917573338396959, "grad_norm": 208.86512756347656, "learning_rate": 4.375411767266468e-06, "loss": 10.4842, "step": 292940 }, { "epoch": 0.5917775344723797, "grad_norm": 637.0712890625, "learning_rate": 4.375065437131539e-06, "loss": 37.4901, "step": 292950 }, { "epoch": 0.5917977351050635, "grad_norm": 584.8602905273438, "learning_rate": 4.374719110042465e-06, "loss": 24.5428, "step": 292960 }, { "epoch": 0.5918179357377473, "grad_norm": 291.6265563964844, "learning_rate": 4.374372786000934e-06, "loss": 32.9707, "step": 292970 }, { "epoch": 0.5918381363704311, "grad_norm": 3.442250967025757, "learning_rate": 4.374026465008634e-06, "loss": 
47.2955, "step": 292980 }, { "epoch": 0.591858337003115, "grad_norm": 542.1782836914062, "learning_rate": 4.373680147067254e-06, "loss": 22.1078, "step": 292990 }, { "epoch": 0.5918785376357988, "grad_norm": 143.48858642578125, "learning_rate": 4.373333832178478e-06, "loss": 20.4554, "step": 293000 }, { "epoch": 0.5918987382684826, "grad_norm": 401.704345703125, "learning_rate": 4.372987520344002e-06, "loss": 11.1024, "step": 293010 }, { "epoch": 0.5919189389011664, "grad_norm": 41.04372024536133, "learning_rate": 4.3726412115655046e-06, "loss": 18.1149, "step": 293020 }, { "epoch": 0.5919391395338502, "grad_norm": 371.2960510253906, "learning_rate": 4.372294905844679e-06, "loss": 27.4418, "step": 293030 }, { "epoch": 0.5919593401665341, "grad_norm": 292.4776916503906, "learning_rate": 4.371948603183213e-06, "loss": 23.4645, "step": 293040 }, { "epoch": 0.5919795407992179, "grad_norm": 787.6366577148438, "learning_rate": 4.371602303582792e-06, "loss": 18.1713, "step": 293050 }, { "epoch": 0.5919997414319016, "grad_norm": 218.78627014160156, "learning_rate": 4.3712560070451055e-06, "loss": 10.6186, "step": 293060 }, { "epoch": 0.5920199420645854, "grad_norm": 202.3785858154297, "learning_rate": 4.3709097135718395e-06, "loss": 16.0047, "step": 293070 }, { "epoch": 0.5920401426972692, "grad_norm": 400.23284912109375, "learning_rate": 4.370563423164687e-06, "loss": 16.0984, "step": 293080 }, { "epoch": 0.592060343329953, "grad_norm": 400.9034729003906, "learning_rate": 4.370217135825329e-06, "loss": 26.1754, "step": 293090 }, { "epoch": 0.5920805439626369, "grad_norm": 914.625, "learning_rate": 4.369870851555457e-06, "loss": 28.7863, "step": 293100 }, { "epoch": 0.5921007445953207, "grad_norm": 603.1434936523438, "learning_rate": 4.369524570356759e-06, "loss": 25.2377, "step": 293110 }, { "epoch": 0.5921209452280045, "grad_norm": 254.59913635253906, "learning_rate": 4.369178292230921e-06, "loss": 11.5563, "step": 293120 }, { "epoch": 0.5921411458606883, "grad_norm": 118.04940795898438, "learning_rate": 4.368832017179631e-06, "loss": 24.2292, "step": 293130 }, { "epoch": 0.5921613464933722, "grad_norm": 122.41696166992188, "learning_rate": 4.368485745204579e-06, "loss": 15.2559, "step": 293140 }, { "epoch": 0.592181547126056, "grad_norm": 782.8267211914062, "learning_rate": 4.3681394763074495e-06, "loss": 33.4571, "step": 293150 }, { "epoch": 0.5922017477587398, "grad_norm": 564.6712036132812, "learning_rate": 4.36779321048993e-06, "loss": 16.5709, "step": 293160 }, { "epoch": 0.5922219483914236, "grad_norm": 537.654541015625, "learning_rate": 4.367446947753712e-06, "loss": 16.9303, "step": 293170 }, { "epoch": 0.5922421490241074, "grad_norm": 601.1361694335938, "learning_rate": 4.36710068810048e-06, "loss": 22.0937, "step": 293180 }, { "epoch": 0.5922623496567913, "grad_norm": 832.7262573242188, "learning_rate": 4.366754431531923e-06, "loss": 13.957, "step": 293190 }, { "epoch": 0.5922825502894751, "grad_norm": 318.0546875, "learning_rate": 4.366408178049728e-06, "loss": 13.1061, "step": 293200 }, { "epoch": 0.5923027509221589, "grad_norm": 454.3702087402344, "learning_rate": 4.366061927655582e-06, "loss": 21.6157, "step": 293210 }, { "epoch": 0.5923229515548427, "grad_norm": 170.03672790527344, "learning_rate": 4.3657156803511745e-06, "loss": 17.3532, "step": 293220 }, { "epoch": 0.5923431521875265, "grad_norm": 299.2825927734375, "learning_rate": 4.3653694361381894e-06, "loss": 12.3919, "step": 293230 }, { "epoch": 0.5923633528202104, "grad_norm": 180.69033813476562, "learning_rate": 
4.365023195018319e-06, "loss": 23.5594, "step": 293240 }, { "epoch": 0.5923835534528942, "grad_norm": 471.05535888671875, "learning_rate": 4.3646769569932475e-06, "loss": 27.935, "step": 293250 }, { "epoch": 0.592403754085578, "grad_norm": 272.734130859375, "learning_rate": 4.364330722064664e-06, "loss": 17.1042, "step": 293260 }, { "epoch": 0.5924239547182618, "grad_norm": 185.55250549316406, "learning_rate": 4.363984490234257e-06, "loss": 19.1317, "step": 293270 }, { "epoch": 0.5924441553509456, "grad_norm": 324.3248291015625, "learning_rate": 4.36363826150371e-06, "loss": 12.6496, "step": 293280 }, { "epoch": 0.5924643559836295, "grad_norm": 113.91188049316406, "learning_rate": 4.3632920358747125e-06, "loss": 12.1835, "step": 293290 }, { "epoch": 0.5924845566163133, "grad_norm": 245.42926025390625, "learning_rate": 4.362945813348956e-06, "loss": 22.5566, "step": 293300 }, { "epoch": 0.5925047572489971, "grad_norm": 617.8198852539062, "learning_rate": 4.36259959392812e-06, "loss": 28.8473, "step": 293310 }, { "epoch": 0.5925249578816808, "grad_norm": 411.3521423339844, "learning_rate": 4.3622533776138985e-06, "loss": 22.5069, "step": 293320 }, { "epoch": 0.5925451585143646, "grad_norm": 479.8760070800781, "learning_rate": 4.361907164407977e-06, "loss": 27.9318, "step": 293330 }, { "epoch": 0.5925653591470484, "grad_norm": 671.952392578125, "learning_rate": 4.361560954312042e-06, "loss": 12.5237, "step": 293340 }, { "epoch": 0.5925855597797323, "grad_norm": 266.8735656738281, "learning_rate": 4.361214747327781e-06, "loss": 10.6885, "step": 293350 }, { "epoch": 0.5926057604124161, "grad_norm": 1242.9803466796875, "learning_rate": 4.360868543456883e-06, "loss": 29.3166, "step": 293360 }, { "epoch": 0.5926259610450999, "grad_norm": 262.1438903808594, "learning_rate": 4.360522342701033e-06, "loss": 22.7987, "step": 293370 }, { "epoch": 0.5926461616777837, "grad_norm": 17.517539978027344, "learning_rate": 4.360176145061919e-06, "loss": 20.8275, "step": 293380 }, { "epoch": 0.5926663623104675, "grad_norm": 324.86761474609375, "learning_rate": 4.35982995054123e-06, "loss": 15.8237, "step": 293390 }, { "epoch": 0.5926865629431514, "grad_norm": 419.5167236328125, "learning_rate": 4.359483759140654e-06, "loss": 29.5502, "step": 293400 }, { "epoch": 0.5927067635758352, "grad_norm": 268.809326171875, "learning_rate": 4.359137570861874e-06, "loss": 12.0043, "step": 293410 }, { "epoch": 0.592726964208519, "grad_norm": 126.4826431274414, "learning_rate": 4.35879138570658e-06, "loss": 17.419, "step": 293420 }, { "epoch": 0.5927471648412028, "grad_norm": 553.036865234375, "learning_rate": 4.35844520367646e-06, "loss": 22.4381, "step": 293430 }, { "epoch": 0.5927673654738866, "grad_norm": 197.84173583984375, "learning_rate": 4.358099024773199e-06, "loss": 32.0205, "step": 293440 }, { "epoch": 0.5927875661065705, "grad_norm": 254.42730712890625, "learning_rate": 4.357752848998486e-06, "loss": 17.9257, "step": 293450 }, { "epoch": 0.5928077667392543, "grad_norm": 474.2779235839844, "learning_rate": 4.357406676354009e-06, "loss": 20.7048, "step": 293460 }, { "epoch": 0.5928279673719381, "grad_norm": 229.4510955810547, "learning_rate": 4.357060506841452e-06, "loss": 14.3312, "step": 293470 }, { "epoch": 0.5928481680046219, "grad_norm": 218.31007385253906, "learning_rate": 4.356714340462505e-06, "loss": 14.5799, "step": 293480 }, { "epoch": 0.5928683686373057, "grad_norm": 120.61658477783203, "learning_rate": 4.356368177218855e-06, "loss": 20.3388, "step": 293490 }, { "epoch": 0.5928885692699896, "grad_norm": 
566.609619140625, "learning_rate": 4.356022017112187e-06, "loss": 15.9435, "step": 293500 }, { "epoch": 0.5929087699026734, "grad_norm": 252.36489868164062, "learning_rate": 4.35567586014419e-06, "loss": 13.5621, "step": 293510 }, { "epoch": 0.5929289705353572, "grad_norm": 575.0886840820312, "learning_rate": 4.355329706316552e-06, "loss": 27.5622, "step": 293520 }, { "epoch": 0.592949171168041, "grad_norm": 577.52490234375, "learning_rate": 4.354983555630957e-06, "loss": 28.4473, "step": 293530 }, { "epoch": 0.5929693718007248, "grad_norm": 69.46257019042969, "learning_rate": 4.354637408089093e-06, "loss": 13.2063, "step": 293540 }, { "epoch": 0.5929895724334087, "grad_norm": 408.7625732421875, "learning_rate": 4.35429126369265e-06, "loss": 18.4322, "step": 293550 }, { "epoch": 0.5930097730660925, "grad_norm": 450.35546875, "learning_rate": 4.353945122443314e-06, "loss": 22.1054, "step": 293560 }, { "epoch": 0.5930299736987762, "grad_norm": 386.18634033203125, "learning_rate": 4.3535989843427695e-06, "loss": 16.7074, "step": 293570 }, { "epoch": 0.59305017433146, "grad_norm": 566.9544677734375, "learning_rate": 4.3532528493927055e-06, "loss": 21.7555, "step": 293580 }, { "epoch": 0.5930703749641438, "grad_norm": 207.8059844970703, "learning_rate": 4.352906717594809e-06, "loss": 13.8164, "step": 293590 }, { "epoch": 0.5930905755968277, "grad_norm": 539.943603515625, "learning_rate": 4.352560588950766e-06, "loss": 20.9404, "step": 293600 }, { "epoch": 0.5931107762295115, "grad_norm": 396.5697937011719, "learning_rate": 4.352214463462263e-06, "loss": 23.2138, "step": 293610 }, { "epoch": 0.5931309768621953, "grad_norm": 54.271324157714844, "learning_rate": 4.351868341130992e-06, "loss": 8.0321, "step": 293620 }, { "epoch": 0.5931511774948791, "grad_norm": 297.89630126953125, "learning_rate": 4.351522221958633e-06, "loss": 15.4077, "step": 293630 }, { "epoch": 0.5931713781275629, "grad_norm": 708.1715698242188, "learning_rate": 4.351176105946876e-06, "loss": 30.5469, "step": 293640 }, { "epoch": 0.5931915787602468, "grad_norm": 676.7525634765625, "learning_rate": 4.350829993097409e-06, "loss": 27.1389, "step": 293650 }, { "epoch": 0.5932117793929306, "grad_norm": 178.55630493164062, "learning_rate": 4.350483883411918e-06, "loss": 18.748, "step": 293660 }, { "epoch": 0.5932319800256144, "grad_norm": 358.53997802734375, "learning_rate": 4.350137776892089e-06, "loss": 15.7355, "step": 293670 }, { "epoch": 0.5932521806582982, "grad_norm": 176.16119384765625, "learning_rate": 4.349791673539609e-06, "loss": 24.2379, "step": 293680 }, { "epoch": 0.593272381290982, "grad_norm": 330.54815673828125, "learning_rate": 4.349445573356168e-06, "loss": 5.8712, "step": 293690 }, { "epoch": 0.5932925819236659, "grad_norm": 103.69247436523438, "learning_rate": 4.349099476343448e-06, "loss": 21.6638, "step": 293700 }, { "epoch": 0.5933127825563497, "grad_norm": 0.0001358857552986592, "learning_rate": 4.3487533825031395e-06, "loss": 41.3696, "step": 293710 }, { "epoch": 0.5933329831890335, "grad_norm": 274.2425842285156, "learning_rate": 4.348407291836928e-06, "loss": 22.3109, "step": 293720 }, { "epoch": 0.5933531838217173, "grad_norm": 250.2849578857422, "learning_rate": 4.3480612043465e-06, "loss": 12.1256, "step": 293730 }, { "epoch": 0.5933733844544011, "grad_norm": 43.5379753112793, "learning_rate": 4.347715120033543e-06, "loss": 27.7267, "step": 293740 }, { "epoch": 0.593393585087085, "grad_norm": 77.8458480834961, "learning_rate": 4.347369038899744e-06, "loss": 10.1903, "step": 293750 }, { "epoch": 
0.5934137857197688, "grad_norm": 312.5565185546875, "learning_rate": 4.3470229609467875e-06, "loss": 13.7661, "step": 293760 }, { "epoch": 0.5934339863524526, "grad_norm": 589.7279052734375, "learning_rate": 4.346676886176361e-06, "loss": 26.557, "step": 293770 }, { "epoch": 0.5934541869851364, "grad_norm": 236.9444122314453, "learning_rate": 4.346330814590156e-06, "loss": 24.97, "step": 293780 }, { "epoch": 0.5934743876178202, "grad_norm": 679.8486328125, "learning_rate": 4.345984746189852e-06, "loss": 26.8117, "step": 293790 }, { "epoch": 0.593494588250504, "grad_norm": 509.91424560546875, "learning_rate": 4.34563868097714e-06, "loss": 21.3296, "step": 293800 }, { "epoch": 0.5935147888831879, "grad_norm": 497.14593505859375, "learning_rate": 4.3452926189537056e-06, "loss": 22.1593, "step": 293810 }, { "epoch": 0.5935349895158717, "grad_norm": 683.8970336914062, "learning_rate": 4.344946560121236e-06, "loss": 21.4249, "step": 293820 }, { "epoch": 0.5935551901485554, "grad_norm": 367.6731262207031, "learning_rate": 4.344600504481416e-06, "loss": 17.2732, "step": 293830 }, { "epoch": 0.5935753907812392, "grad_norm": 224.6978302001953, "learning_rate": 4.344254452035934e-06, "loss": 20.5409, "step": 293840 }, { "epoch": 0.593595591413923, "grad_norm": 494.1619567871094, "learning_rate": 4.343908402786478e-06, "loss": 17.6511, "step": 293850 }, { "epoch": 0.5936157920466069, "grad_norm": 355.9205627441406, "learning_rate": 4.343562356734732e-06, "loss": 11.0737, "step": 293860 }, { "epoch": 0.5936359926792907, "grad_norm": 320.394287109375, "learning_rate": 4.3432163138823826e-06, "loss": 36.4259, "step": 293870 }, { "epoch": 0.5936561933119745, "grad_norm": 359.1979675292969, "learning_rate": 4.34287027423112e-06, "loss": 17.2608, "step": 293880 }, { "epoch": 0.5936763939446583, "grad_norm": 204.18450927734375, "learning_rate": 4.342524237782625e-06, "loss": 10.8793, "step": 293890 }, { "epoch": 0.5936965945773421, "grad_norm": 608.7417602539062, "learning_rate": 4.342178204538588e-06, "loss": 15.9976, "step": 293900 }, { "epoch": 0.593716795210026, "grad_norm": 302.6972351074219, "learning_rate": 4.341832174500696e-06, "loss": 15.7724, "step": 293910 }, { "epoch": 0.5937369958427098, "grad_norm": 715.2201538085938, "learning_rate": 4.341486147670631e-06, "loss": 24.8976, "step": 293920 }, { "epoch": 0.5937571964753936, "grad_norm": 836.394775390625, "learning_rate": 4.341140124050085e-06, "loss": 15.729, "step": 293930 }, { "epoch": 0.5937773971080774, "grad_norm": 155.0250701904297, "learning_rate": 4.340794103640743e-06, "loss": 22.8357, "step": 293940 }, { "epoch": 0.5937975977407612, "grad_norm": 191.35006713867188, "learning_rate": 4.340448086444288e-06, "loss": 13.4048, "step": 293950 }, { "epoch": 0.5938177983734451, "grad_norm": 397.1959533691406, "learning_rate": 4.340102072462411e-06, "loss": 39.0802, "step": 293960 }, { "epoch": 0.5938379990061289, "grad_norm": 601.370361328125, "learning_rate": 4.339756061696796e-06, "loss": 39.2011, "step": 293970 }, { "epoch": 0.5938581996388127, "grad_norm": 276.6083984375, "learning_rate": 4.33941005414913e-06, "loss": 19.5583, "step": 293980 }, { "epoch": 0.5938784002714965, "grad_norm": 400.5001525878906, "learning_rate": 4.339064049821098e-06, "loss": 15.7781, "step": 293990 }, { "epoch": 0.5938986009041803, "grad_norm": 632.0048828125, "learning_rate": 4.3387180487143875e-06, "loss": 19.4148, "step": 294000 }, { "epoch": 0.5939188015368642, "grad_norm": 75.03510284423828, "learning_rate": 4.338372050830687e-06, "loss": 12.7211, "step": 
294010 }, { "epoch": 0.593939002169548, "grad_norm": 412.6486511230469, "learning_rate": 4.3380260561716795e-06, "loss": 30.6878, "step": 294020 }, { "epoch": 0.5939592028022318, "grad_norm": 323.1822204589844, "learning_rate": 4.337680064739053e-06, "loss": 13.3549, "step": 294030 }, { "epoch": 0.5939794034349156, "grad_norm": 25.212068557739258, "learning_rate": 4.337334076534495e-06, "loss": 32.215, "step": 294040 }, { "epoch": 0.5939996040675994, "grad_norm": 437.5789489746094, "learning_rate": 4.336988091559688e-06, "loss": 18.3768, "step": 294050 }, { "epoch": 0.5940198047002833, "grad_norm": 297.6226806640625, "learning_rate": 4.3366421098163215e-06, "loss": 22.5547, "step": 294060 }, { "epoch": 0.5940400053329671, "grad_norm": 350.1910705566406, "learning_rate": 4.336296131306083e-06, "loss": 14.6618, "step": 294070 }, { "epoch": 0.5940602059656508, "grad_norm": 228.41531372070312, "learning_rate": 4.335950156030653e-06, "loss": 20.6949, "step": 294080 }, { "epoch": 0.5940804065983346, "grad_norm": 513.9437255859375, "learning_rate": 4.335604183991723e-06, "loss": 13.8732, "step": 294090 }, { "epoch": 0.5941006072310184, "grad_norm": 364.6128234863281, "learning_rate": 4.335258215190979e-06, "loss": 28.7425, "step": 294100 }, { "epoch": 0.5941208078637022, "grad_norm": 495.7064514160156, "learning_rate": 4.334912249630104e-06, "loss": 18.0418, "step": 294110 }, { "epoch": 0.5941410084963861, "grad_norm": 301.4275817871094, "learning_rate": 4.334566287310787e-06, "loss": 16.9719, "step": 294120 }, { "epoch": 0.5941612091290699, "grad_norm": 692.076416015625, "learning_rate": 4.334220328234711e-06, "loss": 20.4319, "step": 294130 }, { "epoch": 0.5941814097617537, "grad_norm": 390.8522644042969, "learning_rate": 4.333874372403569e-06, "loss": 17.7004, "step": 294140 }, { "epoch": 0.5942016103944375, "grad_norm": 545.078369140625, "learning_rate": 4.3335284198190385e-06, "loss": 20.4591, "step": 294150 }, { "epoch": 0.5942218110271213, "grad_norm": 110.72166442871094, "learning_rate": 4.33318247048281e-06, "loss": 18.9082, "step": 294160 }, { "epoch": 0.5942420116598052, "grad_norm": 375.0422668457031, "learning_rate": 4.332836524396571e-06, "loss": 14.5648, "step": 294170 }, { "epoch": 0.594262212292489, "grad_norm": 702.2772827148438, "learning_rate": 4.332490581562005e-06, "loss": 17.8529, "step": 294180 }, { "epoch": 0.5942824129251728, "grad_norm": 118.50646209716797, "learning_rate": 4.332144641980799e-06, "loss": 14.6945, "step": 294190 }, { "epoch": 0.5943026135578566, "grad_norm": 527.292236328125, "learning_rate": 4.331798705654639e-06, "loss": 35.8904, "step": 294200 }, { "epoch": 0.5943228141905404, "grad_norm": 257.5843811035156, "learning_rate": 4.331452772585212e-06, "loss": 16.7288, "step": 294210 }, { "epoch": 0.5943430148232243, "grad_norm": 155.10333251953125, "learning_rate": 4.3311068427742e-06, "loss": 28.4758, "step": 294220 }, { "epoch": 0.5943632154559081, "grad_norm": 193.52496337890625, "learning_rate": 4.330760916223297e-06, "loss": 24.1052, "step": 294230 }, { "epoch": 0.5943834160885919, "grad_norm": 369.0464172363281, "learning_rate": 4.33041499293418e-06, "loss": 13.5614, "step": 294240 }, { "epoch": 0.5944036167212757, "grad_norm": 40.26343536376953, "learning_rate": 4.33006907290854e-06, "loss": 13.6466, "step": 294250 }, { "epoch": 0.5944238173539595, "grad_norm": 196.87574768066406, "learning_rate": 4.329723156148064e-06, "loss": 19.1329, "step": 294260 }, { "epoch": 0.5944440179866434, "grad_norm": 245.2478485107422, "learning_rate": 
4.3293772426544336e-06, "loss": 12.3665, "step": 294270 }, { "epoch": 0.5944642186193272, "grad_norm": 285.3468933105469, "learning_rate": 4.329031332429338e-06, "loss": 19.2312, "step": 294280 }, { "epoch": 0.594484419252011, "grad_norm": 89.25577545166016, "learning_rate": 4.328685425474462e-06, "loss": 17.1383, "step": 294290 }, { "epoch": 0.5945046198846948, "grad_norm": 230.0323486328125, "learning_rate": 4.328339521791493e-06, "loss": 28.7411, "step": 294300 }, { "epoch": 0.5945248205173786, "grad_norm": 319.6100158691406, "learning_rate": 4.327993621382115e-06, "loss": 15.9985, "step": 294310 }, { "epoch": 0.5945450211500625, "grad_norm": 63.36458969116211, "learning_rate": 4.327647724248014e-06, "loss": 18.7383, "step": 294320 }, { "epoch": 0.5945652217827463, "grad_norm": 1.0024418830871582, "learning_rate": 4.327301830390878e-06, "loss": 9.2964, "step": 294330 }, { "epoch": 0.59458542241543, "grad_norm": 668.4827880859375, "learning_rate": 4.32695593981239e-06, "loss": 28.4542, "step": 294340 }, { "epoch": 0.5946056230481138, "grad_norm": 564.9884033203125, "learning_rate": 4.326610052514238e-06, "loss": 14.9649, "step": 294350 }, { "epoch": 0.5946258236807976, "grad_norm": 358.91278076171875, "learning_rate": 4.326264168498106e-06, "loss": 14.0499, "step": 294360 }, { "epoch": 0.5946460243134815, "grad_norm": 235.9729461669922, "learning_rate": 4.325918287765682e-06, "loss": 17.5944, "step": 294370 }, { "epoch": 0.5946662249461653, "grad_norm": 16.25257682800293, "learning_rate": 4.325572410318648e-06, "loss": 18.7083, "step": 294380 }, { "epoch": 0.5946864255788491, "grad_norm": 360.6956481933594, "learning_rate": 4.325226536158696e-06, "loss": 30.3617, "step": 294390 }, { "epoch": 0.5947066262115329, "grad_norm": 248.47532653808594, "learning_rate": 4.3248806652875045e-06, "loss": 23.8472, "step": 294400 }, { "epoch": 0.5947268268442167, "grad_norm": 6.759785175323486, "learning_rate": 4.324534797706764e-06, "loss": 12.691, "step": 294410 }, { "epoch": 0.5947470274769006, "grad_norm": 445.4151916503906, "learning_rate": 4.32418893341816e-06, "loss": 15.8771, "step": 294420 }, { "epoch": 0.5947672281095844, "grad_norm": 374.858154296875, "learning_rate": 4.323843072423376e-06, "loss": 22.9934, "step": 294430 }, { "epoch": 0.5947874287422682, "grad_norm": 469.3614807128906, "learning_rate": 4.323497214724099e-06, "loss": 24.0278, "step": 294440 }, { "epoch": 0.594807629374952, "grad_norm": 648.9302978515625, "learning_rate": 4.323151360322014e-06, "loss": 14.9563, "step": 294450 }, { "epoch": 0.5948278300076358, "grad_norm": 381.16802978515625, "learning_rate": 4.32280550921881e-06, "loss": 11.6384, "step": 294460 }, { "epoch": 0.5948480306403197, "grad_norm": 357.280517578125, "learning_rate": 4.3224596614161666e-06, "loss": 16.645, "step": 294470 }, { "epoch": 0.5948682312730035, "grad_norm": 221.17393493652344, "learning_rate": 4.322113816915774e-06, "loss": 17.212, "step": 294480 }, { "epoch": 0.5948884319056873, "grad_norm": 515.7291259765625, "learning_rate": 4.321767975719317e-06, "loss": 27.4941, "step": 294490 }, { "epoch": 0.5949086325383711, "grad_norm": 434.7697448730469, "learning_rate": 4.321422137828479e-06, "loss": 13.8194, "step": 294500 }, { "epoch": 0.5949288331710549, "grad_norm": 470.9134521484375, "learning_rate": 4.321076303244948e-06, "loss": 10.7291, "step": 294510 }, { "epoch": 0.5949490338037388, "grad_norm": 41.78329849243164, "learning_rate": 4.320730471970409e-06, "loss": 20.9479, "step": 294520 }, { "epoch": 0.5949692344364226, "grad_norm": 
477.39697265625, "learning_rate": 4.320384644006546e-06, "loss": 18.3437, "step": 294530 }, { "epoch": 0.5949894350691064, "grad_norm": 253.376220703125, "learning_rate": 4.320038819355047e-06, "loss": 17.6595, "step": 294540 }, { "epoch": 0.5950096357017902, "grad_norm": 38.193458557128906, "learning_rate": 4.319692998017597e-06, "loss": 19.5714, "step": 294550 }, { "epoch": 0.595029836334474, "grad_norm": 192.7274627685547, "learning_rate": 4.31934717999588e-06, "loss": 13.3706, "step": 294560 }, { "epoch": 0.5950500369671579, "grad_norm": 348.2456359863281, "learning_rate": 4.319001365291582e-06, "loss": 23.303, "step": 294570 }, { "epoch": 0.5950702375998417, "grad_norm": 609.5001220703125, "learning_rate": 4.31865555390639e-06, "loss": 24.8376, "step": 294580 }, { "epoch": 0.5950904382325255, "grad_norm": 475.0218200683594, "learning_rate": 4.318309745841987e-06, "loss": 20.352, "step": 294590 }, { "epoch": 0.5951106388652092, "grad_norm": 442.212158203125, "learning_rate": 4.317963941100059e-06, "loss": 17.3036, "step": 294600 }, { "epoch": 0.595130839497893, "grad_norm": 381.7318115234375, "learning_rate": 4.3176181396822925e-06, "loss": 19.4467, "step": 294610 }, { "epoch": 0.5951510401305768, "grad_norm": 458.5332336425781, "learning_rate": 4.317272341590373e-06, "loss": 23.6071, "step": 294620 }, { "epoch": 0.5951712407632607, "grad_norm": 537.0899047851562, "learning_rate": 4.3169265468259855e-06, "loss": 28.2252, "step": 294630 }, { "epoch": 0.5951914413959445, "grad_norm": 19.255142211914062, "learning_rate": 4.316580755390814e-06, "loss": 8.8401, "step": 294640 }, { "epoch": 0.5952116420286283, "grad_norm": 665.742431640625, "learning_rate": 4.316234967286548e-06, "loss": 13.0372, "step": 294650 }, { "epoch": 0.5952318426613121, "grad_norm": 465.10662841796875, "learning_rate": 4.315889182514867e-06, "loss": 23.982, "step": 294660 }, { "epoch": 0.595252043293996, "grad_norm": 176.72573852539062, "learning_rate": 4.315543401077458e-06, "loss": 14.8843, "step": 294670 }, { "epoch": 0.5952722439266798, "grad_norm": 399.09808349609375, "learning_rate": 4.315197622976011e-06, "loss": 22.9115, "step": 294680 }, { "epoch": 0.5952924445593636, "grad_norm": 55.9252815246582, "learning_rate": 4.314851848212205e-06, "loss": 26.8541, "step": 294690 }, { "epoch": 0.5953126451920474, "grad_norm": 866.7681884765625, "learning_rate": 4.314506076787729e-06, "loss": 22.4973, "step": 294700 }, { "epoch": 0.5953328458247312, "grad_norm": 427.6380615234375, "learning_rate": 4.314160308704269e-06, "loss": 16.7196, "step": 294710 }, { "epoch": 0.595353046457415, "grad_norm": 166.1153564453125, "learning_rate": 4.313814543963505e-06, "loss": 16.7142, "step": 294720 }, { "epoch": 0.5953732470900989, "grad_norm": 237.71177673339844, "learning_rate": 4.313468782567128e-06, "loss": 18.7256, "step": 294730 }, { "epoch": 0.5953934477227827, "grad_norm": 105.42239379882812, "learning_rate": 4.313123024516819e-06, "loss": 15.7055, "step": 294740 }, { "epoch": 0.5954136483554665, "grad_norm": 139.54412841796875, "learning_rate": 4.312777269814268e-06, "loss": 19.2914, "step": 294750 }, { "epoch": 0.5954338489881503, "grad_norm": 216.41641235351562, "learning_rate": 4.312431518461154e-06, "loss": 18.8749, "step": 294760 }, { "epoch": 0.5954540496208341, "grad_norm": 298.35150146484375, "learning_rate": 4.312085770459167e-06, "loss": 14.0649, "step": 294770 }, { "epoch": 0.595474250253518, "grad_norm": 1004.7127075195312, "learning_rate": 4.311740025809992e-06, "loss": 12.4464, "step": 294780 }, { "epoch": 
0.5954944508862018, "grad_norm": 178.72906494140625, "learning_rate": 4.31139428451531e-06, "loss": 33.6189, "step": 294790 }, { "epoch": 0.5955146515188856, "grad_norm": 72.146484375, "learning_rate": 4.31104854657681e-06, "loss": 14.6239, "step": 294800 }, { "epoch": 0.5955348521515694, "grad_norm": 362.326416015625, "learning_rate": 4.310702811996177e-06, "loss": 11.8445, "step": 294810 }, { "epoch": 0.5955550527842532, "grad_norm": 178.36807250976562, "learning_rate": 4.310357080775092e-06, "loss": 28.5427, "step": 294820 }, { "epoch": 0.5955752534169371, "grad_norm": 10.326326370239258, "learning_rate": 4.3100113529152444e-06, "loss": 22.2627, "step": 294830 }, { "epoch": 0.5955954540496209, "grad_norm": 168.36253356933594, "learning_rate": 4.30966562841832e-06, "loss": 10.1222, "step": 294840 }, { "epoch": 0.5956156546823046, "grad_norm": 315.1814270019531, "learning_rate": 4.309319907285998e-06, "loss": 14.5882, "step": 294850 }, { "epoch": 0.5956358553149884, "grad_norm": 566.19287109375, "learning_rate": 4.308974189519968e-06, "loss": 19.206, "step": 294860 }, { "epoch": 0.5956560559476722, "grad_norm": 843.1705932617188, "learning_rate": 4.308628475121916e-06, "loss": 25.139, "step": 294870 }, { "epoch": 0.5956762565803561, "grad_norm": 272.6224060058594, "learning_rate": 4.308282764093523e-06, "loss": 25.7788, "step": 294880 }, { "epoch": 0.5956964572130399, "grad_norm": 408.2192077636719, "learning_rate": 4.307937056436476e-06, "loss": 9.7835, "step": 294890 }, { "epoch": 0.5957166578457237, "grad_norm": 103.95111083984375, "learning_rate": 4.307591352152459e-06, "loss": 24.1882, "step": 294900 }, { "epoch": 0.5957368584784075, "grad_norm": 958.2185668945312, "learning_rate": 4.307245651243161e-06, "loss": 22.2643, "step": 294910 }, { "epoch": 0.5957570591110913, "grad_norm": 357.21533203125, "learning_rate": 4.30689995371026e-06, "loss": 15.4138, "step": 294920 }, { "epoch": 0.5957772597437752, "grad_norm": 567.7787475585938, "learning_rate": 4.306554259555447e-06, "loss": 25.7151, "step": 294930 }, { "epoch": 0.595797460376459, "grad_norm": 434.95721435546875, "learning_rate": 4.306208568780404e-06, "loss": 17.9521, "step": 294940 }, { "epoch": 0.5958176610091428, "grad_norm": 255.12109375, "learning_rate": 4.3058628813868154e-06, "loss": 14.506, "step": 294950 }, { "epoch": 0.5958378616418266, "grad_norm": 244.69798278808594, "learning_rate": 4.305517197376367e-06, "loss": 8.4031, "step": 294960 }, { "epoch": 0.5958580622745104, "grad_norm": 134.893310546875, "learning_rate": 4.305171516750746e-06, "loss": 32.4018, "step": 294970 }, { "epoch": 0.5958782629071943, "grad_norm": 343.2240295410156, "learning_rate": 4.3048258395116326e-06, "loss": 11.0011, "step": 294980 }, { "epoch": 0.5958984635398781, "grad_norm": 864.6599731445312, "learning_rate": 4.304480165660712e-06, "loss": 22.8115, "step": 294990 }, { "epoch": 0.5959186641725619, "grad_norm": 333.3636474609375, "learning_rate": 4.304134495199675e-06, "loss": 17.9298, "step": 295000 }, { "epoch": 0.5959388648052457, "grad_norm": 546.2763671875, "learning_rate": 4.303788828130198e-06, "loss": 21.2158, "step": 295010 }, { "epoch": 0.5959590654379295, "grad_norm": 239.50503540039062, "learning_rate": 4.303443164453971e-06, "loss": 16.3914, "step": 295020 }, { "epoch": 0.5959792660706134, "grad_norm": 202.0510711669922, "learning_rate": 4.303097504172679e-06, "loss": 16.3034, "step": 295030 }, { "epoch": 0.5959994667032972, "grad_norm": 90.5951919555664, "learning_rate": 4.302751847288005e-06, "loss": 13.3175, "step": 295040 
}, { "epoch": 0.596019667335981, "grad_norm": 271.5483703613281, "learning_rate": 4.302406193801632e-06, "loss": 32.728, "step": 295050 }, { "epoch": 0.5960398679686648, "grad_norm": 708.0242309570312, "learning_rate": 4.302060543715247e-06, "loss": 14.4512, "step": 295060 }, { "epoch": 0.5960600686013486, "grad_norm": 195.20159912109375, "learning_rate": 4.301714897030537e-06, "loss": 16.2609, "step": 295070 }, { "epoch": 0.5960802692340325, "grad_norm": 470.51141357421875, "learning_rate": 4.3013692537491805e-06, "loss": 12.1136, "step": 295080 }, { "epoch": 0.5961004698667163, "grad_norm": 82.72560119628906, "learning_rate": 4.3010236138728674e-06, "loss": 28.6893, "step": 295090 }, { "epoch": 0.5961206704994001, "grad_norm": 1.0823280811309814, "learning_rate": 4.300677977403281e-06, "loss": 9.5059, "step": 295100 }, { "epoch": 0.5961408711320838, "grad_norm": 656.1585693359375, "learning_rate": 4.3003323443421045e-06, "loss": 27.7189, "step": 295110 }, { "epoch": 0.5961610717647676, "grad_norm": 13.325555801391602, "learning_rate": 4.299986714691022e-06, "loss": 10.0998, "step": 295120 }, { "epoch": 0.5961812723974514, "grad_norm": 448.62322998046875, "learning_rate": 4.299641088451721e-06, "loss": 16.4809, "step": 295130 }, { "epoch": 0.5962014730301353, "grad_norm": 335.9667053222656, "learning_rate": 4.299295465625884e-06, "loss": 19.6562, "step": 295140 }, { "epoch": 0.5962216736628191, "grad_norm": 517.5101928710938, "learning_rate": 4.298949846215195e-06, "loss": 27.424, "step": 295150 }, { "epoch": 0.5962418742955029, "grad_norm": 513.454345703125, "learning_rate": 4.298604230221341e-06, "loss": 16.3277, "step": 295160 }, { "epoch": 0.5962620749281867, "grad_norm": 480.7550964355469, "learning_rate": 4.298258617646004e-06, "loss": 24.3027, "step": 295170 }, { "epoch": 0.5962822755608705, "grad_norm": 394.1954345703125, "learning_rate": 4.29791300849087e-06, "loss": 15.316, "step": 295180 }, { "epoch": 0.5963024761935544, "grad_norm": 57.472801208496094, "learning_rate": 4.297567402757621e-06, "loss": 26.7314, "step": 295190 }, { "epoch": 0.5963226768262382, "grad_norm": 47.60441207885742, "learning_rate": 4.297221800447946e-06, "loss": 16.3556, "step": 295200 }, { "epoch": 0.596342877458922, "grad_norm": 525.2633666992188, "learning_rate": 4.296876201563524e-06, "loss": 12.7697, "step": 295210 }, { "epoch": 0.5963630780916058, "grad_norm": 275.6269226074219, "learning_rate": 4.296530606106043e-06, "loss": 16.9218, "step": 295220 }, { "epoch": 0.5963832787242896, "grad_norm": 219.87693786621094, "learning_rate": 4.296185014077188e-06, "loss": 12.1012, "step": 295230 }, { "epoch": 0.5964034793569735, "grad_norm": 270.0559387207031, "learning_rate": 4.295839425478641e-06, "loss": 16.1903, "step": 295240 }, { "epoch": 0.5964236799896573, "grad_norm": 473.7545471191406, "learning_rate": 4.295493840312087e-06, "loss": 26.2634, "step": 295250 }, { "epoch": 0.5964438806223411, "grad_norm": 516.518798828125, "learning_rate": 4.295148258579211e-06, "loss": 21.7435, "step": 295260 }, { "epoch": 0.5964640812550249, "grad_norm": 380.75726318359375, "learning_rate": 4.294802680281696e-06, "loss": 12.6696, "step": 295270 }, { "epoch": 0.5964842818877087, "grad_norm": 1.5298367738723755, "learning_rate": 4.294457105421228e-06, "loss": 15.1886, "step": 295280 }, { "epoch": 0.5965044825203926, "grad_norm": 239.9437713623047, "learning_rate": 4.294111533999492e-06, "loss": 11.9446, "step": 295290 }, { "epoch": 0.5965246831530764, "grad_norm": 701.978271484375, "learning_rate": 
4.293765966018167e-06, "loss": 27.484, "step": 295300 }, { "epoch": 0.5965448837857602, "grad_norm": 544.4490966796875, "learning_rate": 4.293420401478943e-06, "loss": 14.4572, "step": 295310 }, { "epoch": 0.596565084418444, "grad_norm": 171.7335968017578, "learning_rate": 4.293074840383504e-06, "loss": 16.894, "step": 295320 }, { "epoch": 0.5965852850511278, "grad_norm": 219.708251953125, "learning_rate": 4.29272928273353e-06, "loss": 17.1215, "step": 295330 }, { "epoch": 0.5966054856838117, "grad_norm": 466.144775390625, "learning_rate": 4.2923837285307085e-06, "loss": 15.0668, "step": 295340 }, { "epoch": 0.5966256863164955, "grad_norm": 44.40127182006836, "learning_rate": 4.292038177776722e-06, "loss": 24.7863, "step": 295350 }, { "epoch": 0.5966458869491792, "grad_norm": 426.68804931640625, "learning_rate": 4.291692630473258e-06, "loss": 17.3891, "step": 295360 }, { "epoch": 0.596666087581863, "grad_norm": 218.92205810546875, "learning_rate": 4.291347086621996e-06, "loss": 11.4738, "step": 295370 }, { "epoch": 0.5966862882145468, "grad_norm": 366.910888671875, "learning_rate": 4.2910015462246225e-06, "loss": 14.6246, "step": 295380 }, { "epoch": 0.5967064888472307, "grad_norm": 339.2355041503906, "learning_rate": 4.290656009282823e-06, "loss": 11.8092, "step": 295390 }, { "epoch": 0.5967266894799145, "grad_norm": 243.0731964111328, "learning_rate": 4.290310475798278e-06, "loss": 19.5325, "step": 295400 }, { "epoch": 0.5967468901125983, "grad_norm": 843.3153076171875, "learning_rate": 4.289964945772675e-06, "loss": 30.3153, "step": 295410 }, { "epoch": 0.5967670907452821, "grad_norm": 384.42706298828125, "learning_rate": 4.289619419207698e-06, "loss": 40.5178, "step": 295420 }, { "epoch": 0.5967872913779659, "grad_norm": 330.98248291015625, "learning_rate": 4.289273896105027e-06, "loss": 16.1977, "step": 295430 }, { "epoch": 0.5968074920106498, "grad_norm": 389.6863708496094, "learning_rate": 4.288928376466349e-06, "loss": 11.3452, "step": 295440 }, { "epoch": 0.5968276926433336, "grad_norm": 231.9348602294922, "learning_rate": 4.288582860293351e-06, "loss": 13.2791, "step": 295450 }, { "epoch": 0.5968478932760174, "grad_norm": 305.238525390625, "learning_rate": 4.288237347587711e-06, "loss": 23.6382, "step": 295460 }, { "epoch": 0.5968680939087012, "grad_norm": 509.225830078125, "learning_rate": 4.287891838351117e-06, "loss": 11.3871, "step": 295470 }, { "epoch": 0.596888294541385, "grad_norm": 183.37425231933594, "learning_rate": 4.2875463325852514e-06, "loss": 15.3508, "step": 295480 }, { "epoch": 0.5969084951740689, "grad_norm": 359.0205383300781, "learning_rate": 4.287200830291799e-06, "loss": 31.7106, "step": 295490 }, { "epoch": 0.5969286958067527, "grad_norm": 648.9158935546875, "learning_rate": 4.286855331472442e-06, "loss": 22.7002, "step": 295500 }, { "epoch": 0.5969488964394365, "grad_norm": 893.8304443359375, "learning_rate": 4.286509836128866e-06, "loss": 29.3073, "step": 295510 }, { "epoch": 0.5969690970721203, "grad_norm": 126.27137756347656, "learning_rate": 4.286164344262756e-06, "loss": 18.4401, "step": 295520 }, { "epoch": 0.5969892977048041, "grad_norm": 558.2559204101562, "learning_rate": 4.285818855875793e-06, "loss": 20.1282, "step": 295530 }, { "epoch": 0.597009498337488, "grad_norm": 188.3037109375, "learning_rate": 4.285473370969663e-06, "loss": 24.9841, "step": 295540 }, { "epoch": 0.5970296989701718, "grad_norm": 323.2847900390625, "learning_rate": 4.285127889546049e-06, "loss": 18.3854, "step": 295550 }, { "epoch": 0.5970498996028556, "grad_norm": 
452.3296203613281, "learning_rate": 4.284782411606635e-06, "loss": 16.278, "step": 295560 }, { "epoch": 0.5970701002355394, "grad_norm": 438.90399169921875, "learning_rate": 4.284436937153105e-06, "loss": 14.4432, "step": 295570 }, { "epoch": 0.5970903008682232, "grad_norm": 384.4964904785156, "learning_rate": 4.284091466187142e-06, "loss": 29.4804, "step": 295580 }, { "epoch": 0.597110501500907, "grad_norm": 133.99334716796875, "learning_rate": 4.283745998710431e-06, "loss": 14.5344, "step": 295590 }, { "epoch": 0.5971307021335909, "grad_norm": 545.9478149414062, "learning_rate": 4.283400534724654e-06, "loss": 17.9776, "step": 295600 }, { "epoch": 0.5971509027662747, "grad_norm": 436.95098876953125, "learning_rate": 4.283055074231498e-06, "loss": 21.3602, "step": 295610 }, { "epoch": 0.5971711033989584, "grad_norm": 215.1697235107422, "learning_rate": 4.282709617232642e-06, "loss": 17.1219, "step": 295620 }, { "epoch": 0.5971913040316422, "grad_norm": 426.19122314453125, "learning_rate": 4.282364163729773e-06, "loss": 15.4137, "step": 295630 }, { "epoch": 0.597211504664326, "grad_norm": 336.3559875488281, "learning_rate": 4.282018713724576e-06, "loss": 19.4231, "step": 295640 }, { "epoch": 0.5972317052970099, "grad_norm": 368.91864013671875, "learning_rate": 4.281673267218731e-06, "loss": 20.4048, "step": 295650 }, { "epoch": 0.5972519059296937, "grad_norm": 424.2174987792969, "learning_rate": 4.281327824213923e-06, "loss": 19.186, "step": 295660 }, { "epoch": 0.5972721065623775, "grad_norm": 219.901611328125, "learning_rate": 4.280982384711835e-06, "loss": 27.6856, "step": 295670 }, { "epoch": 0.5972923071950613, "grad_norm": 342.3939514160156, "learning_rate": 4.280636948714155e-06, "loss": 20.1481, "step": 295680 }, { "epoch": 0.5973125078277451, "grad_norm": 530.6146850585938, "learning_rate": 4.280291516222561e-06, "loss": 12.2937, "step": 295690 }, { "epoch": 0.597332708460429, "grad_norm": 410.14599609375, "learning_rate": 4.279946087238739e-06, "loss": 15.0413, "step": 295700 }, { "epoch": 0.5973529090931128, "grad_norm": 253.63958740234375, "learning_rate": 4.279600661764374e-06, "loss": 17.5502, "step": 295710 }, { "epoch": 0.5973731097257966, "grad_norm": 572.5477294921875, "learning_rate": 4.279255239801146e-06, "loss": 43.3426, "step": 295720 }, { "epoch": 0.5973933103584804, "grad_norm": 0.3940809965133667, "learning_rate": 4.278909821350742e-06, "loss": 11.7097, "step": 295730 }, { "epoch": 0.5974135109911642, "grad_norm": 280.0107421875, "learning_rate": 4.278564406414844e-06, "loss": 18.1503, "step": 295740 }, { "epoch": 0.5974337116238481, "grad_norm": 126.16145324707031, "learning_rate": 4.278218994995135e-06, "loss": 13.3549, "step": 295750 }, { "epoch": 0.5974539122565319, "grad_norm": 1052.9522705078125, "learning_rate": 4.277873587093298e-06, "loss": 22.0248, "step": 295760 }, { "epoch": 0.5974741128892157, "grad_norm": 211.38116455078125, "learning_rate": 4.27752818271102e-06, "loss": 11.4287, "step": 295770 }, { "epoch": 0.5974943135218995, "grad_norm": 423.4015197753906, "learning_rate": 4.27718278184998e-06, "loss": 23.6106, "step": 295780 }, { "epoch": 0.5975145141545833, "grad_norm": 307.5094299316406, "learning_rate": 4.276837384511864e-06, "loss": 13.9268, "step": 295790 }, { "epoch": 0.5975347147872672, "grad_norm": 250.709716796875, "learning_rate": 4.2764919906983545e-06, "loss": 21.8706, "step": 295800 }, { "epoch": 0.597554915419951, "grad_norm": 214.07861328125, "learning_rate": 4.276146600411137e-06, "loss": 16.992, "step": 295810 }, { "epoch": 
0.5975751160526348, "grad_norm": 915.2093505859375, "learning_rate": 4.2758012136518925e-06, "loss": 19.7232, "step": 295820 }, { "epoch": 0.5975953166853186, "grad_norm": 499.217041015625, "learning_rate": 4.275455830422303e-06, "loss": 16.4675, "step": 295830 }, { "epoch": 0.5976155173180024, "grad_norm": 702.2924194335938, "learning_rate": 4.275110450724056e-06, "loss": 15.909, "step": 295840 }, { "epoch": 0.5976357179506863, "grad_norm": 562.8489990234375, "learning_rate": 4.274765074558832e-06, "loss": 22.8658, "step": 295850 }, { "epoch": 0.5976559185833701, "grad_norm": 255.215087890625, "learning_rate": 4.274419701928315e-06, "loss": 13.3467, "step": 295860 }, { "epoch": 0.5976761192160538, "grad_norm": 214.14102172851562, "learning_rate": 4.27407433283419e-06, "loss": 16.7613, "step": 295870 }, { "epoch": 0.5976963198487376, "grad_norm": 669.8508911132812, "learning_rate": 4.273728967278137e-06, "loss": 15.156, "step": 295880 }, { "epoch": 0.5977165204814214, "grad_norm": 414.38214111328125, "learning_rate": 4.273383605261841e-06, "loss": 15.0318, "step": 295890 }, { "epoch": 0.5977367211141053, "grad_norm": 272.2400817871094, "learning_rate": 4.273038246786986e-06, "loss": 24.4473, "step": 295900 }, { "epoch": 0.5977569217467891, "grad_norm": 270.507568359375, "learning_rate": 4.272692891855253e-06, "loss": 24.971, "step": 295910 }, { "epoch": 0.5977771223794729, "grad_norm": 531.7005004882812, "learning_rate": 4.272347540468327e-06, "loss": 28.0525, "step": 295920 }, { "epoch": 0.5977973230121567, "grad_norm": 172.25665283203125, "learning_rate": 4.272002192627892e-06, "loss": 19.4454, "step": 295930 }, { "epoch": 0.5978175236448405, "grad_norm": 162.74212646484375, "learning_rate": 4.2716568483356295e-06, "loss": 8.7676, "step": 295940 }, { "epoch": 0.5978377242775244, "grad_norm": 199.9047088623047, "learning_rate": 4.2713115075932225e-06, "loss": 18.3432, "step": 295950 }, { "epoch": 0.5978579249102082, "grad_norm": 138.35780334472656, "learning_rate": 4.270966170402354e-06, "loss": 20.9482, "step": 295960 }, { "epoch": 0.597878125542892, "grad_norm": 848.1473388671875, "learning_rate": 4.2706208367647115e-06, "loss": 26.043, "step": 295970 }, { "epoch": 0.5978983261755758, "grad_norm": 380.1227722167969, "learning_rate": 4.270275506681971e-06, "loss": 33.2933, "step": 295980 }, { "epoch": 0.5979185268082596, "grad_norm": 455.56158447265625, "learning_rate": 4.26993018015582e-06, "loss": 25.1772, "step": 295990 }, { "epoch": 0.5979387274409435, "grad_norm": 517.9584350585938, "learning_rate": 4.269584857187942e-06, "loss": 16.8953, "step": 296000 }, { "epoch": 0.5979589280736273, "grad_norm": 146.59242248535156, "learning_rate": 4.2692395377800185e-06, "loss": 15.436, "step": 296010 }, { "epoch": 0.5979791287063111, "grad_norm": 33.330013275146484, "learning_rate": 4.268894221933733e-06, "loss": 27.1096, "step": 296020 }, { "epoch": 0.5979993293389949, "grad_norm": 71.1676025390625, "learning_rate": 4.268548909650768e-06, "loss": 26.74, "step": 296030 }, { "epoch": 0.5980195299716787, "grad_norm": 183.0547332763672, "learning_rate": 4.2682036009328065e-06, "loss": 16.7322, "step": 296040 }, { "epoch": 0.5980397306043626, "grad_norm": 302.4768371582031, "learning_rate": 4.267858295781531e-06, "loss": 18.9091, "step": 296050 }, { "epoch": 0.5980599312370464, "grad_norm": 466.72283935546875, "learning_rate": 4.267512994198629e-06, "loss": 13.3001, "step": 296060 }, { "epoch": 0.5980801318697302, "grad_norm": 458.6282653808594, "learning_rate": 4.267167696185776e-06, "loss": 
23.9082, "step": 296070 }, { "epoch": 0.598100332502414, "grad_norm": 208.31300354003906, "learning_rate": 4.2668224017446595e-06, "loss": 13.3059, "step": 296080 }, { "epoch": 0.5981205331350978, "grad_norm": 283.8260498046875, "learning_rate": 4.266477110876963e-06, "loss": 23.312, "step": 296090 }, { "epoch": 0.5981407337677817, "grad_norm": 240.76292419433594, "learning_rate": 4.266131823584368e-06, "loss": 28.9725, "step": 296100 }, { "epoch": 0.5981609344004655, "grad_norm": 61.494651794433594, "learning_rate": 4.265786539868556e-06, "loss": 13.0544, "step": 296110 }, { "epoch": 0.5981811350331493, "grad_norm": 152.66912841796875, "learning_rate": 4.265441259731211e-06, "loss": 25.169, "step": 296120 }, { "epoch": 0.598201335665833, "grad_norm": 390.42437744140625, "learning_rate": 4.26509598317402e-06, "loss": 14.1549, "step": 296130 }, { "epoch": 0.5982215362985168, "grad_norm": 536.2457885742188, "learning_rate": 4.2647507101986575e-06, "loss": 15.1672, "step": 296140 }, { "epoch": 0.5982417369312006, "grad_norm": 223.54296875, "learning_rate": 4.264405440806813e-06, "loss": 8.8315, "step": 296150 }, { "epoch": 0.5982619375638845, "grad_norm": 168.26861572265625, "learning_rate": 4.264060175000168e-06, "loss": 15.8226, "step": 296160 }, { "epoch": 0.5982821381965683, "grad_norm": 524.5977783203125, "learning_rate": 4.263714912780403e-06, "loss": 22.0013, "step": 296170 }, { "epoch": 0.5983023388292521, "grad_norm": 292.9800109863281, "learning_rate": 4.263369654149203e-06, "loss": 28.8297, "step": 296180 }, { "epoch": 0.5983225394619359, "grad_norm": 314.0179443359375, "learning_rate": 4.263024399108251e-06, "loss": 13.5808, "step": 296190 }, { "epoch": 0.5983427400946197, "grad_norm": 1170.5003662109375, "learning_rate": 4.262679147659227e-06, "loss": 30.0497, "step": 296200 }, { "epoch": 0.5983629407273036, "grad_norm": 240.40748596191406, "learning_rate": 4.262333899803814e-06, "loss": 24.7635, "step": 296210 }, { "epoch": 0.5983831413599874, "grad_norm": 187.4853515625, "learning_rate": 4.2619886555436995e-06, "loss": 22.2111, "step": 296220 }, { "epoch": 0.5984033419926712, "grad_norm": 603.9671630859375, "learning_rate": 4.26164341488056e-06, "loss": 12.4047, "step": 296230 }, { "epoch": 0.598423542625355, "grad_norm": 636.5455322265625, "learning_rate": 4.261298177816082e-06, "loss": 21.9845, "step": 296240 }, { "epoch": 0.5984437432580388, "grad_norm": 815.9725341796875, "learning_rate": 4.260952944351947e-06, "loss": 16.762, "step": 296250 }, { "epoch": 0.5984639438907227, "grad_norm": 391.21893310546875, "learning_rate": 4.260607714489839e-06, "loss": 15.0348, "step": 296260 }, { "epoch": 0.5984841445234065, "grad_norm": 636.109375, "learning_rate": 4.260262488231438e-06, "loss": 16.916, "step": 296270 }, { "epoch": 0.5985043451560903, "grad_norm": 450.681884765625, "learning_rate": 4.259917265578427e-06, "loss": 22.7164, "step": 296280 }, { "epoch": 0.5985245457887741, "grad_norm": 313.92584228515625, "learning_rate": 4.259572046532493e-06, "loss": 16.3863, "step": 296290 }, { "epoch": 0.5985447464214579, "grad_norm": 300.09429931640625, "learning_rate": 4.259226831095311e-06, "loss": 38.5883, "step": 296300 }, { "epoch": 0.5985649470541418, "grad_norm": 708.135498046875, "learning_rate": 4.258881619268569e-06, "loss": 26.8095, "step": 296310 }, { "epoch": 0.5985851476868256, "grad_norm": 33.75373077392578, "learning_rate": 4.258536411053949e-06, "loss": 25.2684, "step": 296320 }, { "epoch": 0.5986053483195094, "grad_norm": 500.82025146484375, "learning_rate": 
4.258191206453132e-06, "loss": 29.452, "step": 296330 }, { "epoch": 0.5986255489521932, "grad_norm": 106.94981384277344, "learning_rate": 4.2578460054678e-06, "loss": 25.7791, "step": 296340 }, { "epoch": 0.598645749584877, "grad_norm": 538.3078002929688, "learning_rate": 4.25750080809964e-06, "loss": 23.9905, "step": 296350 }, { "epoch": 0.5986659502175609, "grad_norm": 315.728515625, "learning_rate": 4.2571556143503275e-06, "loss": 11.6234, "step": 296360 }, { "epoch": 0.5986861508502447, "grad_norm": 468.2890625, "learning_rate": 4.256810424221548e-06, "loss": 22.3584, "step": 296370 }, { "epoch": 0.5987063514829285, "grad_norm": 641.2987060546875, "learning_rate": 4.256465237714989e-06, "loss": 29.7856, "step": 296380 }, { "epoch": 0.5987265521156122, "grad_norm": 332.5379333496094, "learning_rate": 4.2561200548323224e-06, "loss": 11.4542, "step": 296390 }, { "epoch": 0.598746752748296, "grad_norm": 576.611083984375, "learning_rate": 4.255774875575239e-06, "loss": 43.6743, "step": 296400 }, { "epoch": 0.5987669533809798, "grad_norm": 110.72086334228516, "learning_rate": 4.2554296999454194e-06, "loss": 19.4639, "step": 296410 }, { "epoch": 0.5987871540136637, "grad_norm": 120.25221252441406, "learning_rate": 4.2550845279445455e-06, "loss": 10.3498, "step": 296420 }, { "epoch": 0.5988073546463475, "grad_norm": 310.85357666015625, "learning_rate": 4.254739359574298e-06, "loss": 12.2019, "step": 296430 }, { "epoch": 0.5988275552790313, "grad_norm": 131.8038330078125, "learning_rate": 4.25439419483636e-06, "loss": 13.7566, "step": 296440 }, { "epoch": 0.5988477559117151, "grad_norm": 153.33343505859375, "learning_rate": 4.2540490337324156e-06, "loss": 17.411, "step": 296450 }, { "epoch": 0.598867956544399, "grad_norm": 706.9985961914062, "learning_rate": 4.253703876264144e-06, "loss": 36.8503, "step": 296460 }, { "epoch": 0.5988881571770828, "grad_norm": 45.09660720825195, "learning_rate": 4.253358722433231e-06, "loss": 19.5842, "step": 296470 }, { "epoch": 0.5989083578097666, "grad_norm": 87.81842803955078, "learning_rate": 4.253013572241356e-06, "loss": 23.6001, "step": 296480 }, { "epoch": 0.5989285584424504, "grad_norm": 404.8854064941406, "learning_rate": 4.252668425690203e-06, "loss": 35.4945, "step": 296490 }, { "epoch": 0.5989487590751342, "grad_norm": 411.6787414550781, "learning_rate": 4.2523232827814534e-06, "loss": 14.2964, "step": 296500 }, { "epoch": 0.598968959707818, "grad_norm": 830.9055786132812, "learning_rate": 4.251978143516789e-06, "loss": 29.1576, "step": 296510 }, { "epoch": 0.5989891603405019, "grad_norm": 81.86772155761719, "learning_rate": 4.251633007897891e-06, "loss": 15.3215, "step": 296520 }, { "epoch": 0.5990093609731857, "grad_norm": 503.2459716796875, "learning_rate": 4.251287875926445e-06, "loss": 17.9395, "step": 296530 }, { "epoch": 0.5990295616058695, "grad_norm": 279.11480712890625, "learning_rate": 4.250942747604131e-06, "loss": 23.3277, "step": 296540 }, { "epoch": 0.5990497622385533, "grad_norm": 188.3449249267578, "learning_rate": 4.250597622932631e-06, "loss": 17.9754, "step": 296550 }, { "epoch": 0.5990699628712371, "grad_norm": 211.36280822753906, "learning_rate": 4.250252501913627e-06, "loss": 18.1663, "step": 296560 }, { "epoch": 0.599090163503921, "grad_norm": 249.63092041015625, "learning_rate": 4.249907384548801e-06, "loss": 15.9893, "step": 296570 }, { "epoch": 0.5991103641366048, "grad_norm": 161.63848876953125, "learning_rate": 4.249562270839837e-06, "loss": 24.6142, "step": 296580 }, { "epoch": 0.5991305647692886, "grad_norm": 
1131.3251953125, "learning_rate": 4.249217160788413e-06, "loss": 25.2706, "step": 296590 }, { "epoch": 0.5991507654019724, "grad_norm": 171.084228515625, "learning_rate": 4.248872054396215e-06, "loss": 15.0584, "step": 296600 }, { "epoch": 0.5991709660346562, "grad_norm": 323.4442443847656, "learning_rate": 4.248526951664924e-06, "loss": 11.89, "step": 296610 }, { "epoch": 0.5991911666673401, "grad_norm": 329.6981506347656, "learning_rate": 4.248181852596221e-06, "loss": 20.4941, "step": 296620 }, { "epoch": 0.5992113673000239, "grad_norm": 318.2145690917969, "learning_rate": 4.247836757191787e-06, "loss": 27.5163, "step": 296630 }, { "epoch": 0.5992315679327076, "grad_norm": 196.76779174804688, "learning_rate": 4.2474916654533085e-06, "loss": 11.5402, "step": 296640 }, { "epoch": 0.5992517685653914, "grad_norm": 235.93809509277344, "learning_rate": 4.247146577382462e-06, "loss": 14.8903, "step": 296650 }, { "epoch": 0.5992719691980752, "grad_norm": 127.6539077758789, "learning_rate": 4.246801492980931e-06, "loss": 17.8151, "step": 296660 }, { "epoch": 0.5992921698307591, "grad_norm": 345.1177978515625, "learning_rate": 4.246456412250401e-06, "loss": 9.2841, "step": 296670 }, { "epoch": 0.5993123704634429, "grad_norm": 241.55496215820312, "learning_rate": 4.246111335192548e-06, "loss": 14.0622, "step": 296680 }, { "epoch": 0.5993325710961267, "grad_norm": 325.2892150878906, "learning_rate": 4.245766261809059e-06, "loss": 21.6404, "step": 296690 }, { "epoch": 0.5993527717288105, "grad_norm": 291.8177185058594, "learning_rate": 4.245421192101613e-06, "loss": 20.2458, "step": 296700 }, { "epoch": 0.5993729723614943, "grad_norm": 349.75384521484375, "learning_rate": 4.245076126071894e-06, "loss": 16.9885, "step": 296710 }, { "epoch": 0.5993931729941782, "grad_norm": 4.573607444763184, "learning_rate": 4.244731063721581e-06, "loss": 19.9236, "step": 296720 }, { "epoch": 0.599413373626862, "grad_norm": 216.05674743652344, "learning_rate": 4.244386005052356e-06, "loss": 15.9433, "step": 296730 }, { "epoch": 0.5994335742595458, "grad_norm": 305.31494140625, "learning_rate": 4.244040950065905e-06, "loss": 19.6417, "step": 296740 }, { "epoch": 0.5994537748922296, "grad_norm": 313.0830078125, "learning_rate": 4.243695898763904e-06, "loss": 16.5436, "step": 296750 }, { "epoch": 0.5994739755249134, "grad_norm": 445.0265197753906, "learning_rate": 4.243350851148039e-06, "loss": 16.1247, "step": 296760 }, { "epoch": 0.5994941761575973, "grad_norm": 342.6043701171875, "learning_rate": 4.24300580721999e-06, "loss": 33.9881, "step": 296770 }, { "epoch": 0.5995143767902811, "grad_norm": 153.574951171875, "learning_rate": 4.242660766981439e-06, "loss": 11.1193, "step": 296780 }, { "epoch": 0.5995345774229649, "grad_norm": 244.50338745117188, "learning_rate": 4.242315730434066e-06, "loss": 14.952, "step": 296790 }, { "epoch": 0.5995547780556487, "grad_norm": 485.69671630859375, "learning_rate": 4.241970697579557e-06, "loss": 29.0701, "step": 296800 }, { "epoch": 0.5995749786883325, "grad_norm": 143.21177673339844, "learning_rate": 4.2416256684195885e-06, "loss": 22.8393, "step": 296810 }, { "epoch": 0.5995951793210164, "grad_norm": 553.4241333007812, "learning_rate": 4.241280642955845e-06, "loss": 30.5847, "step": 296820 }, { "epoch": 0.5996153799537002, "grad_norm": 447.57470703125, "learning_rate": 4.24093562119001e-06, "loss": 10.8338, "step": 296830 }, { "epoch": 0.599635580586384, "grad_norm": 306.1315002441406, "learning_rate": 4.240590603123759e-06, "loss": 30.0672, "step": 296840 }, { "epoch": 
0.5996557812190678, "grad_norm": 371.797607421875, "learning_rate": 4.240245588758778e-06, "loss": 22.4331, "step": 296850 }, { "epoch": 0.5996759818517516, "grad_norm": 228.33164978027344, "learning_rate": 4.23990057809675e-06, "loss": 20.1725, "step": 296860 }, { "epoch": 0.5996961824844355, "grad_norm": 332.0324401855469, "learning_rate": 4.239555571139353e-06, "loss": 15.8661, "step": 296870 }, { "epoch": 0.5997163831171193, "grad_norm": 328.9191589355469, "learning_rate": 4.23921056788827e-06, "loss": 11.8912, "step": 296880 }, { "epoch": 0.5997365837498031, "grad_norm": 709.0545654296875, "learning_rate": 4.238865568345182e-06, "loss": 24.6709, "step": 296890 }, { "epoch": 0.5997567843824868, "grad_norm": 398.2712707519531, "learning_rate": 4.238520572511773e-06, "loss": 17.7421, "step": 296900 }, { "epoch": 0.5997769850151706, "grad_norm": 472.1728820800781, "learning_rate": 4.238175580389719e-06, "loss": 10.7249, "step": 296910 }, { "epoch": 0.5997971856478544, "grad_norm": 359.57073974609375, "learning_rate": 4.2378305919807075e-06, "loss": 28.1431, "step": 296920 }, { "epoch": 0.5998173862805383, "grad_norm": 801.3305053710938, "learning_rate": 4.237485607286417e-06, "loss": 25.6771, "step": 296930 }, { "epoch": 0.5998375869132221, "grad_norm": 300.6571044921875, "learning_rate": 4.237140626308528e-06, "loss": 13.8791, "step": 296940 }, { "epoch": 0.5998577875459059, "grad_norm": 122.9664077758789, "learning_rate": 4.2367956490487235e-06, "loss": 15.0534, "step": 296950 }, { "epoch": 0.5998779881785897, "grad_norm": 481.1971740722656, "learning_rate": 4.2364506755086856e-06, "loss": 21.6385, "step": 296960 }, { "epoch": 0.5998981888112735, "grad_norm": 484.2899169921875, "learning_rate": 4.236105705690094e-06, "loss": 21.4996, "step": 296970 }, { "epoch": 0.5999183894439574, "grad_norm": 476.460693359375, "learning_rate": 4.2357607395946275e-06, "loss": 29.8308, "step": 296980 }, { "epoch": 0.5999385900766412, "grad_norm": 158.4099884033203, "learning_rate": 4.235415777223976e-06, "loss": 19.2788, "step": 296990 }, { "epoch": 0.599958790709325, "grad_norm": 468.2158203125, "learning_rate": 4.23507081857981e-06, "loss": 14.2631, "step": 297000 }, { "epoch": 0.5999789913420088, "grad_norm": 345.7831726074219, "learning_rate": 4.234725863663819e-06, "loss": 16.6418, "step": 297010 }, { "epoch": 0.5999991919746926, "grad_norm": 166.0795135498047, "learning_rate": 4.23438091247768e-06, "loss": 12.6064, "step": 297020 }, { "epoch": 0.6000193926073765, "grad_norm": 749.620361328125, "learning_rate": 4.234035965023077e-06, "loss": 42.4881, "step": 297030 }, { "epoch": 0.6000395932400603, "grad_norm": 724.5010986328125, "learning_rate": 4.233691021301689e-06, "loss": 25.3071, "step": 297040 }, { "epoch": 0.6000597938727441, "grad_norm": 126.72706604003906, "learning_rate": 4.233346081315197e-06, "loss": 35.7059, "step": 297050 }, { "epoch": 0.6000799945054279, "grad_norm": 453.6716613769531, "learning_rate": 4.233001145065286e-06, "loss": 15.4326, "step": 297060 }, { "epoch": 0.6001001951381117, "grad_norm": 97.83983612060547, "learning_rate": 4.232656212553631e-06, "loss": 14.1084, "step": 297070 }, { "epoch": 0.6001203957707956, "grad_norm": 549.7896118164062, "learning_rate": 4.232311283781918e-06, "loss": 15.4117, "step": 297080 }, { "epoch": 0.6001405964034794, "grad_norm": 498.0633850097656, "learning_rate": 4.231966358751828e-06, "loss": 15.6065, "step": 297090 }, { "epoch": 0.6001607970361632, "grad_norm": 610.4193725585938, "learning_rate": 4.23162143746504e-06, "loss": 22.8777, 
"step": 297100 }, { "epoch": 0.600180997668847, "grad_norm": 411.71893310546875, "learning_rate": 4.231276519923235e-06, "loss": 13.5345, "step": 297110 }, { "epoch": 0.6002011983015308, "grad_norm": 678.7944946289062, "learning_rate": 4.230931606128096e-06, "loss": 40.3877, "step": 297120 }, { "epoch": 0.6002213989342147, "grad_norm": 550.8128662109375, "learning_rate": 4.230586696081303e-06, "loss": 14.962, "step": 297130 }, { "epoch": 0.6002415995668985, "grad_norm": 138.277587890625, "learning_rate": 4.230241789784535e-06, "loss": 10.8564, "step": 297140 }, { "epoch": 0.6002618001995822, "grad_norm": 884.471923828125, "learning_rate": 4.2298968872394784e-06, "loss": 15.1153, "step": 297150 }, { "epoch": 0.600282000832266, "grad_norm": 203.5852813720703, "learning_rate": 4.229551988447809e-06, "loss": 26.2936, "step": 297160 }, { "epoch": 0.6003022014649498, "grad_norm": 141.3539276123047, "learning_rate": 4.22920709341121e-06, "loss": 25.7338, "step": 297170 }, { "epoch": 0.6003224020976337, "grad_norm": 209.94851684570312, "learning_rate": 4.228862202131362e-06, "loss": 14.39, "step": 297180 }, { "epoch": 0.6003426027303175, "grad_norm": 109.16659545898438, "learning_rate": 4.228517314609948e-06, "loss": 32.7086, "step": 297190 }, { "epoch": 0.6003628033630013, "grad_norm": 225.36117553710938, "learning_rate": 4.228172430848645e-06, "loss": 12.7105, "step": 297200 }, { "epoch": 0.6003830039956851, "grad_norm": 664.6941528320312, "learning_rate": 4.227827550849136e-06, "loss": 27.9936, "step": 297210 }, { "epoch": 0.6004032046283689, "grad_norm": 0.0, "learning_rate": 4.227482674613103e-06, "loss": 21.371, "step": 297220 }, { "epoch": 0.6004234052610528, "grad_norm": 189.36280822753906, "learning_rate": 4.227137802142225e-06, "loss": 12.8405, "step": 297230 }, { "epoch": 0.6004436058937366, "grad_norm": 371.6158447265625, "learning_rate": 4.226792933438183e-06, "loss": 33.8094, "step": 297240 }, { "epoch": 0.6004638065264204, "grad_norm": 482.85467529296875, "learning_rate": 4.226448068502661e-06, "loss": 8.6523, "step": 297250 }, { "epoch": 0.6004840071591042, "grad_norm": 684.0409545898438, "learning_rate": 4.2261032073373355e-06, "loss": 22.2693, "step": 297260 }, { "epoch": 0.600504207791788, "grad_norm": 752.8082275390625, "learning_rate": 4.225758349943888e-06, "loss": 18.2599, "step": 297270 }, { "epoch": 0.6005244084244719, "grad_norm": 200.68338012695312, "learning_rate": 4.225413496324003e-06, "loss": 17.5361, "step": 297280 }, { "epoch": 0.6005446090571557, "grad_norm": 182.53616333007812, "learning_rate": 4.225068646479356e-06, "loss": 18.5988, "step": 297290 }, { "epoch": 0.6005648096898395, "grad_norm": 300.6029357910156, "learning_rate": 4.224723800411631e-06, "loss": 19.0446, "step": 297300 }, { "epoch": 0.6005850103225233, "grad_norm": 486.53961181640625, "learning_rate": 4.22437895812251e-06, "loss": 11.3072, "step": 297310 }, { "epoch": 0.6006052109552071, "grad_norm": 631.1341552734375, "learning_rate": 4.224034119613671e-06, "loss": 13.3442, "step": 297320 }, { "epoch": 0.600625411587891, "grad_norm": 324.3525085449219, "learning_rate": 4.223689284886795e-06, "loss": 24.3036, "step": 297330 }, { "epoch": 0.6006456122205748, "grad_norm": 456.65167236328125, "learning_rate": 4.223344453943562e-06, "loss": 18.9171, "step": 297340 }, { "epoch": 0.6006658128532586, "grad_norm": 425.72894287109375, "learning_rate": 4.222999626785658e-06, "loss": 17.0448, "step": 297350 }, { "epoch": 0.6006860134859424, "grad_norm": 679.47021484375, "learning_rate": 
4.2226548034147555e-06, "loss": 22.0326, "step": 297360 }, { "epoch": 0.6007062141186262, "grad_norm": 224.88563537597656, "learning_rate": 4.222309983832541e-06, "loss": 28.2879, "step": 297370 }, { "epoch": 0.6007264147513101, "grad_norm": 302.3341369628906, "learning_rate": 4.221965168040693e-06, "loss": 24.6421, "step": 297380 }, { "epoch": 0.6007466153839939, "grad_norm": 285.5158386230469, "learning_rate": 4.221620356040892e-06, "loss": 27.8586, "step": 297390 }, { "epoch": 0.6007668160166777, "grad_norm": 308.74993896484375, "learning_rate": 4.22127554783482e-06, "loss": 31.893, "step": 297400 }, { "epoch": 0.6007870166493614, "grad_norm": 157.2686004638672, "learning_rate": 4.220930743424157e-06, "loss": 19.8243, "step": 297410 }, { "epoch": 0.6008072172820452, "grad_norm": 605.8692626953125, "learning_rate": 4.220585942810582e-06, "loss": 12.8327, "step": 297420 }, { "epoch": 0.600827417914729, "grad_norm": 792.844482421875, "learning_rate": 4.220241145995775e-06, "loss": 39.2107, "step": 297430 }, { "epoch": 0.6008476185474129, "grad_norm": 881.8159790039062, "learning_rate": 4.219896352981422e-06, "loss": 27.649, "step": 297440 }, { "epoch": 0.6008678191800967, "grad_norm": 38.221126556396484, "learning_rate": 4.219551563769196e-06, "loss": 27.9305, "step": 297450 }, { "epoch": 0.6008880198127805, "grad_norm": 546.9678344726562, "learning_rate": 4.219206778360782e-06, "loss": 13.5159, "step": 297460 }, { "epoch": 0.6009082204454643, "grad_norm": 151.08399963378906, "learning_rate": 4.218861996757859e-06, "loss": 13.8155, "step": 297470 }, { "epoch": 0.6009284210781481, "grad_norm": 413.4466552734375, "learning_rate": 4.218517218962111e-06, "loss": 11.1458, "step": 297480 }, { "epoch": 0.600948621710832, "grad_norm": 448.5552978515625, "learning_rate": 4.218172444975212e-06, "loss": 10.4878, "step": 297490 }, { "epoch": 0.6009688223435158, "grad_norm": 304.6234436035156, "learning_rate": 4.217827674798845e-06, "loss": 10.4263, "step": 297500 }, { "epoch": 0.6009890229761996, "grad_norm": 158.3286895751953, "learning_rate": 4.217482908434695e-06, "loss": 14.0449, "step": 297510 }, { "epoch": 0.6010092236088834, "grad_norm": 685.6396484375, "learning_rate": 4.217138145884435e-06, "loss": 21.0802, "step": 297520 }, { "epoch": 0.6010294242415672, "grad_norm": 237.3669891357422, "learning_rate": 4.216793387149749e-06, "loss": 25.1187, "step": 297530 }, { "epoch": 0.6010496248742511, "grad_norm": 378.9090576171875, "learning_rate": 4.216448632232319e-06, "loss": 31.9589, "step": 297540 }, { "epoch": 0.6010698255069349, "grad_norm": 248.90325927734375, "learning_rate": 4.216103881133822e-06, "loss": 28.8022, "step": 297550 }, { "epoch": 0.6010900261396187, "grad_norm": 61.23601150512695, "learning_rate": 4.21575913385594e-06, "loss": 6.3718, "step": 297560 }, { "epoch": 0.6011102267723025, "grad_norm": 502.94525146484375, "learning_rate": 4.215414390400353e-06, "loss": 30.2917, "step": 297570 }, { "epoch": 0.6011304274049863, "grad_norm": 912.2431030273438, "learning_rate": 4.21506965076874e-06, "loss": 35.4973, "step": 297580 }, { "epoch": 0.6011506280376702, "grad_norm": 344.84832763671875, "learning_rate": 4.2147249149627826e-06, "loss": 17.9056, "step": 297590 }, { "epoch": 0.601170828670354, "grad_norm": 599.3446044921875, "learning_rate": 4.2143801829841635e-06, "loss": 10.8853, "step": 297600 }, { "epoch": 0.6011910293030378, "grad_norm": 50.71405029296875, "learning_rate": 4.214035454834556e-06, "loss": 19.0953, "step": 297610 }, { "epoch": 0.6012112299357216, "grad_norm": 
504.15814208984375, "learning_rate": 4.213690730515646e-06, "loss": 19.7619, "step": 297620 }, { "epoch": 0.6012314305684054, "grad_norm": 246.1601104736328, "learning_rate": 4.213346010029112e-06, "loss": 16.6431, "step": 297630 }, { "epoch": 0.6012516312010893, "grad_norm": 141.6668701171875, "learning_rate": 4.213001293376635e-06, "loss": 17.8635, "step": 297640 }, { "epoch": 0.6012718318337731, "grad_norm": 399.06060791015625, "learning_rate": 4.212656580559894e-06, "loss": 10.4143, "step": 297650 }, { "epoch": 0.6012920324664568, "grad_norm": 491.92266845703125, "learning_rate": 4.212311871580568e-06, "loss": 13.642, "step": 297660 }, { "epoch": 0.6013122330991406, "grad_norm": 146.76007080078125, "learning_rate": 4.2119671664403404e-06, "loss": 10.5023, "step": 297670 }, { "epoch": 0.6013324337318244, "grad_norm": 369.0421447753906, "learning_rate": 4.211622465140887e-06, "loss": 24.4898, "step": 297680 }, { "epoch": 0.6013526343645083, "grad_norm": 372.4095458984375, "learning_rate": 4.211277767683891e-06, "loss": 18.4748, "step": 297690 }, { "epoch": 0.6013728349971921, "grad_norm": 219.39378356933594, "learning_rate": 4.210933074071033e-06, "loss": 14.4395, "step": 297700 }, { "epoch": 0.6013930356298759, "grad_norm": 593.753173828125, "learning_rate": 4.21058838430399e-06, "loss": 20.9442, "step": 297710 }, { "epoch": 0.6014132362625597, "grad_norm": 545.1883544921875, "learning_rate": 4.2102436983844435e-06, "loss": 39.2257, "step": 297720 }, { "epoch": 0.6014334368952435, "grad_norm": 990.1265258789062, "learning_rate": 4.209899016314075e-06, "loss": 16.6062, "step": 297730 }, { "epoch": 0.6014536375279274, "grad_norm": 182.2779083251953, "learning_rate": 4.209554338094561e-06, "loss": 11.2611, "step": 297740 }, { "epoch": 0.6014738381606112, "grad_norm": 59.02173614501953, "learning_rate": 4.209209663727583e-06, "loss": 15.6947, "step": 297750 }, { "epoch": 0.601494038793295, "grad_norm": 220.6677703857422, "learning_rate": 4.208864993214821e-06, "loss": 27.8063, "step": 297760 }, { "epoch": 0.6015142394259788, "grad_norm": 209.71670532226562, "learning_rate": 4.208520326557957e-06, "loss": 16.7089, "step": 297770 }, { "epoch": 0.6015344400586626, "grad_norm": 97.02810668945312, "learning_rate": 4.208175663758668e-06, "loss": 30.089, "step": 297780 }, { "epoch": 0.6015546406913465, "grad_norm": 367.11175537109375, "learning_rate": 4.2078310048186345e-06, "loss": 33.2478, "step": 297790 }, { "epoch": 0.6015748413240303, "grad_norm": 248.53724670410156, "learning_rate": 4.207486349739538e-06, "loss": 9.8839, "step": 297800 }, { "epoch": 0.6015950419567141, "grad_norm": 820.9579467773438, "learning_rate": 4.207141698523055e-06, "loss": 35.9561, "step": 297810 }, { "epoch": 0.6016152425893979, "grad_norm": 587.4654541015625, "learning_rate": 4.206797051170867e-06, "loss": 33.4931, "step": 297820 }, { "epoch": 0.6016354432220817, "grad_norm": 437.03118896484375, "learning_rate": 4.206452407684656e-06, "loss": 18.3676, "step": 297830 }, { "epoch": 0.6016556438547656, "grad_norm": 533.9827880859375, "learning_rate": 4.206107768066099e-06, "loss": 13.6058, "step": 297840 }, { "epoch": 0.6016758444874494, "grad_norm": 610.5231323242188, "learning_rate": 4.205763132316875e-06, "loss": 16.0694, "step": 297850 }, { "epoch": 0.6016960451201332, "grad_norm": 584.7250366210938, "learning_rate": 4.2054185004386675e-06, "loss": 16.5326, "step": 297860 }, { "epoch": 0.601716245752817, "grad_norm": 449.36236572265625, "learning_rate": 4.205073872433152e-06, "loss": 13.229, "step": 297870 }, { 
"epoch": 0.6017364463855008, "grad_norm": 428.4736328125, "learning_rate": 4.2047292483020096e-06, "loss": 17.5223, "step": 297880 }, { "epoch": 0.6017566470181847, "grad_norm": 396.2667541503906, "learning_rate": 4.204384628046924e-06, "loss": 18.1104, "step": 297890 }, { "epoch": 0.6017768476508685, "grad_norm": 70.28057098388672, "learning_rate": 4.204040011669567e-06, "loss": 31.339, "step": 297900 }, { "epoch": 0.6017970482835523, "grad_norm": 286.1497802734375, "learning_rate": 4.203695399171624e-06, "loss": 13.5954, "step": 297910 }, { "epoch": 0.601817248916236, "grad_norm": 109.39277648925781, "learning_rate": 4.203350790554773e-06, "loss": 9.4843, "step": 297920 }, { "epoch": 0.6018374495489198, "grad_norm": 245.7013397216797, "learning_rate": 4.203006185820695e-06, "loss": 18.6695, "step": 297930 }, { "epoch": 0.6018576501816036, "grad_norm": 682.17333984375, "learning_rate": 4.2026615849710665e-06, "loss": 23.312, "step": 297940 }, { "epoch": 0.6018778508142875, "grad_norm": 366.9371032714844, "learning_rate": 4.202316988007568e-06, "loss": 19.1287, "step": 297950 }, { "epoch": 0.6018980514469713, "grad_norm": 275.8540344238281, "learning_rate": 4.201972394931883e-06, "loss": 12.0746, "step": 297960 }, { "epoch": 0.6019182520796551, "grad_norm": 194.72300720214844, "learning_rate": 4.201627805745684e-06, "loss": 32.8855, "step": 297970 }, { "epoch": 0.6019384527123389, "grad_norm": 152.77197265625, "learning_rate": 4.201283220450656e-06, "loss": 9.2756, "step": 297980 }, { "epoch": 0.6019586533450227, "grad_norm": 302.232421875, "learning_rate": 4.200938639048477e-06, "loss": 14.8609, "step": 297990 }, { "epoch": 0.6019788539777066, "grad_norm": 592.5577392578125, "learning_rate": 4.200594061540827e-06, "loss": 24.2384, "step": 298000 }, { "epoch": 0.6019990546103904, "grad_norm": 370.72088623046875, "learning_rate": 4.200249487929383e-06, "loss": 47.6909, "step": 298010 }, { "epoch": 0.6020192552430742, "grad_norm": 194.29681396484375, "learning_rate": 4.199904918215827e-06, "loss": 13.4187, "step": 298020 }, { "epoch": 0.602039455875758, "grad_norm": 595.0006103515625, "learning_rate": 4.199560352401836e-06, "loss": 44.0719, "step": 298030 }, { "epoch": 0.6020596565084418, "grad_norm": 587.3641967773438, "learning_rate": 4.199215790489091e-06, "loss": 20.1468, "step": 298040 }, { "epoch": 0.6020798571411257, "grad_norm": 578.7800903320312, "learning_rate": 4.198871232479274e-06, "loss": 19.0328, "step": 298050 }, { "epoch": 0.6021000577738095, "grad_norm": 613.0132446289062, "learning_rate": 4.1985266783740575e-06, "loss": 18.4533, "step": 298060 }, { "epoch": 0.6021202584064933, "grad_norm": 1191.32080078125, "learning_rate": 4.198182128175126e-06, "loss": 21.9817, "step": 298070 }, { "epoch": 0.6021404590391771, "grad_norm": 521.42822265625, "learning_rate": 4.197837581884158e-06, "loss": 16.8588, "step": 298080 }, { "epoch": 0.6021606596718609, "grad_norm": 275.6913757324219, "learning_rate": 4.1974930395028325e-06, "loss": 29.4446, "step": 298090 }, { "epoch": 0.6021808603045448, "grad_norm": 191.11502075195312, "learning_rate": 4.197148501032829e-06, "loss": 19.7182, "step": 298100 }, { "epoch": 0.6022010609372286, "grad_norm": 681.275390625, "learning_rate": 4.1968039664758245e-06, "loss": 13.4476, "step": 298110 }, { "epoch": 0.6022212615699124, "grad_norm": 168.35659790039062, "learning_rate": 4.196459435833503e-06, "loss": 18.5558, "step": 298120 }, { "epoch": 0.6022414622025962, "grad_norm": 318.42889404296875, "learning_rate": 4.196114909107538e-06, "loss": 
18.9843, "step": 298130 }, { "epoch": 0.60226166283528, "grad_norm": 521.52734375, "learning_rate": 4.195770386299612e-06, "loss": 12.5705, "step": 298140 }, { "epoch": 0.6022818634679639, "grad_norm": 285.8468017578125, "learning_rate": 4.195425867411404e-06, "loss": 16.2484, "step": 298150 }, { "epoch": 0.6023020641006477, "grad_norm": 379.369873046875, "learning_rate": 4.195081352444593e-06, "loss": 26.881, "step": 298160 }, { "epoch": 0.6023222647333315, "grad_norm": 205.96356201171875, "learning_rate": 4.194736841400858e-06, "loss": 8.2632, "step": 298170 }, { "epoch": 0.6023424653660152, "grad_norm": 289.69073486328125, "learning_rate": 4.1943923342818785e-06, "loss": 12.5926, "step": 298180 }, { "epoch": 0.602362665998699, "grad_norm": 302.1565856933594, "learning_rate": 4.194047831089332e-06, "loss": 8.5044, "step": 298190 }, { "epoch": 0.6023828666313829, "grad_norm": 553.7530517578125, "learning_rate": 4.193703331824898e-06, "loss": 12.2299, "step": 298200 }, { "epoch": 0.6024030672640667, "grad_norm": 278.378662109375, "learning_rate": 4.193358836490258e-06, "loss": 11.1498, "step": 298210 }, { "epoch": 0.6024232678967505, "grad_norm": 736.283935546875, "learning_rate": 4.193014345087088e-06, "loss": 21.8522, "step": 298220 }, { "epoch": 0.6024434685294343, "grad_norm": 343.1639709472656, "learning_rate": 4.192669857617068e-06, "loss": 17.3157, "step": 298230 }, { "epoch": 0.6024636691621181, "grad_norm": 306.9372863769531, "learning_rate": 4.192325374081877e-06, "loss": 12.2171, "step": 298240 }, { "epoch": 0.602483869794802, "grad_norm": 14.186240196228027, "learning_rate": 4.191980894483195e-06, "loss": 15.5916, "step": 298250 }, { "epoch": 0.6025040704274858, "grad_norm": 23.902244567871094, "learning_rate": 4.1916364188227e-06, "loss": 16.8782, "step": 298260 }, { "epoch": 0.6025242710601696, "grad_norm": 214.9687957763672, "learning_rate": 4.19129194710207e-06, "loss": 18.2315, "step": 298270 }, { "epoch": 0.6025444716928534, "grad_norm": 435.8495788574219, "learning_rate": 4.190947479322988e-06, "loss": 13.3487, "step": 298280 }, { "epoch": 0.6025646723255372, "grad_norm": 468.54241943359375, "learning_rate": 4.190603015487126e-06, "loss": 20.7279, "step": 298290 }, { "epoch": 0.602584872958221, "grad_norm": 577.8019409179688, "learning_rate": 4.190258555596168e-06, "loss": 17.5265, "step": 298300 }, { "epoch": 0.6026050735909049, "grad_norm": 131.77757263183594, "learning_rate": 4.1899140996517934e-06, "loss": 13.6653, "step": 298310 }, { "epoch": 0.6026252742235887, "grad_norm": 420.06695556640625, "learning_rate": 4.189569647655677e-06, "loss": 24.7182, "step": 298320 }, { "epoch": 0.6026454748562725, "grad_norm": 417.7607116699219, "learning_rate": 4.189225199609501e-06, "loss": 15.1371, "step": 298330 }, { "epoch": 0.6026656754889563, "grad_norm": 610.1613159179688, "learning_rate": 4.188880755514944e-06, "loss": 21.7652, "step": 298340 }, { "epoch": 0.6026858761216402, "grad_norm": 87.60273742675781, "learning_rate": 4.1885363153736825e-06, "loss": 23.8042, "step": 298350 }, { "epoch": 0.602706076754324, "grad_norm": 183.4608154296875, "learning_rate": 4.188191879187395e-06, "loss": 18.7179, "step": 298360 }, { "epoch": 0.6027262773870078, "grad_norm": 497.125732421875, "learning_rate": 4.187847446957763e-06, "loss": 19.7117, "step": 298370 }, { "epoch": 0.6027464780196916, "grad_norm": 105.0024642944336, "learning_rate": 4.187503018686466e-06, "loss": 8.4717, "step": 298380 }, { "epoch": 0.6027666786523754, "grad_norm": 603.5338134765625, "learning_rate": 
4.1871585943751795e-06, "loss": 30.7272, "step": 298390 }, { "epoch": 0.6027868792850593, "grad_norm": 190.3978271484375, "learning_rate": 4.186814174025582e-06, "loss": 15.1717, "step": 298400 }, { "epoch": 0.6028070799177431, "grad_norm": 429.34259033203125, "learning_rate": 4.186469757639356e-06, "loss": 26.493, "step": 298410 }, { "epoch": 0.6028272805504269, "grad_norm": 620.0953979492188, "learning_rate": 4.186125345218177e-06, "loss": 18.7135, "step": 298420 }, { "epoch": 0.6028474811831106, "grad_norm": 671.0869750976562, "learning_rate": 4.185780936763722e-06, "loss": 25.2038, "step": 298430 }, { "epoch": 0.6028676818157944, "grad_norm": 81.80601501464844, "learning_rate": 4.185436532277675e-06, "loss": 14.779, "step": 298440 }, { "epoch": 0.6028878824484782, "grad_norm": 399.3650817871094, "learning_rate": 4.18509213176171e-06, "loss": 24.9025, "step": 298450 }, { "epoch": 0.6029080830811621, "grad_norm": 385.64703369140625, "learning_rate": 4.184747735217507e-06, "loss": 16.8063, "step": 298460 }, { "epoch": 0.6029282837138459, "grad_norm": 385.7299499511719, "learning_rate": 4.184403342646746e-06, "loss": 29.951, "step": 298470 }, { "epoch": 0.6029484843465297, "grad_norm": 178.00515747070312, "learning_rate": 4.1840589540511035e-06, "loss": 14.1718, "step": 298480 }, { "epoch": 0.6029686849792135, "grad_norm": 132.05557250976562, "learning_rate": 4.183714569432259e-06, "loss": 16.2912, "step": 298490 }, { "epoch": 0.6029888856118973, "grad_norm": 234.59046936035156, "learning_rate": 4.183370188791891e-06, "loss": 23.0456, "step": 298500 }, { "epoch": 0.6030090862445812, "grad_norm": 333.8605041503906, "learning_rate": 4.183025812131674e-06, "loss": 16.6333, "step": 298510 }, { "epoch": 0.603029286877265, "grad_norm": 121.52058410644531, "learning_rate": 4.182681439453294e-06, "loss": 16.7034, "step": 298520 }, { "epoch": 0.6030494875099488, "grad_norm": 572.5895385742188, "learning_rate": 4.182337070758425e-06, "loss": 17.9095, "step": 298530 }, { "epoch": 0.6030696881426326, "grad_norm": 560.9683227539062, "learning_rate": 4.1819927060487454e-06, "loss": 15.1039, "step": 298540 }, { "epoch": 0.6030898887753164, "grad_norm": 448.8406677246094, "learning_rate": 4.181648345325934e-06, "loss": 16.6443, "step": 298550 }, { "epoch": 0.6031100894080003, "grad_norm": 165.1465301513672, "learning_rate": 4.181303988591669e-06, "loss": 10.6893, "step": 298560 }, { "epoch": 0.6031302900406841, "grad_norm": 545.265380859375, "learning_rate": 4.1809596358476315e-06, "loss": 25.455, "step": 298570 }, { "epoch": 0.6031504906733679, "grad_norm": 677.195068359375, "learning_rate": 4.180615287095494e-06, "loss": 21.0746, "step": 298580 }, { "epoch": 0.6031706913060517, "grad_norm": 21.15522003173828, "learning_rate": 4.180270942336939e-06, "loss": 19.539, "step": 298590 }, { "epoch": 0.6031908919387355, "grad_norm": 495.523193359375, "learning_rate": 4.179926601573645e-06, "loss": 27.5947, "step": 298600 }, { "epoch": 0.6032110925714194, "grad_norm": 173.38722229003906, "learning_rate": 4.179582264807289e-06, "loss": 14.384, "step": 298610 }, { "epoch": 0.6032312932041032, "grad_norm": 204.8512420654297, "learning_rate": 4.17923793203955e-06, "loss": 12.818, "step": 298620 }, { "epoch": 0.603251493836787, "grad_norm": 488.9126281738281, "learning_rate": 4.1788936032721065e-06, "loss": 23.2934, "step": 298630 }, { "epoch": 0.6032716944694708, "grad_norm": 260.3359069824219, "learning_rate": 4.178549278506634e-06, "loss": 15.384, "step": 298640 }, { "epoch": 0.6032918951021546, "grad_norm": 
439.2271423339844, "learning_rate": 4.178204957744812e-06, "loss": 14.0319, "step": 298650 }, { "epoch": 0.6033120957348385, "grad_norm": 283.25811767578125, "learning_rate": 4.177860640988323e-06, "loss": 19.0398, "step": 298660 }, { "epoch": 0.6033322963675223, "grad_norm": 196.1883544921875, "learning_rate": 4.177516328238838e-06, "loss": 22.395, "step": 298670 }, { "epoch": 0.6033524970002061, "grad_norm": 476.9971618652344, "learning_rate": 4.17717201949804e-06, "loss": 17.1944, "step": 298680 }, { "epoch": 0.6033726976328898, "grad_norm": 39.04746627807617, "learning_rate": 4.176827714767606e-06, "loss": 10.2527, "step": 298690 }, { "epoch": 0.6033928982655736, "grad_norm": 464.3876037597656, "learning_rate": 4.176483414049214e-06, "loss": 12.7073, "step": 298700 }, { "epoch": 0.6034130988982574, "grad_norm": 224.52981567382812, "learning_rate": 4.176139117344542e-06, "loss": 12.2768, "step": 298710 }, { "epoch": 0.6034332995309413, "grad_norm": 412.624755859375, "learning_rate": 4.175794824655266e-06, "loss": 17.816, "step": 298720 }, { "epoch": 0.6034535001636251, "grad_norm": 424.7938537597656, "learning_rate": 4.17545053598307e-06, "loss": 26.5277, "step": 298730 }, { "epoch": 0.6034737007963089, "grad_norm": 142.6999053955078, "learning_rate": 4.1751062513296245e-06, "loss": 17.8631, "step": 298740 }, { "epoch": 0.6034939014289927, "grad_norm": 331.29315185546875, "learning_rate": 4.174761970696612e-06, "loss": 16.1455, "step": 298750 }, { "epoch": 0.6035141020616765, "grad_norm": 639.9597778320312, "learning_rate": 4.174417694085711e-06, "loss": 27.9107, "step": 298760 }, { "epoch": 0.6035343026943604, "grad_norm": 94.59380340576172, "learning_rate": 4.174073421498597e-06, "loss": 20.1087, "step": 298770 }, { "epoch": 0.6035545033270442, "grad_norm": 157.8151092529297, "learning_rate": 4.173729152936948e-06, "loss": 23.2745, "step": 298780 }, { "epoch": 0.603574703959728, "grad_norm": 1348.0771484375, "learning_rate": 4.173384888402446e-06, "loss": 19.5732, "step": 298790 }, { "epoch": 0.6035949045924118, "grad_norm": 0.0, "learning_rate": 4.173040627896762e-06, "loss": 31.22, "step": 298800 }, { "epoch": 0.6036151052250956, "grad_norm": 1027.85107421875, "learning_rate": 4.172696371421579e-06, "loss": 18.6806, "step": 298810 }, { "epoch": 0.6036353058577795, "grad_norm": 69.05319213867188, "learning_rate": 4.172352118978573e-06, "loss": 39.396, "step": 298820 }, { "epoch": 0.6036555064904633, "grad_norm": 891.92529296875, "learning_rate": 4.172007870569425e-06, "loss": 17.773, "step": 298830 }, { "epoch": 0.6036757071231471, "grad_norm": 165.69923400878906, "learning_rate": 4.171663626195808e-06, "loss": 14.5471, "step": 298840 }, { "epoch": 0.6036959077558309, "grad_norm": 274.5810241699219, "learning_rate": 4.171319385859402e-06, "loss": 17.4796, "step": 298850 }, { "epoch": 0.6037161083885147, "grad_norm": 415.4864196777344, "learning_rate": 4.170975149561886e-06, "loss": 11.9298, "step": 298860 }, { "epoch": 0.6037363090211986, "grad_norm": 492.5143127441406, "learning_rate": 4.170630917304935e-06, "loss": 27.1149, "step": 298870 }, { "epoch": 0.6037565096538824, "grad_norm": 274.8476257324219, "learning_rate": 4.1702866890902285e-06, "loss": 26.0232, "step": 298880 }, { "epoch": 0.6037767102865662, "grad_norm": 183.34765625, "learning_rate": 4.169942464919446e-06, "loss": 20.6724, "step": 298890 }, { "epoch": 0.60379691091925, "grad_norm": 9.085321426391602, "learning_rate": 4.169598244794261e-06, "loss": 17.5541, "step": 298900 }, { "epoch": 0.6038171115519338, 
"grad_norm": 534.8471069335938, "learning_rate": 4.169254028716355e-06, "loss": 28.0896, "step": 298910 }, { "epoch": 0.6038373121846177, "grad_norm": 283.2433166503906, "learning_rate": 4.1689098166874046e-06, "loss": 19.5865, "step": 298920 }, { "epoch": 0.6038575128173015, "grad_norm": 195.63902282714844, "learning_rate": 4.168565608709085e-06, "loss": 15.0253, "step": 298930 }, { "epoch": 0.6038777134499852, "grad_norm": 481.405029296875, "learning_rate": 4.168221404783076e-06, "loss": 19.6114, "step": 298940 }, { "epoch": 0.603897914082669, "grad_norm": 316.28387451171875, "learning_rate": 4.167877204911057e-06, "loss": 19.1945, "step": 298950 }, { "epoch": 0.6039181147153528, "grad_norm": 426.0697326660156, "learning_rate": 4.167533009094702e-06, "loss": 18.7576, "step": 298960 }, { "epoch": 0.6039383153480367, "grad_norm": 268.5688781738281, "learning_rate": 4.167188817335689e-06, "loss": 19.8746, "step": 298970 }, { "epoch": 0.6039585159807205, "grad_norm": 392.6434631347656, "learning_rate": 4.166844629635698e-06, "loss": 15.2853, "step": 298980 }, { "epoch": 0.6039787166134043, "grad_norm": 92.74348449707031, "learning_rate": 4.166500445996407e-06, "loss": 24.6408, "step": 298990 }, { "epoch": 0.6039989172460881, "grad_norm": 365.57110595703125, "learning_rate": 4.166156266419489e-06, "loss": 23.8099, "step": 299000 }, { "epoch": 0.6040191178787719, "grad_norm": 356.58251953125, "learning_rate": 4.1658120909066255e-06, "loss": 14.063, "step": 299010 }, { "epoch": 0.6040393185114558, "grad_norm": 485.7627868652344, "learning_rate": 4.165467919459493e-06, "loss": 14.8582, "step": 299020 }, { "epoch": 0.6040595191441396, "grad_norm": 400.6348571777344, "learning_rate": 4.165123752079768e-06, "loss": 27.36, "step": 299030 }, { "epoch": 0.6040797197768234, "grad_norm": 230.67910766601562, "learning_rate": 4.1647795887691275e-06, "loss": 13.5759, "step": 299040 }, { "epoch": 0.6040999204095072, "grad_norm": 331.7479248046875, "learning_rate": 4.164435429529253e-06, "loss": 20.1367, "step": 299050 }, { "epoch": 0.604120121042191, "grad_norm": 319.7065124511719, "learning_rate": 4.164091274361815e-06, "loss": 15.6679, "step": 299060 }, { "epoch": 0.6041403216748749, "grad_norm": 517.3455810546875, "learning_rate": 4.163747123268497e-06, "loss": 15.0055, "step": 299070 }, { "epoch": 0.6041605223075587, "grad_norm": 414.9480285644531, "learning_rate": 4.1634029762509755e-06, "loss": 23.7813, "step": 299080 }, { "epoch": 0.6041807229402425, "grad_norm": 1325.8746337890625, "learning_rate": 4.163058833310925e-06, "loss": 8.0147, "step": 299090 }, { "epoch": 0.6042009235729263, "grad_norm": 637.223876953125, "learning_rate": 4.162714694450023e-06, "loss": 31.1577, "step": 299100 }, { "epoch": 0.6042211242056101, "grad_norm": 202.93833923339844, "learning_rate": 4.16237055966995e-06, "loss": 20.886, "step": 299110 }, { "epoch": 0.604241324838294, "grad_norm": 792.5867919921875, "learning_rate": 4.16202642897238e-06, "loss": 19.1535, "step": 299120 }, { "epoch": 0.6042615254709778, "grad_norm": 255.61788940429688, "learning_rate": 4.161682302358991e-06, "loss": 18.4287, "step": 299130 }, { "epoch": 0.6042817261036616, "grad_norm": 424.9412536621094, "learning_rate": 4.161338179831461e-06, "loss": 36.7544, "step": 299140 }, { "epoch": 0.6043019267363454, "grad_norm": 569.3251342773438, "learning_rate": 4.160994061391469e-06, "loss": 28.6545, "step": 299150 }, { "epoch": 0.6043221273690292, "grad_norm": 347.0662536621094, "learning_rate": 4.1606499470406885e-06, "loss": 41.8323, "step": 299160 }, 
{ "epoch": 0.6043423280017131, "grad_norm": 330.9170227050781, "learning_rate": 4.1603058367807986e-06, "loss": 20.5058, "step": 299170 }, { "epoch": 0.6043625286343969, "grad_norm": 449.6436767578125, "learning_rate": 4.159961730613478e-06, "loss": 19.3311, "step": 299180 }, { "epoch": 0.6043827292670807, "grad_norm": 142.1448516845703, "learning_rate": 4.1596176285403985e-06, "loss": 22.8406, "step": 299190 }, { "epoch": 0.6044029298997644, "grad_norm": 119.17735290527344, "learning_rate": 4.159273530563243e-06, "loss": 9.936, "step": 299200 }, { "epoch": 0.6044231305324482, "grad_norm": 185.63804626464844, "learning_rate": 4.158929436683687e-06, "loss": 11.7484, "step": 299210 }, { "epoch": 0.604443331165132, "grad_norm": 191.6988983154297, "learning_rate": 4.158585346903405e-06, "loss": 16.4373, "step": 299220 }, { "epoch": 0.6044635317978159, "grad_norm": 463.1911926269531, "learning_rate": 4.1582412612240765e-06, "loss": 21.7914, "step": 299230 }, { "epoch": 0.6044837324304997, "grad_norm": 354.4056091308594, "learning_rate": 4.157897179647379e-06, "loss": 15.9639, "step": 299240 }, { "epoch": 0.6045039330631835, "grad_norm": 233.71731567382812, "learning_rate": 4.157553102174988e-06, "loss": 18.4421, "step": 299250 }, { "epoch": 0.6045241336958673, "grad_norm": 202.58102416992188, "learning_rate": 4.15720902880858e-06, "loss": 31.7332, "step": 299260 }, { "epoch": 0.6045443343285511, "grad_norm": 390.31072998046875, "learning_rate": 4.156864959549833e-06, "loss": 24.7472, "step": 299270 }, { "epoch": 0.604564534961235, "grad_norm": 398.3007507324219, "learning_rate": 4.156520894400426e-06, "loss": 22.008, "step": 299280 }, { "epoch": 0.6045847355939188, "grad_norm": 316.69415283203125, "learning_rate": 4.156176833362032e-06, "loss": 16.2221, "step": 299290 }, { "epoch": 0.6046049362266026, "grad_norm": 421.1678466796875, "learning_rate": 4.155832776436331e-06, "loss": 31.4256, "step": 299300 }, { "epoch": 0.6046251368592864, "grad_norm": 73.86972045898438, "learning_rate": 4.155488723624999e-06, "loss": 11.5173, "step": 299310 }, { "epoch": 0.6046453374919702, "grad_norm": 490.62542724609375, "learning_rate": 4.1551446749297104e-06, "loss": 24.8878, "step": 299320 }, { "epoch": 0.6046655381246541, "grad_norm": 498.3382873535156, "learning_rate": 4.154800630352145e-06, "loss": 13.6941, "step": 299330 }, { "epoch": 0.6046857387573379, "grad_norm": 313.3454284667969, "learning_rate": 4.154456589893981e-06, "loss": 14.2694, "step": 299340 }, { "epoch": 0.6047059393900217, "grad_norm": 293.8607177734375, "learning_rate": 4.15411255355689e-06, "loss": 19.203, "step": 299350 }, { "epoch": 0.6047261400227055, "grad_norm": 226.7985076904297, "learning_rate": 4.153768521342552e-06, "loss": 8.4586, "step": 299360 }, { "epoch": 0.6047463406553893, "grad_norm": 611.42236328125, "learning_rate": 4.153424493252646e-06, "loss": 27.3687, "step": 299370 }, { "epoch": 0.6047665412880732, "grad_norm": 197.8191375732422, "learning_rate": 4.153080469288845e-06, "loss": 11.2582, "step": 299380 }, { "epoch": 0.604786741920757, "grad_norm": 402.68408203125, "learning_rate": 4.152736449452827e-06, "loss": 31.7333, "step": 299390 }, { "epoch": 0.6048069425534408, "grad_norm": 491.1822509765625, "learning_rate": 4.15239243374627e-06, "loss": 20.3953, "step": 299400 }, { "epoch": 0.6048271431861246, "grad_norm": 734.6697387695312, "learning_rate": 4.152048422170848e-06, "loss": 30.9655, "step": 299410 }, { "epoch": 0.6048473438188084, "grad_norm": 941.27099609375, "learning_rate": 4.151704414728238e-06, "loss": 
24.8178, "step": 299420 }, { "epoch": 0.6048675444514923, "grad_norm": 299.1211853027344, "learning_rate": 4.151360411420119e-06, "loss": 14.6758, "step": 299430 }, { "epoch": 0.6048877450841761, "grad_norm": 801.124755859375, "learning_rate": 4.1510164122481675e-06, "loss": 17.4863, "step": 299440 }, { "epoch": 0.6049079457168599, "grad_norm": 428.1326904296875, "learning_rate": 4.150672417214058e-06, "loss": 9.6424, "step": 299450 }, { "epoch": 0.6049281463495436, "grad_norm": 351.8671569824219, "learning_rate": 4.150328426319469e-06, "loss": 19.342, "step": 299460 }, { "epoch": 0.6049483469822274, "grad_norm": 244.36212158203125, "learning_rate": 4.149984439566076e-06, "loss": 31.1823, "step": 299470 }, { "epoch": 0.6049685476149113, "grad_norm": 230.03631591796875, "learning_rate": 4.149640456955555e-06, "loss": 19.8825, "step": 299480 }, { "epoch": 0.6049887482475951, "grad_norm": 563.4520263671875, "learning_rate": 4.149296478489583e-06, "loss": 35.0656, "step": 299490 }, { "epoch": 0.6050089488802789, "grad_norm": 448.1905517578125, "learning_rate": 4.148952504169839e-06, "loss": 15.7536, "step": 299500 }, { "epoch": 0.6050291495129627, "grad_norm": 621.169189453125, "learning_rate": 4.1486085339979944e-06, "loss": 14.6395, "step": 299510 }, { "epoch": 0.6050493501456465, "grad_norm": 390.1469421386719, "learning_rate": 4.148264567975729e-06, "loss": 32.5824, "step": 299520 }, { "epoch": 0.6050695507783304, "grad_norm": 420.9831848144531, "learning_rate": 4.1479206061047205e-06, "loss": 25.5124, "step": 299530 }, { "epoch": 0.6050897514110142, "grad_norm": 512.182861328125, "learning_rate": 4.147576648386643e-06, "loss": 20.1751, "step": 299540 }, { "epoch": 0.605109952043698, "grad_norm": 197.9337921142578, "learning_rate": 4.147232694823173e-06, "loss": 9.2306, "step": 299550 }, { "epoch": 0.6051301526763818, "grad_norm": 3.1143038272857666, "learning_rate": 4.146888745415988e-06, "loss": 26.7723, "step": 299560 }, { "epoch": 0.6051503533090656, "grad_norm": 160.60667419433594, "learning_rate": 4.146544800166764e-06, "loss": 16.7458, "step": 299570 }, { "epoch": 0.6051705539417495, "grad_norm": 113.62411499023438, "learning_rate": 4.146200859077175e-06, "loss": 23.438, "step": 299580 }, { "epoch": 0.6051907545744333, "grad_norm": 434.8067626953125, "learning_rate": 4.1458569221489006e-06, "loss": 10.0016, "step": 299590 }, { "epoch": 0.6052109552071171, "grad_norm": 410.1781921386719, "learning_rate": 4.145512989383618e-06, "loss": 17.4798, "step": 299600 }, { "epoch": 0.6052311558398009, "grad_norm": 497.3607482910156, "learning_rate": 4.1451690607829995e-06, "loss": 15.7014, "step": 299610 }, { "epoch": 0.6052513564724847, "grad_norm": 271.93951416015625, "learning_rate": 4.144825136348724e-06, "loss": 20.2383, "step": 299620 }, { "epoch": 0.6052715571051686, "grad_norm": 153.39515686035156, "learning_rate": 4.144481216082467e-06, "loss": 16.2669, "step": 299630 }, { "epoch": 0.6052917577378524, "grad_norm": 278.66412353515625, "learning_rate": 4.1441372999859046e-06, "loss": 19.849, "step": 299640 }, { "epoch": 0.6053119583705362, "grad_norm": 165.04925537109375, "learning_rate": 4.143793388060712e-06, "loss": 18.7088, "step": 299650 }, { "epoch": 0.60533215900322, "grad_norm": 371.5042419433594, "learning_rate": 4.143449480308569e-06, "loss": 22.5886, "step": 299660 }, { "epoch": 0.6053523596359038, "grad_norm": 273.3786315917969, "learning_rate": 4.143105576731147e-06, "loss": 15.8427, "step": 299670 }, { "epoch": 0.6053725602685877, "grad_norm": 96.92143249511719, 
"learning_rate": 4.1427616773301245e-06, "loss": 19.4343, "step": 299680 }, { "epoch": 0.6053927609012715, "grad_norm": 211.26661682128906, "learning_rate": 4.14241778210718e-06, "loss": 25.5301, "step": 299690 }, { "epoch": 0.6054129615339553, "grad_norm": 747.14404296875, "learning_rate": 4.142073891063986e-06, "loss": 23.3289, "step": 299700 }, { "epoch": 0.605433162166639, "grad_norm": 220.15956115722656, "learning_rate": 4.1417300042022195e-06, "loss": 13.643, "step": 299710 }, { "epoch": 0.6054533627993228, "grad_norm": 275.4222106933594, "learning_rate": 4.141386121523558e-06, "loss": 26.8061, "step": 299720 }, { "epoch": 0.6054735634320066, "grad_norm": 687.5581665039062, "learning_rate": 4.141042243029675e-06, "loss": 19.9321, "step": 299730 }, { "epoch": 0.6054937640646905, "grad_norm": 196.90611267089844, "learning_rate": 4.140698368722247e-06, "loss": 22.7837, "step": 299740 }, { "epoch": 0.6055139646973743, "grad_norm": 672.401123046875, "learning_rate": 4.140354498602952e-06, "loss": 26.9086, "step": 299750 }, { "epoch": 0.6055341653300581, "grad_norm": 46.130191802978516, "learning_rate": 4.140010632673466e-06, "loss": 21.9361, "step": 299760 }, { "epoch": 0.6055543659627419, "grad_norm": 678.9490966796875, "learning_rate": 4.139666770935463e-06, "loss": 22.1638, "step": 299770 }, { "epoch": 0.6055745665954257, "grad_norm": 540.2193603515625, "learning_rate": 4.13932291339062e-06, "loss": 26.2758, "step": 299780 }, { "epoch": 0.6055947672281096, "grad_norm": 526.988525390625, "learning_rate": 4.138979060040613e-06, "loss": 25.5432, "step": 299790 }, { "epoch": 0.6056149678607934, "grad_norm": 159.88980102539062, "learning_rate": 4.138635210887117e-06, "loss": 11.3931, "step": 299800 }, { "epoch": 0.6056351684934772, "grad_norm": 529.3505249023438, "learning_rate": 4.138291365931808e-06, "loss": 30.4891, "step": 299810 }, { "epoch": 0.605655369126161, "grad_norm": 522.1632690429688, "learning_rate": 4.137947525176364e-06, "loss": 16.009, "step": 299820 }, { "epoch": 0.6056755697588448, "grad_norm": 67.28885650634766, "learning_rate": 4.137603688622458e-06, "loss": 21.4409, "step": 299830 }, { "epoch": 0.6056957703915287, "grad_norm": 70.01111602783203, "learning_rate": 4.137259856271767e-06, "loss": 16.8961, "step": 299840 }, { "epoch": 0.6057159710242125, "grad_norm": 880.1499633789062, "learning_rate": 4.1369160281259685e-06, "loss": 34.1453, "step": 299850 }, { "epoch": 0.6057361716568963, "grad_norm": 56.148624420166016, "learning_rate": 4.136572204186735e-06, "loss": 12.5547, "step": 299860 }, { "epoch": 0.6057563722895801, "grad_norm": 260.4873352050781, "learning_rate": 4.136228384455743e-06, "loss": 14.8398, "step": 299870 }, { "epoch": 0.605776572922264, "grad_norm": 284.2904968261719, "learning_rate": 4.13588456893467e-06, "loss": 21.4895, "step": 299880 }, { "epoch": 0.6057967735549478, "grad_norm": 559.5950317382812, "learning_rate": 4.1355407576251925e-06, "loss": 13.033, "step": 299890 }, { "epoch": 0.6058169741876316, "grad_norm": 21.05386734008789, "learning_rate": 4.135196950528982e-06, "loss": 10.6381, "step": 299900 }, { "epoch": 0.6058371748203154, "grad_norm": 172.6096954345703, "learning_rate": 4.134853147647718e-06, "loss": 19.6288, "step": 299910 }, { "epoch": 0.6058573754529992, "grad_norm": 258.5855407714844, "learning_rate": 4.134509348983075e-06, "loss": 27.6375, "step": 299920 }, { "epoch": 0.605877576085683, "grad_norm": 717.6494140625, "learning_rate": 4.134165554536728e-06, "loss": 14.4708, "step": 299930 }, { "epoch": 0.6058977767183669, 
"grad_norm": 37.4779052734375, "learning_rate": 4.133821764310352e-06, "loss": 26.5522, "step": 299940 }, { "epoch": 0.6059179773510507, "grad_norm": 358.26361083984375, "learning_rate": 4.133477978305626e-06, "loss": 29.5468, "step": 299950 }, { "epoch": 0.6059381779837345, "grad_norm": 264.0389709472656, "learning_rate": 4.133134196524221e-06, "loss": 19.7583, "step": 299960 }, { "epoch": 0.6059583786164182, "grad_norm": 196.28353881835938, "learning_rate": 4.132790418967816e-06, "loss": 26.5197, "step": 299970 }, { "epoch": 0.605978579249102, "grad_norm": 252.9100799560547, "learning_rate": 4.132446645638086e-06, "loss": 17.9467, "step": 299980 }, { "epoch": 0.6059987798817859, "grad_norm": 443.7536926269531, "learning_rate": 4.132102876536705e-06, "loss": 26.3148, "step": 299990 }, { "epoch": 0.6060189805144697, "grad_norm": 156.29220581054688, "learning_rate": 4.131759111665349e-06, "loss": 16.4471, "step": 300000 }, { "epoch": 0.6060391811471535, "grad_norm": 441.18731689453125, "learning_rate": 4.131415351025695e-06, "loss": 11.3066, "step": 300010 }, { "epoch": 0.6060593817798373, "grad_norm": 0.0, "learning_rate": 4.131071594619416e-06, "loss": 34.9451, "step": 300020 }, { "epoch": 0.6060795824125211, "grad_norm": 213.2185516357422, "learning_rate": 4.130727842448187e-06, "loss": 11.2, "step": 300030 }, { "epoch": 0.606099783045205, "grad_norm": 287.30926513671875, "learning_rate": 4.130384094513688e-06, "loss": 10.5321, "step": 300040 }, { "epoch": 0.6061199836778888, "grad_norm": 448.23309326171875, "learning_rate": 4.13004035081759e-06, "loss": 26.159, "step": 300050 }, { "epoch": 0.6061401843105726, "grad_norm": 222.05453491210938, "learning_rate": 4.1296966113615705e-06, "loss": 17.9621, "step": 300060 }, { "epoch": 0.6061603849432564, "grad_norm": 302.32293701171875, "learning_rate": 4.129352876147304e-06, "loss": 28.4212, "step": 300070 }, { "epoch": 0.6061805855759402, "grad_norm": 216.21511840820312, "learning_rate": 4.129009145176467e-06, "loss": 23.9892, "step": 300080 }, { "epoch": 0.606200786208624, "grad_norm": 409.4975891113281, "learning_rate": 4.128665418450732e-06, "loss": 17.4646, "step": 300090 }, { "epoch": 0.6062209868413079, "grad_norm": 228.02105712890625, "learning_rate": 4.128321695971775e-06, "loss": 11.725, "step": 300100 }, { "epoch": 0.6062411874739917, "grad_norm": 521.8179931640625, "learning_rate": 4.127977977741277e-06, "loss": 24.6463, "step": 300110 }, { "epoch": 0.6062613881066755, "grad_norm": 2.4430065155029297, "learning_rate": 4.127634263760904e-06, "loss": 26.2461, "step": 300120 }, { "epoch": 0.6062815887393593, "grad_norm": 184.41311645507812, "learning_rate": 4.127290554032337e-06, "loss": 13.7439, "step": 300130 }, { "epoch": 0.6063017893720432, "grad_norm": 23.768491744995117, "learning_rate": 4.126946848557252e-06, "loss": 23.512, "step": 300140 }, { "epoch": 0.606321990004727, "grad_norm": 416.1184387207031, "learning_rate": 4.12660314733732e-06, "loss": 18.3558, "step": 300150 }, { "epoch": 0.6063421906374108, "grad_norm": 82.60257720947266, "learning_rate": 4.126259450374219e-06, "loss": 11.7744, "step": 300160 }, { "epoch": 0.6063623912700946, "grad_norm": 648.251708984375, "learning_rate": 4.125915757669624e-06, "loss": 19.0724, "step": 300170 }, { "epoch": 0.6063825919027784, "grad_norm": 531.5507202148438, "learning_rate": 4.1255720692252084e-06, "loss": 26.0003, "step": 300180 }, { "epoch": 0.6064027925354623, "grad_norm": 433.8568420410156, "learning_rate": 4.125228385042648e-06, "loss": 13.7875, "step": 300190 }, { "epoch": 
0.6064229931681461, "grad_norm": 348.8671569824219, "learning_rate": 4.124884705123619e-06, "loss": 25.9525, "step": 300200 }, { "epoch": 0.6064431938008299, "grad_norm": 191.41236877441406, "learning_rate": 4.124541029469798e-06, "loss": 22.0683, "step": 300210 }, { "epoch": 0.6064633944335136, "grad_norm": 129.49757385253906, "learning_rate": 4.124197358082855e-06, "loss": 17.101, "step": 300220 }, { "epoch": 0.6064835950661974, "grad_norm": 284.73553466796875, "learning_rate": 4.12385369096447e-06, "loss": 17.0187, "step": 300230 }, { "epoch": 0.6065037956988812, "grad_norm": 421.3599853515625, "learning_rate": 4.123510028116315e-06, "loss": 26.3037, "step": 300240 }, { "epoch": 0.6065239963315651, "grad_norm": 9.619393348693848, "learning_rate": 4.123166369540066e-06, "loss": 17.2235, "step": 300250 }, { "epoch": 0.6065441969642489, "grad_norm": 608.0734252929688, "learning_rate": 4.1228227152373955e-06, "loss": 17.3087, "step": 300260 }, { "epoch": 0.6065643975969327, "grad_norm": 68.26480102539062, "learning_rate": 4.122479065209984e-06, "loss": 19.6844, "step": 300270 }, { "epoch": 0.6065845982296165, "grad_norm": 513.013916015625, "learning_rate": 4.122135419459501e-06, "loss": 23.8992, "step": 300280 }, { "epoch": 0.6066047988623003, "grad_norm": 524.5921630859375, "learning_rate": 4.121791777987624e-06, "loss": 18.7422, "step": 300290 }, { "epoch": 0.6066249994949842, "grad_norm": 588.4210205078125, "learning_rate": 4.121448140796029e-06, "loss": 20.0128, "step": 300300 }, { "epoch": 0.606645200127668, "grad_norm": 734.3225708007812, "learning_rate": 4.121104507886387e-06, "loss": 27.1061, "step": 300310 }, { "epoch": 0.6066654007603518, "grad_norm": 429.5791931152344, "learning_rate": 4.120760879260375e-06, "loss": 18.7656, "step": 300320 }, { "epoch": 0.6066856013930356, "grad_norm": 28.860681533813477, "learning_rate": 4.120417254919668e-06, "loss": 18.3603, "step": 300330 }, { "epoch": 0.6067058020257194, "grad_norm": 295.4386901855469, "learning_rate": 4.120073634865943e-06, "loss": 14.0962, "step": 300340 }, { "epoch": 0.6067260026584033, "grad_norm": 130.5166473388672, "learning_rate": 4.119730019100869e-06, "loss": 21.8594, "step": 300350 }, { "epoch": 0.6067462032910871, "grad_norm": 533.1133422851562, "learning_rate": 4.119386407626126e-06, "loss": 30.9608, "step": 300360 }, { "epoch": 0.6067664039237709, "grad_norm": 233.6865692138672, "learning_rate": 4.119042800443387e-06, "loss": 11.7632, "step": 300370 }, { "epoch": 0.6067866045564547, "grad_norm": 276.0942687988281, "learning_rate": 4.118699197554327e-06, "loss": 16.6963, "step": 300380 }, { "epoch": 0.6068068051891385, "grad_norm": 222.0702362060547, "learning_rate": 4.118355598960619e-06, "loss": 15.4077, "step": 300390 }, { "epoch": 0.6068270058218224, "grad_norm": 728.0061645507812, "learning_rate": 4.118012004663939e-06, "loss": 16.6156, "step": 300400 }, { "epoch": 0.6068472064545062, "grad_norm": 492.2958068847656, "learning_rate": 4.117668414665962e-06, "loss": 26.7079, "step": 300410 }, { "epoch": 0.60686740708719, "grad_norm": 1085.5723876953125, "learning_rate": 4.117324828968361e-06, "loss": 20.2057, "step": 300420 }, { "epoch": 0.6068876077198738, "grad_norm": 354.3676452636719, "learning_rate": 4.116981247572814e-06, "loss": 15.1552, "step": 300430 }, { "epoch": 0.6069078083525576, "grad_norm": 410.5220642089844, "learning_rate": 4.11663767048099e-06, "loss": 30.9106, "step": 300440 }, { "epoch": 0.6069280089852415, "grad_norm": 82.55916595458984, "learning_rate": 4.1162940976945695e-06, "loss": 
11.6088, "step": 300450 }, { "epoch": 0.6069482096179253, "grad_norm": 266.9975280761719, "learning_rate": 4.115950529215225e-06, "loss": 21.6371, "step": 300460 }, { "epoch": 0.6069684102506091, "grad_norm": 1182.3514404296875, "learning_rate": 4.115606965044628e-06, "loss": 26.6251, "step": 300470 }, { "epoch": 0.6069886108832928, "grad_norm": 586.1812133789062, "learning_rate": 4.115263405184456e-06, "loss": 22.5297, "step": 300480 }, { "epoch": 0.6070088115159766, "grad_norm": 467.3453369140625, "learning_rate": 4.114919849636383e-06, "loss": 17.5214, "step": 300490 }, { "epoch": 0.6070290121486605, "grad_norm": 1092.0445556640625, "learning_rate": 4.114576298402085e-06, "loss": 19.347, "step": 300500 }, { "epoch": 0.6070492127813443, "grad_norm": 200.702880859375, "learning_rate": 4.1142327514832326e-06, "loss": 24.5764, "step": 300510 }, { "epoch": 0.6070694134140281, "grad_norm": 320.72991943359375, "learning_rate": 4.1138892088815025e-06, "loss": 22.3576, "step": 300520 }, { "epoch": 0.6070896140467119, "grad_norm": 583.939453125, "learning_rate": 4.113545670598571e-06, "loss": 24.4126, "step": 300530 }, { "epoch": 0.6071098146793957, "grad_norm": 105.73104095458984, "learning_rate": 4.113202136636108e-06, "loss": 12.8853, "step": 300540 }, { "epoch": 0.6071300153120796, "grad_norm": 10.637992858886719, "learning_rate": 4.11285860699579e-06, "loss": 15.2372, "step": 300550 }, { "epoch": 0.6071502159447634, "grad_norm": 460.02093505859375, "learning_rate": 4.112515081679295e-06, "loss": 11.4211, "step": 300560 }, { "epoch": 0.6071704165774472, "grad_norm": 180.24130249023438, "learning_rate": 4.112171560688289e-06, "loss": 16.7634, "step": 300570 }, { "epoch": 0.607190617210131, "grad_norm": 420.7903137207031, "learning_rate": 4.111828044024454e-06, "loss": 31.8523, "step": 300580 }, { "epoch": 0.6072108178428148, "grad_norm": 109.20729064941406, "learning_rate": 4.111484531689462e-06, "loss": 12.5776, "step": 300590 }, { "epoch": 0.6072310184754987, "grad_norm": 521.873046875, "learning_rate": 4.111141023684986e-06, "loss": 16.0646, "step": 300600 }, { "epoch": 0.6072512191081825, "grad_norm": 529.4212036132812, "learning_rate": 4.1107975200126996e-06, "loss": 18.8753, "step": 300610 }, { "epoch": 0.6072714197408663, "grad_norm": 377.3888854980469, "learning_rate": 4.11045402067428e-06, "loss": 21.1732, "step": 300620 }, { "epoch": 0.6072916203735501, "grad_norm": 289.85052490234375, "learning_rate": 4.110110525671399e-06, "loss": 19.4854, "step": 300630 }, { "epoch": 0.6073118210062339, "grad_norm": 321.3138427734375, "learning_rate": 4.109767035005729e-06, "loss": 17.224, "step": 300640 }, { "epoch": 0.6073320216389178, "grad_norm": 292.6101379394531, "learning_rate": 4.109423548678949e-06, "loss": 11.0724, "step": 300650 }, { "epoch": 0.6073522222716016, "grad_norm": 283.8573913574219, "learning_rate": 4.109080066692731e-06, "loss": 15.4067, "step": 300660 }, { "epoch": 0.6073724229042854, "grad_norm": 511.39447021484375, "learning_rate": 4.108736589048748e-06, "loss": 27.6468, "step": 300670 }, { "epoch": 0.6073926235369692, "grad_norm": 84.58218383789062, "learning_rate": 4.108393115748675e-06, "loss": 16.2186, "step": 300680 }, { "epoch": 0.607412824169653, "grad_norm": 302.5014953613281, "learning_rate": 4.108049646794186e-06, "loss": 20.6337, "step": 300690 }, { "epoch": 0.6074330248023369, "grad_norm": 190.1282501220703, "learning_rate": 4.107706182186954e-06, "loss": 18.9557, "step": 300700 }, { "epoch": 0.6074532254350207, "grad_norm": 402.6483154296875, "learning_rate": 
4.107362721928653e-06, "loss": 18.9309, "step": 300710 }, { "epoch": 0.6074734260677045, "grad_norm": 293.894775390625, "learning_rate": 4.107019266020961e-06, "loss": 13.3887, "step": 300720 }, { "epoch": 0.6074936267003882, "grad_norm": 395.2062072753906, "learning_rate": 4.106675814465545e-06, "loss": 15.7355, "step": 300730 }, { "epoch": 0.607513827333072, "grad_norm": 451.3974609375, "learning_rate": 4.106332367264085e-06, "loss": 12.7407, "step": 300740 }, { "epoch": 0.6075340279657558, "grad_norm": 483.8083801269531, "learning_rate": 4.105988924418252e-06, "loss": 18.352, "step": 300750 }, { "epoch": 0.6075542285984397, "grad_norm": 446.7733154296875, "learning_rate": 4.105645485929721e-06, "loss": 10.8636, "step": 300760 }, { "epoch": 0.6075744292311235, "grad_norm": 17.746076583862305, "learning_rate": 4.105302051800166e-06, "loss": 20.9803, "step": 300770 }, { "epoch": 0.6075946298638073, "grad_norm": 59.37981414794922, "learning_rate": 4.1049586220312594e-06, "loss": 46.9566, "step": 300780 }, { "epoch": 0.6076148304964911, "grad_norm": 398.4682312011719, "learning_rate": 4.104615196624676e-06, "loss": 18.145, "step": 300790 }, { "epoch": 0.6076350311291749, "grad_norm": 346.58404541015625, "learning_rate": 4.104271775582089e-06, "loss": 24.9918, "step": 300800 }, { "epoch": 0.6076552317618588, "grad_norm": 340.0547180175781, "learning_rate": 4.103928358905173e-06, "loss": 13.4387, "step": 300810 }, { "epoch": 0.6076754323945426, "grad_norm": 552.000732421875, "learning_rate": 4.1035849465956024e-06, "loss": 23.7551, "step": 300820 }, { "epoch": 0.6076956330272264, "grad_norm": 355.2682800292969, "learning_rate": 4.103241538655049e-06, "loss": 14.823, "step": 300830 }, { "epoch": 0.6077158336599102, "grad_norm": 347.156005859375, "learning_rate": 4.1028981350851885e-06, "loss": 8.7515, "step": 300840 }, { "epoch": 0.607736034292594, "grad_norm": 338.92706298828125, "learning_rate": 4.102554735887694e-06, "loss": 20.7175, "step": 300850 }, { "epoch": 0.6077562349252779, "grad_norm": 262.94232177734375, "learning_rate": 4.102211341064237e-06, "loss": 16.7035, "step": 300860 }, { "epoch": 0.6077764355579617, "grad_norm": 54.84391784667969, "learning_rate": 4.101867950616493e-06, "loss": 14.0018, "step": 300870 }, { "epoch": 0.6077966361906455, "grad_norm": 39.956153869628906, "learning_rate": 4.101524564546139e-06, "loss": 20.8418, "step": 300880 }, { "epoch": 0.6078168368233293, "grad_norm": 400.2420349121094, "learning_rate": 4.101181182854841e-06, "loss": 13.893, "step": 300890 }, { "epoch": 0.6078370374560131, "grad_norm": 701.3252563476562, "learning_rate": 4.100837805544279e-06, "loss": 26.9054, "step": 300900 }, { "epoch": 0.607857238088697, "grad_norm": 66.36013793945312, "learning_rate": 4.100494432616126e-06, "loss": 13.9931, "step": 300910 }, { "epoch": 0.6078774387213808, "grad_norm": 552.8189697265625, "learning_rate": 4.1001510640720525e-06, "loss": 35.3448, "step": 300920 }, { "epoch": 0.6078976393540646, "grad_norm": 425.0431823730469, "learning_rate": 4.099807699913733e-06, "loss": 23.3374, "step": 300930 }, { "epoch": 0.6079178399867484, "grad_norm": 142.89114379882812, "learning_rate": 4.09946434014284e-06, "loss": 17.9236, "step": 300940 }, { "epoch": 0.6079380406194322, "grad_norm": 329.4931640625, "learning_rate": 4.099120984761053e-06, "loss": 21.6973, "step": 300950 }, { "epoch": 0.6079582412521161, "grad_norm": 145.770263671875, "learning_rate": 4.098777633770038e-06, "loss": 8.8998, "step": 300960 }, { "epoch": 0.6079784418847999, "grad_norm": 
125.50902557373047, "learning_rate": 4.0984342871714725e-06, "loss": 28.4501, "step": 300970 }, { "epoch": 0.6079986425174837, "grad_norm": 1056.3038330078125, "learning_rate": 4.0980909449670295e-06, "loss": 32.9664, "step": 300980 }, { "epoch": 0.6080188431501674, "grad_norm": 501.4185791015625, "learning_rate": 4.09774760715838e-06, "loss": 18.9642, "step": 300990 }, { "epoch": 0.6080390437828512, "grad_norm": 400.7528991699219, "learning_rate": 4.0974042737472005e-06, "loss": 13.6935, "step": 301000 }, { "epoch": 0.608059244415535, "grad_norm": 769.8255615234375, "learning_rate": 4.0970609447351635e-06, "loss": 22.393, "step": 301010 }, { "epoch": 0.6080794450482189, "grad_norm": 406.9213562011719, "learning_rate": 4.096717620123941e-06, "loss": 27.2523, "step": 301020 }, { "epoch": 0.6080996456809027, "grad_norm": 329.3095397949219, "learning_rate": 4.096374299915207e-06, "loss": 16.3404, "step": 301030 }, { "epoch": 0.6081198463135865, "grad_norm": 603.5701904296875, "learning_rate": 4.096030984110638e-06, "loss": 15.9213, "step": 301040 }, { "epoch": 0.6081400469462703, "grad_norm": 262.6082763671875, "learning_rate": 4.0956876727119e-06, "loss": 23.9107, "step": 301050 }, { "epoch": 0.6081602475789541, "grad_norm": 231.5294952392578, "learning_rate": 4.095344365720673e-06, "loss": 18.7248, "step": 301060 }, { "epoch": 0.608180448211638, "grad_norm": 33.1486930847168, "learning_rate": 4.095001063138629e-06, "loss": 10.5146, "step": 301070 }, { "epoch": 0.6082006488443218, "grad_norm": 385.7687683105469, "learning_rate": 4.0946577649674375e-06, "loss": 19.0383, "step": 301080 }, { "epoch": 0.6082208494770056, "grad_norm": 578.4713745117188, "learning_rate": 4.094314471208775e-06, "loss": 17.3994, "step": 301090 }, { "epoch": 0.6082410501096894, "grad_norm": 613.3488159179688, "learning_rate": 4.093971181864313e-06, "loss": 20.3948, "step": 301100 }, { "epoch": 0.6082612507423732, "grad_norm": 88.45452117919922, "learning_rate": 4.093627896935727e-06, "loss": 12.3156, "step": 301110 }, { "epoch": 0.6082814513750571, "grad_norm": 139.64923095703125, "learning_rate": 4.093284616424688e-06, "loss": 16.1912, "step": 301120 }, { "epoch": 0.6083016520077409, "grad_norm": 367.98553466796875, "learning_rate": 4.092941340332871e-06, "loss": 22.5281, "step": 301130 }, { "epoch": 0.6083218526404247, "grad_norm": 462.8436279296875, "learning_rate": 4.092598068661948e-06, "loss": 24.1809, "step": 301140 }, { "epoch": 0.6083420532731085, "grad_norm": 382.7926025390625, "learning_rate": 4.092254801413591e-06, "loss": 14.3907, "step": 301150 }, { "epoch": 0.6083622539057923, "grad_norm": 198.9886932373047, "learning_rate": 4.091911538589474e-06, "loss": 16.9348, "step": 301160 }, { "epoch": 0.6083824545384762, "grad_norm": 504.3564453125, "learning_rate": 4.091568280191271e-06, "loss": 12.4013, "step": 301170 }, { "epoch": 0.60840265517116, "grad_norm": 413.9426574707031, "learning_rate": 4.091225026220652e-06, "loss": 20.0764, "step": 301180 }, { "epoch": 0.6084228558038438, "grad_norm": 357.5829162597656, "learning_rate": 4.090881776679293e-06, "loss": 10.3754, "step": 301190 }, { "epoch": 0.6084430564365276, "grad_norm": 1410.547607421875, "learning_rate": 4.090538531568867e-06, "loss": 35.9018, "step": 301200 }, { "epoch": 0.6084632570692114, "grad_norm": 63.061458587646484, "learning_rate": 4.090195290891045e-06, "loss": 13.956, "step": 301210 }, { "epoch": 0.6084834577018953, "grad_norm": 487.25469970703125, "learning_rate": 4.0898520546475e-06, "loss": 32.4872, "step": 301220 }, { "epoch": 
0.6085036583345791, "grad_norm": 333.2745361328125, "learning_rate": 4.089508822839907e-06, "loss": 16.9703, "step": 301230 }, { "epoch": 0.6085238589672629, "grad_norm": 230.5712127685547, "learning_rate": 4.089165595469937e-06, "loss": 13.3602, "step": 301240 }, { "epoch": 0.6085440595999466, "grad_norm": 283.78399658203125, "learning_rate": 4.088822372539263e-06, "loss": 21.2786, "step": 301250 }, { "epoch": 0.6085642602326304, "grad_norm": 206.6420440673828, "learning_rate": 4.0884791540495585e-06, "loss": 12.7147, "step": 301260 }, { "epoch": 0.6085844608653143, "grad_norm": 735.728271484375, "learning_rate": 4.0881359400024964e-06, "loss": 31.9729, "step": 301270 }, { "epoch": 0.6086046614979981, "grad_norm": 251.7333526611328, "learning_rate": 4.087792730399749e-06, "loss": 15.1294, "step": 301280 }, { "epoch": 0.6086248621306819, "grad_norm": 673.1650390625, "learning_rate": 4.087449525242989e-06, "loss": 18.04, "step": 301290 }, { "epoch": 0.6086450627633657, "grad_norm": 457.9283447265625, "learning_rate": 4.087106324533891e-06, "loss": 16.8589, "step": 301300 }, { "epoch": 0.6086652633960495, "grad_norm": 679.8662719726562, "learning_rate": 4.086763128274124e-06, "loss": 15.5843, "step": 301310 }, { "epoch": 0.6086854640287334, "grad_norm": 296.8544921875, "learning_rate": 4.086419936465362e-06, "loss": 15.3972, "step": 301320 }, { "epoch": 0.6087056646614172, "grad_norm": 234.82400512695312, "learning_rate": 4.0860767491092825e-06, "loss": 19.8179, "step": 301330 }, { "epoch": 0.608725865294101, "grad_norm": 485.77532958984375, "learning_rate": 4.08573356620755e-06, "loss": 17.0995, "step": 301340 }, { "epoch": 0.6087460659267848, "grad_norm": 162.58717346191406, "learning_rate": 4.0853903877618425e-06, "loss": 17.0327, "step": 301350 }, { "epoch": 0.6087662665594686, "grad_norm": 101.74900817871094, "learning_rate": 4.085047213773831e-06, "loss": 10.7914, "step": 301360 }, { "epoch": 0.6087864671921525, "grad_norm": 405.9892272949219, "learning_rate": 4.08470404424519e-06, "loss": 26.3598, "step": 301370 }, { "epoch": 0.6088066678248363, "grad_norm": 504.0001525878906, "learning_rate": 4.084360879177588e-06, "loss": 14.5078, "step": 301380 }, { "epoch": 0.6088268684575201, "grad_norm": 297.1695251464844, "learning_rate": 4.0840177185727005e-06, "loss": 23.0532, "step": 301390 }, { "epoch": 0.6088470690902039, "grad_norm": 337.6854553222656, "learning_rate": 4.083674562432203e-06, "loss": 25.5926, "step": 301400 }, { "epoch": 0.6088672697228877, "grad_norm": 258.78253173828125, "learning_rate": 4.0833314107577605e-06, "loss": 7.1162, "step": 301410 }, { "epoch": 0.6088874703555716, "grad_norm": 299.7767333984375, "learning_rate": 4.08298826355105e-06, "loss": 22.6414, "step": 301420 }, { "epoch": 0.6089076709882554, "grad_norm": 147.05128479003906, "learning_rate": 4.082645120813746e-06, "loss": 16.316, "step": 301430 }, { "epoch": 0.6089278716209392, "grad_norm": 709.7376708984375, "learning_rate": 4.082301982547517e-06, "loss": 13.4846, "step": 301440 }, { "epoch": 0.608948072253623, "grad_norm": 324.18280029296875, "learning_rate": 4.081958848754035e-06, "loss": 8.5537, "step": 301450 }, { "epoch": 0.6089682728863068, "grad_norm": 267.200927734375, "learning_rate": 4.081615719434978e-06, "loss": 16.7705, "step": 301460 }, { "epoch": 0.6089884735189907, "grad_norm": 24.94508934020996, "learning_rate": 4.081272594592011e-06, "loss": 14.3104, "step": 301470 }, { "epoch": 0.6090086741516745, "grad_norm": 2.499239921569824, "learning_rate": 4.080929474226811e-06, "loss": 15.5801, 
"step": 301480 }, { "epoch": 0.6090288747843583, "grad_norm": 652.1822509765625, "learning_rate": 4.080586358341051e-06, "loss": 19.2077, "step": 301490 }, { "epoch": 0.609049075417042, "grad_norm": 338.8230285644531, "learning_rate": 4.0802432469364e-06, "loss": 6.6709, "step": 301500 }, { "epoch": 0.6090692760497258, "grad_norm": 340.7795715332031, "learning_rate": 4.0799001400145315e-06, "loss": 20.0169, "step": 301510 }, { "epoch": 0.6090894766824096, "grad_norm": 232.5472869873047, "learning_rate": 4.07955703757712e-06, "loss": 9.472, "step": 301520 }, { "epoch": 0.6091096773150935, "grad_norm": 235.46383666992188, "learning_rate": 4.079213939625834e-06, "loss": 28.0348, "step": 301530 }, { "epoch": 0.6091298779477773, "grad_norm": 275.21759033203125, "learning_rate": 4.078870846162349e-06, "loss": 12.917, "step": 301540 }, { "epoch": 0.6091500785804611, "grad_norm": 352.4283752441406, "learning_rate": 4.078527757188333e-06, "loss": 26.7873, "step": 301550 }, { "epoch": 0.6091702792131449, "grad_norm": 514.5021362304688, "learning_rate": 4.078184672705465e-06, "loss": 25.0382, "step": 301560 }, { "epoch": 0.6091904798458287, "grad_norm": 575.0938110351562, "learning_rate": 4.077841592715409e-06, "loss": 12.1316, "step": 301570 }, { "epoch": 0.6092106804785126, "grad_norm": 349.18182373046875, "learning_rate": 4.077498517219844e-06, "loss": 17.5067, "step": 301580 }, { "epoch": 0.6092308811111964, "grad_norm": 136.44674682617188, "learning_rate": 4.0771554462204395e-06, "loss": 27.4129, "step": 301590 }, { "epoch": 0.6092510817438802, "grad_norm": 2124.14599609375, "learning_rate": 4.0768123797188665e-06, "loss": 25.9766, "step": 301600 }, { "epoch": 0.609271282376564, "grad_norm": 524.9080200195312, "learning_rate": 4.076469317716798e-06, "loss": 16.3862, "step": 301610 }, { "epoch": 0.6092914830092478, "grad_norm": 674.9454345703125, "learning_rate": 4.076126260215906e-06, "loss": 21.6937, "step": 301620 }, { "epoch": 0.6093116836419317, "grad_norm": 213.24224853515625, "learning_rate": 4.0757832072178626e-06, "loss": 7.0525, "step": 301630 }, { "epoch": 0.6093318842746155, "grad_norm": 69.69252014160156, "learning_rate": 4.075440158724339e-06, "loss": 16.8704, "step": 301640 }, { "epoch": 0.6093520849072993, "grad_norm": 332.0189514160156, "learning_rate": 4.075097114737011e-06, "loss": 15.391, "step": 301650 }, { "epoch": 0.6093722855399831, "grad_norm": 916.816650390625, "learning_rate": 4.074754075257543e-06, "loss": 23.7619, "step": 301660 }, { "epoch": 0.609392486172667, "grad_norm": 1299.2314453125, "learning_rate": 4.074411040287614e-06, "loss": 22.8498, "step": 301670 }, { "epoch": 0.6094126868053508, "grad_norm": 494.7002258300781, "learning_rate": 4.074068009828894e-06, "loss": 23.4934, "step": 301680 }, { "epoch": 0.6094328874380346, "grad_norm": 860.8915405273438, "learning_rate": 4.073724983883053e-06, "loss": 22.2737, "step": 301690 }, { "epoch": 0.6094530880707184, "grad_norm": 55.833587646484375, "learning_rate": 4.073381962451764e-06, "loss": 11.6702, "step": 301700 }, { "epoch": 0.6094732887034022, "grad_norm": 487.8169860839844, "learning_rate": 4.073038945536698e-06, "loss": 14.0798, "step": 301710 }, { "epoch": 0.609493489336086, "grad_norm": 296.30450439453125, "learning_rate": 4.07269593313953e-06, "loss": 14.3807, "step": 301720 }, { "epoch": 0.6095136899687699, "grad_norm": 198.1383819580078, "learning_rate": 4.0723529252619276e-06, "loss": 14.6119, "step": 301730 }, { "epoch": 0.6095338906014537, "grad_norm": 911.2178955078125, "learning_rate": 
4.0720099219055655e-06, "loss": 12.2603, "step": 301740 }, { "epoch": 0.6095540912341375, "grad_norm": 492.0929260253906, "learning_rate": 4.0716669230721154e-06, "loss": 18.7033, "step": 301750 }, { "epoch": 0.6095742918668212, "grad_norm": 137.6938018798828, "learning_rate": 4.071323928763247e-06, "loss": 10.1898, "step": 301760 }, { "epoch": 0.609594492499505, "grad_norm": 84.15969848632812, "learning_rate": 4.070980938980633e-06, "loss": 20.4293, "step": 301770 }, { "epoch": 0.6096146931321889, "grad_norm": 538.3965454101562, "learning_rate": 4.070637953725946e-06, "loss": 20.5844, "step": 301780 }, { "epoch": 0.6096348937648727, "grad_norm": 504.7793884277344, "learning_rate": 4.0702949730008565e-06, "loss": 30.9394, "step": 301790 }, { "epoch": 0.6096550943975565, "grad_norm": 293.6222229003906, "learning_rate": 4.069951996807034e-06, "loss": 15.7182, "step": 301800 }, { "epoch": 0.6096752950302403, "grad_norm": 395.0409851074219, "learning_rate": 4.069609025146156e-06, "loss": 18.8731, "step": 301810 }, { "epoch": 0.6096954956629241, "grad_norm": 337.16558837890625, "learning_rate": 4.0692660580198905e-06, "loss": 23.5865, "step": 301820 }, { "epoch": 0.609715696295608, "grad_norm": 289.2772216796875, "learning_rate": 4.068923095429909e-06, "loss": 16.9094, "step": 301830 }, { "epoch": 0.6097358969282918, "grad_norm": 0.0, "learning_rate": 4.068580137377882e-06, "loss": 16.1499, "step": 301840 }, { "epoch": 0.6097560975609756, "grad_norm": 395.0556640625, "learning_rate": 4.068237183865485e-06, "loss": 17.7865, "step": 301850 }, { "epoch": 0.6097762981936594, "grad_norm": 392.32684326171875, "learning_rate": 4.067894234894384e-06, "loss": 32.0226, "step": 301860 }, { "epoch": 0.6097964988263432, "grad_norm": 276.8617248535156, "learning_rate": 4.067551290466255e-06, "loss": 29.1514, "step": 301870 }, { "epoch": 0.6098166994590271, "grad_norm": 499.0665588378906, "learning_rate": 4.067208350582769e-06, "loss": 14.1026, "step": 301880 }, { "epoch": 0.6098369000917109, "grad_norm": 710.1145629882812, "learning_rate": 4.066865415245594e-06, "loss": 29.4181, "step": 301890 }, { "epoch": 0.6098571007243947, "grad_norm": 307.7794189453125, "learning_rate": 4.066522484456406e-06, "loss": 13.1219, "step": 301900 }, { "epoch": 0.6098773013570785, "grad_norm": 589.836181640625, "learning_rate": 4.066179558216874e-06, "loss": 21.7507, "step": 301910 }, { "epoch": 0.6098975019897623, "grad_norm": 137.0048828125, "learning_rate": 4.0658366365286684e-06, "loss": 22.991, "step": 301920 }, { "epoch": 0.6099177026224462, "grad_norm": 503.49127197265625, "learning_rate": 4.06549371939346e-06, "loss": 11.3635, "step": 301930 }, { "epoch": 0.60993790325513, "grad_norm": 541.7255859375, "learning_rate": 4.0651508068129264e-06, "loss": 17.8539, "step": 301940 }, { "epoch": 0.6099581038878138, "grad_norm": 244.3643035888672, "learning_rate": 4.064807898788731e-06, "loss": 23.5271, "step": 301950 }, { "epoch": 0.6099783045204976, "grad_norm": 189.54734802246094, "learning_rate": 4.064464995322549e-06, "loss": 15.1665, "step": 301960 }, { "epoch": 0.6099985051531814, "grad_norm": 291.32281494140625, "learning_rate": 4.064122096416053e-06, "loss": 24.556, "step": 301970 }, { "epoch": 0.6100187057858653, "grad_norm": 221.17601013183594, "learning_rate": 4.063779202070911e-06, "loss": 18.5762, "step": 301980 }, { "epoch": 0.6100389064185491, "grad_norm": 510.4002990722656, "learning_rate": 4.0634363122887945e-06, "loss": 20.7079, "step": 301990 }, { "epoch": 0.6100591070512329, "grad_norm": 287.87322998046875, 
"learning_rate": 4.063093427071376e-06, "loss": 16.008, "step": 302000 }, { "epoch": 0.6100793076839166, "grad_norm": 572.6348876953125, "learning_rate": 4.06275054642033e-06, "loss": 21.2857, "step": 302010 }, { "epoch": 0.6100995083166004, "grad_norm": 485.3209228515625, "learning_rate": 4.06240767033732e-06, "loss": 14.0891, "step": 302020 }, { "epoch": 0.6101197089492842, "grad_norm": 688.4270629882812, "learning_rate": 4.0620647988240225e-06, "loss": 22.3187, "step": 302030 }, { "epoch": 0.6101399095819681, "grad_norm": 452.1419372558594, "learning_rate": 4.061721931882109e-06, "loss": 16.7207, "step": 302040 }, { "epoch": 0.6101601102146519, "grad_norm": 0.0, "learning_rate": 4.061379069513248e-06, "loss": 19.5551, "step": 302050 }, { "epoch": 0.6101803108473357, "grad_norm": 138.41639709472656, "learning_rate": 4.0610362117191106e-06, "loss": 17.4711, "step": 302060 }, { "epoch": 0.6102005114800195, "grad_norm": 408.5327453613281, "learning_rate": 4.0606933585013704e-06, "loss": 19.5146, "step": 302070 }, { "epoch": 0.6102207121127033, "grad_norm": 504.6182861328125, "learning_rate": 4.060350509861696e-06, "loss": 15.9573, "step": 302080 }, { "epoch": 0.6102409127453872, "grad_norm": 39.33086013793945, "learning_rate": 4.0600076658017585e-06, "loss": 29.3718, "step": 302090 }, { "epoch": 0.610261113378071, "grad_norm": 667.924560546875, "learning_rate": 4.0596648263232315e-06, "loss": 11.2847, "step": 302100 }, { "epoch": 0.6102813140107548, "grad_norm": 562.177734375, "learning_rate": 4.059321991427782e-06, "loss": 13.6215, "step": 302110 }, { "epoch": 0.6103015146434386, "grad_norm": 124.45332336425781, "learning_rate": 4.058979161117084e-06, "loss": 12.5601, "step": 302120 }, { "epoch": 0.6103217152761224, "grad_norm": 294.8182373046875, "learning_rate": 4.058636335392809e-06, "loss": 21.3163, "step": 302130 }, { "epoch": 0.6103419159088063, "grad_norm": 695.095947265625, "learning_rate": 4.0582935142566245e-06, "loss": 22.359, "step": 302140 }, { "epoch": 0.6103621165414901, "grad_norm": 1088.446044921875, "learning_rate": 4.057950697710203e-06, "loss": 19.9529, "step": 302150 }, { "epoch": 0.6103823171741739, "grad_norm": 591.505859375, "learning_rate": 4.057607885755215e-06, "loss": 26.2497, "step": 302160 }, { "epoch": 0.6104025178068577, "grad_norm": 757.7903442382812, "learning_rate": 4.057265078393335e-06, "loss": 18.1819, "step": 302170 }, { "epoch": 0.6104227184395415, "grad_norm": 473.31011962890625, "learning_rate": 4.056922275626227e-06, "loss": 27.9173, "step": 302180 }, { "epoch": 0.6104429190722254, "grad_norm": 497.7650451660156, "learning_rate": 4.056579477455567e-06, "loss": 24.6485, "step": 302190 }, { "epoch": 0.6104631197049092, "grad_norm": 115.89996337890625, "learning_rate": 4.0562366838830255e-06, "loss": 11.2858, "step": 302200 }, { "epoch": 0.610483320337593, "grad_norm": 630.05029296875, "learning_rate": 4.05589389491027e-06, "loss": 17.092, "step": 302210 }, { "epoch": 0.6105035209702768, "grad_norm": 26.682065963745117, "learning_rate": 4.0555511105389735e-06, "loss": 15.3019, "step": 302220 }, { "epoch": 0.6105237216029606, "grad_norm": 286.85235595703125, "learning_rate": 4.055208330770808e-06, "loss": 16.3573, "step": 302230 }, { "epoch": 0.6105439222356445, "grad_norm": 315.05401611328125, "learning_rate": 4.054865555607441e-06, "loss": 20.918, "step": 302240 }, { "epoch": 0.6105641228683283, "grad_norm": 283.9547119140625, "learning_rate": 4.054522785050543e-06, "loss": 28.8137, "step": 302250 }, { "epoch": 0.6105843235010121, "grad_norm": 
698.4987182617188, "learning_rate": 4.05418001910179e-06, "loss": 9.4429, "step": 302260 }, { "epoch": 0.6106045241336958, "grad_norm": 179.47476196289062, "learning_rate": 4.053837257762846e-06, "loss": 15.6023, "step": 302270 }, { "epoch": 0.6106247247663796, "grad_norm": 462.5314025878906, "learning_rate": 4.053494501035385e-06, "loss": 23.7735, "step": 302280 }, { "epoch": 0.6106449253990635, "grad_norm": 890.564697265625, "learning_rate": 4.053151748921078e-06, "loss": 16.0309, "step": 302290 }, { "epoch": 0.6106651260317473, "grad_norm": 143.55178833007812, "learning_rate": 4.052809001421595e-06, "loss": 30.7759, "step": 302300 }, { "epoch": 0.6106853266644311, "grad_norm": 368.9315185546875, "learning_rate": 4.0524662585386045e-06, "loss": 19.7443, "step": 302310 }, { "epoch": 0.6107055272971149, "grad_norm": 427.68548583984375, "learning_rate": 4.0521235202737775e-06, "loss": 21.6405, "step": 302320 }, { "epoch": 0.6107257279297987, "grad_norm": 155.68882751464844, "learning_rate": 4.051780786628789e-06, "loss": 10.4967, "step": 302330 }, { "epoch": 0.6107459285624826, "grad_norm": 407.12408447265625, "learning_rate": 4.0514380576053035e-06, "loss": 23.8343, "step": 302340 }, { "epoch": 0.6107661291951664, "grad_norm": 1344.190673828125, "learning_rate": 4.051095333204994e-06, "loss": 18.8498, "step": 302350 }, { "epoch": 0.6107863298278502, "grad_norm": 346.4909362792969, "learning_rate": 4.0507526134295314e-06, "loss": 23.4103, "step": 302360 }, { "epoch": 0.610806530460534, "grad_norm": 316.17620849609375, "learning_rate": 4.050409898280585e-06, "loss": 9.4559, "step": 302370 }, { "epoch": 0.6108267310932178, "grad_norm": 574.623046875, "learning_rate": 4.050067187759826e-06, "loss": 17.2412, "step": 302380 }, { "epoch": 0.6108469317259017, "grad_norm": 708.8782348632812, "learning_rate": 4.049724481868924e-06, "loss": 23.2733, "step": 302390 }, { "epoch": 0.6108671323585855, "grad_norm": 695.9046020507812, "learning_rate": 4.0493817806095504e-06, "loss": 24.038, "step": 302400 }, { "epoch": 0.6108873329912693, "grad_norm": 128.50205993652344, "learning_rate": 4.049039083983372e-06, "loss": 20.3051, "step": 302410 }, { "epoch": 0.6109075336239531, "grad_norm": 255.7003631591797, "learning_rate": 4.048696391992065e-06, "loss": 24.7297, "step": 302420 }, { "epoch": 0.6109277342566369, "grad_norm": 461.51251220703125, "learning_rate": 4.048353704637295e-06, "loss": 11.599, "step": 302430 }, { "epoch": 0.6109479348893208, "grad_norm": 117.34037017822266, "learning_rate": 4.048011021920733e-06, "loss": 18.9199, "step": 302440 }, { "epoch": 0.6109681355220046, "grad_norm": 323.08642578125, "learning_rate": 4.047668343844051e-06, "loss": 12.1964, "step": 302450 }, { "epoch": 0.6109883361546884, "grad_norm": 204.79774475097656, "learning_rate": 4.047325670408918e-06, "loss": 15.2674, "step": 302460 }, { "epoch": 0.6110085367873722, "grad_norm": 46.07692337036133, "learning_rate": 4.046983001617004e-06, "loss": 18.4622, "step": 302470 }, { "epoch": 0.611028737420056, "grad_norm": 294.8068542480469, "learning_rate": 4.0466403374699775e-06, "loss": 41.0519, "step": 302480 }, { "epoch": 0.6110489380527399, "grad_norm": 298.0314025878906, "learning_rate": 4.046297677969513e-06, "loss": 17.706, "step": 302490 }, { "epoch": 0.6110691386854237, "grad_norm": 676.768310546875, "learning_rate": 4.045955023117276e-06, "loss": 21.6609, "step": 302500 }, { "epoch": 0.6110893393181075, "grad_norm": 455.82550048828125, "learning_rate": 4.045612372914939e-06, "loss": 19.1227, "step": 302510 }, { 
"epoch": 0.6111095399507913, "grad_norm": 204.07211303710938, "learning_rate": 4.045269727364173e-06, "loss": 11.8948, "step": 302520 }, { "epoch": 0.611129740583475, "grad_norm": 638.8299560546875, "learning_rate": 4.044927086466646e-06, "loss": 16.8545, "step": 302530 }, { "epoch": 0.6111499412161588, "grad_norm": 543.645263671875, "learning_rate": 4.044584450224026e-06, "loss": 15.1937, "step": 302540 }, { "epoch": 0.6111701418488427, "grad_norm": 8.59873104095459, "learning_rate": 4.0442418186379895e-06, "loss": 10.9402, "step": 302550 }, { "epoch": 0.6111903424815265, "grad_norm": 274.8184814453125, "learning_rate": 4.043899191710199e-06, "loss": 19.9204, "step": 302560 }, { "epoch": 0.6112105431142103, "grad_norm": 982.5318603515625, "learning_rate": 4.043556569442329e-06, "loss": 22.9097, "step": 302570 }, { "epoch": 0.6112307437468941, "grad_norm": 172.61416625976562, "learning_rate": 4.0432139518360495e-06, "loss": 16.2588, "step": 302580 }, { "epoch": 0.6112509443795779, "grad_norm": 596.6406860351562, "learning_rate": 4.0428713388930276e-06, "loss": 18.488, "step": 302590 }, { "epoch": 0.6112711450122618, "grad_norm": 794.8352661132812, "learning_rate": 4.042528730614935e-06, "loss": 30.5124, "step": 302600 }, { "epoch": 0.6112913456449456, "grad_norm": 319.686767578125, "learning_rate": 4.042186127003441e-06, "loss": 19.7382, "step": 302610 }, { "epoch": 0.6113115462776294, "grad_norm": 648.2125244140625, "learning_rate": 4.0418435280602185e-06, "loss": 13.3559, "step": 302620 }, { "epoch": 0.6113317469103132, "grad_norm": 453.7065734863281, "learning_rate": 4.04150093378693e-06, "loss": 28.3858, "step": 302630 }, { "epoch": 0.611351947542997, "grad_norm": 233.4899139404297, "learning_rate": 4.041158344185252e-06, "loss": 20.3837, "step": 302640 }, { "epoch": 0.6113721481756809, "grad_norm": 516.4542846679688, "learning_rate": 4.040815759256852e-06, "loss": 31.8236, "step": 302650 }, { "epoch": 0.6113923488083647, "grad_norm": 262.2412414550781, "learning_rate": 4.0404731790034e-06, "loss": 35.1186, "step": 302660 }, { "epoch": 0.6114125494410485, "grad_norm": 518.053955078125, "learning_rate": 4.040130603426565e-06, "loss": 29.4273, "step": 302670 }, { "epoch": 0.6114327500737323, "grad_norm": 485.6481018066406, "learning_rate": 4.039788032528017e-06, "loss": 9.7625, "step": 302680 }, { "epoch": 0.6114529507064161, "grad_norm": 330.59075927734375, "learning_rate": 4.039445466309426e-06, "loss": 21.0793, "step": 302690 }, { "epoch": 0.6114731513391, "grad_norm": 378.89361572265625, "learning_rate": 4.039102904772459e-06, "loss": 10.9552, "step": 302700 }, { "epoch": 0.6114933519717838, "grad_norm": 561.077392578125, "learning_rate": 4.0387603479187915e-06, "loss": 28.2956, "step": 302710 }, { "epoch": 0.6115135526044676, "grad_norm": 745.5084838867188, "learning_rate": 4.038417795750086e-06, "loss": 17.587, "step": 302720 }, { "epoch": 0.6115337532371514, "grad_norm": 403.7786865234375, "learning_rate": 4.038075248268018e-06, "loss": 19.6259, "step": 302730 }, { "epoch": 0.6115539538698352, "grad_norm": 409.99603271484375, "learning_rate": 4.0377327054742544e-06, "loss": 20.5259, "step": 302740 }, { "epoch": 0.6115741545025191, "grad_norm": 316.7395324707031, "learning_rate": 4.037390167370464e-06, "loss": 15.7151, "step": 302750 }, { "epoch": 0.6115943551352029, "grad_norm": 449.1668395996094, "learning_rate": 4.037047633958317e-06, "loss": 17.8964, "step": 302760 }, { "epoch": 0.6116145557678867, "grad_norm": 250.11282348632812, "learning_rate": 4.0367051052394825e-06, 
"loss": 17.4639, "step": 302770 }, { "epoch": 0.6116347564005704, "grad_norm": 535.4613037109375, "learning_rate": 4.036362581215633e-06, "loss": 18.9022, "step": 302780 }, { "epoch": 0.6116549570332542, "grad_norm": 19.670238494873047, "learning_rate": 4.036020061888432e-06, "loss": 18.115, "step": 302790 }, { "epoch": 0.611675157665938, "grad_norm": 480.7803039550781, "learning_rate": 4.035677547259555e-06, "loss": 18.1596, "step": 302800 }, { "epoch": 0.6116953582986219, "grad_norm": 227.60894775390625, "learning_rate": 4.035335037330668e-06, "loss": 28.7043, "step": 302810 }, { "epoch": 0.6117155589313057, "grad_norm": 541.886962890625, "learning_rate": 4.034992532103441e-06, "loss": 20.1744, "step": 302820 }, { "epoch": 0.6117357595639895, "grad_norm": 239.96633911132812, "learning_rate": 4.034650031579543e-06, "loss": 14.6629, "step": 302830 }, { "epoch": 0.6117559601966733, "grad_norm": 655.57470703125, "learning_rate": 4.0343075357606445e-06, "loss": 18.103, "step": 302840 }, { "epoch": 0.6117761608293572, "grad_norm": 320.12969970703125, "learning_rate": 4.0339650446484135e-06, "loss": 21.4581, "step": 302850 }, { "epoch": 0.611796361462041, "grad_norm": 46.63835144042969, "learning_rate": 4.033622558244519e-06, "loss": 11.3173, "step": 302860 }, { "epoch": 0.6118165620947248, "grad_norm": 1075.1484375, "learning_rate": 4.0332800765506325e-06, "loss": 11.4977, "step": 302870 }, { "epoch": 0.6118367627274086, "grad_norm": 473.89617919921875, "learning_rate": 4.03293759956842e-06, "loss": 15.609, "step": 302880 }, { "epoch": 0.6118569633600924, "grad_norm": 121.10155487060547, "learning_rate": 4.032595127299552e-06, "loss": 22.2685, "step": 302890 }, { "epoch": 0.6118771639927763, "grad_norm": 155.80386352539062, "learning_rate": 4.032252659745699e-06, "loss": 15.1171, "step": 302900 }, { "epoch": 0.6118973646254601, "grad_norm": 72.47950744628906, "learning_rate": 4.03191019690853e-06, "loss": 12.6798, "step": 302910 }, { "epoch": 0.6119175652581439, "grad_norm": 267.1290283203125, "learning_rate": 4.031567738789713e-06, "loss": 20.5022, "step": 302920 }, { "epoch": 0.6119377658908277, "grad_norm": 473.3138427734375, "learning_rate": 4.031225285390915e-06, "loss": 15.3429, "step": 302930 }, { "epoch": 0.6119579665235115, "grad_norm": 401.8825378417969, "learning_rate": 4.0308828367138106e-06, "loss": 8.9975, "step": 302940 }, { "epoch": 0.6119781671561954, "grad_norm": 543.8609619140625, "learning_rate": 4.030540392760064e-06, "loss": 31.137, "step": 302950 }, { "epoch": 0.6119983677888792, "grad_norm": 147.74777221679688, "learning_rate": 4.030197953531346e-06, "loss": 17.4207, "step": 302960 }, { "epoch": 0.612018568421563, "grad_norm": 444.2214660644531, "learning_rate": 4.029855519029326e-06, "loss": 26.9827, "step": 302970 }, { "epoch": 0.6120387690542468, "grad_norm": 561.8872680664062, "learning_rate": 4.029513089255673e-06, "loss": 15.0142, "step": 302980 }, { "epoch": 0.6120589696869306, "grad_norm": 523.2400512695312, "learning_rate": 4.0291706642120545e-06, "loss": 12.7622, "step": 302990 }, { "epoch": 0.6120791703196145, "grad_norm": 34.56816864013672, "learning_rate": 4.028828243900141e-06, "loss": 13.7786, "step": 303000 }, { "epoch": 0.6120993709522983, "grad_norm": 695.3958129882812, "learning_rate": 4.028485828321601e-06, "loss": 19.7008, "step": 303010 }, { "epoch": 0.6121195715849821, "grad_norm": 662.0638427734375, "learning_rate": 4.028143417478102e-06, "loss": 18.3338, "step": 303020 }, { "epoch": 0.6121397722176659, "grad_norm": 19.25673484802246, 
"learning_rate": 4.0278010113713165e-06, "loss": 16.5985, "step": 303030 }, { "epoch": 0.6121599728503496, "grad_norm": 665.7344360351562, "learning_rate": 4.027458610002908e-06, "loss": 15.9306, "step": 303040 }, { "epoch": 0.6121801734830334, "grad_norm": 688.4962158203125, "learning_rate": 4.02711621337455e-06, "loss": 35.8098, "step": 303050 }, { "epoch": 0.6122003741157173, "grad_norm": 160.4550323486328, "learning_rate": 4.0267738214879095e-06, "loss": 17.0107, "step": 303060 }, { "epoch": 0.6122205747484011, "grad_norm": 70.0464096069336, "learning_rate": 4.026431434344656e-06, "loss": 18.876, "step": 303070 }, { "epoch": 0.6122407753810849, "grad_norm": 385.9349670410156, "learning_rate": 4.0260890519464565e-06, "loss": 19.8726, "step": 303080 }, { "epoch": 0.6122609760137687, "grad_norm": 198.9353790283203, "learning_rate": 4.02574667429498e-06, "loss": 14.8484, "step": 303090 }, { "epoch": 0.6122811766464525, "grad_norm": 312.6994934082031, "learning_rate": 4.025404301391898e-06, "loss": 13.5631, "step": 303100 }, { "epoch": 0.6123013772791364, "grad_norm": 167.55181884765625, "learning_rate": 4.0250619332388765e-06, "loss": 8.2391, "step": 303110 }, { "epoch": 0.6123215779118202, "grad_norm": 446.42742919921875, "learning_rate": 4.024719569837584e-06, "loss": 13.5969, "step": 303120 }, { "epoch": 0.612341778544504, "grad_norm": 112.34558868408203, "learning_rate": 4.024377211189693e-06, "loss": 13.0806, "step": 303130 }, { "epoch": 0.6123619791771878, "grad_norm": 133.58258056640625, "learning_rate": 4.024034857296866e-06, "loss": 18.9411, "step": 303140 }, { "epoch": 0.6123821798098716, "grad_norm": 633.3317260742188, "learning_rate": 4.023692508160776e-06, "loss": 19.7699, "step": 303150 }, { "epoch": 0.6124023804425555, "grad_norm": 499.4707946777344, "learning_rate": 4.0233501637830905e-06, "loss": 15.0687, "step": 303160 }, { "epoch": 0.6124225810752393, "grad_norm": 623.6322021484375, "learning_rate": 4.023007824165476e-06, "loss": 25.112, "step": 303170 }, { "epoch": 0.6124427817079231, "grad_norm": 69.01964569091797, "learning_rate": 4.022665489309604e-06, "loss": 25.0718, "step": 303180 }, { "epoch": 0.6124629823406069, "grad_norm": 588.364501953125, "learning_rate": 4.022323159217144e-06, "loss": 13.9179, "step": 303190 }, { "epoch": 0.6124831829732907, "grad_norm": 367.04351806640625, "learning_rate": 4.02198083388976e-06, "loss": 19.4609, "step": 303200 }, { "epoch": 0.6125033836059746, "grad_norm": 573.3627319335938, "learning_rate": 4.021638513329123e-06, "loss": 16.3759, "step": 303210 }, { "epoch": 0.6125235842386584, "grad_norm": 401.361328125, "learning_rate": 4.0212961975369e-06, "loss": 22.8179, "step": 303220 }, { "epoch": 0.6125437848713422, "grad_norm": 376.02496337890625, "learning_rate": 4.020953886514764e-06, "loss": 16.6355, "step": 303230 }, { "epoch": 0.612563985504026, "grad_norm": 86.31979370117188, "learning_rate": 4.020611580264377e-06, "loss": 29.3063, "step": 303240 }, { "epoch": 0.6125841861367098, "grad_norm": 227.11631774902344, "learning_rate": 4.020269278787411e-06, "loss": 15.518, "step": 303250 }, { "epoch": 0.6126043867693937, "grad_norm": 182.04917907714844, "learning_rate": 4.019926982085536e-06, "loss": 11.5511, "step": 303260 }, { "epoch": 0.6126245874020775, "grad_norm": 373.6564636230469, "learning_rate": 4.019584690160416e-06, "loss": 14.9567, "step": 303270 }, { "epoch": 0.6126447880347613, "grad_norm": 124.20471954345703, "learning_rate": 4.019242403013721e-06, "loss": 21.3783, "step": 303280 }, { "epoch": 0.612664988667445, 
"grad_norm": 322.8251037597656, "learning_rate": 4.0189001206471215e-06, "loss": 27.5457, "step": 303290 }, { "epoch": 0.6126851893001288, "grad_norm": 661.531982421875, "learning_rate": 4.018557843062282e-06, "loss": 24.307, "step": 303300 }, { "epoch": 0.6127053899328126, "grad_norm": 225.1516571044922, "learning_rate": 4.018215570260872e-06, "loss": 22.9267, "step": 303310 }, { "epoch": 0.6127255905654965, "grad_norm": 188.3852996826172, "learning_rate": 4.017873302244563e-06, "loss": 19.5008, "step": 303320 }, { "epoch": 0.6127457911981803, "grad_norm": 1034.5069580078125, "learning_rate": 4.017531039015017e-06, "loss": 20.9308, "step": 303330 }, { "epoch": 0.6127659918308641, "grad_norm": 228.44070434570312, "learning_rate": 4.017188780573907e-06, "loss": 16.3561, "step": 303340 }, { "epoch": 0.6127861924635479, "grad_norm": 220.6223602294922, "learning_rate": 4.016846526922901e-06, "loss": 18.9106, "step": 303350 }, { "epoch": 0.6128063930962317, "grad_norm": 465.0409240722656, "learning_rate": 4.016504278063664e-06, "loss": 11.076, "step": 303360 }, { "epoch": 0.6128265937289156, "grad_norm": 175.07225036621094, "learning_rate": 4.016162033997867e-06, "loss": 13.1767, "step": 303370 }, { "epoch": 0.6128467943615994, "grad_norm": 375.5794372558594, "learning_rate": 4.0158197947271746e-06, "loss": 10.0069, "step": 303380 }, { "epoch": 0.6128669949942832, "grad_norm": 803.9150390625, "learning_rate": 4.015477560253261e-06, "loss": 21.1764, "step": 303390 }, { "epoch": 0.612887195626967, "grad_norm": 204.79698181152344, "learning_rate": 4.015135330577787e-06, "loss": 13.4105, "step": 303400 }, { "epoch": 0.6129073962596508, "grad_norm": 984.7935791015625, "learning_rate": 4.014793105702425e-06, "loss": 23.9986, "step": 303410 }, { "epoch": 0.6129275968923347, "grad_norm": 1003.0968627929688, "learning_rate": 4.014450885628843e-06, "loss": 26.145, "step": 303420 }, { "epoch": 0.6129477975250185, "grad_norm": 239.93844604492188, "learning_rate": 4.014108670358707e-06, "loss": 12.8953, "step": 303430 }, { "epoch": 0.6129679981577023, "grad_norm": 448.0235595703125, "learning_rate": 4.013766459893686e-06, "loss": 15.014, "step": 303440 }, { "epoch": 0.6129881987903861, "grad_norm": 345.88165283203125, "learning_rate": 4.0134242542354486e-06, "loss": 11.6867, "step": 303450 }, { "epoch": 0.61300839942307, "grad_norm": 535.87548828125, "learning_rate": 4.013082053385661e-06, "loss": 20.6952, "step": 303460 }, { "epoch": 0.6130286000557538, "grad_norm": 1100.1561279296875, "learning_rate": 4.01273985734599e-06, "loss": 35.3558, "step": 303470 }, { "epoch": 0.6130488006884376, "grad_norm": 496.0742492675781, "learning_rate": 4.012397666118108e-06, "loss": 31.4369, "step": 303480 }, { "epoch": 0.6130690013211214, "grad_norm": 101.76065063476562, "learning_rate": 4.012055479703678e-06, "loss": 13.0555, "step": 303490 }, { "epoch": 0.6130892019538052, "grad_norm": 87.12376403808594, "learning_rate": 4.0117132981043695e-06, "loss": 16.1753, "step": 303500 }, { "epoch": 0.613109402586489, "grad_norm": 330.2441711425781, "learning_rate": 4.011371121321851e-06, "loss": 36.0391, "step": 303510 }, { "epoch": 0.6131296032191729, "grad_norm": 332.4491882324219, "learning_rate": 4.011028949357791e-06, "loss": 17.1912, "step": 303520 }, { "epoch": 0.6131498038518567, "grad_norm": 324.7025146484375, "learning_rate": 4.010686782213855e-06, "loss": 13.4496, "step": 303530 }, { "epoch": 0.6131700044845405, "grad_norm": 522.0205688476562, "learning_rate": 4.01034461989171e-06, "loss": 23.7555, "step": 303540 }, 
{ "epoch": 0.6131902051172242, "grad_norm": 150.48866271972656, "learning_rate": 4.01000246239303e-06, "loss": 22.1188, "step": 303550 }, { "epoch": 0.613210405749908, "grad_norm": 96.71208190917969, "learning_rate": 4.009660309719473e-06, "loss": 7.4786, "step": 303560 }, { "epoch": 0.6132306063825919, "grad_norm": 51.67375946044922, "learning_rate": 4.009318161872714e-06, "loss": 6.5284, "step": 303570 }, { "epoch": 0.6132508070152757, "grad_norm": 144.28921508789062, "learning_rate": 4.008976018854418e-06, "loss": 15.5809, "step": 303580 }, { "epoch": 0.6132710076479595, "grad_norm": 659.6348266601562, "learning_rate": 4.0086338806662525e-06, "loss": 20.033, "step": 303590 }, { "epoch": 0.6132912082806433, "grad_norm": 925.919921875, "learning_rate": 4.0082917473098845e-06, "loss": 28.6464, "step": 303600 }, { "epoch": 0.6133114089133271, "grad_norm": 655.435302734375, "learning_rate": 4.007949618786984e-06, "loss": 15.9405, "step": 303610 }, { "epoch": 0.613331609546011, "grad_norm": 416.43170166015625, "learning_rate": 4.007607495099215e-06, "loss": 23.06, "step": 303620 }, { "epoch": 0.6133518101786948, "grad_norm": 147.84814453125, "learning_rate": 4.007265376248246e-06, "loss": 22.9756, "step": 303630 }, { "epoch": 0.6133720108113786, "grad_norm": 256.745361328125, "learning_rate": 4.0069232622357475e-06, "loss": 23.1978, "step": 303640 }, { "epoch": 0.6133922114440624, "grad_norm": 379.4791259765625, "learning_rate": 4.006581153063383e-06, "loss": 25.9361, "step": 303650 }, { "epoch": 0.6134124120767462, "grad_norm": 322.6779479980469, "learning_rate": 4.006239048732822e-06, "loss": 27.4228, "step": 303660 }, { "epoch": 0.6134326127094301, "grad_norm": 1167.3402099609375, "learning_rate": 4.005896949245731e-06, "loss": 17.2036, "step": 303670 }, { "epoch": 0.6134528133421139, "grad_norm": 494.796630859375, "learning_rate": 4.005554854603779e-06, "loss": 8.5401, "step": 303680 }, { "epoch": 0.6134730139747977, "grad_norm": 279.4873962402344, "learning_rate": 4.0052127648086305e-06, "loss": 29.294, "step": 303690 }, { "epoch": 0.6134932146074815, "grad_norm": 65.90160369873047, "learning_rate": 4.004870679861953e-06, "loss": 10.5197, "step": 303700 }, { "epoch": 0.6135134152401653, "grad_norm": 313.8829040527344, "learning_rate": 4.004528599765419e-06, "loss": 20.6517, "step": 303710 }, { "epoch": 0.6135336158728492, "grad_norm": 222.48297119140625, "learning_rate": 4.004186524520689e-06, "loss": 27.6633, "step": 303720 }, { "epoch": 0.613553816505533, "grad_norm": 238.88255310058594, "learning_rate": 4.003844454129434e-06, "loss": 15.0304, "step": 303730 }, { "epoch": 0.6135740171382168, "grad_norm": 487.00274658203125, "learning_rate": 4.003502388593321e-06, "loss": 18.3681, "step": 303740 }, { "epoch": 0.6135942177709006, "grad_norm": 429.69561767578125, "learning_rate": 4.003160327914015e-06, "loss": 22.5376, "step": 303750 }, { "epoch": 0.6136144184035844, "grad_norm": 675.3248901367188, "learning_rate": 4.002818272093185e-06, "loss": 40.3278, "step": 303760 }, { "epoch": 0.6136346190362683, "grad_norm": 607.4163818359375, "learning_rate": 4.002476221132499e-06, "loss": 21.3432, "step": 303770 }, { "epoch": 0.6136548196689521, "grad_norm": 403.9083557128906, "learning_rate": 4.002134175033621e-06, "loss": 31.5958, "step": 303780 }, { "epoch": 0.6136750203016359, "grad_norm": 455.50909423828125, "learning_rate": 4.001792133798221e-06, "loss": 18.6742, "step": 303790 }, { "epoch": 0.6136952209343196, "grad_norm": 652.5449829101562, "learning_rate": 4.001450097427965e-06, "loss": 
15.4099, "step": 303800 }, { "epoch": 0.6137154215670034, "grad_norm": 347.6587829589844, "learning_rate": 4.001108065924521e-06, "loss": 25.1859, "step": 303810 }, { "epoch": 0.6137356221996872, "grad_norm": 527.2731323242188, "learning_rate": 4.000766039289554e-06, "loss": 23.9697, "step": 303820 }, { "epoch": 0.6137558228323711, "grad_norm": 234.25405883789062, "learning_rate": 4.000424017524732e-06, "loss": 20.098, "step": 303830 }, { "epoch": 0.6137760234650549, "grad_norm": 370.8182678222656, "learning_rate": 4.000082000631724e-06, "loss": 16.4596, "step": 303840 }, { "epoch": 0.6137962240977387, "grad_norm": 269.0312805175781, "learning_rate": 3.999739988612192e-06, "loss": 8.1935, "step": 303850 }, { "epoch": 0.6138164247304225, "grad_norm": 119.41081237792969, "learning_rate": 3.999397981467808e-06, "loss": 10.811, "step": 303860 }, { "epoch": 0.6138366253631063, "grad_norm": 600.5228271484375, "learning_rate": 3.999055979200238e-06, "loss": 20.6857, "step": 303870 }, { "epoch": 0.6138568259957902, "grad_norm": 313.8599548339844, "learning_rate": 3.998713981811145e-06, "loss": 21.2647, "step": 303880 }, { "epoch": 0.613877026628474, "grad_norm": 486.4746398925781, "learning_rate": 3.9983719893022e-06, "loss": 20.1357, "step": 303890 }, { "epoch": 0.6138972272611578, "grad_norm": 305.7726135253906, "learning_rate": 3.9980300016750696e-06, "loss": 13.1338, "step": 303900 }, { "epoch": 0.6139174278938416, "grad_norm": 425.5389404296875, "learning_rate": 3.997688018931418e-06, "loss": 16.3044, "step": 303910 }, { "epoch": 0.6139376285265254, "grad_norm": 0.0, "learning_rate": 3.997346041072912e-06, "loss": 25.3658, "step": 303920 }, { "epoch": 0.6139578291592093, "grad_norm": 366.0242614746094, "learning_rate": 3.997004068101224e-06, "loss": 13.5478, "step": 303930 }, { "epoch": 0.6139780297918931, "grad_norm": 76.50985717773438, "learning_rate": 3.9966621000180125e-06, "loss": 22.9871, "step": 303940 }, { "epoch": 0.6139982304245769, "grad_norm": 428.3826599121094, "learning_rate": 3.9963201368249495e-06, "loss": 23.8295, "step": 303950 }, { "epoch": 0.6140184310572607, "grad_norm": 525.0429077148438, "learning_rate": 3.9959781785237e-06, "loss": 21.652, "step": 303960 }, { "epoch": 0.6140386316899445, "grad_norm": 276.49359130859375, "learning_rate": 3.995636225115933e-06, "loss": 18.2424, "step": 303970 }, { "epoch": 0.6140588323226284, "grad_norm": 287.13702392578125, "learning_rate": 3.995294276603312e-06, "loss": 21.179, "step": 303980 }, { "epoch": 0.6140790329553122, "grad_norm": 619.6387939453125, "learning_rate": 3.9949523329875025e-06, "loss": 22.7562, "step": 303990 }, { "epoch": 0.614099233587996, "grad_norm": 584.1336059570312, "learning_rate": 3.994610394270178e-06, "loss": 20.0137, "step": 304000 }, { "epoch": 0.6141194342206798, "grad_norm": 1.6148418188095093, "learning_rate": 3.994268460452997e-06, "loss": 19.0874, "step": 304010 }, { "epoch": 0.6141396348533636, "grad_norm": 380.2392272949219, "learning_rate": 3.993926531537631e-06, "loss": 11.8434, "step": 304020 }, { "epoch": 0.6141598354860475, "grad_norm": 501.98760986328125, "learning_rate": 3.993584607525745e-06, "loss": 23.473, "step": 304030 }, { "epoch": 0.6141800361187313, "grad_norm": 296.1498107910156, "learning_rate": 3.993242688419006e-06, "loss": 21.8972, "step": 304040 }, { "epoch": 0.6142002367514151, "grad_norm": 742.5661010742188, "learning_rate": 3.992900774219078e-06, "loss": 23.2279, "step": 304050 }, { "epoch": 0.6142204373840988, "grad_norm": 823.869140625, "learning_rate": 
3.992558864927633e-06, "loss": 25.0372, "step": 304060 }, { "epoch": 0.6142406380167826, "grad_norm": 145.0618896484375, "learning_rate": 3.9922169605463305e-06, "loss": 11.2473, "step": 304070 }, { "epoch": 0.6142608386494665, "grad_norm": 239.45742797851562, "learning_rate": 3.991875061076841e-06, "loss": 18.8624, "step": 304080 }, { "epoch": 0.6142810392821503, "grad_norm": 312.9098815917969, "learning_rate": 3.991533166520832e-06, "loss": 17.5972, "step": 304090 }, { "epoch": 0.6143012399148341, "grad_norm": 787.7771606445312, "learning_rate": 3.991191276879966e-06, "loss": 29.638, "step": 304100 }, { "epoch": 0.6143214405475179, "grad_norm": 487.5486755371094, "learning_rate": 3.990849392155912e-06, "loss": 15.2809, "step": 304110 }, { "epoch": 0.6143416411802017, "grad_norm": 830.9132080078125, "learning_rate": 3.990507512350336e-06, "loss": 33.5881, "step": 304120 }, { "epoch": 0.6143618418128856, "grad_norm": 979.7211303710938, "learning_rate": 3.990165637464904e-06, "loss": 27.8697, "step": 304130 }, { "epoch": 0.6143820424455694, "grad_norm": 261.5354309082031, "learning_rate": 3.9898237675012815e-06, "loss": 15.2131, "step": 304140 }, { "epoch": 0.6144022430782532, "grad_norm": 354.2962341308594, "learning_rate": 3.989481902461135e-06, "loss": 30.4945, "step": 304150 }, { "epoch": 0.614422443710937, "grad_norm": 229.3984375, "learning_rate": 3.989140042346134e-06, "loss": 19.7272, "step": 304160 }, { "epoch": 0.6144426443436208, "grad_norm": 605.9855346679688, "learning_rate": 3.988798187157939e-06, "loss": 20.3877, "step": 304170 }, { "epoch": 0.6144628449763047, "grad_norm": 405.9658508300781, "learning_rate": 3.988456336898219e-06, "loss": 13.6832, "step": 304180 }, { "epoch": 0.6144830456089885, "grad_norm": 666.5990600585938, "learning_rate": 3.988114491568642e-06, "loss": 21.0847, "step": 304190 }, { "epoch": 0.6145032462416723, "grad_norm": 371.8177185058594, "learning_rate": 3.987772651170871e-06, "loss": 43.3977, "step": 304200 }, { "epoch": 0.6145234468743561, "grad_norm": 229.81198120117188, "learning_rate": 3.9874308157065735e-06, "loss": 23.0913, "step": 304210 }, { "epoch": 0.6145436475070399, "grad_norm": 616.7988891601562, "learning_rate": 3.987088985177417e-06, "loss": 22.285, "step": 304220 }, { "epoch": 0.6145638481397238, "grad_norm": 8.764073371887207, "learning_rate": 3.986747159585063e-06, "loss": 23.5479, "step": 304230 }, { "epoch": 0.6145840487724076, "grad_norm": 497.9407653808594, "learning_rate": 3.986405338931182e-06, "loss": 26.8621, "step": 304240 }, { "epoch": 0.6146042494050914, "grad_norm": 566.9827270507812, "learning_rate": 3.986063523217439e-06, "loss": 17.8532, "step": 304250 }, { "epoch": 0.6146244500377752, "grad_norm": 63.36670684814453, "learning_rate": 3.9857217124454985e-06, "loss": 27.5186, "step": 304260 }, { "epoch": 0.614644650670459, "grad_norm": 1081.5235595703125, "learning_rate": 3.985379906617027e-06, "loss": 28.2573, "step": 304270 }, { "epoch": 0.6146648513031429, "grad_norm": 623.7537841796875, "learning_rate": 3.985038105733691e-06, "loss": 18.582, "step": 304280 }, { "epoch": 0.6146850519358267, "grad_norm": 146.89149475097656, "learning_rate": 3.984696309797157e-06, "loss": 12.7521, "step": 304290 }, { "epoch": 0.6147052525685105, "grad_norm": 223.62774658203125, "learning_rate": 3.98435451880909e-06, "loss": 35.098, "step": 304300 }, { "epoch": 0.6147254532011943, "grad_norm": 286.808837890625, "learning_rate": 3.984012732771154e-06, "loss": 14.6493, "step": 304310 }, { "epoch": 0.614745653833878, "grad_norm": 
549.3030395507812, "learning_rate": 3.98367095168502e-06, "loss": 16.418, "step": 304320 }, { "epoch": 0.6147658544665618, "grad_norm": 268.3217468261719, "learning_rate": 3.983329175552348e-06, "loss": 18.5564, "step": 304330 }, { "epoch": 0.6147860550992457, "grad_norm": 547.75, "learning_rate": 3.9829874043748064e-06, "loss": 24.7735, "step": 304340 }, { "epoch": 0.6148062557319295, "grad_norm": 301.2500915527344, "learning_rate": 3.982645638154062e-06, "loss": 22.7877, "step": 304350 }, { "epoch": 0.6148264563646133, "grad_norm": 413.0594177246094, "learning_rate": 3.982303876891778e-06, "loss": 17.6814, "step": 304360 }, { "epoch": 0.6148466569972971, "grad_norm": 201.77066040039062, "learning_rate": 3.981962120589623e-06, "loss": 20.4034, "step": 304370 }, { "epoch": 0.614866857629981, "grad_norm": 174.31021118164062, "learning_rate": 3.981620369249261e-06, "loss": 11.626, "step": 304380 }, { "epoch": 0.6148870582626648, "grad_norm": 620.9212646484375, "learning_rate": 3.981278622872357e-06, "loss": 14.6557, "step": 304390 }, { "epoch": 0.6149072588953486, "grad_norm": 311.0087585449219, "learning_rate": 3.980936881460576e-06, "loss": 44.8629, "step": 304400 }, { "epoch": 0.6149274595280324, "grad_norm": 224.42835998535156, "learning_rate": 3.980595145015588e-06, "loss": 15.2197, "step": 304410 }, { "epoch": 0.6149476601607162, "grad_norm": 236.81829833984375, "learning_rate": 3.9802534135390544e-06, "loss": 8.8921, "step": 304420 }, { "epoch": 0.6149678607934, "grad_norm": 615.7477416992188, "learning_rate": 3.979911687032642e-06, "loss": 17.2336, "step": 304430 }, { "epoch": 0.6149880614260839, "grad_norm": 40.66111755371094, "learning_rate": 3.979569965498016e-06, "loss": 14.4898, "step": 304440 }, { "epoch": 0.6150082620587677, "grad_norm": 454.35565185546875, "learning_rate": 3.979228248936843e-06, "loss": 46.3418, "step": 304450 }, { "epoch": 0.6150284626914515, "grad_norm": 646.9774780273438, "learning_rate": 3.978886537350786e-06, "loss": 30.9838, "step": 304460 }, { "epoch": 0.6150486633241353, "grad_norm": 284.7167663574219, "learning_rate": 3.978544830741513e-06, "loss": 16.6017, "step": 304470 }, { "epoch": 0.6150688639568191, "grad_norm": 661.4013061523438, "learning_rate": 3.9782031291106895e-06, "loss": 18.4857, "step": 304480 }, { "epoch": 0.615089064589503, "grad_norm": 55.05121612548828, "learning_rate": 3.97786143245998e-06, "loss": 23.2033, "step": 304490 }, { "epoch": 0.6151092652221868, "grad_norm": 568.1244506835938, "learning_rate": 3.977519740791049e-06, "loss": 15.2688, "step": 304500 }, { "epoch": 0.6151294658548706, "grad_norm": 469.814697265625, "learning_rate": 3.977178054105564e-06, "loss": 22.9027, "step": 304510 }, { "epoch": 0.6151496664875544, "grad_norm": 908.1153564453125, "learning_rate": 3.9768363724051875e-06, "loss": 17.428, "step": 304520 }, { "epoch": 0.6151698671202382, "grad_norm": 77.04640197753906, "learning_rate": 3.976494695691586e-06, "loss": 22.9595, "step": 304530 }, { "epoch": 0.6151900677529221, "grad_norm": 361.153564453125, "learning_rate": 3.976153023966428e-06, "loss": 15.1676, "step": 304540 }, { "epoch": 0.6152102683856059, "grad_norm": 409.250732421875, "learning_rate": 3.9758113572313735e-06, "loss": 16.0041, "step": 304550 }, { "epoch": 0.6152304690182897, "grad_norm": 555.212890625, "learning_rate": 3.975469695488091e-06, "loss": 29.4781, "step": 304560 }, { "epoch": 0.6152506696509734, "grad_norm": 321.8433837890625, "learning_rate": 3.975128038738245e-06, "loss": 24.9564, "step": 304570 }, { "epoch": 
0.6152708702836572, "grad_norm": 48.59434509277344, "learning_rate": 3.974786386983501e-06, "loss": 19.8498, "step": 304580 }, { "epoch": 0.615291070916341, "grad_norm": 245.10911560058594, "learning_rate": 3.974444740225524e-06, "loss": 18.3821, "step": 304590 }, { "epoch": 0.6153112715490249, "grad_norm": 297.1552734375, "learning_rate": 3.974103098465976e-06, "loss": 15.4168, "step": 304600 }, { "epoch": 0.6153314721817087, "grad_norm": 179.0855712890625, "learning_rate": 3.97376146170653e-06, "loss": 18.0489, "step": 304610 }, { "epoch": 0.6153516728143925, "grad_norm": 794.9833374023438, "learning_rate": 3.973419829948843e-06, "loss": 21.6872, "step": 304620 }, { "epoch": 0.6153718734470763, "grad_norm": 347.2474365234375, "learning_rate": 3.973078203194584e-06, "loss": 19.7647, "step": 304630 }, { "epoch": 0.6153920740797602, "grad_norm": 254.1619110107422, "learning_rate": 3.972736581445418e-06, "loss": 13.7167, "step": 304640 }, { "epoch": 0.615412274712444, "grad_norm": 251.6357879638672, "learning_rate": 3.972394964703008e-06, "loss": 23.3833, "step": 304650 }, { "epoch": 0.6154324753451278, "grad_norm": 192.97264099121094, "learning_rate": 3.97205335296902e-06, "loss": 21.9039, "step": 304660 }, { "epoch": 0.6154526759778116, "grad_norm": 363.0151672363281, "learning_rate": 3.971711746245122e-06, "loss": 19.9189, "step": 304670 }, { "epoch": 0.6154728766104954, "grad_norm": 130.87635803222656, "learning_rate": 3.971370144532973e-06, "loss": 14.7775, "step": 304680 }, { "epoch": 0.6154930772431793, "grad_norm": 377.2640075683594, "learning_rate": 3.971028547834241e-06, "loss": 14.1147, "step": 304690 }, { "epoch": 0.6155132778758631, "grad_norm": 354.9339599609375, "learning_rate": 3.970686956150595e-06, "loss": 16.043, "step": 304700 }, { "epoch": 0.6155334785085469, "grad_norm": 534.2333374023438, "learning_rate": 3.970345369483693e-06, "loss": 15.6232, "step": 304710 }, { "epoch": 0.6155536791412307, "grad_norm": 553.8934936523438, "learning_rate": 3.970003787835203e-06, "loss": 25.1067, "step": 304720 }, { "epoch": 0.6155738797739145, "grad_norm": 279.48504638671875, "learning_rate": 3.969662211206789e-06, "loss": 19.6881, "step": 304730 }, { "epoch": 0.6155940804065984, "grad_norm": 542.3599243164062, "learning_rate": 3.969320639600118e-06, "loss": 21.9852, "step": 304740 }, { "epoch": 0.6156142810392822, "grad_norm": 461.4300842285156, "learning_rate": 3.968979073016853e-06, "loss": 17.8222, "step": 304750 }, { "epoch": 0.615634481671966, "grad_norm": 145.6568145751953, "learning_rate": 3.968637511458657e-06, "loss": 25.1161, "step": 304760 }, { "epoch": 0.6156546823046498, "grad_norm": 176.57940673828125, "learning_rate": 3.9682959549272e-06, "loss": 16.715, "step": 304770 }, { "epoch": 0.6156748829373336, "grad_norm": 222.05470275878906, "learning_rate": 3.9679544034241406e-06, "loss": 26.3055, "step": 304780 }, { "epoch": 0.6156950835700175, "grad_norm": 26.114879608154297, "learning_rate": 3.967612856951146e-06, "loss": 11.9065, "step": 304790 }, { "epoch": 0.6157152842027013, "grad_norm": 470.2635192871094, "learning_rate": 3.967271315509884e-06, "loss": 27.9257, "step": 304800 }, { "epoch": 0.6157354848353851, "grad_norm": 0.0, "learning_rate": 3.966929779102015e-06, "loss": 27.4237, "step": 304810 }, { "epoch": 0.6157556854680689, "grad_norm": 225.7501983642578, "learning_rate": 3.9665882477292036e-06, "loss": 18.1502, "step": 304820 }, { "epoch": 0.6157758861007526, "grad_norm": 927.1912841796875, "learning_rate": 3.966246721393118e-06, "loss": 15.8755, "step": 
304830 }, { "epoch": 0.6157960867334364, "grad_norm": 717.72900390625, "learning_rate": 3.965905200095419e-06, "loss": 31.2173, "step": 304840 }, { "epoch": 0.6158162873661203, "grad_norm": 178.7381591796875, "learning_rate": 3.965563683837772e-06, "loss": 14.679, "step": 304850 }, { "epoch": 0.6158364879988041, "grad_norm": 286.8285827636719, "learning_rate": 3.965222172621844e-06, "loss": 22.5503, "step": 304860 }, { "epoch": 0.6158566886314879, "grad_norm": 566.3438720703125, "learning_rate": 3.964880666449296e-06, "loss": 30.0114, "step": 304870 }, { "epoch": 0.6158768892641717, "grad_norm": 261.7210388183594, "learning_rate": 3.964539165321795e-06, "loss": 26.8727, "step": 304880 }, { "epoch": 0.6158970898968555, "grad_norm": 740.4841918945312, "learning_rate": 3.964197669241004e-06, "loss": 19.0218, "step": 304890 }, { "epoch": 0.6159172905295394, "grad_norm": 70.38446807861328, "learning_rate": 3.963856178208588e-06, "loss": 8.1849, "step": 304900 }, { "epoch": 0.6159374911622232, "grad_norm": 555.62255859375, "learning_rate": 3.963514692226212e-06, "loss": 15.2413, "step": 304910 }, { "epoch": 0.615957691794907, "grad_norm": 241.1875, "learning_rate": 3.963173211295538e-06, "loss": 22.6484, "step": 304920 }, { "epoch": 0.6159778924275908, "grad_norm": 395.1697692871094, "learning_rate": 3.962831735418235e-06, "loss": 22.5365, "step": 304930 }, { "epoch": 0.6159980930602746, "grad_norm": 334.2985534667969, "learning_rate": 3.962490264595961e-06, "loss": 27.5217, "step": 304940 }, { "epoch": 0.6160182936929585, "grad_norm": 288.04449462890625, "learning_rate": 3.962148798830385e-06, "loss": 10.9517, "step": 304950 }, { "epoch": 0.6160384943256423, "grad_norm": 292.25628662109375, "learning_rate": 3.9618073381231705e-06, "loss": 32.295, "step": 304960 }, { "epoch": 0.6160586949583261, "grad_norm": 226.55381774902344, "learning_rate": 3.9614658824759815e-06, "loss": 26.4119, "step": 304970 }, { "epoch": 0.6160788955910099, "grad_norm": 145.2852325439453, "learning_rate": 3.96112443189048e-06, "loss": 23.9111, "step": 304980 }, { "epoch": 0.6160990962236937, "grad_norm": 78.74932861328125, "learning_rate": 3.960782986368334e-06, "loss": 17.1106, "step": 304990 }, { "epoch": 0.6161192968563776, "grad_norm": 62.719512939453125, "learning_rate": 3.960441545911205e-06, "loss": 21.7747, "step": 305000 }, { "epoch": 0.6161394974890614, "grad_norm": 69.73066711425781, "learning_rate": 3.960100110520756e-06, "loss": 12.6648, "step": 305010 }, { "epoch": 0.6161596981217452, "grad_norm": 481.134765625, "learning_rate": 3.9597586801986544e-06, "loss": 18.5707, "step": 305020 }, { "epoch": 0.616179898754429, "grad_norm": 297.143798828125, "learning_rate": 3.959417254946563e-06, "loss": 20.0953, "step": 305030 }, { "epoch": 0.6162000993871128, "grad_norm": 262.7975158691406, "learning_rate": 3.9590758347661465e-06, "loss": 10.907, "step": 305040 }, { "epoch": 0.6162203000197967, "grad_norm": 55.615638732910156, "learning_rate": 3.9587344196590665e-06, "loss": 16.1214, "step": 305050 }, { "epoch": 0.6162405006524805, "grad_norm": 562.6689453125, "learning_rate": 3.95839300962699e-06, "loss": 9.3046, "step": 305060 }, { "epoch": 0.6162607012851643, "grad_norm": 822.115478515625, "learning_rate": 3.958051604671579e-06, "loss": 21.9632, "step": 305070 }, { "epoch": 0.616280901917848, "grad_norm": 326.88604736328125, "learning_rate": 3.957710204794497e-06, "loss": 11.3189, "step": 305080 }, { "epoch": 0.6163011025505318, "grad_norm": 299.18133544921875, "learning_rate": 3.95736880999741e-06, "loss": 
17.1818, "step": 305090 }, { "epoch": 0.6163213031832157, "grad_norm": 348.8399963378906, "learning_rate": 3.957027420281981e-06, "loss": 24.7105, "step": 305100 }, { "epoch": 0.6163415038158995, "grad_norm": 324.0838317871094, "learning_rate": 3.956686035649874e-06, "loss": 19.6219, "step": 305110 }, { "epoch": 0.6163617044485833, "grad_norm": 147.7954559326172, "learning_rate": 3.956344656102754e-06, "loss": 12.9203, "step": 305120 }, { "epoch": 0.6163819050812671, "grad_norm": 707.2263793945312, "learning_rate": 3.9560032816422825e-06, "loss": 20.8565, "step": 305130 }, { "epoch": 0.6164021057139509, "grad_norm": 122.77702331542969, "learning_rate": 3.955661912270123e-06, "loss": 19.8666, "step": 305140 }, { "epoch": 0.6164223063466348, "grad_norm": 300.4569091796875, "learning_rate": 3.955320547987943e-06, "loss": 11.3689, "step": 305150 }, { "epoch": 0.6164425069793186, "grad_norm": 995.3106079101562, "learning_rate": 3.954979188797402e-06, "loss": 38.2021, "step": 305160 }, { "epoch": 0.6164627076120024, "grad_norm": 627.9874877929688, "learning_rate": 3.954637834700166e-06, "loss": 17.7492, "step": 305170 }, { "epoch": 0.6164829082446862, "grad_norm": 489.7788391113281, "learning_rate": 3.954296485697899e-06, "loss": 20.5142, "step": 305180 }, { "epoch": 0.61650310887737, "grad_norm": 258.4852294921875, "learning_rate": 3.953955141792264e-06, "loss": 12.8674, "step": 305190 }, { "epoch": 0.6165233095100539, "grad_norm": 503.8763122558594, "learning_rate": 3.9536138029849244e-06, "loss": 22.3047, "step": 305200 }, { "epoch": 0.6165435101427377, "grad_norm": 588.955322265625, "learning_rate": 3.953272469277544e-06, "loss": 24.0847, "step": 305210 }, { "epoch": 0.6165637107754215, "grad_norm": 323.96630859375, "learning_rate": 3.952931140671789e-06, "loss": 15.9219, "step": 305220 }, { "epoch": 0.6165839114081053, "grad_norm": 281.60577392578125, "learning_rate": 3.9525898171693175e-06, "loss": 32.4246, "step": 305230 }, { "epoch": 0.6166041120407891, "grad_norm": 11.025744438171387, "learning_rate": 3.952248498771797e-06, "loss": 16.3389, "step": 305240 }, { "epoch": 0.616624312673473, "grad_norm": 113.29571533203125, "learning_rate": 3.951907185480892e-06, "loss": 14.3495, "step": 305250 }, { "epoch": 0.6166445133061568, "grad_norm": 148.98886108398438, "learning_rate": 3.9515658772982625e-06, "loss": 19.239, "step": 305260 }, { "epoch": 0.6166647139388406, "grad_norm": 600.8794555664062, "learning_rate": 3.951224574225574e-06, "loss": 22.2261, "step": 305270 }, { "epoch": 0.6166849145715244, "grad_norm": 353.5679016113281, "learning_rate": 3.950883276264491e-06, "loss": 24.45, "step": 305280 }, { "epoch": 0.6167051152042082, "grad_norm": 126.06834411621094, "learning_rate": 3.950541983416675e-06, "loss": 21.8497, "step": 305290 }, { "epoch": 0.616725315836892, "grad_norm": 434.01812744140625, "learning_rate": 3.950200695683788e-06, "loss": 12.7697, "step": 305300 }, { "epoch": 0.6167455164695759, "grad_norm": 572.202880859375, "learning_rate": 3.9498594130674985e-06, "loss": 13.4393, "step": 305310 }, { "epoch": 0.6167657171022597, "grad_norm": 701.241455078125, "learning_rate": 3.949518135569465e-06, "loss": 25.3619, "step": 305320 }, { "epoch": 0.6167859177349435, "grad_norm": 184.28607177734375, "learning_rate": 3.949176863191353e-06, "loss": 10.7238, "step": 305330 }, { "epoch": 0.6168061183676272, "grad_norm": 10.19638442993164, "learning_rate": 3.948835595934826e-06, "loss": 10.2953, "step": 305340 }, { "epoch": 0.616826319000311, "grad_norm": 28.1948184967041, 
"learning_rate": 3.9484943338015465e-06, "loss": 20.9164, "step": 305350 }, { "epoch": 0.6168465196329949, "grad_norm": 194.9653778076172, "learning_rate": 3.948153076793179e-06, "loss": 25.6456, "step": 305360 }, { "epoch": 0.6168667202656787, "grad_norm": 489.40155029296875, "learning_rate": 3.947811824911383e-06, "loss": 11.7695, "step": 305370 }, { "epoch": 0.6168869208983625, "grad_norm": 212.13853454589844, "learning_rate": 3.947470578157829e-06, "loss": 12.6597, "step": 305380 }, { "epoch": 0.6169071215310463, "grad_norm": 862.0413818359375, "learning_rate": 3.9471293365341716e-06, "loss": 17.0545, "step": 305390 }, { "epoch": 0.6169273221637301, "grad_norm": 81.56665802001953, "learning_rate": 3.94678810004208e-06, "loss": 25.2166, "step": 305400 }, { "epoch": 0.616947522796414, "grad_norm": 32.02766799926758, "learning_rate": 3.946446868683216e-06, "loss": 19.9479, "step": 305410 }, { "epoch": 0.6169677234290978, "grad_norm": 316.7696838378906, "learning_rate": 3.946105642459241e-06, "loss": 13.6745, "step": 305420 }, { "epoch": 0.6169879240617816, "grad_norm": 420.5115661621094, "learning_rate": 3.9457644213718195e-06, "loss": 16.9697, "step": 305430 }, { "epoch": 0.6170081246944654, "grad_norm": 628.47412109375, "learning_rate": 3.945423205422616e-06, "loss": 15.0264, "step": 305440 }, { "epoch": 0.6170283253271492, "grad_norm": 293.2240905761719, "learning_rate": 3.94508199461329e-06, "loss": 26.2494, "step": 305450 }, { "epoch": 0.6170485259598331, "grad_norm": 660.800048828125, "learning_rate": 3.9447407889455054e-06, "loss": 30.5347, "step": 305460 }, { "epoch": 0.6170687265925169, "grad_norm": 447.24627685546875, "learning_rate": 3.944399588420928e-06, "loss": 11.819, "step": 305470 }, { "epoch": 0.6170889272252007, "grad_norm": 427.98876953125, "learning_rate": 3.944058393041219e-06, "loss": 16.9037, "step": 305480 }, { "epoch": 0.6171091278578845, "grad_norm": 300.6212463378906, "learning_rate": 3.943717202808041e-06, "loss": 22.4586, "step": 305490 }, { "epoch": 0.6171293284905683, "grad_norm": 499.1124267578125, "learning_rate": 3.943376017723058e-06, "loss": 33.0014, "step": 305500 }, { "epoch": 0.6171495291232522, "grad_norm": 298.091064453125, "learning_rate": 3.9430348377879315e-06, "loss": 9.2643, "step": 305510 }, { "epoch": 0.617169729755936, "grad_norm": 240.9927978515625, "learning_rate": 3.942693663004324e-06, "loss": 34.2811, "step": 305520 }, { "epoch": 0.6171899303886198, "grad_norm": 204.7012176513672, "learning_rate": 3.942352493373899e-06, "loss": 11.6956, "step": 305530 }, { "epoch": 0.6172101310213036, "grad_norm": 183.16407775878906, "learning_rate": 3.9420113288983235e-06, "loss": 20.7804, "step": 305540 }, { "epoch": 0.6172303316539874, "grad_norm": 362.5018615722656, "learning_rate": 3.941670169579252e-06, "loss": 12.9724, "step": 305550 }, { "epoch": 0.6172505322866713, "grad_norm": 191.02978515625, "learning_rate": 3.9413290154183536e-06, "loss": 22.3215, "step": 305560 }, { "epoch": 0.6172707329193551, "grad_norm": 222.10240173339844, "learning_rate": 3.94098786641729e-06, "loss": 11.3918, "step": 305570 }, { "epoch": 0.6172909335520389, "grad_norm": 40.461708068847656, "learning_rate": 3.940646722577722e-06, "loss": 5.4216, "step": 305580 }, { "epoch": 0.6173111341847227, "grad_norm": 328.94140625, "learning_rate": 3.940305583901314e-06, "loss": 32.9386, "step": 305590 }, { "epoch": 0.6173313348174064, "grad_norm": 19.103342056274414, "learning_rate": 3.939964450389728e-06, "loss": 12.3217, "step": 305600 }, { "epoch": 0.6173515354500902, 
"grad_norm": 590.836181640625, "learning_rate": 3.939623322044627e-06, "loss": 33.5012, "step": 305610 }, { "epoch": 0.6173717360827741, "grad_norm": 513.8269653320312, "learning_rate": 3.9392821988676715e-06, "loss": 12.7758, "step": 305620 }, { "epoch": 0.6173919367154579, "grad_norm": 133.01303100585938, "learning_rate": 3.938941080860527e-06, "loss": 29.4992, "step": 305630 }, { "epoch": 0.6174121373481417, "grad_norm": 326.3035583496094, "learning_rate": 3.938599968024855e-06, "loss": 12.9297, "step": 305640 }, { "epoch": 0.6174323379808255, "grad_norm": 894.0367431640625, "learning_rate": 3.938258860362319e-06, "loss": 20.602, "step": 305650 }, { "epoch": 0.6174525386135093, "grad_norm": 750.4785766601562, "learning_rate": 3.937917757874579e-06, "loss": 28.8761, "step": 305660 }, { "epoch": 0.6174727392461932, "grad_norm": 487.0335998535156, "learning_rate": 3.9375766605633005e-06, "loss": 29.3503, "step": 305670 }, { "epoch": 0.617492939878877, "grad_norm": 322.8643493652344, "learning_rate": 3.937235568430143e-06, "loss": 18.7492, "step": 305680 }, { "epoch": 0.6175131405115608, "grad_norm": 54.897117614746094, "learning_rate": 3.9368944814767704e-06, "loss": 26.2001, "step": 305690 }, { "epoch": 0.6175333411442446, "grad_norm": 934.5421142578125, "learning_rate": 3.936553399704848e-06, "loss": 18.8792, "step": 305700 }, { "epoch": 0.6175535417769284, "grad_norm": 739.4932250976562, "learning_rate": 3.936212323116032e-06, "loss": 27.0148, "step": 305710 }, { "epoch": 0.6175737424096123, "grad_norm": 747.6240234375, "learning_rate": 3.935871251711989e-06, "loss": 35.2148, "step": 305720 }, { "epoch": 0.6175939430422961, "grad_norm": 114.89200592041016, "learning_rate": 3.935530185494381e-06, "loss": 6.3092, "step": 305730 }, { "epoch": 0.6176141436749799, "grad_norm": 351.3406982421875, "learning_rate": 3.93518912446487e-06, "loss": 23.3945, "step": 305740 }, { "epoch": 0.6176343443076637, "grad_norm": 286.9163513183594, "learning_rate": 3.934848068625117e-06, "loss": 23.4078, "step": 305750 }, { "epoch": 0.6176545449403475, "grad_norm": 425.3580017089844, "learning_rate": 3.934507017976788e-06, "loss": 19.2041, "step": 305760 }, { "epoch": 0.6176747455730314, "grad_norm": 288.7109680175781, "learning_rate": 3.9341659725215395e-06, "loss": 28.5857, "step": 305770 }, { "epoch": 0.6176949462057152, "grad_norm": 673.9471435546875, "learning_rate": 3.9338249322610375e-06, "loss": 30.6302, "step": 305780 }, { "epoch": 0.617715146838399, "grad_norm": 189.43878173828125, "learning_rate": 3.933483897196944e-06, "loss": 16.6219, "step": 305790 }, { "epoch": 0.6177353474710828, "grad_norm": 242.22720336914062, "learning_rate": 3.933142867330921e-06, "loss": 17.8705, "step": 305800 }, { "epoch": 0.6177555481037666, "grad_norm": 492.8101806640625, "learning_rate": 3.932801842664629e-06, "loss": 23.6165, "step": 305810 }, { "epoch": 0.6177757487364505, "grad_norm": 320.36065673828125, "learning_rate": 3.932460823199732e-06, "loss": 18.9187, "step": 305820 }, { "epoch": 0.6177959493691343, "grad_norm": 657.8652954101562, "learning_rate": 3.932119808937892e-06, "loss": 18.8226, "step": 305830 }, { "epoch": 0.6178161500018181, "grad_norm": 594.8040771484375, "learning_rate": 3.9317787998807695e-06, "loss": 15.5412, "step": 305840 }, { "epoch": 0.6178363506345018, "grad_norm": 173.3395538330078, "learning_rate": 3.931437796030028e-06, "loss": 18.2146, "step": 305850 }, { "epoch": 0.6178565512671856, "grad_norm": 205.6853485107422, "learning_rate": 3.93109679738733e-06, "loss": 16.1719, "step": 
305860 }, { "epoch": 0.6178767518998695, "grad_norm": 234.95541381835938, "learning_rate": 3.9307558039543355e-06, "loss": 19.4078, "step": 305870 }, { "epoch": 0.6178969525325533, "grad_norm": 233.0981903076172, "learning_rate": 3.930414815732709e-06, "loss": 17.0163, "step": 305880 }, { "epoch": 0.6179171531652371, "grad_norm": 615.3446655273438, "learning_rate": 3.93007383272411e-06, "loss": 18.0681, "step": 305890 }, { "epoch": 0.6179373537979209, "grad_norm": 184.77142333984375, "learning_rate": 3.9297328549302e-06, "loss": 18.3668, "step": 305900 }, { "epoch": 0.6179575544306047, "grad_norm": 169.15184020996094, "learning_rate": 3.929391882352643e-06, "loss": 17.5263, "step": 305910 }, { "epoch": 0.6179777550632886, "grad_norm": 324.7297668457031, "learning_rate": 3.929050914993102e-06, "loss": 21.9217, "step": 305920 }, { "epoch": 0.6179979556959724, "grad_norm": 523.7531127929688, "learning_rate": 3.928709952853235e-06, "loss": 12.4756, "step": 305930 }, { "epoch": 0.6180181563286562, "grad_norm": 439.7877197265625, "learning_rate": 3.928368995934706e-06, "loss": 25.2746, "step": 305940 }, { "epoch": 0.61803835696134, "grad_norm": 74.69799041748047, "learning_rate": 3.928028044239176e-06, "loss": 14.5338, "step": 305950 }, { "epoch": 0.6180585575940238, "grad_norm": 190.77581787109375, "learning_rate": 3.927687097768309e-06, "loss": 14.2405, "step": 305960 }, { "epoch": 0.6180787582267077, "grad_norm": 178.42745971679688, "learning_rate": 3.927346156523764e-06, "loss": 15.6832, "step": 305970 }, { "epoch": 0.6180989588593915, "grad_norm": 258.56976318359375, "learning_rate": 3.927005220507203e-06, "loss": 16.2546, "step": 305980 }, { "epoch": 0.6181191594920753, "grad_norm": 330.5308837890625, "learning_rate": 3.926664289720291e-06, "loss": 26.6828, "step": 305990 }, { "epoch": 0.6181393601247591, "grad_norm": 504.2160339355469, "learning_rate": 3.926323364164684e-06, "loss": 33.5386, "step": 306000 }, { "epoch": 0.6181595607574429, "grad_norm": 229.79554748535156, "learning_rate": 3.925982443842048e-06, "loss": 11.3198, "step": 306010 }, { "epoch": 0.6181797613901268, "grad_norm": 323.853759765625, "learning_rate": 3.925641528754045e-06, "loss": 18.9552, "step": 306020 }, { "epoch": 0.6181999620228106, "grad_norm": 537.7722778320312, "learning_rate": 3.925300618902332e-06, "loss": 16.2266, "step": 306030 }, { "epoch": 0.6182201626554944, "grad_norm": 889.2670288085938, "learning_rate": 3.924959714288575e-06, "loss": 21.2791, "step": 306040 }, { "epoch": 0.6182403632881782, "grad_norm": 188.69305419921875, "learning_rate": 3.924618814914435e-06, "loss": 2.5766, "step": 306050 }, { "epoch": 0.618260563920862, "grad_norm": 116.12600708007812, "learning_rate": 3.924277920781571e-06, "loss": 14.2036, "step": 306060 }, { "epoch": 0.6182807645535459, "grad_norm": 758.5924072265625, "learning_rate": 3.9239370318916445e-06, "loss": 23.0557, "step": 306070 }, { "epoch": 0.6183009651862297, "grad_norm": 328.2210998535156, "learning_rate": 3.92359614824632e-06, "loss": 26.6263, "step": 306080 }, { "epoch": 0.6183211658189135, "grad_norm": 452.1582946777344, "learning_rate": 3.923255269847258e-06, "loss": 23.0582, "step": 306090 }, { "epoch": 0.6183413664515973, "grad_norm": 236.93359375, "learning_rate": 3.922914396696118e-06, "loss": 18.4523, "step": 306100 }, { "epoch": 0.618361567084281, "grad_norm": 373.7156066894531, "learning_rate": 3.9225735287945635e-06, "loss": 19.818, "step": 306110 }, { "epoch": 0.6183817677169648, "grad_norm": 716.6705322265625, "learning_rate": 
3.922232666144255e-06, "loss": 26.8796, "step": 306120 }, { "epoch": 0.6184019683496487, "grad_norm": 351.5494384765625, "learning_rate": 3.921891808746853e-06, "loss": 23.5783, "step": 306130 }, { "epoch": 0.6184221689823325, "grad_norm": 891.4788818359375, "learning_rate": 3.921550956604019e-06, "loss": 30.9712, "step": 306140 }, { "epoch": 0.6184423696150163, "grad_norm": 733.364990234375, "learning_rate": 3.921210109717417e-06, "loss": 22.7058, "step": 306150 }, { "epoch": 0.6184625702477001, "grad_norm": 238.14309692382812, "learning_rate": 3.920869268088704e-06, "loss": 14.636, "step": 306160 }, { "epoch": 0.618482770880384, "grad_norm": 396.8487854003906, "learning_rate": 3.920528431719544e-06, "loss": 19.4024, "step": 306170 }, { "epoch": 0.6185029715130678, "grad_norm": 256.1111145019531, "learning_rate": 3.9201876006115985e-06, "loss": 16.7025, "step": 306180 }, { "epoch": 0.6185231721457516, "grad_norm": 3.769822120666504, "learning_rate": 3.9198467747665265e-06, "loss": 39.128, "step": 306190 }, { "epoch": 0.6185433727784354, "grad_norm": 212.6315155029297, "learning_rate": 3.91950595418599e-06, "loss": 31.1895, "step": 306200 }, { "epoch": 0.6185635734111192, "grad_norm": 152.0775909423828, "learning_rate": 3.919165138871652e-06, "loss": 16.7545, "step": 306210 }, { "epoch": 0.618583774043803, "grad_norm": 682.8530883789062, "learning_rate": 3.918824328825171e-06, "loss": 26.2047, "step": 306220 }, { "epoch": 0.6186039746764869, "grad_norm": 472.5846252441406, "learning_rate": 3.918483524048208e-06, "loss": 16.3899, "step": 306230 }, { "epoch": 0.6186241753091707, "grad_norm": 108.487548828125, "learning_rate": 3.9181427245424266e-06, "loss": 33.4169, "step": 306240 }, { "epoch": 0.6186443759418545, "grad_norm": 850.3353271484375, "learning_rate": 3.917801930309486e-06, "loss": 27.5641, "step": 306250 }, { "epoch": 0.6186645765745383, "grad_norm": 224.6884002685547, "learning_rate": 3.9174611413510474e-06, "loss": 17.4463, "step": 306260 }, { "epoch": 0.6186847772072221, "grad_norm": 484.1405334472656, "learning_rate": 3.9171203576687725e-06, "loss": 17.8375, "step": 306270 }, { "epoch": 0.618704977839906, "grad_norm": 306.95953369140625, "learning_rate": 3.916779579264322e-06, "loss": 29.5048, "step": 306280 }, { "epoch": 0.6187251784725898, "grad_norm": 196.38314819335938, "learning_rate": 3.916438806139355e-06, "loss": 13.314, "step": 306290 }, { "epoch": 0.6187453791052736, "grad_norm": 2.9020464420318604, "learning_rate": 3.9160980382955336e-06, "loss": 30.9055, "step": 306300 }, { "epoch": 0.6187655797379574, "grad_norm": 332.1005859375, "learning_rate": 3.9157572757345215e-06, "loss": 11.4602, "step": 306310 }, { "epoch": 0.6187857803706412, "grad_norm": 113.29553985595703, "learning_rate": 3.915416518457974e-06, "loss": 17.0311, "step": 306320 }, { "epoch": 0.6188059810033251, "grad_norm": 109.80133056640625, "learning_rate": 3.915075766467556e-06, "loss": 15.5748, "step": 306330 }, { "epoch": 0.6188261816360089, "grad_norm": 497.6032409667969, "learning_rate": 3.914735019764928e-06, "loss": 21.3831, "step": 306340 }, { "epoch": 0.6188463822686927, "grad_norm": 340.36737060546875, "learning_rate": 3.914394278351749e-06, "loss": 17.5105, "step": 306350 }, { "epoch": 0.6188665829013764, "grad_norm": 211.54811096191406, "learning_rate": 3.91405354222968e-06, "loss": 23.4012, "step": 306360 }, { "epoch": 0.6188867835340602, "grad_norm": 58.582115173339844, "learning_rate": 3.913712811400384e-06, "loss": 15.3924, "step": 306370 }, { "epoch": 0.6189069841667441, "grad_norm": 
320.6402893066406, "learning_rate": 3.913372085865519e-06, "loss": 8.7064, "step": 306380 }, { "epoch": 0.6189271847994279, "grad_norm": 321.906005859375, "learning_rate": 3.913031365626746e-06, "loss": 17.4174, "step": 306390 }, { "epoch": 0.6189473854321117, "grad_norm": 69.85985565185547, "learning_rate": 3.912690650685726e-06, "loss": 30.7, "step": 306400 }, { "epoch": 0.6189675860647955, "grad_norm": 440.7568664550781, "learning_rate": 3.912349941044122e-06, "loss": 20.0458, "step": 306410 }, { "epoch": 0.6189877866974793, "grad_norm": 377.60418701171875, "learning_rate": 3.912009236703591e-06, "loss": 16.6445, "step": 306420 }, { "epoch": 0.6190079873301632, "grad_norm": 434.11669921875, "learning_rate": 3.911668537665796e-06, "loss": 7.9538, "step": 306430 }, { "epoch": 0.619028187962847, "grad_norm": 396.4697570800781, "learning_rate": 3.9113278439323965e-06, "loss": 23.5603, "step": 306440 }, { "epoch": 0.6190483885955308, "grad_norm": 686.4473266601562, "learning_rate": 3.9109871555050514e-06, "loss": 22.799, "step": 306450 }, { "epoch": 0.6190685892282146, "grad_norm": 188.32522583007812, "learning_rate": 3.910646472385423e-06, "loss": 21.7802, "step": 306460 }, { "epoch": 0.6190887898608984, "grad_norm": 173.89407348632812, "learning_rate": 3.910305794575174e-06, "loss": 24.067, "step": 306470 }, { "epoch": 0.6191089904935823, "grad_norm": 423.35076904296875, "learning_rate": 3.90996512207596e-06, "loss": 24.6759, "step": 306480 }, { "epoch": 0.6191291911262661, "grad_norm": 258.6020812988281, "learning_rate": 3.9096244548894445e-06, "loss": 16.9252, "step": 306490 }, { "epoch": 0.6191493917589499, "grad_norm": 638.9388427734375, "learning_rate": 3.909283793017289e-06, "loss": 29.1464, "step": 306500 }, { "epoch": 0.6191695923916337, "grad_norm": 127.90264129638672, "learning_rate": 3.90894313646115e-06, "loss": 19.206, "step": 306510 }, { "epoch": 0.6191897930243175, "grad_norm": 198.00038146972656, "learning_rate": 3.908602485222688e-06, "loss": 11.8646, "step": 306520 }, { "epoch": 0.6192099936570014, "grad_norm": 430.843505859375, "learning_rate": 3.908261839303568e-06, "loss": 17.6792, "step": 306530 }, { "epoch": 0.6192301942896852, "grad_norm": 493.9074401855469, "learning_rate": 3.9079211987054475e-06, "loss": 21.0978, "step": 306540 }, { "epoch": 0.619250394922369, "grad_norm": 332.5508728027344, "learning_rate": 3.907580563429985e-06, "loss": 12.8158, "step": 306550 }, { "epoch": 0.6192705955550528, "grad_norm": 138.62803649902344, "learning_rate": 3.907239933478843e-06, "loss": 24.9861, "step": 306560 }, { "epoch": 0.6192907961877366, "grad_norm": 404.02410888671875, "learning_rate": 3.906899308853682e-06, "loss": 22.7066, "step": 306570 }, { "epoch": 0.6193109968204205, "grad_norm": 316.9248962402344, "learning_rate": 3.9065586895561605e-06, "loss": 38.4835, "step": 306580 }, { "epoch": 0.6193311974531043, "grad_norm": 875.6754760742188, "learning_rate": 3.906218075587938e-06, "loss": 21.3687, "step": 306590 }, { "epoch": 0.6193513980857881, "grad_norm": 437.1156311035156, "learning_rate": 3.905877466950679e-06, "loss": 14.8848, "step": 306600 }, { "epoch": 0.6193715987184719, "grad_norm": 74.68804931640625, "learning_rate": 3.905536863646037e-06, "loss": 12.2746, "step": 306610 }, { "epoch": 0.6193917993511556, "grad_norm": 239.02340698242188, "learning_rate": 3.905196265675677e-06, "loss": 28.3326, "step": 306620 }, { "epoch": 0.6194119999838394, "grad_norm": 157.01031494140625, "learning_rate": 3.904855673041259e-06, "loss": 12.1806, "step": 306630 }, { "epoch": 
0.6194322006165233, "grad_norm": 178.1280059814453, "learning_rate": 3.90451508574444e-06, "loss": 30.4363, "step": 306640 }, { "epoch": 0.6194524012492071, "grad_norm": 613.2965698242188, "learning_rate": 3.904174503786882e-06, "loss": 23.6172, "step": 306650 }, { "epoch": 0.6194726018818909, "grad_norm": 587.6942749023438, "learning_rate": 3.903833927170245e-06, "loss": 17.304, "step": 306660 }, { "epoch": 0.6194928025145747, "grad_norm": 779.6909790039062, "learning_rate": 3.9034933558961885e-06, "loss": 32.5964, "step": 306670 }, { "epoch": 0.6195130031472585, "grad_norm": 370.8109436035156, "learning_rate": 3.9031527899663705e-06, "loss": 31.931, "step": 306680 }, { "epoch": 0.6195332037799424, "grad_norm": 36.33906555175781, "learning_rate": 3.9028122293824535e-06, "loss": 13.0738, "step": 306690 }, { "epoch": 0.6195534044126262, "grad_norm": 881.9010620117188, "learning_rate": 3.902471674146099e-06, "loss": 20.5898, "step": 306700 }, { "epoch": 0.61957360504531, "grad_norm": 220.03152465820312, "learning_rate": 3.902131124258962e-06, "loss": 8.8976, "step": 306710 }, { "epoch": 0.6195938056779938, "grad_norm": 357.095947265625, "learning_rate": 3.901790579722706e-06, "loss": 29.167, "step": 306720 }, { "epoch": 0.6196140063106776, "grad_norm": 217.9332733154297, "learning_rate": 3.90145004053899e-06, "loss": 17.4131, "step": 306730 }, { "epoch": 0.6196342069433615, "grad_norm": 160.70803833007812, "learning_rate": 3.901109506709472e-06, "loss": 19.6984, "step": 306740 }, { "epoch": 0.6196544075760453, "grad_norm": 485.2276916503906, "learning_rate": 3.900768978235812e-06, "loss": 16.8355, "step": 306750 }, { "epoch": 0.6196746082087291, "grad_norm": 225.5886688232422, "learning_rate": 3.900428455119674e-06, "loss": 10.9822, "step": 306760 }, { "epoch": 0.6196948088414129, "grad_norm": 381.4754333496094, "learning_rate": 3.900087937362711e-06, "loss": 9.0045, "step": 306770 }, { "epoch": 0.6197150094740967, "grad_norm": 563.8507690429688, "learning_rate": 3.899747424966588e-06, "loss": 18.8555, "step": 306780 }, { "epoch": 0.6197352101067806, "grad_norm": 165.44664001464844, "learning_rate": 3.899406917932962e-06, "loss": 14.7919, "step": 306790 }, { "epoch": 0.6197554107394644, "grad_norm": 352.6623840332031, "learning_rate": 3.899066416263493e-06, "loss": 14.8232, "step": 306800 }, { "epoch": 0.6197756113721482, "grad_norm": 863.5376586914062, "learning_rate": 3.898725919959841e-06, "loss": 34.6785, "step": 306810 }, { "epoch": 0.619795812004832, "grad_norm": 593.6881713867188, "learning_rate": 3.898385429023666e-06, "loss": 13.6415, "step": 306820 }, { "epoch": 0.6198160126375158, "grad_norm": 652.6988525390625, "learning_rate": 3.898044943456626e-06, "loss": 30.2298, "step": 306830 }, { "epoch": 0.6198362132701997, "grad_norm": 423.9455261230469, "learning_rate": 3.89770446326038e-06, "loss": 38.2633, "step": 306840 }, { "epoch": 0.6198564139028835, "grad_norm": 132.27984619140625, "learning_rate": 3.89736398843659e-06, "loss": 22.5795, "step": 306850 }, { "epoch": 0.6198766145355673, "grad_norm": 302.6119689941406, "learning_rate": 3.897023518986915e-06, "loss": 13.0181, "step": 306860 }, { "epoch": 0.619896815168251, "grad_norm": 662.6363525390625, "learning_rate": 3.896683054913013e-06, "loss": 31.4033, "step": 306870 }, { "epoch": 0.6199170158009348, "grad_norm": 678.5469970703125, "learning_rate": 3.896342596216543e-06, "loss": 24.0122, "step": 306880 }, { "epoch": 0.6199372164336187, "grad_norm": 443.99639892578125, "learning_rate": 3.896002142899167e-06, "loss": 16.1085, 
"step": 306890 }, { "epoch": 0.6199574170663025, "grad_norm": 462.47393798828125, "learning_rate": 3.895661694962542e-06, "loss": 14.1491, "step": 306900 }, { "epoch": 0.6199776176989863, "grad_norm": 354.35906982421875, "learning_rate": 3.895321252408326e-06, "loss": 39.664, "step": 306910 }, { "epoch": 0.6199978183316701, "grad_norm": 326.7041320800781, "learning_rate": 3.894980815238184e-06, "loss": 11.0437, "step": 306920 }, { "epoch": 0.6200180189643539, "grad_norm": 410.8968811035156, "learning_rate": 3.894640383453769e-06, "loss": 15.1732, "step": 306930 }, { "epoch": 0.6200382195970378, "grad_norm": 319.5135192871094, "learning_rate": 3.894299957056743e-06, "loss": 26.3188, "step": 306940 }, { "epoch": 0.6200584202297216, "grad_norm": 248.26473999023438, "learning_rate": 3.8939595360487655e-06, "loss": 15.6178, "step": 306950 }, { "epoch": 0.6200786208624054, "grad_norm": 694.5548095703125, "learning_rate": 3.893619120431494e-06, "loss": 23.5753, "step": 306960 }, { "epoch": 0.6200988214950892, "grad_norm": 651.9447631835938, "learning_rate": 3.893278710206589e-06, "loss": 19.1723, "step": 306970 }, { "epoch": 0.620119022127773, "grad_norm": 243.91006469726562, "learning_rate": 3.892938305375712e-06, "loss": 20.8598, "step": 306980 }, { "epoch": 0.6201392227604569, "grad_norm": 360.2294006347656, "learning_rate": 3.892597905940516e-06, "loss": 16.3503, "step": 306990 }, { "epoch": 0.6201594233931407, "grad_norm": 479.9851989746094, "learning_rate": 3.892257511902664e-06, "loss": 17.4354, "step": 307000 }, { "epoch": 0.6201796240258245, "grad_norm": 254.92938232421875, "learning_rate": 3.891917123263815e-06, "loss": 10.1215, "step": 307010 }, { "epoch": 0.6201998246585083, "grad_norm": 357.59124755859375, "learning_rate": 3.891576740025628e-06, "loss": 37.9377, "step": 307020 }, { "epoch": 0.6202200252911921, "grad_norm": 23.786584854125977, "learning_rate": 3.891236362189761e-06, "loss": 12.9558, "step": 307030 }, { "epoch": 0.620240225923876, "grad_norm": 344.36334228515625, "learning_rate": 3.890895989757874e-06, "loss": 20.0871, "step": 307040 }, { "epoch": 0.6202604265565598, "grad_norm": 98.81551361083984, "learning_rate": 3.890555622731626e-06, "loss": 11.1531, "step": 307050 }, { "epoch": 0.6202806271892436, "grad_norm": 317.57373046875, "learning_rate": 3.890215261112674e-06, "loss": 11.9396, "step": 307060 }, { "epoch": 0.6203008278219274, "grad_norm": 446.8298034667969, "learning_rate": 3.889874904902678e-06, "loss": 13.6021, "step": 307070 }, { "epoch": 0.6203210284546112, "grad_norm": 1420.668701171875, "learning_rate": 3.889534554103299e-06, "loss": 28.161, "step": 307080 }, { "epoch": 0.620341229087295, "grad_norm": 562.9849853515625, "learning_rate": 3.889194208716192e-06, "loss": 23.4622, "step": 307090 }, { "epoch": 0.6203614297199789, "grad_norm": 553.1746215820312, "learning_rate": 3.888853868743018e-06, "loss": 18.4087, "step": 307100 }, { "epoch": 0.6203816303526627, "grad_norm": 381.9627990722656, "learning_rate": 3.888513534185438e-06, "loss": 20.6817, "step": 307110 }, { "epoch": 0.6204018309853465, "grad_norm": 84.61685943603516, "learning_rate": 3.888173205045105e-06, "loss": 17.944, "step": 307120 }, { "epoch": 0.6204220316180302, "grad_norm": 1.7519794702529907, "learning_rate": 3.887832881323681e-06, "loss": 14.0085, "step": 307130 }, { "epoch": 0.620442232250714, "grad_norm": 775.5814819335938, "learning_rate": 3.887492563022826e-06, "loss": 18.5329, "step": 307140 }, { "epoch": 0.6204624328833979, "grad_norm": 185.97178649902344, "learning_rate": 
3.887152250144197e-06, "loss": 14.3087, "step": 307150 }, { "epoch": 0.6204826335160817, "grad_norm": 565.8896484375, "learning_rate": 3.886811942689453e-06, "loss": 35.2329, "step": 307160 }, { "epoch": 0.6205028341487655, "grad_norm": 564.121337890625, "learning_rate": 3.8864716406602525e-06, "loss": 16.3574, "step": 307170 }, { "epoch": 0.6205230347814493, "grad_norm": 712.0159912109375, "learning_rate": 3.886131344058255e-06, "loss": 19.8137, "step": 307180 }, { "epoch": 0.6205432354141331, "grad_norm": 13.809062957763672, "learning_rate": 3.8857910528851175e-06, "loss": 15.8435, "step": 307190 }, { "epoch": 0.620563436046817, "grad_norm": 517.502685546875, "learning_rate": 3.885450767142498e-06, "loss": 21.0594, "step": 307200 }, { "epoch": 0.6205836366795008, "grad_norm": 635.3877563476562, "learning_rate": 3.8851104868320595e-06, "loss": 31.6942, "step": 307210 }, { "epoch": 0.6206038373121846, "grad_norm": 230.7758026123047, "learning_rate": 3.884770211955454e-06, "loss": 19.3524, "step": 307220 }, { "epoch": 0.6206240379448684, "grad_norm": 414.101318359375, "learning_rate": 3.884429942514345e-06, "loss": 13.2517, "step": 307230 }, { "epoch": 0.6206442385775522, "grad_norm": 230.80601501464844, "learning_rate": 3.884089678510389e-06, "loss": 15.3668, "step": 307240 }, { "epoch": 0.6206644392102361, "grad_norm": 294.1860046386719, "learning_rate": 3.883749419945244e-06, "loss": 14.2085, "step": 307250 }, { "epoch": 0.6206846398429199, "grad_norm": 375.5740051269531, "learning_rate": 3.883409166820569e-06, "loss": 23.9223, "step": 307260 }, { "epoch": 0.6207048404756037, "grad_norm": 352.6200256347656, "learning_rate": 3.883068919138023e-06, "loss": 18.2123, "step": 307270 }, { "epoch": 0.6207250411082875, "grad_norm": 19.69367027282715, "learning_rate": 3.882728676899263e-06, "loss": 24.5393, "step": 307280 }, { "epoch": 0.6207452417409713, "grad_norm": 553.554931640625, "learning_rate": 3.882388440105947e-06, "loss": 21.5132, "step": 307290 }, { "epoch": 0.6207654423736552, "grad_norm": 261.3301696777344, "learning_rate": 3.882048208759735e-06, "loss": 24.2588, "step": 307300 }, { "epoch": 0.620785643006339, "grad_norm": 284.4010009765625, "learning_rate": 3.8817079828622855e-06, "loss": 21.4293, "step": 307310 }, { "epoch": 0.6208058436390228, "grad_norm": 109.50505065917969, "learning_rate": 3.881367762415255e-06, "loss": 10.3847, "step": 307320 }, { "epoch": 0.6208260442717066, "grad_norm": 1052.4429931640625, "learning_rate": 3.881027547420302e-06, "loss": 36.2355, "step": 307330 }, { "epoch": 0.6208462449043904, "grad_norm": 629.2431640625, "learning_rate": 3.880687337879086e-06, "loss": 15.3413, "step": 307340 }, { "epoch": 0.6208664455370743, "grad_norm": 250.07577514648438, "learning_rate": 3.880347133793263e-06, "loss": 19.1702, "step": 307350 }, { "epoch": 0.6208866461697581, "grad_norm": 201.0216522216797, "learning_rate": 3.880006935164491e-06, "loss": 11.6162, "step": 307360 }, { "epoch": 0.6209068468024419, "grad_norm": 322.825439453125, "learning_rate": 3.8796667419944335e-06, "loss": 15.8074, "step": 307370 }, { "epoch": 0.6209270474351257, "grad_norm": 298.6944274902344, "learning_rate": 3.87932655428474e-06, "loss": 22.8595, "step": 307380 }, { "epoch": 0.6209472480678094, "grad_norm": 602.3025512695312, "learning_rate": 3.878986372037074e-06, "loss": 18.5704, "step": 307390 }, { "epoch": 0.6209674487004933, "grad_norm": 183.25563049316406, "learning_rate": 3.8786461952530955e-06, "loss": 29.4538, "step": 307400 }, { "epoch": 0.6209876493331771, "grad_norm": 
150.2943115234375, "learning_rate": 3.878306023934457e-06, "loss": 20.3949, "step": 307410 }, { "epoch": 0.6210078499658609, "grad_norm": 508.4177551269531, "learning_rate": 3.877965858082818e-06, "loss": 14.1169, "step": 307420 }, { "epoch": 0.6210280505985447, "grad_norm": 229.1527099609375, "learning_rate": 3.87762569769984e-06, "loss": 16.7883, "step": 307430 }, { "epoch": 0.6210482512312285, "grad_norm": 92.78300476074219, "learning_rate": 3.877285542787176e-06, "loss": 23.3407, "step": 307440 }, { "epoch": 0.6210684518639124, "grad_norm": 571.0462646484375, "learning_rate": 3.876945393346486e-06, "loss": 12.3129, "step": 307450 }, { "epoch": 0.6210886524965962, "grad_norm": 69.4331283569336, "learning_rate": 3.8766052493794286e-06, "loss": 13.6684, "step": 307460 }, { "epoch": 0.62110885312928, "grad_norm": 178.68429565429688, "learning_rate": 3.876265110887662e-06, "loss": 14.7824, "step": 307470 }, { "epoch": 0.6211290537619638, "grad_norm": 421.0312194824219, "learning_rate": 3.875924977872842e-06, "loss": 15.1686, "step": 307480 }, { "epoch": 0.6211492543946476, "grad_norm": 0.0, "learning_rate": 3.875584850336627e-06, "loss": 26.5098, "step": 307490 }, { "epoch": 0.6211694550273315, "grad_norm": 365.2734375, "learning_rate": 3.875244728280676e-06, "loss": 15.8003, "step": 307500 }, { "epoch": 0.6211896556600153, "grad_norm": 729.2528076171875, "learning_rate": 3.8749046117066455e-06, "loss": 11.6154, "step": 307510 }, { "epoch": 0.6212098562926991, "grad_norm": 625.0670166015625, "learning_rate": 3.874564500616192e-06, "loss": 15.0667, "step": 307520 }, { "epoch": 0.6212300569253829, "grad_norm": 0.0, "learning_rate": 3.874224395010977e-06, "loss": 11.4449, "step": 307530 }, { "epoch": 0.6212502575580667, "grad_norm": 430.8566589355469, "learning_rate": 3.873884294892654e-06, "loss": 19.7118, "step": 307540 }, { "epoch": 0.6212704581907506, "grad_norm": 661.2942504882812, "learning_rate": 3.873544200262882e-06, "loss": 15.6916, "step": 307550 }, { "epoch": 0.6212906588234344, "grad_norm": 253.31130981445312, "learning_rate": 3.873204111123321e-06, "loss": 15.5921, "step": 307560 }, { "epoch": 0.6213108594561182, "grad_norm": 159.25498962402344, "learning_rate": 3.872864027475626e-06, "loss": 19.4433, "step": 307570 }, { "epoch": 0.621331060088802, "grad_norm": 432.11834716796875, "learning_rate": 3.872523949321454e-06, "loss": 15.8293, "step": 307580 }, { "epoch": 0.6213512607214858, "grad_norm": 53.555694580078125, "learning_rate": 3.872183876662462e-06, "loss": 26.4413, "step": 307590 }, { "epoch": 0.6213714613541697, "grad_norm": 308.6258239746094, "learning_rate": 3.871843809500313e-06, "loss": 28.8189, "step": 307600 }, { "epoch": 0.6213916619868535, "grad_norm": 20.56621551513672, "learning_rate": 3.871503747836657e-06, "loss": 10.9581, "step": 307610 }, { "epoch": 0.6214118626195373, "grad_norm": 465.7938537597656, "learning_rate": 3.8711636916731566e-06, "loss": 17.9902, "step": 307620 }, { "epoch": 0.6214320632522211, "grad_norm": 476.42034912109375, "learning_rate": 3.870823641011467e-06, "loss": 21.1891, "step": 307630 }, { "epoch": 0.6214522638849048, "grad_norm": 242.0484161376953, "learning_rate": 3.870483595853246e-06, "loss": 27.705, "step": 307640 }, { "epoch": 0.6214724645175886, "grad_norm": 685.3764038085938, "learning_rate": 3.870143556200152e-06, "loss": 27.2119, "step": 307650 }, { "epoch": 0.6214926651502725, "grad_norm": 190.67459106445312, "learning_rate": 3.8698035220538404e-06, "loss": 12.5892, "step": 307660 }, { "epoch": 0.6215128657829563, 
"grad_norm": 3.617690324783325, "learning_rate": 3.869463493415969e-06, "loss": 28.9364, "step": 307670 }, { "epoch": 0.6215330664156401, "grad_norm": 232.81556701660156, "learning_rate": 3.869123470288195e-06, "loss": 20.8313, "step": 307680 }, { "epoch": 0.6215532670483239, "grad_norm": 785.2691650390625, "learning_rate": 3.868783452672177e-06, "loss": 18.9252, "step": 307690 }, { "epoch": 0.6215734676810077, "grad_norm": 150.1544647216797, "learning_rate": 3.868443440569571e-06, "loss": 22.7764, "step": 307700 }, { "epoch": 0.6215936683136916, "grad_norm": 529.515380859375, "learning_rate": 3.868103433982034e-06, "loss": 16.2962, "step": 307710 }, { "epoch": 0.6216138689463754, "grad_norm": 536.1862182617188, "learning_rate": 3.867763432911225e-06, "loss": 28.8676, "step": 307720 }, { "epoch": 0.6216340695790592, "grad_norm": 254.0311737060547, "learning_rate": 3.867423437358799e-06, "loss": 16.9141, "step": 307730 }, { "epoch": 0.621654270211743, "grad_norm": 218.4225616455078, "learning_rate": 3.867083447326413e-06, "loss": 19.5715, "step": 307740 }, { "epoch": 0.6216744708444268, "grad_norm": 485.4092102050781, "learning_rate": 3.866743462815724e-06, "loss": 13.436, "step": 307750 }, { "epoch": 0.6216946714771107, "grad_norm": 256.953857421875, "learning_rate": 3.866403483828392e-06, "loss": 11.0728, "step": 307760 }, { "epoch": 0.6217148721097945, "grad_norm": 264.557373046875, "learning_rate": 3.866063510366072e-06, "loss": 17.1047, "step": 307770 }, { "epoch": 0.6217350727424783, "grad_norm": 23.962257385253906, "learning_rate": 3.86572354243042e-06, "loss": 26.9807, "step": 307780 }, { "epoch": 0.6217552733751621, "grad_norm": 92.05508422851562, "learning_rate": 3.865383580023094e-06, "loss": 24.2598, "step": 307790 }, { "epoch": 0.6217754740078459, "grad_norm": 243.84164428710938, "learning_rate": 3.865043623145751e-06, "loss": 18.737, "step": 307800 }, { "epoch": 0.6217956746405298, "grad_norm": 205.98390197753906, "learning_rate": 3.864703671800047e-06, "loss": 22.9812, "step": 307810 }, { "epoch": 0.6218158752732136, "grad_norm": 365.3945617675781, "learning_rate": 3.8643637259876415e-06, "loss": 21.3828, "step": 307820 }, { "epoch": 0.6218360759058974, "grad_norm": 270.4975891113281, "learning_rate": 3.864023785710187e-06, "loss": 11.1547, "step": 307830 }, { "epoch": 0.6218562765385812, "grad_norm": 793.963623046875, "learning_rate": 3.863683850969343e-06, "loss": 19.8202, "step": 307840 }, { "epoch": 0.621876477171265, "grad_norm": 559.8876342773438, "learning_rate": 3.863343921766769e-06, "loss": 18.1483, "step": 307850 }, { "epoch": 0.6218966778039489, "grad_norm": 616.4458618164062, "learning_rate": 3.863003998104117e-06, "loss": 17.1349, "step": 307860 }, { "epoch": 0.6219168784366327, "grad_norm": 214.35891723632812, "learning_rate": 3.862664079983045e-06, "loss": 24.9051, "step": 307870 }, { "epoch": 0.6219370790693165, "grad_norm": 561.56494140625, "learning_rate": 3.862324167405212e-06, "loss": 26.8446, "step": 307880 }, { "epoch": 0.6219572797020003, "grad_norm": 651.3684692382812, "learning_rate": 3.8619842603722715e-06, "loss": 12.9932, "step": 307890 }, { "epoch": 0.621977480334684, "grad_norm": 356.9894104003906, "learning_rate": 3.86164435888588e-06, "loss": 27.2443, "step": 307900 }, { "epoch": 0.6219976809673678, "grad_norm": 55.86592483520508, "learning_rate": 3.861304462947698e-06, "loss": 10.004, "step": 307910 }, { "epoch": 0.6220178816000517, "grad_norm": 355.6819152832031, "learning_rate": 3.860964572559381e-06, "loss": 14.478, "step": 307920 }, { 
"epoch": 0.6220380822327355, "grad_norm": 8.241705894470215, "learning_rate": 3.860624687722583e-06, "loss": 15.1137, "step": 307930 }, { "epoch": 0.6220582828654193, "grad_norm": 602.1299438476562, "learning_rate": 3.860284808438962e-06, "loss": 15.2554, "step": 307940 }, { "epoch": 0.6220784834981031, "grad_norm": 548.4798583984375, "learning_rate": 3.859944934710177e-06, "loss": 23.1541, "step": 307950 }, { "epoch": 0.622098684130787, "grad_norm": 29.896265029907227, "learning_rate": 3.859605066537879e-06, "loss": 19.2662, "step": 307960 }, { "epoch": 0.6221188847634708, "grad_norm": 246.86521911621094, "learning_rate": 3.859265203923728e-06, "loss": 26.9573, "step": 307970 }, { "epoch": 0.6221390853961546, "grad_norm": 296.05096435546875, "learning_rate": 3.858925346869383e-06, "loss": 32.5155, "step": 307980 }, { "epoch": 0.6221592860288384, "grad_norm": 455.86322021484375, "learning_rate": 3.858585495376494e-06, "loss": 21.6094, "step": 307990 }, { "epoch": 0.6221794866615222, "grad_norm": 224.6339111328125, "learning_rate": 3.8582456494467214e-06, "loss": 18.4335, "step": 308000 }, { "epoch": 0.622199687294206, "grad_norm": 256.71368408203125, "learning_rate": 3.857905809081723e-06, "loss": 19.7549, "step": 308010 }, { "epoch": 0.6222198879268899, "grad_norm": 132.54017639160156, "learning_rate": 3.857565974283152e-06, "loss": 20.27, "step": 308020 }, { "epoch": 0.6222400885595737, "grad_norm": 639.8458251953125, "learning_rate": 3.857226145052665e-06, "loss": 18.2413, "step": 308030 }, { "epoch": 0.6222602891922575, "grad_norm": 36.40522384643555, "learning_rate": 3.856886321391919e-06, "loss": 16.8783, "step": 308040 }, { "epoch": 0.6222804898249413, "grad_norm": 183.32699584960938, "learning_rate": 3.856546503302573e-06, "loss": 12.8777, "step": 308050 }, { "epoch": 0.6223006904576251, "grad_norm": 393.983642578125, "learning_rate": 3.856206690786278e-06, "loss": 12.4519, "step": 308060 }, { "epoch": 0.622320891090309, "grad_norm": 348.84417724609375, "learning_rate": 3.8558668838446935e-06, "loss": 19.8369, "step": 308070 }, { "epoch": 0.6223410917229928, "grad_norm": 527.478515625, "learning_rate": 3.855527082479477e-06, "loss": 17.1344, "step": 308080 }, { "epoch": 0.6223612923556766, "grad_norm": 833.7142333984375, "learning_rate": 3.85518728669228e-06, "loss": 25.6628, "step": 308090 }, { "epoch": 0.6223814929883604, "grad_norm": 902.67431640625, "learning_rate": 3.854847496484762e-06, "loss": 26.7043, "step": 308100 }, { "epoch": 0.6224016936210442, "grad_norm": 354.50384521484375, "learning_rate": 3.85450771185858e-06, "loss": 17.5971, "step": 308110 }, { "epoch": 0.6224218942537281, "grad_norm": 440.0574035644531, "learning_rate": 3.854167932815387e-06, "loss": 21.2498, "step": 308120 }, { "epoch": 0.6224420948864119, "grad_norm": 260.6983337402344, "learning_rate": 3.85382815935684e-06, "loss": 15.5941, "step": 308130 }, { "epoch": 0.6224622955190957, "grad_norm": 99.20164489746094, "learning_rate": 3.853488391484599e-06, "loss": 20.4455, "step": 308140 }, { "epoch": 0.6224824961517794, "grad_norm": 410.4075012207031, "learning_rate": 3.853148629200312e-06, "loss": 18.4926, "step": 308150 }, { "epoch": 0.6225026967844632, "grad_norm": 401.4082336425781, "learning_rate": 3.852808872505642e-06, "loss": 17.5811, "step": 308160 }, { "epoch": 0.6225228974171471, "grad_norm": 375.050048828125, "learning_rate": 3.8524691214022425e-06, "loss": 26.6744, "step": 308170 }, { "epoch": 0.6225430980498309, "grad_norm": 225.84666442871094, "learning_rate": 3.8521293758917684e-06, 
"loss": 10.9746, "step": 308180 }, { "epoch": 0.6225632986825147, "grad_norm": 373.8145751953125, "learning_rate": 3.851789635975877e-06, "loss": 21.9365, "step": 308190 }, { "epoch": 0.6225834993151985, "grad_norm": 219.49147033691406, "learning_rate": 3.8514499016562216e-06, "loss": 23.9357, "step": 308200 }, { "epoch": 0.6226036999478823, "grad_norm": 391.24957275390625, "learning_rate": 3.851110172934463e-06, "loss": 15.7127, "step": 308210 }, { "epoch": 0.6226239005805662, "grad_norm": 306.0335388183594, "learning_rate": 3.850770449812252e-06, "loss": 14.8573, "step": 308220 }, { "epoch": 0.62264410121325, "grad_norm": 827.5209350585938, "learning_rate": 3.850430732291248e-06, "loss": 17.0767, "step": 308230 }, { "epoch": 0.6226643018459338, "grad_norm": 101.06029510498047, "learning_rate": 3.850091020373105e-06, "loss": 16.6881, "step": 308240 }, { "epoch": 0.6226845024786176, "grad_norm": 448.7660827636719, "learning_rate": 3.849751314059479e-06, "loss": 30.5649, "step": 308250 }, { "epoch": 0.6227047031113014, "grad_norm": 690.1036987304688, "learning_rate": 3.849411613352024e-06, "loss": 17.4533, "step": 308260 }, { "epoch": 0.6227249037439853, "grad_norm": 189.1039276123047, "learning_rate": 3.8490719182524e-06, "loss": 11.6213, "step": 308270 }, { "epoch": 0.6227451043766691, "grad_norm": 436.6559143066406, "learning_rate": 3.848732228762257e-06, "loss": 14.4833, "step": 308280 }, { "epoch": 0.6227653050093529, "grad_norm": 481.327392578125, "learning_rate": 3.848392544883254e-06, "loss": 40.662, "step": 308290 }, { "epoch": 0.6227855056420367, "grad_norm": 279.5513916015625, "learning_rate": 3.8480528666170495e-06, "loss": 17.9576, "step": 308300 }, { "epoch": 0.6228057062747205, "grad_norm": 109.25330352783203, "learning_rate": 3.847713193965291e-06, "loss": 16.0483, "step": 308310 }, { "epoch": 0.6228259069074044, "grad_norm": 411.5627136230469, "learning_rate": 3.84737352692964e-06, "loss": 15.8277, "step": 308320 }, { "epoch": 0.6228461075400882, "grad_norm": 320.0434875488281, "learning_rate": 3.847033865511752e-06, "loss": 20.7567, "step": 308330 }, { "epoch": 0.622866308172772, "grad_norm": 234.54380798339844, "learning_rate": 3.84669420971328e-06, "loss": 9.6087, "step": 308340 }, { "epoch": 0.6228865088054558, "grad_norm": 503.6078796386719, "learning_rate": 3.846354559535881e-06, "loss": 29.8908, "step": 308350 }, { "epoch": 0.6229067094381396, "grad_norm": 79.07080841064453, "learning_rate": 3.846014914981209e-06, "loss": 22.0454, "step": 308360 }, { "epoch": 0.6229269100708235, "grad_norm": 222.7816925048828, "learning_rate": 3.845675276050923e-06, "loss": 34.9026, "step": 308370 }, { "epoch": 0.6229471107035073, "grad_norm": 399.2646484375, "learning_rate": 3.845335642746672e-06, "loss": 13.7178, "step": 308380 }, { "epoch": 0.6229673113361911, "grad_norm": 372.23077392578125, "learning_rate": 3.8449960150701175e-06, "loss": 13.8694, "step": 308390 }, { "epoch": 0.6229875119688749, "grad_norm": 367.197509765625, "learning_rate": 3.844656393022912e-06, "loss": 11.3064, "step": 308400 }, { "epoch": 0.6230077126015586, "grad_norm": 295.2317199707031, "learning_rate": 3.84431677660671e-06, "loss": 13.8095, "step": 308410 }, { "epoch": 0.6230279132342424, "grad_norm": 685.9607543945312, "learning_rate": 3.843977165823169e-06, "loss": 21.6459, "step": 308420 }, { "epoch": 0.6230481138669263, "grad_norm": 554.89794921875, "learning_rate": 3.843637560673943e-06, "loss": 13.4703, "step": 308430 }, { "epoch": 0.6230683144996101, "grad_norm": 56.357337951660156, 
"learning_rate": 3.843297961160686e-06, "loss": 14.792, "step": 308440 }, { "epoch": 0.6230885151322939, "grad_norm": 233.34291076660156, "learning_rate": 3.842958367285056e-06, "loss": 22.8048, "step": 308450 }, { "epoch": 0.6231087157649777, "grad_norm": 790.2981567382812, "learning_rate": 3.842618779048706e-06, "loss": 31.7122, "step": 308460 }, { "epoch": 0.6231289163976615, "grad_norm": 167.50323486328125, "learning_rate": 3.842279196453292e-06, "loss": 13.0795, "step": 308470 }, { "epoch": 0.6231491170303454, "grad_norm": 189.74818420410156, "learning_rate": 3.841939619500468e-06, "loss": 29.6577, "step": 308480 }, { "epoch": 0.6231693176630292, "grad_norm": 196.10964965820312, "learning_rate": 3.841600048191891e-06, "loss": 13.5237, "step": 308490 }, { "epoch": 0.623189518295713, "grad_norm": 686.2725219726562, "learning_rate": 3.841260482529215e-06, "loss": 18.2192, "step": 308500 }, { "epoch": 0.6232097189283968, "grad_norm": 2298.264404296875, "learning_rate": 3.8409209225140925e-06, "loss": 12.5312, "step": 308510 }, { "epoch": 0.6232299195610806, "grad_norm": 201.07447814941406, "learning_rate": 3.840581368148182e-06, "loss": 25.0888, "step": 308520 }, { "epoch": 0.6232501201937645, "grad_norm": 0.0, "learning_rate": 3.840241819433139e-06, "loss": 18.6599, "step": 308530 }, { "epoch": 0.6232703208264483, "grad_norm": 283.84600830078125, "learning_rate": 3.839902276370615e-06, "loss": 18.5898, "step": 308540 }, { "epoch": 0.6232905214591321, "grad_norm": 341.5215759277344, "learning_rate": 3.839562738962267e-06, "loss": 16.1746, "step": 308550 }, { "epoch": 0.6233107220918159, "grad_norm": 183.77569580078125, "learning_rate": 3.83922320720975e-06, "loss": 8.8788, "step": 308560 }, { "epoch": 0.6233309227244997, "grad_norm": 322.90435791015625, "learning_rate": 3.838883681114718e-06, "loss": 19.1294, "step": 308570 }, { "epoch": 0.6233511233571836, "grad_norm": 507.79547119140625, "learning_rate": 3.838544160678824e-06, "loss": 10.0477, "step": 308580 }, { "epoch": 0.6233713239898674, "grad_norm": 461.77557373046875, "learning_rate": 3.838204645903729e-06, "loss": 34.7119, "step": 308590 }, { "epoch": 0.6233915246225512, "grad_norm": 84.36308288574219, "learning_rate": 3.83786513679108e-06, "loss": 13.4838, "step": 308600 }, { "epoch": 0.623411725255235, "grad_norm": 264.8419494628906, "learning_rate": 3.837525633342537e-06, "loss": 19.1045, "step": 308610 }, { "epoch": 0.6234319258879188, "grad_norm": 530.059814453125, "learning_rate": 3.837186135559754e-06, "loss": 30.9889, "step": 308620 }, { "epoch": 0.6234521265206027, "grad_norm": 306.8770751953125, "learning_rate": 3.836846643444383e-06, "loss": 20.6282, "step": 308630 }, { "epoch": 0.6234723271532865, "grad_norm": 247.51690673828125, "learning_rate": 3.836507156998081e-06, "loss": 28.366, "step": 308640 }, { "epoch": 0.6234925277859703, "grad_norm": 301.9272766113281, "learning_rate": 3.836167676222501e-06, "loss": 22.2943, "step": 308650 }, { "epoch": 0.6235127284186541, "grad_norm": 432.4485168457031, "learning_rate": 3.835828201119302e-06, "loss": 19.5995, "step": 308660 }, { "epoch": 0.6235329290513378, "grad_norm": 429.926025390625, "learning_rate": 3.835488731690131e-06, "loss": 34.4836, "step": 308670 }, { "epoch": 0.6235531296840217, "grad_norm": 627.3590698242188, "learning_rate": 3.835149267936649e-06, "loss": 17.1499, "step": 308680 }, { "epoch": 0.6235733303167055, "grad_norm": 121.15222930908203, "learning_rate": 3.834809809860508e-06, "loss": 21.2045, "step": 308690 }, { "epoch": 0.6235935309493893, 
"grad_norm": 364.7321472167969, "learning_rate": 3.834470357463362e-06, "loss": 15.5802, "step": 308700 }, { "epoch": 0.6236137315820731, "grad_norm": 205.34031677246094, "learning_rate": 3.834130910746866e-06, "loss": 16.2231, "step": 308710 }, { "epoch": 0.6236339322147569, "grad_norm": 555.9500732421875, "learning_rate": 3.833791469712676e-06, "loss": 21.4223, "step": 308720 }, { "epoch": 0.6236541328474408, "grad_norm": 374.8460388183594, "learning_rate": 3.833452034362444e-06, "loss": 29.5215, "step": 308730 }, { "epoch": 0.6236743334801246, "grad_norm": 712.3816528320312, "learning_rate": 3.833112604697824e-06, "loss": 18.2273, "step": 308740 }, { "epoch": 0.6236945341128084, "grad_norm": 773.9443969726562, "learning_rate": 3.832773180720475e-06, "loss": 17.7833, "step": 308750 }, { "epoch": 0.6237147347454922, "grad_norm": 219.06417846679688, "learning_rate": 3.832433762432044e-06, "loss": 21.787, "step": 308760 }, { "epoch": 0.623734935378176, "grad_norm": 337.8908386230469, "learning_rate": 3.832094349834191e-06, "loss": 20.1779, "step": 308770 }, { "epoch": 0.6237551360108599, "grad_norm": 743.0489501953125, "learning_rate": 3.83175494292857e-06, "loss": 17.6274, "step": 308780 }, { "epoch": 0.6237753366435437, "grad_norm": 373.329833984375, "learning_rate": 3.831415541716831e-06, "loss": 14.9177, "step": 308790 }, { "epoch": 0.6237955372762275, "grad_norm": 205.29086303710938, "learning_rate": 3.831076146200633e-06, "loss": 12.49, "step": 308800 }, { "epoch": 0.6238157379089113, "grad_norm": 265.4069519042969, "learning_rate": 3.830736756381626e-06, "loss": 18.5815, "step": 308810 }, { "epoch": 0.6238359385415951, "grad_norm": 422.3825378417969, "learning_rate": 3.830397372261469e-06, "loss": 15.7553, "step": 308820 }, { "epoch": 0.623856139174279, "grad_norm": 391.0090637207031, "learning_rate": 3.8300579938418105e-06, "loss": 15.3095, "step": 308830 }, { "epoch": 0.6238763398069628, "grad_norm": 315.3426513671875, "learning_rate": 3.8297186211243085e-06, "loss": 17.8949, "step": 308840 }, { "epoch": 0.6238965404396466, "grad_norm": 0.0, "learning_rate": 3.829379254110617e-06, "loss": 28.5212, "step": 308850 }, { "epoch": 0.6239167410723304, "grad_norm": 1928.339599609375, "learning_rate": 3.829039892802388e-06, "loss": 29.9941, "step": 308860 }, { "epoch": 0.6239369417050142, "grad_norm": 159.69752502441406, "learning_rate": 3.828700537201277e-06, "loss": 6.9251, "step": 308870 }, { "epoch": 0.6239571423376981, "grad_norm": 973.2032470703125, "learning_rate": 3.828361187308938e-06, "loss": 27.3199, "step": 308880 }, { "epoch": 0.6239773429703819, "grad_norm": 399.58697509765625, "learning_rate": 3.828021843127024e-06, "loss": 21.6232, "step": 308890 }, { "epoch": 0.6239975436030657, "grad_norm": 173.24356079101562, "learning_rate": 3.827682504657187e-06, "loss": 16.9302, "step": 308900 }, { "epoch": 0.6240177442357495, "grad_norm": 319.8014221191406, "learning_rate": 3.827343171901089e-06, "loss": 10.0086, "step": 308910 }, { "epoch": 0.6240379448684332, "grad_norm": 644.0597534179688, "learning_rate": 3.827003844860373e-06, "loss": 37.1336, "step": 308920 }, { "epoch": 0.624058145501117, "grad_norm": 12.235321998596191, "learning_rate": 3.8266645235367e-06, "loss": 10.2737, "step": 308930 }, { "epoch": 0.6240783461338009, "grad_norm": 857.0070190429688, "learning_rate": 3.826325207931722e-06, "loss": 33.6433, "step": 308940 }, { "epoch": 0.6240985467664847, "grad_norm": 195.88980102539062, "learning_rate": 3.825985898047091e-06, "loss": 27.6858, "step": 308950 }, { "epoch": 
0.6241187473991685, "grad_norm": 135.80322265625, "learning_rate": 3.8256465938844635e-06, "loss": 25.9452, "step": 308960 }, { "epoch": 0.6241389480318523, "grad_norm": 196.93142700195312, "learning_rate": 3.8253072954454905e-06, "loss": 16.756, "step": 308970 }, { "epoch": 0.6241591486645361, "grad_norm": 171.48313903808594, "learning_rate": 3.824968002731831e-06, "loss": 23.1062, "step": 308980 }, { "epoch": 0.62417934929722, "grad_norm": 144.01405334472656, "learning_rate": 3.824628715745131e-06, "loss": 19.0789, "step": 308990 }, { "epoch": 0.6241995499299038, "grad_norm": 355.7646179199219, "learning_rate": 3.82428943448705e-06, "loss": 15.1852, "step": 309000 }, { "epoch": 0.6242197505625876, "grad_norm": 198.680419921875, "learning_rate": 3.82395015895924e-06, "loss": 23.6251, "step": 309010 }, { "epoch": 0.6242399511952714, "grad_norm": 581.5150756835938, "learning_rate": 3.823610889163354e-06, "loss": 20.0148, "step": 309020 }, { "epoch": 0.6242601518279552, "grad_norm": 468.5245666503906, "learning_rate": 3.823271625101045e-06, "loss": 14.7354, "step": 309030 }, { "epoch": 0.6242803524606391, "grad_norm": 198.03665161132812, "learning_rate": 3.822932366773969e-06, "loss": 14.4478, "step": 309040 }, { "epoch": 0.6243005530933229, "grad_norm": 324.575927734375, "learning_rate": 3.822593114183777e-06, "loss": 16.6424, "step": 309050 }, { "epoch": 0.6243207537260067, "grad_norm": 225.9619903564453, "learning_rate": 3.822253867332122e-06, "loss": 22.4816, "step": 309060 }, { "epoch": 0.6243409543586905, "grad_norm": 278.31353759765625, "learning_rate": 3.821914626220661e-06, "loss": 13.9285, "step": 309070 }, { "epoch": 0.6243611549913743, "grad_norm": 112.58853912353516, "learning_rate": 3.8215753908510435e-06, "loss": 19.6135, "step": 309080 }, { "epoch": 0.6243813556240582, "grad_norm": 440.0562744140625, "learning_rate": 3.8212361612249255e-06, "loss": 16.0825, "step": 309090 }, { "epoch": 0.624401556256742, "grad_norm": 377.1228942871094, "learning_rate": 3.820896937343959e-06, "loss": 30.0899, "step": 309100 }, { "epoch": 0.6244217568894258, "grad_norm": 407.56207275390625, "learning_rate": 3.820557719209799e-06, "loss": 14.0827, "step": 309110 }, { "epoch": 0.6244419575221096, "grad_norm": 263.2430114746094, "learning_rate": 3.820218506824096e-06, "loss": 6.6251, "step": 309120 }, { "epoch": 0.6244621581547934, "grad_norm": 319.5250549316406, "learning_rate": 3.819879300188505e-06, "loss": 19.9556, "step": 309130 }, { "epoch": 0.6244823587874773, "grad_norm": 184.48289489746094, "learning_rate": 3.8195400993046815e-06, "loss": 16.9671, "step": 309140 }, { "epoch": 0.6245025594201611, "grad_norm": 173.50814819335938, "learning_rate": 3.819200904174274e-06, "loss": 15.9102, "step": 309150 }, { "epoch": 0.6245227600528449, "grad_norm": 686.6409912109375, "learning_rate": 3.818861714798939e-06, "loss": 37.288, "step": 309160 }, { "epoch": 0.6245429606855287, "grad_norm": 157.87832641601562, "learning_rate": 3.8185225311803295e-06, "loss": 10.6197, "step": 309170 }, { "epoch": 0.6245631613182124, "grad_norm": 201.24237060546875, "learning_rate": 3.8181833533200965e-06, "loss": 9.6761, "step": 309180 }, { "epoch": 0.6245833619508963, "grad_norm": 208.93910217285156, "learning_rate": 3.817844181219893e-06, "loss": 42.3918, "step": 309190 }, { "epoch": 0.6246035625835801, "grad_norm": 808.4969482421875, "learning_rate": 3.817505014881378e-06, "loss": 17.2931, "step": 309200 }, { "epoch": 0.6246237632162639, "grad_norm": 677.9012451171875, "learning_rate": 3.817165854306197e-06, 
"loss": 10.4501, "step": 309210 }, { "epoch": 0.6246439638489477, "grad_norm": 765.8230590820312, "learning_rate": 3.816826699496006e-06, "loss": 26.1429, "step": 309220 }, { "epoch": 0.6246641644816315, "grad_norm": 210.09230041503906, "learning_rate": 3.81648755045246e-06, "loss": 18.1209, "step": 309230 }, { "epoch": 0.6246843651143154, "grad_norm": 301.4053649902344, "learning_rate": 3.816148407177209e-06, "loss": 17.2816, "step": 309240 }, { "epoch": 0.6247045657469992, "grad_norm": 409.62109375, "learning_rate": 3.815809269671908e-06, "loss": 14.4398, "step": 309250 }, { "epoch": 0.624724766379683, "grad_norm": 247.8682098388672, "learning_rate": 3.8154701379382064e-06, "loss": 21.5997, "step": 309260 }, { "epoch": 0.6247449670123668, "grad_norm": 464.99627685546875, "learning_rate": 3.815131011977763e-06, "loss": 20.3094, "step": 309270 }, { "epoch": 0.6247651676450506, "grad_norm": 595.7064819335938, "learning_rate": 3.814791891792225e-06, "loss": 24.8131, "step": 309280 }, { "epoch": 0.6247853682777345, "grad_norm": 353.93017578125, "learning_rate": 3.814452777383248e-06, "loss": 17.6499, "step": 309290 }, { "epoch": 0.6248055689104183, "grad_norm": 65.7148666381836, "learning_rate": 3.814113668752486e-06, "loss": 18.4265, "step": 309300 }, { "epoch": 0.6248257695431021, "grad_norm": 394.8728942871094, "learning_rate": 3.8137745659015884e-06, "loss": 18.7722, "step": 309310 }, { "epoch": 0.6248459701757859, "grad_norm": 6.185215473175049, "learning_rate": 3.81343546883221e-06, "loss": 16.4115, "step": 309320 }, { "epoch": 0.6248661708084697, "grad_norm": 838.2324829101562, "learning_rate": 3.8130963775460045e-06, "loss": 22.9985, "step": 309330 }, { "epoch": 0.6248863714411536, "grad_norm": 232.22332763671875, "learning_rate": 3.812757292044622e-06, "loss": 13.2857, "step": 309340 }, { "epoch": 0.6249065720738374, "grad_norm": 90.10667419433594, "learning_rate": 3.8124182123297153e-06, "loss": 17.6612, "step": 309350 }, { "epoch": 0.6249267727065212, "grad_norm": 521.38037109375, "learning_rate": 3.8120791384029414e-06, "loss": 18.6967, "step": 309360 }, { "epoch": 0.624946973339205, "grad_norm": 755.168701171875, "learning_rate": 3.811740070265947e-06, "loss": 15.8787, "step": 309370 }, { "epoch": 0.6249671739718888, "grad_norm": 355.4938659667969, "learning_rate": 3.8114010079203877e-06, "loss": 24.3521, "step": 309380 }, { "epoch": 0.6249873746045727, "grad_norm": 1087.0955810546875, "learning_rate": 3.8110619513679176e-06, "loss": 15.2451, "step": 309390 }, { "epoch": 0.6250075752372565, "grad_norm": 869.311767578125, "learning_rate": 3.810722900610186e-06, "loss": 25.5715, "step": 309400 }, { "epoch": 0.6250277758699403, "grad_norm": 234.51182556152344, "learning_rate": 3.8103838556488467e-06, "loss": 18.2496, "step": 309410 }, { "epoch": 0.6250479765026241, "grad_norm": 622.8326416015625, "learning_rate": 3.810044816485551e-06, "loss": 28.3993, "step": 309420 }, { "epoch": 0.6250681771353078, "grad_norm": 56.3055419921875, "learning_rate": 3.809705783121956e-06, "loss": 25.9545, "step": 309430 }, { "epoch": 0.6250883777679916, "grad_norm": 138.11941528320312, "learning_rate": 3.8093667555597068e-06, "loss": 20.2521, "step": 309440 }, { "epoch": 0.6251085784006755, "grad_norm": 470.74615478515625, "learning_rate": 3.809027733800461e-06, "loss": 13.6337, "step": 309450 }, { "epoch": 0.6251287790333593, "grad_norm": 838.5234985351562, "learning_rate": 3.808688717845871e-06, "loss": 19.7129, "step": 309460 }, { "epoch": 0.6251489796660431, "grad_norm": 184.8392791748047, 
"learning_rate": 3.8083497076975863e-06, "loss": 16.6563, "step": 309470 }, { "epoch": 0.6251691802987269, "grad_norm": 320.599365234375, "learning_rate": 3.808010703357261e-06, "loss": 24.2884, "step": 309480 }, { "epoch": 0.6251893809314107, "grad_norm": 146.20480346679688, "learning_rate": 3.8076717048265477e-06, "loss": 14.4137, "step": 309490 }, { "epoch": 0.6252095815640946, "grad_norm": 42.37262725830078, "learning_rate": 3.8073327121070968e-06, "loss": 31.002, "step": 309500 }, { "epoch": 0.6252297821967784, "grad_norm": 226.96316528320312, "learning_rate": 3.8069937252005606e-06, "loss": 21.3917, "step": 309510 }, { "epoch": 0.6252499828294622, "grad_norm": 541.5800170898438, "learning_rate": 3.8066547441085956e-06, "loss": 30.4584, "step": 309520 }, { "epoch": 0.625270183462146, "grad_norm": 1380.770751953125, "learning_rate": 3.806315768832847e-06, "loss": 44.5262, "step": 309530 }, { "epoch": 0.6252903840948298, "grad_norm": 390.832275390625, "learning_rate": 3.805976799374972e-06, "loss": 43.467, "step": 309540 }, { "epoch": 0.6253105847275137, "grad_norm": 647.9263305664062, "learning_rate": 3.8056378357366224e-06, "loss": 20.2768, "step": 309550 }, { "epoch": 0.6253307853601975, "grad_norm": 167.531005859375, "learning_rate": 3.8052988779194478e-06, "loss": 7.7091, "step": 309560 }, { "epoch": 0.6253509859928813, "grad_norm": 307.0677490234375, "learning_rate": 3.804959925925102e-06, "loss": 16.0153, "step": 309570 }, { "epoch": 0.6253711866255651, "grad_norm": 326.3149719238281, "learning_rate": 3.8046209797552353e-06, "loss": 10.6156, "step": 309580 }, { "epoch": 0.6253913872582489, "grad_norm": 71.02802276611328, "learning_rate": 3.804282039411504e-06, "loss": 25.8399, "step": 309590 }, { "epoch": 0.6254115878909328, "grad_norm": 35.13526153564453, "learning_rate": 3.8039431048955537e-06, "loss": 24.6193, "step": 309600 }, { "epoch": 0.6254317885236166, "grad_norm": 987.6118774414062, "learning_rate": 3.8036041762090416e-06, "loss": 12.8252, "step": 309610 }, { "epoch": 0.6254519891563004, "grad_norm": 401.3702087402344, "learning_rate": 3.8032652533536173e-06, "loss": 21.9011, "step": 309620 }, { "epoch": 0.6254721897889842, "grad_norm": 359.6427917480469, "learning_rate": 3.802926336330933e-06, "loss": 22.5889, "step": 309630 }, { "epoch": 0.625492390421668, "grad_norm": 8.788739204406738, "learning_rate": 3.80258742514264e-06, "loss": 9.3963, "step": 309640 }, { "epoch": 0.6255125910543519, "grad_norm": 454.0259094238281, "learning_rate": 3.8022485197903924e-06, "loss": 15.8655, "step": 309650 }, { "epoch": 0.6255327916870357, "grad_norm": 294.662109375, "learning_rate": 3.801909620275839e-06, "loss": 26.1775, "step": 309660 }, { "epoch": 0.6255529923197195, "grad_norm": 452.68206787109375, "learning_rate": 3.8015707266006307e-06, "loss": 32.2405, "step": 309670 }, { "epoch": 0.6255731929524033, "grad_norm": 565.408203125, "learning_rate": 3.801231838766425e-06, "loss": 21.2661, "step": 309680 }, { "epoch": 0.625593393585087, "grad_norm": 261.2580871582031, "learning_rate": 3.8008929567748676e-06, "loss": 23.6187, "step": 309690 }, { "epoch": 0.6256135942177709, "grad_norm": 815.6544799804688, "learning_rate": 3.8005540806276132e-06, "loss": 24.3779, "step": 309700 }, { "epoch": 0.6256337948504547, "grad_norm": 316.2920227050781, "learning_rate": 3.800215210326312e-06, "loss": 25.2519, "step": 309710 }, { "epoch": 0.6256539954831385, "grad_norm": 341.334716796875, "learning_rate": 3.7998763458726183e-06, "loss": 20.4134, "step": 309720 }, { "epoch": 0.6256741961158223, 
"grad_norm": 85.28587341308594, "learning_rate": 3.79953748726818e-06, "loss": 21.7302, "step": 309730 }, { "epoch": 0.6256943967485061, "grad_norm": 273.3590393066406, "learning_rate": 3.7991986345146503e-06, "loss": 22.2922, "step": 309740 }, { "epoch": 0.62571459738119, "grad_norm": 473.0509338378906, "learning_rate": 3.798859787613682e-06, "loss": 24.1152, "step": 309750 }, { "epoch": 0.6257347980138738, "grad_norm": 535.754150390625, "learning_rate": 3.7985209465669248e-06, "loss": 28.4428, "step": 309760 }, { "epoch": 0.6257549986465576, "grad_norm": 671.6353759765625, "learning_rate": 3.7981821113760305e-06, "loss": 15.1593, "step": 309770 }, { "epoch": 0.6257751992792414, "grad_norm": 219.89952087402344, "learning_rate": 3.797843282042652e-06, "loss": 11.1584, "step": 309780 }, { "epoch": 0.6257953999119252, "grad_norm": 437.75531005859375, "learning_rate": 3.7975044585684382e-06, "loss": 10.5002, "step": 309790 }, { "epoch": 0.625815600544609, "grad_norm": 177.61224365234375, "learning_rate": 3.797165640955041e-06, "loss": 23.1008, "step": 309800 }, { "epoch": 0.6258358011772929, "grad_norm": 311.1289978027344, "learning_rate": 3.796826829204116e-06, "loss": 22.5967, "step": 309810 }, { "epoch": 0.6258560018099767, "grad_norm": 255.26515197753906, "learning_rate": 3.796488023317308e-06, "loss": 24.2938, "step": 309820 }, { "epoch": 0.6258762024426605, "grad_norm": 243.3842315673828, "learning_rate": 3.796149223296272e-06, "loss": 11.8302, "step": 309830 }, { "epoch": 0.6258964030753443, "grad_norm": 488.80859375, "learning_rate": 3.79581042914266e-06, "loss": 32.4117, "step": 309840 }, { "epoch": 0.6259166037080282, "grad_norm": 558.3318481445312, "learning_rate": 3.7954716408581206e-06, "loss": 21.5674, "step": 309850 }, { "epoch": 0.625936804340712, "grad_norm": 383.96905517578125, "learning_rate": 3.7951328584443063e-06, "loss": 16.1835, "step": 309860 }, { "epoch": 0.6259570049733958, "grad_norm": 351.1549377441406, "learning_rate": 3.7947940819028678e-06, "loss": 16.7501, "step": 309870 }, { "epoch": 0.6259772056060796, "grad_norm": 608.1884155273438, "learning_rate": 3.79445531123546e-06, "loss": 18.2358, "step": 309880 }, { "epoch": 0.6259974062387634, "grad_norm": 593.22021484375, "learning_rate": 3.794116546443727e-06, "loss": 21.3049, "step": 309890 }, { "epoch": 0.6260176068714473, "grad_norm": 402.9985046386719, "learning_rate": 3.793777787529325e-06, "loss": 18.023, "step": 309900 }, { "epoch": 0.6260378075041311, "grad_norm": 287.7555236816406, "learning_rate": 3.793439034493905e-06, "loss": 28.1099, "step": 309910 }, { "epoch": 0.6260580081368149, "grad_norm": 412.591796875, "learning_rate": 3.7931002873391156e-06, "loss": 16.8485, "step": 309920 }, { "epoch": 0.6260782087694987, "grad_norm": 298.6705322265625, "learning_rate": 3.792761546066609e-06, "loss": 10.4336, "step": 309930 }, { "epoch": 0.6260984094021824, "grad_norm": 157.7567901611328, "learning_rate": 3.792422810678037e-06, "loss": 9.5392, "step": 309940 }, { "epoch": 0.6261186100348662, "grad_norm": 10.429789543151855, "learning_rate": 3.7920840811750485e-06, "loss": 10.1944, "step": 309950 }, { "epoch": 0.6261388106675501, "grad_norm": 173.67898559570312, "learning_rate": 3.7917453575592956e-06, "loss": 8.241, "step": 309960 }, { "epoch": 0.6261590113002339, "grad_norm": 150.3766326904297, "learning_rate": 3.7914066398324317e-06, "loss": 15.8893, "step": 309970 }, { "epoch": 0.6261792119329177, "grad_norm": 94.3939208984375, "learning_rate": 3.7910679279961025e-06, "loss": 18.6726, "step": 309980 }, { 
"epoch": 0.6261994125656015, "grad_norm": 316.7868347167969, "learning_rate": 3.790729222051962e-06, "loss": 19.7568, "step": 309990 }, { "epoch": 0.6262196131982853, "grad_norm": 248.9897003173828, "learning_rate": 3.790390522001662e-06, "loss": 27.3872, "step": 310000 }, { "epoch": 0.6262398138309692, "grad_norm": 206.1515350341797, "learning_rate": 3.790051827846851e-06, "loss": 24.9463, "step": 310010 }, { "epoch": 0.626260014463653, "grad_norm": 340.69610595703125, "learning_rate": 3.789713139589181e-06, "loss": 27.058, "step": 310020 }, { "epoch": 0.6262802150963368, "grad_norm": 232.6924591064453, "learning_rate": 3.789374457230301e-06, "loss": 15.3988, "step": 310030 }, { "epoch": 0.6263004157290206, "grad_norm": 541.9817504882812, "learning_rate": 3.789035780771866e-06, "loss": 36.8641, "step": 310040 }, { "epoch": 0.6263206163617044, "grad_norm": 483.5481262207031, "learning_rate": 3.7886971102155205e-06, "loss": 48.6675, "step": 310050 }, { "epoch": 0.6263408169943883, "grad_norm": 46.497520446777344, "learning_rate": 3.78835844556292e-06, "loss": 15.9449, "step": 310060 }, { "epoch": 0.6263610176270721, "grad_norm": 92.47747039794922, "learning_rate": 3.7880197868157143e-06, "loss": 15.4105, "step": 310070 }, { "epoch": 0.6263812182597559, "grad_norm": 259.61871337890625, "learning_rate": 3.7876811339755522e-06, "loss": 17.3801, "step": 310080 }, { "epoch": 0.6264014188924397, "grad_norm": 244.34396362304688, "learning_rate": 3.7873424870440845e-06, "loss": 21.6106, "step": 310090 }, { "epoch": 0.6264216195251235, "grad_norm": 634.0042114257812, "learning_rate": 3.787003846022964e-06, "loss": 32.435, "step": 310100 }, { "epoch": 0.6264418201578074, "grad_norm": 480.94622802734375, "learning_rate": 3.786665210913839e-06, "loss": 15.1496, "step": 310110 }, { "epoch": 0.6264620207904912, "grad_norm": 491.8776550292969, "learning_rate": 3.786326581718359e-06, "loss": 15.3441, "step": 310120 }, { "epoch": 0.626482221423175, "grad_norm": 540.437255859375, "learning_rate": 3.785987958438179e-06, "loss": 16.8419, "step": 310130 }, { "epoch": 0.6265024220558588, "grad_norm": 169.4524383544922, "learning_rate": 3.785649341074944e-06, "loss": 17.0484, "step": 310140 }, { "epoch": 0.6265226226885426, "grad_norm": 538.591064453125, "learning_rate": 3.785310729630307e-06, "loss": 20.2068, "step": 310150 }, { "epoch": 0.6265428233212265, "grad_norm": 571.2584838867188, "learning_rate": 3.784972124105919e-06, "loss": 18.1828, "step": 310160 }, { "epoch": 0.6265630239539103, "grad_norm": 177.67758178710938, "learning_rate": 3.7846335245034304e-06, "loss": 23.9734, "step": 310170 }, { "epoch": 0.6265832245865941, "grad_norm": 356.0690002441406, "learning_rate": 3.784294930824489e-06, "loss": 16.6091, "step": 310180 }, { "epoch": 0.6266034252192779, "grad_norm": 701.2662353515625, "learning_rate": 3.783956343070746e-06, "loss": 22.5459, "step": 310190 }, { "epoch": 0.6266236258519616, "grad_norm": 488.3169250488281, "learning_rate": 3.7836177612438557e-06, "loss": 17.9133, "step": 310200 }, { "epoch": 0.6266438264846454, "grad_norm": 568.4744262695312, "learning_rate": 3.7832791853454616e-06, "loss": 34.4656, "step": 310210 }, { "epoch": 0.6266640271173293, "grad_norm": 627.6993408203125, "learning_rate": 3.782940615377218e-06, "loss": 17.7456, "step": 310220 }, { "epoch": 0.6266842277500131, "grad_norm": 429.12939453125, "learning_rate": 3.7826020513407753e-06, "loss": 18.8567, "step": 310230 }, { "epoch": 0.6267044283826969, "grad_norm": 638.3252563476562, "learning_rate": 
3.7822634932377814e-06, "loss": 27.4842, "step": 310240 }, { "epoch": 0.6267246290153807, "grad_norm": 44.70691680908203, "learning_rate": 3.7819249410698877e-06, "loss": 25.7978, "step": 310250 }, { "epoch": 0.6267448296480645, "grad_norm": 253.63330078125, "learning_rate": 3.7815863948387455e-06, "loss": 12.2805, "step": 310260 }, { "epoch": 0.6267650302807484, "grad_norm": 644.2283325195312, "learning_rate": 3.7812478545460017e-06, "loss": 24.6817, "step": 310270 }, { "epoch": 0.6267852309134322, "grad_norm": 488.9724426269531, "learning_rate": 3.7809093201933078e-06, "loss": 33.4482, "step": 310280 }, { "epoch": 0.626805431546116, "grad_norm": 310.28338623046875, "learning_rate": 3.7805707917823165e-06, "loss": 16.1545, "step": 310290 }, { "epoch": 0.6268256321787998, "grad_norm": 294.4074401855469, "learning_rate": 3.7802322693146726e-06, "loss": 28.3809, "step": 310300 }, { "epoch": 0.6268458328114836, "grad_norm": 351.2818603515625, "learning_rate": 3.7798937527920294e-06, "loss": 20.5131, "step": 310310 }, { "epoch": 0.6268660334441675, "grad_norm": 283.7843322753906, "learning_rate": 3.7795552422160364e-06, "loss": 20.6313, "step": 310320 }, { "epoch": 0.6268862340768513, "grad_norm": 283.344970703125, "learning_rate": 3.779216737588344e-06, "loss": 24.1549, "step": 310330 }, { "epoch": 0.6269064347095351, "grad_norm": 543.825927734375, "learning_rate": 3.7788782389105994e-06, "loss": 13.9601, "step": 310340 }, { "epoch": 0.6269266353422189, "grad_norm": 472.18084716796875, "learning_rate": 3.778539746184454e-06, "loss": 42.524, "step": 310350 }, { "epoch": 0.6269468359749027, "grad_norm": 314.9637756347656, "learning_rate": 3.77820125941156e-06, "loss": 37.5033, "step": 310360 }, { "epoch": 0.6269670366075866, "grad_norm": 219.22573852539062, "learning_rate": 3.7778627785935627e-06, "loss": 25.2901, "step": 310370 }, { "epoch": 0.6269872372402704, "grad_norm": 186.105224609375, "learning_rate": 3.777524303732115e-06, "loss": 16.8399, "step": 310380 }, { "epoch": 0.6270074378729542, "grad_norm": 413.4272155761719, "learning_rate": 3.777185834828866e-06, "loss": 24.5102, "step": 310390 }, { "epoch": 0.627027638505638, "grad_norm": 345.4469299316406, "learning_rate": 3.776847371885464e-06, "loss": 19.8398, "step": 310400 }, { "epoch": 0.6270478391383218, "grad_norm": 323.447509765625, "learning_rate": 3.77650891490356e-06, "loss": 15.6293, "step": 310410 }, { "epoch": 0.6270680397710057, "grad_norm": 263.461181640625, "learning_rate": 3.776170463884804e-06, "loss": 13.2407, "step": 310420 }, { "epoch": 0.6270882404036895, "grad_norm": 169.77740478515625, "learning_rate": 3.775832018830843e-06, "loss": 22.9864, "step": 310430 }, { "epoch": 0.6271084410363733, "grad_norm": 720.8308715820312, "learning_rate": 3.7754935797433284e-06, "loss": 23.985, "step": 310440 }, { "epoch": 0.6271286416690571, "grad_norm": 538.3779296875, "learning_rate": 3.7751551466239113e-06, "loss": 9.9866, "step": 310450 }, { "epoch": 0.6271488423017408, "grad_norm": 314.5245361328125, "learning_rate": 3.774816719474238e-06, "loss": 16.5886, "step": 310460 }, { "epoch": 0.6271690429344247, "grad_norm": 285.7383117675781, "learning_rate": 3.77447829829596e-06, "loss": 25.6008, "step": 310470 }, { "epoch": 0.6271892435671085, "grad_norm": 823.970947265625, "learning_rate": 3.7741398830907256e-06, "loss": 24.0385, "step": 310480 }, { "epoch": 0.6272094441997923, "grad_norm": 648.63427734375, "learning_rate": 3.7738014738601856e-06, "loss": 21.385, "step": 310490 }, { "epoch": 0.6272296448324761, "grad_norm": 
459.6174011230469, "learning_rate": 3.7734630706059873e-06, "loss": 19.2144, "step": 310500 }, { "epoch": 0.6272498454651599, "grad_norm": 404.1106872558594, "learning_rate": 3.7731246733297816e-06, "loss": 30.6253, "step": 310510 }, { "epoch": 0.6272700460978438, "grad_norm": 747.3106689453125, "learning_rate": 3.772786282033218e-06, "loss": 30.2892, "step": 310520 }, { "epoch": 0.6272902467305276, "grad_norm": 395.8547058105469, "learning_rate": 3.7724478967179457e-06, "loss": 22.2229, "step": 310530 }, { "epoch": 0.6273104473632114, "grad_norm": 424.5818176269531, "learning_rate": 3.7721095173856126e-06, "loss": 23.7399, "step": 310540 }, { "epoch": 0.6273306479958952, "grad_norm": 326.8860778808594, "learning_rate": 3.7717711440378695e-06, "loss": 19.5756, "step": 310550 }, { "epoch": 0.627350848628579, "grad_norm": 653.8480224609375, "learning_rate": 3.771432776676364e-06, "loss": 23.9726, "step": 310560 }, { "epoch": 0.6273710492612629, "grad_norm": 372.75592041015625, "learning_rate": 3.771094415302745e-06, "loss": 16.9206, "step": 310570 }, { "epoch": 0.6273912498939467, "grad_norm": 274.6285705566406, "learning_rate": 3.7707560599186664e-06, "loss": 31.2049, "step": 310580 }, { "epoch": 0.6274114505266305, "grad_norm": 58.87826156616211, "learning_rate": 3.7704177105257707e-06, "loss": 34.7307, "step": 310590 }, { "epoch": 0.6274316511593143, "grad_norm": 473.7864990234375, "learning_rate": 3.77007936712571e-06, "loss": 32.2064, "step": 310600 }, { "epoch": 0.6274518517919981, "grad_norm": 375.2981872558594, "learning_rate": 3.769741029720134e-06, "loss": 26.7564, "step": 310610 }, { "epoch": 0.627472052424682, "grad_norm": 242.23406982421875, "learning_rate": 3.769402698310692e-06, "loss": 11.6546, "step": 310620 }, { "epoch": 0.6274922530573658, "grad_norm": 734.3656005859375, "learning_rate": 3.7690643728990306e-06, "loss": 18.6892, "step": 310630 }, { "epoch": 0.6275124536900496, "grad_norm": 1153.2177734375, "learning_rate": 3.7687260534868e-06, "loss": 27.5929, "step": 310640 }, { "epoch": 0.6275326543227334, "grad_norm": 521.0066528320312, "learning_rate": 3.7683877400756513e-06, "loss": 13.9517, "step": 310650 }, { "epoch": 0.6275528549554172, "grad_norm": 916.9699096679688, "learning_rate": 3.768049432667229e-06, "loss": 10.7277, "step": 310660 }, { "epoch": 0.6275730555881011, "grad_norm": 442.46075439453125, "learning_rate": 3.7677111312631848e-06, "loss": 12.1446, "step": 310670 }, { "epoch": 0.6275932562207849, "grad_norm": 144.5475311279297, "learning_rate": 3.7673728358651683e-06, "loss": 25.2282, "step": 310680 }, { "epoch": 0.6276134568534687, "grad_norm": 227.0414581298828, "learning_rate": 3.7670345464748266e-06, "loss": 26.1985, "step": 310690 }, { "epoch": 0.6276336574861525, "grad_norm": 123.23226928710938, "learning_rate": 3.7666962630938084e-06, "loss": 9.3128, "step": 310700 }, { "epoch": 0.6276538581188362, "grad_norm": 381.5736999511719, "learning_rate": 3.7663579857237642e-06, "loss": 11.2019, "step": 310710 }, { "epoch": 0.62767405875152, "grad_norm": 282.8387451171875, "learning_rate": 3.7660197143663407e-06, "loss": 23.1677, "step": 310720 }, { "epoch": 0.6276942593842039, "grad_norm": 281.12042236328125, "learning_rate": 3.7656814490231864e-06, "loss": 30.3826, "step": 310730 }, { "epoch": 0.6277144600168877, "grad_norm": 271.38616943359375, "learning_rate": 3.765343189695954e-06, "loss": 34.6207, "step": 310740 }, { "epoch": 0.6277346606495715, "grad_norm": 295.36065673828125, "learning_rate": 3.765004936386286e-06, "loss": 12.8942, "step": 310750 
}, { "epoch": 0.6277548612822553, "grad_norm": 93.36185455322266, "learning_rate": 3.764666689095835e-06, "loss": 19.0268, "step": 310760 }, { "epoch": 0.6277750619149391, "grad_norm": 547.2508544921875, "learning_rate": 3.7643284478262494e-06, "loss": 20.2266, "step": 310770 }, { "epoch": 0.627795262547623, "grad_norm": 323.8477783203125, "learning_rate": 3.7639902125791774e-06, "loss": 25.2107, "step": 310780 }, { "epoch": 0.6278154631803068, "grad_norm": 288.24127197265625, "learning_rate": 3.7636519833562668e-06, "loss": 28.4912, "step": 310790 }, { "epoch": 0.6278356638129906, "grad_norm": 0.0, "learning_rate": 3.7633137601591647e-06, "loss": 36.0997, "step": 310800 }, { "epoch": 0.6278558644456744, "grad_norm": 452.8955383300781, "learning_rate": 3.762975542989525e-06, "loss": 21.4517, "step": 310810 }, { "epoch": 0.6278760650783582, "grad_norm": 303.65679931640625, "learning_rate": 3.762637331848989e-06, "loss": 20.1704, "step": 310820 }, { "epoch": 0.6278962657110421, "grad_norm": 466.28228759765625, "learning_rate": 3.76229912673921e-06, "loss": 30.9657, "step": 310830 }, { "epoch": 0.6279164663437259, "grad_norm": 189.0724639892578, "learning_rate": 3.761960927661836e-06, "loss": 8.2174, "step": 310840 }, { "epoch": 0.6279366669764097, "grad_norm": 487.91607666015625, "learning_rate": 3.761622734618513e-06, "loss": 11.4559, "step": 310850 }, { "epoch": 0.6279568676090935, "grad_norm": 331.7529296875, "learning_rate": 3.7612845476108906e-06, "loss": 13.7425, "step": 310860 }, { "epoch": 0.6279770682417773, "grad_norm": 489.21881103515625, "learning_rate": 3.7609463666406175e-06, "loss": 13.6078, "step": 310870 }, { "epoch": 0.6279972688744612, "grad_norm": 186.98060607910156, "learning_rate": 3.7606081917093416e-06, "loss": 22.1829, "step": 310880 }, { "epoch": 0.628017469507145, "grad_norm": 330.9054870605469, "learning_rate": 3.7602700228187096e-06, "loss": 16.8957, "step": 310890 }, { "epoch": 0.6280376701398288, "grad_norm": 496.9715576171875, "learning_rate": 3.759931859970374e-06, "loss": 16.041, "step": 310900 }, { "epoch": 0.6280578707725126, "grad_norm": 460.6424255371094, "learning_rate": 3.7595937031659775e-06, "loss": 12.1437, "step": 310910 }, { "epoch": 0.6280780714051964, "grad_norm": 303.2657470703125, "learning_rate": 3.7592555524071716e-06, "loss": 8.7752, "step": 310920 }, { "epoch": 0.6280982720378803, "grad_norm": 712.6080932617188, "learning_rate": 3.7589174076956036e-06, "loss": 27.3341, "step": 310930 }, { "epoch": 0.6281184726705641, "grad_norm": 289.4288635253906, "learning_rate": 3.7585792690329224e-06, "loss": 18.1774, "step": 310940 }, { "epoch": 0.6281386733032479, "grad_norm": 559.0425415039062, "learning_rate": 3.758241136420775e-06, "loss": 23.819, "step": 310950 }, { "epoch": 0.6281588739359317, "grad_norm": 271.9239196777344, "learning_rate": 3.7579030098608077e-06, "loss": 12.9474, "step": 310960 }, { "epoch": 0.6281790745686154, "grad_norm": 141.35003662109375, "learning_rate": 3.7575648893546745e-06, "loss": 13.6415, "step": 310970 }, { "epoch": 0.6281992752012993, "grad_norm": 308.360595703125, "learning_rate": 3.757226774904016e-06, "loss": 20.1192, "step": 310980 }, { "epoch": 0.6282194758339831, "grad_norm": 738.3095092773438, "learning_rate": 3.7568886665104836e-06, "loss": 16.2548, "step": 310990 }, { "epoch": 0.6282396764666669, "grad_norm": 2.4224720001220703, "learning_rate": 3.756550564175727e-06, "loss": 14.6956, "step": 311000 }, { "epoch": 0.6282598770993507, "grad_norm": 794.7053833007812, "learning_rate": 3.756212467901391e-06, 
"loss": 14.715, "step": 311010 }, { "epoch": 0.6282800777320345, "grad_norm": 13.162860870361328, "learning_rate": 3.755874377689125e-06, "loss": 11.476, "step": 311020 }, { "epoch": 0.6283002783647184, "grad_norm": 348.586669921875, "learning_rate": 3.7555362935405766e-06, "loss": 11.4744, "step": 311030 }, { "epoch": 0.6283204789974022, "grad_norm": 177.64598083496094, "learning_rate": 3.7551982154573928e-06, "loss": 23.1496, "step": 311040 }, { "epoch": 0.628340679630086, "grad_norm": 467.7887268066406, "learning_rate": 3.75486014344122e-06, "loss": 21.1767, "step": 311050 }, { "epoch": 0.6283608802627698, "grad_norm": 406.47515869140625, "learning_rate": 3.7545220774937115e-06, "loss": 39.5656, "step": 311060 }, { "epoch": 0.6283810808954536, "grad_norm": 168.41502380371094, "learning_rate": 3.754184017616509e-06, "loss": 23.2303, "step": 311070 }, { "epoch": 0.6284012815281375, "grad_norm": 72.38956451416016, "learning_rate": 3.7538459638112635e-06, "loss": 26.4261, "step": 311080 }, { "epoch": 0.6284214821608213, "grad_norm": 300.6415710449219, "learning_rate": 3.7535079160796207e-06, "loss": 19.0704, "step": 311090 }, { "epoch": 0.6284416827935051, "grad_norm": 391.12310791015625, "learning_rate": 3.7531698744232307e-06, "loss": 21.1738, "step": 311100 }, { "epoch": 0.6284618834261889, "grad_norm": 82.55567169189453, "learning_rate": 3.7528318388437375e-06, "loss": 38.9412, "step": 311110 }, { "epoch": 0.6284820840588727, "grad_norm": 283.8590393066406, "learning_rate": 3.752493809342791e-06, "loss": 20.1902, "step": 311120 }, { "epoch": 0.6285022846915566, "grad_norm": 518.7247314453125, "learning_rate": 3.7521557859220405e-06, "loss": 20.6802, "step": 311130 }, { "epoch": 0.6285224853242404, "grad_norm": 338.15576171875, "learning_rate": 3.751817768583129e-06, "loss": 11.9434, "step": 311140 }, { "epoch": 0.6285426859569242, "grad_norm": 275.5709228515625, "learning_rate": 3.7514797573277075e-06, "loss": 19.4292, "step": 311150 }, { "epoch": 0.628562886589608, "grad_norm": 585.8729858398438, "learning_rate": 3.751141752157423e-06, "loss": 20.0025, "step": 311160 }, { "epoch": 0.6285830872222918, "grad_norm": 671.9908447265625, "learning_rate": 3.7508037530739207e-06, "loss": 23.6709, "step": 311170 }, { "epoch": 0.6286032878549757, "grad_norm": 299.63189697265625, "learning_rate": 3.7504657600788484e-06, "loss": 19.1549, "step": 311180 }, { "epoch": 0.6286234884876595, "grad_norm": 507.8825378417969, "learning_rate": 3.750127773173858e-06, "loss": 18.7713, "step": 311190 }, { "epoch": 0.6286436891203433, "grad_norm": 126.44001770019531, "learning_rate": 3.74978979236059e-06, "loss": 16.1273, "step": 311200 }, { "epoch": 0.6286638897530271, "grad_norm": 3399.062744140625, "learning_rate": 3.7494518176406956e-06, "loss": 28.3728, "step": 311210 }, { "epoch": 0.6286840903857108, "grad_norm": 226.7476806640625, "learning_rate": 3.7491138490158213e-06, "loss": 12.0473, "step": 311220 }, { "epoch": 0.6287042910183946, "grad_norm": 197.57363891601562, "learning_rate": 3.7487758864876157e-06, "loss": 15.778, "step": 311230 }, { "epoch": 0.6287244916510785, "grad_norm": 279.3779296875, "learning_rate": 3.7484379300577233e-06, "loss": 16.9205, "step": 311240 }, { "epoch": 0.6287446922837623, "grad_norm": 504.41650390625, "learning_rate": 3.748099979727792e-06, "loss": 22.9833, "step": 311250 }, { "epoch": 0.6287648929164461, "grad_norm": 263.5953063964844, "learning_rate": 3.7477620354994733e-06, "loss": 16.6843, "step": 311260 }, { "epoch": 0.6287850935491299, "grad_norm": 
2.402722120285034, "learning_rate": 3.7474240973744063e-06, "loss": 8.7284, "step": 311270 }, { "epoch": 0.6288052941818137, "grad_norm": 89.73186492919922, "learning_rate": 3.7470861653542438e-06, "loss": 12.8616, "step": 311280 }, { "epoch": 0.6288254948144976, "grad_norm": 234.08595275878906, "learning_rate": 3.746748239440633e-06, "loss": 27.2208, "step": 311290 }, { "epoch": 0.6288456954471814, "grad_norm": 146.21871948242188, "learning_rate": 3.7464103196352176e-06, "loss": 17.5699, "step": 311300 }, { "epoch": 0.6288658960798652, "grad_norm": 1773.0771484375, "learning_rate": 3.746072405939646e-06, "loss": 50.9809, "step": 311310 }, { "epoch": 0.628886096712549, "grad_norm": 215.26148986816406, "learning_rate": 3.7457344983555666e-06, "loss": 29.4295, "step": 311320 }, { "epoch": 0.6289062973452328, "grad_norm": 429.1287536621094, "learning_rate": 3.7453965968846244e-06, "loss": 14.8721, "step": 311330 }, { "epoch": 0.6289264979779167, "grad_norm": 365.86956787109375, "learning_rate": 3.7450587015284655e-06, "loss": 15.0175, "step": 311340 }, { "epoch": 0.6289466986106005, "grad_norm": 155.48924255371094, "learning_rate": 3.7447208122887425e-06, "loss": 28.3511, "step": 311350 }, { "epoch": 0.6289668992432843, "grad_norm": 1091.9840087890625, "learning_rate": 3.744382929167094e-06, "loss": 22.5819, "step": 311360 }, { "epoch": 0.6289870998759681, "grad_norm": 469.363525390625, "learning_rate": 3.744045052165172e-06, "loss": 21.6345, "step": 311370 }, { "epoch": 0.629007300508652, "grad_norm": 602.15234375, "learning_rate": 3.7437071812846216e-06, "loss": 11.4731, "step": 311380 }, { "epoch": 0.6290275011413358, "grad_norm": 235.06373596191406, "learning_rate": 3.7433693165270918e-06, "loss": 15.778, "step": 311390 }, { "epoch": 0.6290477017740196, "grad_norm": 307.0328674316406, "learning_rate": 3.7430314578942263e-06, "loss": 19.5603, "step": 311400 }, { "epoch": 0.6290679024067034, "grad_norm": 518.789306640625, "learning_rate": 3.7426936053876715e-06, "loss": 24.6092, "step": 311410 }, { "epoch": 0.6290881030393872, "grad_norm": 1032.965087890625, "learning_rate": 3.74235575900908e-06, "loss": 21.2157, "step": 311420 }, { "epoch": 0.629108303672071, "grad_norm": 1.090482234954834, "learning_rate": 3.742017918760089e-06, "loss": 14.1625, "step": 311430 }, { "epoch": 0.6291285043047549, "grad_norm": 374.23968505859375, "learning_rate": 3.741680084642353e-06, "loss": 13.4388, "step": 311440 }, { "epoch": 0.6291487049374387, "grad_norm": 486.8422546386719, "learning_rate": 3.7413422566575153e-06, "loss": 17.2724, "step": 311450 }, { "epoch": 0.6291689055701225, "grad_norm": 76.85228729248047, "learning_rate": 3.741004434807223e-06, "loss": 14.5166, "step": 311460 }, { "epoch": 0.6291891062028063, "grad_norm": 596.9525756835938, "learning_rate": 3.7406666190931213e-06, "loss": 12.2197, "step": 311470 }, { "epoch": 0.62920930683549, "grad_norm": 241.2703094482422, "learning_rate": 3.740328809516859e-06, "loss": 9.6603, "step": 311480 }, { "epoch": 0.6292295074681739, "grad_norm": 472.1174011230469, "learning_rate": 3.7399910060800806e-06, "loss": 19.4802, "step": 311490 }, { "epoch": 0.6292497081008577, "grad_norm": 348.6268615722656, "learning_rate": 3.7396532087844318e-06, "loss": 17.3046, "step": 311500 }, { "epoch": 0.6292699087335415, "grad_norm": 490.8245849609375, "learning_rate": 3.7393154176315637e-06, "loss": 15.2983, "step": 311510 }, { "epoch": 0.6292901093662253, "grad_norm": 168.55055236816406, "learning_rate": 3.7389776326231163e-06, "loss": 14.4658, "step": 311520 }, { 
"epoch": 0.6293103099989091, "grad_norm": 235.340576171875, "learning_rate": 3.73863985376074e-06, "loss": 35.2457, "step": 311530 }, { "epoch": 0.629330510631593, "grad_norm": 492.92547607421875, "learning_rate": 3.73830208104608e-06, "loss": 19.1454, "step": 311540 }, { "epoch": 0.6293507112642768, "grad_norm": 261.28448486328125, "learning_rate": 3.7379643144807835e-06, "loss": 20.8435, "step": 311550 }, { "epoch": 0.6293709118969606, "grad_norm": 269.05230712890625, "learning_rate": 3.737626554066495e-06, "loss": 27.2126, "step": 311560 }, { "epoch": 0.6293911125296444, "grad_norm": 1340.7391357421875, "learning_rate": 3.7372887998048608e-06, "loss": 58.1968, "step": 311570 }, { "epoch": 0.6294113131623282, "grad_norm": 156.26901245117188, "learning_rate": 3.7369510516975303e-06, "loss": 15.1275, "step": 311580 }, { "epoch": 0.629431513795012, "grad_norm": 232.62744140625, "learning_rate": 3.736613309746145e-06, "loss": 18.4961, "step": 311590 }, { "epoch": 0.6294517144276959, "grad_norm": 362.4714660644531, "learning_rate": 3.736275573952354e-06, "loss": 25.5498, "step": 311600 }, { "epoch": 0.6294719150603797, "grad_norm": 1.2732248306274414, "learning_rate": 3.735937844317803e-06, "loss": 10.7706, "step": 311610 }, { "epoch": 0.6294921156930635, "grad_norm": 22.51714324951172, "learning_rate": 3.735600120844137e-06, "loss": 26.3688, "step": 311620 }, { "epoch": 0.6295123163257473, "grad_norm": 1563.1842041015625, "learning_rate": 3.735262403533002e-06, "loss": 31.6529, "step": 311630 }, { "epoch": 0.6295325169584312, "grad_norm": 315.1824645996094, "learning_rate": 3.7349246923860465e-06, "loss": 9.2269, "step": 311640 }, { "epoch": 0.629552717591115, "grad_norm": 768.6835327148438, "learning_rate": 3.7345869874049136e-06, "loss": 17.4475, "step": 311650 }, { "epoch": 0.6295729182237988, "grad_norm": 954.1617431640625, "learning_rate": 3.734249288591249e-06, "loss": 14.1233, "step": 311660 }, { "epoch": 0.6295931188564826, "grad_norm": 656.4332885742188, "learning_rate": 3.733911595946701e-06, "loss": 17.6862, "step": 311670 }, { "epoch": 0.6296133194891664, "grad_norm": 489.056884765625, "learning_rate": 3.7335739094729153e-06, "loss": 22.9326, "step": 311680 }, { "epoch": 0.6296335201218503, "grad_norm": 345.76007080078125, "learning_rate": 3.7332362291715353e-06, "loss": 16.6467, "step": 311690 }, { "epoch": 0.6296537207545341, "grad_norm": 861.7413330078125, "learning_rate": 3.7328985550442086e-06, "loss": 26.7971, "step": 311700 }, { "epoch": 0.6296739213872179, "grad_norm": 249.65028381347656, "learning_rate": 3.7325608870925817e-06, "loss": 26.8184, "step": 311710 }, { "epoch": 0.6296941220199017, "grad_norm": 374.0284423828125, "learning_rate": 3.7322232253182984e-06, "loss": 18.6817, "step": 311720 }, { "epoch": 0.6297143226525855, "grad_norm": 8.936347961425781, "learning_rate": 3.731885569723004e-06, "loss": 16.0041, "step": 311730 }, { "epoch": 0.6297345232852692, "grad_norm": 359.57806396484375, "learning_rate": 3.7315479203083483e-06, "loss": 7.8561, "step": 311740 }, { "epoch": 0.6297547239179531, "grad_norm": 796.0985717773438, "learning_rate": 3.7312102770759724e-06, "loss": 17.5286, "step": 311750 }, { "epoch": 0.6297749245506369, "grad_norm": 198.72044372558594, "learning_rate": 3.7308726400275243e-06, "loss": 14.124, "step": 311760 }, { "epoch": 0.6297951251833207, "grad_norm": 567.6104125976562, "learning_rate": 3.7305350091646496e-06, "loss": 24.3485, "step": 311770 }, { "epoch": 0.6298153258160045, "grad_norm": 550.8214721679688, "learning_rate": 
3.7301973844889922e-06, "loss": 23.281, "step": 311780 }, { "epoch": 0.6298355264486883, "grad_norm": 31.740415573120117, "learning_rate": 3.729859766002198e-06, "loss": 11.4073, "step": 311790 }, { "epoch": 0.6298557270813722, "grad_norm": 7.452092170715332, "learning_rate": 3.7295221537059162e-06, "loss": 28.5895, "step": 311800 }, { "epoch": 0.629875927714056, "grad_norm": 260.27752685546875, "learning_rate": 3.729184547601786e-06, "loss": 31.3245, "step": 311810 }, { "epoch": 0.6298961283467398, "grad_norm": 464.4610595703125, "learning_rate": 3.728846947691458e-06, "loss": 23.1242, "step": 311820 }, { "epoch": 0.6299163289794236, "grad_norm": 203.24380493164062, "learning_rate": 3.7285093539765747e-06, "loss": 13.3049, "step": 311830 }, { "epoch": 0.6299365296121074, "grad_norm": 235.9980926513672, "learning_rate": 3.728171766458785e-06, "loss": 8.6781, "step": 311840 }, { "epoch": 0.6299567302447913, "grad_norm": 376.7706604003906, "learning_rate": 3.72783418513973e-06, "loss": 26.8187, "step": 311850 }, { "epoch": 0.6299769308774751, "grad_norm": 231.58253479003906, "learning_rate": 3.727496610021055e-06, "loss": 16.7101, "step": 311860 }, { "epoch": 0.6299971315101589, "grad_norm": 457.01019287109375, "learning_rate": 3.727159041104412e-06, "loss": 10.4923, "step": 311870 }, { "epoch": 0.6300173321428427, "grad_norm": 2049.08837890625, "learning_rate": 3.7268214783914375e-06, "loss": 22.617, "step": 311880 }, { "epoch": 0.6300375327755265, "grad_norm": 251.09019470214844, "learning_rate": 3.7264839218837817e-06, "loss": 11.8721, "step": 311890 }, { "epoch": 0.6300577334082104, "grad_norm": 245.5015411376953, "learning_rate": 3.7261463715830902e-06, "loss": 11.1776, "step": 311900 }, { "epoch": 0.6300779340408942, "grad_norm": 606.9202270507812, "learning_rate": 3.7258088274910054e-06, "loss": 12.1486, "step": 311910 }, { "epoch": 0.630098134673578, "grad_norm": 2910.516357421875, "learning_rate": 3.725471289609174e-06, "loss": 40.8246, "step": 311920 }, { "epoch": 0.6301183353062618, "grad_norm": 465.2891540527344, "learning_rate": 3.7251337579392415e-06, "loss": 10.9321, "step": 311930 }, { "epoch": 0.6301385359389456, "grad_norm": 212.12435913085938, "learning_rate": 3.724796232482852e-06, "loss": 26.7457, "step": 311940 }, { "epoch": 0.6301587365716295, "grad_norm": 591.8861694335938, "learning_rate": 3.7244587132416497e-06, "loss": 25.5461, "step": 311950 }, { "epoch": 0.6301789372043133, "grad_norm": 407.2118225097656, "learning_rate": 3.7241212002172846e-06, "loss": 10.734, "step": 311960 }, { "epoch": 0.6301991378369971, "grad_norm": 366.46966552734375, "learning_rate": 3.723783693411394e-06, "loss": 18.4331, "step": 311970 }, { "epoch": 0.6302193384696809, "grad_norm": 2.5028135776519775, "learning_rate": 3.723446192825628e-06, "loss": 9.702, "step": 311980 }, { "epoch": 0.6302395391023646, "grad_norm": 440.0711364746094, "learning_rate": 3.7231086984616312e-06, "loss": 5.4914, "step": 311990 }, { "epoch": 0.6302597397350485, "grad_norm": 218.40322875976562, "learning_rate": 3.7227712103210485e-06, "loss": 9.4796, "step": 312000 }, { "epoch": 0.6302799403677323, "grad_norm": 123.79711151123047, "learning_rate": 3.722433728405522e-06, "loss": 11.0336, "step": 312010 }, { "epoch": 0.6303001410004161, "grad_norm": 235.97381591796875, "learning_rate": 3.7220962527166994e-06, "loss": 9.7656, "step": 312020 }, { "epoch": 0.6303203416330999, "grad_norm": 372.2127380371094, "learning_rate": 3.7217587832562264e-06, "loss": 23.8709, "step": 312030 }, { "epoch": 0.6303405422657837, 
"grad_norm": 144.38809204101562, "learning_rate": 3.7214213200257433e-06, "loss": 17.704, "step": 312040 }, { "epoch": 0.6303607428984676, "grad_norm": 559.6666870117188, "learning_rate": 3.7210838630268986e-06, "loss": 12.9917, "step": 312050 }, { "epoch": 0.6303809435311514, "grad_norm": 380.5607604980469, "learning_rate": 3.720746412261337e-06, "loss": 12.0142, "step": 312060 }, { "epoch": 0.6304011441638352, "grad_norm": 447.0756530761719, "learning_rate": 3.7204089677307015e-06, "loss": 41.8091, "step": 312070 }, { "epoch": 0.630421344796519, "grad_norm": 12.453298568725586, "learning_rate": 3.7200715294366376e-06, "loss": 8.5864, "step": 312080 }, { "epoch": 0.6304415454292028, "grad_norm": 476.9683837890625, "learning_rate": 3.7197340973807905e-06, "loss": 14.3071, "step": 312090 }, { "epoch": 0.6304617460618867, "grad_norm": 312.4320068359375, "learning_rate": 3.7193966715648026e-06, "loss": 16.1088, "step": 312100 }, { "epoch": 0.6304819466945705, "grad_norm": 666.4554443359375, "learning_rate": 3.7190592519903198e-06, "loss": 14.9029, "step": 312110 }, { "epoch": 0.6305021473272543, "grad_norm": 502.0362854003906, "learning_rate": 3.71872183865899e-06, "loss": 23.3207, "step": 312120 }, { "epoch": 0.6305223479599381, "grad_norm": 325.1219482421875, "learning_rate": 3.7183844315724505e-06, "loss": 15.0487, "step": 312130 }, { "epoch": 0.6305425485926219, "grad_norm": 390.6490173339844, "learning_rate": 3.718047030732352e-06, "loss": 27.0646, "step": 312140 }, { "epoch": 0.6305627492253058, "grad_norm": 776.7241821289062, "learning_rate": 3.7177096361403362e-06, "loss": 14.8082, "step": 312150 }, { "epoch": 0.6305829498579896, "grad_norm": 1288.8662109375, "learning_rate": 3.717372247798049e-06, "loss": 19.7318, "step": 312160 }, { "epoch": 0.6306031504906734, "grad_norm": 469.3023986816406, "learning_rate": 3.717034865707133e-06, "loss": 19.5128, "step": 312170 }, { "epoch": 0.6306233511233572, "grad_norm": 461.573974609375, "learning_rate": 3.7166974898692324e-06, "loss": 22.3097, "step": 312180 }, { "epoch": 0.630643551756041, "grad_norm": 1052.504150390625, "learning_rate": 3.7163601202859963e-06, "loss": 26.9412, "step": 312190 }, { "epoch": 0.6306637523887249, "grad_norm": 786.5824584960938, "learning_rate": 3.716022756959061e-06, "loss": 26.1058, "step": 312200 }, { "epoch": 0.6306839530214087, "grad_norm": 367.0870666503906, "learning_rate": 3.715685399890078e-06, "loss": 23.6425, "step": 312210 }, { "epoch": 0.6307041536540925, "grad_norm": 503.5752868652344, "learning_rate": 3.7153480490806883e-06, "loss": 10.9421, "step": 312220 }, { "epoch": 0.6307243542867763, "grad_norm": 342.5792236328125, "learning_rate": 3.715010704532535e-06, "loss": 14.9556, "step": 312230 }, { "epoch": 0.6307445549194601, "grad_norm": 385.3841857910156, "learning_rate": 3.7146733662472645e-06, "loss": 24.8425, "step": 312240 }, { "epoch": 0.6307647555521438, "grad_norm": 482.5807189941406, "learning_rate": 3.7143360342265206e-06, "loss": 34.6575, "step": 312250 }, { "epoch": 0.6307849561848277, "grad_norm": 468.9497375488281, "learning_rate": 3.7139987084719463e-06, "loss": 15.1575, "step": 312260 }, { "epoch": 0.6308051568175115, "grad_norm": 313.30010986328125, "learning_rate": 3.7136613889851847e-06, "loss": 14.4734, "step": 312270 }, { "epoch": 0.6308253574501953, "grad_norm": 363.6047668457031, "learning_rate": 3.7133240757678835e-06, "loss": 17.5019, "step": 312280 }, { "epoch": 0.6308455580828791, "grad_norm": 829.5293579101562, "learning_rate": 3.7129867688216848e-06, "loss": 43.489, 
"step": 312290 }, { "epoch": 0.6308657587155629, "grad_norm": 271.2547302246094, "learning_rate": 3.7126494681482317e-06, "loss": 30.5318, "step": 312300 }, { "epoch": 0.6308859593482468, "grad_norm": 435.1318664550781, "learning_rate": 3.712312173749169e-06, "loss": 27.9105, "step": 312310 }, { "epoch": 0.6309061599809306, "grad_norm": 616.8460693359375, "learning_rate": 3.7119748856261416e-06, "loss": 17.2781, "step": 312320 }, { "epoch": 0.6309263606136144, "grad_norm": 183.11244201660156, "learning_rate": 3.7116376037807915e-06, "loss": 15.1674, "step": 312330 }, { "epoch": 0.6309465612462982, "grad_norm": 589.213623046875, "learning_rate": 3.7113003282147625e-06, "loss": 25.8361, "step": 312340 }, { "epoch": 0.630966761878982, "grad_norm": 73.51373291015625, "learning_rate": 3.7109630589297014e-06, "loss": 18.547, "step": 312350 }, { "epoch": 0.6309869625116659, "grad_norm": 488.0601501464844, "learning_rate": 3.710625795927249e-06, "loss": 18.0595, "step": 312360 }, { "epoch": 0.6310071631443497, "grad_norm": 586.9658813476562, "learning_rate": 3.7102885392090497e-06, "loss": 15.2669, "step": 312370 }, { "epoch": 0.6310273637770335, "grad_norm": 574.9307861328125, "learning_rate": 3.709951288776749e-06, "loss": 13.9294, "step": 312380 }, { "epoch": 0.6310475644097173, "grad_norm": 412.48822021484375, "learning_rate": 3.7096140446319884e-06, "loss": 25.85, "step": 312390 }, { "epoch": 0.6310677650424011, "grad_norm": 895.47119140625, "learning_rate": 3.709276806776412e-06, "loss": 35.0245, "step": 312400 }, { "epoch": 0.631087965675085, "grad_norm": 314.1524963378906, "learning_rate": 3.7089395752116653e-06, "loss": 30.6317, "step": 312410 }, { "epoch": 0.6311081663077688, "grad_norm": 392.1971435546875, "learning_rate": 3.7086023499393887e-06, "loss": 22.1168, "step": 312420 }, { "epoch": 0.6311283669404526, "grad_norm": 38.94456100463867, "learning_rate": 3.7082651309612283e-06, "loss": 15.1257, "step": 312430 }, { "epoch": 0.6311485675731364, "grad_norm": 572.1344604492188, "learning_rate": 3.7079279182788263e-06, "loss": 23.5438, "step": 312440 }, { "epoch": 0.6311687682058202, "grad_norm": 118.326416015625, "learning_rate": 3.707590711893829e-06, "loss": 21.0824, "step": 312450 }, { "epoch": 0.6311889688385041, "grad_norm": 340.10986328125, "learning_rate": 3.707253511807877e-06, "loss": 16.1145, "step": 312460 }, { "epoch": 0.6312091694711879, "grad_norm": 157.62193298339844, "learning_rate": 3.706916318022612e-06, "loss": 11.8272, "step": 312470 }, { "epoch": 0.6312293701038717, "grad_norm": 295.4103088378906, "learning_rate": 3.7065791305396846e-06, "loss": 25.9566, "step": 312480 }, { "epoch": 0.6312495707365555, "grad_norm": 198.5532989501953, "learning_rate": 3.70624194936073e-06, "loss": 18.1501, "step": 312490 }, { "epoch": 0.6312697713692392, "grad_norm": 152.81765747070312, "learning_rate": 3.705904774487396e-06, "loss": 14.3211, "step": 312500 }, { "epoch": 0.631289972001923, "grad_norm": 379.5767517089844, "learning_rate": 3.7055676059213265e-06, "loss": 14.4697, "step": 312510 }, { "epoch": 0.6313101726346069, "grad_norm": 245.6889190673828, "learning_rate": 3.705230443664163e-06, "loss": 18.7405, "step": 312520 }, { "epoch": 0.6313303732672907, "grad_norm": 36.4300537109375, "learning_rate": 3.704893287717548e-06, "loss": 28.9301, "step": 312530 }, { "epoch": 0.6313505738999745, "grad_norm": 356.6920471191406, "learning_rate": 3.7045561380831287e-06, "loss": 25.2074, "step": 312540 }, { "epoch": 0.6313707745326583, "grad_norm": 235.33872985839844, "learning_rate": 
3.704218994762543e-06, "loss": 17.2906, "step": 312550 }, { "epoch": 0.6313909751653421, "grad_norm": 102.42072296142578, "learning_rate": 3.7038818577574363e-06, "loss": 19.8175, "step": 312560 }, { "epoch": 0.631411175798026, "grad_norm": 181.9062957763672, "learning_rate": 3.7035447270694558e-06, "loss": 10.4626, "step": 312570 }, { "epoch": 0.6314313764307098, "grad_norm": 305.3853759765625, "learning_rate": 3.7032076027002377e-06, "loss": 19.9018, "step": 312580 }, { "epoch": 0.6314515770633936, "grad_norm": 417.63153076171875, "learning_rate": 3.7028704846514296e-06, "loss": 32.7365, "step": 312590 }, { "epoch": 0.6314717776960774, "grad_norm": 547.8931884765625, "learning_rate": 3.7025333729246733e-06, "loss": 21.1438, "step": 312600 }, { "epoch": 0.6314919783287612, "grad_norm": 307.0222473144531, "learning_rate": 3.7021962675216126e-06, "loss": 15.7725, "step": 312610 }, { "epoch": 0.6315121789614451, "grad_norm": 172.6763153076172, "learning_rate": 3.70185916844389e-06, "loss": 14.39, "step": 312620 }, { "epoch": 0.6315323795941289, "grad_norm": 325.3825378417969, "learning_rate": 3.701522075693146e-06, "loss": 16.4441, "step": 312630 }, { "epoch": 0.6315525802268127, "grad_norm": 444.0788879394531, "learning_rate": 3.7011849892710293e-06, "loss": 21.4759, "step": 312640 }, { "epoch": 0.6315727808594965, "grad_norm": 470.6161804199219, "learning_rate": 3.700847909179177e-06, "loss": 49.357, "step": 312650 }, { "epoch": 0.6315929814921803, "grad_norm": 320.4682922363281, "learning_rate": 3.7005108354192356e-06, "loss": 15.674, "step": 312660 }, { "epoch": 0.6316131821248642, "grad_norm": 211.36276245117188, "learning_rate": 3.7001737679928467e-06, "loss": 10.0086, "step": 312670 }, { "epoch": 0.631633382757548, "grad_norm": 726.2916259765625, "learning_rate": 3.6998367069016527e-06, "loss": 33.8859, "step": 312680 }, { "epoch": 0.6316535833902318, "grad_norm": 28.509401321411133, "learning_rate": 3.699499652147297e-06, "loss": 21.0636, "step": 312690 }, { "epoch": 0.6316737840229156, "grad_norm": 381.392822265625, "learning_rate": 3.699162603731423e-06, "loss": 11.1215, "step": 312700 }, { "epoch": 0.6316939846555994, "grad_norm": 734.5407104492188, "learning_rate": 3.6988255616556725e-06, "loss": 13.3186, "step": 312710 }, { "epoch": 0.6317141852882833, "grad_norm": 346.9736328125, "learning_rate": 3.6984885259216866e-06, "loss": 11.5427, "step": 312720 }, { "epoch": 0.6317343859209671, "grad_norm": 270.8279113769531, "learning_rate": 3.698151496531111e-06, "loss": 16.8361, "step": 312730 }, { "epoch": 0.6317545865536509, "grad_norm": 396.7002258300781, "learning_rate": 3.697814473485588e-06, "loss": 13.7949, "step": 312740 }, { "epoch": 0.6317747871863347, "grad_norm": 163.82882690429688, "learning_rate": 3.6974774567867586e-06, "loss": 16.538, "step": 312750 }, { "epoch": 0.6317949878190184, "grad_norm": 465.40869140625, "learning_rate": 3.6971404464362657e-06, "loss": 31.0662, "step": 312760 }, { "epoch": 0.6318151884517023, "grad_norm": 456.25384521484375, "learning_rate": 3.6968034424357535e-06, "loss": 24.0063, "step": 312770 }, { "epoch": 0.6318353890843861, "grad_norm": 238.4464874267578, "learning_rate": 3.6964664447868626e-06, "loss": 20.4208, "step": 312780 }, { "epoch": 0.6318555897170699, "grad_norm": 237.52659606933594, "learning_rate": 3.696129453491235e-06, "loss": 16.6057, "step": 312790 }, { "epoch": 0.6318757903497537, "grad_norm": 865.7783813476562, "learning_rate": 3.695792468550517e-06, "loss": 18.1219, "step": 312800 }, { "epoch": 0.6318959909824375, 
"grad_norm": 280.9550476074219, "learning_rate": 3.6954554899663454e-06, "loss": 14.5367, "step": 312810 }, { "epoch": 0.6319161916151214, "grad_norm": 719.3519287109375, "learning_rate": 3.6951185177403667e-06, "loss": 16.1767, "step": 312820 }, { "epoch": 0.6319363922478052, "grad_norm": 114.07866668701172, "learning_rate": 3.6947815518742226e-06, "loss": 17.0786, "step": 312830 }, { "epoch": 0.631956592880489, "grad_norm": 723.0438232421875, "learning_rate": 3.6944445923695542e-06, "loss": 21.3457, "step": 312840 }, { "epoch": 0.6319767935131728, "grad_norm": 181.6907958984375, "learning_rate": 3.694107639228005e-06, "loss": 12.0535, "step": 312850 }, { "epoch": 0.6319969941458566, "grad_norm": 46.40886306762695, "learning_rate": 3.6937706924512175e-06, "loss": 13.2532, "step": 312860 }, { "epoch": 0.6320171947785405, "grad_norm": 576.8805541992188, "learning_rate": 3.6934337520408313e-06, "loss": 11.5144, "step": 312870 }, { "epoch": 0.6320373954112243, "grad_norm": 299.54864501953125, "learning_rate": 3.6930968179984905e-06, "loss": 23.6593, "step": 312880 }, { "epoch": 0.6320575960439081, "grad_norm": 122.38074493408203, "learning_rate": 3.6927598903258375e-06, "loss": 14.0141, "step": 312890 }, { "epoch": 0.6320777966765919, "grad_norm": 150.1681671142578, "learning_rate": 3.6924229690245163e-06, "loss": 11.8433, "step": 312900 }, { "epoch": 0.6320979973092757, "grad_norm": 588.6844482421875, "learning_rate": 3.6920860540961656e-06, "loss": 18.118, "step": 312910 }, { "epoch": 0.6321181979419596, "grad_norm": 159.3857421875, "learning_rate": 3.6917491455424285e-06, "loss": 14.1074, "step": 312920 }, { "epoch": 0.6321383985746434, "grad_norm": 338.532958984375, "learning_rate": 3.691412243364949e-06, "loss": 22.701, "step": 312930 }, { "epoch": 0.6321585992073272, "grad_norm": 129.49403381347656, "learning_rate": 3.691075347565366e-06, "loss": 16.3717, "step": 312940 }, { "epoch": 0.632178799840011, "grad_norm": 233.03042602539062, "learning_rate": 3.690738458145322e-06, "loss": 38.7909, "step": 312950 }, { "epoch": 0.6321990004726948, "grad_norm": 638.060546875, "learning_rate": 3.6904015751064637e-06, "loss": 25.8626, "step": 312960 }, { "epoch": 0.6322192011053787, "grad_norm": 21.61378288269043, "learning_rate": 3.690064698450425e-06, "loss": 22.3261, "step": 312970 }, { "epoch": 0.6322394017380625, "grad_norm": 412.9071350097656, "learning_rate": 3.689727828178854e-06, "loss": 9.898, "step": 312980 }, { "epoch": 0.6322596023707463, "grad_norm": 496.47308349609375, "learning_rate": 3.689390964293391e-06, "loss": 9.9742, "step": 312990 }, { "epoch": 0.6322798030034301, "grad_norm": 352.8420104980469, "learning_rate": 3.6890541067956775e-06, "loss": 11.8739, "step": 313000 }, { "epoch": 0.6323000036361138, "grad_norm": 387.5815124511719, "learning_rate": 3.6887172556873545e-06, "loss": 17.3686, "step": 313010 }, { "epoch": 0.6323202042687976, "grad_norm": 157.61289978027344, "learning_rate": 3.688380410970066e-06, "loss": 21.7196, "step": 313020 }, { "epoch": 0.6323404049014815, "grad_norm": 476.6892395019531, "learning_rate": 3.68804357264545e-06, "loss": 22.1875, "step": 313030 }, { "epoch": 0.6323606055341653, "grad_norm": 220.13218688964844, "learning_rate": 3.6877067407151514e-06, "loss": 19.917, "step": 313040 }, { "epoch": 0.6323808061668491, "grad_norm": 779.32275390625, "learning_rate": 3.6873699151808105e-06, "loss": 17.107, "step": 313050 }, { "epoch": 0.6324010067995329, "grad_norm": 373.5910949707031, "learning_rate": 3.6870330960440713e-06, "loss": 27.1193, "step": 
313060 }, { "epoch": 0.6324212074322167, "grad_norm": 211.95901489257812, "learning_rate": 3.686696283306572e-06, "loss": 10.6057, "step": 313070 }, { "epoch": 0.6324414080649006, "grad_norm": 186.3962860107422, "learning_rate": 3.686359476969957e-06, "loss": 8.7685, "step": 313080 }, { "epoch": 0.6324616086975844, "grad_norm": 844.3349609375, "learning_rate": 3.6860226770358663e-06, "loss": 25.5248, "step": 313090 }, { "epoch": 0.6324818093302682, "grad_norm": 560.161376953125, "learning_rate": 3.68568588350594e-06, "loss": 13.9366, "step": 313100 }, { "epoch": 0.632502009962952, "grad_norm": 159.23397827148438, "learning_rate": 3.6853490963818224e-06, "loss": 16.6956, "step": 313110 }, { "epoch": 0.6325222105956358, "grad_norm": 828.654052734375, "learning_rate": 3.6850123156651544e-06, "loss": 23.5992, "step": 313120 }, { "epoch": 0.6325424112283197, "grad_norm": 289.0027160644531, "learning_rate": 3.6846755413575764e-06, "loss": 17.886, "step": 313130 }, { "epoch": 0.6325626118610035, "grad_norm": 553.8969116210938, "learning_rate": 3.6843387734607304e-06, "loss": 15.7221, "step": 313140 }, { "epoch": 0.6325828124936873, "grad_norm": 135.4904022216797, "learning_rate": 3.684002011976259e-06, "loss": 10.7774, "step": 313150 }, { "epoch": 0.6326030131263711, "grad_norm": 449.1959533691406, "learning_rate": 3.6836652569057994e-06, "loss": 21.7374, "step": 313160 }, { "epoch": 0.632623213759055, "grad_norm": 357.0682373046875, "learning_rate": 3.6833285082509962e-06, "loss": 14.2281, "step": 313170 }, { "epoch": 0.6326434143917388, "grad_norm": 202.2461700439453, "learning_rate": 3.682991766013493e-06, "loss": 15.7554, "step": 313180 }, { "epoch": 0.6326636150244226, "grad_norm": 1196.7135009765625, "learning_rate": 3.6826550301949248e-06, "loss": 20.5433, "step": 313190 }, { "epoch": 0.6326838156571064, "grad_norm": 240.29473876953125, "learning_rate": 3.6823183007969375e-06, "loss": 33.4275, "step": 313200 }, { "epoch": 0.6327040162897902, "grad_norm": 432.63262939453125, "learning_rate": 3.681981577821171e-06, "loss": 15.9644, "step": 313210 }, { "epoch": 0.632724216922474, "grad_norm": 469.47955322265625, "learning_rate": 3.681644861269267e-06, "loss": 13.7639, "step": 313220 }, { "epoch": 0.6327444175551579, "grad_norm": 729.5411376953125, "learning_rate": 3.681308151142866e-06, "loss": 18.7102, "step": 313230 }, { "epoch": 0.6327646181878417, "grad_norm": 130.8728485107422, "learning_rate": 3.6809714474436075e-06, "loss": 27.1396, "step": 313240 }, { "epoch": 0.6327848188205255, "grad_norm": 280.59307861328125, "learning_rate": 3.680634750173137e-06, "loss": 15.3041, "step": 313250 }, { "epoch": 0.6328050194532093, "grad_norm": 106.77202606201172, "learning_rate": 3.6802980593330893e-06, "loss": 23.9739, "step": 313260 }, { "epoch": 0.632825220085893, "grad_norm": 443.0906066894531, "learning_rate": 3.6799613749251105e-06, "loss": 10.7108, "step": 313270 }, { "epoch": 0.6328454207185769, "grad_norm": 91.66646575927734, "learning_rate": 3.6796246969508408e-06, "loss": 23.3665, "step": 313280 }, { "epoch": 0.6328656213512607, "grad_norm": 612.2748413085938, "learning_rate": 3.6792880254119195e-06, "loss": 21.3941, "step": 313290 }, { "epoch": 0.6328858219839445, "grad_norm": 499.7066650390625, "learning_rate": 3.678951360309988e-06, "loss": 19.2343, "step": 313300 }, { "epoch": 0.6329060226166283, "grad_norm": 330.5376892089844, "learning_rate": 3.678614701646688e-06, "loss": 15.7366, "step": 313310 }, { "epoch": 0.6329262232493121, "grad_norm": 242.13572692871094, "learning_rate": 
3.678278049423659e-06, "loss": 18.0754, "step": 313320 }, { "epoch": 0.632946423881996, "grad_norm": 445.71099853515625, "learning_rate": 3.677941403642541e-06, "loss": 22.9995, "step": 313330 }, { "epoch": 0.6329666245146798, "grad_norm": 293.3953857421875, "learning_rate": 3.6776047643049777e-06, "loss": 15.1248, "step": 313340 }, { "epoch": 0.6329868251473636, "grad_norm": 626.185302734375, "learning_rate": 3.6772681314126097e-06, "loss": 20.843, "step": 313350 }, { "epoch": 0.6330070257800474, "grad_norm": 174.92031860351562, "learning_rate": 3.676931504967075e-06, "loss": 16.2623, "step": 313360 }, { "epoch": 0.6330272264127312, "grad_norm": 381.0513610839844, "learning_rate": 3.6765948849700155e-06, "loss": 25.8901, "step": 313370 }, { "epoch": 0.6330474270454151, "grad_norm": 316.2397766113281, "learning_rate": 3.6762582714230733e-06, "loss": 23.8412, "step": 313380 }, { "epoch": 0.6330676276780989, "grad_norm": 478.4018249511719, "learning_rate": 3.6759216643278865e-06, "loss": 33.0218, "step": 313390 }, { "epoch": 0.6330878283107827, "grad_norm": 296.0896301269531, "learning_rate": 3.6755850636860956e-06, "loss": 34.0951, "step": 313400 }, { "epoch": 0.6331080289434665, "grad_norm": 359.9607238769531, "learning_rate": 3.675248469499346e-06, "loss": 47.9103, "step": 313410 }, { "epoch": 0.6331282295761503, "grad_norm": 132.58859252929688, "learning_rate": 3.674911881769272e-06, "loss": 8.2303, "step": 313420 }, { "epoch": 0.6331484302088342, "grad_norm": 313.1063232421875, "learning_rate": 3.674575300497517e-06, "loss": 21.5348, "step": 313430 }, { "epoch": 0.633168630841518, "grad_norm": 2743.5791015625, "learning_rate": 3.6742387256857224e-06, "loss": 32.8768, "step": 313440 }, { "epoch": 0.6331888314742018, "grad_norm": 144.70936584472656, "learning_rate": 3.6739021573355273e-06, "loss": 18.3357, "step": 313450 }, { "epoch": 0.6332090321068856, "grad_norm": 155.98593139648438, "learning_rate": 3.673565595448572e-06, "loss": 15.6992, "step": 313460 }, { "epoch": 0.6332292327395694, "grad_norm": 189.23052978515625, "learning_rate": 3.673229040026497e-06, "loss": 20.7769, "step": 313470 }, { "epoch": 0.6332494333722533, "grad_norm": 324.03497314453125, "learning_rate": 3.672892491070943e-06, "loss": 25.4356, "step": 313480 }, { "epoch": 0.6332696340049371, "grad_norm": 125.66798400878906, "learning_rate": 3.672555948583548e-06, "loss": 19.2222, "step": 313490 }, { "epoch": 0.6332898346376209, "grad_norm": 839.9357299804688, "learning_rate": 3.672219412565956e-06, "loss": 32.5873, "step": 313500 }, { "epoch": 0.6333100352703047, "grad_norm": 76.7645263671875, "learning_rate": 3.671882883019806e-06, "loss": 8.779, "step": 313510 }, { "epoch": 0.6333302359029885, "grad_norm": 546.8753662109375, "learning_rate": 3.6715463599467372e-06, "loss": 23.4845, "step": 313520 }, { "epoch": 0.6333504365356722, "grad_norm": 115.98934936523438, "learning_rate": 3.67120984334839e-06, "loss": 15.4719, "step": 313530 }, { "epoch": 0.6333706371683561, "grad_norm": 333.93670654296875, "learning_rate": 3.670873333226407e-06, "loss": 16.8072, "step": 313540 }, { "epoch": 0.6333908378010399, "grad_norm": 748.4852905273438, "learning_rate": 3.670536829582424e-06, "loss": 26.3143, "step": 313550 }, { "epoch": 0.6334110384337237, "grad_norm": 1537.15185546875, "learning_rate": 3.6702003324180823e-06, "loss": 23.7573, "step": 313560 }, { "epoch": 0.6334312390664075, "grad_norm": 238.7601776123047, "learning_rate": 3.669863841735026e-06, "loss": 11.5274, "step": 313570 }, { "epoch": 0.6334514396990913, 
"grad_norm": 291.588134765625, "learning_rate": 3.669527357534889e-06, "loss": 24.5547, "step": 313580 }, { "epoch": 0.6334716403317752, "grad_norm": 350.3074951171875, "learning_rate": 3.6691908798193155e-06, "loss": 9.7203, "step": 313590 }, { "epoch": 0.633491840964459, "grad_norm": 444.2533264160156, "learning_rate": 3.668854408589945e-06, "loss": 21.6721, "step": 313600 }, { "epoch": 0.6335120415971428, "grad_norm": 212.96951293945312, "learning_rate": 3.668517943848416e-06, "loss": 20.951, "step": 313610 }, { "epoch": 0.6335322422298266, "grad_norm": 250.44496154785156, "learning_rate": 3.6681814855963687e-06, "loss": 15.8623, "step": 313620 }, { "epoch": 0.6335524428625104, "grad_norm": 695.5618896484375, "learning_rate": 3.6678450338354443e-06, "loss": 25.7222, "step": 313630 }, { "epoch": 0.6335726434951943, "grad_norm": 183.17965698242188, "learning_rate": 3.667508588567281e-06, "loss": 11.9885, "step": 313640 }, { "epoch": 0.6335928441278781, "grad_norm": 7.7826128005981445, "learning_rate": 3.6671721497935177e-06, "loss": 28.6275, "step": 313650 }, { "epoch": 0.6336130447605619, "grad_norm": 606.100830078125, "learning_rate": 3.6668357175157974e-06, "loss": 18.2872, "step": 313660 }, { "epoch": 0.6336332453932457, "grad_norm": 383.4219970703125, "learning_rate": 3.666499291735759e-06, "loss": 13.3002, "step": 313670 }, { "epoch": 0.6336534460259295, "grad_norm": 191.7081756591797, "learning_rate": 3.66616287245504e-06, "loss": 14.0361, "step": 313680 }, { "epoch": 0.6336736466586134, "grad_norm": 527.5343627929688, "learning_rate": 3.6658264596752814e-06, "loss": 21.7557, "step": 313690 }, { "epoch": 0.6336938472912972, "grad_norm": 122.24197387695312, "learning_rate": 3.6654900533981234e-06, "loss": 34.4489, "step": 313700 }, { "epoch": 0.633714047923981, "grad_norm": 390.60882568359375, "learning_rate": 3.6651536536252047e-06, "loss": 13.396, "step": 313710 }, { "epoch": 0.6337342485566648, "grad_norm": 253.22512817382812, "learning_rate": 3.664817260358164e-06, "loss": 9.8017, "step": 313720 }, { "epoch": 0.6337544491893486, "grad_norm": 384.0865173339844, "learning_rate": 3.6644808735986437e-06, "loss": 16.8831, "step": 313730 }, { "epoch": 0.6337746498220325, "grad_norm": 350.7376403808594, "learning_rate": 3.664144493348281e-06, "loss": 10.132, "step": 313740 }, { "epoch": 0.6337948504547163, "grad_norm": 428.6234436035156, "learning_rate": 3.663808119608716e-06, "loss": 18.2588, "step": 313750 }, { "epoch": 0.6338150510874001, "grad_norm": 441.23016357421875, "learning_rate": 3.663471752381589e-06, "loss": 9.8741, "step": 313760 }, { "epoch": 0.6338352517200839, "grad_norm": 530.990234375, "learning_rate": 3.663135391668538e-06, "loss": 31.5144, "step": 313770 }, { "epoch": 0.6338554523527676, "grad_norm": 170.01712036132812, "learning_rate": 3.662799037471201e-06, "loss": 9.309, "step": 313780 }, { "epoch": 0.6338756529854515, "grad_norm": 442.9435729980469, "learning_rate": 3.6624626897912213e-06, "loss": 12.849, "step": 313790 }, { "epoch": 0.6338958536181353, "grad_norm": 328.24481201171875, "learning_rate": 3.6621263486302373e-06, "loss": 15.3321, "step": 313800 }, { "epoch": 0.6339160542508191, "grad_norm": 589.2186889648438, "learning_rate": 3.6617900139898854e-06, "loss": 12.429, "step": 313810 }, { "epoch": 0.6339362548835029, "grad_norm": 172.483642578125, "learning_rate": 3.6614536858718074e-06, "loss": 28.6527, "step": 313820 }, { "epoch": 0.6339564555161867, "grad_norm": 405.9244079589844, "learning_rate": 3.661117364277642e-06, "loss": 18.1824, "step": 
313830 }, { "epoch": 0.6339766561488706, "grad_norm": 183.97055053710938, "learning_rate": 3.6607810492090278e-06, "loss": 9.8192, "step": 313840 }, { "epoch": 0.6339968567815544, "grad_norm": 68.43452453613281, "learning_rate": 3.6604447406676036e-06, "loss": 13.4357, "step": 313850 }, { "epoch": 0.6340170574142382, "grad_norm": 515.4369506835938, "learning_rate": 3.6601084386550117e-06, "loss": 16.5309, "step": 313860 }, { "epoch": 0.634037258046922, "grad_norm": 319.5745849609375, "learning_rate": 3.659772143172886e-06, "loss": 24.3469, "step": 313870 }, { "epoch": 0.6340574586796058, "grad_norm": 262.00408935546875, "learning_rate": 3.659435854222869e-06, "loss": 28.8416, "step": 313880 }, { "epoch": 0.6340776593122897, "grad_norm": 344.2080383300781, "learning_rate": 3.6590995718066003e-06, "loss": 18.1119, "step": 313890 }, { "epoch": 0.6340978599449735, "grad_norm": 244.47593688964844, "learning_rate": 3.6587632959257168e-06, "loss": 17.9972, "step": 313900 }, { "epoch": 0.6341180605776573, "grad_norm": 657.9804077148438, "learning_rate": 3.658427026581858e-06, "loss": 26.6822, "step": 313910 }, { "epoch": 0.6341382612103411, "grad_norm": 283.5820617675781, "learning_rate": 3.6580907637766642e-06, "loss": 11.8351, "step": 313920 }, { "epoch": 0.6341584618430249, "grad_norm": 154.59458923339844, "learning_rate": 3.657754507511773e-06, "loss": 19.1057, "step": 313930 }, { "epoch": 0.6341786624757088, "grad_norm": 435.4707336425781, "learning_rate": 3.657418257788822e-06, "loss": 20.9711, "step": 313940 }, { "epoch": 0.6341988631083926, "grad_norm": 15.950176239013672, "learning_rate": 3.657082014609452e-06, "loss": 10.2715, "step": 313950 }, { "epoch": 0.6342190637410764, "grad_norm": 365.6403503417969, "learning_rate": 3.656745777975303e-06, "loss": 18.9155, "step": 313960 }, { "epoch": 0.6342392643737602, "grad_norm": 159.98983764648438, "learning_rate": 3.6564095478880114e-06, "loss": 17.8937, "step": 313970 }, { "epoch": 0.634259465006444, "grad_norm": 179.79183959960938, "learning_rate": 3.656073324349216e-06, "loss": 9.7839, "step": 313980 }, { "epoch": 0.6342796656391279, "grad_norm": 36.0970344543457, "learning_rate": 3.6557371073605574e-06, "loss": 12.2711, "step": 313990 }, { "epoch": 0.6342998662718117, "grad_norm": 266.9267272949219, "learning_rate": 3.655400896923672e-06, "loss": 29.3467, "step": 314000 }, { "epoch": 0.6343200669044955, "grad_norm": 135.0736083984375, "learning_rate": 3.655064693040199e-06, "loss": 40.2592, "step": 314010 }, { "epoch": 0.6343402675371793, "grad_norm": 228.64602661132812, "learning_rate": 3.6547284957117805e-06, "loss": 22.6136, "step": 314020 }, { "epoch": 0.6343604681698631, "grad_norm": 495.4220886230469, "learning_rate": 3.6543923049400487e-06, "loss": 22.3554, "step": 314030 }, { "epoch": 0.6343806688025468, "grad_norm": 297.6684875488281, "learning_rate": 3.6540561207266475e-06, "loss": 9.3782, "step": 314040 }, { "epoch": 0.6344008694352307, "grad_norm": 150.00657653808594, "learning_rate": 3.653719943073214e-06, "loss": 17.1163, "step": 314050 }, { "epoch": 0.6344210700679145, "grad_norm": 582.0066528320312, "learning_rate": 3.653383771981385e-06, "loss": 20.8307, "step": 314060 }, { "epoch": 0.6344412707005983, "grad_norm": 614.0116577148438, "learning_rate": 3.6530476074528005e-06, "loss": 25.7003, "step": 314070 }, { "epoch": 0.6344614713332821, "grad_norm": 305.2926330566406, "learning_rate": 3.652711449489099e-06, "loss": 23.6369, "step": 314080 }, { "epoch": 0.6344816719659659, "grad_norm": 743.9189453125, "learning_rate": 
3.6523752980919183e-06, "loss": 25.1215, "step": 314090 }, { "epoch": 0.6345018725986498, "grad_norm": 246.1226348876953, "learning_rate": 3.6520391532628953e-06, "loss": 8.944, "step": 314100 }, { "epoch": 0.6345220732313336, "grad_norm": 331.6296081542969, "learning_rate": 3.6517030150036716e-06, "loss": 18.8529, "step": 314110 }, { "epoch": 0.6345422738640174, "grad_norm": 249.5379180908203, "learning_rate": 3.6513668833158846e-06, "loss": 18.1074, "step": 314120 }, { "epoch": 0.6345624744967012, "grad_norm": 305.54498291015625, "learning_rate": 3.6510307582011706e-06, "loss": 8.4351, "step": 314130 }, { "epoch": 0.634582675129385, "grad_norm": 318.6712951660156, "learning_rate": 3.650694639661169e-06, "loss": 12.2587, "step": 314140 }, { "epoch": 0.6346028757620689, "grad_norm": 1094.7587890625, "learning_rate": 3.6503585276975196e-06, "loss": 21.2978, "step": 314150 }, { "epoch": 0.6346230763947527, "grad_norm": 384.2978515625, "learning_rate": 3.6500224223118576e-06, "loss": 17.935, "step": 314160 }, { "epoch": 0.6346432770274365, "grad_norm": 781.41796875, "learning_rate": 3.6496863235058223e-06, "loss": 39.0964, "step": 314170 }, { "epoch": 0.6346634776601203, "grad_norm": 237.30960083007812, "learning_rate": 3.649350231281054e-06, "loss": 10.8582, "step": 314180 }, { "epoch": 0.6346836782928041, "grad_norm": 107.89735412597656, "learning_rate": 3.6490141456391864e-06, "loss": 19.7089, "step": 314190 }, { "epoch": 0.634703878925488, "grad_norm": 555.4315185546875, "learning_rate": 3.648678066581861e-06, "loss": 36.4434, "step": 314200 }, { "epoch": 0.6347240795581718, "grad_norm": 184.58253479003906, "learning_rate": 3.6483419941107156e-06, "loss": 15.3814, "step": 314210 }, { "epoch": 0.6347442801908556, "grad_norm": 252.2710418701172, "learning_rate": 3.6480059282273872e-06, "loss": 11.9636, "step": 314220 }, { "epoch": 0.6347644808235394, "grad_norm": 151.30789184570312, "learning_rate": 3.647669868933513e-06, "loss": 14.5175, "step": 314230 }, { "epoch": 0.6347846814562232, "grad_norm": 313.2535705566406, "learning_rate": 3.6473338162307314e-06, "loss": 20.2478, "step": 314240 }, { "epoch": 0.6348048820889071, "grad_norm": 66.79661560058594, "learning_rate": 3.6469977701206833e-06, "loss": 33.3599, "step": 314250 }, { "epoch": 0.6348250827215909, "grad_norm": 209.22415161132812, "learning_rate": 3.6466617306050014e-06, "loss": 15.8479, "step": 314260 }, { "epoch": 0.6348452833542747, "grad_norm": 349.41436767578125, "learning_rate": 3.646325697685327e-06, "loss": 19.6468, "step": 314270 }, { "epoch": 0.6348654839869585, "grad_norm": 198.7777099609375, "learning_rate": 3.645989671363297e-06, "loss": 14.5404, "step": 314280 }, { "epoch": 0.6348856846196422, "grad_norm": 153.7720947265625, "learning_rate": 3.6456536516405494e-06, "loss": 13.566, "step": 314290 }, { "epoch": 0.634905885252326, "grad_norm": 359.6976013183594, "learning_rate": 3.645317638518721e-06, "loss": 14.9885, "step": 314300 }, { "epoch": 0.6349260858850099, "grad_norm": 0.0, "learning_rate": 3.6449816319994512e-06, "loss": 11.6684, "step": 314310 }, { "epoch": 0.6349462865176937, "grad_norm": 245.663330078125, "learning_rate": 3.644645632084376e-06, "loss": 22.7374, "step": 314320 }, { "epoch": 0.6349664871503775, "grad_norm": 316.9748840332031, "learning_rate": 3.644309638775132e-06, "loss": 20.8261, "step": 314330 }, { "epoch": 0.6349866877830613, "grad_norm": 382.9841613769531, "learning_rate": 3.6439736520733606e-06, "loss": 10.0096, "step": 314340 }, { "epoch": 0.6350068884157452, "grad_norm": 
476.1699523925781, "learning_rate": 3.6436376719806965e-06, "loss": 26.7912, "step": 314350 }, { "epoch": 0.635027089048429, "grad_norm": 326.27081298828125, "learning_rate": 3.6433016984987774e-06, "loss": 22.6489, "step": 314360 }, { "epoch": 0.6350472896811128, "grad_norm": 296.7281494140625, "learning_rate": 3.642965731629242e-06, "loss": 19.9827, "step": 314370 }, { "epoch": 0.6350674903137966, "grad_norm": 498.3815612792969, "learning_rate": 3.6426297713737268e-06, "loss": 22.878, "step": 314380 }, { "epoch": 0.6350876909464804, "grad_norm": 473.14617919921875, "learning_rate": 3.6422938177338695e-06, "loss": 13.6374, "step": 314390 }, { "epoch": 0.6351078915791643, "grad_norm": 576.0936279296875, "learning_rate": 3.6419578707113055e-06, "loss": 19.8624, "step": 314400 }, { "epoch": 0.6351280922118481, "grad_norm": 462.81964111328125, "learning_rate": 3.6416219303076772e-06, "loss": 13.8512, "step": 314410 }, { "epoch": 0.6351482928445319, "grad_norm": 911.2876586914062, "learning_rate": 3.6412859965246173e-06, "loss": 25.2457, "step": 314420 }, { "epoch": 0.6351684934772157, "grad_norm": 304.0901794433594, "learning_rate": 3.640950069363765e-06, "loss": 18.7084, "step": 314430 }, { "epoch": 0.6351886941098995, "grad_norm": 731.5963745117188, "learning_rate": 3.6406141488267575e-06, "loss": 17.0662, "step": 314440 }, { "epoch": 0.6352088947425834, "grad_norm": 271.031005859375, "learning_rate": 3.640278234915232e-06, "loss": 24.9593, "step": 314450 }, { "epoch": 0.6352290953752672, "grad_norm": 142.2168731689453, "learning_rate": 3.639942327630823e-06, "loss": 27.8881, "step": 314460 }, { "epoch": 0.635249296007951, "grad_norm": 344.64324951171875, "learning_rate": 3.6396064269751747e-06, "loss": 17.679, "step": 314470 }, { "epoch": 0.6352694966406348, "grad_norm": 387.39263916015625, "learning_rate": 3.6392705329499156e-06, "loss": 17.0301, "step": 314480 }, { "epoch": 0.6352896972733186, "grad_norm": 417.09710693359375, "learning_rate": 3.638934645556688e-06, "loss": 18.0962, "step": 314490 }, { "epoch": 0.6353098979060025, "grad_norm": 494.6003112792969, "learning_rate": 3.6385987647971287e-06, "loss": 15.4787, "step": 314500 }, { "epoch": 0.6353300985386863, "grad_norm": 480.74456787109375, "learning_rate": 3.6382628906728735e-06, "loss": 16.8352, "step": 314510 }, { "epoch": 0.6353502991713701, "grad_norm": 347.3243713378906, "learning_rate": 3.63792702318556e-06, "loss": 22.1253, "step": 314520 }, { "epoch": 0.6353704998040539, "grad_norm": 115.21092987060547, "learning_rate": 3.6375911623368252e-06, "loss": 13.5392, "step": 314530 }, { "epoch": 0.6353907004367377, "grad_norm": 289.0970153808594, "learning_rate": 3.637255308128305e-06, "loss": 10.7989, "step": 314540 }, { "epoch": 0.6354109010694214, "grad_norm": 370.20806884765625, "learning_rate": 3.6369194605616364e-06, "loss": 11.1195, "step": 314550 }, { "epoch": 0.6354311017021053, "grad_norm": 217.1336212158203, "learning_rate": 3.636583619638458e-06, "loss": 38.4268, "step": 314560 }, { "epoch": 0.6354513023347891, "grad_norm": 652.8502807617188, "learning_rate": 3.6362477853604066e-06, "loss": 24.1619, "step": 314570 }, { "epoch": 0.6354715029674729, "grad_norm": 674.3751220703125, "learning_rate": 3.635911957729117e-06, "loss": 17.7115, "step": 314580 }, { "epoch": 0.6354917036001567, "grad_norm": 299.0431213378906, "learning_rate": 3.6355761367462274e-06, "loss": 35.0862, "step": 314590 }, { "epoch": 0.6355119042328405, "grad_norm": 600.8801879882812, "learning_rate": 3.635240322413375e-06, "loss": 26.1539, "step": 
314600 }, { "epoch": 0.6355321048655244, "grad_norm": 536.4166870117188, "learning_rate": 3.634904514732195e-06, "loss": 21.8917, "step": 314610 }, { "epoch": 0.6355523054982082, "grad_norm": 150.32162475585938, "learning_rate": 3.634568713704323e-06, "loss": 23.1291, "step": 314620 }, { "epoch": 0.635572506130892, "grad_norm": 430.8785095214844, "learning_rate": 3.634232919331401e-06, "loss": 24.9513, "step": 314630 }, { "epoch": 0.6355927067635758, "grad_norm": 410.3782653808594, "learning_rate": 3.6338971316150593e-06, "loss": 17.2716, "step": 314640 }, { "epoch": 0.6356129073962596, "grad_norm": 171.90130615234375, "learning_rate": 3.6335613505569386e-06, "loss": 15.6909, "step": 314650 }, { "epoch": 0.6356331080289435, "grad_norm": 330.6631164550781, "learning_rate": 3.6332255761586745e-06, "loss": 33.5184, "step": 314660 }, { "epoch": 0.6356533086616273, "grad_norm": 41.728023529052734, "learning_rate": 3.6328898084219023e-06, "loss": 24.4267, "step": 314670 }, { "epoch": 0.6356735092943111, "grad_norm": 580.1731567382812, "learning_rate": 3.632554047348259e-06, "loss": 20.6272, "step": 314680 }, { "epoch": 0.6356937099269949, "grad_norm": 781.712890625, "learning_rate": 3.6322182929393833e-06, "loss": 13.5489, "step": 314690 }, { "epoch": 0.6357139105596787, "grad_norm": 38.953609466552734, "learning_rate": 3.6318825451969085e-06, "loss": 11.9947, "step": 314700 }, { "epoch": 0.6357341111923626, "grad_norm": 680.6676025390625, "learning_rate": 3.631546804122471e-06, "loss": 33.3907, "step": 314710 }, { "epoch": 0.6357543118250464, "grad_norm": 690.2257080078125, "learning_rate": 3.6312110697177095e-06, "loss": 27.6252, "step": 314720 }, { "epoch": 0.6357745124577302, "grad_norm": 198.4851837158203, "learning_rate": 3.63087534198426e-06, "loss": 17.0928, "step": 314730 }, { "epoch": 0.635794713090414, "grad_norm": 180.5613555908203, "learning_rate": 3.630539620923757e-06, "loss": 16.4493, "step": 314740 }, { "epoch": 0.6358149137230978, "grad_norm": 272.0371398925781, "learning_rate": 3.630203906537838e-06, "loss": 20.4483, "step": 314750 }, { "epoch": 0.6358351143557817, "grad_norm": 234.10031127929688, "learning_rate": 3.6298681988281405e-06, "loss": 23.7162, "step": 314760 }, { "epoch": 0.6358553149884655, "grad_norm": 139.9898681640625, "learning_rate": 3.6295324977962976e-06, "loss": 21.5939, "step": 314770 }, { "epoch": 0.6358755156211493, "grad_norm": 27.606124877929688, "learning_rate": 3.6291968034439463e-06, "loss": 14.8901, "step": 314780 }, { "epoch": 0.6358957162538331, "grad_norm": 286.2486267089844, "learning_rate": 3.628861115772726e-06, "loss": 18.0712, "step": 314790 }, { "epoch": 0.6359159168865169, "grad_norm": 76.69282531738281, "learning_rate": 3.628525434784268e-06, "loss": 23.0269, "step": 314800 }, { "epoch": 0.6359361175192006, "grad_norm": 8.483139991760254, "learning_rate": 3.6281897604802113e-06, "loss": 20.4712, "step": 314810 }, { "epoch": 0.6359563181518845, "grad_norm": 345.8614807128906, "learning_rate": 3.6278540928621927e-06, "loss": 29.5564, "step": 314820 }, { "epoch": 0.6359765187845683, "grad_norm": 227.73464965820312, "learning_rate": 3.6275184319318456e-06, "loss": 9.1113, "step": 314830 }, { "epoch": 0.6359967194172521, "grad_norm": 468.9640808105469, "learning_rate": 3.627182777690807e-06, "loss": 7.3737, "step": 314840 }, { "epoch": 0.6360169200499359, "grad_norm": 416.2913513183594, "learning_rate": 3.6268471301407127e-06, "loss": 13.9147, "step": 314850 }, { "epoch": 0.6360371206826197, "grad_norm": 251.77516174316406, "learning_rate": 
3.626511489283201e-06, "loss": 18.7787, "step": 314860 }, { "epoch": 0.6360573213153036, "grad_norm": 70.39207458496094, "learning_rate": 3.6261758551199033e-06, "loss": 26.701, "step": 314870 }, { "epoch": 0.6360775219479874, "grad_norm": 114.36405944824219, "learning_rate": 3.6258402276524585e-06, "loss": 14.7027, "step": 314880 }, { "epoch": 0.6360977225806712, "grad_norm": 881.9510498046875, "learning_rate": 3.6255046068825035e-06, "loss": 20.1666, "step": 314890 }, { "epoch": 0.636117923213355, "grad_norm": 692.839599609375, "learning_rate": 3.625168992811671e-06, "loss": 15.1088, "step": 314900 }, { "epoch": 0.6361381238460388, "grad_norm": 71.59687805175781, "learning_rate": 3.6248333854415975e-06, "loss": 13.9184, "step": 314910 }, { "epoch": 0.6361583244787227, "grad_norm": 435.7094421386719, "learning_rate": 3.624497784773921e-06, "loss": 22.1059, "step": 314920 }, { "epoch": 0.6361785251114065, "grad_norm": 15.940292358398438, "learning_rate": 3.624162190810274e-06, "loss": 12.5004, "step": 314930 }, { "epoch": 0.6361987257440903, "grad_norm": 485.35101318359375, "learning_rate": 3.623826603552293e-06, "loss": 20.1395, "step": 314940 }, { "epoch": 0.6362189263767741, "grad_norm": 307.1765441894531, "learning_rate": 3.6234910230016173e-06, "loss": 10.4658, "step": 314950 }, { "epoch": 0.636239127009458, "grad_norm": 320.6990966796875, "learning_rate": 3.6231554491598766e-06, "loss": 17.335, "step": 314960 }, { "epoch": 0.6362593276421418, "grad_norm": 1441.4237060546875, "learning_rate": 3.622819882028709e-06, "loss": 22.589, "step": 314970 }, { "epoch": 0.6362795282748256, "grad_norm": 150.527099609375, "learning_rate": 3.6224843216097526e-06, "loss": 10.7138, "step": 314980 }, { "epoch": 0.6362997289075094, "grad_norm": 171.6363983154297, "learning_rate": 3.6221487679046384e-06, "loss": 41.1594, "step": 314990 }, { "epoch": 0.6363199295401932, "grad_norm": 53.00648880004883, "learning_rate": 3.6218132209150047e-06, "loss": 13.5146, "step": 315000 }, { "epoch": 0.636340130172877, "grad_norm": 488.0824279785156, "learning_rate": 3.621477680642486e-06, "loss": 19.003, "step": 315010 }, { "epoch": 0.6363603308055609, "grad_norm": 819.4788818359375, "learning_rate": 3.6211421470887187e-06, "loss": 30.1531, "step": 315020 }, { "epoch": 0.6363805314382447, "grad_norm": 68.24623107910156, "learning_rate": 3.620806620255336e-06, "loss": 16.0678, "step": 315030 }, { "epoch": 0.6364007320709285, "grad_norm": 411.6278381347656, "learning_rate": 3.6204711001439754e-06, "loss": 19.2273, "step": 315040 }, { "epoch": 0.6364209327036123, "grad_norm": 56.952491760253906, "learning_rate": 3.6201355867562725e-06, "loss": 18.4849, "step": 315050 }, { "epoch": 0.636441133336296, "grad_norm": 507.9949951171875, "learning_rate": 3.61980008009386e-06, "loss": 15.0319, "step": 315060 }, { "epoch": 0.6364613339689799, "grad_norm": 265.7088623046875, "learning_rate": 3.6194645801583745e-06, "loss": 18.3823, "step": 315070 }, { "epoch": 0.6364815346016637, "grad_norm": 573.2406005859375, "learning_rate": 3.6191290869514523e-06, "loss": 17.9595, "step": 315080 }, { "epoch": 0.6365017352343475, "grad_norm": 827.1715087890625, "learning_rate": 3.6187936004747248e-06, "loss": 16.9535, "step": 315090 }, { "epoch": 0.6365219358670313, "grad_norm": 414.61083984375, "learning_rate": 3.618458120729832e-06, "loss": 8.8065, "step": 315100 }, { "epoch": 0.6365421364997151, "grad_norm": 84.46508026123047, "learning_rate": 3.6181226477184074e-06, "loss": 20.7389, "step": 315110 }, { "epoch": 0.636562337132399, 
"grad_norm": 683.1246948242188, "learning_rate": 3.617787181442084e-06, "loss": 19.4811, "step": 315120 }, { "epoch": 0.6365825377650828, "grad_norm": 314.67730712890625, "learning_rate": 3.6174517219024985e-06, "loss": 34.0484, "step": 315130 }, { "epoch": 0.6366027383977666, "grad_norm": 330.02545166015625, "learning_rate": 3.617116269101286e-06, "loss": 27.6005, "step": 315140 }, { "epoch": 0.6366229390304504, "grad_norm": 82.23473358154297, "learning_rate": 3.616780823040081e-06, "loss": 8.8885, "step": 315150 }, { "epoch": 0.6366431396631342, "grad_norm": 491.30096435546875, "learning_rate": 3.616445383720517e-06, "loss": 15.6178, "step": 315160 }, { "epoch": 0.6366633402958181, "grad_norm": 504.1806640625, "learning_rate": 3.616109951144231e-06, "loss": 29.5404, "step": 315170 }, { "epoch": 0.6366835409285019, "grad_norm": 309.6594543457031, "learning_rate": 3.615774525312859e-06, "loss": 11.1125, "step": 315180 }, { "epoch": 0.6367037415611857, "grad_norm": 67.88604736328125, "learning_rate": 3.6154391062280326e-06, "loss": 27.2476, "step": 315190 }, { "epoch": 0.6367239421938695, "grad_norm": 557.1927490234375, "learning_rate": 3.6151036938913887e-06, "loss": 25.8452, "step": 315200 }, { "epoch": 0.6367441428265533, "grad_norm": 500.59161376953125, "learning_rate": 3.614768288304562e-06, "loss": 14.6013, "step": 315210 }, { "epoch": 0.6367643434592372, "grad_norm": 248.150634765625, "learning_rate": 3.6144328894691854e-06, "loss": 33.7717, "step": 315220 }, { "epoch": 0.636784544091921, "grad_norm": 124.32286071777344, "learning_rate": 3.614097497386894e-06, "loss": 17.7882, "step": 315230 }, { "epoch": 0.6368047447246048, "grad_norm": 211.30921936035156, "learning_rate": 3.613762112059327e-06, "loss": 20.3821, "step": 315240 }, { "epoch": 0.6368249453572886, "grad_norm": 201.91744995117188, "learning_rate": 3.613426733488111e-06, "loss": 13.8047, "step": 315250 }, { "epoch": 0.6368451459899724, "grad_norm": 244.3423614501953, "learning_rate": 3.613091361674887e-06, "loss": 18.9554, "step": 315260 }, { "epoch": 0.6368653466226563, "grad_norm": 173.76109313964844, "learning_rate": 3.6127559966212885e-06, "loss": 15.9367, "step": 315270 }, { "epoch": 0.6368855472553401, "grad_norm": 622.0409545898438, "learning_rate": 3.6124206383289474e-06, "loss": 29.1095, "step": 315280 }, { "epoch": 0.6369057478880239, "grad_norm": 198.35536193847656, "learning_rate": 3.6120852867995003e-06, "loss": 22.4082, "step": 315290 }, { "epoch": 0.6369259485207077, "grad_norm": 682.5012817382812, "learning_rate": 3.61174994203458e-06, "loss": 17.6079, "step": 315300 }, { "epoch": 0.6369461491533915, "grad_norm": 381.4442138671875, "learning_rate": 3.611414604035825e-06, "loss": 19.0631, "step": 315310 }, { "epoch": 0.6369663497860752, "grad_norm": 244.13351440429688, "learning_rate": 3.6110792728048636e-06, "loss": 8.6647, "step": 315320 }, { "epoch": 0.6369865504187591, "grad_norm": 184.3904266357422, "learning_rate": 3.610743948343335e-06, "loss": 10.3614, "step": 315330 }, { "epoch": 0.6370067510514429, "grad_norm": 312.16729736328125, "learning_rate": 3.610408630652873e-06, "loss": 17.1388, "step": 315340 }, { "epoch": 0.6370269516841267, "grad_norm": 79.96022033691406, "learning_rate": 3.610073319735109e-06, "loss": 14.7381, "step": 315350 }, { "epoch": 0.6370471523168105, "grad_norm": 112.77809143066406, "learning_rate": 3.6097380155916795e-06, "loss": 17.2471, "step": 315360 }, { "epoch": 0.6370673529494943, "grad_norm": 729.2623291015625, "learning_rate": 3.609402718224219e-06, "loss": 18.7645, 
"step": 315370 }, { "epoch": 0.6370875535821782, "grad_norm": 424.6807556152344, "learning_rate": 3.6090674276343608e-06, "loss": 17.2392, "step": 315380 }, { "epoch": 0.637107754214862, "grad_norm": 431.84405517578125, "learning_rate": 3.608732143823737e-06, "loss": 12.3354, "step": 315390 }, { "epoch": 0.6371279548475458, "grad_norm": 1119.903076171875, "learning_rate": 3.608396866793988e-06, "loss": 14.1539, "step": 315400 }, { "epoch": 0.6371481554802296, "grad_norm": 506.5306396484375, "learning_rate": 3.60806159654674e-06, "loss": 24.5863, "step": 315410 }, { "epoch": 0.6371683561129134, "grad_norm": 62.214229583740234, "learning_rate": 3.607726333083633e-06, "loss": 17.4817, "step": 315420 }, { "epoch": 0.6371885567455973, "grad_norm": 669.62548828125, "learning_rate": 3.607391076406299e-06, "loss": 27.4537, "step": 315430 }, { "epoch": 0.6372087573782811, "grad_norm": 468.5104675292969, "learning_rate": 3.607055826516372e-06, "loss": 28.7736, "step": 315440 }, { "epoch": 0.6372289580109649, "grad_norm": 238.2803497314453, "learning_rate": 3.606720583415485e-06, "loss": 35.1728, "step": 315450 }, { "epoch": 0.6372491586436487, "grad_norm": 253.49220275878906, "learning_rate": 3.6063853471052724e-06, "loss": 27.1324, "step": 315460 }, { "epoch": 0.6372693592763325, "grad_norm": 42.56626510620117, "learning_rate": 3.606050117587372e-06, "loss": 14.8215, "step": 315470 }, { "epoch": 0.6372895599090164, "grad_norm": 472.00201416015625, "learning_rate": 3.605714894863411e-06, "loss": 15.7142, "step": 315480 }, { "epoch": 0.6373097605417002, "grad_norm": 421.8933410644531, "learning_rate": 3.605379678935027e-06, "loss": 25.5709, "step": 315490 }, { "epoch": 0.637329961174384, "grad_norm": 550.9469604492188, "learning_rate": 3.6050444698038547e-06, "loss": 11.3782, "step": 315500 }, { "epoch": 0.6373501618070678, "grad_norm": 549.83056640625, "learning_rate": 3.6047092674715257e-06, "loss": 20.7691, "step": 315510 }, { "epoch": 0.6373703624397516, "grad_norm": 247.39259338378906, "learning_rate": 3.6043740719396736e-06, "loss": 27.8713, "step": 315520 }, { "epoch": 0.6373905630724355, "grad_norm": 478.87542724609375, "learning_rate": 3.604038883209935e-06, "loss": 21.6742, "step": 315530 }, { "epoch": 0.6374107637051193, "grad_norm": 282.81341552734375, "learning_rate": 3.60370370128394e-06, "loss": 25.0744, "step": 315540 }, { "epoch": 0.6374309643378031, "grad_norm": 207.08831787109375, "learning_rate": 3.603368526163323e-06, "loss": 15.2893, "step": 315550 }, { "epoch": 0.6374511649704869, "grad_norm": 375.98291015625, "learning_rate": 3.6030333578497213e-06, "loss": 14.9089, "step": 315560 }, { "epoch": 0.6374713656031706, "grad_norm": 270.4388732910156, "learning_rate": 3.602698196344763e-06, "loss": 10.7293, "step": 315570 }, { "epoch": 0.6374915662358545, "grad_norm": 320.18365478515625, "learning_rate": 3.6023630416500843e-06, "loss": 18.4042, "step": 315580 }, { "epoch": 0.6375117668685383, "grad_norm": 268.7408752441406, "learning_rate": 3.6020278937673202e-06, "loss": 21.908, "step": 315590 }, { "epoch": 0.6375319675012221, "grad_norm": 375.9366760253906, "learning_rate": 3.6016927526981014e-06, "loss": 10.5216, "step": 315600 }, { "epoch": 0.6375521681339059, "grad_norm": 238.86119079589844, "learning_rate": 3.601357618444063e-06, "loss": 14.4453, "step": 315610 }, { "epoch": 0.6375723687665897, "grad_norm": 149.0080108642578, "learning_rate": 3.6010224910068363e-06, "loss": 14.9466, "step": 315620 }, { "epoch": 0.6375925693992736, "grad_norm": 772.7325439453125, 
"learning_rate": 3.6006873703880595e-06, "loss": 17.9658, "step": 315630 }, { "epoch": 0.6376127700319574, "grad_norm": 257.7185974121094, "learning_rate": 3.60035225658936e-06, "loss": 20.0764, "step": 315640 }, { "epoch": 0.6376329706646412, "grad_norm": 518.2299194335938, "learning_rate": 3.600017149612375e-06, "loss": 10.1733, "step": 315650 }, { "epoch": 0.637653171297325, "grad_norm": 158.87094116210938, "learning_rate": 3.599682049458737e-06, "loss": 20.2825, "step": 315660 }, { "epoch": 0.6376733719300088, "grad_norm": 332.2776184082031, "learning_rate": 3.5993469561300785e-06, "loss": 17.557, "step": 315670 }, { "epoch": 0.6376935725626927, "grad_norm": 9.964910507202148, "learning_rate": 3.599011869628033e-06, "loss": 21.0277, "step": 315680 }, { "epoch": 0.6377137731953765, "grad_norm": 357.9776611328125, "learning_rate": 3.598676789954234e-06, "loss": 15.1413, "step": 315690 }, { "epoch": 0.6377339738280603, "grad_norm": 422.00701904296875, "learning_rate": 3.598341717110313e-06, "loss": 15.5945, "step": 315700 }, { "epoch": 0.6377541744607441, "grad_norm": 451.0946350097656, "learning_rate": 3.598006651097905e-06, "loss": 13.4531, "step": 315710 }, { "epoch": 0.6377743750934279, "grad_norm": 430.7082214355469, "learning_rate": 3.5976715919186443e-06, "loss": 12.7757, "step": 315720 }, { "epoch": 0.6377945757261118, "grad_norm": 654.5879516601562, "learning_rate": 3.5973365395741612e-06, "loss": 55.4897, "step": 315730 }, { "epoch": 0.6378147763587956, "grad_norm": 693.4959716796875, "learning_rate": 3.597001494066089e-06, "loss": 15.4373, "step": 315740 }, { "epoch": 0.6378349769914794, "grad_norm": 596.0556030273438, "learning_rate": 3.5966664553960622e-06, "loss": 33.6988, "step": 315750 }, { "epoch": 0.6378551776241632, "grad_norm": 85.95985412597656, "learning_rate": 3.596331423565712e-06, "loss": 10.1279, "step": 315760 }, { "epoch": 0.637875378256847, "grad_norm": 535.9702758789062, "learning_rate": 3.595996398576672e-06, "loss": 20.2559, "step": 315770 }, { "epoch": 0.6378955788895309, "grad_norm": 607.033447265625, "learning_rate": 3.5956613804305755e-06, "loss": 14.3539, "step": 315780 }, { "epoch": 0.6379157795222147, "grad_norm": 423.5137023925781, "learning_rate": 3.5953263691290564e-06, "loss": 19.5415, "step": 315790 }, { "epoch": 0.6379359801548985, "grad_norm": 305.0976257324219, "learning_rate": 3.5949913646737456e-06, "loss": 17.0146, "step": 315800 }, { "epoch": 0.6379561807875823, "grad_norm": 324.80523681640625, "learning_rate": 3.594656367066276e-06, "loss": 19.6394, "step": 315810 }, { "epoch": 0.6379763814202661, "grad_norm": 582.4668579101562, "learning_rate": 3.594321376308282e-06, "loss": 27.4317, "step": 315820 }, { "epoch": 0.6379965820529498, "grad_norm": 5703.4150390625, "learning_rate": 3.5939863924013937e-06, "loss": 38.9872, "step": 315830 }, { "epoch": 0.6380167826856337, "grad_norm": 150.55264282226562, "learning_rate": 3.593651415347244e-06, "loss": 18.9136, "step": 315840 }, { "epoch": 0.6380369833183175, "grad_norm": 552.1156005859375, "learning_rate": 3.5933164451474708e-06, "loss": 15.5248, "step": 315850 }, { "epoch": 0.6380571839510013, "grad_norm": 335.07537841796875, "learning_rate": 3.592981481803699e-06, "loss": 18.5343, "step": 315860 }, { "epoch": 0.6380773845836851, "grad_norm": 318.16680908203125, "learning_rate": 3.5926465253175656e-06, "loss": 24.1763, "step": 315870 }, { "epoch": 0.638097585216369, "grad_norm": 275.1593322753906, "learning_rate": 3.5923115756907033e-06, "loss": 18.2714, "step": 315880 }, { "epoch": 
0.6381177858490528, "grad_norm": 214.022216796875, "learning_rate": 3.591976632924743e-06, "loss": 19.615, "step": 315890 }, { "epoch": 0.6381379864817366, "grad_norm": 143.54090881347656, "learning_rate": 3.5916416970213173e-06, "loss": 29.774, "step": 315900 }, { "epoch": 0.6381581871144204, "grad_norm": 275.9958190917969, "learning_rate": 3.5913067679820592e-06, "loss": 12.2171, "step": 315910 }, { "epoch": 0.6381783877471042, "grad_norm": 398.24871826171875, "learning_rate": 3.5909718458086033e-06, "loss": 28.6376, "step": 315920 }, { "epoch": 0.638198588379788, "grad_norm": 519.3574829101562, "learning_rate": 3.5906369305025767e-06, "loss": 13.1821, "step": 315930 }, { "epoch": 0.6382187890124719, "grad_norm": 74.91946411132812, "learning_rate": 3.590302022065616e-06, "loss": 9.2701, "step": 315940 }, { "epoch": 0.6382389896451557, "grad_norm": 314.8899841308594, "learning_rate": 3.5899671204993535e-06, "loss": 20.7149, "step": 315950 }, { "epoch": 0.6382591902778395, "grad_norm": 198.80355834960938, "learning_rate": 3.589632225805419e-06, "loss": 34.3014, "step": 315960 }, { "epoch": 0.6382793909105233, "grad_norm": 125.96674346923828, "learning_rate": 3.589297337985446e-06, "loss": 14.2554, "step": 315970 }, { "epoch": 0.6382995915432071, "grad_norm": 0.0, "learning_rate": 3.5889624570410675e-06, "loss": 10.631, "step": 315980 }, { "epoch": 0.638319792175891, "grad_norm": 372.5596008300781, "learning_rate": 3.5886275829739144e-06, "loss": 16.3936, "step": 315990 }, { "epoch": 0.6383399928085748, "grad_norm": 350.9508361816406, "learning_rate": 3.5882927157856175e-06, "loss": 17.8655, "step": 316000 }, { "epoch": 0.6383601934412586, "grad_norm": 185.98765563964844, "learning_rate": 3.5879578554778137e-06, "loss": 32.5148, "step": 316010 }, { "epoch": 0.6383803940739424, "grad_norm": 164.63009643554688, "learning_rate": 3.5876230020521298e-06, "loss": 12.2725, "step": 316020 }, { "epoch": 0.6384005947066262, "grad_norm": 289.0091552734375, "learning_rate": 3.587288155510201e-06, "loss": 38.6229, "step": 316030 }, { "epoch": 0.6384207953393101, "grad_norm": 200.04074096679688, "learning_rate": 3.5869533158536583e-06, "loss": 31.8896, "step": 316040 }, { "epoch": 0.6384409959719939, "grad_norm": 318.0293884277344, "learning_rate": 3.586618483084134e-06, "loss": 17.5421, "step": 316050 }, { "epoch": 0.6384611966046777, "grad_norm": 653.26611328125, "learning_rate": 3.586283657203259e-06, "loss": 22.8768, "step": 316060 }, { "epoch": 0.6384813972373615, "grad_norm": 291.316650390625, "learning_rate": 3.5859488382126656e-06, "loss": 14.7977, "step": 316070 }, { "epoch": 0.6385015978700452, "grad_norm": 417.2750549316406, "learning_rate": 3.585614026113989e-06, "loss": 16.1932, "step": 316080 }, { "epoch": 0.638521798502729, "grad_norm": 424.402587890625, "learning_rate": 3.5852792209088543e-06, "loss": 17.4103, "step": 316090 }, { "epoch": 0.6385419991354129, "grad_norm": 281.0193786621094, "learning_rate": 3.584944422598899e-06, "loss": 16.3783, "step": 316100 }, { "epoch": 0.6385621997680967, "grad_norm": 548.0072021484375, "learning_rate": 3.5846096311857537e-06, "loss": 16.5864, "step": 316110 }, { "epoch": 0.6385824004007805, "grad_norm": 534.6842041015625, "learning_rate": 3.584274846671048e-06, "loss": 18.8777, "step": 316120 }, { "epoch": 0.6386026010334643, "grad_norm": 341.5393981933594, "learning_rate": 3.583940069056415e-06, "loss": 21.7011, "step": 316130 }, { "epoch": 0.6386228016661482, "grad_norm": 112.1914291381836, "learning_rate": 3.5836052983434878e-06, "loss": 29.0516, 
"step": 316140 }, { "epoch": 0.638643002298832, "grad_norm": 13.720465660095215, "learning_rate": 3.583270534533896e-06, "loss": 21.6894, "step": 316150 }, { "epoch": 0.6386632029315158, "grad_norm": 207.6351318359375, "learning_rate": 3.5829357776292694e-06, "loss": 16.9002, "step": 316160 }, { "epoch": 0.6386834035641996, "grad_norm": 38.644596099853516, "learning_rate": 3.582601027631246e-06, "loss": 17.5596, "step": 316170 }, { "epoch": 0.6387036041968834, "grad_norm": 460.9317626953125, "learning_rate": 3.5822662845414502e-06, "loss": 14.0099, "step": 316180 }, { "epoch": 0.6387238048295673, "grad_norm": 157.43426513671875, "learning_rate": 3.5819315483615175e-06, "loss": 20.7033, "step": 316190 }, { "epoch": 0.6387440054622511, "grad_norm": 424.1911926269531, "learning_rate": 3.5815968190930793e-06, "loss": 17.9277, "step": 316200 }, { "epoch": 0.6387642060949349, "grad_norm": 155.28729248046875, "learning_rate": 3.5812620967377653e-06, "loss": 9.7971, "step": 316210 }, { "epoch": 0.6387844067276187, "grad_norm": 581.9959716796875, "learning_rate": 3.5809273812972078e-06, "loss": 14.0527, "step": 316220 }, { "epoch": 0.6388046073603025, "grad_norm": 108.32624053955078, "learning_rate": 3.5805926727730367e-06, "loss": 15.8633, "step": 316230 }, { "epoch": 0.6388248079929864, "grad_norm": 359.7661437988281, "learning_rate": 3.5802579711668883e-06, "loss": 30.5839, "step": 316240 }, { "epoch": 0.6388450086256702, "grad_norm": 307.0447998046875, "learning_rate": 3.579923276480387e-06, "loss": 35.9667, "step": 316250 }, { "epoch": 0.638865209258354, "grad_norm": 421.31005859375, "learning_rate": 3.5795885887151687e-06, "loss": 17.2982, "step": 316260 }, { "epoch": 0.6388854098910378, "grad_norm": 143.21043395996094, "learning_rate": 3.5792539078728644e-06, "loss": 11.963, "step": 316270 }, { "epoch": 0.6389056105237216, "grad_norm": 504.7131042480469, "learning_rate": 3.578919233955103e-06, "loss": 26.5282, "step": 316280 }, { "epoch": 0.6389258111564055, "grad_norm": 73.53141021728516, "learning_rate": 3.5785845669635165e-06, "loss": 24.9844, "step": 316290 }, { "epoch": 0.6389460117890893, "grad_norm": 93.7257080078125, "learning_rate": 3.5782499068997386e-06, "loss": 13.139, "step": 316300 }, { "epoch": 0.6389662124217731, "grad_norm": 151.9158935546875, "learning_rate": 3.577915253765396e-06, "loss": 46.7024, "step": 316310 }, { "epoch": 0.6389864130544569, "grad_norm": 352.59100341796875, "learning_rate": 3.5775806075621215e-06, "loss": 20.6736, "step": 316320 }, { "epoch": 0.6390066136871407, "grad_norm": 134.93736267089844, "learning_rate": 3.5772459682915484e-06, "loss": 17.2972, "step": 316330 }, { "epoch": 0.6390268143198244, "grad_norm": 1136.881591796875, "learning_rate": 3.5769113359553055e-06, "loss": 28.1377, "step": 316340 }, { "epoch": 0.6390470149525083, "grad_norm": 588.3436279296875, "learning_rate": 3.5765767105550236e-06, "loss": 19.0263, "step": 316350 }, { "epoch": 0.6390672155851921, "grad_norm": 246.8373565673828, "learning_rate": 3.576242092092334e-06, "loss": 17.4476, "step": 316360 }, { "epoch": 0.6390874162178759, "grad_norm": 352.61163330078125, "learning_rate": 3.5759074805688694e-06, "loss": 24.8118, "step": 316370 }, { "epoch": 0.6391076168505597, "grad_norm": 385.70147705078125, "learning_rate": 3.5755728759862573e-06, "loss": 26.2128, "step": 316380 }, { "epoch": 0.6391278174832435, "grad_norm": 468.3145446777344, "learning_rate": 3.5752382783461297e-06, "loss": 24.5198, "step": 316390 }, { "epoch": 0.6391480181159274, "grad_norm": 68.4457015991211, 
"learning_rate": 3.5749036876501196e-06, "loss": 15.4026, "step": 316400 }, { "epoch": 0.6391682187486112, "grad_norm": 310.00048828125, "learning_rate": 3.5745691038998555e-06, "loss": 37.8392, "step": 316410 }, { "epoch": 0.639188419381295, "grad_norm": 363.8419494628906, "learning_rate": 3.5742345270969688e-06, "loss": 26.5229, "step": 316420 }, { "epoch": 0.6392086200139788, "grad_norm": 380.4399719238281, "learning_rate": 3.573899957243091e-06, "loss": 15.0728, "step": 316430 }, { "epoch": 0.6392288206466626, "grad_norm": 241.99363708496094, "learning_rate": 3.573565394339851e-06, "loss": 15.3496, "step": 316440 }, { "epoch": 0.6392490212793465, "grad_norm": 458.9059753417969, "learning_rate": 3.573230838388878e-06, "loss": 22.49, "step": 316450 }, { "epoch": 0.6392692219120303, "grad_norm": 110.33128356933594, "learning_rate": 3.572896289391809e-06, "loss": 21.3822, "step": 316460 }, { "epoch": 0.6392894225447141, "grad_norm": 110.6622314453125, "learning_rate": 3.5725617473502673e-06, "loss": 13.4087, "step": 316470 }, { "epoch": 0.6393096231773979, "grad_norm": 388.13360595703125, "learning_rate": 3.5722272122658874e-06, "loss": 9.8195, "step": 316480 }, { "epoch": 0.6393298238100817, "grad_norm": 321.4127197265625, "learning_rate": 3.5718926841402993e-06, "loss": 9.2979, "step": 316490 }, { "epoch": 0.6393500244427656, "grad_norm": 517.9979858398438, "learning_rate": 3.571558162975133e-06, "loss": 16.5989, "step": 316500 }, { "epoch": 0.6393702250754494, "grad_norm": 353.75634765625, "learning_rate": 3.5712236487720185e-06, "loss": 15.6718, "step": 316510 }, { "epoch": 0.6393904257081332, "grad_norm": 127.14871978759766, "learning_rate": 3.570889141532586e-06, "loss": 14.981, "step": 316520 }, { "epoch": 0.639410626340817, "grad_norm": 169.82476806640625, "learning_rate": 3.570554641258469e-06, "loss": 11.7585, "step": 316530 }, { "epoch": 0.6394308269735008, "grad_norm": 224.20399475097656, "learning_rate": 3.570220147951292e-06, "loss": 16.3962, "step": 316540 }, { "epoch": 0.6394510276061847, "grad_norm": 100.33985137939453, "learning_rate": 3.569885661612691e-06, "loss": 11.0874, "step": 316550 }, { "epoch": 0.6394712282388685, "grad_norm": 530.1268310546875, "learning_rate": 3.5695511822442934e-06, "loss": 18.7147, "step": 316560 }, { "epoch": 0.6394914288715523, "grad_norm": 151.81561279296875, "learning_rate": 3.5692167098477292e-06, "loss": 16.5754, "step": 316570 }, { "epoch": 0.6395116295042361, "grad_norm": 276.953369140625, "learning_rate": 3.5688822444246297e-06, "loss": 16.6768, "step": 316580 }, { "epoch": 0.6395318301369199, "grad_norm": 272.35028076171875, "learning_rate": 3.5685477859766254e-06, "loss": 6.2578, "step": 316590 }, { "epoch": 0.6395520307696037, "grad_norm": 531.3369750976562, "learning_rate": 3.568213334505345e-06, "loss": 14.722, "step": 316600 }, { "epoch": 0.6395722314022875, "grad_norm": 402.7672119140625, "learning_rate": 3.567878890012417e-06, "loss": 17.8026, "step": 316610 }, { "epoch": 0.6395924320349713, "grad_norm": 181.8702392578125, "learning_rate": 3.567544452499477e-06, "loss": 30.1878, "step": 316620 }, { "epoch": 0.6396126326676551, "grad_norm": 39.49435806274414, "learning_rate": 3.5672100219681495e-06, "loss": 18.6056, "step": 316630 }, { "epoch": 0.6396328333003389, "grad_norm": 177.6253204345703, "learning_rate": 3.5668755984200664e-06, "loss": 30.4399, "step": 316640 }, { "epoch": 0.6396530339330228, "grad_norm": 298.5770568847656, "learning_rate": 3.5665411818568596e-06, "loss": 17.0189, "step": 316650 }, { "epoch": 
0.6396732345657066, "grad_norm": 22.274227142333984, "learning_rate": 3.5662067722801556e-06, "loss": 9.9698, "step": 316660 }, { "epoch": 0.6396934351983904, "grad_norm": 280.33935546875, "learning_rate": 3.5658723696915864e-06, "loss": 26.7317, "step": 316670 }, { "epoch": 0.6397136358310742, "grad_norm": 452.43603515625, "learning_rate": 3.5655379740927796e-06, "loss": 11.8348, "step": 316680 }, { "epoch": 0.639733836463758, "grad_norm": 514.5504150390625, "learning_rate": 3.5652035854853706e-06, "loss": 17.5395, "step": 316690 }, { "epoch": 0.6397540370964419, "grad_norm": 184.27651977539062, "learning_rate": 3.564869203870982e-06, "loss": 27.3692, "step": 316700 }, { "epoch": 0.6397742377291257, "grad_norm": 185.65682983398438, "learning_rate": 3.564534829251248e-06, "loss": 13.7503, "step": 316710 }, { "epoch": 0.6397944383618095, "grad_norm": 395.0429992675781, "learning_rate": 3.564200461627798e-06, "loss": 17.4748, "step": 316720 }, { "epoch": 0.6398146389944933, "grad_norm": 604.4706420898438, "learning_rate": 3.5638661010022604e-06, "loss": 11.7635, "step": 316730 }, { "epoch": 0.6398348396271771, "grad_norm": 352.2259826660156, "learning_rate": 3.5635317473762642e-06, "loss": 24.211, "step": 316740 }, { "epoch": 0.639855040259861, "grad_norm": 209.53041076660156, "learning_rate": 3.5631974007514414e-06, "loss": 12.0263, "step": 316750 }, { "epoch": 0.6398752408925448, "grad_norm": 357.42181396484375, "learning_rate": 3.562863061129419e-06, "loss": 15.5566, "step": 316760 }, { "epoch": 0.6398954415252286, "grad_norm": 541.9507446289062, "learning_rate": 3.562528728511827e-06, "loss": 16.3403, "step": 316770 }, { "epoch": 0.6399156421579124, "grad_norm": 752.5086669921875, "learning_rate": 3.562194402900299e-06, "loss": 22.2073, "step": 316780 }, { "epoch": 0.6399358427905962, "grad_norm": 343.2121276855469, "learning_rate": 3.561860084296458e-06, "loss": 11.9575, "step": 316790 }, { "epoch": 0.63995604342328, "grad_norm": 189.61074829101562, "learning_rate": 3.561525772701937e-06, "loss": 11.9739, "step": 316800 }, { "epoch": 0.6399762440559639, "grad_norm": 220.5204315185547, "learning_rate": 3.5611914681183647e-06, "loss": 10.8192, "step": 316810 }, { "epoch": 0.6399964446886477, "grad_norm": 346.1005554199219, "learning_rate": 3.5608571705473725e-06, "loss": 38.7872, "step": 316820 }, { "epoch": 0.6400166453213315, "grad_norm": 56.42607116699219, "learning_rate": 3.5605228799905865e-06, "loss": 14.3211, "step": 316830 }, { "epoch": 0.6400368459540153, "grad_norm": 864.3530883789062, "learning_rate": 3.5601885964496364e-06, "loss": 26.8919, "step": 316840 }, { "epoch": 0.640057046586699, "grad_norm": 749.6741943359375, "learning_rate": 3.559854319926156e-06, "loss": 21.41, "step": 316850 }, { "epoch": 0.6400772472193829, "grad_norm": 251.44070434570312, "learning_rate": 3.559520050421767e-06, "loss": 14.2742, "step": 316860 }, { "epoch": 0.6400974478520667, "grad_norm": 424.0389709472656, "learning_rate": 3.559185787938104e-06, "loss": 27.6596, "step": 316870 }, { "epoch": 0.6401176484847505, "grad_norm": 130.6412353515625, "learning_rate": 3.558851532476796e-06, "loss": 24.9391, "step": 316880 }, { "epoch": 0.6401378491174343, "grad_norm": 1513.4830322265625, "learning_rate": 3.5585172840394695e-06, "loss": 18.4112, "step": 316890 }, { "epoch": 0.6401580497501181, "grad_norm": 254.52268981933594, "learning_rate": 3.5581830426277554e-06, "loss": 27.6619, "step": 316900 }, { "epoch": 0.640178250382802, "grad_norm": 243.79246520996094, "learning_rate": 3.5578488082432828e-06, 
"loss": 18.7862, "step": 316910 }, { "epoch": 0.6401984510154858, "grad_norm": 346.9362487792969, "learning_rate": 3.557514580887679e-06, "loss": 9.7744, "step": 316920 }, { "epoch": 0.6402186516481696, "grad_norm": 449.9012145996094, "learning_rate": 3.5571803605625734e-06, "loss": 27.1587, "step": 316930 }, { "epoch": 0.6402388522808534, "grad_norm": 437.7616271972656, "learning_rate": 3.556846147269598e-06, "loss": 28.9926, "step": 316940 }, { "epoch": 0.6402590529135372, "grad_norm": 733.4630126953125, "learning_rate": 3.556511941010378e-06, "loss": 21.667, "step": 316950 }, { "epoch": 0.6402792535462211, "grad_norm": 65.58394622802734, "learning_rate": 3.5561777417865438e-06, "loss": 12.3725, "step": 316960 }, { "epoch": 0.6402994541789049, "grad_norm": 613.214599609375, "learning_rate": 3.5558435495997245e-06, "loss": 27.3721, "step": 316970 }, { "epoch": 0.6403196548115887, "grad_norm": 54.631591796875, "learning_rate": 3.5555093644515496e-06, "loss": 20.7772, "step": 316980 }, { "epoch": 0.6403398554442725, "grad_norm": 275.8143615722656, "learning_rate": 3.5551751863436458e-06, "loss": 10.1682, "step": 316990 }, { "epoch": 0.6403600560769563, "grad_norm": 332.1075134277344, "learning_rate": 3.5548410152776414e-06, "loss": 14.8546, "step": 317000 }, { "epoch": 0.6403802567096402, "grad_norm": 378.8788757324219, "learning_rate": 3.5545068512551695e-06, "loss": 12.4129, "step": 317010 }, { "epoch": 0.640400457342324, "grad_norm": 625.5926513671875, "learning_rate": 3.5541726942778544e-06, "loss": 14.9639, "step": 317020 }, { "epoch": 0.6404206579750078, "grad_norm": 1099.2337646484375, "learning_rate": 3.553838544347326e-06, "loss": 31.2647, "step": 317030 }, { "epoch": 0.6404408586076916, "grad_norm": 859.2303466796875, "learning_rate": 3.5535044014652143e-06, "loss": 22.844, "step": 317040 }, { "epoch": 0.6404610592403754, "grad_norm": 304.50958251953125, "learning_rate": 3.553170265633146e-06, "loss": 16.3254, "step": 317050 }, { "epoch": 0.6404812598730593, "grad_norm": 1029.9991455078125, "learning_rate": 3.5528361368527503e-06, "loss": 32.4318, "step": 317060 }, { "epoch": 0.6405014605057431, "grad_norm": 398.1458740234375, "learning_rate": 3.552502015125656e-06, "loss": 14.9865, "step": 317070 }, { "epoch": 0.6405216611384269, "grad_norm": 386.78741455078125, "learning_rate": 3.5521679004534905e-06, "loss": 35.0742, "step": 317080 }, { "epoch": 0.6405418617711107, "grad_norm": 220.7437286376953, "learning_rate": 3.551833792837883e-06, "loss": 18.2597, "step": 317090 }, { "epoch": 0.6405620624037945, "grad_norm": 330.812255859375, "learning_rate": 3.5514996922804636e-06, "loss": 20.4139, "step": 317100 }, { "epoch": 0.6405822630364782, "grad_norm": 7.566112995147705, "learning_rate": 3.5511655987828583e-06, "loss": 19.3291, "step": 317110 }, { "epoch": 0.6406024636691621, "grad_norm": 336.25860595703125, "learning_rate": 3.550831512346695e-06, "loss": 10.7505, "step": 317120 }, { "epoch": 0.6406226643018459, "grad_norm": 455.1893005371094, "learning_rate": 3.550497432973603e-06, "loss": 11.5504, "step": 317130 }, { "epoch": 0.6406428649345297, "grad_norm": 328.352783203125, "learning_rate": 3.5501633606652143e-06, "loss": 10.1881, "step": 317140 }, { "epoch": 0.6406630655672135, "grad_norm": 340.2912902832031, "learning_rate": 3.5498292954231497e-06, "loss": 17.0818, "step": 317150 }, { "epoch": 0.6406832661998973, "grad_norm": 1005.4075317382812, "learning_rate": 3.549495237249042e-06, "loss": 36.5023, "step": 317160 }, { "epoch": 0.6407034668325812, "grad_norm": 
9.111109733581543, "learning_rate": 3.5491611861445198e-06, "loss": 11.3665, "step": 317170 }, { "epoch": 0.640723667465265, "grad_norm": 2.9664359092712402, "learning_rate": 3.5488271421112093e-06, "loss": 17.2836, "step": 317180 }, { "epoch": 0.6407438680979488, "grad_norm": 191.48548889160156, "learning_rate": 3.5484931051507387e-06, "loss": 11.0455, "step": 317190 }, { "epoch": 0.6407640687306326, "grad_norm": 114.32453918457031, "learning_rate": 3.548159075264738e-06, "loss": 24.9184, "step": 317200 }, { "epoch": 0.6407842693633164, "grad_norm": 285.5618591308594, "learning_rate": 3.547825052454833e-06, "loss": 14.7186, "step": 317210 }, { "epoch": 0.6408044699960003, "grad_norm": 222.58468627929688, "learning_rate": 3.5474910367226517e-06, "loss": 12.1983, "step": 317220 }, { "epoch": 0.6408246706286841, "grad_norm": 240.85787963867188, "learning_rate": 3.5471570280698257e-06, "loss": 17.1003, "step": 317230 }, { "epoch": 0.6408448712613679, "grad_norm": 673.8321533203125, "learning_rate": 3.5468230264979774e-06, "loss": 18.5469, "step": 317240 }, { "epoch": 0.6408650718940517, "grad_norm": 172.52186584472656, "learning_rate": 3.5464890320087374e-06, "loss": 14.3257, "step": 317250 }, { "epoch": 0.6408852725267355, "grad_norm": 260.2057189941406, "learning_rate": 3.5461550446037363e-06, "loss": 26.1872, "step": 317260 }, { "epoch": 0.6409054731594194, "grad_norm": 325.0608825683594, "learning_rate": 3.545821064284597e-06, "loss": 18.0519, "step": 317270 }, { "epoch": 0.6409256737921032, "grad_norm": 111.28011322021484, "learning_rate": 3.5454870910529494e-06, "loss": 22.6814, "step": 317280 }, { "epoch": 0.640945874424787, "grad_norm": 1047.8089599609375, "learning_rate": 3.545153124910421e-06, "loss": 27.8755, "step": 317290 }, { "epoch": 0.6409660750574708, "grad_norm": 836.8259887695312, "learning_rate": 3.5448191658586423e-06, "loss": 16.5274, "step": 317300 }, { "epoch": 0.6409862756901546, "grad_norm": 279.5603332519531, "learning_rate": 3.5444852138992357e-06, "loss": 5.7976, "step": 317310 }, { "epoch": 0.6410064763228385, "grad_norm": 363.93927001953125, "learning_rate": 3.544151269033832e-06, "loss": 14.0555, "step": 317320 }, { "epoch": 0.6410266769555223, "grad_norm": 260.6170349121094, "learning_rate": 3.54381733126406e-06, "loss": 12.1871, "step": 317330 }, { "epoch": 0.6410468775882061, "grad_norm": 340.6725158691406, "learning_rate": 3.5434834005915453e-06, "loss": 41.2043, "step": 317340 }, { "epoch": 0.6410670782208899, "grad_norm": 303.91064453125, "learning_rate": 3.5431494770179154e-06, "loss": 22.6507, "step": 317350 }, { "epoch": 0.6410872788535736, "grad_norm": 644.1407470703125, "learning_rate": 3.5428155605447988e-06, "loss": 27.9938, "step": 317360 }, { "epoch": 0.6411074794862575, "grad_norm": 138.9471435546875, "learning_rate": 3.5424816511738213e-06, "loss": 22.4929, "step": 317370 }, { "epoch": 0.6411276801189413, "grad_norm": 172.72393798828125, "learning_rate": 3.5421477489066115e-06, "loss": 26.1033, "step": 317380 }, { "epoch": 0.6411478807516251, "grad_norm": 636.4935913085938, "learning_rate": 3.541813853744799e-06, "loss": 19.903, "step": 317390 }, { "epoch": 0.6411680813843089, "grad_norm": 612.6116943359375, "learning_rate": 3.5414799656900057e-06, "loss": 20.3424, "step": 317400 }, { "epoch": 0.6411882820169927, "grad_norm": 270.06976318359375, "learning_rate": 3.541146084743864e-06, "loss": 17.2903, "step": 317410 }, { "epoch": 0.6412084826496766, "grad_norm": 270.260498046875, "learning_rate": 3.540812210907999e-06, "loss": 17.609, "step": 
317420 }, { "epoch": 0.6412286832823604, "grad_norm": 544.850830078125, "learning_rate": 3.5404783441840383e-06, "loss": 10.9086, "step": 317430 }, { "epoch": 0.6412488839150442, "grad_norm": 265.4259033203125, "learning_rate": 3.5401444845736092e-06, "loss": 32.4151, "step": 317440 }, { "epoch": 0.641269084547728, "grad_norm": 77.37215423583984, "learning_rate": 3.539810632078338e-06, "loss": 8.7036, "step": 317450 }, { "epoch": 0.6412892851804118, "grad_norm": 321.9674072265625, "learning_rate": 3.5394767866998555e-06, "loss": 15.9489, "step": 317460 }, { "epoch": 0.6413094858130957, "grad_norm": 332.39422607421875, "learning_rate": 3.539142948439782e-06, "loss": 10.9451, "step": 317470 }, { "epoch": 0.6413296864457795, "grad_norm": 7.552122592926025, "learning_rate": 3.538809117299751e-06, "loss": 14.4336, "step": 317480 }, { "epoch": 0.6413498870784633, "grad_norm": 172.63917541503906, "learning_rate": 3.538475293281387e-06, "loss": 15.9912, "step": 317490 }, { "epoch": 0.6413700877111471, "grad_norm": 254.71444702148438, "learning_rate": 3.538141476386317e-06, "loss": 28.018, "step": 317500 }, { "epoch": 0.6413902883438309, "grad_norm": 185.06678771972656, "learning_rate": 3.5378076666161677e-06, "loss": 24.2105, "step": 317510 }, { "epoch": 0.6414104889765148, "grad_norm": 95.70032501220703, "learning_rate": 3.537473863972568e-06, "loss": 11.2273, "step": 317520 }, { "epoch": 0.6414306896091986, "grad_norm": 920.5741577148438, "learning_rate": 3.537140068457142e-06, "loss": 24.7874, "step": 317530 }, { "epoch": 0.6414508902418824, "grad_norm": 1017.8689575195312, "learning_rate": 3.5368062800715163e-06, "loss": 12.5289, "step": 317540 }, { "epoch": 0.6414710908745662, "grad_norm": 587.6309204101562, "learning_rate": 3.536472498817323e-06, "loss": 20.4492, "step": 317550 }, { "epoch": 0.64149129150725, "grad_norm": 298.1929931640625, "learning_rate": 3.536138724696182e-06, "loss": 18.8642, "step": 317560 }, { "epoch": 0.6415114921399339, "grad_norm": 717.33056640625, "learning_rate": 3.535804957709724e-06, "loss": 20.3758, "step": 317570 }, { "epoch": 0.6415316927726177, "grad_norm": 240.915771484375, "learning_rate": 3.5354711978595757e-06, "loss": 22.284, "step": 317580 }, { "epoch": 0.6415518934053015, "grad_norm": 494.214111328125, "learning_rate": 3.5351374451473643e-06, "loss": 17.0445, "step": 317590 }, { "epoch": 0.6415720940379853, "grad_norm": 544.8488159179688, "learning_rate": 3.5348036995747135e-06, "loss": 14.0251, "step": 317600 }, { "epoch": 0.6415922946706691, "grad_norm": 244.27670288085938, "learning_rate": 3.5344699611432515e-06, "loss": 23.8881, "step": 317610 }, { "epoch": 0.6416124953033528, "grad_norm": 0.0, "learning_rate": 3.5341362298546077e-06, "loss": 10.7942, "step": 317620 }, { "epoch": 0.6416326959360367, "grad_norm": 710.4559936523438, "learning_rate": 3.533802505710403e-06, "loss": 22.6903, "step": 317630 }, { "epoch": 0.6416528965687205, "grad_norm": 348.663818359375, "learning_rate": 3.5334687887122687e-06, "loss": 23.5508, "step": 317640 }, { "epoch": 0.6416730972014043, "grad_norm": 430.3636779785156, "learning_rate": 3.5331350788618303e-06, "loss": 24.0841, "step": 317650 }, { "epoch": 0.6416932978340881, "grad_norm": 450.00042724609375, "learning_rate": 3.532801376160713e-06, "loss": 17.1423, "step": 317660 }, { "epoch": 0.641713498466772, "grad_norm": 459.4471740722656, "learning_rate": 3.5324676806105428e-06, "loss": 18.9422, "step": 317670 }, { "epoch": 0.6417336990994558, "grad_norm": 651.5348510742188, "learning_rate": 
3.5321339922129493e-06, "loss": 19.6253, "step": 317680 }, { "epoch": 0.6417538997321396, "grad_norm": 201.7771759033203, "learning_rate": 3.5318003109695544e-06, "loss": 11.8734, "step": 317690 }, { "epoch": 0.6417741003648234, "grad_norm": 113.5165023803711, "learning_rate": 3.531466636881987e-06, "loss": 24.3048, "step": 317700 }, { "epoch": 0.6417943009975072, "grad_norm": 626.1658935546875, "learning_rate": 3.531132969951875e-06, "loss": 21.0166, "step": 317710 }, { "epoch": 0.641814501630191, "grad_norm": 527.8425903320312, "learning_rate": 3.5307993101808415e-06, "loss": 23.8735, "step": 317720 }, { "epoch": 0.6418347022628749, "grad_norm": 415.9170837402344, "learning_rate": 3.5304656575705133e-06, "loss": 21.1418, "step": 317730 }, { "epoch": 0.6418549028955587, "grad_norm": 46.663726806640625, "learning_rate": 3.530132012122518e-06, "loss": 17.8677, "step": 317740 }, { "epoch": 0.6418751035282425, "grad_norm": 438.56439208984375, "learning_rate": 3.5297983738384813e-06, "loss": 21.2878, "step": 317750 }, { "epoch": 0.6418953041609263, "grad_norm": 356.69024658203125, "learning_rate": 3.529464742720028e-06, "loss": 16.4946, "step": 317760 }, { "epoch": 0.6419155047936101, "grad_norm": 89.83252716064453, "learning_rate": 3.5291311187687847e-06, "loss": 21.94, "step": 317770 }, { "epoch": 0.641935705426294, "grad_norm": 420.8074951171875, "learning_rate": 3.5287975019863806e-06, "loss": 17.3688, "step": 317780 }, { "epoch": 0.6419559060589778, "grad_norm": 24.430877685546875, "learning_rate": 3.5284638923744373e-06, "loss": 11.2702, "step": 317790 }, { "epoch": 0.6419761066916616, "grad_norm": 2.147768259048462, "learning_rate": 3.5281302899345825e-06, "loss": 24.0063, "step": 317800 }, { "epoch": 0.6419963073243454, "grad_norm": 535.706787109375, "learning_rate": 3.527796694668443e-06, "loss": 16.868, "step": 317810 }, { "epoch": 0.6420165079570292, "grad_norm": 460.1740417480469, "learning_rate": 3.5274631065776433e-06, "loss": 32.3511, "step": 317820 }, { "epoch": 0.6420367085897131, "grad_norm": 386.732177734375, "learning_rate": 3.527129525663808e-06, "loss": 11.5894, "step": 317830 }, { "epoch": 0.6420569092223969, "grad_norm": 189.55186462402344, "learning_rate": 3.526795951928569e-06, "loss": 15.4929, "step": 317840 }, { "epoch": 0.6420771098550807, "grad_norm": 546.142822265625, "learning_rate": 3.5264623853735435e-06, "loss": 21.2189, "step": 317850 }, { "epoch": 0.6420973104877645, "grad_norm": 237.81240844726562, "learning_rate": 3.5261288260003635e-06, "loss": 16.0755, "step": 317860 }, { "epoch": 0.6421175111204483, "grad_norm": 119.85090637207031, "learning_rate": 3.5257952738106528e-06, "loss": 29.9819, "step": 317870 }, { "epoch": 0.6421377117531321, "grad_norm": 2269.2265625, "learning_rate": 3.525461728806038e-06, "loss": 28.9295, "step": 317880 }, { "epoch": 0.6421579123858159, "grad_norm": 533.5177001953125, "learning_rate": 3.525128190988143e-06, "loss": 11.0176, "step": 317890 }, { "epoch": 0.6421781130184997, "grad_norm": 296.90008544921875, "learning_rate": 3.524794660358593e-06, "loss": 18.0153, "step": 317900 }, { "epoch": 0.6421983136511835, "grad_norm": 170.51734924316406, "learning_rate": 3.5244611369190184e-06, "loss": 10.8991, "step": 317910 }, { "epoch": 0.6422185142838673, "grad_norm": 29.92284393310547, "learning_rate": 3.5241276206710374e-06, "loss": 22.0229, "step": 317920 }, { "epoch": 0.6422387149165512, "grad_norm": 337.820068359375, "learning_rate": 3.5237941116162812e-06, "loss": 14.0883, "step": 317930 }, { "epoch": 0.642258915549235, 
"grad_norm": 317.5273132324219, "learning_rate": 3.523460609756374e-06, "loss": 15.1656, "step": 317940 }, { "epoch": 0.6422791161819188, "grad_norm": 71.62299346923828, "learning_rate": 3.5231271150929403e-06, "loss": 14.2908, "step": 317950 }, { "epoch": 0.6422993168146026, "grad_norm": 34.81813049316406, "learning_rate": 3.5227936276276055e-06, "loss": 8.2766, "step": 317960 }, { "epoch": 0.6423195174472864, "grad_norm": 441.2760314941406, "learning_rate": 3.522460147361996e-06, "loss": 16.7027, "step": 317970 }, { "epoch": 0.6423397180799703, "grad_norm": 452.09429931640625, "learning_rate": 3.522126674297736e-06, "loss": 24.5151, "step": 317980 }, { "epoch": 0.6423599187126541, "grad_norm": 331.5494079589844, "learning_rate": 3.5217932084364505e-06, "loss": 14.5132, "step": 317990 }, { "epoch": 0.6423801193453379, "grad_norm": 896.404541015625, "learning_rate": 3.521459749779769e-06, "loss": 18.3222, "step": 318000 }, { "epoch": 0.6424003199780217, "grad_norm": 366.61956787109375, "learning_rate": 3.5211262983293094e-06, "loss": 17.7489, "step": 318010 }, { "epoch": 0.6424205206107055, "grad_norm": 638.5956420898438, "learning_rate": 3.520792854086702e-06, "loss": 14.1063, "step": 318020 }, { "epoch": 0.6424407212433894, "grad_norm": 483.6697692871094, "learning_rate": 3.520459417053571e-06, "loss": 14.0071, "step": 318030 }, { "epoch": 0.6424609218760732, "grad_norm": 473.08154296875, "learning_rate": 3.520125987231542e-06, "loss": 21.0332, "step": 318040 }, { "epoch": 0.642481122508757, "grad_norm": 607.0555419921875, "learning_rate": 3.5197925646222387e-06, "loss": 22.463, "step": 318050 }, { "epoch": 0.6425013231414408, "grad_norm": 68.604736328125, "learning_rate": 3.5194591492272863e-06, "loss": 18.1951, "step": 318060 }, { "epoch": 0.6425215237741246, "grad_norm": 1254.7269287109375, "learning_rate": 3.519125741048313e-06, "loss": 21.7519, "step": 318070 }, { "epoch": 0.6425417244068085, "grad_norm": 354.39910888671875, "learning_rate": 3.5187923400869384e-06, "loss": 20.0988, "step": 318080 }, { "epoch": 0.6425619250394923, "grad_norm": 697.2653198242188, "learning_rate": 3.5184589463447918e-06, "loss": 16.5158, "step": 318090 }, { "epoch": 0.6425821256721761, "grad_norm": 53.732444763183594, "learning_rate": 3.5181255598234963e-06, "loss": 21.9239, "step": 318100 }, { "epoch": 0.6426023263048599, "grad_norm": 291.7712707519531, "learning_rate": 3.5177921805246772e-06, "loss": 20.0169, "step": 318110 }, { "epoch": 0.6426225269375437, "grad_norm": 314.7015686035156, "learning_rate": 3.5174588084499594e-06, "loss": 16.4968, "step": 318120 }, { "epoch": 0.6426427275702274, "grad_norm": 634.839111328125, "learning_rate": 3.5171254436009684e-06, "loss": 15.5266, "step": 318130 }, { "epoch": 0.6426629282029113, "grad_norm": 398.2506408691406, "learning_rate": 3.5167920859793263e-06, "loss": 16.1789, "step": 318140 }, { "epoch": 0.6426831288355951, "grad_norm": 501.5930480957031, "learning_rate": 3.5164587355866593e-06, "loss": 26.3209, "step": 318150 }, { "epoch": 0.6427033294682789, "grad_norm": 763.8973999023438, "learning_rate": 3.5161253924245955e-06, "loss": 24.0423, "step": 318160 }, { "epoch": 0.6427235301009627, "grad_norm": 507.8896789550781, "learning_rate": 3.5157920564947535e-06, "loss": 18.6836, "step": 318170 }, { "epoch": 0.6427437307336465, "grad_norm": 478.6862487792969, "learning_rate": 3.5154587277987618e-06, "loss": 13.5403, "step": 318180 }, { "epoch": 0.6427639313663304, "grad_norm": 221.6300506591797, "learning_rate": 3.5151254063382445e-06, "loss": 18.9185, 
"step": 318190 }, { "epoch": 0.6427841319990142, "grad_norm": 795.54052734375, "learning_rate": 3.5147920921148267e-06, "loss": 12.6649, "step": 318200 }, { "epoch": 0.642804332631698, "grad_norm": 219.04025268554688, "learning_rate": 3.514458785130131e-06, "loss": 31.2245, "step": 318210 }, { "epoch": 0.6428245332643818, "grad_norm": 804.419677734375, "learning_rate": 3.514125485385782e-06, "loss": 18.6786, "step": 318220 }, { "epoch": 0.6428447338970656, "grad_norm": 180.21267700195312, "learning_rate": 3.5137921928834085e-06, "loss": 20.5546, "step": 318230 }, { "epoch": 0.6428649345297495, "grad_norm": 443.3706970214844, "learning_rate": 3.5134589076246284e-06, "loss": 31.4394, "step": 318240 }, { "epoch": 0.6428851351624333, "grad_norm": 238.41075134277344, "learning_rate": 3.5131256296110703e-06, "loss": 13.2463, "step": 318250 }, { "epoch": 0.6429053357951171, "grad_norm": 209.2345428466797, "learning_rate": 3.512792358844359e-06, "loss": 12.0016, "step": 318260 }, { "epoch": 0.6429255364278009, "grad_norm": 417.237548828125, "learning_rate": 3.5124590953261155e-06, "loss": 7.2993, "step": 318270 }, { "epoch": 0.6429457370604847, "grad_norm": 220.30819702148438, "learning_rate": 3.5121258390579667e-06, "loss": 25.4603, "step": 318280 }, { "epoch": 0.6429659376931686, "grad_norm": 382.668212890625, "learning_rate": 3.511792590041537e-06, "loss": 19.8533, "step": 318290 }, { "epoch": 0.6429861383258524, "grad_norm": 755.5155639648438, "learning_rate": 3.511459348278448e-06, "loss": 27.1435, "step": 318300 }, { "epoch": 0.6430063389585362, "grad_norm": 353.08807373046875, "learning_rate": 3.511126113770325e-06, "loss": 16.8397, "step": 318310 }, { "epoch": 0.64302653959122, "grad_norm": 201.4923553466797, "learning_rate": 3.510792886518795e-06, "loss": 22.9137, "step": 318320 }, { "epoch": 0.6430467402239038, "grad_norm": 553.3013916015625, "learning_rate": 3.5104596665254786e-06, "loss": 21.7877, "step": 318330 }, { "epoch": 0.6430669408565877, "grad_norm": 88.64137268066406, "learning_rate": 3.510126453792001e-06, "loss": 11.063, "step": 318340 }, { "epoch": 0.6430871414892715, "grad_norm": 314.2059326171875, "learning_rate": 3.509793248319987e-06, "loss": 15.1198, "step": 318350 }, { "epoch": 0.6431073421219553, "grad_norm": 189.05133056640625, "learning_rate": 3.509460050111061e-06, "loss": 23.6499, "step": 318360 }, { "epoch": 0.6431275427546391, "grad_norm": 161.51707458496094, "learning_rate": 3.5091268591668427e-06, "loss": 22.2294, "step": 318370 }, { "epoch": 0.643147743387323, "grad_norm": 360.7963562011719, "learning_rate": 3.5087936754889614e-06, "loss": 12.5579, "step": 318380 }, { "epoch": 0.6431679440200067, "grad_norm": 192.18182373046875, "learning_rate": 3.5084604990790395e-06, "loss": 21.6322, "step": 318390 }, { "epoch": 0.6431881446526905, "grad_norm": 82.76537322998047, "learning_rate": 3.508127329938699e-06, "loss": 14.3149, "step": 318400 }, { "epoch": 0.6432083452853743, "grad_norm": 241.1047821044922, "learning_rate": 3.5077941680695653e-06, "loss": 22.9015, "step": 318410 }, { "epoch": 0.6432285459180581, "grad_norm": 192.6389617919922, "learning_rate": 3.507461013473263e-06, "loss": 14.514, "step": 318420 }, { "epoch": 0.6432487465507419, "grad_norm": 626.179443359375, "learning_rate": 3.507127866151413e-06, "loss": 16.351, "step": 318430 }, { "epoch": 0.6432689471834258, "grad_norm": 791.4384765625, "learning_rate": 3.50679472610564e-06, "loss": 27.5719, "step": 318440 }, { "epoch": 0.6432891478161096, "grad_norm": 598.8052978515625, "learning_rate": 
3.5064615933375724e-06, "loss": 14.8814, "step": 318450 }, { "epoch": 0.6433093484487934, "grad_norm": 482.283935546875, "learning_rate": 3.506128467848826e-06, "loss": 13.2172, "step": 318460 }, { "epoch": 0.6433295490814772, "grad_norm": 335.20953369140625, "learning_rate": 3.505795349641029e-06, "loss": 18.9089, "step": 318470 }, { "epoch": 0.643349749714161, "grad_norm": 229.18142700195312, "learning_rate": 3.5054622387158044e-06, "loss": 38.285, "step": 318480 }, { "epoch": 0.6433699503468449, "grad_norm": 599.0580444335938, "learning_rate": 3.505129135074777e-06, "loss": 20.717, "step": 318490 }, { "epoch": 0.6433901509795287, "grad_norm": 186.5522918701172, "learning_rate": 3.5047960387195673e-06, "loss": 9.9613, "step": 318500 }, { "epoch": 0.6434103516122125, "grad_norm": 494.20751953125, "learning_rate": 3.5044629496517997e-06, "loss": 26.5297, "step": 318510 }, { "epoch": 0.6434305522448963, "grad_norm": 444.7451477050781, "learning_rate": 3.5041298678731017e-06, "loss": 32.5897, "step": 318520 }, { "epoch": 0.6434507528775801, "grad_norm": 436.12908935546875, "learning_rate": 3.503796793385089e-06, "loss": 18.1273, "step": 318530 }, { "epoch": 0.643470953510264, "grad_norm": 414.7536315917969, "learning_rate": 3.503463726189391e-06, "loss": 21.6207, "step": 318540 }, { "epoch": 0.6434911541429478, "grad_norm": 160.6037139892578, "learning_rate": 3.503130666287631e-06, "loss": 10.8208, "step": 318550 }, { "epoch": 0.6435113547756316, "grad_norm": 529.0670776367188, "learning_rate": 3.502797613681429e-06, "loss": 16.9283, "step": 318560 }, { "epoch": 0.6435315554083154, "grad_norm": 207.29319763183594, "learning_rate": 3.50246456837241e-06, "loss": 18.6089, "step": 318570 }, { "epoch": 0.6435517560409992, "grad_norm": 298.92926025390625, "learning_rate": 3.5021315303621973e-06, "loss": 11.2367, "step": 318580 }, { "epoch": 0.643571956673683, "grad_norm": 394.2572937011719, "learning_rate": 3.5017984996524134e-06, "loss": 8.8136, "step": 318590 }, { "epoch": 0.6435921573063669, "grad_norm": 236.72897338867188, "learning_rate": 3.501465476244681e-06, "loss": 16.2428, "step": 318600 }, { "epoch": 0.6436123579390507, "grad_norm": 625.5629272460938, "learning_rate": 3.501132460140627e-06, "loss": 16.4657, "step": 318610 }, { "epoch": 0.6436325585717345, "grad_norm": 346.51800537109375, "learning_rate": 3.5007994513418687e-06, "loss": 14.7584, "step": 318620 }, { "epoch": 0.6436527592044183, "grad_norm": 295.20001220703125, "learning_rate": 3.500466449850033e-06, "loss": 19.2477, "step": 318630 }, { "epoch": 0.643672959837102, "grad_norm": 506.7414245605469, "learning_rate": 3.500133455666742e-06, "loss": 16.469, "step": 318640 }, { "epoch": 0.6436931604697859, "grad_norm": 559.2400512695312, "learning_rate": 3.49980046879362e-06, "loss": 33.4135, "step": 318650 }, { "epoch": 0.6437133611024697, "grad_norm": 353.7267761230469, "learning_rate": 3.4994674892322867e-06, "loss": 24.5059, "step": 318660 }, { "epoch": 0.6437335617351535, "grad_norm": 522.9329223632812, "learning_rate": 3.4991345169843666e-06, "loss": 32.1731, "step": 318670 }, { "epoch": 0.6437537623678373, "grad_norm": 488.3183898925781, "learning_rate": 3.4988015520514856e-06, "loss": 20.1564, "step": 318680 }, { "epoch": 0.6437739630005211, "grad_norm": 139.21360778808594, "learning_rate": 3.4984685944352604e-06, "loss": 19.5322, "step": 318690 }, { "epoch": 0.643794163633205, "grad_norm": 479.5440368652344, "learning_rate": 3.498135644137318e-06, "loss": 43.3984, "step": 318700 }, { "epoch": 0.6438143642658888, 
"grad_norm": 432.9505615234375, "learning_rate": 3.4978027011592826e-06, "loss": 25.1056, "step": 318710 }, { "epoch": 0.6438345648985726, "grad_norm": 688.2528076171875, "learning_rate": 3.4974697655027724e-06, "loss": 14.1868, "step": 318720 }, { "epoch": 0.6438547655312564, "grad_norm": 500.7982482910156, "learning_rate": 3.4971368371694126e-06, "loss": 31.8568, "step": 318730 }, { "epoch": 0.6438749661639402, "grad_norm": 783.8529052734375, "learning_rate": 3.496803916160827e-06, "loss": 24.109, "step": 318740 }, { "epoch": 0.6438951667966241, "grad_norm": 94.44432830810547, "learning_rate": 3.4964710024786354e-06, "loss": 10.444, "step": 318750 }, { "epoch": 0.6439153674293079, "grad_norm": 167.6468963623047, "learning_rate": 3.4961380961244605e-06, "loss": 16.7094, "step": 318760 }, { "epoch": 0.6439355680619917, "grad_norm": 152.32901000976562, "learning_rate": 3.49580519709993e-06, "loss": 23.922, "step": 318770 }, { "epoch": 0.6439557686946755, "grad_norm": 65.15750122070312, "learning_rate": 3.4954723054066593e-06, "loss": 13.0673, "step": 318780 }, { "epoch": 0.6439759693273593, "grad_norm": 154.20431518554688, "learning_rate": 3.4951394210462746e-06, "loss": 9.2022, "step": 318790 }, { "epoch": 0.6439961699600432, "grad_norm": 574.928466796875, "learning_rate": 3.4948065440203982e-06, "loss": 30.3903, "step": 318800 }, { "epoch": 0.644016370592727, "grad_norm": 173.9928741455078, "learning_rate": 3.494473674330653e-06, "loss": 12.7227, "step": 318810 }, { "epoch": 0.6440365712254108, "grad_norm": 493.5740051269531, "learning_rate": 3.49414081197866e-06, "loss": 23.3376, "step": 318820 }, { "epoch": 0.6440567718580946, "grad_norm": 10.109909057617188, "learning_rate": 3.4938079569660398e-06, "loss": 29.0626, "step": 318830 }, { "epoch": 0.6440769724907784, "grad_norm": 125.75299072265625, "learning_rate": 3.493475109294421e-06, "loss": 14.8489, "step": 318840 }, { "epoch": 0.6440971731234623, "grad_norm": 105.9942626953125, "learning_rate": 3.4931422689654186e-06, "loss": 20.9156, "step": 318850 }, { "epoch": 0.6441173737561461, "grad_norm": 210.97653198242188, "learning_rate": 3.492809435980659e-06, "loss": 8.7984, "step": 318860 }, { "epoch": 0.6441375743888299, "grad_norm": 297.12152099609375, "learning_rate": 3.4924766103417648e-06, "loss": 8.53, "step": 318870 }, { "epoch": 0.6441577750215137, "grad_norm": 337.85479736328125, "learning_rate": 3.492143792050355e-06, "loss": 21.912, "step": 318880 }, { "epoch": 0.6441779756541975, "grad_norm": 246.87548828125, "learning_rate": 3.4918109811080535e-06, "loss": 8.0384, "step": 318890 }, { "epoch": 0.6441981762868813, "grad_norm": 320.1974182128906, "learning_rate": 3.491478177516484e-06, "loss": 11.805, "step": 318900 }, { "epoch": 0.6442183769195651, "grad_norm": 217.13697814941406, "learning_rate": 3.4911453812772658e-06, "loss": 24.665, "step": 318910 }, { "epoch": 0.6442385775522489, "grad_norm": 700.4960327148438, "learning_rate": 3.4908125923920204e-06, "loss": 20.8438, "step": 318920 }, { "epoch": 0.6442587781849327, "grad_norm": 30.923364639282227, "learning_rate": 3.490479810862373e-06, "loss": 15.8241, "step": 318930 }, { "epoch": 0.6442789788176165, "grad_norm": 1212.7904052734375, "learning_rate": 3.490147036689945e-06, "loss": 21.6489, "step": 318940 }, { "epoch": 0.6442991794503004, "grad_norm": 391.1068115234375, "learning_rate": 3.4898142698763555e-06, "loss": 16.3236, "step": 318950 }, { "epoch": 0.6443193800829842, "grad_norm": 338.1729431152344, "learning_rate": 3.4894815104232283e-06, "loss": 22.8707, "step": 
318960 }, { "epoch": 0.644339580715668, "grad_norm": 279.7657470703125, "learning_rate": 3.489148758332186e-06, "loss": 26.0585, "step": 318970 }, { "epoch": 0.6443597813483518, "grad_norm": 447.3832702636719, "learning_rate": 3.4888160136048488e-06, "loss": 13.4344, "step": 318980 }, { "epoch": 0.6443799819810356, "grad_norm": 33.17680740356445, "learning_rate": 3.4884832762428376e-06, "loss": 25.5586, "step": 318990 }, { "epoch": 0.6444001826137195, "grad_norm": 232.6408233642578, "learning_rate": 3.488150546247778e-06, "loss": 13.2822, "step": 319000 }, { "epoch": 0.6444203832464033, "grad_norm": 619.4563598632812, "learning_rate": 3.4878178236212883e-06, "loss": 29.3946, "step": 319010 }, { "epoch": 0.6444405838790871, "grad_norm": 175.9522705078125, "learning_rate": 3.4874851083649906e-06, "loss": 18.2922, "step": 319020 }, { "epoch": 0.6444607845117709, "grad_norm": 27.794336318969727, "learning_rate": 3.487152400480509e-06, "loss": 22.8812, "step": 319030 }, { "epoch": 0.6444809851444547, "grad_norm": 100.01497650146484, "learning_rate": 3.4868196999694616e-06, "loss": 11.4736, "step": 319040 }, { "epoch": 0.6445011857771386, "grad_norm": 412.4709777832031, "learning_rate": 3.486487006833471e-06, "loss": 30.6363, "step": 319050 }, { "epoch": 0.6445213864098224, "grad_norm": 359.1695251464844, "learning_rate": 3.4861543210741607e-06, "loss": 14.4729, "step": 319060 }, { "epoch": 0.6445415870425062, "grad_norm": 520.4990234375, "learning_rate": 3.485821642693148e-06, "loss": 22.3197, "step": 319070 }, { "epoch": 0.64456178767519, "grad_norm": 387.98980712890625, "learning_rate": 3.4854889716920588e-06, "loss": 14.8493, "step": 319080 }, { "epoch": 0.6445819883078738, "grad_norm": 22.09465980529785, "learning_rate": 3.485156308072512e-06, "loss": 21.9085, "step": 319090 }, { "epoch": 0.6446021889405577, "grad_norm": 94.3161849975586, "learning_rate": 3.484823651836131e-06, "loss": 13.9138, "step": 319100 }, { "epoch": 0.6446223895732415, "grad_norm": 10.478737831115723, "learning_rate": 3.484491002984535e-06, "loss": 8.5433, "step": 319110 }, { "epoch": 0.6446425902059253, "grad_norm": 185.5895233154297, "learning_rate": 3.4841583615193444e-06, "loss": 15.9812, "step": 319120 }, { "epoch": 0.6446627908386091, "grad_norm": 866.3886108398438, "learning_rate": 3.4838257274421853e-06, "loss": 31.4951, "step": 319130 }, { "epoch": 0.6446829914712929, "grad_norm": 319.8884582519531, "learning_rate": 3.483493100754673e-06, "loss": 15.8195, "step": 319140 }, { "epoch": 0.6447031921039766, "grad_norm": 472.67138671875, "learning_rate": 3.483160481458432e-06, "loss": 19.566, "step": 319150 }, { "epoch": 0.6447233927366605, "grad_norm": 158.82705688476562, "learning_rate": 3.4828278695550845e-06, "loss": 18.621, "step": 319160 }, { "epoch": 0.6447435933693443, "grad_norm": 383.1800231933594, "learning_rate": 3.4824952650462486e-06, "loss": 11.5811, "step": 319170 }, { "epoch": 0.6447637940020281, "grad_norm": 97.35182189941406, "learning_rate": 3.4821626679335464e-06, "loss": 10.1588, "step": 319180 }, { "epoch": 0.6447839946347119, "grad_norm": 712.7573852539062, "learning_rate": 3.4818300782186e-06, "loss": 31.499, "step": 319190 }, { "epoch": 0.6448041952673957, "grad_norm": 765.3334350585938, "learning_rate": 3.4814974959030294e-06, "loss": 33.6473, "step": 319200 }, { "epoch": 0.6448243959000796, "grad_norm": 335.5063171386719, "learning_rate": 3.4811649209884544e-06, "loss": 12.8144, "step": 319210 }, { "epoch": 0.6448445965327634, "grad_norm": 473.3268737792969, "learning_rate": 
3.480832353476501e-06, "loss": 17.6606, "step": 319220 }, { "epoch": 0.6448647971654472, "grad_norm": 186.41500854492188, "learning_rate": 3.480499793368783e-06, "loss": 11.0611, "step": 319230 }, { "epoch": 0.644884997798131, "grad_norm": 407.2973327636719, "learning_rate": 3.4801672406669253e-06, "loss": 18.3399, "step": 319240 }, { "epoch": 0.6449051984308148, "grad_norm": 515.4907836914062, "learning_rate": 3.4798346953725487e-06, "loss": 23.0567, "step": 319250 }, { "epoch": 0.6449253990634987, "grad_norm": 468.15130615234375, "learning_rate": 3.4795021574872743e-06, "loss": 24.6056, "step": 319260 }, { "epoch": 0.6449455996961825, "grad_norm": 61.45684814453125, "learning_rate": 3.479169627012721e-06, "loss": 7.1199, "step": 319270 }, { "epoch": 0.6449658003288663, "grad_norm": 203.83148193359375, "learning_rate": 3.478837103950509e-06, "loss": 18.5914, "step": 319280 }, { "epoch": 0.6449860009615501, "grad_norm": 429.20367431640625, "learning_rate": 3.4785045883022645e-06, "loss": 16.3068, "step": 319290 }, { "epoch": 0.6450062015942339, "grad_norm": 225.7417449951172, "learning_rate": 3.4781720800696006e-06, "loss": 17.3463, "step": 319300 }, { "epoch": 0.6450264022269178, "grad_norm": 598.3883666992188, "learning_rate": 3.477839579254142e-06, "loss": 19.2953, "step": 319310 }, { "epoch": 0.6450466028596016, "grad_norm": 245.51226806640625, "learning_rate": 3.47750708585751e-06, "loss": 14.2051, "step": 319320 }, { "epoch": 0.6450668034922854, "grad_norm": 1205.3701171875, "learning_rate": 3.4771745998813228e-06, "loss": 29.5674, "step": 319330 }, { "epoch": 0.6450870041249692, "grad_norm": 202.94825744628906, "learning_rate": 3.4768421213272017e-06, "loss": 13.627, "step": 319340 }, { "epoch": 0.645107204757653, "grad_norm": 247.85951232910156, "learning_rate": 3.476509650196769e-06, "loss": 16.0709, "step": 319350 }, { "epoch": 0.6451274053903369, "grad_norm": 202.9886016845703, "learning_rate": 3.4761771864916415e-06, "loss": 22.1857, "step": 319360 }, { "epoch": 0.6451476060230207, "grad_norm": 436.1661376953125, "learning_rate": 3.4758447302134414e-06, "loss": 15.4678, "step": 319370 }, { "epoch": 0.6451678066557045, "grad_norm": 538.0279541015625, "learning_rate": 3.475512281363792e-06, "loss": 14.7858, "step": 319380 }, { "epoch": 0.6451880072883883, "grad_norm": 320.42462158203125, "learning_rate": 3.4751798399443075e-06, "loss": 28.9839, "step": 319390 }, { "epoch": 0.6452082079210721, "grad_norm": 314.9214172363281, "learning_rate": 3.474847405956613e-06, "loss": 14.5805, "step": 319400 }, { "epoch": 0.6452284085537559, "grad_norm": 377.7002868652344, "learning_rate": 3.474514979402327e-06, "loss": 27.1503, "step": 319410 }, { "epoch": 0.6452486091864397, "grad_norm": 15.950569152832031, "learning_rate": 3.4741825602830716e-06, "loss": 11.5035, "step": 319420 }, { "epoch": 0.6452688098191235, "grad_norm": 1371.911865234375, "learning_rate": 3.4738501486004632e-06, "loss": 26.0395, "step": 319430 }, { "epoch": 0.6452890104518073, "grad_norm": 323.29290771484375, "learning_rate": 3.4735177443561243e-06, "loss": 15.6237, "step": 319440 }, { "epoch": 0.6453092110844911, "grad_norm": 0.0, "learning_rate": 3.4731853475516763e-06, "loss": 17.3917, "step": 319450 }, { "epoch": 0.645329411717175, "grad_norm": 186.2850799560547, "learning_rate": 3.472852958188736e-06, "loss": 19.1316, "step": 319460 }, { "epoch": 0.6453496123498588, "grad_norm": 415.9254150390625, "learning_rate": 3.4725205762689256e-06, "loss": 30.8706, "step": 319470 }, { "epoch": 0.6453698129825426, "grad_norm": 
179.19468688964844, "learning_rate": 3.472188201793866e-06, "loss": 16.1167, "step": 319480 }, { "epoch": 0.6453900136152264, "grad_norm": 485.9315490722656, "learning_rate": 3.4718558347651742e-06, "loss": 17.328, "step": 319490 }, { "epoch": 0.6454102142479102, "grad_norm": 513.5218505859375, "learning_rate": 3.471523475184472e-06, "loss": 13.9099, "step": 319500 }, { "epoch": 0.645430414880594, "grad_norm": 224.21621704101562, "learning_rate": 3.47119112305338e-06, "loss": 31.7493, "step": 319510 }, { "epoch": 0.6454506155132779, "grad_norm": 300.30120849609375, "learning_rate": 3.4708587783735164e-06, "loss": 15.918, "step": 319520 }, { "epoch": 0.6454708161459617, "grad_norm": 314.58984375, "learning_rate": 3.4705264411465004e-06, "loss": 17.2455, "step": 319530 }, { "epoch": 0.6454910167786455, "grad_norm": 237.69674682617188, "learning_rate": 3.470194111373954e-06, "loss": 19.3716, "step": 319540 }, { "epoch": 0.6455112174113293, "grad_norm": 427.9591979980469, "learning_rate": 3.4698617890574972e-06, "loss": 18.6955, "step": 319550 }, { "epoch": 0.6455314180440131, "grad_norm": 522.3770751953125, "learning_rate": 3.4695294741987474e-06, "loss": 26.1161, "step": 319560 }, { "epoch": 0.645551618676697, "grad_norm": 97.43962097167969, "learning_rate": 3.4691971667993254e-06, "loss": 20.7762, "step": 319570 }, { "epoch": 0.6455718193093808, "grad_norm": 446.9584045410156, "learning_rate": 3.468864866860851e-06, "loss": 13.0959, "step": 319580 }, { "epoch": 0.6455920199420646, "grad_norm": 2.0021135807037354, "learning_rate": 3.468532574384943e-06, "loss": 14.8358, "step": 319590 }, { "epoch": 0.6456122205747484, "grad_norm": 835.078857421875, "learning_rate": 3.4682002893732203e-06, "loss": 26.5909, "step": 319600 }, { "epoch": 0.6456324212074322, "grad_norm": 9.252861976623535, "learning_rate": 3.467868011827306e-06, "loss": 17.4459, "step": 319610 }, { "epoch": 0.6456526218401161, "grad_norm": 277.0822448730469, "learning_rate": 3.4675357417488163e-06, "loss": 22.8973, "step": 319620 }, { "epoch": 0.6456728224727999, "grad_norm": 386.9530944824219, "learning_rate": 3.467203479139371e-06, "loss": 12.6119, "step": 319630 }, { "epoch": 0.6456930231054837, "grad_norm": 738.8014526367188, "learning_rate": 3.4668712240005912e-06, "loss": 36.349, "step": 319640 }, { "epoch": 0.6457132237381675, "grad_norm": 318.0730285644531, "learning_rate": 3.4665389763340945e-06, "loss": 15.5838, "step": 319650 }, { "epoch": 0.6457334243708513, "grad_norm": 244.52935791015625, "learning_rate": 3.466206736141501e-06, "loss": 17.9775, "step": 319660 }, { "epoch": 0.6457536250035351, "grad_norm": 139.79449462890625, "learning_rate": 3.46587450342443e-06, "loss": 18.0357, "step": 319670 }, { "epoch": 0.6457738256362189, "grad_norm": 785.7474975585938, "learning_rate": 3.465542278184499e-06, "loss": 24.5396, "step": 319680 }, { "epoch": 0.6457940262689027, "grad_norm": 695.35595703125, "learning_rate": 3.4652100604233296e-06, "loss": 12.9501, "step": 319690 }, { "epoch": 0.6458142269015865, "grad_norm": 684.6602172851562, "learning_rate": 3.464877850142541e-06, "loss": 14.3548, "step": 319700 }, { "epoch": 0.6458344275342703, "grad_norm": 122.92704010009766, "learning_rate": 3.464545647343751e-06, "loss": 19.3015, "step": 319710 }, { "epoch": 0.6458546281669542, "grad_norm": 579.4113159179688, "learning_rate": 3.4642134520285796e-06, "loss": 16.8775, "step": 319720 }, { "epoch": 0.645874828799638, "grad_norm": 333.3854064941406, "learning_rate": 3.463881264198645e-06, "loss": 12.5376, "step": 319730 }, { 
"epoch": 0.6458950294323218, "grad_norm": 319.0225524902344, "learning_rate": 3.4635490838555687e-06, "loss": 20.1659, "step": 319740 }, { "epoch": 0.6459152300650056, "grad_norm": 227.57228088378906, "learning_rate": 3.463216911000965e-06, "loss": 9.6832, "step": 319750 }, { "epoch": 0.6459354306976894, "grad_norm": 404.5276184082031, "learning_rate": 3.4628847456364567e-06, "loss": 17.3493, "step": 319760 }, { "epoch": 0.6459556313303733, "grad_norm": 174.25889587402344, "learning_rate": 3.462552587763663e-06, "loss": 26.6191, "step": 319770 }, { "epoch": 0.6459758319630571, "grad_norm": 263.53826904296875, "learning_rate": 3.4622204373842006e-06, "loss": 14.4223, "step": 319780 }, { "epoch": 0.6459960325957409, "grad_norm": 572.0341186523438, "learning_rate": 3.461888294499689e-06, "loss": 13.2673, "step": 319790 }, { "epoch": 0.6460162332284247, "grad_norm": 282.9263916015625, "learning_rate": 3.4615561591117486e-06, "loss": 17.9608, "step": 319800 }, { "epoch": 0.6460364338611085, "grad_norm": 522.0670166015625, "learning_rate": 3.461224031221995e-06, "loss": 31.4396, "step": 319810 }, { "epoch": 0.6460566344937924, "grad_norm": 762.7666015625, "learning_rate": 3.4608919108320488e-06, "loss": 24.7711, "step": 319820 }, { "epoch": 0.6460768351264762, "grad_norm": 581.8294067382812, "learning_rate": 3.4605597979435313e-06, "loss": 71.8205, "step": 319830 }, { "epoch": 0.64609703575916, "grad_norm": 192.70274353027344, "learning_rate": 3.460227692558056e-06, "loss": 16.0339, "step": 319840 }, { "epoch": 0.6461172363918438, "grad_norm": 493.2317810058594, "learning_rate": 3.459895594677245e-06, "loss": 11.9203, "step": 319850 }, { "epoch": 0.6461374370245276, "grad_norm": 234.32284545898438, "learning_rate": 3.459563504302716e-06, "loss": 13.9755, "step": 319860 }, { "epoch": 0.6461576376572115, "grad_norm": 185.91566467285156, "learning_rate": 3.4592314214360888e-06, "loss": 12.033, "step": 319870 }, { "epoch": 0.6461778382898953, "grad_norm": 175.99267578125, "learning_rate": 3.4588993460789795e-06, "loss": 15.0761, "step": 319880 }, { "epoch": 0.6461980389225791, "grad_norm": 634.8748168945312, "learning_rate": 3.4585672782330072e-06, "loss": 24.8269, "step": 319890 }, { "epoch": 0.6462182395552629, "grad_norm": 182.73977661132812, "learning_rate": 3.4582352178997937e-06, "loss": 16.3021, "step": 319900 }, { "epoch": 0.6462384401879467, "grad_norm": 194.32150268554688, "learning_rate": 3.457903165080952e-06, "loss": 22.7858, "step": 319910 }, { "epoch": 0.6462586408206304, "grad_norm": 355.3138427734375, "learning_rate": 3.457571119778104e-06, "loss": 19.6149, "step": 319920 }, { "epoch": 0.6462788414533143, "grad_norm": 333.3360900878906, "learning_rate": 3.4572390819928686e-06, "loss": 16.6614, "step": 319930 }, { "epoch": 0.6462990420859981, "grad_norm": 88.16149139404297, "learning_rate": 3.4569070517268616e-06, "loss": 31.911, "step": 319940 }, { "epoch": 0.6463192427186819, "grad_norm": 863.8450927734375, "learning_rate": 3.4565750289817024e-06, "loss": 15.8914, "step": 319950 }, { "epoch": 0.6463394433513657, "grad_norm": 412.30096435546875, "learning_rate": 3.4562430137590107e-06, "loss": 21.9311, "step": 319960 }, { "epoch": 0.6463596439840495, "grad_norm": 468.2781982421875, "learning_rate": 3.4559110060604016e-06, "loss": 11.973, "step": 319970 }, { "epoch": 0.6463798446167334, "grad_norm": 226.75779724121094, "learning_rate": 3.455579005887495e-06, "loss": 25.6286, "step": 319980 }, { "epoch": 0.6464000452494172, "grad_norm": 109.05878448486328, "learning_rate": 
3.455247013241909e-06, "loss": 16.3357, "step": 319990 }, { "epoch": 0.646420245882101, "grad_norm": 463.2908020019531, "learning_rate": 3.4549150281252635e-06, "loss": 14.0002, "step": 320000 }, { "epoch": 0.6464404465147848, "grad_norm": 329.0850524902344, "learning_rate": 3.454583050539173e-06, "loss": 23.0141, "step": 320010 }, { "epoch": 0.6464606471474686, "grad_norm": 121.69821166992188, "learning_rate": 3.454251080485258e-06, "loss": 14.8856, "step": 320020 }, { "epoch": 0.6464808477801525, "grad_norm": 432.4167785644531, "learning_rate": 3.4539191179651367e-06, "loss": 24.9661, "step": 320030 }, { "epoch": 0.6465010484128363, "grad_norm": 292.9191589355469, "learning_rate": 3.4535871629804246e-06, "loss": 18.8819, "step": 320040 }, { "epoch": 0.6465212490455201, "grad_norm": 372.0992431640625, "learning_rate": 3.4532552155327404e-06, "loss": 18.7296, "step": 320050 }, { "epoch": 0.6465414496782039, "grad_norm": 294.7443542480469, "learning_rate": 3.4529232756237058e-06, "loss": 18.6634, "step": 320060 }, { "epoch": 0.6465616503108877, "grad_norm": 921.65869140625, "learning_rate": 3.4525913432549326e-06, "loss": 24.3927, "step": 320070 }, { "epoch": 0.6465818509435716, "grad_norm": 125.10314178466797, "learning_rate": 3.452259418428042e-06, "loss": 10.5639, "step": 320080 }, { "epoch": 0.6466020515762554, "grad_norm": 408.093994140625, "learning_rate": 3.451927501144653e-06, "loss": 14.3607, "step": 320090 }, { "epoch": 0.6466222522089392, "grad_norm": 391.8179626464844, "learning_rate": 3.4515955914063796e-06, "loss": 16.8749, "step": 320100 }, { "epoch": 0.646642452841623, "grad_norm": 272.0223083496094, "learning_rate": 3.451263689214842e-06, "loss": 13.1955, "step": 320110 }, { "epoch": 0.6466626534743068, "grad_norm": 517.6617431640625, "learning_rate": 3.4509317945716585e-06, "loss": 14.1607, "step": 320120 }, { "epoch": 0.6466828541069907, "grad_norm": 313.298095703125, "learning_rate": 3.4505999074784447e-06, "loss": 15.2244, "step": 320130 }, { "epoch": 0.6467030547396745, "grad_norm": 261.96746826171875, "learning_rate": 3.450268027936817e-06, "loss": 14.7913, "step": 320140 }, { "epoch": 0.6467232553723583, "grad_norm": 241.56088256835938, "learning_rate": 3.4499361559483976e-06, "loss": 19.1366, "step": 320150 }, { "epoch": 0.6467434560050421, "grad_norm": 413.460205078125, "learning_rate": 3.4496042915148008e-06, "loss": 21.075, "step": 320160 }, { "epoch": 0.646763656637726, "grad_norm": 276.2939758300781, "learning_rate": 3.449272434637645e-06, "loss": 11.5727, "step": 320170 }, { "epoch": 0.6467838572704097, "grad_norm": 371.8274841308594, "learning_rate": 3.4489405853185465e-06, "loss": 32.8321, "step": 320180 }, { "epoch": 0.6468040579030935, "grad_norm": 8.812861442565918, "learning_rate": 3.4486087435591243e-06, "loss": 18.4471, "step": 320190 }, { "epoch": 0.6468242585357773, "grad_norm": 26.10768699645996, "learning_rate": 3.4482769093609945e-06, "loss": 15.3442, "step": 320200 }, { "epoch": 0.6468444591684611, "grad_norm": 118.00762939453125, "learning_rate": 3.4479450827257733e-06, "loss": 24.4206, "step": 320210 }, { "epoch": 0.6468646598011449, "grad_norm": 348.4974365234375, "learning_rate": 3.447613263655083e-06, "loss": 26.2684, "step": 320220 }, { "epoch": 0.6468848604338288, "grad_norm": 977.09130859375, "learning_rate": 3.447281452150534e-06, "loss": 21.9423, "step": 320230 }, { "epoch": 0.6469050610665126, "grad_norm": 385.6293029785156, "learning_rate": 3.4469496482137484e-06, "loss": 20.0977, "step": 320240 }, { "epoch": 0.6469252616991964, 
"grad_norm": 2031.3492431640625, "learning_rate": 3.4466178518463424e-06, "loss": 41.0484, "step": 320250 }, { "epoch": 0.6469454623318802, "grad_norm": 375.87310791015625, "learning_rate": 3.4462860630499316e-06, "loss": 14.9024, "step": 320260 }, { "epoch": 0.646965662964564, "grad_norm": 288.5296936035156, "learning_rate": 3.445954281826134e-06, "loss": 20.3958, "step": 320270 }, { "epoch": 0.6469858635972479, "grad_norm": 542.4863891601562, "learning_rate": 3.4456225081765683e-06, "loss": 17.2139, "step": 320280 }, { "epoch": 0.6470060642299317, "grad_norm": 680.419189453125, "learning_rate": 3.445290742102848e-06, "loss": 18.0196, "step": 320290 }, { "epoch": 0.6470262648626155, "grad_norm": 213.6794891357422, "learning_rate": 3.444958983606592e-06, "loss": 13.3369, "step": 320300 }, { "epoch": 0.6470464654952993, "grad_norm": 345.41864013671875, "learning_rate": 3.444627232689418e-06, "loss": 18.4047, "step": 320310 }, { "epoch": 0.6470666661279831, "grad_norm": 244.41014099121094, "learning_rate": 3.4442954893529436e-06, "loss": 15.8739, "step": 320320 }, { "epoch": 0.647086866760667, "grad_norm": 50.69621658325195, "learning_rate": 3.4439637535987825e-06, "loss": 25.0043, "step": 320330 }, { "epoch": 0.6471070673933508, "grad_norm": 448.411376953125, "learning_rate": 3.4436320254285537e-06, "loss": 13.899, "step": 320340 }, { "epoch": 0.6471272680260346, "grad_norm": 318.6735534667969, "learning_rate": 3.4433003048438748e-06, "loss": 23.9269, "step": 320350 }, { "epoch": 0.6471474686587184, "grad_norm": 312.3635559082031, "learning_rate": 3.442968591846359e-06, "loss": 18.9191, "step": 320360 }, { "epoch": 0.6471676692914022, "grad_norm": 75.13298034667969, "learning_rate": 3.442636886437627e-06, "loss": 29.475, "step": 320370 }, { "epoch": 0.6471878699240861, "grad_norm": 607.415283203125, "learning_rate": 3.4423051886192944e-06, "loss": 25.1213, "step": 320380 }, { "epoch": 0.6472080705567699, "grad_norm": 506.7091064453125, "learning_rate": 3.4419734983929763e-06, "loss": 23.628, "step": 320390 }, { "epoch": 0.6472282711894537, "grad_norm": 535.0528564453125, "learning_rate": 3.441641815760291e-06, "loss": 11.2879, "step": 320400 }, { "epoch": 0.6472484718221375, "grad_norm": 20.201257705688477, "learning_rate": 3.4413101407228557e-06, "loss": 12.2529, "step": 320410 }, { "epoch": 0.6472686724548213, "grad_norm": 380.4597473144531, "learning_rate": 3.440978473282284e-06, "loss": 8.8845, "step": 320420 }, { "epoch": 0.647288873087505, "grad_norm": 15.600037574768066, "learning_rate": 3.440646813440193e-06, "loss": 16.2048, "step": 320430 }, { "epoch": 0.6473090737201889, "grad_norm": 172.44436645507812, "learning_rate": 3.4403151611982016e-06, "loss": 26.6767, "step": 320440 }, { "epoch": 0.6473292743528727, "grad_norm": 677.5240478515625, "learning_rate": 3.4399835165579266e-06, "loss": 17.4968, "step": 320450 }, { "epoch": 0.6473494749855565, "grad_norm": 144.59420776367188, "learning_rate": 3.439651879520981e-06, "loss": 13.6249, "step": 320460 }, { "epoch": 0.6473696756182403, "grad_norm": 104.53440856933594, "learning_rate": 3.4393202500889827e-06, "loss": 13.4822, "step": 320470 }, { "epoch": 0.6473898762509241, "grad_norm": 220.8779296875, "learning_rate": 3.43898862826355e-06, "loss": 22.2717, "step": 320480 }, { "epoch": 0.647410076883608, "grad_norm": 531.7034912109375, "learning_rate": 3.438657014046296e-06, "loss": 30.2536, "step": 320490 }, { "epoch": 0.6474302775162918, "grad_norm": 219.97042846679688, "learning_rate": 3.4383254074388373e-06, "loss": 12.5737, 
"step": 320500 }, { "epoch": 0.6474504781489756, "grad_norm": 433.6363525390625, "learning_rate": 3.437993808442794e-06, "loss": 21.8176, "step": 320510 }, { "epoch": 0.6474706787816594, "grad_norm": 481.9315490722656, "learning_rate": 3.437662217059776e-06, "loss": 17.6614, "step": 320520 }, { "epoch": 0.6474908794143432, "grad_norm": 570.3108520507812, "learning_rate": 3.437330633291405e-06, "loss": 13.3415, "step": 320530 }, { "epoch": 0.6475110800470271, "grad_norm": 189.6717529296875, "learning_rate": 3.436999057139295e-06, "loss": 19.1386, "step": 320540 }, { "epoch": 0.6475312806797109, "grad_norm": 802.9407958984375, "learning_rate": 3.4366674886050618e-06, "loss": 22.4112, "step": 320550 }, { "epoch": 0.6475514813123947, "grad_norm": 400.6368713378906, "learning_rate": 3.436335927690321e-06, "loss": 16.2096, "step": 320560 }, { "epoch": 0.6475716819450785, "grad_norm": 447.6169738769531, "learning_rate": 3.4360043743966907e-06, "loss": 8.7606, "step": 320570 }, { "epoch": 0.6475918825777623, "grad_norm": 28.27263832092285, "learning_rate": 3.4356728287257845e-06, "loss": 11.5367, "step": 320580 }, { "epoch": 0.6476120832104462, "grad_norm": 783.3766479492188, "learning_rate": 3.4353412906792175e-06, "loss": 26.0925, "step": 320590 }, { "epoch": 0.64763228384313, "grad_norm": 555.4591064453125, "learning_rate": 3.4350097602586085e-06, "loss": 20.1014, "step": 320600 }, { "epoch": 0.6476524844758138, "grad_norm": 247.10903930664062, "learning_rate": 3.4346782374655743e-06, "loss": 20.2857, "step": 320610 }, { "epoch": 0.6476726851084976, "grad_norm": 633.5286254882812, "learning_rate": 3.4343467223017256e-06, "loss": 20.1002, "step": 320620 }, { "epoch": 0.6476928857411814, "grad_norm": 0.0, "learning_rate": 3.4340152147686824e-06, "loss": 13.0941, "step": 320630 }, { "epoch": 0.6477130863738653, "grad_norm": 333.5049743652344, "learning_rate": 3.4336837148680595e-06, "loss": 29.6691, "step": 320640 }, { "epoch": 0.6477332870065491, "grad_norm": 0.0, "learning_rate": 3.4333522226014715e-06, "loss": 30.9545, "step": 320650 }, { "epoch": 0.6477534876392329, "grad_norm": 360.1062927246094, "learning_rate": 3.433020737970534e-06, "loss": 23.8065, "step": 320660 }, { "epoch": 0.6477736882719167, "grad_norm": 1085.951416015625, "learning_rate": 3.432689260976866e-06, "loss": 20.8398, "step": 320670 }, { "epoch": 0.6477938889046005, "grad_norm": 242.34632873535156, "learning_rate": 3.4323577916220773e-06, "loss": 27.6058, "step": 320680 }, { "epoch": 0.6478140895372843, "grad_norm": 161.37716674804688, "learning_rate": 3.4320263299077877e-06, "loss": 34.7406, "step": 320690 }, { "epoch": 0.6478342901699681, "grad_norm": 421.85528564453125, "learning_rate": 3.4316948758356127e-06, "loss": 16.1686, "step": 320700 }, { "epoch": 0.6478544908026519, "grad_norm": 493.4578857421875, "learning_rate": 3.431363429407166e-06, "loss": 23.4922, "step": 320710 }, { "epoch": 0.6478746914353357, "grad_norm": 622.5317993164062, "learning_rate": 3.431031990624063e-06, "loss": 24.4836, "step": 320720 }, { "epoch": 0.6478948920680195, "grad_norm": 581.2822875976562, "learning_rate": 3.4307005594879215e-06, "loss": 13.4607, "step": 320730 }, { "epoch": 0.6479150927007034, "grad_norm": 489.0071716308594, "learning_rate": 3.4303691360003533e-06, "loss": 19.3171, "step": 320740 }, { "epoch": 0.6479352933333872, "grad_norm": 664.3818359375, "learning_rate": 3.4300377201629753e-06, "loss": 24.7402, "step": 320750 }, { "epoch": 0.647955493966071, "grad_norm": 254.8978729248047, "learning_rate": 
3.4297063119774037e-06, "loss": 18.9693, "step": 320760 }, { "epoch": 0.6479756945987548, "grad_norm": 369.8358459472656, "learning_rate": 3.4293749114452546e-06, "loss": 29.1014, "step": 320770 }, { "epoch": 0.6479958952314386, "grad_norm": 347.9951477050781, "learning_rate": 3.4290435185681404e-06, "loss": 30.3701, "step": 320780 }, { "epoch": 0.6480160958641225, "grad_norm": 242.16900634765625, "learning_rate": 3.428712133347677e-06, "loss": 20.367, "step": 320790 }, { "epoch": 0.6480362964968063, "grad_norm": 604.1107788085938, "learning_rate": 3.4283807557854814e-06, "loss": 24.7284, "step": 320800 }, { "epoch": 0.6480564971294901, "grad_norm": 300.874755859375, "learning_rate": 3.4280493858831665e-06, "loss": 19.1906, "step": 320810 }, { "epoch": 0.6480766977621739, "grad_norm": 453.137451171875, "learning_rate": 3.4277180236423467e-06, "loss": 17.5896, "step": 320820 }, { "epoch": 0.6480968983948577, "grad_norm": 229.75108337402344, "learning_rate": 3.427386669064643e-06, "loss": 22.6297, "step": 320830 }, { "epoch": 0.6481170990275416, "grad_norm": 161.3295440673828, "learning_rate": 3.4270553221516618e-06, "loss": 17.7979, "step": 320840 }, { "epoch": 0.6481372996602254, "grad_norm": 355.29730224609375, "learning_rate": 3.426723982905023e-06, "loss": 29.1742, "step": 320850 }, { "epoch": 0.6481575002929092, "grad_norm": 221.48448181152344, "learning_rate": 3.4263926513263424e-06, "loss": 11.8878, "step": 320860 }, { "epoch": 0.648177700925593, "grad_norm": 40.39225387573242, "learning_rate": 3.4260613274172316e-06, "loss": 22.6435, "step": 320870 }, { "epoch": 0.6481979015582768, "grad_norm": 672.4365844726562, "learning_rate": 3.4257300111793073e-06, "loss": 30.1296, "step": 320880 }, { "epoch": 0.6482181021909607, "grad_norm": 651.0625610351562, "learning_rate": 3.425398702614185e-06, "loss": 23.6063, "step": 320890 }, { "epoch": 0.6482383028236445, "grad_norm": 191.00192260742188, "learning_rate": 3.4250674017234774e-06, "loss": 16.1958, "step": 320900 }, { "epoch": 0.6482585034563283, "grad_norm": 581.9676513671875, "learning_rate": 3.4247361085087993e-06, "loss": 12.2446, "step": 320910 }, { "epoch": 0.6482787040890121, "grad_norm": 393.1851806640625, "learning_rate": 3.4244048229717676e-06, "loss": 21.6946, "step": 320920 }, { "epoch": 0.6482989047216959, "grad_norm": 289.7122802734375, "learning_rate": 3.4240735451139963e-06, "loss": 23.8145, "step": 320930 }, { "epoch": 0.6483191053543798, "grad_norm": 508.9690856933594, "learning_rate": 3.4237422749370986e-06, "loss": 23.4573, "step": 320940 }, { "epoch": 0.6483393059870635, "grad_norm": 235.05738830566406, "learning_rate": 3.4234110124426893e-06, "loss": 19.8642, "step": 320950 }, { "epoch": 0.6483595066197473, "grad_norm": 213.62127685546875, "learning_rate": 3.4230797576323847e-06, "loss": 26.8772, "step": 320960 }, { "epoch": 0.6483797072524311, "grad_norm": 416.9248352050781, "learning_rate": 3.422748510507798e-06, "loss": 11.4521, "step": 320970 }, { "epoch": 0.6483999078851149, "grad_norm": 244.31906127929688, "learning_rate": 3.422417271070542e-06, "loss": 29.9691, "step": 320980 }, { "epoch": 0.6484201085177987, "grad_norm": 489.66864013671875, "learning_rate": 3.4220860393222347e-06, "loss": 15.2944, "step": 320990 }, { "epoch": 0.6484403091504826, "grad_norm": 291.2265625, "learning_rate": 3.4217548152644887e-06, "loss": 13.3252, "step": 321000 }, { "epoch": 0.6484605097831664, "grad_norm": 195.59718322753906, "learning_rate": 3.4214235988989173e-06, "loss": 20.0309, "step": 321010 }, { "epoch": 
0.6484807104158502, "grad_norm": 386.9002685546875, "learning_rate": 3.421092390227137e-06, "loss": 17.6624, "step": 321020 }, { "epoch": 0.648500911048534, "grad_norm": 53.192901611328125, "learning_rate": 3.42076118925076e-06, "loss": 15.1449, "step": 321030 }, { "epoch": 0.6485211116812178, "grad_norm": 804.015869140625, "learning_rate": 3.4204299959714006e-06, "loss": 17.1687, "step": 321040 }, { "epoch": 0.6485413123139017, "grad_norm": 366.16265869140625, "learning_rate": 3.4200988103906747e-06, "loss": 12.5538, "step": 321050 }, { "epoch": 0.6485615129465855, "grad_norm": 384.0155944824219, "learning_rate": 3.4197676325101965e-06, "loss": 19.0518, "step": 321060 }, { "epoch": 0.6485817135792693, "grad_norm": 417.5281066894531, "learning_rate": 3.419436462331578e-06, "loss": 16.7324, "step": 321070 }, { "epoch": 0.6486019142119531, "grad_norm": 81.71173858642578, "learning_rate": 3.4191052998564344e-06, "loss": 15.7786, "step": 321080 }, { "epoch": 0.6486221148446369, "grad_norm": 453.1468200683594, "learning_rate": 3.4187741450863816e-06, "loss": 15.2719, "step": 321090 }, { "epoch": 0.6486423154773208, "grad_norm": 78.19654846191406, "learning_rate": 3.4184429980230305e-06, "loss": 10.1305, "step": 321100 }, { "epoch": 0.6486625161100046, "grad_norm": 656.2283325195312, "learning_rate": 3.418111858667995e-06, "loss": 27.1522, "step": 321110 }, { "epoch": 0.6486827167426884, "grad_norm": 535.2967529296875, "learning_rate": 3.4177807270228942e-06, "loss": 28.5655, "step": 321120 }, { "epoch": 0.6487029173753722, "grad_norm": 273.2851867675781, "learning_rate": 3.4174496030893346e-06, "loss": 15.6931, "step": 321130 }, { "epoch": 0.648723118008056, "grad_norm": 1340.5679931640625, "learning_rate": 3.4171184868689345e-06, "loss": 30.0804, "step": 321140 }, { "epoch": 0.6487433186407399, "grad_norm": 475.08551025390625, "learning_rate": 3.4167873783633087e-06, "loss": 16.6618, "step": 321150 }, { "epoch": 0.6487635192734237, "grad_norm": 51.13777542114258, "learning_rate": 3.416456277574068e-06, "loss": 18.7528, "step": 321160 }, { "epoch": 0.6487837199061075, "grad_norm": 139.28160095214844, "learning_rate": 3.4161251845028264e-06, "loss": 21.9506, "step": 321170 }, { "epoch": 0.6488039205387913, "grad_norm": 271.64013671875, "learning_rate": 3.4157940991512007e-06, "loss": 11.4651, "step": 321180 }, { "epoch": 0.6488241211714751, "grad_norm": 424.78521728515625, "learning_rate": 3.4154630215208005e-06, "loss": 27.9061, "step": 321190 }, { "epoch": 0.6488443218041589, "grad_norm": 200.58016967773438, "learning_rate": 3.4151319516132414e-06, "loss": 23.6057, "step": 321200 }, { "epoch": 0.6488645224368427, "grad_norm": 382.99884033203125, "learning_rate": 3.4148008894301378e-06, "loss": 20.8119, "step": 321210 }, { "epoch": 0.6488847230695265, "grad_norm": 342.0165100097656, "learning_rate": 3.4144698349731025e-06, "loss": 7.7871, "step": 321220 }, { "epoch": 0.6489049237022103, "grad_norm": 445.2557678222656, "learning_rate": 3.4141387882437483e-06, "loss": 20.2608, "step": 321230 }, { "epoch": 0.6489251243348941, "grad_norm": 120.84185791015625, "learning_rate": 3.4138077492436896e-06, "loss": 17.3855, "step": 321240 }, { "epoch": 0.648945324967578, "grad_norm": 429.3396301269531, "learning_rate": 3.4134767179745404e-06, "loss": 19.8901, "step": 321250 }, { "epoch": 0.6489655256002618, "grad_norm": 76.77388763427734, "learning_rate": 3.4131456944379126e-06, "loss": 27.5789, "step": 321260 }, { "epoch": 0.6489857262329456, "grad_norm": 79.2790298461914, "learning_rate": 
3.412814678635419e-06, "loss": 12.1772, "step": 321270 }, { "epoch": 0.6490059268656294, "grad_norm": 287.5595397949219, "learning_rate": 3.4124836705686765e-06, "loss": 38.2287, "step": 321280 }, { "epoch": 0.6490261274983132, "grad_norm": 418.327392578125, "learning_rate": 3.4121526702392938e-06, "loss": 19.3629, "step": 321290 }, { "epoch": 0.649046328130997, "grad_norm": 362.4310607910156, "learning_rate": 3.411821677648887e-06, "loss": 15.4171, "step": 321300 }, { "epoch": 0.6490665287636809, "grad_norm": 224.0290985107422, "learning_rate": 3.4114906927990697e-06, "loss": 20.4532, "step": 321310 }, { "epoch": 0.6490867293963647, "grad_norm": 311.35284423828125, "learning_rate": 3.4111597156914535e-06, "loss": 18.5941, "step": 321320 }, { "epoch": 0.6491069300290485, "grad_norm": 428.9659118652344, "learning_rate": 3.4108287463276517e-06, "loss": 28.53, "step": 321330 }, { "epoch": 0.6491271306617323, "grad_norm": 213.68759155273438, "learning_rate": 3.410497784709279e-06, "loss": 14.7193, "step": 321340 }, { "epoch": 0.6491473312944162, "grad_norm": 288.65863037109375, "learning_rate": 3.4101668308379465e-06, "loss": 20.0432, "step": 321350 }, { "epoch": 0.6491675319271, "grad_norm": 28.574405670166016, "learning_rate": 3.409835884715267e-06, "loss": 12.2773, "step": 321360 }, { "epoch": 0.6491877325597838, "grad_norm": 7.877237796783447, "learning_rate": 3.4095049463428553e-06, "loss": 28.7102, "step": 321370 }, { "epoch": 0.6492079331924676, "grad_norm": 253.9036407470703, "learning_rate": 3.4091740157223253e-06, "loss": 18.7362, "step": 321380 }, { "epoch": 0.6492281338251514, "grad_norm": 448.0572814941406, "learning_rate": 3.4088430928552863e-06, "loss": 14.4309, "step": 321390 }, { "epoch": 0.6492483344578353, "grad_norm": 458.5237731933594, "learning_rate": 3.4085121777433532e-06, "loss": 20.4872, "step": 321400 }, { "epoch": 0.6492685350905191, "grad_norm": 361.4665832519531, "learning_rate": 3.40818127038814e-06, "loss": 12.8249, "step": 321410 }, { "epoch": 0.6492887357232029, "grad_norm": 186.07550048828125, "learning_rate": 3.407850370791257e-06, "loss": 24.1596, "step": 321420 }, { "epoch": 0.6493089363558867, "grad_norm": 131.7120819091797, "learning_rate": 3.4075194789543174e-06, "loss": 13.3453, "step": 321430 }, { "epoch": 0.6493291369885705, "grad_norm": 232.39071655273438, "learning_rate": 3.407188594878938e-06, "loss": 10.0244, "step": 321440 }, { "epoch": 0.6493493376212544, "grad_norm": 686.4042358398438, "learning_rate": 3.4068577185667253e-06, "loss": 30.2223, "step": 321450 }, { "epoch": 0.6493695382539381, "grad_norm": 517.8428955078125, "learning_rate": 3.406526850019295e-06, "loss": 11.6241, "step": 321460 }, { "epoch": 0.6493897388866219, "grad_norm": 28.20896339416504, "learning_rate": 3.4061959892382613e-06, "loss": 10.4526, "step": 321470 }, { "epoch": 0.6494099395193057, "grad_norm": 149.054443359375, "learning_rate": 3.4058651362252337e-06, "loss": 32.4825, "step": 321480 }, { "epoch": 0.6494301401519895, "grad_norm": 590.716796875, "learning_rate": 3.4055342909818255e-06, "loss": 18.6806, "step": 321490 }, { "epoch": 0.6494503407846733, "grad_norm": 0.5362585186958313, "learning_rate": 3.40520345350965e-06, "loss": 20.8464, "step": 321500 }, { "epoch": 0.6494705414173572, "grad_norm": 4.272317886352539, "learning_rate": 3.4048726238103214e-06, "loss": 14.8859, "step": 321510 }, { "epoch": 0.649490742050041, "grad_norm": 98.34452819824219, "learning_rate": 3.404541801885448e-06, "loss": 26.1643, "step": 321520 }, { "epoch": 0.6495109426827248, 
"grad_norm": 228.53408813476562, "learning_rate": 3.4042109877366447e-06, "loss": 18.4722, "step": 321530 }, { "epoch": 0.6495311433154086, "grad_norm": 162.33099365234375, "learning_rate": 3.403880181365525e-06, "loss": 27.2294, "step": 321540 }, { "epoch": 0.6495513439480924, "grad_norm": 280.77728271484375, "learning_rate": 3.403549382773699e-06, "loss": 33.4775, "step": 321550 }, { "epoch": 0.6495715445807763, "grad_norm": 281.2795715332031, "learning_rate": 3.4032185919627784e-06, "loss": 11.3548, "step": 321560 }, { "epoch": 0.6495917452134601, "grad_norm": 842.7446899414062, "learning_rate": 3.4028878089343784e-06, "loss": 26.5638, "step": 321570 }, { "epoch": 0.6496119458461439, "grad_norm": 230.534423828125, "learning_rate": 3.402557033690109e-06, "loss": 41.9931, "step": 321580 }, { "epoch": 0.6496321464788277, "grad_norm": 1114.5538330078125, "learning_rate": 3.4022262662315813e-06, "loss": 23.1025, "step": 321590 }, { "epoch": 0.6496523471115115, "grad_norm": 321.23773193359375, "learning_rate": 3.401895506560411e-06, "loss": 31.6811, "step": 321600 }, { "epoch": 0.6496725477441954, "grad_norm": 184.3525848388672, "learning_rate": 3.401564754678207e-06, "loss": 17.5569, "step": 321610 }, { "epoch": 0.6496927483768792, "grad_norm": 119.01651000976562, "learning_rate": 3.401234010586583e-06, "loss": 12.5658, "step": 321620 }, { "epoch": 0.649712949009563, "grad_norm": 1051.844970703125, "learning_rate": 3.4009032742871515e-06, "loss": 16.8424, "step": 321630 }, { "epoch": 0.6497331496422468, "grad_norm": 417.7512512207031, "learning_rate": 3.4005725457815225e-06, "loss": 17.1054, "step": 321640 }, { "epoch": 0.6497533502749306, "grad_norm": 336.3253479003906, "learning_rate": 3.400241825071309e-06, "loss": 15.4779, "step": 321650 }, { "epoch": 0.6497735509076145, "grad_norm": 192.74798583984375, "learning_rate": 3.3999111121581215e-06, "loss": 27.2385, "step": 321660 }, { "epoch": 0.6497937515402983, "grad_norm": 1039.3184814453125, "learning_rate": 3.399580407043576e-06, "loss": 30.0241, "step": 321670 }, { "epoch": 0.6498139521729821, "grad_norm": 165.76678466796875, "learning_rate": 3.3992497097292786e-06, "loss": 21.279, "step": 321680 }, { "epoch": 0.6498341528056659, "grad_norm": 524.4998168945312, "learning_rate": 3.3989190202168452e-06, "loss": 13.9386, "step": 321690 }, { "epoch": 0.6498543534383497, "grad_norm": 302.688720703125, "learning_rate": 3.3985883385078875e-06, "loss": 15.2754, "step": 321700 }, { "epoch": 0.6498745540710335, "grad_norm": 401.597412109375, "learning_rate": 3.398257664604015e-06, "loss": 15.3946, "step": 321710 }, { "epoch": 0.6498947547037173, "grad_norm": 374.7026672363281, "learning_rate": 3.3979269985068387e-06, "loss": 10.6275, "step": 321720 }, { "epoch": 0.6499149553364011, "grad_norm": 394.505615234375, "learning_rate": 3.3975963402179756e-06, "loss": 20.2509, "step": 321730 }, { "epoch": 0.6499351559690849, "grad_norm": 201.50865173339844, "learning_rate": 3.39726568973903e-06, "loss": 29.1401, "step": 321740 }, { "epoch": 0.6499553566017687, "grad_norm": 1979.6138916015625, "learning_rate": 3.396935047071619e-06, "loss": 28.8079, "step": 321750 }, { "epoch": 0.6499755572344526, "grad_norm": 889.3950805664062, "learning_rate": 3.3966044122173526e-06, "loss": 24.7522, "step": 321760 }, { "epoch": 0.6499957578671364, "grad_norm": 597.9691772460938, "learning_rate": 3.3962737851778406e-06, "loss": 5.9687, "step": 321770 }, { "epoch": 0.6500159584998202, "grad_norm": 24.719833374023438, "learning_rate": 3.3959431659546952e-06, "loss": 
13.8951, "step": 321780 }, { "epoch": 0.650036159132504, "grad_norm": 447.026611328125, "learning_rate": 3.39561255454953e-06, "loss": 14.6619, "step": 321790 }, { "epoch": 0.6500563597651878, "grad_norm": 456.4205017089844, "learning_rate": 3.3952819509639534e-06, "loss": 16.8525, "step": 321800 }, { "epoch": 0.6500765603978717, "grad_norm": 207.09373474121094, "learning_rate": 3.394951355199577e-06, "loss": 33.0193, "step": 321810 }, { "epoch": 0.6500967610305555, "grad_norm": 286.6849670410156, "learning_rate": 3.3946207672580144e-06, "loss": 24.4564, "step": 321820 }, { "epoch": 0.6501169616632393, "grad_norm": 282.37506103515625, "learning_rate": 3.3942901871408763e-06, "loss": 17.8353, "step": 321830 }, { "epoch": 0.6501371622959231, "grad_norm": 366.56939697265625, "learning_rate": 3.3939596148497717e-06, "loss": 10.8935, "step": 321840 }, { "epoch": 0.6501573629286069, "grad_norm": 162.059814453125, "learning_rate": 3.3936290503863132e-06, "loss": 19.6672, "step": 321850 }, { "epoch": 0.6501775635612908, "grad_norm": 337.82073974609375, "learning_rate": 3.393298493752113e-06, "loss": 21.535, "step": 321860 }, { "epoch": 0.6501977641939746, "grad_norm": 287.4216613769531, "learning_rate": 3.392967944948781e-06, "loss": 18.5025, "step": 321870 }, { "epoch": 0.6502179648266584, "grad_norm": 700.3267822265625, "learning_rate": 3.392637403977925e-06, "loss": 14.0155, "step": 321880 }, { "epoch": 0.6502381654593422, "grad_norm": 298.53656005859375, "learning_rate": 3.3923068708411645e-06, "loss": 22.7206, "step": 321890 }, { "epoch": 0.650258366092026, "grad_norm": 648.0995483398438, "learning_rate": 3.3919763455401016e-06, "loss": 21.8986, "step": 321900 }, { "epoch": 0.6502785667247098, "grad_norm": 331.8587646484375, "learning_rate": 3.391645828076352e-06, "loss": 18.3617, "step": 321910 }, { "epoch": 0.6502987673573937, "grad_norm": 208.50125122070312, "learning_rate": 3.391315318451527e-06, "loss": 15.3929, "step": 321920 }, { "epoch": 0.6503189679900775, "grad_norm": 666.7942504882812, "learning_rate": 3.3909848166672343e-06, "loss": 31.979, "step": 321930 }, { "epoch": 0.6503391686227613, "grad_norm": 364.9177551269531, "learning_rate": 3.3906543227250866e-06, "loss": 14.7562, "step": 321940 }, { "epoch": 0.6503593692554451, "grad_norm": 1887.9747314453125, "learning_rate": 3.3903238366266956e-06, "loss": 45.4767, "step": 321950 }, { "epoch": 0.650379569888129, "grad_norm": 334.8154296875, "learning_rate": 3.38999335837367e-06, "loss": 14.8031, "step": 321960 }, { "epoch": 0.6503997705208127, "grad_norm": 140.39060974121094, "learning_rate": 3.389662887967621e-06, "loss": 16.8329, "step": 321970 }, { "epoch": 0.6504199711534965, "grad_norm": 400.18804931640625, "learning_rate": 3.38933242541016e-06, "loss": 22.3485, "step": 321980 }, { "epoch": 0.6504401717861803, "grad_norm": 245.1291961669922, "learning_rate": 3.3890019707028987e-06, "loss": 19.8698, "step": 321990 }, { "epoch": 0.6504603724188641, "grad_norm": 511.9997253417969, "learning_rate": 3.3886715238474454e-06, "loss": 15.2886, "step": 322000 }, { "epoch": 0.6504805730515479, "grad_norm": 277.54144287109375, "learning_rate": 3.388341084845411e-06, "loss": 11.5422, "step": 322010 }, { "epoch": 0.6505007736842318, "grad_norm": 180.7924346923828, "learning_rate": 3.388010653698409e-06, "loss": 10.1466, "step": 322020 }, { "epoch": 0.6505209743169156, "grad_norm": 413.6370849609375, "learning_rate": 3.3876802304080457e-06, "loss": 22.5183, "step": 322030 }, { "epoch": 0.6505411749495994, "grad_norm": 745.9916381835938, 
"learning_rate": 3.3873498149759325e-06, "loss": 13.8117, "step": 322040 }, { "epoch": 0.6505613755822832, "grad_norm": 215.02685546875, "learning_rate": 3.3870194074036846e-06, "loss": 15.0504, "step": 322050 }, { "epoch": 0.650581576214967, "grad_norm": 453.3199462890625, "learning_rate": 3.3866890076929036e-06, "loss": 21.1833, "step": 322060 }, { "epoch": 0.6506017768476509, "grad_norm": 102.20661926269531, "learning_rate": 3.3863586158452074e-06, "loss": 22.2214, "step": 322070 }, { "epoch": 0.6506219774803347, "grad_norm": 655.1390380859375, "learning_rate": 3.386028231862204e-06, "loss": 30.9475, "step": 322080 }, { "epoch": 0.6506421781130185, "grad_norm": 591.4625244140625, "learning_rate": 3.385697855745502e-06, "loss": 28.0162, "step": 322090 }, { "epoch": 0.6506623787457023, "grad_norm": 214.79214477539062, "learning_rate": 3.3853674874967134e-06, "loss": 36.577, "step": 322100 }, { "epoch": 0.6506825793783861, "grad_norm": 764.1506958007812, "learning_rate": 3.3850371271174465e-06, "loss": 29.5662, "step": 322110 }, { "epoch": 0.65070278001107, "grad_norm": 311.96209716796875, "learning_rate": 3.384706774609316e-06, "loss": 20.8651, "step": 322120 }, { "epoch": 0.6507229806437538, "grad_norm": 378.3182373046875, "learning_rate": 3.3843764299739258e-06, "loss": 21.0682, "step": 322130 }, { "epoch": 0.6507431812764376, "grad_norm": 713.8678588867188, "learning_rate": 3.3840460932128894e-06, "loss": 13.7881, "step": 322140 }, { "epoch": 0.6507633819091214, "grad_norm": 171.61329650878906, "learning_rate": 3.3837157643278173e-06, "loss": 30.0439, "step": 322150 }, { "epoch": 0.6507835825418052, "grad_norm": 453.4654541015625, "learning_rate": 3.3833854433203185e-06, "loss": 12.3456, "step": 322160 }, { "epoch": 0.6508037831744891, "grad_norm": 128.6647186279297, "learning_rate": 3.3830551301920024e-06, "loss": 23.3729, "step": 322170 }, { "epoch": 0.6508239838071729, "grad_norm": 90.92256927490234, "learning_rate": 3.38272482494448e-06, "loss": 17.1569, "step": 322180 }, { "epoch": 0.6508441844398567, "grad_norm": 277.4646301269531, "learning_rate": 3.38239452757936e-06, "loss": 16.6768, "step": 322190 }, { "epoch": 0.6508643850725405, "grad_norm": 482.8577575683594, "learning_rate": 3.3820642380982527e-06, "loss": 35.2332, "step": 322200 }, { "epoch": 0.6508845857052243, "grad_norm": 300.772705078125, "learning_rate": 3.38173395650277e-06, "loss": 12.6839, "step": 322210 }, { "epoch": 0.650904786337908, "grad_norm": 114.0345687866211, "learning_rate": 3.3814036827945173e-06, "loss": 11.9252, "step": 322220 }, { "epoch": 0.6509249869705919, "grad_norm": 613.072998046875, "learning_rate": 3.3810734169751075e-06, "loss": 24.3405, "step": 322230 }, { "epoch": 0.6509451876032757, "grad_norm": 285.8229675292969, "learning_rate": 3.3807431590461502e-06, "loss": 13.8982, "step": 322240 }, { "epoch": 0.6509653882359595, "grad_norm": 245.60195922851562, "learning_rate": 3.3804129090092542e-06, "loss": 26.4386, "step": 322250 }, { "epoch": 0.6509855888686433, "grad_norm": 405.0401611328125, "learning_rate": 3.3800826668660286e-06, "loss": 17.3309, "step": 322260 }, { "epoch": 0.6510057895013271, "grad_norm": 374.2948303222656, "learning_rate": 3.3797524326180825e-06, "loss": 19.3296, "step": 322270 }, { "epoch": 0.651025990134011, "grad_norm": 510.6011657714844, "learning_rate": 3.379422206267029e-06, "loss": 19.7713, "step": 322280 }, { "epoch": 0.6510461907666948, "grad_norm": 803.0283813476562, "learning_rate": 3.3790919878144737e-06, "loss": 16.3765, "step": 322290 }, { "epoch": 
0.6510663913993786, "grad_norm": 216.36788940429688, "learning_rate": 3.378761777262028e-06, "loss": 23.0298, "step": 322300 }, { "epoch": 0.6510865920320624, "grad_norm": 684.405517578125, "learning_rate": 3.3784315746113017e-06, "loss": 17.6894, "step": 322310 }, { "epoch": 0.6511067926647462, "grad_norm": 306.9706726074219, "learning_rate": 3.378101379863902e-06, "loss": 18.5892, "step": 322320 }, { "epoch": 0.6511269932974301, "grad_norm": 798.5531005859375, "learning_rate": 3.377771193021439e-06, "loss": 18.135, "step": 322330 }, { "epoch": 0.6511471939301139, "grad_norm": 517.0232543945312, "learning_rate": 3.377441014085524e-06, "loss": 20.094, "step": 322340 }, { "epoch": 0.6511673945627977, "grad_norm": 476.083984375, "learning_rate": 3.3771108430577624e-06, "loss": 13.3595, "step": 322350 }, { "epoch": 0.6511875951954815, "grad_norm": 558.2871704101562, "learning_rate": 3.376780679939767e-06, "loss": 10.8419, "step": 322360 }, { "epoch": 0.6512077958281653, "grad_norm": 72.24461364746094, "learning_rate": 3.376450524733147e-06, "loss": 21.8569, "step": 322370 }, { "epoch": 0.6512279964608492, "grad_norm": 436.09051513671875, "learning_rate": 3.3761203774395083e-06, "loss": 18.2238, "step": 322380 }, { "epoch": 0.651248197093533, "grad_norm": 480.0425109863281, "learning_rate": 3.3757902380604624e-06, "loss": 12.3229, "step": 322390 }, { "epoch": 0.6512683977262168, "grad_norm": 175.87149047851562, "learning_rate": 3.375460106597619e-06, "loss": 20.0545, "step": 322400 }, { "epoch": 0.6512885983589006, "grad_norm": 229.50296020507812, "learning_rate": 3.375129983052585e-06, "loss": 25.8444, "step": 322410 }, { "epoch": 0.6513087989915844, "grad_norm": 39.605587005615234, "learning_rate": 3.3747998674269693e-06, "loss": 21.599, "step": 322420 }, { "epoch": 0.6513289996242683, "grad_norm": 211.9730987548828, "learning_rate": 3.374469759722383e-06, "loss": 10.087, "step": 322430 }, { "epoch": 0.6513492002569521, "grad_norm": 411.57080078125, "learning_rate": 3.3741396599404353e-06, "loss": 17.3464, "step": 322440 }, { "epoch": 0.6513694008896359, "grad_norm": 195.10816955566406, "learning_rate": 3.373809568082732e-06, "loss": 19.8394, "step": 322450 }, { "epoch": 0.6513896015223197, "grad_norm": 1152.62255859375, "learning_rate": 3.3734794841508838e-06, "loss": 16.5249, "step": 322460 }, { "epoch": 0.6514098021550035, "grad_norm": 182.6773223876953, "learning_rate": 3.3731494081465003e-06, "loss": 10.9776, "step": 322470 }, { "epoch": 0.6514300027876873, "grad_norm": 93.03421783447266, "learning_rate": 3.3728193400711884e-06, "loss": 12.1965, "step": 322480 }, { "epoch": 0.6514502034203711, "grad_norm": 0.0, "learning_rate": 3.3724892799265567e-06, "loss": 13.6043, "step": 322490 }, { "epoch": 0.6514704040530549, "grad_norm": 605.5923461914062, "learning_rate": 3.372159227714218e-06, "loss": 19.1769, "step": 322500 }, { "epoch": 0.6514906046857387, "grad_norm": 113.98303985595703, "learning_rate": 3.371829183435775e-06, "loss": 22.1165, "step": 322510 }, { "epoch": 0.6515108053184225, "grad_norm": 316.26275634765625, "learning_rate": 3.3714991470928393e-06, "loss": 25.8412, "step": 322520 }, { "epoch": 0.6515310059511064, "grad_norm": 166.5083770751953, "learning_rate": 3.371169118687021e-06, "loss": 20.1652, "step": 322530 }, { "epoch": 0.6515512065837902, "grad_norm": 501.1871643066406, "learning_rate": 3.370839098219926e-06, "loss": 13.4596, "step": 322540 }, { "epoch": 0.651571407216474, "grad_norm": 256.255126953125, "learning_rate": 3.3705090856931626e-06, "loss": 12.8425, 
"step": 322550 }, { "epoch": 0.6515916078491578, "grad_norm": 269.9193420410156, "learning_rate": 3.37017908110834e-06, "loss": 13.7187, "step": 322560 }, { "epoch": 0.6516118084818416, "grad_norm": 479.13092041015625, "learning_rate": 3.3698490844670693e-06, "loss": 12.8763, "step": 322570 }, { "epoch": 0.6516320091145255, "grad_norm": 341.1509704589844, "learning_rate": 3.3695190957709546e-06, "loss": 17.577, "step": 322580 }, { "epoch": 0.6516522097472093, "grad_norm": 374.6517028808594, "learning_rate": 3.369189115021606e-06, "loss": 17.6605, "step": 322590 }, { "epoch": 0.6516724103798931, "grad_norm": 70.8856430053711, "learning_rate": 3.3688591422206333e-06, "loss": 29.0356, "step": 322600 }, { "epoch": 0.6516926110125769, "grad_norm": 79.68965148925781, "learning_rate": 3.3685291773696425e-06, "loss": 18.5466, "step": 322610 }, { "epoch": 0.6517128116452607, "grad_norm": 243.4609832763672, "learning_rate": 3.3681992204702425e-06, "loss": 20.2885, "step": 322620 }, { "epoch": 0.6517330122779446, "grad_norm": 136.55223083496094, "learning_rate": 3.3678692715240423e-06, "loss": 15.1329, "step": 322630 }, { "epoch": 0.6517532129106284, "grad_norm": 193.76109313964844, "learning_rate": 3.3675393305326487e-06, "loss": 17.1184, "step": 322640 }, { "epoch": 0.6517734135433122, "grad_norm": 689.5020141601562, "learning_rate": 3.367209397497669e-06, "loss": 25.9347, "step": 322650 }, { "epoch": 0.651793614175996, "grad_norm": 236.0913848876953, "learning_rate": 3.3668794724207153e-06, "loss": 19.0471, "step": 322660 }, { "epoch": 0.6518138148086798, "grad_norm": 129.02688598632812, "learning_rate": 3.3665495553033913e-06, "loss": 8.0022, "step": 322670 }, { "epoch": 0.6518340154413637, "grad_norm": 435.196044921875, "learning_rate": 3.366219646147306e-06, "loss": 15.066, "step": 322680 }, { "epoch": 0.6518542160740475, "grad_norm": 61.95845413208008, "learning_rate": 3.3658897449540707e-06, "loss": 19.4422, "step": 322690 }, { "epoch": 0.6518744167067313, "grad_norm": 81.69084167480469, "learning_rate": 3.3655598517252886e-06, "loss": 16.341, "step": 322700 }, { "epoch": 0.6518946173394151, "grad_norm": 133.38026428222656, "learning_rate": 3.36522996646257e-06, "loss": 14.8244, "step": 322710 }, { "epoch": 0.6519148179720989, "grad_norm": 155.47174072265625, "learning_rate": 3.364900089167521e-06, "loss": 25.1622, "step": 322720 }, { "epoch": 0.6519350186047828, "grad_norm": 625.8029174804688, "learning_rate": 3.364570219841753e-06, "loss": 22.288, "step": 322730 }, { "epoch": 0.6519552192374665, "grad_norm": 575.62158203125, "learning_rate": 3.3642403584868694e-06, "loss": 31.1175, "step": 322740 }, { "epoch": 0.6519754198701503, "grad_norm": 593.5516357421875, "learning_rate": 3.3639105051044807e-06, "loss": 17.7484, "step": 322750 }, { "epoch": 0.6519956205028341, "grad_norm": 280.8475036621094, "learning_rate": 3.363580659696194e-06, "loss": 19.1936, "step": 322760 }, { "epoch": 0.6520158211355179, "grad_norm": 88.53751373291016, "learning_rate": 3.3632508222636163e-06, "loss": 18.2626, "step": 322770 }, { "epoch": 0.6520360217682017, "grad_norm": 435.2064514160156, "learning_rate": 3.362920992808356e-06, "loss": 14.0426, "step": 322780 }, { "epoch": 0.6520562224008856, "grad_norm": 233.89483642578125, "learning_rate": 3.3625911713320205e-06, "loss": 17.1296, "step": 322790 }, { "epoch": 0.6520764230335694, "grad_norm": 409.69061279296875, "learning_rate": 3.3622613578362162e-06, "loss": 14.7899, "step": 322800 }, { "epoch": 0.6520966236662532, "grad_norm": 500.92132568359375, 
"learning_rate": 3.3619315523225506e-06, "loss": 29.0302, "step": 322810 }, { "epoch": 0.652116824298937, "grad_norm": 292.7793273925781, "learning_rate": 3.361601754792635e-06, "loss": 19.0044, "step": 322820 }, { "epoch": 0.6521370249316208, "grad_norm": 163.41346740722656, "learning_rate": 3.3612719652480707e-06, "loss": 19.6094, "step": 322830 }, { "epoch": 0.6521572255643047, "grad_norm": 448.2548522949219, "learning_rate": 3.3609421836904688e-06, "loss": 19.8406, "step": 322840 }, { "epoch": 0.6521774261969885, "grad_norm": 333.2283020019531, "learning_rate": 3.360612410121438e-06, "loss": 21.349, "step": 322850 }, { "epoch": 0.6521976268296723, "grad_norm": 619.9572143554688, "learning_rate": 3.3602826445425805e-06, "loss": 29.5249, "step": 322860 }, { "epoch": 0.6522178274623561, "grad_norm": 338.3478698730469, "learning_rate": 3.3599528869555074e-06, "loss": 17.5572, "step": 322870 }, { "epoch": 0.65223802809504, "grad_norm": 203.17066955566406, "learning_rate": 3.359623137361825e-06, "loss": 23.9921, "step": 322880 }, { "epoch": 0.6522582287277238, "grad_norm": 245.39356994628906, "learning_rate": 3.3592933957631424e-06, "loss": 38.2778, "step": 322890 }, { "epoch": 0.6522784293604076, "grad_norm": 2.3841423988342285, "learning_rate": 3.358963662161062e-06, "loss": 11.8317, "step": 322900 }, { "epoch": 0.6522986299930914, "grad_norm": 427.2539367675781, "learning_rate": 3.358633936557195e-06, "loss": 23.7271, "step": 322910 }, { "epoch": 0.6523188306257752, "grad_norm": 364.3153381347656, "learning_rate": 3.358304218953148e-06, "loss": 25.2088, "step": 322920 }, { "epoch": 0.652339031258459, "grad_norm": 465.3929748535156, "learning_rate": 3.3579745093505256e-06, "loss": 22.5208, "step": 322930 }, { "epoch": 0.6523592318911429, "grad_norm": 314.6540222167969, "learning_rate": 3.3576448077509373e-06, "loss": 29.2729, "step": 322940 }, { "epoch": 0.6523794325238267, "grad_norm": 447.7454833984375, "learning_rate": 3.357315114155989e-06, "loss": 22.0601, "step": 322950 }, { "epoch": 0.6523996331565105, "grad_norm": 565.6834106445312, "learning_rate": 3.356985428567287e-06, "loss": 35.9726, "step": 322960 }, { "epoch": 0.6524198337891943, "grad_norm": 918.8121948242188, "learning_rate": 3.3566557509864374e-06, "loss": 14.5047, "step": 322970 }, { "epoch": 0.6524400344218781, "grad_norm": 226.97035217285156, "learning_rate": 3.3563260814150512e-06, "loss": 20.9802, "step": 322980 }, { "epoch": 0.6524602350545619, "grad_norm": 568.095703125, "learning_rate": 3.3559964198547307e-06, "loss": 22.3045, "step": 322990 }, { "epoch": 0.6524804356872457, "grad_norm": 411.2491760253906, "learning_rate": 3.355666766307084e-06, "loss": 19.7654, "step": 323000 }, { "epoch": 0.6525006363199295, "grad_norm": 423.4187316894531, "learning_rate": 3.3553371207737183e-06, "loss": 14.6672, "step": 323010 }, { "epoch": 0.6525208369526133, "grad_norm": 330.9416809082031, "learning_rate": 3.3550074832562417e-06, "loss": 21.8089, "step": 323020 }, { "epoch": 0.6525410375852971, "grad_norm": 660.3540649414062, "learning_rate": 3.3546778537562563e-06, "loss": 17.7518, "step": 323030 }, { "epoch": 0.652561238217981, "grad_norm": 183.2447052001953, "learning_rate": 3.354348232275373e-06, "loss": 17.8972, "step": 323040 }, { "epoch": 0.6525814388506648, "grad_norm": 329.89556884765625, "learning_rate": 3.3540186188151976e-06, "loss": 30.9821, "step": 323050 }, { "epoch": 0.6526016394833486, "grad_norm": 222.6219940185547, "learning_rate": 3.3536890133773346e-06, "loss": 14.2322, "step": 323060 }, { "epoch": 
0.6526218401160324, "grad_norm": 327.657958984375, "learning_rate": 3.353359415963392e-06, "loss": 11.7364, "step": 323070 }, { "epoch": 0.6526420407487162, "grad_norm": 53.36893844604492, "learning_rate": 3.353029826574977e-06, "loss": 19.6695, "step": 323080 }, { "epoch": 0.6526622413814, "grad_norm": 521.5284423828125, "learning_rate": 3.352700245213693e-06, "loss": 19.8244, "step": 323090 }, { "epoch": 0.6526824420140839, "grad_norm": 81.0189208984375, "learning_rate": 3.352370671881148e-06, "loss": 11.9868, "step": 323100 }, { "epoch": 0.6527026426467677, "grad_norm": 275.5891418457031, "learning_rate": 3.3520411065789513e-06, "loss": 25.3043, "step": 323110 }, { "epoch": 0.6527228432794515, "grad_norm": 423.15283203125, "learning_rate": 3.3517115493087036e-06, "loss": 21.4593, "step": 323120 }, { "epoch": 0.6527430439121353, "grad_norm": 375.1765441894531, "learning_rate": 3.3513820000720145e-06, "loss": 25.8436, "step": 323130 }, { "epoch": 0.6527632445448192, "grad_norm": 728.240234375, "learning_rate": 3.3510524588704908e-06, "loss": 23.8151, "step": 323140 }, { "epoch": 0.652783445177503, "grad_norm": 278.7547912597656, "learning_rate": 3.350722925705736e-06, "loss": 10.8667, "step": 323150 }, { "epoch": 0.6528036458101868, "grad_norm": 40.4306640625, "learning_rate": 3.350393400579358e-06, "loss": 24.5749, "step": 323160 }, { "epoch": 0.6528238464428706, "grad_norm": 357.2164001464844, "learning_rate": 3.350063883492962e-06, "loss": 18.1901, "step": 323170 }, { "epoch": 0.6528440470755544, "grad_norm": 864.6455078125, "learning_rate": 3.349734374448157e-06, "loss": 15.8515, "step": 323180 }, { "epoch": 0.6528642477082383, "grad_norm": 870.7262573242188, "learning_rate": 3.3494048734465433e-06, "loss": 49.4953, "step": 323190 }, { "epoch": 0.6528844483409221, "grad_norm": 385.034423828125, "learning_rate": 3.3490753804897315e-06, "loss": 15.4344, "step": 323200 }, { "epoch": 0.6529046489736059, "grad_norm": 542.6068725585938, "learning_rate": 3.3487458955793273e-06, "loss": 18.1847, "step": 323210 }, { "epoch": 0.6529248496062897, "grad_norm": 256.6165466308594, "learning_rate": 3.3484164187169334e-06, "loss": 20.8445, "step": 323220 }, { "epoch": 0.6529450502389735, "grad_norm": 306.36187744140625, "learning_rate": 3.348086949904158e-06, "loss": 20.4172, "step": 323230 }, { "epoch": 0.6529652508716574, "grad_norm": 3.7985031604766846, "learning_rate": 3.347757489142608e-06, "loss": 20.9858, "step": 323240 }, { "epoch": 0.6529854515043411, "grad_norm": 503.82421875, "learning_rate": 3.347428036433886e-06, "loss": 11.0117, "step": 323250 }, { "epoch": 0.6530056521370249, "grad_norm": 296.73895263671875, "learning_rate": 3.3470985917795983e-06, "loss": 14.5541, "step": 323260 }, { "epoch": 0.6530258527697087, "grad_norm": 330.89776611328125, "learning_rate": 3.3467691551813547e-06, "loss": 17.3829, "step": 323270 }, { "epoch": 0.6530460534023925, "grad_norm": 199.73748779296875, "learning_rate": 3.3464397266407543e-06, "loss": 22.8537, "step": 323280 }, { "epoch": 0.6530662540350763, "grad_norm": 461.0065002441406, "learning_rate": 3.346110306159408e-06, "loss": 18.9679, "step": 323290 }, { "epoch": 0.6530864546677602, "grad_norm": 609.6250610351562, "learning_rate": 3.34578089373892e-06, "loss": 13.0132, "step": 323300 }, { "epoch": 0.653106655300444, "grad_norm": 22.474637985229492, "learning_rate": 3.3454514893808943e-06, "loss": 22.382, "step": 323310 }, { "epoch": 0.6531268559331278, "grad_norm": 1025.268798828125, "learning_rate": 3.3451220930869377e-06, "loss": 21.649, 
"step": 323320 }, { "epoch": 0.6531470565658116, "grad_norm": 264.25006103515625, "learning_rate": 3.3447927048586538e-06, "loss": 16.3413, "step": 323330 }, { "epoch": 0.6531672571984954, "grad_norm": 346.7159729003906, "learning_rate": 3.3444633246976526e-06, "loss": 11.7635, "step": 323340 }, { "epoch": 0.6531874578311793, "grad_norm": 153.5816650390625, "learning_rate": 3.344133952605534e-06, "loss": 8.6615, "step": 323350 }, { "epoch": 0.6532076584638631, "grad_norm": 622.320068359375, "learning_rate": 3.3438045885839053e-06, "loss": 16.3805, "step": 323360 }, { "epoch": 0.6532278590965469, "grad_norm": 120.74202728271484, "learning_rate": 3.3434752326343745e-06, "loss": 19.6188, "step": 323370 }, { "epoch": 0.6532480597292307, "grad_norm": 136.3270721435547, "learning_rate": 3.343145884758543e-06, "loss": 22.3427, "step": 323380 }, { "epoch": 0.6532682603619145, "grad_norm": 701.3666381835938, "learning_rate": 3.3428165449580174e-06, "loss": 14.1135, "step": 323390 }, { "epoch": 0.6532884609945984, "grad_norm": 411.7037353515625, "learning_rate": 3.3424872132344044e-06, "loss": 14.4849, "step": 323400 }, { "epoch": 0.6533086616272822, "grad_norm": 182.9111328125, "learning_rate": 3.3421578895893058e-06, "loss": 18.9418, "step": 323410 }, { "epoch": 0.653328862259966, "grad_norm": 206.97998046875, "learning_rate": 3.3418285740243285e-06, "loss": 15.5435, "step": 323420 }, { "epoch": 0.6533490628926498, "grad_norm": 336.7099609375, "learning_rate": 3.3414992665410806e-06, "loss": 17.1012, "step": 323430 }, { "epoch": 0.6533692635253336, "grad_norm": 3.9773671627044678, "learning_rate": 3.34116996714116e-06, "loss": 13.594, "step": 323440 }, { "epoch": 0.6533894641580175, "grad_norm": 426.3591613769531, "learning_rate": 3.340840675826178e-06, "loss": 26.4791, "step": 323450 }, { "epoch": 0.6534096647907013, "grad_norm": 611.9093017578125, "learning_rate": 3.3405113925977383e-06, "loss": 23.0561, "step": 323460 }, { "epoch": 0.6534298654233851, "grad_norm": 222.60874938964844, "learning_rate": 3.340182117457443e-06, "loss": 21.197, "step": 323470 }, { "epoch": 0.6534500660560689, "grad_norm": 185.4382781982422, "learning_rate": 3.3398528504068996e-06, "loss": 20.9105, "step": 323480 }, { "epoch": 0.6534702666887527, "grad_norm": 705.8347778320312, "learning_rate": 3.3395235914477104e-06, "loss": 33.4218, "step": 323490 }, { "epoch": 0.6534904673214365, "grad_norm": 76.52244567871094, "learning_rate": 3.339194340581485e-06, "loss": 36.8078, "step": 323500 }, { "epoch": 0.6535106679541203, "grad_norm": 77.32313537597656, "learning_rate": 3.3388650978098218e-06, "loss": 20.5604, "step": 323510 }, { "epoch": 0.6535308685868041, "grad_norm": 272.7183837890625, "learning_rate": 3.33853586313433e-06, "loss": 12.8314, "step": 323520 }, { "epoch": 0.6535510692194879, "grad_norm": 60.551856994628906, "learning_rate": 3.3382066365566133e-06, "loss": 18.617, "step": 323530 }, { "epoch": 0.6535712698521717, "grad_norm": 396.419921875, "learning_rate": 3.337877418078276e-06, "loss": 29.6913, "step": 323540 }, { "epoch": 0.6535914704848556, "grad_norm": 177.9261932373047, "learning_rate": 3.3375482077009213e-06, "loss": 26.6844, "step": 323550 }, { "epoch": 0.6536116711175394, "grad_norm": 475.1094055175781, "learning_rate": 3.3372190054261565e-06, "loss": 27.4537, "step": 323560 }, { "epoch": 0.6536318717502232, "grad_norm": 540.54248046875, "learning_rate": 3.3368898112555843e-06, "loss": 21.2653, "step": 323570 }, { "epoch": 0.653652072382907, "grad_norm": 240.32376098632812, "learning_rate": 
3.336560625190808e-06, "loss": 7.5049, "step": 323580 }, { "epoch": 0.6536722730155908, "grad_norm": 78.92088317871094, "learning_rate": 3.3362314472334353e-06, "loss": 25.9188, "step": 323590 }, { "epoch": 0.6536924736482747, "grad_norm": 289.92327880859375, "learning_rate": 3.3359022773850673e-06, "loss": 23.9437, "step": 323600 }, { "epoch": 0.6537126742809585, "grad_norm": 0.13065364956855774, "learning_rate": 3.3355731156473105e-06, "loss": 16.9742, "step": 323610 }, { "epoch": 0.6537328749136423, "grad_norm": 31.578344345092773, "learning_rate": 3.335243962021768e-06, "loss": 20.8097, "step": 323620 }, { "epoch": 0.6537530755463261, "grad_norm": 306.77130126953125, "learning_rate": 3.334914816510046e-06, "loss": 17.3479, "step": 323630 }, { "epoch": 0.6537732761790099, "grad_norm": 602.3389282226562, "learning_rate": 3.3345856791137456e-06, "loss": 24.9415, "step": 323640 }, { "epoch": 0.6537934768116938, "grad_norm": 342.4835510253906, "learning_rate": 3.334256549834472e-06, "loss": 20.2902, "step": 323650 }, { "epoch": 0.6538136774443776, "grad_norm": 416.8805236816406, "learning_rate": 3.333927428673832e-06, "loss": 19.6605, "step": 323660 }, { "epoch": 0.6538338780770614, "grad_norm": 42.473670959472656, "learning_rate": 3.333598315633426e-06, "loss": 10.3506, "step": 323670 }, { "epoch": 0.6538540787097452, "grad_norm": 390.04583740234375, "learning_rate": 3.3332692107148605e-06, "loss": 14.6933, "step": 323680 }, { "epoch": 0.653874279342429, "grad_norm": 136.37127685546875, "learning_rate": 3.3329401139197393e-06, "loss": 17.3831, "step": 323690 }, { "epoch": 0.6538944799751129, "grad_norm": 290.8580322265625, "learning_rate": 3.3326110252496652e-06, "loss": 14.5204, "step": 323700 }, { "epoch": 0.6539146806077967, "grad_norm": 127.44840240478516, "learning_rate": 3.3322819447062417e-06, "loss": 8.5128, "step": 323710 }, { "epoch": 0.6539348812404805, "grad_norm": 486.5702819824219, "learning_rate": 3.3319528722910767e-06, "loss": 27.6796, "step": 323720 }, { "epoch": 0.6539550818731643, "grad_norm": 593.7366333007812, "learning_rate": 3.3316238080057674e-06, "loss": 33.287, "step": 323730 }, { "epoch": 0.6539752825058481, "grad_norm": 304.499267578125, "learning_rate": 3.3312947518519228e-06, "loss": 18.4255, "step": 323740 }, { "epoch": 0.653995483138532, "grad_norm": 307.52728271484375, "learning_rate": 3.330965703831146e-06, "loss": 16.0626, "step": 323750 }, { "epoch": 0.6540156837712157, "grad_norm": 694.5188598632812, "learning_rate": 3.3306366639450394e-06, "loss": 20.4025, "step": 323760 }, { "epoch": 0.6540358844038995, "grad_norm": 380.5897216796875, "learning_rate": 3.3303076321952066e-06, "loss": 24.0266, "step": 323770 }, { "epoch": 0.6540560850365833, "grad_norm": 583.7579345703125, "learning_rate": 3.329978608583252e-06, "loss": 18.3361, "step": 323780 }, { "epoch": 0.6540762856692671, "grad_norm": 211.36448669433594, "learning_rate": 3.329649593110781e-06, "loss": 12.6569, "step": 323790 }, { "epoch": 0.6540964863019509, "grad_norm": 454.0394592285156, "learning_rate": 3.3293205857793924e-06, "loss": 19.8624, "step": 323800 }, { "epoch": 0.6541166869346348, "grad_norm": 340.27069091796875, "learning_rate": 3.328991586590694e-06, "loss": 15.6996, "step": 323810 }, { "epoch": 0.6541368875673186, "grad_norm": 574.0722045898438, "learning_rate": 3.328662595546289e-06, "loss": 25.6051, "step": 323820 }, { "epoch": 0.6541570882000024, "grad_norm": 614.04931640625, "learning_rate": 3.3283336126477785e-06, "loss": 15.3504, "step": 323830 }, { "epoch": 
0.6541772888326862, "grad_norm": 208.8754119873047, "learning_rate": 3.3280046378967673e-06, "loss": 17.9637, "step": 323840 }, { "epoch": 0.65419748946537, "grad_norm": 288.2249755859375, "learning_rate": 3.32767567129486e-06, "loss": 18.1662, "step": 323850 }, { "epoch": 0.6542176900980539, "grad_norm": 125.99740600585938, "learning_rate": 3.3273467128436575e-06, "loss": 10.6629, "step": 323860 }, { "epoch": 0.6542378907307377, "grad_norm": 532.9415283203125, "learning_rate": 3.3270177625447632e-06, "loss": 24.2573, "step": 323870 }, { "epoch": 0.6542580913634215, "grad_norm": 930.7927856445312, "learning_rate": 3.326688820399784e-06, "loss": 19.2765, "step": 323880 }, { "epoch": 0.6542782919961053, "grad_norm": 153.9420166015625, "learning_rate": 3.326359886410318e-06, "loss": 13.2919, "step": 323890 }, { "epoch": 0.6542984926287891, "grad_norm": 452.17352294921875, "learning_rate": 3.3260309605779717e-06, "loss": 21.4966, "step": 323900 }, { "epoch": 0.654318693261473, "grad_norm": 314.76812744140625, "learning_rate": 3.3257020429043485e-06, "loss": 15.2828, "step": 323910 }, { "epoch": 0.6543388938941568, "grad_norm": 285.5110778808594, "learning_rate": 3.32537313339105e-06, "loss": 18.2423, "step": 323920 }, { "epoch": 0.6543590945268406, "grad_norm": 212.53077697753906, "learning_rate": 3.325044232039679e-06, "loss": 16.4085, "step": 323930 }, { "epoch": 0.6543792951595244, "grad_norm": 321.1883544921875, "learning_rate": 3.3247153388518387e-06, "loss": 29.8498, "step": 323940 }, { "epoch": 0.6543994957922082, "grad_norm": 602.4036254882812, "learning_rate": 3.3243864538291358e-06, "loss": 10.0415, "step": 323950 }, { "epoch": 0.6544196964248921, "grad_norm": 497.0145568847656, "learning_rate": 3.3240575769731662e-06, "loss": 27.176, "step": 323960 }, { "epoch": 0.6544398970575759, "grad_norm": 302.6004333496094, "learning_rate": 3.3237287082855386e-06, "loss": 19.8218, "step": 323970 }, { "epoch": 0.6544600976902597, "grad_norm": 337.1525573730469, "learning_rate": 3.3233998477678555e-06, "loss": 31.7608, "step": 323980 }, { "epoch": 0.6544802983229435, "grad_norm": 317.685791015625, "learning_rate": 3.3230709954217156e-06, "loss": 13.8257, "step": 323990 }, { "epoch": 0.6545004989556273, "grad_norm": 255.78964233398438, "learning_rate": 3.322742151248726e-06, "loss": 26.6619, "step": 324000 }, { "epoch": 0.6545206995883112, "grad_norm": 243.2002716064453, "learning_rate": 3.3224133152504874e-06, "loss": 32.395, "step": 324010 }, { "epoch": 0.6545409002209949, "grad_norm": 399.0644836425781, "learning_rate": 3.3220844874286017e-06, "loss": 15.7088, "step": 324020 }, { "epoch": 0.6545611008536787, "grad_norm": 641.73974609375, "learning_rate": 3.321755667784673e-06, "loss": 22.6925, "step": 324030 }, { "epoch": 0.6545813014863625, "grad_norm": 224.15005493164062, "learning_rate": 3.3214268563203056e-06, "loss": 17.7867, "step": 324040 }, { "epoch": 0.6546015021190463, "grad_norm": 0.0, "learning_rate": 3.3210980530370974e-06, "loss": 81.2715, "step": 324050 }, { "epoch": 0.6546217027517302, "grad_norm": 333.16357421875, "learning_rate": 3.3207692579366548e-06, "loss": 9.333, "step": 324060 }, { "epoch": 0.654641903384414, "grad_norm": 451.720703125, "learning_rate": 3.3204404710205786e-06, "loss": 16.7943, "step": 324070 }, { "epoch": 0.6546621040170978, "grad_norm": 596.48681640625, "learning_rate": 3.3201116922904737e-06, "loss": 20.8438, "step": 324080 }, { "epoch": 0.6546823046497816, "grad_norm": 379.9711608886719, "learning_rate": 3.3197829217479396e-06, "loss": 10.409, 
"step": 324090 }, { "epoch": 0.6547025052824654, "grad_norm": 341.3067321777344, "learning_rate": 3.319454159394578e-06, "loss": 10.2977, "step": 324100 }, { "epoch": 0.6547227059151493, "grad_norm": 177.4818878173828, "learning_rate": 3.3191254052319967e-06, "loss": 18.6199, "step": 324110 }, { "epoch": 0.6547429065478331, "grad_norm": 611.534423828125, "learning_rate": 3.3187966592617905e-06, "loss": 17.9969, "step": 324120 }, { "epoch": 0.6547631071805169, "grad_norm": 295.9464416503906, "learning_rate": 3.318467921485567e-06, "loss": 20.3785, "step": 324130 }, { "epoch": 0.6547833078132007, "grad_norm": 15.180954933166504, "learning_rate": 3.3181391919049277e-06, "loss": 21.2955, "step": 324140 }, { "epoch": 0.6548035084458845, "grad_norm": 385.1485900878906, "learning_rate": 3.317810470521473e-06, "loss": 12.7336, "step": 324150 }, { "epoch": 0.6548237090785684, "grad_norm": 234.79217529296875, "learning_rate": 3.3174817573368068e-06, "loss": 18.8312, "step": 324160 }, { "epoch": 0.6548439097112522, "grad_norm": 401.0411682128906, "learning_rate": 3.317153052352531e-06, "loss": 17.0286, "step": 324170 }, { "epoch": 0.654864110343936, "grad_norm": 83.79252624511719, "learning_rate": 3.3168243555702455e-06, "loss": 13.5033, "step": 324180 }, { "epoch": 0.6548843109766198, "grad_norm": 42.736671447753906, "learning_rate": 3.316495666991554e-06, "loss": 11.2485, "step": 324190 }, { "epoch": 0.6549045116093036, "grad_norm": 317.3556823730469, "learning_rate": 3.31616698661806e-06, "loss": 15.2511, "step": 324200 }, { "epoch": 0.6549247122419875, "grad_norm": 476.33062744140625, "learning_rate": 3.3158383144513618e-06, "loss": 6.2523, "step": 324210 }, { "epoch": 0.6549449128746713, "grad_norm": 429.07763671875, "learning_rate": 3.315509650493065e-06, "loss": 10.6325, "step": 324220 }, { "epoch": 0.6549651135073551, "grad_norm": 419.2195739746094, "learning_rate": 3.315180994744769e-06, "loss": 13.1369, "step": 324230 }, { "epoch": 0.6549853141400389, "grad_norm": 597.6172485351562, "learning_rate": 3.3148523472080773e-06, "loss": 14.6593, "step": 324240 }, { "epoch": 0.6550055147727227, "grad_norm": 406.0660400390625, "learning_rate": 3.3145237078845903e-06, "loss": 18.3823, "step": 324250 }, { "epoch": 0.6550257154054066, "grad_norm": 55.36674118041992, "learning_rate": 3.3141950767759096e-06, "loss": 13.1285, "step": 324260 }, { "epoch": 0.6550459160380903, "grad_norm": 564.5735473632812, "learning_rate": 3.3138664538836395e-06, "loss": 27.3588, "step": 324270 }, { "epoch": 0.6550661166707741, "grad_norm": 321.94818115234375, "learning_rate": 3.3135378392093788e-06, "loss": 12.9262, "step": 324280 }, { "epoch": 0.6550863173034579, "grad_norm": 346.2619323730469, "learning_rate": 3.3132092327547296e-06, "loss": 18.7073, "step": 324290 }, { "epoch": 0.6551065179361417, "grad_norm": 269.2696838378906, "learning_rate": 3.312880634521295e-06, "loss": 23.6498, "step": 324300 }, { "epoch": 0.6551267185688255, "grad_norm": 29.812774658203125, "learning_rate": 3.3125520445106753e-06, "loss": 12.0455, "step": 324310 }, { "epoch": 0.6551469192015094, "grad_norm": 529.48486328125, "learning_rate": 3.312223462724472e-06, "loss": 41.1307, "step": 324320 }, { "epoch": 0.6551671198341932, "grad_norm": 530.9104614257812, "learning_rate": 3.3118948891642875e-06, "loss": 17.0012, "step": 324330 }, { "epoch": 0.655187320466877, "grad_norm": 386.05133056640625, "learning_rate": 3.311566323831721e-06, "loss": 17.2066, "step": 324340 }, { "epoch": 0.6552075210995608, "grad_norm": 282.726806640625, 
"learning_rate": 3.3112377667283756e-06, "loss": 18.5096, "step": 324350 }, { "epoch": 0.6552277217322446, "grad_norm": 467.5544738769531, "learning_rate": 3.3109092178558546e-06, "loss": 30.0215, "step": 324360 }, { "epoch": 0.6552479223649285, "grad_norm": 596.86865234375, "learning_rate": 3.3105806772157556e-06, "loss": 15.1171, "step": 324370 }, { "epoch": 0.6552681229976123, "grad_norm": 376.2838134765625, "learning_rate": 3.310252144809682e-06, "loss": 16.6493, "step": 324380 }, { "epoch": 0.6552883236302961, "grad_norm": 406.9255676269531, "learning_rate": 3.309923620639233e-06, "loss": 16.4322, "step": 324390 }, { "epoch": 0.6553085242629799, "grad_norm": 354.6062316894531, "learning_rate": 3.3095951047060147e-06, "loss": 20.3177, "step": 324400 }, { "epoch": 0.6553287248956637, "grad_norm": 174.62908935546875, "learning_rate": 3.309266597011621e-06, "loss": 40.0574, "step": 324410 }, { "epoch": 0.6553489255283476, "grad_norm": 480.7134094238281, "learning_rate": 3.308938097557659e-06, "loss": 24.4233, "step": 324420 }, { "epoch": 0.6553691261610314, "grad_norm": 511.335693359375, "learning_rate": 3.308609606345728e-06, "loss": 40.798, "step": 324430 }, { "epoch": 0.6553893267937152, "grad_norm": 374.6742858886719, "learning_rate": 3.3082811233774277e-06, "loss": 15.7719, "step": 324440 }, { "epoch": 0.655409527426399, "grad_norm": 221.66168212890625, "learning_rate": 3.30795264865436e-06, "loss": 26.3338, "step": 324450 }, { "epoch": 0.6554297280590828, "grad_norm": 399.8490295410156, "learning_rate": 3.307624182178127e-06, "loss": 17.0068, "step": 324460 }, { "epoch": 0.6554499286917667, "grad_norm": 241.8368377685547, "learning_rate": 3.3072957239503273e-06, "loss": 23.1237, "step": 324470 }, { "epoch": 0.6554701293244505, "grad_norm": 264.2138977050781, "learning_rate": 3.3069672739725616e-06, "loss": 32.1639, "step": 324480 }, { "epoch": 0.6554903299571343, "grad_norm": 449.2158203125, "learning_rate": 3.306638832246436e-06, "loss": 11.5351, "step": 324490 }, { "epoch": 0.6555105305898181, "grad_norm": 279.5223083496094, "learning_rate": 3.3063103987735433e-06, "loss": 11.1526, "step": 324500 }, { "epoch": 0.6555307312225019, "grad_norm": 485.7489318847656, "learning_rate": 3.30598197355549e-06, "loss": 25.4161, "step": 324510 }, { "epoch": 0.6555509318551858, "grad_norm": 450.6777648925781, "learning_rate": 3.3056535565938764e-06, "loss": 12.6259, "step": 324520 }, { "epoch": 0.6555711324878695, "grad_norm": 97.11417388916016, "learning_rate": 3.3053251478902996e-06, "loss": 16.3099, "step": 324530 }, { "epoch": 0.6555913331205533, "grad_norm": 154.92950439453125, "learning_rate": 3.3049967474463634e-06, "loss": 20.5589, "step": 324540 }, { "epoch": 0.6556115337532371, "grad_norm": 337.1134948730469, "learning_rate": 3.3046683552636665e-06, "loss": 18.9115, "step": 324550 }, { "epoch": 0.6556317343859209, "grad_norm": 358.4677734375, "learning_rate": 3.304339971343813e-06, "loss": 19.0148, "step": 324560 }, { "epoch": 0.6556519350186047, "grad_norm": 680.4251708984375, "learning_rate": 3.3040115956883984e-06, "loss": 21.4263, "step": 324570 }, { "epoch": 0.6556721356512886, "grad_norm": 955.8135375976562, "learning_rate": 3.3036832282990263e-06, "loss": 36.9836, "step": 324580 }, { "epoch": 0.6556923362839724, "grad_norm": 286.5036315917969, "learning_rate": 3.3033548691772976e-06, "loss": 26.052, "step": 324590 }, { "epoch": 0.6557125369166562, "grad_norm": 609.9530029296875, "learning_rate": 3.30302651832481e-06, "loss": 18.3765, "step": 324600 }, { "epoch": 
0.65573273754934, "grad_norm": 775.6493530273438, "learning_rate": 3.302698175743165e-06, "loss": 25.0592, "step": 324610 }, { "epoch": 0.6557529381820238, "grad_norm": 314.5340576171875, "learning_rate": 3.3023698414339656e-06, "loss": 17.5775, "step": 324620 }, { "epoch": 0.6557731388147077, "grad_norm": 423.2784118652344, "learning_rate": 3.302041515398808e-06, "loss": 15.2809, "step": 324630 }, { "epoch": 0.6557933394473915, "grad_norm": 183.7259521484375, "learning_rate": 3.3017131976392926e-06, "loss": 13.2716, "step": 324640 }, { "epoch": 0.6558135400800753, "grad_norm": 521.1258544921875, "learning_rate": 3.3013848881570243e-06, "loss": 37.3796, "step": 324650 }, { "epoch": 0.6558337407127591, "grad_norm": 512.5210571289062, "learning_rate": 3.3010565869535976e-06, "loss": 27.3887, "step": 324660 }, { "epoch": 0.655853941345443, "grad_norm": 401.6698303222656, "learning_rate": 3.3007282940306155e-06, "loss": 24.3654, "step": 324670 }, { "epoch": 0.6558741419781268, "grad_norm": 525.7962036132812, "learning_rate": 3.300400009389678e-06, "loss": 31.9341, "step": 324680 }, { "epoch": 0.6558943426108106, "grad_norm": 407.7822265625, "learning_rate": 3.3000717330323857e-06, "loss": 18.6044, "step": 324690 }, { "epoch": 0.6559145432434944, "grad_norm": 880.857421875, "learning_rate": 3.2997434649603368e-06, "loss": 15.3205, "step": 324700 }, { "epoch": 0.6559347438761782, "grad_norm": 359.1968994140625, "learning_rate": 3.2994152051751305e-06, "loss": 33.5997, "step": 324710 }, { "epoch": 0.655954944508862, "grad_norm": 571.3761596679688, "learning_rate": 3.299086953678371e-06, "loss": 26.202, "step": 324720 }, { "epoch": 0.6559751451415459, "grad_norm": 791.4124145507812, "learning_rate": 3.298758710471653e-06, "loss": 22.1367, "step": 324730 }, { "epoch": 0.6559953457742297, "grad_norm": 372.0487976074219, "learning_rate": 3.298430475556579e-06, "loss": 9.4637, "step": 324740 }, { "epoch": 0.6560155464069135, "grad_norm": 322.2548828125, "learning_rate": 3.2981022489347503e-06, "loss": 12.5813, "step": 324750 }, { "epoch": 0.6560357470395973, "grad_norm": 410.0075988769531, "learning_rate": 3.297774030607763e-06, "loss": 11.1279, "step": 324760 }, { "epoch": 0.6560559476722811, "grad_norm": 337.5087585449219, "learning_rate": 3.2974458205772197e-06, "loss": 10.0531, "step": 324770 }, { "epoch": 0.6560761483049649, "grad_norm": 604.0032348632812, "learning_rate": 3.2971176188447196e-06, "loss": 15.9948, "step": 324780 }, { "epoch": 0.6560963489376487, "grad_norm": 67.086181640625, "learning_rate": 3.2967894254118605e-06, "loss": 9.4685, "step": 324790 }, { "epoch": 0.6561165495703325, "grad_norm": 699.3897094726562, "learning_rate": 3.2964612402802422e-06, "loss": 29.6587, "step": 324800 }, { "epoch": 0.6561367502030163, "grad_norm": 178.92552185058594, "learning_rate": 3.2961330634514676e-06, "loss": 19.5481, "step": 324810 }, { "epoch": 0.6561569508357001, "grad_norm": 621.947265625, "learning_rate": 3.2958048949271314e-06, "loss": 18.8215, "step": 324820 }, { "epoch": 0.656177151468384, "grad_norm": 619.4153442382812, "learning_rate": 3.2954767347088367e-06, "loss": 18.5938, "step": 324830 }, { "epoch": 0.6561973521010678, "grad_norm": 182.00775146484375, "learning_rate": 3.295148582798181e-06, "loss": 8.6175, "step": 324840 }, { "epoch": 0.6562175527337516, "grad_norm": 303.0915832519531, "learning_rate": 3.2948204391967657e-06, "loss": 16.7521, "step": 324850 }, { "epoch": 0.6562377533664354, "grad_norm": 100.56477355957031, "learning_rate": 3.294492303906188e-06, "loss": 33.7697, 
"step": 324860 }, { "epoch": 0.6562579539991192, "grad_norm": 1357.78466796875, "learning_rate": 3.2941641769280464e-06, "loss": 15.8703, "step": 324870 }, { "epoch": 0.6562781546318031, "grad_norm": 186.18832397460938, "learning_rate": 3.293836058263945e-06, "loss": 20.8526, "step": 324880 }, { "epoch": 0.6562983552644869, "grad_norm": 159.1144256591797, "learning_rate": 3.293507947915477e-06, "loss": 37.2876, "step": 324890 }, { "epoch": 0.6563185558971707, "grad_norm": 444.4486083984375, "learning_rate": 3.293179845884245e-06, "loss": 30.4774, "step": 324900 }, { "epoch": 0.6563387565298545, "grad_norm": 665.237060546875, "learning_rate": 3.2928517521718483e-06, "loss": 21.2222, "step": 324910 }, { "epoch": 0.6563589571625383, "grad_norm": 696.8388671875, "learning_rate": 3.2925236667798843e-06, "loss": 29.906, "step": 324920 }, { "epoch": 0.6563791577952222, "grad_norm": 123.77020263671875, "learning_rate": 3.2921955897099534e-06, "loss": 20.4031, "step": 324930 }, { "epoch": 0.656399358427906, "grad_norm": 360.2903137207031, "learning_rate": 3.2918675209636542e-06, "loss": 18.8322, "step": 324940 }, { "epoch": 0.6564195590605898, "grad_norm": 750.037353515625, "learning_rate": 3.2915394605425836e-06, "loss": 13.6613, "step": 324950 }, { "epoch": 0.6564397596932736, "grad_norm": 382.3537292480469, "learning_rate": 3.2912114084483437e-06, "loss": 21.9366, "step": 324960 }, { "epoch": 0.6564599603259574, "grad_norm": 273.78765869140625, "learning_rate": 3.290883364682533e-06, "loss": 14.4265, "step": 324970 }, { "epoch": 0.6564801609586413, "grad_norm": 171.11929321289062, "learning_rate": 3.2905553292467487e-06, "loss": 14.784, "step": 324980 }, { "epoch": 0.6565003615913251, "grad_norm": 404.71026611328125, "learning_rate": 3.29022730214259e-06, "loss": 18.4426, "step": 324990 }, { "epoch": 0.6565205622240089, "grad_norm": 516.9315795898438, "learning_rate": 3.289899283371657e-06, "loss": 19.952, "step": 325000 }, { "epoch": 0.6565407628566927, "grad_norm": 42.328216552734375, "learning_rate": 3.2895712729355477e-06, "loss": 22.5429, "step": 325010 }, { "epoch": 0.6565609634893765, "grad_norm": 215.39959716796875, "learning_rate": 3.2892432708358583e-06, "loss": 16.8509, "step": 325020 }, { "epoch": 0.6565811641220604, "grad_norm": 225.04336547851562, "learning_rate": 3.288915277074192e-06, "loss": 15.3697, "step": 325030 }, { "epoch": 0.6566013647547441, "grad_norm": 241.2082977294922, "learning_rate": 3.2885872916521445e-06, "loss": 20.5328, "step": 325040 }, { "epoch": 0.6566215653874279, "grad_norm": 419.43109130859375, "learning_rate": 3.2882593145713148e-06, "loss": 26.5507, "step": 325050 }, { "epoch": 0.6566417660201117, "grad_norm": 740.9419555664062, "learning_rate": 3.2879313458333017e-06, "loss": 29.9435, "step": 325060 }, { "epoch": 0.6566619666527955, "grad_norm": 574.1981811523438, "learning_rate": 3.2876033854397037e-06, "loss": 12.1584, "step": 325070 }, { "epoch": 0.6566821672854793, "grad_norm": 196.2428436279297, "learning_rate": 3.287275433392119e-06, "loss": 11.8524, "step": 325080 }, { "epoch": 0.6567023679181632, "grad_norm": 529.787353515625, "learning_rate": 3.286947489692145e-06, "loss": 23.899, "step": 325090 }, { "epoch": 0.656722568550847, "grad_norm": 380.5361022949219, "learning_rate": 3.2866195543413843e-06, "loss": 14.2219, "step": 325100 }, { "epoch": 0.6567427691835308, "grad_norm": 505.2468566894531, "learning_rate": 3.2862916273414284e-06, "loss": 15.0427, "step": 325110 }, { "epoch": 0.6567629698162146, "grad_norm": 572.7343139648438, 
"learning_rate": 3.285963708693881e-06, "loss": 23.6778, "step": 325120 }, { "epoch": 0.6567831704488984, "grad_norm": 358.7130126953125, "learning_rate": 3.2856357984003382e-06, "loss": 16.2286, "step": 325130 }, { "epoch": 0.6568033710815823, "grad_norm": 661.60302734375, "learning_rate": 3.2853078964623995e-06, "loss": 21.5851, "step": 325140 }, { "epoch": 0.6568235717142661, "grad_norm": 169.0994873046875, "learning_rate": 3.2849800028816613e-06, "loss": 9.0037, "step": 325150 }, { "epoch": 0.6568437723469499, "grad_norm": 667.0199584960938, "learning_rate": 3.2846521176597217e-06, "loss": 27.2752, "step": 325160 }, { "epoch": 0.6568639729796337, "grad_norm": 1.3522008657455444, "learning_rate": 3.2843242407981823e-06, "loss": 19.3959, "step": 325170 }, { "epoch": 0.6568841736123175, "grad_norm": 801.1975708007812, "learning_rate": 3.2839963722986356e-06, "loss": 19.4962, "step": 325180 }, { "epoch": 0.6569043742450014, "grad_norm": 214.1361083984375, "learning_rate": 3.283668512162684e-06, "loss": 17.1053, "step": 325190 }, { "epoch": 0.6569245748776852, "grad_norm": 346.63372802734375, "learning_rate": 3.2833406603919243e-06, "loss": 14.8104, "step": 325200 }, { "epoch": 0.656944775510369, "grad_norm": 382.4580078125, "learning_rate": 3.2830128169879535e-06, "loss": 32.4047, "step": 325210 }, { "epoch": 0.6569649761430528, "grad_norm": 421.2641296386719, "learning_rate": 3.282684981952369e-06, "loss": 12.9478, "step": 325220 }, { "epoch": 0.6569851767757366, "grad_norm": 134.71836853027344, "learning_rate": 3.2823571552867717e-06, "loss": 18.223, "step": 325230 }, { "epoch": 0.6570053774084205, "grad_norm": 559.95361328125, "learning_rate": 3.282029336992756e-06, "loss": 50.2567, "step": 325240 }, { "epoch": 0.6570255780411043, "grad_norm": 733.6630859375, "learning_rate": 3.28170152707192e-06, "loss": 50.7487, "step": 325250 }, { "epoch": 0.6570457786737881, "grad_norm": 0.0, "learning_rate": 3.281373725525865e-06, "loss": 17.5435, "step": 325260 }, { "epoch": 0.6570659793064719, "grad_norm": 1.3321958780288696, "learning_rate": 3.2810459323561826e-06, "loss": 14.6807, "step": 325270 }, { "epoch": 0.6570861799391557, "grad_norm": 292.2225036621094, "learning_rate": 3.2807181475644755e-06, "loss": 29.7416, "step": 325280 }, { "epoch": 0.6571063805718395, "grad_norm": 566.6941528320312, "learning_rate": 3.28039037115234e-06, "loss": 23.5769, "step": 325290 }, { "epoch": 0.6571265812045233, "grad_norm": 44.12610626220703, "learning_rate": 3.280062603121373e-06, "loss": 11.8867, "step": 325300 }, { "epoch": 0.6571467818372071, "grad_norm": 145.26451110839844, "learning_rate": 3.2797348434731725e-06, "loss": 12.4323, "step": 325310 }, { "epoch": 0.6571669824698909, "grad_norm": 1019.871826171875, "learning_rate": 3.2794070922093347e-06, "loss": 15.0018, "step": 325320 }, { "epoch": 0.6571871831025747, "grad_norm": 80.88499450683594, "learning_rate": 3.2790793493314605e-06, "loss": 9.0907, "step": 325330 }, { "epoch": 0.6572073837352586, "grad_norm": 760.2362060546875, "learning_rate": 3.2787516148411417e-06, "loss": 23.8629, "step": 325340 }, { "epoch": 0.6572275843679424, "grad_norm": 560.3916625976562, "learning_rate": 3.27842388873998e-06, "loss": 21.0015, "step": 325350 }, { "epoch": 0.6572477850006262, "grad_norm": 539.2189331054688, "learning_rate": 3.2780961710295727e-06, "loss": 21.1835, "step": 325360 }, { "epoch": 0.65726798563331, "grad_norm": 1000.8939819335938, "learning_rate": 3.2777684617115145e-06, "loss": 33.7129, "step": 325370 }, { "epoch": 0.6572881862659938, 
"grad_norm": 0.1469816267490387, "learning_rate": 3.277440760787404e-06, "loss": 10.3213, "step": 325380 }, { "epoch": 0.6573083868986777, "grad_norm": 224.1309814453125, "learning_rate": 3.277113068258839e-06, "loss": 17.3751, "step": 325390 }, { "epoch": 0.6573285875313615, "grad_norm": 827.6720581054688, "learning_rate": 3.2767853841274154e-06, "loss": 24.9989, "step": 325400 }, { "epoch": 0.6573487881640453, "grad_norm": 564.5892333984375, "learning_rate": 3.2764577083947303e-06, "loss": 21.6816, "step": 325410 }, { "epoch": 0.6573689887967291, "grad_norm": 362.488525390625, "learning_rate": 3.2761300410623834e-06, "loss": 33.0794, "step": 325420 }, { "epoch": 0.6573891894294129, "grad_norm": 112.11978149414062, "learning_rate": 3.2758023821319673e-06, "loss": 29.824, "step": 325430 }, { "epoch": 0.6574093900620968, "grad_norm": 395.3289794921875, "learning_rate": 3.2754747316050815e-06, "loss": 23.8979, "step": 325440 }, { "epoch": 0.6574295906947806, "grad_norm": 251.3035888671875, "learning_rate": 3.2751470894833236e-06, "loss": 11.2302, "step": 325450 }, { "epoch": 0.6574497913274644, "grad_norm": 250.07052612304688, "learning_rate": 3.27481945576829e-06, "loss": 20.3601, "step": 325460 }, { "epoch": 0.6574699919601482, "grad_norm": 145.04916381835938, "learning_rate": 3.2744918304615757e-06, "loss": 20.5191, "step": 325470 }, { "epoch": 0.657490192592832, "grad_norm": 444.8187255859375, "learning_rate": 3.2741642135647787e-06, "loss": 23.9869, "step": 325480 }, { "epoch": 0.6575103932255159, "grad_norm": 397.5899353027344, "learning_rate": 3.273836605079499e-06, "loss": 19.1656, "step": 325490 }, { "epoch": 0.6575305938581997, "grad_norm": 667.8988647460938, "learning_rate": 3.273509005007327e-06, "loss": 27.4597, "step": 325500 }, { "epoch": 0.6575507944908835, "grad_norm": 472.8876647949219, "learning_rate": 3.273181413349864e-06, "loss": 17.3634, "step": 325510 }, { "epoch": 0.6575709951235673, "grad_norm": 49.553226470947266, "learning_rate": 3.2728538301087066e-06, "loss": 19.2197, "step": 325520 }, { "epoch": 0.6575911957562511, "grad_norm": 480.49407958984375, "learning_rate": 3.2725262552854485e-06, "loss": 21.1468, "step": 325530 }, { "epoch": 0.657611396388935, "grad_norm": 215.25254821777344, "learning_rate": 3.272198688881688e-06, "loss": 14.1663, "step": 325540 }, { "epoch": 0.6576315970216187, "grad_norm": 177.4603729248047, "learning_rate": 3.2718711308990226e-06, "loss": 10.0919, "step": 325550 }, { "epoch": 0.6576517976543025, "grad_norm": 821.0975341796875, "learning_rate": 3.271543581339047e-06, "loss": 23.5161, "step": 325560 }, { "epoch": 0.6576719982869863, "grad_norm": 482.9058837890625, "learning_rate": 3.271216040203357e-06, "loss": 32.7778, "step": 325570 }, { "epoch": 0.6576921989196701, "grad_norm": 13.38960075378418, "learning_rate": 3.2708885074935515e-06, "loss": 18.5887, "step": 325580 }, { "epoch": 0.6577123995523539, "grad_norm": 249.48719787597656, "learning_rate": 3.270560983211227e-06, "loss": 14.3424, "step": 325590 }, { "epoch": 0.6577326001850378, "grad_norm": 514.5307006835938, "learning_rate": 3.2702334673579765e-06, "loss": 12.4389, "step": 325600 }, { "epoch": 0.6577528008177216, "grad_norm": 179.92340087890625, "learning_rate": 3.2699059599353987e-06, "loss": 20.0307, "step": 325610 }, { "epoch": 0.6577730014504054, "grad_norm": 158.58670043945312, "learning_rate": 3.2695784609450908e-06, "loss": 22.2668, "step": 325620 }, { "epoch": 0.6577932020830892, "grad_norm": 362.43304443359375, "learning_rate": 3.2692509703886467e-06, "loss": 
23.5387, "step": 325630 }, { "epoch": 0.657813402715773, "grad_norm": 437.4108581542969, "learning_rate": 3.2689234882676622e-06, "loss": 20.1188, "step": 325640 }, { "epoch": 0.6578336033484569, "grad_norm": 8.09162712097168, "learning_rate": 3.268596014583737e-06, "loss": 25.9325, "step": 325650 }, { "epoch": 0.6578538039811407, "grad_norm": 22.787580490112305, "learning_rate": 3.2682685493384636e-06, "loss": 12.1297, "step": 325660 }, { "epoch": 0.6578740046138245, "grad_norm": 344.8905944824219, "learning_rate": 3.2679410925334394e-06, "loss": 10.7535, "step": 325670 }, { "epoch": 0.6578942052465083, "grad_norm": 437.6022033691406, "learning_rate": 3.267613644170261e-06, "loss": 28.6233, "step": 325680 }, { "epoch": 0.6579144058791921, "grad_norm": 89.2320556640625, "learning_rate": 3.2672862042505227e-06, "loss": 17.6414, "step": 325690 }, { "epoch": 0.657934606511876, "grad_norm": 237.4945526123047, "learning_rate": 3.26695877277582e-06, "loss": 20.6416, "step": 325700 }, { "epoch": 0.6579548071445598, "grad_norm": 148.95309448242188, "learning_rate": 3.266631349747753e-06, "loss": 13.3693, "step": 325710 }, { "epoch": 0.6579750077772436, "grad_norm": 237.44046020507812, "learning_rate": 3.266303935167912e-06, "loss": 15.7174, "step": 325720 }, { "epoch": 0.6579952084099274, "grad_norm": 681.7918090820312, "learning_rate": 3.2659765290378963e-06, "loss": 16.4314, "step": 325730 }, { "epoch": 0.6580154090426112, "grad_norm": 135.437744140625, "learning_rate": 3.265649131359301e-06, "loss": 28.3991, "step": 325740 }, { "epoch": 0.6580356096752951, "grad_norm": 420.930419921875, "learning_rate": 3.2653217421337213e-06, "loss": 19.6065, "step": 325750 }, { "epoch": 0.6580558103079789, "grad_norm": 755.5789794921875, "learning_rate": 3.264994361362753e-06, "loss": 14.2181, "step": 325760 }, { "epoch": 0.6580760109406627, "grad_norm": 41.67082595825195, "learning_rate": 3.26466698904799e-06, "loss": 12.7884, "step": 325770 }, { "epoch": 0.6580962115733465, "grad_norm": 355.4094543457031, "learning_rate": 3.2643396251910338e-06, "loss": 14.9274, "step": 325780 }, { "epoch": 0.6581164122060303, "grad_norm": 464.47406005859375, "learning_rate": 3.2640122697934716e-06, "loss": 27.0586, "step": 325790 }, { "epoch": 0.6581366128387142, "grad_norm": 322.71044921875, "learning_rate": 3.263684922856905e-06, "loss": 15.635, "step": 325800 }, { "epoch": 0.6581568134713979, "grad_norm": 211.86778259277344, "learning_rate": 3.2633575843829278e-06, "loss": 11.8521, "step": 325810 }, { "epoch": 0.6581770141040817, "grad_norm": 664.3461303710938, "learning_rate": 3.2630302543731347e-06, "loss": 26.4699, "step": 325820 }, { "epoch": 0.6581972147367655, "grad_norm": 583.2703857421875, "learning_rate": 3.262702932829121e-06, "loss": 11.8234, "step": 325830 }, { "epoch": 0.6582174153694493, "grad_norm": 1423.8182373046875, "learning_rate": 3.262375619752484e-06, "loss": 15.3818, "step": 325840 }, { "epoch": 0.6582376160021332, "grad_norm": 562.4296875, "learning_rate": 3.262048315144816e-06, "loss": 18.7142, "step": 325850 }, { "epoch": 0.658257816634817, "grad_norm": 51.41434097290039, "learning_rate": 3.2617210190077132e-06, "loss": 22.0297, "step": 325860 }, { "epoch": 0.6582780172675008, "grad_norm": 282.89849853515625, "learning_rate": 3.2613937313427735e-06, "loss": 14.765, "step": 325870 }, { "epoch": 0.6582982179001846, "grad_norm": 38.96006393432617, "learning_rate": 3.2610664521515874e-06, "loss": 21.5365, "step": 325880 }, { "epoch": 0.6583184185328684, "grad_norm": 119.70616912841797, 
"learning_rate": 3.2607391814357537e-06, "loss": 15.3491, "step": 325890 }, { "epoch": 0.6583386191655523, "grad_norm": 201.7650604248047, "learning_rate": 3.260411919196866e-06, "loss": 17.0382, "step": 325900 }, { "epoch": 0.6583588197982361, "grad_norm": 386.3955383300781, "learning_rate": 3.2600846654365202e-06, "loss": 11.8478, "step": 325910 }, { "epoch": 0.6583790204309199, "grad_norm": 216.8415985107422, "learning_rate": 3.2597574201563104e-06, "loss": 29.6346, "step": 325920 }, { "epoch": 0.6583992210636037, "grad_norm": 551.940185546875, "learning_rate": 3.2594301833578307e-06, "loss": 20.8153, "step": 325930 }, { "epoch": 0.6584194216962875, "grad_norm": 380.404052734375, "learning_rate": 3.25910295504268e-06, "loss": 14.7257, "step": 325940 }, { "epoch": 0.6584396223289714, "grad_norm": 615.5536499023438, "learning_rate": 3.258775735212447e-06, "loss": 23.8163, "step": 325950 }, { "epoch": 0.6584598229616552, "grad_norm": 664.9302978515625, "learning_rate": 3.2584485238687318e-06, "loss": 24.1192, "step": 325960 }, { "epoch": 0.658480023594339, "grad_norm": 243.3323516845703, "learning_rate": 3.258121321013128e-06, "loss": 7.5279, "step": 325970 }, { "epoch": 0.6585002242270228, "grad_norm": 567.731689453125, "learning_rate": 3.257794126647228e-06, "loss": 12.0984, "step": 325980 }, { "epoch": 0.6585204248597066, "grad_norm": 119.06748962402344, "learning_rate": 3.257466940772629e-06, "loss": 17.4653, "step": 325990 }, { "epoch": 0.6585406254923905, "grad_norm": 556.4931640625, "learning_rate": 3.2571397633909252e-06, "loss": 20.6214, "step": 326000 }, { "epoch": 0.6585608261250743, "grad_norm": 45.02443313598633, "learning_rate": 3.2568125945037098e-06, "loss": 6.4558, "step": 326010 }, { "epoch": 0.6585810267577581, "grad_norm": 813.0732421875, "learning_rate": 3.256485434112578e-06, "loss": 26.3669, "step": 326020 }, { "epoch": 0.6586012273904419, "grad_norm": 670.9710693359375, "learning_rate": 3.2561582822191273e-06, "loss": 12.1677, "step": 326030 }, { "epoch": 0.6586214280231257, "grad_norm": 584.9721069335938, "learning_rate": 3.2558311388249465e-06, "loss": 16.4993, "step": 326040 }, { "epoch": 0.6586416286558096, "grad_norm": 294.2562255859375, "learning_rate": 3.2555040039316344e-06, "loss": 15.16, "step": 326050 }, { "epoch": 0.6586618292884933, "grad_norm": 582.047607421875, "learning_rate": 3.255176877540784e-06, "loss": 21.793, "step": 326060 }, { "epoch": 0.6586820299211771, "grad_norm": 369.1705322265625, "learning_rate": 3.2548497596539907e-06, "loss": 12.7556, "step": 326070 }, { "epoch": 0.6587022305538609, "grad_norm": 794.9580688476562, "learning_rate": 3.2545226502728477e-06, "loss": 23.036, "step": 326080 }, { "epoch": 0.6587224311865447, "grad_norm": 387.3789367675781, "learning_rate": 3.254195549398948e-06, "loss": 12.0966, "step": 326090 }, { "epoch": 0.6587426318192285, "grad_norm": 436.01580810546875, "learning_rate": 3.2538684570338908e-06, "loss": 8.9334, "step": 326100 }, { "epoch": 0.6587628324519124, "grad_norm": 239.38002014160156, "learning_rate": 3.253541373179264e-06, "loss": 13.6047, "step": 326110 }, { "epoch": 0.6587830330845962, "grad_norm": 441.0944519042969, "learning_rate": 3.2532142978366654e-06, "loss": 40.3395, "step": 326120 }, { "epoch": 0.65880323371728, "grad_norm": 282.71319580078125, "learning_rate": 3.252887231007689e-06, "loss": 24.0975, "step": 326130 }, { "epoch": 0.6588234343499638, "grad_norm": 290.3285217285156, "learning_rate": 3.2525601726939283e-06, "loss": 22.3136, "step": 326140 }, { "epoch": 0.6588436349826476, 
"grad_norm": 197.89186096191406, "learning_rate": 3.2522331228969774e-06, "loss": 20.9678, "step": 326150 }, { "epoch": 0.6588638356153315, "grad_norm": 401.70330810546875, "learning_rate": 3.2519060816184307e-06, "loss": 18.9292, "step": 326160 }, { "epoch": 0.6588840362480153, "grad_norm": 576.4989013671875, "learning_rate": 3.251579048859881e-06, "loss": 18.1169, "step": 326170 }, { "epoch": 0.6589042368806991, "grad_norm": 324.4037170410156, "learning_rate": 3.2512520246229217e-06, "loss": 25.7619, "step": 326180 }, { "epoch": 0.6589244375133829, "grad_norm": 756.5455932617188, "learning_rate": 3.2509250089091494e-06, "loss": 23.6764, "step": 326190 }, { "epoch": 0.6589446381460667, "grad_norm": 53.63800048828125, "learning_rate": 3.2505980017201564e-06, "loss": 25.0827, "step": 326200 }, { "epoch": 0.6589648387787506, "grad_norm": 427.29864501953125, "learning_rate": 3.250271003057537e-06, "loss": 21.0991, "step": 326210 }, { "epoch": 0.6589850394114344, "grad_norm": 492.3072509765625, "learning_rate": 3.249944012922883e-06, "loss": 22.0644, "step": 326220 }, { "epoch": 0.6590052400441182, "grad_norm": 1197.179931640625, "learning_rate": 3.249617031317792e-06, "loss": 24.7577, "step": 326230 }, { "epoch": 0.659025440676802, "grad_norm": 388.35888671875, "learning_rate": 3.2492900582438537e-06, "loss": 29.4192, "step": 326240 }, { "epoch": 0.6590456413094858, "grad_norm": 219.8641815185547, "learning_rate": 3.248963093702663e-06, "loss": 14.3934, "step": 326250 }, { "epoch": 0.6590658419421697, "grad_norm": 675.7544555664062, "learning_rate": 3.248636137695815e-06, "loss": 10.9527, "step": 326260 }, { "epoch": 0.6590860425748535, "grad_norm": 193.2775421142578, "learning_rate": 3.2483091902249008e-06, "loss": 16.3143, "step": 326270 }, { "epoch": 0.6591062432075373, "grad_norm": 216.4387664794922, "learning_rate": 3.247982251291516e-06, "loss": 9.014, "step": 326280 }, { "epoch": 0.6591264438402211, "grad_norm": 0.0, "learning_rate": 3.247655320897254e-06, "loss": 19.7007, "step": 326290 }, { "epoch": 0.6591466444729049, "grad_norm": 233.0736846923828, "learning_rate": 3.247328399043706e-06, "loss": 15.0967, "step": 326300 }, { "epoch": 0.6591668451055888, "grad_norm": 402.33111572265625, "learning_rate": 3.2470014857324673e-06, "loss": 27.7267, "step": 326310 }, { "epoch": 0.6591870457382725, "grad_norm": 402.5802307128906, "learning_rate": 3.2466745809651312e-06, "loss": 12.5053, "step": 326320 }, { "epoch": 0.6592072463709563, "grad_norm": 641.3466186523438, "learning_rate": 3.2463476847432883e-06, "loss": 19.0037, "step": 326330 }, { "epoch": 0.6592274470036401, "grad_norm": 346.65838623046875, "learning_rate": 3.2460207970685363e-06, "loss": 10.3399, "step": 326340 }, { "epoch": 0.6592476476363239, "grad_norm": 122.47904205322266, "learning_rate": 3.245693917942465e-06, "loss": 12.2482, "step": 326350 }, { "epoch": 0.6592678482690078, "grad_norm": 138.75299072265625, "learning_rate": 3.245367047366671e-06, "loss": 27.0093, "step": 326360 }, { "epoch": 0.6592880489016916, "grad_norm": 277.60235595703125, "learning_rate": 3.2450401853427432e-06, "loss": 18.9396, "step": 326370 }, { "epoch": 0.6593082495343754, "grad_norm": 36.98881530761719, "learning_rate": 3.2447133318722756e-06, "loss": 23.1334, "step": 326380 }, { "epoch": 0.6593284501670592, "grad_norm": 343.0567932128906, "learning_rate": 3.2443864869568666e-06, "loss": 25.0246, "step": 326390 }, { "epoch": 0.659348650799743, "grad_norm": 147.71640014648438, "learning_rate": 3.2440596505981005e-06, "loss": 17.0263, "step": 
326400 }, { "epoch": 0.6593688514324269, "grad_norm": 25.792308807373047, "learning_rate": 3.243732822797576e-06, "loss": 11.4002, "step": 326410 }, { "epoch": 0.6593890520651107, "grad_norm": 331.8695983886719, "learning_rate": 3.243406003556886e-06, "loss": 16.7536, "step": 326420 }, { "epoch": 0.6594092526977945, "grad_norm": 281.3816833496094, "learning_rate": 3.2430791928776217e-06, "loss": 33.9329, "step": 326430 }, { "epoch": 0.6594294533304783, "grad_norm": 276.3078308105469, "learning_rate": 3.2427523907613755e-06, "loss": 20.8404, "step": 326440 }, { "epoch": 0.6594496539631621, "grad_norm": 43.945648193359375, "learning_rate": 3.242425597209742e-06, "loss": 21.5606, "step": 326450 }, { "epoch": 0.659469854595846, "grad_norm": 536.5602416992188, "learning_rate": 3.2420988122243123e-06, "loss": 19.6857, "step": 326460 }, { "epoch": 0.6594900552285298, "grad_norm": 199.89599609375, "learning_rate": 3.2417720358066785e-06, "loss": 25.8947, "step": 326470 }, { "epoch": 0.6595102558612136, "grad_norm": 94.38127136230469, "learning_rate": 3.241445267958438e-06, "loss": 23.4073, "step": 326480 }, { "epoch": 0.6595304564938974, "grad_norm": 418.309326171875, "learning_rate": 3.2411185086811763e-06, "loss": 20.8045, "step": 326490 }, { "epoch": 0.6595506571265812, "grad_norm": 296.802978515625, "learning_rate": 3.2407917579764914e-06, "loss": 16.1665, "step": 326500 }, { "epoch": 0.659570857759265, "grad_norm": 273.4130859375, "learning_rate": 3.2404650158459737e-06, "loss": 29.4563, "step": 326510 }, { "epoch": 0.6595910583919489, "grad_norm": 47.27882766723633, "learning_rate": 3.240138282291217e-06, "loss": 11.6505, "step": 326520 }, { "epoch": 0.6596112590246327, "grad_norm": 216.8551483154297, "learning_rate": 3.2398115573138123e-06, "loss": 21.9539, "step": 326530 }, { "epoch": 0.6596314596573165, "grad_norm": 372.9217834472656, "learning_rate": 3.2394848409153514e-06, "loss": 27.4066, "step": 326540 }, { "epoch": 0.6596516602900003, "grad_norm": 488.9245300292969, "learning_rate": 3.2391581330974307e-06, "loss": 26.7345, "step": 326550 }, { "epoch": 0.6596718609226842, "grad_norm": 277.7620849609375, "learning_rate": 3.238831433861637e-06, "loss": 15.3652, "step": 326560 }, { "epoch": 0.6596920615553679, "grad_norm": 320.3008117675781, "learning_rate": 3.2385047432095656e-06, "loss": 15.7351, "step": 326570 }, { "epoch": 0.6597122621880517, "grad_norm": 191.49447631835938, "learning_rate": 3.23817806114281e-06, "loss": 10.6893, "step": 326580 }, { "epoch": 0.6597324628207355, "grad_norm": 363.738037109375, "learning_rate": 3.23785138766296e-06, "loss": 16.5293, "step": 326590 }, { "epoch": 0.6597526634534193, "grad_norm": 444.9259338378906, "learning_rate": 3.2375247227716077e-06, "loss": 20.5872, "step": 326600 }, { "epoch": 0.6597728640861031, "grad_norm": 256.9393310546875, "learning_rate": 3.2371980664703486e-06, "loss": 26.2523, "step": 326610 }, { "epoch": 0.659793064718787, "grad_norm": 264.544921875, "learning_rate": 3.2368714187607696e-06, "loss": 14.5543, "step": 326620 }, { "epoch": 0.6598132653514708, "grad_norm": 273.64703369140625, "learning_rate": 3.236544779644466e-06, "loss": 11.8041, "step": 326630 }, { "epoch": 0.6598334659841546, "grad_norm": 354.6824645996094, "learning_rate": 3.2362181491230295e-06, "loss": 14.7286, "step": 326640 }, { "epoch": 0.6598536666168384, "grad_norm": 52.0087776184082, "learning_rate": 3.235891527198053e-06, "loss": 14.8979, "step": 326650 }, { "epoch": 0.6598738672495222, "grad_norm": 541.7445068359375, "learning_rate": 
3.235564913871126e-06, "loss": 19.7645, "step": 326660 }, { "epoch": 0.6598940678822061, "grad_norm": 415.505126953125, "learning_rate": 3.235238309143842e-06, "loss": 19.1636, "step": 326670 }, { "epoch": 0.6599142685148899, "grad_norm": 420.2804260253906, "learning_rate": 3.234911713017793e-06, "loss": 22.9831, "step": 326680 }, { "epoch": 0.6599344691475737, "grad_norm": 0.0, "learning_rate": 3.2345851254945695e-06, "loss": 7.5803, "step": 326690 }, { "epoch": 0.6599546697802575, "grad_norm": 2.467928409576416, "learning_rate": 3.2342585465757625e-06, "loss": 4.4995, "step": 326700 }, { "epoch": 0.6599748704129413, "grad_norm": 271.5982666015625, "learning_rate": 3.2339319762629694e-06, "loss": 20.3659, "step": 326710 }, { "epoch": 0.6599950710456252, "grad_norm": 837.4721069335938, "learning_rate": 3.2336054145577735e-06, "loss": 15.789, "step": 326720 }, { "epoch": 0.660015271678309, "grad_norm": 289.23284912109375, "learning_rate": 3.233278861461772e-06, "loss": 9.8312, "step": 326730 }, { "epoch": 0.6600354723109928, "grad_norm": 925.2801513671875, "learning_rate": 3.2329523169765566e-06, "loss": 42.8915, "step": 326740 }, { "epoch": 0.6600556729436766, "grad_norm": 245.6300506591797, "learning_rate": 3.2326257811037154e-06, "loss": 17.8565, "step": 326750 }, { "epoch": 0.6600758735763604, "grad_norm": 81.37186431884766, "learning_rate": 3.2322992538448418e-06, "loss": 24.6701, "step": 326760 }, { "epoch": 0.6600960742090443, "grad_norm": 364.2679443359375, "learning_rate": 3.2319727352015286e-06, "loss": 17.6833, "step": 326770 }, { "epoch": 0.6601162748417281, "grad_norm": 765.7305297851562, "learning_rate": 3.2316462251753646e-06, "loss": 18.536, "step": 326780 }, { "epoch": 0.6601364754744119, "grad_norm": 131.59066772460938, "learning_rate": 3.2313197237679416e-06, "loss": 25.45, "step": 326790 }, { "epoch": 0.6601566761070957, "grad_norm": 483.0262756347656, "learning_rate": 3.230993230980853e-06, "loss": 18.5174, "step": 326800 }, { "epoch": 0.6601768767397795, "grad_norm": 569.9622802734375, "learning_rate": 3.2306667468156895e-06, "loss": 31.4212, "step": 326810 }, { "epoch": 0.6601970773724634, "grad_norm": 1281.259033203125, "learning_rate": 3.2303402712740404e-06, "loss": 35.0354, "step": 326820 }, { "epoch": 0.6602172780051471, "grad_norm": 297.14898681640625, "learning_rate": 3.2300138043574992e-06, "loss": 14.748, "step": 326830 }, { "epoch": 0.6602374786378309, "grad_norm": 359.8337707519531, "learning_rate": 3.2296873460676557e-06, "loss": 19.4653, "step": 326840 }, { "epoch": 0.6602576792705147, "grad_norm": 7.154877185821533, "learning_rate": 3.229360896406102e-06, "loss": 11.0349, "step": 326850 }, { "epoch": 0.6602778799031985, "grad_norm": 132.37347412109375, "learning_rate": 3.229034455374426e-06, "loss": 21.3548, "step": 326860 }, { "epoch": 0.6602980805358823, "grad_norm": 246.46514892578125, "learning_rate": 3.2287080229742253e-06, "loss": 18.76, "step": 326870 }, { "epoch": 0.6603182811685662, "grad_norm": 567.5435180664062, "learning_rate": 3.228381599207083e-06, "loss": 16.071, "step": 326880 }, { "epoch": 0.66033848180125, "grad_norm": 587.0892333984375, "learning_rate": 3.2280551840745953e-06, "loss": 27.1945, "step": 326890 }, { "epoch": 0.6603586824339338, "grad_norm": 369.40252685546875, "learning_rate": 3.227728777578353e-06, "loss": 9.7181, "step": 326900 }, { "epoch": 0.6603788830666176, "grad_norm": 687.49853515625, "learning_rate": 3.2274023797199446e-06, "loss": 35.6283, "step": 326910 }, { "epoch": 0.6603990836993014, "grad_norm": 
322.2925109863281, "learning_rate": 3.227075990500962e-06, "loss": 11.0406, "step": 326920 }, { "epoch": 0.6604192843319853, "grad_norm": 670.8181762695312, "learning_rate": 3.226749609922997e-06, "loss": 22.8369, "step": 326930 }, { "epoch": 0.6604394849646691, "grad_norm": 28.338125228881836, "learning_rate": 3.226423237987637e-06, "loss": 8.6988, "step": 326940 }, { "epoch": 0.6604596855973529, "grad_norm": 735.6610107421875, "learning_rate": 3.226096874696476e-06, "loss": 22.8139, "step": 326950 }, { "epoch": 0.6604798862300367, "grad_norm": 132.19671630859375, "learning_rate": 3.2257705200511035e-06, "loss": 8.7654, "step": 326960 }, { "epoch": 0.6605000868627205, "grad_norm": 343.75146484375, "learning_rate": 3.2254441740531124e-06, "loss": 18.5073, "step": 326970 }, { "epoch": 0.6605202874954044, "grad_norm": 335.85968017578125, "learning_rate": 3.225117836704089e-06, "loss": 9.4129, "step": 326980 }, { "epoch": 0.6605404881280882, "grad_norm": 58.72859191894531, "learning_rate": 3.224791508005627e-06, "loss": 12.8179, "step": 326990 }, { "epoch": 0.660560688760772, "grad_norm": 241.4238739013672, "learning_rate": 3.224465187959316e-06, "loss": 31.1767, "step": 327000 }, { "epoch": 0.6605808893934558, "grad_norm": 271.1313781738281, "learning_rate": 3.224138876566745e-06, "loss": 7.9185, "step": 327010 }, { "epoch": 0.6606010900261396, "grad_norm": 309.2290954589844, "learning_rate": 3.2238125738295063e-06, "loss": 11.6941, "step": 327020 }, { "epoch": 0.6606212906588235, "grad_norm": 437.2591247558594, "learning_rate": 3.2234862797491905e-06, "loss": 15.7673, "step": 327030 }, { "epoch": 0.6606414912915073, "grad_norm": 211.36199951171875, "learning_rate": 3.2231599943273865e-06, "loss": 13.6241, "step": 327040 }, { "epoch": 0.6606616919241911, "grad_norm": 247.3621368408203, "learning_rate": 3.2228337175656856e-06, "loss": 33.3884, "step": 327050 }, { "epoch": 0.6606818925568749, "grad_norm": 245.90342712402344, "learning_rate": 3.222507449465678e-06, "loss": 15.0339, "step": 327060 }, { "epoch": 0.6607020931895587, "grad_norm": 576.1591186523438, "learning_rate": 3.2221811900289524e-06, "loss": 17.7405, "step": 327070 }, { "epoch": 0.6607222938222426, "grad_norm": 268.83837890625, "learning_rate": 3.221854939257099e-06, "loss": 19.998, "step": 327080 }, { "epoch": 0.6607424944549263, "grad_norm": 196.15208435058594, "learning_rate": 3.2215286971517123e-06, "loss": 16.0432, "step": 327090 }, { "epoch": 0.6607626950876101, "grad_norm": 657.3632202148438, "learning_rate": 3.2212024637143756e-06, "loss": 17.1239, "step": 327100 }, { "epoch": 0.6607828957202939, "grad_norm": 289.90020751953125, "learning_rate": 3.220876238946684e-06, "loss": 18.2402, "step": 327110 }, { "epoch": 0.6608030963529777, "grad_norm": 92.29066467285156, "learning_rate": 3.2205500228502257e-06, "loss": 12.4329, "step": 327120 }, { "epoch": 0.6608232969856616, "grad_norm": 157.49974060058594, "learning_rate": 3.220223815426592e-06, "loss": 16.8881, "step": 327130 }, { "epoch": 0.6608434976183454, "grad_norm": 817.279296875, "learning_rate": 3.21989761667737e-06, "loss": 21.3352, "step": 327140 }, { "epoch": 0.6608636982510292, "grad_norm": 80.90983581542969, "learning_rate": 3.21957142660415e-06, "loss": 19.8514, "step": 327150 }, { "epoch": 0.660883898883713, "grad_norm": 14.334986686706543, "learning_rate": 3.2192452452085265e-06, "loss": 15.8893, "step": 327160 }, { "epoch": 0.6609040995163968, "grad_norm": 234.1011199951172, "learning_rate": 3.218919072492082e-06, "loss": 24.0918, "step": 327170 }, { 
"epoch": 0.6609243001490807, "grad_norm": 79.61859130859375, "learning_rate": 3.2185929084564115e-06, "loss": 8.2091, "step": 327180 }, { "epoch": 0.6609445007817645, "grad_norm": 300.50640869140625, "learning_rate": 3.2182667531031044e-06, "loss": 15.4628, "step": 327190 }, { "epoch": 0.6609647014144483, "grad_norm": 274.9285888671875, "learning_rate": 3.217940606433747e-06, "loss": 17.9761, "step": 327200 }, { "epoch": 0.6609849020471321, "grad_norm": 396.72149658203125, "learning_rate": 3.2176144684499315e-06, "loss": 19.7943, "step": 327210 }, { "epoch": 0.6610051026798159, "grad_norm": 450.2877502441406, "learning_rate": 3.2172883391532484e-06, "loss": 12.6675, "step": 327220 }, { "epoch": 0.6610253033124998, "grad_norm": 39.47370147705078, "learning_rate": 3.216962218545284e-06, "loss": 14.0956, "step": 327230 }, { "epoch": 0.6610455039451836, "grad_norm": 720.3800659179688, "learning_rate": 3.2166361066276287e-06, "loss": 32.4461, "step": 327240 }, { "epoch": 0.6610657045778674, "grad_norm": 390.7908935546875, "learning_rate": 3.2163100034018735e-06, "loss": 18.8091, "step": 327250 }, { "epoch": 0.6610859052105512, "grad_norm": 0.0, "learning_rate": 3.2159839088696088e-06, "loss": 15.7301, "step": 327260 }, { "epoch": 0.661106105843235, "grad_norm": 674.4672241210938, "learning_rate": 3.21565782303242e-06, "loss": 16.456, "step": 327270 }, { "epoch": 0.6611263064759189, "grad_norm": 139.64060974121094, "learning_rate": 3.2153317458918997e-06, "loss": 24.8868, "step": 327280 }, { "epoch": 0.6611465071086027, "grad_norm": 366.8919372558594, "learning_rate": 3.2150056774496363e-06, "loss": 17.7777, "step": 327290 }, { "epoch": 0.6611667077412865, "grad_norm": 519.717041015625, "learning_rate": 3.2146796177072183e-06, "loss": 21.0956, "step": 327300 }, { "epoch": 0.6611869083739703, "grad_norm": 39.76637649536133, "learning_rate": 3.214353566666234e-06, "loss": 16.4979, "step": 327310 }, { "epoch": 0.6612071090066541, "grad_norm": 307.4339294433594, "learning_rate": 3.2140275243282765e-06, "loss": 17.9161, "step": 327320 }, { "epoch": 0.661227309639338, "grad_norm": 88.10888671875, "learning_rate": 3.2137014906949295e-06, "loss": 17.8225, "step": 327330 }, { "epoch": 0.6612475102720217, "grad_norm": 500.12506103515625, "learning_rate": 3.2133754657677857e-06, "loss": 10.8803, "step": 327340 }, { "epoch": 0.6612677109047055, "grad_norm": 160.55833435058594, "learning_rate": 3.2130494495484345e-06, "loss": 22.7973, "step": 327350 }, { "epoch": 0.6612879115373893, "grad_norm": 369.9012451171875, "learning_rate": 3.2127234420384624e-06, "loss": 20.0095, "step": 327360 }, { "epoch": 0.6613081121700731, "grad_norm": 421.60223388671875, "learning_rate": 3.212397443239459e-06, "loss": 13.6308, "step": 327370 }, { "epoch": 0.661328312802757, "grad_norm": 454.09698486328125, "learning_rate": 3.212071453153015e-06, "loss": 23.0256, "step": 327380 }, { "epoch": 0.6613485134354408, "grad_norm": 433.3726501464844, "learning_rate": 3.2117454717807174e-06, "loss": 12.1194, "step": 327390 }, { "epoch": 0.6613687140681246, "grad_norm": 49.15796661376953, "learning_rate": 3.211419499124154e-06, "loss": 10.5911, "step": 327400 }, { "epoch": 0.6613889147008084, "grad_norm": 455.6471862792969, "learning_rate": 3.2110935351849158e-06, "loss": 16.7057, "step": 327410 }, { "epoch": 0.6614091153334922, "grad_norm": 291.10888671875, "learning_rate": 3.2107675799645923e-06, "loss": 16.6149, "step": 327420 }, { "epoch": 0.661429315966176, "grad_norm": 775.1611938476562, "learning_rate": 3.210441633464769e-06, 
"loss": 26.5504, "step": 327430 }, { "epoch": 0.6614495165988599, "grad_norm": 403.2657470703125, "learning_rate": 3.2101156956870367e-06, "loss": 20.3346, "step": 327440 }, { "epoch": 0.6614697172315437, "grad_norm": 134.37828063964844, "learning_rate": 3.209789766632984e-06, "loss": 13.6932, "step": 327450 }, { "epoch": 0.6614899178642275, "grad_norm": 393.8540344238281, "learning_rate": 3.209463846304198e-06, "loss": 17.1115, "step": 327460 }, { "epoch": 0.6615101184969113, "grad_norm": 214.12506103515625, "learning_rate": 3.209137934702267e-06, "loss": 16.2317, "step": 327470 }, { "epoch": 0.6615303191295951, "grad_norm": 541.5082397460938, "learning_rate": 3.2088120318287843e-06, "loss": 12.0737, "step": 327480 }, { "epoch": 0.661550519762279, "grad_norm": 148.84764099121094, "learning_rate": 3.2084861376853304e-06, "loss": 22.5428, "step": 327490 }, { "epoch": 0.6615707203949628, "grad_norm": 470.9176025390625, "learning_rate": 3.2081602522734987e-06, "loss": 11.4023, "step": 327500 }, { "epoch": 0.6615909210276466, "grad_norm": 234.14559936523438, "learning_rate": 3.2078343755948783e-06, "loss": 16.6708, "step": 327510 }, { "epoch": 0.6616111216603304, "grad_norm": 0.9575088024139404, "learning_rate": 3.2075085076510548e-06, "loss": 18.4365, "step": 327520 }, { "epoch": 0.6616313222930142, "grad_norm": 274.15380859375, "learning_rate": 3.207182648443617e-06, "loss": 12.2141, "step": 327530 }, { "epoch": 0.6616515229256981, "grad_norm": 220.66366577148438, "learning_rate": 3.206856797974155e-06, "loss": 10.9902, "step": 327540 }, { "epoch": 0.6616717235583819, "grad_norm": 4.854771137237549, "learning_rate": 3.2065309562442536e-06, "loss": 20.7919, "step": 327550 }, { "epoch": 0.6616919241910657, "grad_norm": 294.95452880859375, "learning_rate": 3.2062051232555024e-06, "loss": 11.8438, "step": 327560 }, { "epoch": 0.6617121248237495, "grad_norm": 395.31402587890625, "learning_rate": 3.205879299009491e-06, "loss": 15.7949, "step": 327570 }, { "epoch": 0.6617323254564333, "grad_norm": 577.9132080078125, "learning_rate": 3.2055534835078075e-06, "loss": 23.7941, "step": 327580 }, { "epoch": 0.6617525260891172, "grad_norm": 206.83401489257812, "learning_rate": 3.205227676752037e-06, "loss": 20.8896, "step": 327590 }, { "epoch": 0.6617727267218009, "grad_norm": 240.10752868652344, "learning_rate": 3.2049018787437693e-06, "loss": 10.2279, "step": 327600 }, { "epoch": 0.6617929273544847, "grad_norm": 512.9368896484375, "learning_rate": 3.2045760894845932e-06, "loss": 31.879, "step": 327610 }, { "epoch": 0.6618131279871685, "grad_norm": 280.0784912109375, "learning_rate": 3.2042503089760934e-06, "loss": 28.417, "step": 327620 }, { "epoch": 0.6618333286198523, "grad_norm": 113.35942840576172, "learning_rate": 3.2039245372198613e-06, "loss": 10.603, "step": 327630 }, { "epoch": 0.6618535292525362, "grad_norm": 392.9140930175781, "learning_rate": 3.203598774217484e-06, "loss": 13.0894, "step": 327640 }, { "epoch": 0.66187372988522, "grad_norm": 259.1398010253906, "learning_rate": 3.2032730199705477e-06, "loss": 11.6942, "step": 327650 }, { "epoch": 0.6618939305179038, "grad_norm": 383.2666015625, "learning_rate": 3.20294727448064e-06, "loss": 36.4547, "step": 327660 }, { "epoch": 0.6619141311505876, "grad_norm": 432.5340270996094, "learning_rate": 3.2026215377493507e-06, "loss": 24.846, "step": 327670 }, { "epoch": 0.6619343317832714, "grad_norm": 304.7824401855469, "learning_rate": 3.2022958097782646e-06, "loss": 36.4396, "step": 327680 }, { "epoch": 0.6619545324159553, "grad_norm": 
1352.296875, "learning_rate": 3.20197009056897e-06, "loss": 20.8479, "step": 327690 }, { "epoch": 0.6619747330486391, "grad_norm": 615.2682495117188, "learning_rate": 3.201644380123056e-06, "loss": 22.2662, "step": 327700 }, { "epoch": 0.6619949336813229, "grad_norm": 319.700927734375, "learning_rate": 3.201318678442111e-06, "loss": 35.5316, "step": 327710 }, { "epoch": 0.6620151343140067, "grad_norm": 402.1708984375, "learning_rate": 3.2009929855277187e-06, "loss": 37.3906, "step": 327720 }, { "epoch": 0.6620353349466905, "grad_norm": 90.54914093017578, "learning_rate": 3.200667301381468e-06, "loss": 22.2098, "step": 327730 }, { "epoch": 0.6620555355793744, "grad_norm": 589.4803466796875, "learning_rate": 3.2003416260049493e-06, "loss": 17.6694, "step": 327740 }, { "epoch": 0.6620757362120582, "grad_norm": 640.0857543945312, "learning_rate": 3.2000159593997447e-06, "loss": 24.2283, "step": 327750 }, { "epoch": 0.662095936844742, "grad_norm": 237.67442321777344, "learning_rate": 3.1996903015674434e-06, "loss": 11.5415, "step": 327760 }, { "epoch": 0.6621161374774258, "grad_norm": 412.76593017578125, "learning_rate": 3.1993646525096368e-06, "loss": 13.7273, "step": 327770 }, { "epoch": 0.6621363381101096, "grad_norm": 374.91485595703125, "learning_rate": 3.1990390122279046e-06, "loss": 13.3486, "step": 327780 }, { "epoch": 0.6621565387427935, "grad_norm": 289.9313049316406, "learning_rate": 3.198713380723839e-06, "loss": 10.005, "step": 327790 }, { "epoch": 0.6621767393754773, "grad_norm": 34.16901779174805, "learning_rate": 3.1983877579990276e-06, "loss": 20.7103, "step": 327800 }, { "epoch": 0.6621969400081611, "grad_norm": 311.5425720214844, "learning_rate": 3.198062144055054e-06, "loss": 21.6194, "step": 327810 }, { "epoch": 0.6622171406408449, "grad_norm": 463.7889099121094, "learning_rate": 3.1977365388935076e-06, "loss": 24.8787, "step": 327820 }, { "epoch": 0.6622373412735287, "grad_norm": 37.17445755004883, "learning_rate": 3.1974109425159754e-06, "loss": 9.7137, "step": 327830 }, { "epoch": 0.6622575419062126, "grad_norm": 588.4010620117188, "learning_rate": 3.1970853549240425e-06, "loss": 17.2024, "step": 327840 }, { "epoch": 0.6622777425388963, "grad_norm": 439.13299560546875, "learning_rate": 3.196759776119296e-06, "loss": 33.6498, "step": 327850 }, { "epoch": 0.6622979431715801, "grad_norm": 242.45326232910156, "learning_rate": 3.1964342061033247e-06, "loss": 13.121, "step": 327860 }, { "epoch": 0.6623181438042639, "grad_norm": 679.048583984375, "learning_rate": 3.1961086448777157e-06, "loss": 21.9139, "step": 327870 }, { "epoch": 0.6623383444369477, "grad_norm": 211.0595245361328, "learning_rate": 3.1957830924440524e-06, "loss": 13.6705, "step": 327880 }, { "epoch": 0.6623585450696315, "grad_norm": 153.071533203125, "learning_rate": 3.195457548803925e-06, "loss": 30.9075, "step": 327890 }, { "epoch": 0.6623787457023154, "grad_norm": 534.3335571289062, "learning_rate": 3.195132013958918e-06, "loss": 14.811, "step": 327900 }, { "epoch": 0.6623989463349992, "grad_norm": 635.1417846679688, "learning_rate": 3.1948064879106187e-06, "loss": 15.1571, "step": 327910 }, { "epoch": 0.662419146967683, "grad_norm": 1015.3916625976562, "learning_rate": 3.1944809706606123e-06, "loss": 35.8303, "step": 327920 }, { "epoch": 0.6624393476003668, "grad_norm": 43.98500061035156, "learning_rate": 3.1941554622104897e-06, "loss": 16.0305, "step": 327930 }, { "epoch": 0.6624595482330506, "grad_norm": 554.8844604492188, "learning_rate": 3.1938299625618313e-06, "loss": 19.7504, "step": 327940 }, { 
"epoch": 0.6624797488657345, "grad_norm": 184.5054168701172, "learning_rate": 3.193504471716228e-06, "loss": 8.8416, "step": 327950 }, { "epoch": 0.6624999494984183, "grad_norm": 489.889892578125, "learning_rate": 3.1931789896752654e-06, "loss": 15.2668, "step": 327960 }, { "epoch": 0.6625201501311021, "grad_norm": 953.59912109375, "learning_rate": 3.192853516440528e-06, "loss": 23.7423, "step": 327970 }, { "epoch": 0.6625403507637859, "grad_norm": 707.7154541015625, "learning_rate": 3.192528052013604e-06, "loss": 13.3632, "step": 327980 }, { "epoch": 0.6625605513964697, "grad_norm": 248.6231689453125, "learning_rate": 3.1922025963960796e-06, "loss": 21.4877, "step": 327990 }, { "epoch": 0.6625807520291536, "grad_norm": 330.9288635253906, "learning_rate": 3.1918771495895395e-06, "loss": 29.9458, "step": 328000 }, { "epoch": 0.6626009526618374, "grad_norm": 290.83905029296875, "learning_rate": 3.1915517115955704e-06, "loss": 16.653, "step": 328010 }, { "epoch": 0.6626211532945212, "grad_norm": 234.9760284423828, "learning_rate": 3.1912262824157592e-06, "loss": 13.6941, "step": 328020 }, { "epoch": 0.662641353927205, "grad_norm": 183.3705291748047, "learning_rate": 3.1909008620516933e-06, "loss": 15.5814, "step": 328030 }, { "epoch": 0.6626615545598888, "grad_norm": 536.9821166992188, "learning_rate": 3.190575450504956e-06, "loss": 10.8537, "step": 328040 }, { "epoch": 0.6626817551925727, "grad_norm": 404.4079895019531, "learning_rate": 3.190250047777134e-06, "loss": 28.2419, "step": 328050 }, { "epoch": 0.6627019558252565, "grad_norm": 256.31536865234375, "learning_rate": 3.1899246538698157e-06, "loss": 12.2106, "step": 328060 }, { "epoch": 0.6627221564579403, "grad_norm": 326.7830505371094, "learning_rate": 3.1895992687845836e-06, "loss": 11.1497, "step": 328070 }, { "epoch": 0.6627423570906241, "grad_norm": 775.0346069335938, "learning_rate": 3.1892738925230236e-06, "loss": 17.4392, "step": 328080 }, { "epoch": 0.6627625577233079, "grad_norm": 111.34048461914062, "learning_rate": 3.188948525086727e-06, "loss": 21.5712, "step": 328090 }, { "epoch": 0.6627827583559918, "grad_norm": 248.71444702148438, "learning_rate": 3.188623166477272e-06, "loss": 17.674, "step": 328100 }, { "epoch": 0.6628029589886755, "grad_norm": 463.4655456542969, "learning_rate": 3.188297816696249e-06, "loss": 26.3609, "step": 328110 }, { "epoch": 0.6628231596213593, "grad_norm": 56.81084060668945, "learning_rate": 3.187972475745244e-06, "loss": 15.9719, "step": 328120 }, { "epoch": 0.6628433602540431, "grad_norm": 163.7423095703125, "learning_rate": 3.1876471436258407e-06, "loss": 20.6289, "step": 328130 }, { "epoch": 0.6628635608867269, "grad_norm": 1115.420654296875, "learning_rate": 3.1873218203396246e-06, "loss": 27.2812, "step": 328140 }, { "epoch": 0.6628837615194108, "grad_norm": 171.6507568359375, "learning_rate": 3.1869965058881836e-06, "loss": 21.7811, "step": 328150 }, { "epoch": 0.6629039621520946, "grad_norm": 386.964599609375, "learning_rate": 3.1866712002731004e-06, "loss": 18.4272, "step": 328160 }, { "epoch": 0.6629241627847784, "grad_norm": 267.7464904785156, "learning_rate": 3.186345903495961e-06, "loss": 11.4173, "step": 328170 }, { "epoch": 0.6629443634174622, "grad_norm": 207.80772399902344, "learning_rate": 3.1860206155583527e-06, "loss": 33.6552, "step": 328180 }, { "epoch": 0.662964564050146, "grad_norm": 219.84579467773438, "learning_rate": 3.185695336461861e-06, "loss": 10.6895, "step": 328190 }, { "epoch": 0.6629847646828299, "grad_norm": 350.85308837890625, "learning_rate": 
3.185370066208069e-06, "loss": 6.2564, "step": 328200 }, { "epoch": 0.6630049653155137, "grad_norm": 77.57797241210938, "learning_rate": 3.185044804798564e-06, "loss": 15.2196, "step": 328210 }, { "epoch": 0.6630251659481975, "grad_norm": 708.864501953125, "learning_rate": 3.1847195522349305e-06, "loss": 27.5036, "step": 328220 }, { "epoch": 0.6630453665808813, "grad_norm": 9.681346893310547, "learning_rate": 3.1843943085187527e-06, "loss": 16.3971, "step": 328230 }, { "epoch": 0.6630655672135651, "grad_norm": 297.8474426269531, "learning_rate": 3.1840690736516166e-06, "loss": 9.6413, "step": 328240 }, { "epoch": 0.663085767846249, "grad_norm": 424.9536437988281, "learning_rate": 3.183743847635109e-06, "loss": 17.2958, "step": 328250 }, { "epoch": 0.6631059684789328, "grad_norm": 70.27574920654297, "learning_rate": 3.1834186304708126e-06, "loss": 17.2863, "step": 328260 }, { "epoch": 0.6631261691116166, "grad_norm": 121.83522033691406, "learning_rate": 3.183093422160314e-06, "loss": 14.4273, "step": 328270 }, { "epoch": 0.6631463697443004, "grad_norm": 342.9933776855469, "learning_rate": 3.182768222705198e-06, "loss": 12.4681, "step": 328280 }, { "epoch": 0.6631665703769842, "grad_norm": 213.04074096679688, "learning_rate": 3.182443032107049e-06, "loss": 19.0667, "step": 328290 }, { "epoch": 0.663186771009668, "grad_norm": 223.21646118164062, "learning_rate": 3.1821178503674515e-06, "loss": 11.2374, "step": 328300 }, { "epoch": 0.6632069716423519, "grad_norm": 373.4936218261719, "learning_rate": 3.1817926774879903e-06, "loss": 28.1364, "step": 328310 }, { "epoch": 0.6632271722750357, "grad_norm": 426.1025695800781, "learning_rate": 3.1814675134702534e-06, "loss": 19.7539, "step": 328320 }, { "epoch": 0.6632473729077195, "grad_norm": 459.76727294921875, "learning_rate": 3.181142358315822e-06, "loss": 20.8025, "step": 328330 }, { "epoch": 0.6632675735404033, "grad_norm": 893.7747192382812, "learning_rate": 3.1808172120262824e-06, "loss": 31.0751, "step": 328340 }, { "epoch": 0.6632877741730872, "grad_norm": 176.30690002441406, "learning_rate": 3.1804920746032197e-06, "loss": 23.8882, "step": 328350 }, { "epoch": 0.6633079748057709, "grad_norm": 355.8739929199219, "learning_rate": 3.1801669460482176e-06, "loss": 33.8059, "step": 328360 }, { "epoch": 0.6633281754384547, "grad_norm": 730.5296630859375, "learning_rate": 3.1798418263628595e-06, "loss": 22.4636, "step": 328370 }, { "epoch": 0.6633483760711385, "grad_norm": 305.6308288574219, "learning_rate": 3.179516715548735e-06, "loss": 33.3801, "step": 328380 }, { "epoch": 0.6633685767038223, "grad_norm": 306.0620422363281, "learning_rate": 3.179191613607422e-06, "loss": 10.5533, "step": 328390 }, { "epoch": 0.6633887773365061, "grad_norm": 284.91986083984375, "learning_rate": 3.178866520540509e-06, "loss": 24.745, "step": 328400 }, { "epoch": 0.66340897796919, "grad_norm": 65.59046173095703, "learning_rate": 3.1785414363495808e-06, "loss": 17.17, "step": 328410 }, { "epoch": 0.6634291786018738, "grad_norm": 260.3446350097656, "learning_rate": 3.17821636103622e-06, "loss": 16.0564, "step": 328420 }, { "epoch": 0.6634493792345576, "grad_norm": 270.76068115234375, "learning_rate": 3.1778912946020114e-06, "loss": 33.7841, "step": 328430 }, { "epoch": 0.6634695798672414, "grad_norm": 580.8342895507812, "learning_rate": 3.1775662370485406e-06, "loss": 22.612, "step": 328440 }, { "epoch": 0.6634897804999252, "grad_norm": 399.6605529785156, "learning_rate": 3.17724118837739e-06, "loss": 19.6052, "step": 328450 }, { "epoch": 0.6635099811326091, 
"grad_norm": 189.4546661376953, "learning_rate": 3.1769161485901445e-06, "loss": 13.8748, "step": 328460 }, { "epoch": 0.6635301817652929, "grad_norm": 482.87615966796875, "learning_rate": 3.176591117688389e-06, "loss": 14.321, "step": 328470 }, { "epoch": 0.6635503823979767, "grad_norm": 473.2443542480469, "learning_rate": 3.176266095673708e-06, "loss": 10.8053, "step": 328480 }, { "epoch": 0.6635705830306605, "grad_norm": 369.6527404785156, "learning_rate": 3.175941082547684e-06, "loss": 27.7805, "step": 328490 }, { "epoch": 0.6635907836633443, "grad_norm": 400.4463806152344, "learning_rate": 3.1756160783119015e-06, "loss": 20.2652, "step": 328500 }, { "epoch": 0.6636109842960282, "grad_norm": 126.27311706542969, "learning_rate": 3.175291082967947e-06, "loss": 28.937, "step": 328510 }, { "epoch": 0.663631184928712, "grad_norm": 10.93095874786377, "learning_rate": 3.1749660965174007e-06, "loss": 15.2746, "step": 328520 }, { "epoch": 0.6636513855613958, "grad_norm": 949.2610473632812, "learning_rate": 3.1746411189618478e-06, "loss": 15.0331, "step": 328530 }, { "epoch": 0.6636715861940796, "grad_norm": 179.00579833984375, "learning_rate": 3.174316150302875e-06, "loss": 25.1306, "step": 328540 }, { "epoch": 0.6636917868267634, "grad_norm": 293.65496826171875, "learning_rate": 3.1739911905420617e-06, "loss": 25.5459, "step": 328550 }, { "epoch": 0.6637119874594473, "grad_norm": 533.2905883789062, "learning_rate": 3.1736662396809936e-06, "loss": 15.8173, "step": 328560 }, { "epoch": 0.6637321880921311, "grad_norm": 880.5447387695312, "learning_rate": 3.173341297721257e-06, "loss": 12.0572, "step": 328570 }, { "epoch": 0.6637523887248149, "grad_norm": 255.260009765625, "learning_rate": 3.1730163646644317e-06, "loss": 19.1106, "step": 328580 }, { "epoch": 0.6637725893574987, "grad_norm": 169.76583862304688, "learning_rate": 3.1726914405121034e-06, "loss": 7.0164, "step": 328590 }, { "epoch": 0.6637927899901825, "grad_norm": 329.3076477050781, "learning_rate": 3.1723665252658564e-06, "loss": 12.133, "step": 328600 }, { "epoch": 0.6638129906228664, "grad_norm": 1094.4727783203125, "learning_rate": 3.172041618927272e-06, "loss": 24.7534, "step": 328610 }, { "epoch": 0.6638331912555501, "grad_norm": 408.4696044921875, "learning_rate": 3.171716721497934e-06, "loss": 23.1637, "step": 328620 }, { "epoch": 0.6638533918882339, "grad_norm": 706.7051391601562, "learning_rate": 3.171391832979428e-06, "loss": 20.8366, "step": 328630 }, { "epoch": 0.6638735925209177, "grad_norm": 414.6630554199219, "learning_rate": 3.171066953373338e-06, "loss": 16.9479, "step": 328640 }, { "epoch": 0.6638937931536015, "grad_norm": 1162.2161865234375, "learning_rate": 3.170742082681244e-06, "loss": 21.0609, "step": 328650 }, { "epoch": 0.6639139937862854, "grad_norm": 615.6329956054688, "learning_rate": 3.1704172209047324e-06, "loss": 10.0619, "step": 328660 }, { "epoch": 0.6639341944189692, "grad_norm": 697.70703125, "learning_rate": 3.1700923680453855e-06, "loss": 19.9357, "step": 328670 }, { "epoch": 0.663954395051653, "grad_norm": 610.2066040039062, "learning_rate": 3.1697675241047852e-06, "loss": 17.9655, "step": 328680 }, { "epoch": 0.6639745956843368, "grad_norm": 363.06414794921875, "learning_rate": 3.1694426890845155e-06, "loss": 15.7865, "step": 328690 }, { "epoch": 0.6639947963170206, "grad_norm": 415.978759765625, "learning_rate": 3.169117862986163e-06, "loss": 25.984, "step": 328700 }, { "epoch": 0.6640149969497045, "grad_norm": 528.6858520507812, "learning_rate": 3.168793045811305e-06, "loss": 19.4374, "step": 
328710 }, { "epoch": 0.6640351975823883, "grad_norm": 336.0189514160156, "learning_rate": 3.1684682375615283e-06, "loss": 12.8563, "step": 328720 }, { "epoch": 0.6640553982150721, "grad_norm": 494.2802734375, "learning_rate": 3.168143438238417e-06, "loss": 29.0306, "step": 328730 }, { "epoch": 0.6640755988477559, "grad_norm": 487.3626403808594, "learning_rate": 3.1678186478435508e-06, "loss": 13.4801, "step": 328740 }, { "epoch": 0.6640957994804397, "grad_norm": 750.1293334960938, "learning_rate": 3.167493866378514e-06, "loss": 11.5802, "step": 328750 }, { "epoch": 0.6641160001131236, "grad_norm": 341.471435546875, "learning_rate": 3.1671690938448895e-06, "loss": 15.909, "step": 328760 }, { "epoch": 0.6641362007458074, "grad_norm": 532.2777099609375, "learning_rate": 3.166844330244263e-06, "loss": 24.0081, "step": 328770 }, { "epoch": 0.6641564013784912, "grad_norm": 184.27723693847656, "learning_rate": 3.166519575578213e-06, "loss": 15.1475, "step": 328780 }, { "epoch": 0.664176602011175, "grad_norm": 480.2397766113281, "learning_rate": 3.1661948298483243e-06, "loss": 12.8562, "step": 328790 }, { "epoch": 0.6641968026438588, "grad_norm": 394.6187438964844, "learning_rate": 3.16587009305618e-06, "loss": 20.0084, "step": 328800 }, { "epoch": 0.6642170032765427, "grad_norm": 279.446533203125, "learning_rate": 3.165545365203363e-06, "loss": 12.7865, "step": 328810 }, { "epoch": 0.6642372039092265, "grad_norm": 415.18682861328125, "learning_rate": 3.1652206462914542e-06, "loss": 21.5043, "step": 328820 }, { "epoch": 0.6642574045419103, "grad_norm": 271.2012634277344, "learning_rate": 3.164895936322039e-06, "loss": 19.6453, "step": 328830 }, { "epoch": 0.6642776051745941, "grad_norm": 502.14935302734375, "learning_rate": 3.1645712352966967e-06, "loss": 21.6256, "step": 328840 }, { "epoch": 0.6642978058072779, "grad_norm": 263.9889831542969, "learning_rate": 3.164246543217011e-06, "loss": 20.0408, "step": 328850 }, { "epoch": 0.6643180064399618, "grad_norm": 60.78538131713867, "learning_rate": 3.1639218600845673e-06, "loss": 5.9968, "step": 328860 }, { "epoch": 0.6643382070726456, "grad_norm": 582.1119384765625, "learning_rate": 3.1635971859009444e-06, "loss": 13.539, "step": 328870 }, { "epoch": 0.6643584077053293, "grad_norm": 556.9094848632812, "learning_rate": 3.1632725206677264e-06, "loss": 12.4881, "step": 328880 }, { "epoch": 0.6643786083380131, "grad_norm": 218.09471130371094, "learning_rate": 3.1629478643864963e-06, "loss": 15.9212, "step": 328890 }, { "epoch": 0.6643988089706969, "grad_norm": 414.8309020996094, "learning_rate": 3.1626232170588343e-06, "loss": 22.3698, "step": 328900 }, { "epoch": 0.6644190096033807, "grad_norm": 553.9437255859375, "learning_rate": 3.1622985786863236e-06, "loss": 29.8867, "step": 328910 }, { "epoch": 0.6644392102360646, "grad_norm": 58.52224349975586, "learning_rate": 3.1619739492705464e-06, "loss": 9.5943, "step": 328920 }, { "epoch": 0.6644594108687484, "grad_norm": 156.68765258789062, "learning_rate": 3.1616493288130866e-06, "loss": 9.0621, "step": 328930 }, { "epoch": 0.6644796115014322, "grad_norm": 456.7655944824219, "learning_rate": 3.1613247173155247e-06, "loss": 17.5479, "step": 328940 }, { "epoch": 0.664499812134116, "grad_norm": 321.54046630859375, "learning_rate": 3.161000114779443e-06, "loss": 12.8307, "step": 328950 }, { "epoch": 0.6645200127667998, "grad_norm": 551.7155151367188, "learning_rate": 3.1606755212064246e-06, "loss": 16.9916, "step": 328960 }, { "epoch": 0.6645402133994837, "grad_norm": 280.09429931640625, "learning_rate": 
3.1603509365980495e-06, "loss": 8.5942, "step": 328970 }, { "epoch": 0.6645604140321675, "grad_norm": 338.11456298828125, "learning_rate": 3.1600263609559005e-06, "loss": 10.2786, "step": 328980 }, { "epoch": 0.6645806146648513, "grad_norm": 212.71983337402344, "learning_rate": 3.159701794281561e-06, "loss": 14.1564, "step": 328990 }, { "epoch": 0.6646008152975351, "grad_norm": 646.0335693359375, "learning_rate": 3.1593772365766107e-06, "loss": 14.0678, "step": 329000 }, { "epoch": 0.6646210159302189, "grad_norm": 187.28985595703125, "learning_rate": 3.1590526878426326e-06, "loss": 24.6466, "step": 329010 }, { "epoch": 0.6646412165629028, "grad_norm": 370.43438720703125, "learning_rate": 3.1587281480812093e-06, "loss": 15.3338, "step": 329020 }, { "epoch": 0.6646614171955866, "grad_norm": 360.96368408203125, "learning_rate": 3.1584036172939213e-06, "loss": 29.5299, "step": 329030 }, { "epoch": 0.6646816178282704, "grad_norm": 200.5292205810547, "learning_rate": 3.1580790954823505e-06, "loss": 9.6293, "step": 329040 }, { "epoch": 0.6647018184609542, "grad_norm": 532.2564697265625, "learning_rate": 3.157754582648079e-06, "loss": 21.1889, "step": 329050 }, { "epoch": 0.664722019093638, "grad_norm": 564.5127563476562, "learning_rate": 3.1574300787926883e-06, "loss": 14.2916, "step": 329060 }, { "epoch": 0.6647422197263219, "grad_norm": 379.1608581542969, "learning_rate": 3.1571055839177583e-06, "loss": 25.0758, "step": 329070 }, { "epoch": 0.6647624203590057, "grad_norm": 337.03515625, "learning_rate": 3.156781098024874e-06, "loss": 14.498, "step": 329080 }, { "epoch": 0.6647826209916895, "grad_norm": 65.75376892089844, "learning_rate": 3.156456621115615e-06, "loss": 15.2741, "step": 329090 }, { "epoch": 0.6648028216243733, "grad_norm": 137.52867126464844, "learning_rate": 3.1561321531915622e-06, "loss": 17.7733, "step": 329100 }, { "epoch": 0.6648230222570571, "grad_norm": 20.54436492919922, "learning_rate": 3.155807694254298e-06, "loss": 22.4343, "step": 329110 }, { "epoch": 0.664843222889741, "grad_norm": 733.7474365234375, "learning_rate": 3.155483244305404e-06, "loss": 21.4831, "step": 329120 }, { "epoch": 0.6648634235224247, "grad_norm": 144.43870544433594, "learning_rate": 3.15515880334646e-06, "loss": 25.4423, "step": 329130 }, { "epoch": 0.6648836241551085, "grad_norm": 480.5118713378906, "learning_rate": 3.1548343713790474e-06, "loss": 20.144, "step": 329140 }, { "epoch": 0.6649038247877923, "grad_norm": 49.870323181152344, "learning_rate": 3.1545099484047514e-06, "loss": 7.7324, "step": 329150 }, { "epoch": 0.6649240254204761, "grad_norm": 91.82637023925781, "learning_rate": 3.154185534425147e-06, "loss": 19.8717, "step": 329160 }, { "epoch": 0.66494422605316, "grad_norm": 1162.68798828125, "learning_rate": 3.153861129441819e-06, "loss": 24.6791, "step": 329170 }, { "epoch": 0.6649644266858438, "grad_norm": 262.5423889160156, "learning_rate": 3.1535367334563493e-06, "loss": 11.2788, "step": 329180 }, { "epoch": 0.6649846273185276, "grad_norm": 433.8036193847656, "learning_rate": 3.153212346470317e-06, "loss": 18.7376, "step": 329190 }, { "epoch": 0.6650048279512114, "grad_norm": 267.2512512207031, "learning_rate": 3.152887968485303e-06, "loss": 15.4686, "step": 329200 }, { "epoch": 0.6650250285838952, "grad_norm": 940.6142578125, "learning_rate": 3.1525635995028884e-06, "loss": 16.464, "step": 329210 }, { "epoch": 0.665045229216579, "grad_norm": 304.4906005859375, "learning_rate": 3.1522392395246584e-06, "loss": 13.4658, "step": 329220 }, { "epoch": 0.6650654298492629, "grad_norm": 
397.871337890625, "learning_rate": 3.151914888552186e-06, "loss": 17.0634, "step": 329230 }, { "epoch": 0.6650856304819467, "grad_norm": 334.5021667480469, "learning_rate": 3.1515905465870576e-06, "loss": 18.8055, "step": 329240 }, { "epoch": 0.6651058311146305, "grad_norm": 124.223876953125, "learning_rate": 3.151266213630854e-06, "loss": 19.8596, "step": 329250 }, { "epoch": 0.6651260317473143, "grad_norm": 611.5267333984375, "learning_rate": 3.150941889685154e-06, "loss": 24.7954, "step": 329260 }, { "epoch": 0.6651462323799981, "grad_norm": 458.2458801269531, "learning_rate": 3.1506175747515384e-06, "loss": 18.2301, "step": 329270 }, { "epoch": 0.665166433012682, "grad_norm": 694.9378662109375, "learning_rate": 3.1502932688315897e-06, "loss": 22.8646, "step": 329280 }, { "epoch": 0.6651866336453658, "grad_norm": 414.5040283203125, "learning_rate": 3.1499689719268854e-06, "loss": 20.0289, "step": 329290 }, { "epoch": 0.6652068342780496, "grad_norm": 443.5479736328125, "learning_rate": 3.149644684039008e-06, "loss": 22.4756, "step": 329300 }, { "epoch": 0.6652270349107334, "grad_norm": 325.9361877441406, "learning_rate": 3.1493204051695407e-06, "loss": 12.9403, "step": 329310 }, { "epoch": 0.6652472355434172, "grad_norm": 743.5194091796875, "learning_rate": 3.148996135320058e-06, "loss": 34.9779, "step": 329320 }, { "epoch": 0.6652674361761011, "grad_norm": 560.8590698242188, "learning_rate": 3.148671874492145e-06, "loss": 26.4579, "step": 329330 }, { "epoch": 0.6652876368087849, "grad_norm": 336.4598693847656, "learning_rate": 3.1483476226873822e-06, "loss": 15.5427, "step": 329340 }, { "epoch": 0.6653078374414687, "grad_norm": 311.89337158203125, "learning_rate": 3.1480233799073467e-06, "loss": 17.8434, "step": 329350 }, { "epoch": 0.6653280380741525, "grad_norm": 153.28713989257812, "learning_rate": 3.147699146153621e-06, "loss": 11.3492, "step": 329360 }, { "epoch": 0.6653482387068363, "grad_norm": 156.05831909179688, "learning_rate": 3.147374921427784e-06, "loss": 10.0524, "step": 329370 }, { "epoch": 0.6653684393395202, "grad_norm": 197.22079467773438, "learning_rate": 3.14705070573142e-06, "loss": 11.2639, "step": 329380 }, { "epoch": 0.6653886399722039, "grad_norm": 464.6989440917969, "learning_rate": 3.146726499066103e-06, "loss": 20.3292, "step": 329390 }, { "epoch": 0.6654088406048877, "grad_norm": 846.1508178710938, "learning_rate": 3.1464023014334164e-06, "loss": 19.1258, "step": 329400 }, { "epoch": 0.6654290412375715, "grad_norm": 165.53643798828125, "learning_rate": 3.146078112834943e-06, "loss": 18.3069, "step": 329410 }, { "epoch": 0.6654492418702553, "grad_norm": 402.226806640625, "learning_rate": 3.1457539332722577e-06, "loss": 21.9273, "step": 329420 }, { "epoch": 0.6654694425029392, "grad_norm": 496.2322692871094, "learning_rate": 3.145429762746943e-06, "loss": 24.0402, "step": 329430 }, { "epoch": 0.665489643135623, "grad_norm": 171.00973510742188, "learning_rate": 3.1451056012605796e-06, "loss": 11.35, "step": 329440 }, { "epoch": 0.6655098437683068, "grad_norm": 595.9259643554688, "learning_rate": 3.144781448814746e-06, "loss": 22.6688, "step": 329450 }, { "epoch": 0.6655300444009906, "grad_norm": 568.1805419921875, "learning_rate": 3.1444573054110216e-06, "loss": 23.0682, "step": 329460 }, { "epoch": 0.6655502450336744, "grad_norm": 398.0702819824219, "learning_rate": 3.14413317105099e-06, "loss": 36.9149, "step": 329470 }, { "epoch": 0.6655704456663583, "grad_norm": 359.6332702636719, "learning_rate": 3.1438090457362253e-06, "loss": 22.6934, "step": 329480 }, { 
"epoch": 0.6655906462990421, "grad_norm": 732.9029541015625, "learning_rate": 3.1434849294683113e-06, "loss": 15.2932, "step": 329490 }, { "epoch": 0.6656108469317259, "grad_norm": 238.02476501464844, "learning_rate": 3.1431608222488276e-06, "loss": 19.7181, "step": 329500 }, { "epoch": 0.6656310475644097, "grad_norm": 98.36227416992188, "learning_rate": 3.1428367240793513e-06, "loss": 20.1967, "step": 329510 }, { "epoch": 0.6656512481970935, "grad_norm": 402.2587585449219, "learning_rate": 3.1425126349614636e-06, "loss": 18.2152, "step": 329520 }, { "epoch": 0.6656714488297774, "grad_norm": 712.5654296875, "learning_rate": 3.1421885548967436e-06, "loss": 22.9547, "step": 329530 }, { "epoch": 0.6656916494624612, "grad_norm": 271.1004943847656, "learning_rate": 3.141864483886774e-06, "loss": 12.3942, "step": 329540 }, { "epoch": 0.665711850095145, "grad_norm": 75.97625732421875, "learning_rate": 3.1415404219331287e-06, "loss": 19.4859, "step": 329550 }, { "epoch": 0.6657320507278288, "grad_norm": 0.4789038896560669, "learning_rate": 3.141216369037391e-06, "loss": 12.5291, "step": 329560 }, { "epoch": 0.6657522513605126, "grad_norm": 166.28794860839844, "learning_rate": 3.14089232520114e-06, "loss": 15.3067, "step": 329570 }, { "epoch": 0.6657724519931965, "grad_norm": 132.44432067871094, "learning_rate": 3.1405682904259534e-06, "loss": 27.2135, "step": 329580 }, { "epoch": 0.6657926526258803, "grad_norm": 301.5104675292969, "learning_rate": 3.1402442647134115e-06, "loss": 21.5171, "step": 329590 }, { "epoch": 0.6658128532585641, "grad_norm": 234.42132568359375, "learning_rate": 3.139920248065095e-06, "loss": 21.408, "step": 329600 }, { "epoch": 0.6658330538912479, "grad_norm": 349.06353759765625, "learning_rate": 3.1395962404825787e-06, "loss": 20.4292, "step": 329610 }, { "epoch": 0.6658532545239317, "grad_norm": 97.02141571044922, "learning_rate": 3.139272241967446e-06, "loss": 11.4214, "step": 329620 }, { "epoch": 0.6658734551566156, "grad_norm": 326.3496398925781, "learning_rate": 3.1389482525212753e-06, "loss": 16.2651, "step": 329630 }, { "epoch": 0.6658936557892993, "grad_norm": 231.70472717285156, "learning_rate": 3.138624272145645e-06, "loss": 14.6593, "step": 329640 }, { "epoch": 0.6659138564219831, "grad_norm": 5.015622615814209, "learning_rate": 3.1383003008421336e-06, "loss": 25.0485, "step": 329650 }, { "epoch": 0.6659340570546669, "grad_norm": 598.1875, "learning_rate": 3.137976338612322e-06, "loss": 11.656, "step": 329660 }, { "epoch": 0.6659542576873507, "grad_norm": 453.4358215332031, "learning_rate": 3.1376523854577866e-06, "loss": 18.5764, "step": 329670 }, { "epoch": 0.6659744583200345, "grad_norm": 262.5253601074219, "learning_rate": 3.1373284413801075e-06, "loss": 22.8158, "step": 329680 }, { "epoch": 0.6659946589527184, "grad_norm": 299.25579833984375, "learning_rate": 3.137004506380864e-06, "loss": 15.7987, "step": 329690 }, { "epoch": 0.6660148595854022, "grad_norm": 766.4141235351562, "learning_rate": 3.1366805804616353e-06, "loss": 29.9571, "step": 329700 }, { "epoch": 0.666035060218086, "grad_norm": 148.3333740234375, "learning_rate": 3.1363566636239983e-06, "loss": 32.8396, "step": 329710 }, { "epoch": 0.6660552608507698, "grad_norm": 270.7835998535156, "learning_rate": 3.1360327558695336e-06, "loss": 10.3951, "step": 329720 }, { "epoch": 0.6660754614834536, "grad_norm": 445.0180969238281, "learning_rate": 3.1357088571998203e-06, "loss": 12.224, "step": 329730 }, { "epoch": 0.6660956621161375, "grad_norm": 342.1134948730469, "learning_rate": 
3.1353849676164344e-06, "loss": 17.4202, "step": 329740 }, { "epoch": 0.6661158627488213, "grad_norm": 517.21435546875, "learning_rate": 3.1350610871209553e-06, "loss": 26.8176, "step": 329750 }, { "epoch": 0.6661360633815051, "grad_norm": 182.5183563232422, "learning_rate": 3.1347372157149647e-06, "loss": 15.5527, "step": 329760 }, { "epoch": 0.6661562640141889, "grad_norm": 373.9974365234375, "learning_rate": 3.1344133534000364e-06, "loss": 16.5057, "step": 329770 }, { "epoch": 0.6661764646468727, "grad_norm": 532.1063842773438, "learning_rate": 3.1340895001777518e-06, "loss": 21.1695, "step": 329780 }, { "epoch": 0.6661966652795566, "grad_norm": 397.7397155761719, "learning_rate": 3.13376565604969e-06, "loss": 33.9767, "step": 329790 }, { "epoch": 0.6662168659122404, "grad_norm": 538.232421875, "learning_rate": 3.1334418210174268e-06, "loss": 19.266, "step": 329800 }, { "epoch": 0.6662370665449242, "grad_norm": 487.9904479980469, "learning_rate": 3.1331179950825415e-06, "loss": 15.1283, "step": 329810 }, { "epoch": 0.666257267177608, "grad_norm": 740.597412109375, "learning_rate": 3.132794178246612e-06, "loss": 23.8183, "step": 329820 }, { "epoch": 0.6662774678102918, "grad_norm": 196.72503662109375, "learning_rate": 3.1324703705112203e-06, "loss": 14.3614, "step": 329830 }, { "epoch": 0.6662976684429757, "grad_norm": 298.3857116699219, "learning_rate": 3.1321465718779375e-06, "loss": 16.3721, "step": 329840 }, { "epoch": 0.6663178690756595, "grad_norm": 271.487548828125, "learning_rate": 3.1318227823483483e-06, "loss": 34.8018, "step": 329850 }, { "epoch": 0.6663380697083433, "grad_norm": 263.2033996582031, "learning_rate": 3.1314990019240283e-06, "loss": 12.5965, "step": 329860 }, { "epoch": 0.6663582703410271, "grad_norm": 636.3667602539062, "learning_rate": 3.1311752306065547e-06, "loss": 14.2167, "step": 329870 }, { "epoch": 0.666378470973711, "grad_norm": 199.5261993408203, "learning_rate": 3.1308514683975067e-06, "loss": 28.0261, "step": 329880 }, { "epoch": 0.6663986716063948, "grad_norm": 336.9678649902344, "learning_rate": 3.1305277152984624e-06, "loss": 19.1767, "step": 329890 }, { "epoch": 0.6664188722390785, "grad_norm": 571.2289428710938, "learning_rate": 3.130203971310999e-06, "loss": 32.7854, "step": 329900 }, { "epoch": 0.6664390728717623, "grad_norm": 508.0943603515625, "learning_rate": 3.129880236436693e-06, "loss": 19.3444, "step": 329910 }, { "epoch": 0.6664592735044461, "grad_norm": 184.9727325439453, "learning_rate": 3.1295565106771275e-06, "loss": 17.249, "step": 329920 }, { "epoch": 0.6664794741371299, "grad_norm": 423.1129150390625, "learning_rate": 3.129232794033873e-06, "loss": 17.9122, "step": 329930 }, { "epoch": 0.6664996747698138, "grad_norm": 498.97930908203125, "learning_rate": 3.1289090865085124e-06, "loss": 16.3645, "step": 329940 }, { "epoch": 0.6665198754024976, "grad_norm": 365.0946044921875, "learning_rate": 3.128585388102623e-06, "loss": 21.9605, "step": 329950 }, { "epoch": 0.6665400760351814, "grad_norm": 587.8406982421875, "learning_rate": 3.1282616988177806e-06, "loss": 35.0797, "step": 329960 }, { "epoch": 0.6665602766678652, "grad_norm": 378.7322082519531, "learning_rate": 3.127938018655563e-06, "loss": 24.5784, "step": 329970 }, { "epoch": 0.666580477300549, "grad_norm": 382.29925537109375, "learning_rate": 3.127614347617548e-06, "loss": 9.5781, "step": 329980 }, { "epoch": 0.6666006779332329, "grad_norm": 717.412109375, "learning_rate": 3.1272906857053164e-06, "loss": 22.2062, "step": 329990 }, { "epoch": 0.6666208785659167, "grad_norm": 
229.9202423095703, "learning_rate": 3.12696703292044e-06, "loss": 17.7849, "step": 330000 }, { "epoch": 0.6666410791986005, "grad_norm": 2.28291916847229, "learning_rate": 3.1266433892645e-06, "loss": 12.6165, "step": 330010 }, { "epoch": 0.6666612798312843, "grad_norm": 1385.194091796875, "learning_rate": 3.126319754739074e-06, "loss": 33.7563, "step": 330020 }, { "epoch": 0.6666814804639681, "grad_norm": 512.7279663085938, "learning_rate": 3.1259961293457373e-06, "loss": 22.3427, "step": 330030 }, { "epoch": 0.666701681096652, "grad_norm": 364.13262939453125, "learning_rate": 3.1256725130860676e-06, "loss": 15.5721, "step": 330040 }, { "epoch": 0.6667218817293358, "grad_norm": 336.09173583984375, "learning_rate": 3.1253489059616448e-06, "loss": 12.7987, "step": 330050 }, { "epoch": 0.6667420823620196, "grad_norm": 549.6753540039062, "learning_rate": 3.1250253079740423e-06, "loss": 13.6582, "step": 330060 }, { "epoch": 0.6667622829947034, "grad_norm": 97.47869110107422, "learning_rate": 3.124701719124838e-06, "loss": 19.1391, "step": 330070 }, { "epoch": 0.6667824836273872, "grad_norm": 358.6037292480469, "learning_rate": 3.1243781394156138e-06, "loss": 14.0605, "step": 330080 }, { "epoch": 0.666802684260071, "grad_norm": 398.0151062011719, "learning_rate": 3.1240545688479395e-06, "loss": 22.144, "step": 330090 }, { "epoch": 0.6668228848927549, "grad_norm": 106.11761474609375, "learning_rate": 3.1237310074233964e-06, "loss": 15.5378, "step": 330100 }, { "epoch": 0.6668430855254387, "grad_norm": 579.0884399414062, "learning_rate": 3.1234074551435624e-06, "loss": 16.5559, "step": 330110 }, { "epoch": 0.6668632861581225, "grad_norm": 381.1731262207031, "learning_rate": 3.1230839120100114e-06, "loss": 31.6331, "step": 330120 }, { "epoch": 0.6668834867908063, "grad_norm": 507.9053039550781, "learning_rate": 3.122760378024322e-06, "loss": 17.4301, "step": 330130 }, { "epoch": 0.6669036874234902, "grad_norm": 492.3564147949219, "learning_rate": 3.12243685318807e-06, "loss": 16.9787, "step": 330140 }, { "epoch": 0.666923888056174, "grad_norm": 503.4457092285156, "learning_rate": 3.1221133375028357e-06, "loss": 17.3873, "step": 330150 }, { "epoch": 0.6669440886888577, "grad_norm": 192.4401397705078, "learning_rate": 3.1217898309701903e-06, "loss": 40.3832, "step": 330160 }, { "epoch": 0.6669642893215415, "grad_norm": 1312.8914794921875, "learning_rate": 3.121466333591715e-06, "loss": 20.2297, "step": 330170 }, { "epoch": 0.6669844899542253, "grad_norm": 299.81024169921875, "learning_rate": 3.1211428453689853e-06, "loss": 20.149, "step": 330180 }, { "epoch": 0.6670046905869091, "grad_norm": 115.20481872558594, "learning_rate": 3.120819366303577e-06, "loss": 32.3739, "step": 330190 }, { "epoch": 0.667024891219593, "grad_norm": 260.2235412597656, "learning_rate": 3.1204958963970666e-06, "loss": 17.1079, "step": 330200 }, { "epoch": 0.6670450918522768, "grad_norm": 383.4471740722656, "learning_rate": 3.1201724356510328e-06, "loss": 18.2828, "step": 330210 }, { "epoch": 0.6670652924849606, "grad_norm": 612.00830078125, "learning_rate": 3.1198489840670494e-06, "loss": 17.7996, "step": 330220 }, { "epoch": 0.6670854931176444, "grad_norm": 296.54345703125, "learning_rate": 3.119525541646693e-06, "loss": 9.9124, "step": 330230 }, { "epoch": 0.6671056937503282, "grad_norm": 484.1433410644531, "learning_rate": 3.1192021083915424e-06, "loss": 19.5186, "step": 330240 }, { "epoch": 0.6671258943830121, "grad_norm": 278.9736633300781, "learning_rate": 3.1188786843031727e-06, "loss": 23.9681, "step": 330250 }, { 
"epoch": 0.6671460950156959, "grad_norm": 433.03204345703125, "learning_rate": 3.1185552693831595e-06, "loss": 20.3783, "step": 330260 }, { "epoch": 0.6671662956483797, "grad_norm": 453.2152099609375, "learning_rate": 3.1182318636330798e-06, "loss": 18.2361, "step": 330270 }, { "epoch": 0.6671864962810635, "grad_norm": 39.3110466003418, "learning_rate": 3.1179084670545114e-06, "loss": 12.2447, "step": 330280 }, { "epoch": 0.6672066969137473, "grad_norm": 106.91869354248047, "learning_rate": 3.117585079649026e-06, "loss": 17.7015, "step": 330290 }, { "epoch": 0.6672268975464312, "grad_norm": 397.95843505859375, "learning_rate": 3.117261701418204e-06, "loss": 23.5151, "step": 330300 }, { "epoch": 0.667247098179115, "grad_norm": 127.22858428955078, "learning_rate": 3.1169383323636205e-06, "loss": 6.6119, "step": 330310 }, { "epoch": 0.6672672988117988, "grad_norm": 248.42555236816406, "learning_rate": 3.116614972486851e-06, "loss": 17.023, "step": 330320 }, { "epoch": 0.6672874994444826, "grad_norm": 526.100341796875, "learning_rate": 3.1162916217894713e-06, "loss": 22.9472, "step": 330330 }, { "epoch": 0.6673077000771664, "grad_norm": 299.6604919433594, "learning_rate": 3.115968280273059e-06, "loss": 26.6155, "step": 330340 }, { "epoch": 0.6673279007098503, "grad_norm": 172.5897674560547, "learning_rate": 3.1156449479391876e-06, "loss": 18.7303, "step": 330350 }, { "epoch": 0.6673481013425341, "grad_norm": 111.12664794921875, "learning_rate": 3.115321624789433e-06, "loss": 11.1953, "step": 330360 }, { "epoch": 0.6673683019752179, "grad_norm": 416.7669372558594, "learning_rate": 3.1149983108253754e-06, "loss": 11.8979, "step": 330370 }, { "epoch": 0.6673885026079017, "grad_norm": 191.52410888671875, "learning_rate": 3.1146750060485847e-06, "loss": 17.6313, "step": 330380 }, { "epoch": 0.6674087032405855, "grad_norm": 23.746089935302734, "learning_rate": 3.1143517104606404e-06, "loss": 14.1874, "step": 330390 }, { "epoch": 0.6674289038732694, "grad_norm": 284.149658203125, "learning_rate": 3.114028424063118e-06, "loss": 11.372, "step": 330400 }, { "epoch": 0.6674491045059531, "grad_norm": 512.7546997070312, "learning_rate": 3.1137051468575905e-06, "loss": 13.7233, "step": 330410 }, { "epoch": 0.6674693051386369, "grad_norm": 625.3117065429688, "learning_rate": 3.113381878845636e-06, "loss": 15.1766, "step": 330420 }, { "epoch": 0.6674895057713207, "grad_norm": 174.7843780517578, "learning_rate": 3.113058620028829e-06, "loss": 18.5447, "step": 330430 }, { "epoch": 0.6675097064040045, "grad_norm": 718.687255859375, "learning_rate": 3.1127353704087477e-06, "loss": 19.0231, "step": 330440 }, { "epoch": 0.6675299070366884, "grad_norm": 50.9957389831543, "learning_rate": 3.1124121299869627e-06, "loss": 21.3549, "step": 330450 }, { "epoch": 0.6675501076693722, "grad_norm": 693.8781127929688, "learning_rate": 3.112088898765052e-06, "loss": 22.8268, "step": 330460 }, { "epoch": 0.667570308302056, "grad_norm": 347.0184631347656, "learning_rate": 3.1117656767445936e-06, "loss": 22.9062, "step": 330470 }, { "epoch": 0.6675905089347398, "grad_norm": 299.36224365234375, "learning_rate": 3.1114424639271584e-06, "loss": 14.3362, "step": 330480 }, { "epoch": 0.6676107095674236, "grad_norm": 12.645113945007324, "learning_rate": 3.1111192603143235e-06, "loss": 11.6925, "step": 330490 }, { "epoch": 0.6676309102001075, "grad_norm": 472.9318542480469, "learning_rate": 3.110796065907665e-06, "loss": 21.9861, "step": 330500 }, { "epoch": 0.6676511108327913, "grad_norm": 542.18212890625, "learning_rate": 
3.110472880708757e-06, "loss": 17.1566, "step": 330510 }, { "epoch": 0.6676713114654751, "grad_norm": 24.699552536010742, "learning_rate": 3.110149704719174e-06, "loss": 13.7014, "step": 330520 }, { "epoch": 0.6676915120981589, "grad_norm": 77.46388244628906, "learning_rate": 3.1098265379404953e-06, "loss": 16.0435, "step": 330530 }, { "epoch": 0.6677117127308427, "grad_norm": 839.5580444335938, "learning_rate": 3.109503380374289e-06, "loss": 21.5353, "step": 330540 }, { "epoch": 0.6677319133635266, "grad_norm": 429.29266357421875, "learning_rate": 3.109180232022135e-06, "loss": 21.3935, "step": 330550 }, { "epoch": 0.6677521139962104, "grad_norm": 262.7479553222656, "learning_rate": 3.1088570928856087e-06, "loss": 12.7966, "step": 330560 }, { "epoch": 0.6677723146288942, "grad_norm": 150.66383361816406, "learning_rate": 3.1085339629662826e-06, "loss": 31.2936, "step": 330570 }, { "epoch": 0.667792515261578, "grad_norm": 726.124755859375, "learning_rate": 3.1082108422657328e-06, "loss": 17.9742, "step": 330580 }, { "epoch": 0.6678127158942618, "grad_norm": 610.7399291992188, "learning_rate": 3.1078877307855325e-06, "loss": 8.6843, "step": 330590 }, { "epoch": 0.6678329165269457, "grad_norm": 124.41638946533203, "learning_rate": 3.1075646285272608e-06, "loss": 27.1409, "step": 330600 }, { "epoch": 0.6678531171596295, "grad_norm": 326.7791442871094, "learning_rate": 3.1072415354924866e-06, "loss": 21.5234, "step": 330610 }, { "epoch": 0.6678733177923133, "grad_norm": 294.0032958984375, "learning_rate": 3.106918451682789e-06, "loss": 18.8087, "step": 330620 }, { "epoch": 0.6678935184249971, "grad_norm": 319.2794189453125, "learning_rate": 3.1065953770997416e-06, "loss": 21.2712, "step": 330630 }, { "epoch": 0.6679137190576809, "grad_norm": 379.7613220214844, "learning_rate": 3.106272311744918e-06, "loss": 11.9827, "step": 330640 }, { "epoch": 0.6679339196903648, "grad_norm": 259.8501892089844, "learning_rate": 3.1059492556198933e-06, "loss": 30.4546, "step": 330650 }, { "epoch": 0.6679541203230486, "grad_norm": 118.22823333740234, "learning_rate": 3.1056262087262432e-06, "loss": 17.333, "step": 330660 }, { "epoch": 0.6679743209557323, "grad_norm": 58.96928787231445, "learning_rate": 3.105303171065541e-06, "loss": 22.3445, "step": 330670 }, { "epoch": 0.6679945215884161, "grad_norm": 283.99456787109375, "learning_rate": 3.10498014263936e-06, "loss": 15.5069, "step": 330680 }, { "epoch": 0.6680147222210999, "grad_norm": 157.41123962402344, "learning_rate": 3.1046571234492782e-06, "loss": 14.8355, "step": 330690 }, { "epoch": 0.6680349228537837, "grad_norm": 30.65309715270996, "learning_rate": 3.1043341134968653e-06, "loss": 24.6806, "step": 330700 }, { "epoch": 0.6680551234864676, "grad_norm": 165.40370178222656, "learning_rate": 3.1040111127836994e-06, "loss": 35.1521, "step": 330710 }, { "epoch": 0.6680753241191514, "grad_norm": 443.48193359375, "learning_rate": 3.1036881213113533e-06, "loss": 20.7113, "step": 330720 }, { "epoch": 0.6680955247518352, "grad_norm": 614.755859375, "learning_rate": 3.1033651390814012e-06, "loss": 13.9008, "step": 330730 }, { "epoch": 0.668115725384519, "grad_norm": 424.2988586425781, "learning_rate": 3.103042166095417e-06, "loss": 19.4053, "step": 330740 }, { "epoch": 0.6681359260172028, "grad_norm": 177.65480041503906, "learning_rate": 3.102719202354974e-06, "loss": 13.7094, "step": 330750 }, { "epoch": 0.6681561266498867, "grad_norm": 1192.040283203125, "learning_rate": 3.102396247861651e-06, "loss": 30.5522, "step": 330760 }, { "epoch": 0.6681763272825705, 
"grad_norm": 656.6998901367188, "learning_rate": 3.102073302617015e-06, "loss": 13.8427, "step": 330770 }, { "epoch": 0.6681965279152543, "grad_norm": 4368.40283203125, "learning_rate": 3.101750366622645e-06, "loss": 25.8686, "step": 330780 }, { "epoch": 0.6682167285479381, "grad_norm": 581.3051147460938, "learning_rate": 3.101427439880115e-06, "loss": 17.9275, "step": 330790 }, { "epoch": 0.6682369291806219, "grad_norm": 462.6530456542969, "learning_rate": 3.1011045223909954e-06, "loss": 21.2542, "step": 330800 }, { "epoch": 0.6682571298133058, "grad_norm": 409.5375061035156, "learning_rate": 3.1007816141568625e-06, "loss": 14.8786, "step": 330810 }, { "epoch": 0.6682773304459896, "grad_norm": 633.002685546875, "learning_rate": 3.1004587151792903e-06, "loss": 27.8254, "step": 330820 }, { "epoch": 0.6682975310786734, "grad_norm": 141.9521942138672, "learning_rate": 3.1001358254598512e-06, "loss": 11.3634, "step": 330830 }, { "epoch": 0.6683177317113572, "grad_norm": 553.1273193359375, "learning_rate": 3.0998129450001184e-06, "loss": 14.1767, "step": 330840 }, { "epoch": 0.668337932344041, "grad_norm": 347.21759033203125, "learning_rate": 3.0994900738016693e-06, "loss": 15.1919, "step": 330850 }, { "epoch": 0.6683581329767249, "grad_norm": 453.7741394042969, "learning_rate": 3.099167211866074e-06, "loss": 13.2673, "step": 330860 }, { "epoch": 0.6683783336094087, "grad_norm": 77.05499267578125, "learning_rate": 3.098844359194907e-06, "loss": 11.8338, "step": 330870 }, { "epoch": 0.6683985342420925, "grad_norm": 314.4021911621094, "learning_rate": 3.098521515789742e-06, "loss": 13.9856, "step": 330880 }, { "epoch": 0.6684187348747763, "grad_norm": 332.9153747558594, "learning_rate": 3.0981986816521536e-06, "loss": 21.6265, "step": 330890 }, { "epoch": 0.6684389355074601, "grad_norm": 773.8599853515625, "learning_rate": 3.097875856783713e-06, "loss": 25.3767, "step": 330900 }, { "epoch": 0.668459136140144, "grad_norm": 431.6402587890625, "learning_rate": 3.097553041185993e-06, "loss": 16.8906, "step": 330910 }, { "epoch": 0.6684793367728277, "grad_norm": 448.1880798339844, "learning_rate": 3.097230234860572e-06, "loss": 13.0612, "step": 330920 }, { "epoch": 0.6684995374055115, "grad_norm": 48.33029556274414, "learning_rate": 3.096907437809019e-06, "loss": 16.4912, "step": 330930 }, { "epoch": 0.6685197380381953, "grad_norm": 537.0992431640625, "learning_rate": 3.096584650032908e-06, "loss": 29.7366, "step": 330940 }, { "epoch": 0.6685399386708791, "grad_norm": 524.2745971679688, "learning_rate": 3.0962618715338135e-06, "loss": 16.0425, "step": 330950 }, { "epoch": 0.668560139303563, "grad_norm": 417.84759521484375, "learning_rate": 3.095939102313307e-06, "loss": 19.2592, "step": 330960 }, { "epoch": 0.6685803399362468, "grad_norm": 320.2450866699219, "learning_rate": 3.0956163423729624e-06, "loss": 19.275, "step": 330970 }, { "epoch": 0.6686005405689306, "grad_norm": 183.8946990966797, "learning_rate": 3.0952935917143533e-06, "loss": 17.7646, "step": 330980 }, { "epoch": 0.6686207412016144, "grad_norm": 433.9994812011719, "learning_rate": 3.0949708503390507e-06, "loss": 14.775, "step": 330990 }, { "epoch": 0.6686409418342982, "grad_norm": 441.6136474609375, "learning_rate": 3.09464811824863e-06, "loss": 11.5845, "step": 331000 }, { "epoch": 0.668661142466982, "grad_norm": 179.2563018798828, "learning_rate": 3.094325395444664e-06, "loss": 10.134, "step": 331010 }, { "epoch": 0.6686813430996659, "grad_norm": 332.4448547363281, "learning_rate": 3.094002681928724e-06, "loss": 18.0252, "step": 
331020 }, { "epoch": 0.6687015437323497, "grad_norm": 576.9011840820312, "learning_rate": 3.093679977702384e-06, "loss": 25.92, "step": 331030 }, { "epoch": 0.6687217443650335, "grad_norm": 361.36859130859375, "learning_rate": 3.0933572827672155e-06, "loss": 22.8571, "step": 331040 }, { "epoch": 0.6687419449977173, "grad_norm": 227.5767364501953, "learning_rate": 3.093034597124795e-06, "loss": 15.8042, "step": 331050 }, { "epoch": 0.6687621456304012, "grad_norm": 70.67207336425781, "learning_rate": 3.09271192077669e-06, "loss": 12.0711, "step": 331060 }, { "epoch": 0.668782346263085, "grad_norm": 440.76043701171875, "learning_rate": 3.092389253724476e-06, "loss": 10.1043, "step": 331070 }, { "epoch": 0.6688025468957688, "grad_norm": 102.35005187988281, "learning_rate": 3.092066595969727e-06, "loss": 16.9162, "step": 331080 }, { "epoch": 0.6688227475284526, "grad_norm": 297.5807800292969, "learning_rate": 3.0917439475140133e-06, "loss": 16.2803, "step": 331090 }, { "epoch": 0.6688429481611364, "grad_norm": 481.68914794921875, "learning_rate": 3.0914213083589086e-06, "loss": 23.1711, "step": 331100 }, { "epoch": 0.6688631487938203, "grad_norm": 483.2679138183594, "learning_rate": 3.091098678505985e-06, "loss": 12.9829, "step": 331110 }, { "epoch": 0.6688833494265041, "grad_norm": 443.21087646484375, "learning_rate": 3.0907760579568135e-06, "loss": 27.5186, "step": 331120 }, { "epoch": 0.6689035500591879, "grad_norm": 659.8768920898438, "learning_rate": 3.0904534467129677e-06, "loss": 12.9115, "step": 331130 }, { "epoch": 0.6689237506918717, "grad_norm": 319.8518981933594, "learning_rate": 3.0901308447760236e-06, "loss": 9.9042, "step": 331140 }, { "epoch": 0.6689439513245555, "grad_norm": 123.06887817382812, "learning_rate": 3.0898082521475463e-06, "loss": 16.7963, "step": 331150 }, { "epoch": 0.6689641519572394, "grad_norm": 1182.05517578125, "learning_rate": 3.089485668829113e-06, "loss": 31.6144, "step": 331160 }, { "epoch": 0.6689843525899232, "grad_norm": 556.3887939453125, "learning_rate": 3.089163094822296e-06, "loss": 26.4783, "step": 331170 }, { "epoch": 0.6690045532226069, "grad_norm": 955.7506103515625, "learning_rate": 3.0888405301286662e-06, "loss": 24.3341, "step": 331180 }, { "epoch": 0.6690247538552907, "grad_norm": 48.68580627441406, "learning_rate": 3.088517974749795e-06, "loss": 7.7547, "step": 331190 }, { "epoch": 0.6690449544879745, "grad_norm": 229.7146759033203, "learning_rate": 3.088195428687254e-06, "loss": 20.9266, "step": 331200 }, { "epoch": 0.6690651551206583, "grad_norm": 264.4624328613281, "learning_rate": 3.0878728919426203e-06, "loss": 12.4706, "step": 331210 }, { "epoch": 0.6690853557533422, "grad_norm": 216.6519012451172, "learning_rate": 3.0875503645174586e-06, "loss": 19.0743, "step": 331220 }, { "epoch": 0.669105556386026, "grad_norm": 612.282470703125, "learning_rate": 3.087227846413346e-06, "loss": 18.8406, "step": 331230 }, { "epoch": 0.6691257570187098, "grad_norm": 406.8621826171875, "learning_rate": 3.0869053376318538e-06, "loss": 20.439, "step": 331240 }, { "epoch": 0.6691459576513936, "grad_norm": 436.4408874511719, "learning_rate": 3.0865828381745515e-06, "loss": 21.0851, "step": 331250 }, { "epoch": 0.6691661582840774, "grad_norm": 467.5116882324219, "learning_rate": 3.086260348043013e-06, "loss": 20.9692, "step": 331260 }, { "epoch": 0.6691863589167613, "grad_norm": 320.90179443359375, "learning_rate": 3.08593786723881e-06, "loss": 16.047, "step": 331270 }, { "epoch": 0.6692065595494451, "grad_norm": 607.719970703125, "learning_rate": 
3.0856153957635127e-06, "loss": 12.6264, "step": 331280 }, { "epoch": 0.6692267601821289, "grad_norm": 124.06726837158203, "learning_rate": 3.085292933618693e-06, "loss": 8.4945, "step": 331290 }, { "epoch": 0.6692469608148127, "grad_norm": 204.34312438964844, "learning_rate": 3.0849704808059266e-06, "loss": 22.4943, "step": 331300 }, { "epoch": 0.6692671614474965, "grad_norm": 419.5154724121094, "learning_rate": 3.0846480373267783e-06, "loss": 43.9789, "step": 331310 }, { "epoch": 0.6692873620801804, "grad_norm": 347.163818359375, "learning_rate": 3.0843256031828245e-06, "loss": 10.7923, "step": 331320 }, { "epoch": 0.6693075627128642, "grad_norm": 793.1201171875, "learning_rate": 3.0840031783756353e-06, "loss": 26.9269, "step": 331330 }, { "epoch": 0.669327763345548, "grad_norm": 370.4996337890625, "learning_rate": 3.0836807629067828e-06, "loss": 13.3176, "step": 331340 }, { "epoch": 0.6693479639782318, "grad_norm": 635.1002197265625, "learning_rate": 3.083358356777837e-06, "loss": 22.2128, "step": 331350 }, { "epoch": 0.6693681646109156, "grad_norm": 196.98524475097656, "learning_rate": 3.083035959990369e-06, "loss": 14.1499, "step": 331360 }, { "epoch": 0.6693883652435995, "grad_norm": 283.0307922363281, "learning_rate": 3.082713572545955e-06, "loss": 16.3014, "step": 331370 }, { "epoch": 0.6694085658762833, "grad_norm": 211.803466796875, "learning_rate": 3.0823911944461593e-06, "loss": 15.4895, "step": 331380 }, { "epoch": 0.6694287665089671, "grad_norm": 195.05145263671875, "learning_rate": 3.082068825692557e-06, "loss": 17.9763, "step": 331390 }, { "epoch": 0.6694489671416509, "grad_norm": 380.41046142578125, "learning_rate": 3.0817464662867192e-06, "loss": 20.3055, "step": 331400 }, { "epoch": 0.6694691677743347, "grad_norm": 318.0858459472656, "learning_rate": 3.081424116230216e-06, "loss": 30.3429, "step": 331410 }, { "epoch": 0.6694893684070186, "grad_norm": 263.2286376953125, "learning_rate": 3.0811017755246185e-06, "loss": 14.8603, "step": 331420 }, { "epoch": 0.6695095690397023, "grad_norm": 24.916217803955078, "learning_rate": 3.0807794441715e-06, "loss": 16.4783, "step": 331430 }, { "epoch": 0.6695297696723861, "grad_norm": 263.9822998046875, "learning_rate": 3.080457122172429e-06, "loss": 12.776, "step": 331440 }, { "epoch": 0.6695499703050699, "grad_norm": 38.8775634765625, "learning_rate": 3.080134809528975e-06, "loss": 22.0455, "step": 331450 }, { "epoch": 0.6695701709377537, "grad_norm": 0.0, "learning_rate": 3.0798125062427143e-06, "loss": 17.708, "step": 331460 }, { "epoch": 0.6695903715704375, "grad_norm": 308.3204345703125, "learning_rate": 3.079490212315212e-06, "loss": 24.2277, "step": 331470 }, { "epoch": 0.6696105722031214, "grad_norm": 341.6029968261719, "learning_rate": 3.0791679277480422e-06, "loss": 11.5234, "step": 331480 }, { "epoch": 0.6696307728358052, "grad_norm": 613.443359375, "learning_rate": 3.078845652542775e-06, "loss": 21.7781, "step": 331490 }, { "epoch": 0.669650973468489, "grad_norm": 363.759521484375, "learning_rate": 3.078523386700982e-06, "loss": 23.5264, "step": 331500 }, { "epoch": 0.6696711741011728, "grad_norm": 635.1989135742188, "learning_rate": 3.0782011302242326e-06, "loss": 16.1766, "step": 331510 }, { "epoch": 0.6696913747338566, "grad_norm": 473.2365417480469, "learning_rate": 3.077878883114096e-06, "loss": 16.9013, "step": 331520 }, { "epoch": 0.6697115753665405, "grad_norm": 412.7676696777344, "learning_rate": 3.0775566453721475e-06, "loss": 25.0393, "step": 331530 }, { "epoch": 0.6697317759992243, "grad_norm": 
669.5061645507812, "learning_rate": 3.077234416999953e-06, "loss": 12.7492, "step": 331540 }, { "epoch": 0.6697519766319081, "grad_norm": 325.734130859375, "learning_rate": 3.0769121979990845e-06, "loss": 21.3628, "step": 331550 }, { "epoch": 0.6697721772645919, "grad_norm": 888.1439819335938, "learning_rate": 3.0765899883711148e-06, "loss": 32.886, "step": 331560 }, { "epoch": 0.6697923778972757, "grad_norm": 1865.0050048828125, "learning_rate": 3.0762677881176102e-06, "loss": 29.264, "step": 331570 }, { "epoch": 0.6698125785299596, "grad_norm": 265.69354248046875, "learning_rate": 3.075945597240143e-06, "loss": 23.1557, "step": 331580 }, { "epoch": 0.6698327791626434, "grad_norm": 323.1497497558594, "learning_rate": 3.0756234157402854e-06, "loss": 24.5348, "step": 331590 }, { "epoch": 0.6698529797953272, "grad_norm": 156.175048828125, "learning_rate": 3.0753012436196033e-06, "loss": 31.3367, "step": 331600 }, { "epoch": 0.669873180428011, "grad_norm": 362.2181396484375, "learning_rate": 3.074979080879671e-06, "loss": 9.6042, "step": 331610 }, { "epoch": 0.6698933810606948, "grad_norm": 336.6182556152344, "learning_rate": 3.074656927522058e-06, "loss": 27.9608, "step": 331620 }, { "epoch": 0.6699135816933787, "grad_norm": 389.6551208496094, "learning_rate": 3.0743347835483323e-06, "loss": 30.4426, "step": 331630 }, { "epoch": 0.6699337823260625, "grad_norm": 272.8407897949219, "learning_rate": 3.074012648960065e-06, "loss": 18.8419, "step": 331640 }, { "epoch": 0.6699539829587463, "grad_norm": 282.4333190917969, "learning_rate": 3.0736905237588275e-06, "loss": 32.4853, "step": 331650 }, { "epoch": 0.6699741835914301, "grad_norm": 279.2618103027344, "learning_rate": 3.073368407946189e-06, "loss": 15.6446, "step": 331660 }, { "epoch": 0.669994384224114, "grad_norm": 47.570289611816406, "learning_rate": 3.0730463015237177e-06, "loss": 14.2794, "step": 331670 }, { "epoch": 0.6700145848567978, "grad_norm": 164.76727294921875, "learning_rate": 3.072724204492985e-06, "loss": 10.9514, "step": 331680 }, { "epoch": 0.6700347854894815, "grad_norm": 283.4651184082031, "learning_rate": 3.072402116855563e-06, "loss": 16.4175, "step": 331690 }, { "epoch": 0.6700549861221653, "grad_norm": 248.5535125732422, "learning_rate": 3.0720800386130176e-06, "loss": 19.2702, "step": 331700 }, { "epoch": 0.6700751867548491, "grad_norm": 86.50074768066406, "learning_rate": 3.071757969766921e-06, "loss": 18.4815, "step": 331710 }, { "epoch": 0.6700953873875329, "grad_norm": 660.4617919921875, "learning_rate": 3.0714359103188422e-06, "loss": 14.7241, "step": 331720 }, { "epoch": 0.6701155880202168, "grad_norm": 408.3411560058594, "learning_rate": 3.0711138602703505e-06, "loss": 16.9852, "step": 331730 }, { "epoch": 0.6701357886529006, "grad_norm": 438.41009521484375, "learning_rate": 3.0707918196230147e-06, "loss": 11.7692, "step": 331740 }, { "epoch": 0.6701559892855844, "grad_norm": 213.214599609375, "learning_rate": 3.0704697883784083e-06, "loss": 19.0089, "step": 331750 }, { "epoch": 0.6701761899182682, "grad_norm": 576.30908203125, "learning_rate": 3.0701477665380958e-06, "loss": 17.0343, "step": 331760 }, { "epoch": 0.670196390550952, "grad_norm": 226.357177734375, "learning_rate": 3.069825754103649e-06, "loss": 17.4609, "step": 331770 }, { "epoch": 0.6702165911836359, "grad_norm": 279.99755859375, "learning_rate": 3.0695037510766384e-06, "loss": 24.8922, "step": 331780 }, { "epoch": 0.6702367918163197, "grad_norm": 625.1746215820312, "learning_rate": 3.069181757458633e-06, "loss": 32.1062, "step": 331790 }, { 
"epoch": 0.6702569924490035, "grad_norm": 982.3031005859375, "learning_rate": 3.0688597732512004e-06, "loss": 27.7022, "step": 331800 }, { "epoch": 0.6702771930816873, "grad_norm": 530.5897216796875, "learning_rate": 3.0685377984559104e-06, "loss": 24.4026, "step": 331810 }, { "epoch": 0.6702973937143711, "grad_norm": 169.71556091308594, "learning_rate": 3.0682158330743363e-06, "loss": 19.3734, "step": 331820 }, { "epoch": 0.670317594347055, "grad_norm": 54.75619888305664, "learning_rate": 3.0678938771080395e-06, "loss": 25.9983, "step": 331830 }, { "epoch": 0.6703377949797388, "grad_norm": 375.5094909667969, "learning_rate": 3.067571930558596e-06, "loss": 36.7743, "step": 331840 }, { "epoch": 0.6703579956124226, "grad_norm": 181.7646942138672, "learning_rate": 3.0672499934275725e-06, "loss": 19.5657, "step": 331850 }, { "epoch": 0.6703781962451064, "grad_norm": 11.570462226867676, "learning_rate": 3.066928065716538e-06, "loss": 16.3424, "step": 331860 }, { "epoch": 0.6703983968777902, "grad_norm": 289.7773742675781, "learning_rate": 3.066606147427061e-06, "loss": 18.3625, "step": 331870 }, { "epoch": 0.6704185975104741, "grad_norm": 357.0284423828125, "learning_rate": 3.066284238560713e-06, "loss": 13.1519, "step": 331880 }, { "epoch": 0.6704387981431579, "grad_norm": 285.1661682128906, "learning_rate": 3.0659623391190583e-06, "loss": 20.7197, "step": 331890 }, { "epoch": 0.6704589987758417, "grad_norm": 229.07870483398438, "learning_rate": 3.0656404491036696e-06, "loss": 14.8636, "step": 331900 }, { "epoch": 0.6704791994085255, "grad_norm": 759.066650390625, "learning_rate": 3.0653185685161168e-06, "loss": 24.2076, "step": 331910 }, { "epoch": 0.6704994000412093, "grad_norm": 167.57725524902344, "learning_rate": 3.064996697357964e-06, "loss": 13.3535, "step": 331920 }, { "epoch": 0.6705196006738932, "grad_norm": 0.0030020507983863354, "learning_rate": 3.0646748356307833e-06, "loss": 23.629, "step": 331930 }, { "epoch": 0.670539801306577, "grad_norm": 274.72418212890625, "learning_rate": 3.0643529833361425e-06, "loss": 9.825, "step": 331940 }, { "epoch": 0.6705600019392607, "grad_norm": 442.8713073730469, "learning_rate": 3.0640311404756116e-06, "loss": 30.5478, "step": 331950 }, { "epoch": 0.6705802025719445, "grad_norm": 575.1695556640625, "learning_rate": 3.063709307050757e-06, "loss": 8.4817, "step": 331960 }, { "epoch": 0.6706004032046283, "grad_norm": 643.443359375, "learning_rate": 3.063387483063148e-06, "loss": 18.8981, "step": 331970 }, { "epoch": 0.6706206038373121, "grad_norm": 670.76611328125, "learning_rate": 3.063065668514357e-06, "loss": 36.5922, "step": 331980 }, { "epoch": 0.670640804469996, "grad_norm": 71.35189056396484, "learning_rate": 3.0627438634059447e-06, "loss": 19.2127, "step": 331990 }, { "epoch": 0.6706610051026798, "grad_norm": 427.9266662597656, "learning_rate": 3.0624220677394854e-06, "loss": 16.5064, "step": 332000 }, { "epoch": 0.6706812057353636, "grad_norm": 226.159912109375, "learning_rate": 3.062100281516547e-06, "loss": 24.8843, "step": 332010 }, { "epoch": 0.6707014063680474, "grad_norm": 345.7786865234375, "learning_rate": 3.0617785047386953e-06, "loss": 13.7342, "step": 332020 }, { "epoch": 0.6707216070007312, "grad_norm": 342.26702880859375, "learning_rate": 3.0614567374075007e-06, "loss": 11.6023, "step": 332030 }, { "epoch": 0.6707418076334151, "grad_norm": 441.6045227050781, "learning_rate": 3.0611349795245317e-06, "loss": 26.8497, "step": 332040 }, { "epoch": 0.6707620082660989, "grad_norm": 272.1783142089844, "learning_rate": 
3.060813231091354e-06, "loss": 8.2914, "step": 332050 }, { "epoch": 0.6707822088987827, "grad_norm": 0.0, "learning_rate": 3.0604914921095373e-06, "loss": 12.8405, "step": 332060 }, { "epoch": 0.6708024095314665, "grad_norm": 621.2687377929688, "learning_rate": 3.0601697625806527e-06, "loss": 29.513, "step": 332070 }, { "epoch": 0.6708226101641503, "grad_norm": 573.689208984375, "learning_rate": 3.0598480425062626e-06, "loss": 13.719, "step": 332080 }, { "epoch": 0.6708428107968342, "grad_norm": 245.5880889892578, "learning_rate": 3.0595263318879383e-06, "loss": 23.5509, "step": 332090 }, { "epoch": 0.670863011429518, "grad_norm": 203.6055145263672, "learning_rate": 3.059204630727247e-06, "loss": 18.965, "step": 332100 }, { "epoch": 0.6708832120622018, "grad_norm": 151.3707275390625, "learning_rate": 3.0588829390257592e-06, "loss": 16.137, "step": 332110 }, { "epoch": 0.6709034126948856, "grad_norm": 243.07208251953125, "learning_rate": 3.0585612567850385e-06, "loss": 15.4447, "step": 332120 }, { "epoch": 0.6709236133275694, "grad_norm": 303.47564697265625, "learning_rate": 3.0582395840066537e-06, "loss": 11.0508, "step": 332130 }, { "epoch": 0.6709438139602533, "grad_norm": 203.67666625976562, "learning_rate": 3.0579179206921773e-06, "loss": 25.1175, "step": 332140 }, { "epoch": 0.6709640145929371, "grad_norm": 463.525146484375, "learning_rate": 3.0575962668431704e-06, "loss": 46.7824, "step": 332150 }, { "epoch": 0.6709842152256209, "grad_norm": 507.78851318359375, "learning_rate": 3.057274622461204e-06, "loss": 16.6115, "step": 332160 }, { "epoch": 0.6710044158583047, "grad_norm": 252.3871307373047, "learning_rate": 3.0569529875478465e-06, "loss": 10.5707, "step": 332170 }, { "epoch": 0.6710246164909885, "grad_norm": 123.0634994506836, "learning_rate": 3.0566313621046638e-06, "loss": 18.2667, "step": 332180 }, { "epoch": 0.6710448171236724, "grad_norm": 213.22824096679688, "learning_rate": 3.0563097461332246e-06, "loss": 11.4551, "step": 332190 }, { "epoch": 0.6710650177563561, "grad_norm": 216.26004028320312, "learning_rate": 3.0559881396350967e-06, "loss": 35.535, "step": 332200 }, { "epoch": 0.6710852183890399, "grad_norm": 514.7017211914062, "learning_rate": 3.0556665426118456e-06, "loss": 32.0472, "step": 332210 }, { "epoch": 0.6711054190217237, "grad_norm": 693.4927978515625, "learning_rate": 3.055344955065039e-06, "loss": 19.8245, "step": 332220 }, { "epoch": 0.6711256196544075, "grad_norm": 648.6301879882812, "learning_rate": 3.0550233769962468e-06, "loss": 16.481, "step": 332230 }, { "epoch": 0.6711458202870914, "grad_norm": 439.83880615234375, "learning_rate": 3.0547018084070344e-06, "loss": 17.4814, "step": 332240 }, { "epoch": 0.6711660209197752, "grad_norm": 284.37274169921875, "learning_rate": 3.0543802492989693e-06, "loss": 21.2819, "step": 332250 }, { "epoch": 0.671186221552459, "grad_norm": 294.681884765625, "learning_rate": 3.054058699673619e-06, "loss": 8.6201, "step": 332260 }, { "epoch": 0.6712064221851428, "grad_norm": 124.22969055175781, "learning_rate": 3.0537371595325515e-06, "loss": 13.3757, "step": 332270 }, { "epoch": 0.6712266228178266, "grad_norm": 557.1804809570312, "learning_rate": 3.0534156288773307e-06, "loss": 13.7903, "step": 332280 }, { "epoch": 0.6712468234505105, "grad_norm": 172.43777465820312, "learning_rate": 3.0530941077095273e-06, "loss": 26.861, "step": 332290 }, { "epoch": 0.6712670240831943, "grad_norm": 580.1685180664062, "learning_rate": 3.0527725960307083e-06, "loss": 13.5221, "step": 332300 }, { "epoch": 0.6712872247158781, "grad_norm": 
460.0882873535156, "learning_rate": 3.0524510938424377e-06, "loss": 16.4842, "step": 332310 }, { "epoch": 0.6713074253485619, "grad_norm": 408.4290466308594, "learning_rate": 3.0521296011462852e-06, "loss": 15.9181, "step": 332320 }, { "epoch": 0.6713276259812457, "grad_norm": 355.74078369140625, "learning_rate": 3.0518081179438173e-06, "loss": 8.2587, "step": 332330 }, { "epoch": 0.6713478266139296, "grad_norm": 231.6913299560547, "learning_rate": 3.0514866442365998e-06, "loss": 25.9216, "step": 332340 }, { "epoch": 0.6713680272466134, "grad_norm": 223.23443603515625, "learning_rate": 3.0511651800261987e-06, "loss": 21.761, "step": 332350 }, { "epoch": 0.6713882278792972, "grad_norm": 80.02017211914062, "learning_rate": 3.0508437253141855e-06, "loss": 12.4434, "step": 332360 }, { "epoch": 0.671408428511981, "grad_norm": 394.69403076171875, "learning_rate": 3.0505222801021193e-06, "loss": 21.9772, "step": 332370 }, { "epoch": 0.6714286291446648, "grad_norm": 557.94873046875, "learning_rate": 3.0502008443915733e-06, "loss": 17.7909, "step": 332380 }, { "epoch": 0.6714488297773487, "grad_norm": 954.2001953125, "learning_rate": 3.049879418184112e-06, "loss": 23.7708, "step": 332390 }, { "epoch": 0.6714690304100325, "grad_norm": 360.5732727050781, "learning_rate": 3.049558001481302e-06, "loss": 21.7766, "step": 332400 }, { "epoch": 0.6714892310427163, "grad_norm": 448.75384521484375, "learning_rate": 3.0492365942847097e-06, "loss": 22.361, "step": 332410 }, { "epoch": 0.6715094316754001, "grad_norm": 48.64217758178711, "learning_rate": 3.0489151965958998e-06, "loss": 18.3156, "step": 332420 }, { "epoch": 0.6715296323080839, "grad_norm": 142.99681091308594, "learning_rate": 3.0485938084164445e-06, "loss": 29.1174, "step": 332430 }, { "epoch": 0.6715498329407678, "grad_norm": 340.4880676269531, "learning_rate": 3.048272429747903e-06, "loss": 17.4341, "step": 332440 }, { "epoch": 0.6715700335734516, "grad_norm": 271.4729309082031, "learning_rate": 3.047951060591845e-06, "loss": 18.4208, "step": 332450 }, { "epoch": 0.6715902342061353, "grad_norm": 582.4696044921875, "learning_rate": 3.047629700949839e-06, "loss": 15.2522, "step": 332460 }, { "epoch": 0.6716104348388191, "grad_norm": 477.7011413574219, "learning_rate": 3.0473083508234474e-06, "loss": 21.3859, "step": 332470 }, { "epoch": 0.6716306354715029, "grad_norm": 357.6345520019531, "learning_rate": 3.0469870102142387e-06, "loss": 17.1324, "step": 332480 }, { "epoch": 0.6716508361041867, "grad_norm": 267.4741516113281, "learning_rate": 3.046665679123778e-06, "loss": 17.3637, "step": 332490 }, { "epoch": 0.6716710367368706, "grad_norm": 266.8260498046875, "learning_rate": 3.0463443575536324e-06, "loss": 21.8634, "step": 332500 }, { "epoch": 0.6716912373695544, "grad_norm": 515.8812255859375, "learning_rate": 3.046023045505366e-06, "loss": 11.6234, "step": 332510 }, { "epoch": 0.6717114380022382, "grad_norm": 665.266845703125, "learning_rate": 3.045701742980549e-06, "loss": 36.3828, "step": 332520 }, { "epoch": 0.671731638634922, "grad_norm": 324.485107421875, "learning_rate": 3.0453804499807416e-06, "loss": 13.2951, "step": 332530 }, { "epoch": 0.6717518392676058, "grad_norm": 406.4908447265625, "learning_rate": 3.045059166507515e-06, "loss": 17.0992, "step": 332540 }, { "epoch": 0.6717720399002897, "grad_norm": 182.7077178955078, "learning_rate": 3.0447378925624316e-06, "loss": 31.4674, "step": 332550 }, { "epoch": 0.6717922405329735, "grad_norm": 235.09619140625, "learning_rate": 3.04441662814706e-06, "loss": 15.2149, "step": 332560 }, { 
"epoch": 0.6718124411656573, "grad_norm": 128.1046905517578, "learning_rate": 3.044095373262963e-06, "loss": 20.1439, "step": 332570 }, { "epoch": 0.6718326417983411, "grad_norm": 153.95370483398438, "learning_rate": 3.043774127911707e-06, "loss": 18.1651, "step": 332580 }, { "epoch": 0.6718528424310249, "grad_norm": 123.7090835571289, "learning_rate": 3.0434528920948626e-06, "loss": 15.8325, "step": 332590 }, { "epoch": 0.6718730430637088, "grad_norm": 126.16527557373047, "learning_rate": 3.043131665813988e-06, "loss": 14.3916, "step": 332600 }, { "epoch": 0.6718932436963926, "grad_norm": 829.8492431640625, "learning_rate": 3.0428104490706536e-06, "loss": 18.0627, "step": 332610 }, { "epoch": 0.6719134443290764, "grad_norm": 34.451019287109375, "learning_rate": 3.0424892418664244e-06, "loss": 9.6449, "step": 332620 }, { "epoch": 0.6719336449617602, "grad_norm": 169.1112518310547, "learning_rate": 3.0421680442028644e-06, "loss": 13.193, "step": 332630 }, { "epoch": 0.671953845594444, "grad_norm": 443.2011413574219, "learning_rate": 3.0418468560815396e-06, "loss": 16.4191, "step": 332640 }, { "epoch": 0.6719740462271279, "grad_norm": 565.5675048828125, "learning_rate": 3.041525677504017e-06, "loss": 17.3625, "step": 332650 }, { "epoch": 0.6719942468598117, "grad_norm": 370.6284484863281, "learning_rate": 3.0412045084718597e-06, "loss": 11.8785, "step": 332660 }, { "epoch": 0.6720144474924955, "grad_norm": 648.5980834960938, "learning_rate": 3.0408833489866326e-06, "loss": 21.9276, "step": 332670 }, { "epoch": 0.6720346481251793, "grad_norm": 477.17620849609375, "learning_rate": 3.040562199049906e-06, "loss": 26.8964, "step": 332680 }, { "epoch": 0.6720548487578631, "grad_norm": 518.5248413085938, "learning_rate": 3.040241058663238e-06, "loss": 19.8161, "step": 332690 }, { "epoch": 0.672075049390547, "grad_norm": 708.6114501953125, "learning_rate": 3.0399199278281986e-06, "loss": 22.0998, "step": 332700 }, { "epoch": 0.6720952500232307, "grad_norm": 29.858028411865234, "learning_rate": 3.039598806546352e-06, "loss": 11.2511, "step": 332710 }, { "epoch": 0.6721154506559145, "grad_norm": 3.5512747764587402, "learning_rate": 3.039277694819264e-06, "loss": 12.7374, "step": 332720 }, { "epoch": 0.6721356512885983, "grad_norm": 539.164306640625, "learning_rate": 3.0389565926484974e-06, "loss": 16.9234, "step": 332730 }, { "epoch": 0.6721558519212821, "grad_norm": 192.2627410888672, "learning_rate": 3.038635500035618e-06, "loss": 18.5794, "step": 332740 }, { "epoch": 0.672176052553966, "grad_norm": 176.16976928710938, "learning_rate": 3.0383144169821944e-06, "loss": 27.3413, "step": 332750 }, { "epoch": 0.6721962531866498, "grad_norm": 2305.068115234375, "learning_rate": 3.0379933434897846e-06, "loss": 79.7899, "step": 332760 }, { "epoch": 0.6722164538193336, "grad_norm": 297.8510437011719, "learning_rate": 3.0376722795599587e-06, "loss": 12.3784, "step": 332770 }, { "epoch": 0.6722366544520174, "grad_norm": 238.76150512695312, "learning_rate": 3.0373512251942817e-06, "loss": 13.8894, "step": 332780 }, { "epoch": 0.6722568550847012, "grad_norm": 1.7508667707443237, "learning_rate": 3.0370301803943147e-06, "loss": 18.9702, "step": 332790 }, { "epoch": 0.672277055717385, "grad_norm": 530.020751953125, "learning_rate": 3.0367091451616254e-06, "loss": 13.963, "step": 332800 }, { "epoch": 0.6722972563500689, "grad_norm": 418.89166259765625, "learning_rate": 3.0363881194977784e-06, "loss": 21.8154, "step": 332810 }, { "epoch": 0.6723174569827527, "grad_norm": 42.768985748291016, "learning_rate": 
3.0360671034043365e-06, "loss": 6.1037, "step": 332820 }, { "epoch": 0.6723376576154365, "grad_norm": 186.217529296875, "learning_rate": 3.035746096882864e-06, "loss": 12.7598, "step": 332830 }, { "epoch": 0.6723578582481203, "grad_norm": 259.81866455078125, "learning_rate": 3.0354250999349277e-06, "loss": 24.601, "step": 332840 }, { "epoch": 0.6723780588808042, "grad_norm": 418.35357666015625, "learning_rate": 3.035104112562093e-06, "loss": 15.1968, "step": 332850 }, { "epoch": 0.672398259513488, "grad_norm": 705.2666625976562, "learning_rate": 3.034783134765921e-06, "loss": 32.0045, "step": 332860 }, { "epoch": 0.6724184601461718, "grad_norm": 841.7813110351562, "learning_rate": 3.034462166547978e-06, "loss": 24.4324, "step": 332870 }, { "epoch": 0.6724386607788556, "grad_norm": 312.4965515136719, "learning_rate": 3.0341412079098285e-06, "loss": 20.5919, "step": 332880 }, { "epoch": 0.6724588614115394, "grad_norm": 266.7486877441406, "learning_rate": 3.033820258853035e-06, "loss": 20.5033, "step": 332890 }, { "epoch": 0.6724790620442233, "grad_norm": 155.8134002685547, "learning_rate": 3.033499319379163e-06, "loss": 19.8645, "step": 332900 }, { "epoch": 0.6724992626769071, "grad_norm": 156.40814208984375, "learning_rate": 3.033178389489779e-06, "loss": 9.9884, "step": 332910 }, { "epoch": 0.6725194633095909, "grad_norm": 522.968994140625, "learning_rate": 3.0328574691864434e-06, "loss": 19.5756, "step": 332920 }, { "epoch": 0.6725396639422747, "grad_norm": 226.67446899414062, "learning_rate": 3.0325365584707216e-06, "loss": 15.3141, "step": 332930 }, { "epoch": 0.6725598645749585, "grad_norm": 249.93190002441406, "learning_rate": 3.03221565734418e-06, "loss": 9.8185, "step": 332940 }, { "epoch": 0.6725800652076424, "grad_norm": 376.44781494140625, "learning_rate": 3.0318947658083787e-06, "loss": 14.493, "step": 332950 }, { "epoch": 0.6726002658403262, "grad_norm": 231.81051635742188, "learning_rate": 3.031573883864882e-06, "loss": 6.6049, "step": 332960 }, { "epoch": 0.6726204664730099, "grad_norm": 0.0, "learning_rate": 3.03125301151526e-06, "loss": 14.4246, "step": 332970 }, { "epoch": 0.6726406671056937, "grad_norm": 296.96624755859375, "learning_rate": 3.0309321487610682e-06, "loss": 19.5908, "step": 332980 }, { "epoch": 0.6726608677383775, "grad_norm": 409.90753173828125, "learning_rate": 3.030611295603876e-06, "loss": 17.2561, "step": 332990 }, { "epoch": 0.6726810683710613, "grad_norm": 477.5803527832031, "learning_rate": 3.030290452045245e-06, "loss": 19.0041, "step": 333000 }, { "epoch": 0.6727012690037452, "grad_norm": 343.8766784667969, "learning_rate": 3.02996961808674e-06, "loss": 23.4326, "step": 333010 }, { "epoch": 0.672721469636429, "grad_norm": 538.6311645507812, "learning_rate": 3.0296487937299235e-06, "loss": 12.0906, "step": 333020 }, { "epoch": 0.6727416702691128, "grad_norm": 185.01937866210938, "learning_rate": 3.0293279789763584e-06, "loss": 17.3637, "step": 333030 }, { "epoch": 0.6727618709017966, "grad_norm": 257.2732238769531, "learning_rate": 3.0290071738276132e-06, "loss": 22.591, "step": 333040 }, { "epoch": 0.6727820715344804, "grad_norm": 710.9138793945312, "learning_rate": 3.028686378285245e-06, "loss": 21.7439, "step": 333050 }, { "epoch": 0.6728022721671643, "grad_norm": 348.0620422363281, "learning_rate": 3.0283655923508214e-06, "loss": 24.9464, "step": 333060 }, { "epoch": 0.6728224727998481, "grad_norm": 0.0, "learning_rate": 3.0280448160259057e-06, "loss": 24.0796, "step": 333070 }, { "epoch": 0.6728426734325319, "grad_norm": 271.998291015625, 
"learning_rate": 3.027724049312059e-06, "loss": 17.1429, "step": 333080 }, { "epoch": 0.6728628740652157, "grad_norm": 433.5883483886719, "learning_rate": 3.0274032922108465e-06, "loss": 16.8932, "step": 333090 }, { "epoch": 0.6728830746978995, "grad_norm": 386.6865539550781, "learning_rate": 3.0270825447238316e-06, "loss": 19.7961, "step": 333100 }, { "epoch": 0.6729032753305834, "grad_norm": 306.2874450683594, "learning_rate": 3.0267618068525773e-06, "loss": 16.5354, "step": 333110 }, { "epoch": 0.6729234759632672, "grad_norm": 272.48138427734375, "learning_rate": 3.0264410785986444e-06, "loss": 25.4983, "step": 333120 }, { "epoch": 0.672943676595951, "grad_norm": 491.2439270019531, "learning_rate": 3.026120359963602e-06, "loss": 13.5619, "step": 333130 }, { "epoch": 0.6729638772286348, "grad_norm": 544.17138671875, "learning_rate": 3.025799650949006e-06, "loss": 13.9844, "step": 333140 }, { "epoch": 0.6729840778613186, "grad_norm": 191.28065490722656, "learning_rate": 3.025478951556424e-06, "loss": 13.5854, "step": 333150 }, { "epoch": 0.6730042784940025, "grad_norm": 503.2778625488281, "learning_rate": 3.0251582617874187e-06, "loss": 19.4419, "step": 333160 }, { "epoch": 0.6730244791266863, "grad_norm": 342.5376281738281, "learning_rate": 3.0248375816435537e-06, "loss": 25.083, "step": 333170 }, { "epoch": 0.6730446797593701, "grad_norm": 163.40673828125, "learning_rate": 3.024516911126389e-06, "loss": 13.9405, "step": 333180 }, { "epoch": 0.6730648803920539, "grad_norm": 339.19110107421875, "learning_rate": 3.024196250237489e-06, "loss": 20.5884, "step": 333190 }, { "epoch": 0.6730850810247377, "grad_norm": 216.12643432617188, "learning_rate": 3.023875598978419e-06, "loss": 19.2741, "step": 333200 }, { "epoch": 0.6731052816574216, "grad_norm": 448.638671875, "learning_rate": 3.0235549573507374e-06, "loss": 29.0432, "step": 333210 }, { "epoch": 0.6731254822901054, "grad_norm": 457.5461730957031, "learning_rate": 3.0232343253560097e-06, "loss": 14.686, "step": 333220 }, { "epoch": 0.6731456829227891, "grad_norm": 240.27005004882812, "learning_rate": 3.0229137029957993e-06, "loss": 20.8447, "step": 333230 }, { "epoch": 0.6731658835554729, "grad_norm": 180.43292236328125, "learning_rate": 3.022593090271667e-06, "loss": 13.5783, "step": 333240 }, { "epoch": 0.6731860841881567, "grad_norm": 133.79052734375, "learning_rate": 3.022272487185176e-06, "loss": 12.6388, "step": 333250 }, { "epoch": 0.6732062848208406, "grad_norm": 243.9981689453125, "learning_rate": 3.02195189373789e-06, "loss": 20.7873, "step": 333260 }, { "epoch": 0.6732264854535244, "grad_norm": 375.3045959472656, "learning_rate": 3.0216313099313694e-06, "loss": 7.6527, "step": 333270 }, { "epoch": 0.6732466860862082, "grad_norm": 211.34437561035156, "learning_rate": 3.0213107357671767e-06, "loss": 5.5753, "step": 333280 }, { "epoch": 0.673266886718892, "grad_norm": 399.2186584472656, "learning_rate": 3.020990171246879e-06, "loss": 14.1786, "step": 333290 }, { "epoch": 0.6732870873515758, "grad_norm": 150.73834228515625, "learning_rate": 3.0206696163720317e-06, "loss": 25.2453, "step": 333300 }, { "epoch": 0.6733072879842597, "grad_norm": 225.5262908935547, "learning_rate": 3.0203490711442018e-06, "loss": 17.0998, "step": 333310 }, { "epoch": 0.6733274886169435, "grad_norm": 422.083740234375, "learning_rate": 3.0200285355649504e-06, "loss": 17.2822, "step": 333320 }, { "epoch": 0.6733476892496273, "grad_norm": 88.32089233398438, "learning_rate": 3.019708009635841e-06, "loss": 22.9718, "step": 333330 }, { "epoch": 
0.6733678898823111, "grad_norm": 281.3292236328125, "learning_rate": 3.019387493358434e-06, "loss": 13.1169, "step": 333340 }, { "epoch": 0.6733880905149949, "grad_norm": 584.72900390625, "learning_rate": 3.0190669867342902e-06, "loss": 15.8181, "step": 333350 }, { "epoch": 0.6734082911476788, "grad_norm": 479.5703125, "learning_rate": 3.0187464897649774e-06, "loss": 27.213, "step": 333360 }, { "epoch": 0.6734284917803626, "grad_norm": 337.3212890625, "learning_rate": 3.0184260024520508e-06, "loss": 19.3892, "step": 333370 }, { "epoch": 0.6734486924130464, "grad_norm": 227.15969848632812, "learning_rate": 3.018105524797076e-06, "loss": 26.0862, "step": 333380 }, { "epoch": 0.6734688930457302, "grad_norm": 128.68341064453125, "learning_rate": 3.017785056801615e-06, "loss": 22.8761, "step": 333390 }, { "epoch": 0.673489093678414, "grad_norm": 461.95404052734375, "learning_rate": 3.0174645984672298e-06, "loss": 10.7197, "step": 333400 }, { "epoch": 0.6735092943110979, "grad_norm": 443.9961242675781, "learning_rate": 3.0171441497954807e-06, "loss": 9.9019, "step": 333410 }, { "epoch": 0.6735294949437817, "grad_norm": 482.46185302734375, "learning_rate": 3.0168237107879315e-06, "loss": 21.0501, "step": 333420 }, { "epoch": 0.6735496955764655, "grad_norm": 503.1119689941406, "learning_rate": 3.0165032814461426e-06, "loss": 30.3579, "step": 333430 }, { "epoch": 0.6735698962091493, "grad_norm": 437.2008361816406, "learning_rate": 3.016182861771675e-06, "loss": 30.5888, "step": 333440 }, { "epoch": 0.6735900968418331, "grad_norm": 368.2873229980469, "learning_rate": 3.015862451766092e-06, "loss": 20.0231, "step": 333450 }, { "epoch": 0.673610297474517, "grad_norm": 457.2514343261719, "learning_rate": 3.0155420514309563e-06, "loss": 41.0843, "step": 333460 }, { "epoch": 0.6736304981072008, "grad_norm": 254.58042907714844, "learning_rate": 3.0152216607678262e-06, "loss": 17.6813, "step": 333470 }, { "epoch": 0.6736506987398845, "grad_norm": 565.47314453125, "learning_rate": 3.0149012797782655e-06, "loss": 25.8258, "step": 333480 }, { "epoch": 0.6736708993725683, "grad_norm": 167.13243103027344, "learning_rate": 3.0145809084638366e-06, "loss": 23.5754, "step": 333490 }, { "epoch": 0.6736911000052521, "grad_norm": 352.0194091796875, "learning_rate": 3.0142605468260976e-06, "loss": 35.8935, "step": 333500 }, { "epoch": 0.6737113006379359, "grad_norm": 212.7182159423828, "learning_rate": 3.0139401948666112e-06, "loss": 13.6547, "step": 333510 }, { "epoch": 0.6737315012706198, "grad_norm": 168.4481201171875, "learning_rate": 3.013619852586942e-06, "loss": 27.5947, "step": 333520 }, { "epoch": 0.6737517019033036, "grad_norm": 175.2161102294922, "learning_rate": 3.013299519988647e-06, "loss": 15.3217, "step": 333530 }, { "epoch": 0.6737719025359874, "grad_norm": 37.93092346191406, "learning_rate": 3.0129791970732904e-06, "loss": 16.1902, "step": 333540 }, { "epoch": 0.6737921031686712, "grad_norm": 446.0578918457031, "learning_rate": 3.012658883842432e-06, "loss": 25.7368, "step": 333550 }, { "epoch": 0.673812303801355, "grad_norm": 71.3383560180664, "learning_rate": 3.0123385802976323e-06, "loss": 14.8846, "step": 333560 }, { "epoch": 0.6738325044340389, "grad_norm": 314.0921630859375, "learning_rate": 3.0120182864404535e-06, "loss": 14.2352, "step": 333570 }, { "epoch": 0.6738527050667227, "grad_norm": 783.0932006835938, "learning_rate": 3.0116980022724575e-06, "loss": 14.542, "step": 333580 }, { "epoch": 0.6738729056994065, "grad_norm": 690.4853515625, "learning_rate": 3.011377727795202e-06, "loss": 
20.697, "step": 333590 }, { "epoch": 0.6738931063320903, "grad_norm": 1.116074562072754, "learning_rate": 3.011057463010252e-06, "loss": 32.6198, "step": 333600 }, { "epoch": 0.6739133069647741, "grad_norm": 68.26553344726562, "learning_rate": 3.0107372079191656e-06, "loss": 12.875, "step": 333610 }, { "epoch": 0.673933507597458, "grad_norm": 531.0414428710938, "learning_rate": 3.010416962523507e-06, "loss": 18.5981, "step": 333620 }, { "epoch": 0.6739537082301418, "grad_norm": 451.71649169921875, "learning_rate": 3.0100967268248326e-06, "loss": 11.6052, "step": 333630 }, { "epoch": 0.6739739088628256, "grad_norm": 403.9212341308594, "learning_rate": 3.009776500824706e-06, "loss": 8.2194, "step": 333640 }, { "epoch": 0.6739941094955094, "grad_norm": 181.1913299560547, "learning_rate": 3.009456284524688e-06, "loss": 14.8881, "step": 333650 }, { "epoch": 0.6740143101281932, "grad_norm": 387.13323974609375, "learning_rate": 3.0091360779263373e-06, "loss": 28.0891, "step": 333660 }, { "epoch": 0.6740345107608771, "grad_norm": 456.1517639160156, "learning_rate": 3.0088158810312163e-06, "loss": 15.3469, "step": 333670 }, { "epoch": 0.6740547113935609, "grad_norm": 475.6103820800781, "learning_rate": 3.0084956938408873e-06, "loss": 16.1338, "step": 333680 }, { "epoch": 0.6740749120262447, "grad_norm": 417.0465393066406, "learning_rate": 3.0081755163569066e-06, "loss": 21.4768, "step": 333690 }, { "epoch": 0.6740951126589285, "grad_norm": 322.91546630859375, "learning_rate": 3.007855348580837e-06, "loss": 25.1519, "step": 333700 }, { "epoch": 0.6741153132916123, "grad_norm": 215.58827209472656, "learning_rate": 3.0075351905142404e-06, "loss": 25.6934, "step": 333710 }, { "epoch": 0.6741355139242962, "grad_norm": 553.62060546875, "learning_rate": 3.0072150421586737e-06, "loss": 25.4559, "step": 333720 }, { "epoch": 0.67415571455698, "grad_norm": 398.96392822265625, "learning_rate": 3.0068949035156984e-06, "loss": 18.9469, "step": 333730 }, { "epoch": 0.6741759151896637, "grad_norm": 617.6716918945312, "learning_rate": 3.006574774586879e-06, "loss": 30.2767, "step": 333740 }, { "epoch": 0.6741961158223475, "grad_norm": 322.0965881347656, "learning_rate": 3.0062546553737692e-06, "loss": 8.5848, "step": 333750 }, { "epoch": 0.6742163164550313, "grad_norm": 267.24102783203125, "learning_rate": 3.0059345458779337e-06, "loss": 16.5096, "step": 333760 }, { "epoch": 0.6742365170877151, "grad_norm": 366.47442626953125, "learning_rate": 3.0056144461009313e-06, "loss": 11.5499, "step": 333770 }, { "epoch": 0.674256717720399, "grad_norm": 962.688232421875, "learning_rate": 3.005294356044323e-06, "loss": 21.765, "step": 333780 }, { "epoch": 0.6742769183530828, "grad_norm": 19.26727294921875, "learning_rate": 3.0049742757096666e-06, "loss": 13.6877, "step": 333790 }, { "epoch": 0.6742971189857666, "grad_norm": 64.33914184570312, "learning_rate": 3.004654205098524e-06, "loss": 18.1675, "step": 333800 }, { "epoch": 0.6743173196184504, "grad_norm": 382.41302490234375, "learning_rate": 3.004334144212456e-06, "loss": 14.0915, "step": 333810 }, { "epoch": 0.6743375202511342, "grad_norm": 450.1775207519531, "learning_rate": 3.0040140930530198e-06, "loss": 21.0209, "step": 333820 }, { "epoch": 0.6743577208838181, "grad_norm": 7.644445419311523, "learning_rate": 3.003694051621777e-06, "loss": 14.2, "step": 333830 }, { "epoch": 0.6743779215165019, "grad_norm": 235.771240234375, "learning_rate": 3.003374019920289e-06, "loss": 25.1988, "step": 333840 }, { "epoch": 0.6743981221491857, "grad_norm": 372.7322998046875, 
"learning_rate": 3.003053997950112e-06, "loss": 11.7135, "step": 333850 }, { "epoch": 0.6744183227818695, "grad_norm": 789.88330078125, "learning_rate": 3.002733985712808e-06, "loss": 17.725, "step": 333860 }, { "epoch": 0.6744385234145533, "grad_norm": 373.9104309082031, "learning_rate": 3.0024139832099374e-06, "loss": 13.7187, "step": 333870 }, { "epoch": 0.6744587240472372, "grad_norm": 294.066162109375, "learning_rate": 3.002093990443058e-06, "loss": 9.0585, "step": 333880 }, { "epoch": 0.674478924679921, "grad_norm": 163.41845703125, "learning_rate": 3.001774007413729e-06, "loss": 19.8921, "step": 333890 }, { "epoch": 0.6744991253126048, "grad_norm": 325.6176452636719, "learning_rate": 3.001454034123512e-06, "loss": 17.5673, "step": 333900 }, { "epoch": 0.6745193259452886, "grad_norm": 257.6412048339844, "learning_rate": 3.0011340705739665e-06, "loss": 23.79, "step": 333910 }, { "epoch": 0.6745395265779724, "grad_norm": 342.8741455078125, "learning_rate": 3.0008141167666505e-06, "loss": 17.5091, "step": 333920 }, { "epoch": 0.6745597272106563, "grad_norm": 215.3223876953125, "learning_rate": 3.0004941727031233e-06, "loss": 26.7791, "step": 333930 }, { "epoch": 0.6745799278433401, "grad_norm": 797.1070556640625, "learning_rate": 3.0001742383849464e-06, "loss": 29.5955, "step": 333940 }, { "epoch": 0.6746001284760239, "grad_norm": 149.93658447265625, "learning_rate": 2.9998543138136773e-06, "loss": 15.479, "step": 333950 }, { "epoch": 0.6746203291087077, "grad_norm": 138.8621826171875, "learning_rate": 2.9995343989908743e-06, "loss": 19.0752, "step": 333960 }, { "epoch": 0.6746405297413915, "grad_norm": 311.9769287109375, "learning_rate": 2.9992144939181007e-06, "loss": 13.6434, "step": 333970 }, { "epoch": 0.6746607303740754, "grad_norm": 322.4996643066406, "learning_rate": 2.99889459859691e-06, "loss": 12.097, "step": 333980 }, { "epoch": 0.6746809310067591, "grad_norm": 152.3280792236328, "learning_rate": 2.9985747130288657e-06, "loss": 20.3478, "step": 333990 }, { "epoch": 0.6747011316394429, "grad_norm": 326.5318298339844, "learning_rate": 2.9982548372155264e-06, "loss": 20.6573, "step": 334000 }, { "epoch": 0.6747213322721267, "grad_norm": 543.2077026367188, "learning_rate": 2.9979349711584494e-06, "loss": 18.7329, "step": 334010 }, { "epoch": 0.6747415329048105, "grad_norm": 370.9176025390625, "learning_rate": 2.9976151148591937e-06, "loss": 30.0699, "step": 334020 }, { "epoch": 0.6747617335374944, "grad_norm": 654.6058349609375, "learning_rate": 2.9972952683193207e-06, "loss": 33.1942, "step": 334030 }, { "epoch": 0.6747819341701782, "grad_norm": 898.57958984375, "learning_rate": 2.9969754315403865e-06, "loss": 23.4372, "step": 334040 }, { "epoch": 0.674802134802862, "grad_norm": 494.323486328125, "learning_rate": 2.9966556045239504e-06, "loss": 15.1364, "step": 334050 }, { "epoch": 0.6748223354355458, "grad_norm": 587.9016723632812, "learning_rate": 2.9963357872715727e-06, "loss": 14.8658, "step": 334060 }, { "epoch": 0.6748425360682296, "grad_norm": 479.2344665527344, "learning_rate": 2.9960159797848123e-06, "loss": 24.7396, "step": 334070 }, { "epoch": 0.6748627367009135, "grad_norm": 62.46092224121094, "learning_rate": 2.9956961820652265e-06, "loss": 34.0494, "step": 334080 }, { "epoch": 0.6748829373335973, "grad_norm": 451.027587890625, "learning_rate": 2.995376394114374e-06, "loss": 11.8718, "step": 334090 }, { "epoch": 0.6749031379662811, "grad_norm": 221.69540405273438, "learning_rate": 2.9950566159338146e-06, "loss": 7.3101, "step": 334100 }, { "epoch": 
0.6749233385989649, "grad_norm": 158.60073852539062, "learning_rate": 2.9947368475251048e-06, "loss": 13.518, "step": 334110 }, { "epoch": 0.6749435392316487, "grad_norm": 274.83953857421875, "learning_rate": 2.9944170888898037e-06, "loss": 8.8389, "step": 334120 }, { "epoch": 0.6749637398643326, "grad_norm": 208.35015869140625, "learning_rate": 2.994097340029474e-06, "loss": 27.7086, "step": 334130 }, { "epoch": 0.6749839404970164, "grad_norm": 759.3289184570312, "learning_rate": 2.9937776009456675e-06, "loss": 12.1259, "step": 334140 }, { "epoch": 0.6750041411297002, "grad_norm": 466.6158447265625, "learning_rate": 2.9934578716399465e-06, "loss": 22.5321, "step": 334150 }, { "epoch": 0.675024341762384, "grad_norm": 718.4124755859375, "learning_rate": 2.9931381521138693e-06, "loss": 19.5304, "step": 334160 }, { "epoch": 0.6750445423950678, "grad_norm": 850.4971923828125, "learning_rate": 2.9928184423689923e-06, "loss": 29.6138, "step": 334170 }, { "epoch": 0.6750647430277517, "grad_norm": 571.2877197265625, "learning_rate": 2.992498742406875e-06, "loss": 30.8835, "step": 334180 }, { "epoch": 0.6750849436604355, "grad_norm": 585.3817749023438, "learning_rate": 2.992179052229076e-06, "loss": 15.052, "step": 334190 }, { "epoch": 0.6751051442931193, "grad_norm": 5.335434913635254, "learning_rate": 2.991859371837151e-06, "loss": 15.5326, "step": 334200 }, { "epoch": 0.6751253449258031, "grad_norm": 334.8423767089844, "learning_rate": 2.991539701232661e-06, "loss": 12.7112, "step": 334210 }, { "epoch": 0.6751455455584869, "grad_norm": 74.74103546142578, "learning_rate": 2.991220040417162e-06, "loss": 26.1394, "step": 334220 }, { "epoch": 0.6751657461911708, "grad_norm": 542.1575927734375, "learning_rate": 2.990900389392215e-06, "loss": 26.1725, "step": 334230 }, { "epoch": 0.6751859468238546, "grad_norm": 204.94012451171875, "learning_rate": 2.9905807481593746e-06, "loss": 8.5823, "step": 334240 }, { "epoch": 0.6752061474565383, "grad_norm": 363.3467102050781, "learning_rate": 2.9902611167202e-06, "loss": 29.9696, "step": 334250 }, { "epoch": 0.6752263480892221, "grad_norm": 153.37847900390625, "learning_rate": 2.9899414950762497e-06, "loss": 15.0252, "step": 334260 }, { "epoch": 0.6752465487219059, "grad_norm": 837.5203247070312, "learning_rate": 2.9896218832290784e-06, "loss": 22.4676, "step": 334270 }, { "epoch": 0.6752667493545897, "grad_norm": 323.2222900390625, "learning_rate": 2.9893022811802474e-06, "loss": 14.4997, "step": 334280 }, { "epoch": 0.6752869499872736, "grad_norm": 14.713966369628906, "learning_rate": 2.9889826889313144e-06, "loss": 13.3263, "step": 334290 }, { "epoch": 0.6753071506199574, "grad_norm": 547.5470581054688, "learning_rate": 2.9886631064838355e-06, "loss": 16.369, "step": 334300 }, { "epoch": 0.6753273512526412, "grad_norm": 391.14898681640625, "learning_rate": 2.9883435338393674e-06, "loss": 9.4427, "step": 334310 }, { "epoch": 0.675347551885325, "grad_norm": 1.5440407991409302, "learning_rate": 2.988023970999471e-06, "loss": 10.4, "step": 334320 }, { "epoch": 0.6753677525180088, "grad_norm": 823.4159545898438, "learning_rate": 2.9877044179657e-06, "loss": 23.137, "step": 334330 }, { "epoch": 0.6753879531506927, "grad_norm": 299.2149963378906, "learning_rate": 2.9873848747396135e-06, "loss": 13.556, "step": 334340 }, { "epoch": 0.6754081537833765, "grad_norm": 921.8958129882812, "learning_rate": 2.9870653413227692e-06, "loss": 28.5156, "step": 334350 }, { "epoch": 0.6754283544160603, "grad_norm": 169.8895263671875, "learning_rate": 2.986745817716725e-06, 
"loss": 18.187, "step": 334360 }, { "epoch": 0.6754485550487441, "grad_norm": 261.49896240234375, "learning_rate": 2.9864263039230378e-06, "loss": 15.3056, "step": 334370 }, { "epoch": 0.675468755681428, "grad_norm": 322.1488037109375, "learning_rate": 2.9861067999432634e-06, "loss": 20.718, "step": 334380 }, { "epoch": 0.6754889563141118, "grad_norm": 534.993408203125, "learning_rate": 2.9857873057789623e-06, "loss": 31.4703, "step": 334390 }, { "epoch": 0.6755091569467956, "grad_norm": 580.8644409179688, "learning_rate": 2.9854678214316875e-06, "loss": 27.6865, "step": 334400 }, { "epoch": 0.6755293575794794, "grad_norm": 266.0247802734375, "learning_rate": 2.9851483469029975e-06, "loss": 24.7877, "step": 334410 }, { "epoch": 0.6755495582121632, "grad_norm": 396.6748962402344, "learning_rate": 2.984828882194453e-06, "loss": 14.4112, "step": 334420 }, { "epoch": 0.675569758844847, "grad_norm": 250.25758361816406, "learning_rate": 2.984509427307606e-06, "loss": 16.3577, "step": 334430 }, { "epoch": 0.6755899594775309, "grad_norm": 340.82952880859375, "learning_rate": 2.984189982244016e-06, "loss": 27.7192, "step": 334440 }, { "epoch": 0.6756101601102147, "grad_norm": 727.5569458007812, "learning_rate": 2.9838705470052397e-06, "loss": 19.2347, "step": 334450 }, { "epoch": 0.6756303607428985, "grad_norm": 293.1268005371094, "learning_rate": 2.983551121592834e-06, "loss": 12.2812, "step": 334460 }, { "epoch": 0.6756505613755823, "grad_norm": 374.130126953125, "learning_rate": 2.983231706008355e-06, "loss": 17.5481, "step": 334470 }, { "epoch": 0.6756707620082661, "grad_norm": 0.0, "learning_rate": 2.982912300253361e-06, "loss": 29.4153, "step": 334480 }, { "epoch": 0.67569096264095, "grad_norm": 302.3288879394531, "learning_rate": 2.982592904329407e-06, "loss": 9.5432, "step": 334490 }, { "epoch": 0.6757111632736337, "grad_norm": 296.753173828125, "learning_rate": 2.98227351823805e-06, "loss": 15.1205, "step": 334500 }, { "epoch": 0.6757313639063175, "grad_norm": 527.780029296875, "learning_rate": 2.981954141980847e-06, "loss": 28.0974, "step": 334510 }, { "epoch": 0.6757515645390013, "grad_norm": 252.00889587402344, "learning_rate": 2.981634775559357e-06, "loss": 20.3891, "step": 334520 }, { "epoch": 0.6757717651716851, "grad_norm": 396.3777160644531, "learning_rate": 2.9813154189751327e-06, "loss": 31.2829, "step": 334530 }, { "epoch": 0.675791965804369, "grad_norm": 132.64138793945312, "learning_rate": 2.980996072229732e-06, "loss": 21.0928, "step": 334540 }, { "epoch": 0.6758121664370528, "grad_norm": 207.51771545410156, "learning_rate": 2.9806767353247127e-06, "loss": 7.8165, "step": 334550 }, { "epoch": 0.6758323670697366, "grad_norm": 536.8202514648438, "learning_rate": 2.9803574082616294e-06, "loss": 20.4465, "step": 334560 }, { "epoch": 0.6758525677024204, "grad_norm": 299.7032775878906, "learning_rate": 2.980038091042038e-06, "loss": 14.682, "step": 334570 }, { "epoch": 0.6758727683351042, "grad_norm": 499.4809875488281, "learning_rate": 2.979718783667499e-06, "loss": 21.5977, "step": 334580 }, { "epoch": 0.675892968967788, "grad_norm": 2.10003924369812, "learning_rate": 2.9793994861395625e-06, "loss": 16.5296, "step": 334590 }, { "epoch": 0.6759131696004719, "grad_norm": 505.8760681152344, "learning_rate": 2.9790801984597885e-06, "loss": 14.1168, "step": 334600 }, { "epoch": 0.6759333702331557, "grad_norm": 477.48895263671875, "learning_rate": 2.978760920629734e-06, "loss": 24.5461, "step": 334610 }, { "epoch": 0.6759535708658395, "grad_norm": 206.78285217285156, "learning_rate": 
2.9784416526509525e-06, "loss": 14.6992, "step": 334620 }, { "epoch": 0.6759737714985233, "grad_norm": 14.603195190429688, "learning_rate": 2.978122394525001e-06, "loss": 12.9751, "step": 334630 }, { "epoch": 0.6759939721312072, "grad_norm": 582.718505859375, "learning_rate": 2.977803146253437e-06, "loss": 17.8561, "step": 334640 }, { "epoch": 0.676014172763891, "grad_norm": 163.27503967285156, "learning_rate": 2.977483907837814e-06, "loss": 21.0278, "step": 334650 }, { "epoch": 0.6760343733965748, "grad_norm": 166.07298278808594, "learning_rate": 2.977164679279688e-06, "loss": 10.4512, "step": 334660 }, { "epoch": 0.6760545740292586, "grad_norm": 526.6989135742188, "learning_rate": 2.9768454605806176e-06, "loss": 13.9076, "step": 334670 }, { "epoch": 0.6760747746619424, "grad_norm": 2.295748472213745, "learning_rate": 2.976526251742158e-06, "loss": 14.5048, "step": 334680 }, { "epoch": 0.6760949752946263, "grad_norm": 531.06005859375, "learning_rate": 2.9762070527658628e-06, "loss": 27.5859, "step": 334690 }, { "epoch": 0.6761151759273101, "grad_norm": 206.268798828125, "learning_rate": 2.9758878636532884e-06, "loss": 17.0999, "step": 334700 }, { "epoch": 0.6761353765599939, "grad_norm": 431.9757385253906, "learning_rate": 2.975568684405993e-06, "loss": 22.6525, "step": 334710 }, { "epoch": 0.6761555771926777, "grad_norm": 497.8822937011719, "learning_rate": 2.9752495150255284e-06, "loss": 13.6454, "step": 334720 }, { "epoch": 0.6761757778253615, "grad_norm": 309.1185302734375, "learning_rate": 2.9749303555134512e-06, "loss": 34.7585, "step": 334730 }, { "epoch": 0.6761959784580454, "grad_norm": 716.8196411132812, "learning_rate": 2.9746112058713218e-06, "loss": 20.6415, "step": 334740 }, { "epoch": 0.6762161790907292, "grad_norm": 132.9484100341797, "learning_rate": 2.974292066100688e-06, "loss": 24.0512, "step": 334750 }, { "epoch": 0.6762363797234129, "grad_norm": 89.66337585449219, "learning_rate": 2.97397293620311e-06, "loss": 17.3719, "step": 334760 }, { "epoch": 0.6762565803560967, "grad_norm": 380.46734619140625, "learning_rate": 2.9736538161801433e-06, "loss": 20.5944, "step": 334770 }, { "epoch": 0.6762767809887805, "grad_norm": 394.9644470214844, "learning_rate": 2.9733347060333408e-06, "loss": 17.5288, "step": 334780 }, { "epoch": 0.6762969816214643, "grad_norm": 367.57666015625, "learning_rate": 2.9730156057642595e-06, "loss": 24.0068, "step": 334790 }, { "epoch": 0.6763171822541482, "grad_norm": 576.5689086914062, "learning_rate": 2.972696515374455e-06, "loss": 17.9293, "step": 334800 }, { "epoch": 0.676337382886832, "grad_norm": 913.5191040039062, "learning_rate": 2.972377434865481e-06, "loss": 19.3679, "step": 334810 }, { "epoch": 0.6763575835195158, "grad_norm": 130.3188018798828, "learning_rate": 2.972058364238892e-06, "loss": 15.4976, "step": 334820 }, { "epoch": 0.6763777841521996, "grad_norm": 254.9954071044922, "learning_rate": 2.9717393034962468e-06, "loss": 18.6514, "step": 334830 }, { "epoch": 0.6763979847848834, "grad_norm": 431.535888671875, "learning_rate": 2.9714202526390985e-06, "loss": 12.6177, "step": 334840 }, { "epoch": 0.6764181854175673, "grad_norm": 138.59254455566406, "learning_rate": 2.9711012116690007e-06, "loss": 15.4331, "step": 334850 }, { "epoch": 0.6764383860502511, "grad_norm": 568.0218505859375, "learning_rate": 2.97078218058751e-06, "loss": 12.4271, "step": 334860 }, { "epoch": 0.6764585866829349, "grad_norm": 759.718505859375, "learning_rate": 2.9704631593961815e-06, "loss": 23.042, "step": 334870 }, { "epoch": 0.6764787873156187, 
"grad_norm": 398.9481506347656, "learning_rate": 2.9701441480965683e-06, "loss": 23.598, "step": 334880 }, { "epoch": 0.6764989879483025, "grad_norm": 500.3331298828125, "learning_rate": 2.969825146690225e-06, "loss": 41.1918, "step": 334890 }, { "epoch": 0.6765191885809864, "grad_norm": 552.3027954101562, "learning_rate": 2.969506155178711e-06, "loss": 23.5349, "step": 334900 }, { "epoch": 0.6765393892136702, "grad_norm": 531.1264038085938, "learning_rate": 2.9691871735635753e-06, "loss": 26.5298, "step": 334910 }, { "epoch": 0.676559589846354, "grad_norm": 625.7227172851562, "learning_rate": 2.9688682018463755e-06, "loss": 21.6557, "step": 334920 }, { "epoch": 0.6765797904790378, "grad_norm": 190.6686553955078, "learning_rate": 2.968549240028667e-06, "loss": 7.9868, "step": 334930 }, { "epoch": 0.6765999911117216, "grad_norm": 94.68265533447266, "learning_rate": 2.968230288112002e-06, "loss": 12.1203, "step": 334940 }, { "epoch": 0.6766201917444055, "grad_norm": 575.996826171875, "learning_rate": 2.9679113460979347e-06, "loss": 18.6427, "step": 334950 }, { "epoch": 0.6766403923770893, "grad_norm": 317.9371643066406, "learning_rate": 2.967592413988023e-06, "loss": 25.3584, "step": 334960 }, { "epoch": 0.6766605930097731, "grad_norm": 964.5819091796875, "learning_rate": 2.9672734917838198e-06, "loss": 28.1624, "step": 334970 }, { "epoch": 0.6766807936424569, "grad_norm": 7.9333271980285645, "learning_rate": 2.9669545794868777e-06, "loss": 7.6235, "step": 334980 }, { "epoch": 0.6767009942751407, "grad_norm": 258.66424560546875, "learning_rate": 2.9666356770987524e-06, "loss": 21.1746, "step": 334990 }, { "epoch": 0.6767211949078246, "grad_norm": 128.112548828125, "learning_rate": 2.966316784621e-06, "loss": 27.1505, "step": 335000 }, { "epoch": 0.6767413955405084, "grad_norm": 335.5589599609375, "learning_rate": 2.965997902055171e-06, "loss": 20.931, "step": 335010 }, { "epoch": 0.6767615961731921, "grad_norm": 706.6641845703125, "learning_rate": 2.9656790294028216e-06, "loss": 12.962, "step": 335020 }, { "epoch": 0.6767817968058759, "grad_norm": 462.24798583984375, "learning_rate": 2.965360166665508e-06, "loss": 11.251, "step": 335030 }, { "epoch": 0.6768019974385597, "grad_norm": 355.06365966796875, "learning_rate": 2.9650413138447797e-06, "loss": 23.4497, "step": 335040 }, { "epoch": 0.6768221980712436, "grad_norm": 175.37826538085938, "learning_rate": 2.964722470942194e-06, "loss": 16.3155, "step": 335050 }, { "epoch": 0.6768423987039274, "grad_norm": 677.8414916992188, "learning_rate": 2.964403637959305e-06, "loss": 22.8367, "step": 335060 }, { "epoch": 0.6768625993366112, "grad_norm": 404.0458068847656, "learning_rate": 2.9640848148976655e-06, "loss": 14.63, "step": 335070 }, { "epoch": 0.676882799969295, "grad_norm": 377.67852783203125, "learning_rate": 2.963766001758829e-06, "loss": 13.5425, "step": 335080 }, { "epoch": 0.6769030006019788, "grad_norm": 28.240121841430664, "learning_rate": 2.96344719854435e-06, "loss": 9.9352, "step": 335090 }, { "epoch": 0.6769232012346627, "grad_norm": 477.0787658691406, "learning_rate": 2.963128405255783e-06, "loss": 26.4698, "step": 335100 }, { "epoch": 0.6769434018673465, "grad_norm": 376.7519226074219, "learning_rate": 2.96280962189468e-06, "loss": 15.0515, "step": 335110 }, { "epoch": 0.6769636025000303, "grad_norm": 212.5742645263672, "learning_rate": 2.962490848462596e-06, "loss": 9.7001, "step": 335120 }, { "epoch": 0.6769838031327141, "grad_norm": 58.66630172729492, "learning_rate": 2.9621720849610857e-06, "loss": 14.2607, "step": 335130 
}, { "epoch": 0.6770040037653979, "grad_norm": 307.52044677734375, "learning_rate": 2.961853331391701e-06, "loss": 40.5167, "step": 335140 }, { "epoch": 0.6770242043980818, "grad_norm": 314.05023193359375, "learning_rate": 2.9615345877559953e-06, "loss": 11.2318, "step": 335150 }, { "epoch": 0.6770444050307656, "grad_norm": 243.64166259765625, "learning_rate": 2.9612158540555245e-06, "loss": 18.5069, "step": 335160 }, { "epoch": 0.6770646056634494, "grad_norm": 141.25399780273438, "learning_rate": 2.9608971302918387e-06, "loss": 14.9054, "step": 335170 }, { "epoch": 0.6770848062961332, "grad_norm": 133.18682861328125, "learning_rate": 2.9605784164664925e-06, "loss": 14.4311, "step": 335180 }, { "epoch": 0.677105006928817, "grad_norm": 839.1710815429688, "learning_rate": 2.960259712581043e-06, "loss": 26.1139, "step": 335190 }, { "epoch": 0.6771252075615009, "grad_norm": 374.13555908203125, "learning_rate": 2.9599410186370363e-06, "loss": 27.7875, "step": 335200 }, { "epoch": 0.6771454081941847, "grad_norm": 558.8908081054688, "learning_rate": 2.959622334636031e-06, "loss": 19.4503, "step": 335210 }, { "epoch": 0.6771656088268685, "grad_norm": 369.95635986328125, "learning_rate": 2.95930366057958e-06, "loss": 22.0149, "step": 335220 }, { "epoch": 0.6771858094595523, "grad_norm": 542.2275390625, "learning_rate": 2.9589849964692352e-06, "loss": 13.5066, "step": 335230 }, { "epoch": 0.6772060100922361, "grad_norm": 284.5793762207031, "learning_rate": 2.9586663423065487e-06, "loss": 20.0974, "step": 335240 }, { "epoch": 0.67722621072492, "grad_norm": 231.64053344726562, "learning_rate": 2.9583476980930768e-06, "loss": 13.341, "step": 335250 }, { "epoch": 0.6772464113576038, "grad_norm": 559.0064086914062, "learning_rate": 2.9580290638303692e-06, "loss": 17.23, "step": 335260 }, { "epoch": 0.6772666119902875, "grad_norm": 447.82196044921875, "learning_rate": 2.9577104395199795e-06, "loss": 10.8653, "step": 335270 }, { "epoch": 0.6772868126229713, "grad_norm": 657.632080078125, "learning_rate": 2.9573918251634627e-06, "loss": 19.4192, "step": 335280 }, { "epoch": 0.6773070132556551, "grad_norm": 514.7632446289062, "learning_rate": 2.957073220762371e-06, "loss": 18.8193, "step": 335290 }, { "epoch": 0.6773272138883389, "grad_norm": 424.8777770996094, "learning_rate": 2.9567546263182554e-06, "loss": 18.3145, "step": 335300 }, { "epoch": 0.6773474145210228, "grad_norm": 528.4025268554688, "learning_rate": 2.9564360418326698e-06, "loss": 12.5365, "step": 335310 }, { "epoch": 0.6773676151537066, "grad_norm": 536.476318359375, "learning_rate": 2.956117467307169e-06, "loss": 17.0952, "step": 335320 }, { "epoch": 0.6773878157863904, "grad_norm": 578.4048461914062, "learning_rate": 2.955798902743302e-06, "loss": 21.9617, "step": 335330 }, { "epoch": 0.6774080164190742, "grad_norm": 379.8204650878906, "learning_rate": 2.9554803481426223e-06, "loss": 8.0091, "step": 335340 }, { "epoch": 0.677428217051758, "grad_norm": 532.469482421875, "learning_rate": 2.9551618035066863e-06, "loss": 21.908, "step": 335350 }, { "epoch": 0.6774484176844419, "grad_norm": 403.3153381347656, "learning_rate": 2.954843268837041e-06, "loss": 19.0119, "step": 335360 }, { "epoch": 0.6774686183171257, "grad_norm": 391.7265930175781, "learning_rate": 2.954524744135243e-06, "loss": 25.1481, "step": 335370 }, { "epoch": 0.6774888189498095, "grad_norm": 124.90596008300781, "learning_rate": 2.954206229402844e-06, "loss": 11.7571, "step": 335380 }, { "epoch": 0.6775090195824933, "grad_norm": 302.9950866699219, "learning_rate": 
2.9538877246413943e-06, "loss": 20.4238, "step": 335390 }, { "epoch": 0.6775292202151771, "grad_norm": 154.38528442382812, "learning_rate": 2.9535692298524477e-06, "loss": 16.8624, "step": 335400 }, { "epoch": 0.677549420847861, "grad_norm": 773.2611694335938, "learning_rate": 2.953250745037556e-06, "loss": 15.0836, "step": 335410 }, { "epoch": 0.6775696214805448, "grad_norm": 301.2328186035156, "learning_rate": 2.9529322701982744e-06, "loss": 10.0662, "step": 335420 }, { "epoch": 0.6775898221132286, "grad_norm": 367.5018005371094, "learning_rate": 2.9526138053361496e-06, "loss": 14.9167, "step": 335430 }, { "epoch": 0.6776100227459124, "grad_norm": 108.8554916381836, "learning_rate": 2.952295350452738e-06, "loss": 27.2063, "step": 335440 }, { "epoch": 0.6776302233785962, "grad_norm": 523.1973266601562, "learning_rate": 2.9519769055495917e-06, "loss": 38.7709, "step": 335450 }, { "epoch": 0.6776504240112801, "grad_norm": 782.3070068359375, "learning_rate": 2.9516584706282604e-06, "loss": 27.4142, "step": 335460 }, { "epoch": 0.6776706246439639, "grad_norm": 285.48138427734375, "learning_rate": 2.9513400456902975e-06, "loss": 27.1429, "step": 335470 }, { "epoch": 0.6776908252766477, "grad_norm": 609.79541015625, "learning_rate": 2.951021630737255e-06, "loss": 25.0206, "step": 335480 }, { "epoch": 0.6777110259093315, "grad_norm": 224.08694458007812, "learning_rate": 2.950703225770684e-06, "loss": 17.2162, "step": 335490 }, { "epoch": 0.6777312265420153, "grad_norm": 264.4524230957031, "learning_rate": 2.9503848307921363e-06, "loss": 12.9259, "step": 335500 }, { "epoch": 0.6777514271746992, "grad_norm": 192.50445556640625, "learning_rate": 2.9500664458031656e-06, "loss": 17.8412, "step": 335510 }, { "epoch": 0.677771627807383, "grad_norm": 578.9146728515625, "learning_rate": 2.949748070805322e-06, "loss": 28.7587, "step": 335520 }, { "epoch": 0.6777918284400667, "grad_norm": 208.0790557861328, "learning_rate": 2.9494297058001575e-06, "loss": 22.8426, "step": 335530 }, { "epoch": 0.6778120290727505, "grad_norm": 276.21063232421875, "learning_rate": 2.949111350789225e-06, "loss": 30.0007, "step": 335540 }, { "epoch": 0.6778322297054343, "grad_norm": 48.124542236328125, "learning_rate": 2.948793005774074e-06, "loss": 24.3713, "step": 335550 }, { "epoch": 0.6778524303381182, "grad_norm": 964.6512451171875, "learning_rate": 2.9484746707562573e-06, "loss": 21.5953, "step": 335560 }, { "epoch": 0.677872630970802, "grad_norm": 158.03167724609375, "learning_rate": 2.9481563457373247e-06, "loss": 9.1398, "step": 335570 }, { "epoch": 0.6778928316034858, "grad_norm": 239.17372131347656, "learning_rate": 2.9478380307188316e-06, "loss": 11.358, "step": 335580 }, { "epoch": 0.6779130322361696, "grad_norm": 148.4192657470703, "learning_rate": 2.947519725702326e-06, "loss": 17.2626, "step": 335590 }, { "epoch": 0.6779332328688534, "grad_norm": 20.930021286010742, "learning_rate": 2.9472014306893605e-06, "loss": 23.6575, "step": 335600 }, { "epoch": 0.6779534335015373, "grad_norm": 138.44613647460938, "learning_rate": 2.946883145681486e-06, "loss": 17.9701, "step": 335610 }, { "epoch": 0.6779736341342211, "grad_norm": 229.4267578125, "learning_rate": 2.946564870680255e-06, "loss": 26.3465, "step": 335620 }, { "epoch": 0.6779938347669049, "grad_norm": 698.5621337890625, "learning_rate": 2.946246605687215e-06, "loss": 18.6027, "step": 335630 }, { "epoch": 0.6780140353995887, "grad_norm": 289.07305908203125, "learning_rate": 2.945928350703924e-06, "loss": 19.505, "step": 335640 }, { "epoch": 0.6780342360322725, 
"grad_norm": 122.36067962646484, "learning_rate": 2.9456101057319266e-06, "loss": 15.033, "step": 335650 }, { "epoch": 0.6780544366649564, "grad_norm": 199.5050506591797, "learning_rate": 2.945291870772776e-06, "loss": 18.9662, "step": 335660 }, { "epoch": 0.6780746372976402, "grad_norm": 394.22943115234375, "learning_rate": 2.9449736458280253e-06, "loss": 25.678, "step": 335670 }, { "epoch": 0.678094837930324, "grad_norm": 533.0518188476562, "learning_rate": 2.9446554308992227e-06, "loss": 12.8919, "step": 335680 }, { "epoch": 0.6781150385630078, "grad_norm": 298.85821533203125, "learning_rate": 2.94433722598792e-06, "loss": 23.3194, "step": 335690 }, { "epoch": 0.6781352391956916, "grad_norm": 106.61241912841797, "learning_rate": 2.94401903109567e-06, "loss": 21.9341, "step": 335700 }, { "epoch": 0.6781554398283755, "grad_norm": 303.8421630859375, "learning_rate": 2.94370084622402e-06, "loss": 18.5334, "step": 335710 }, { "epoch": 0.6781756404610593, "grad_norm": 618.7250366210938, "learning_rate": 2.943382671374523e-06, "loss": 28.3782, "step": 335720 }, { "epoch": 0.6781958410937431, "grad_norm": 443.4075012207031, "learning_rate": 2.9430645065487296e-06, "loss": 11.1509, "step": 335730 }, { "epoch": 0.6782160417264269, "grad_norm": 48.277862548828125, "learning_rate": 2.9427463517481913e-06, "loss": 11.3507, "step": 335740 }, { "epoch": 0.6782362423591107, "grad_norm": 186.58279418945312, "learning_rate": 2.9424282069744564e-06, "loss": 8.9796, "step": 335750 }, { "epoch": 0.6782564429917946, "grad_norm": 268.7702331542969, "learning_rate": 2.9421100722290774e-06, "loss": 21.0618, "step": 335760 }, { "epoch": 0.6782766436244784, "grad_norm": 154.38316345214844, "learning_rate": 2.9417919475136053e-06, "loss": 13.6416, "step": 335770 }, { "epoch": 0.6782968442571621, "grad_norm": 215.0345916748047, "learning_rate": 2.9414738328295884e-06, "loss": 10.4715, "step": 335780 }, { "epoch": 0.6783170448898459, "grad_norm": 514.3206176757812, "learning_rate": 2.9411557281785772e-06, "loss": 13.2922, "step": 335790 }, { "epoch": 0.6783372455225297, "grad_norm": 338.4372253417969, "learning_rate": 2.940837633562127e-06, "loss": 19.6505, "step": 335800 }, { "epoch": 0.6783574461552135, "grad_norm": 795.0709228515625, "learning_rate": 2.9405195489817805e-06, "loss": 29.8077, "step": 335810 }, { "epoch": 0.6783776467878974, "grad_norm": 428.6066589355469, "learning_rate": 2.9402014744390937e-06, "loss": 20.2856, "step": 335820 }, { "epoch": 0.6783978474205812, "grad_norm": 1047.8885498046875, "learning_rate": 2.9398834099356155e-06, "loss": 21.924, "step": 335830 }, { "epoch": 0.678418048053265, "grad_norm": 366.1895751953125, "learning_rate": 2.9395653554728955e-06, "loss": 22.1305, "step": 335840 }, { "epoch": 0.6784382486859488, "grad_norm": 558.1474609375, "learning_rate": 2.9392473110524834e-06, "loss": 22.3471, "step": 335850 }, { "epoch": 0.6784584493186326, "grad_norm": 399.8312683105469, "learning_rate": 2.9389292766759313e-06, "loss": 22.832, "step": 335860 }, { "epoch": 0.6784786499513165, "grad_norm": 216.11851501464844, "learning_rate": 2.9386112523447863e-06, "loss": 10.0315, "step": 335870 }, { "epoch": 0.6784988505840003, "grad_norm": 330.2546691894531, "learning_rate": 2.9382932380606e-06, "loss": 19.3024, "step": 335880 }, { "epoch": 0.6785190512166841, "grad_norm": 329.8070373535156, "learning_rate": 2.9379752338249223e-06, "loss": 17.3057, "step": 335890 }, { "epoch": 0.6785392518493679, "grad_norm": 4.235108852386475, "learning_rate": 2.9376572396393047e-06, "loss": 17.1998, 
"step": 335900 }, { "epoch": 0.6785594524820517, "grad_norm": 467.51617431640625, "learning_rate": 2.937339255505295e-06, "loss": 7.4437, "step": 335910 }, { "epoch": 0.6785796531147356, "grad_norm": 695.2574462890625, "learning_rate": 2.9370212814244436e-06, "loss": 15.0593, "step": 335920 }, { "epoch": 0.6785998537474194, "grad_norm": 79.98819732666016, "learning_rate": 2.9367033173983006e-06, "loss": 21.2393, "step": 335930 }, { "epoch": 0.6786200543801032, "grad_norm": 419.0413818359375, "learning_rate": 2.9363853634284143e-06, "loss": 13.0665, "step": 335940 }, { "epoch": 0.678640255012787, "grad_norm": 0.0, "learning_rate": 2.9360674195163354e-06, "loss": 26.5614, "step": 335950 }, { "epoch": 0.6786604556454708, "grad_norm": 512.8369140625, "learning_rate": 2.935749485663616e-06, "loss": 33.0561, "step": 335960 }, { "epoch": 0.6786806562781547, "grad_norm": 297.6837158203125, "learning_rate": 2.9354315618718005e-06, "loss": 20.9158, "step": 335970 }, { "epoch": 0.6787008569108385, "grad_norm": 0.3239041268825531, "learning_rate": 2.9351136481424413e-06, "loss": 6.8588, "step": 335980 }, { "epoch": 0.6787210575435223, "grad_norm": 345.4874572753906, "learning_rate": 2.93479574447709e-06, "loss": 18.8582, "step": 335990 }, { "epoch": 0.6787412581762061, "grad_norm": 958.6956176757812, "learning_rate": 2.934477850877292e-06, "loss": 33.1137, "step": 336000 }, { "epoch": 0.6787614588088899, "grad_norm": 382.7071228027344, "learning_rate": 2.9341599673445988e-06, "loss": 25.8995, "step": 336010 }, { "epoch": 0.6787816594415738, "grad_norm": 475.3750305175781, "learning_rate": 2.933842093880558e-06, "loss": 34.536, "step": 336020 }, { "epoch": 0.6788018600742576, "grad_norm": 134.16317749023438, "learning_rate": 2.9335242304867233e-06, "loss": 18.3059, "step": 336030 }, { "epoch": 0.6788220607069413, "grad_norm": 291.14495849609375, "learning_rate": 2.933206377164638e-06, "loss": 10.2518, "step": 336040 }, { "epoch": 0.6788422613396251, "grad_norm": 263.8800048828125, "learning_rate": 2.9328885339158554e-06, "loss": 29.2639, "step": 336050 }, { "epoch": 0.6788624619723089, "grad_norm": 145.86029052734375, "learning_rate": 2.9325707007419235e-06, "loss": 15.119, "step": 336060 }, { "epoch": 0.6788826626049927, "grad_norm": 786.78369140625, "learning_rate": 2.9322528776443917e-06, "loss": 17.4758, "step": 336070 }, { "epoch": 0.6789028632376766, "grad_norm": 581.380126953125, "learning_rate": 2.9319350646248075e-06, "loss": 13.891, "step": 336080 }, { "epoch": 0.6789230638703604, "grad_norm": 144.01966857910156, "learning_rate": 2.931617261684722e-06, "loss": 14.0006, "step": 336090 }, { "epoch": 0.6789432645030442, "grad_norm": 309.3218994140625, "learning_rate": 2.931299468825682e-06, "loss": 17.6286, "step": 336100 }, { "epoch": 0.678963465135728, "grad_norm": 341.881591796875, "learning_rate": 2.930981686049237e-06, "loss": 30.1838, "step": 336110 }, { "epoch": 0.6789836657684118, "grad_norm": 43.649391174316406, "learning_rate": 2.9306639133569393e-06, "loss": 24.5629, "step": 336120 }, { "epoch": 0.6790038664010957, "grad_norm": 615.0656127929688, "learning_rate": 2.930346150750332e-06, "loss": 24.637, "step": 336130 }, { "epoch": 0.6790240670337795, "grad_norm": 281.3387145996094, "learning_rate": 2.930028398230966e-06, "loss": 16.123, "step": 336140 }, { "epoch": 0.6790442676664633, "grad_norm": 124.0038070678711, "learning_rate": 2.929710655800393e-06, "loss": 19.8334, "step": 336150 }, { "epoch": 0.6790644682991471, "grad_norm": 583.1646118164062, "learning_rate": 
2.929392923460158e-06, "loss": 18.6541, "step": 336160 }, { "epoch": 0.679084668931831, "grad_norm": 407.7423095703125, "learning_rate": 2.9290752012118105e-06, "loss": 23.8656, "step": 336170 }, { "epoch": 0.6791048695645148, "grad_norm": 374.0619812011719, "learning_rate": 2.9287574890568982e-06, "loss": 19.12, "step": 336180 }, { "epoch": 0.6791250701971986, "grad_norm": 312.9776306152344, "learning_rate": 2.928439786996973e-06, "loss": 17.1294, "step": 336190 }, { "epoch": 0.6791452708298824, "grad_norm": 617.8344116210938, "learning_rate": 2.92812209503358e-06, "loss": 41.6107, "step": 336200 }, { "epoch": 0.6791654714625662, "grad_norm": 274.5924377441406, "learning_rate": 2.927804413168268e-06, "loss": 21.6986, "step": 336210 }, { "epoch": 0.67918567209525, "grad_norm": 133.44154357910156, "learning_rate": 2.9274867414025876e-06, "loss": 12.2877, "step": 336220 }, { "epoch": 0.6792058727279339, "grad_norm": 482.30206298828125, "learning_rate": 2.927169079738084e-06, "loss": 14.4421, "step": 336230 }, { "epoch": 0.6792260733606177, "grad_norm": 666.83447265625, "learning_rate": 2.9268514281763072e-06, "loss": 29.109, "step": 336240 }, { "epoch": 0.6792462739933015, "grad_norm": 437.9608459472656, "learning_rate": 2.926533786718806e-06, "loss": 20.7922, "step": 336250 }, { "epoch": 0.6792664746259853, "grad_norm": 578.3386840820312, "learning_rate": 2.926216155367126e-06, "loss": 13.6233, "step": 336260 }, { "epoch": 0.6792866752586691, "grad_norm": 294.40350341796875, "learning_rate": 2.9258985341228174e-06, "loss": 18.8183, "step": 336270 }, { "epoch": 0.679306875891353, "grad_norm": 184.0799560546875, "learning_rate": 2.9255809229874287e-06, "loss": 20.9594, "step": 336280 }, { "epoch": 0.6793270765240368, "grad_norm": 829.5468139648438, "learning_rate": 2.9252633219625073e-06, "loss": 17.8298, "step": 336290 }, { "epoch": 0.6793472771567205, "grad_norm": 905.2503051757812, "learning_rate": 2.9249457310495994e-06, "loss": 12.2561, "step": 336300 }, { "epoch": 0.6793674777894043, "grad_norm": 915.447265625, "learning_rate": 2.924628150250256e-06, "loss": 21.5375, "step": 336310 }, { "epoch": 0.6793876784220881, "grad_norm": 454.55279541015625, "learning_rate": 2.924310579566022e-06, "loss": 10.5602, "step": 336320 }, { "epoch": 0.679407879054772, "grad_norm": 173.49160766601562, "learning_rate": 2.9239930189984458e-06, "loss": 14.85, "step": 336330 }, { "epoch": 0.6794280796874558, "grad_norm": 319.1146240234375, "learning_rate": 2.9236754685490764e-06, "loss": 27.3328, "step": 336340 }, { "epoch": 0.6794482803201396, "grad_norm": 361.9736328125, "learning_rate": 2.9233579282194617e-06, "loss": 18.1259, "step": 336350 }, { "epoch": 0.6794684809528234, "grad_norm": 182.25999450683594, "learning_rate": 2.9230403980111482e-06, "loss": 16.7042, "step": 336360 }, { "epoch": 0.6794886815855072, "grad_norm": 581.5782470703125, "learning_rate": 2.922722877925683e-06, "loss": 17.7779, "step": 336370 }, { "epoch": 0.6795088822181911, "grad_norm": 244.02505493164062, "learning_rate": 2.922405367964617e-06, "loss": 19.2927, "step": 336380 }, { "epoch": 0.6795290828508749, "grad_norm": 319.10882568359375, "learning_rate": 2.9220878681294935e-06, "loss": 12.2844, "step": 336390 }, { "epoch": 0.6795492834835587, "grad_norm": 350.7702331542969, "learning_rate": 2.921770378421861e-06, "loss": 16.9566, "step": 336400 }, { "epoch": 0.6795694841162425, "grad_norm": 351.8890686035156, "learning_rate": 2.92145289884327e-06, "loss": 22.1177, "step": 336410 }, { "epoch": 0.6795896847489263, "grad_norm": 
329.609619140625, "learning_rate": 2.9211354293952632e-06, "loss": 23.3981, "step": 336420 }, { "epoch": 0.6796098853816102, "grad_norm": 623.2437744140625, "learning_rate": 2.9208179700793905e-06, "loss": 29.25, "step": 336430 }, { "epoch": 0.679630086014294, "grad_norm": 357.9377136230469, "learning_rate": 2.9205005208972e-06, "loss": 12.7148, "step": 336440 }, { "epoch": 0.6796502866469778, "grad_norm": 281.5408020019531, "learning_rate": 2.920183081850237e-06, "loss": 17.607, "step": 336450 }, { "epoch": 0.6796704872796616, "grad_norm": 381.7162780761719, "learning_rate": 2.919865652940049e-06, "loss": 11.5911, "step": 336460 }, { "epoch": 0.6796906879123454, "grad_norm": 508.3994445800781, "learning_rate": 2.919548234168183e-06, "loss": 15.7111, "step": 336470 }, { "epoch": 0.6797108885450293, "grad_norm": 388.01800537109375, "learning_rate": 2.9192308255361895e-06, "loss": 16.966, "step": 336480 }, { "epoch": 0.6797310891777131, "grad_norm": 59.19709396362305, "learning_rate": 2.918913427045609e-06, "loss": 18.7259, "step": 336490 }, { "epoch": 0.6797512898103969, "grad_norm": 849.0178833007812, "learning_rate": 2.918596038697995e-06, "loss": 26.1783, "step": 336500 }, { "epoch": 0.6797714904430807, "grad_norm": 523.110107421875, "learning_rate": 2.918278660494891e-06, "loss": 11.7323, "step": 336510 }, { "epoch": 0.6797916910757645, "grad_norm": 440.6187438964844, "learning_rate": 2.917961292437842e-06, "loss": 19.7034, "step": 336520 }, { "epoch": 0.6798118917084484, "grad_norm": 446.0595397949219, "learning_rate": 2.917643934528398e-06, "loss": 27.6702, "step": 336530 }, { "epoch": 0.6798320923411322, "grad_norm": 439.68487548828125, "learning_rate": 2.917326586768106e-06, "loss": 20.364, "step": 336540 }, { "epoch": 0.6798522929738159, "grad_norm": 367.6255187988281, "learning_rate": 2.9170092491585122e-06, "loss": 19.888, "step": 336550 }, { "epoch": 0.6798724936064997, "grad_norm": 139.78103637695312, "learning_rate": 2.91669192170116e-06, "loss": 10.5209, "step": 336560 }, { "epoch": 0.6798926942391835, "grad_norm": 337.6980285644531, "learning_rate": 2.9163746043976014e-06, "loss": 11.5866, "step": 336570 }, { "epoch": 0.6799128948718673, "grad_norm": 354.5260314941406, "learning_rate": 2.91605729724938e-06, "loss": 18.4546, "step": 336580 }, { "epoch": 0.6799330955045512, "grad_norm": 57.688995361328125, "learning_rate": 2.9157400002580407e-06, "loss": 17.4047, "step": 336590 }, { "epoch": 0.679953296137235, "grad_norm": 344.17626953125, "learning_rate": 2.915422713425134e-06, "loss": 20.1476, "step": 336600 }, { "epoch": 0.6799734967699188, "grad_norm": 268.698486328125, "learning_rate": 2.9151054367522013e-06, "loss": 13.0335, "step": 336610 }, { "epoch": 0.6799936974026026, "grad_norm": 83.21357727050781, "learning_rate": 2.914788170240795e-06, "loss": 13.718, "step": 336620 }, { "epoch": 0.6800138980352864, "grad_norm": 331.0199279785156, "learning_rate": 2.9144709138924556e-06, "loss": 17.2042, "step": 336630 }, { "epoch": 0.6800340986679703, "grad_norm": 306.1650390625, "learning_rate": 2.9141536677087346e-06, "loss": 15.1955, "step": 336640 }, { "epoch": 0.6800542993006541, "grad_norm": 406.2598876953125, "learning_rate": 2.9138364316911747e-06, "loss": 12.8523, "step": 336650 }, { "epoch": 0.6800744999333379, "grad_norm": 230.9142608642578, "learning_rate": 2.9135192058413212e-06, "loss": 44.1466, "step": 336660 }, { "epoch": 0.6800947005660217, "grad_norm": 258.8046569824219, "learning_rate": 2.9132019901607246e-06, "loss": 21.7546, "step": 336670 }, { "epoch": 
0.6801149011987055, "grad_norm": 289.5413818359375, "learning_rate": 2.912884784650926e-06, "loss": 26.8708, "step": 336680 }, { "epoch": 0.6801351018313894, "grad_norm": 1108.12158203125, "learning_rate": 2.9125675893134762e-06, "loss": 31.604, "step": 336690 }, { "epoch": 0.6801553024640732, "grad_norm": 339.9401550292969, "learning_rate": 2.912250404149918e-06, "loss": 33.6846, "step": 336700 }, { "epoch": 0.680175503096757, "grad_norm": 388.1901550292969, "learning_rate": 2.9119332291617974e-06, "loss": 21.4534, "step": 336710 }, { "epoch": 0.6801957037294408, "grad_norm": 454.15185546875, "learning_rate": 2.911616064350662e-06, "loss": 26.7128, "step": 336720 }, { "epoch": 0.6802159043621246, "grad_norm": 63.31000900268555, "learning_rate": 2.9112989097180567e-06, "loss": 20.6256, "step": 336730 }, { "epoch": 0.6802361049948085, "grad_norm": 1194.2406005859375, "learning_rate": 2.9109817652655253e-06, "loss": 18.5528, "step": 336740 }, { "epoch": 0.6802563056274923, "grad_norm": 487.3364562988281, "learning_rate": 2.910664630994615e-06, "loss": 20.9212, "step": 336750 }, { "epoch": 0.6802765062601761, "grad_norm": 126.3383560180664, "learning_rate": 2.9103475069068763e-06, "loss": 10.531, "step": 336760 }, { "epoch": 0.6802967068928599, "grad_norm": 56.48907470703125, "learning_rate": 2.910030393003847e-06, "loss": 19.3291, "step": 336770 }, { "epoch": 0.6803169075255437, "grad_norm": 390.70635986328125, "learning_rate": 2.9097132892870757e-06, "loss": 13.2997, "step": 336780 }, { "epoch": 0.6803371081582276, "grad_norm": 357.54412841796875, "learning_rate": 2.9093961957581096e-06, "loss": 17.5058, "step": 336790 }, { "epoch": 0.6803573087909114, "grad_norm": 347.0444641113281, "learning_rate": 2.9090791124184934e-06, "loss": 10.6631, "step": 336800 }, { "epoch": 0.6803775094235951, "grad_norm": 367.3725891113281, "learning_rate": 2.9087620392697703e-06, "loss": 9.7796, "step": 336810 }, { "epoch": 0.6803977100562789, "grad_norm": 298.2248840332031, "learning_rate": 2.908444976313487e-06, "loss": 19.3133, "step": 336820 }, { "epoch": 0.6804179106889627, "grad_norm": 271.5204772949219, "learning_rate": 2.908127923551194e-06, "loss": 12.7521, "step": 336830 }, { "epoch": 0.6804381113216466, "grad_norm": 1156.487548828125, "learning_rate": 2.9078108809844264e-06, "loss": 20.649, "step": 336840 }, { "epoch": 0.6804583119543304, "grad_norm": 220.82901000976562, "learning_rate": 2.9074938486147357e-06, "loss": 19.9492, "step": 336850 }, { "epoch": 0.6804785125870142, "grad_norm": 248.13047790527344, "learning_rate": 2.9071768264436685e-06, "loss": 12.7572, "step": 336860 }, { "epoch": 0.680498713219698, "grad_norm": 496.8698425292969, "learning_rate": 2.9068598144727666e-06, "loss": 19.8571, "step": 336870 }, { "epoch": 0.6805189138523818, "grad_norm": 543.551025390625, "learning_rate": 2.9065428127035743e-06, "loss": 32.1801, "step": 336880 }, { "epoch": 0.6805391144850657, "grad_norm": 411.4061279296875, "learning_rate": 2.9062258211376414e-06, "loss": 18.5994, "step": 336890 }, { "epoch": 0.6805593151177495, "grad_norm": 420.7814636230469, "learning_rate": 2.905908839776509e-06, "loss": 33.5048, "step": 336900 }, { "epoch": 0.6805795157504333, "grad_norm": 344.6456298828125, "learning_rate": 2.9055918686217212e-06, "loss": 12.5738, "step": 336910 }, { "epoch": 0.6805997163831171, "grad_norm": 507.3303527832031, "learning_rate": 2.9052749076748266e-06, "loss": 34.5787, "step": 336920 }, { "epoch": 0.6806199170158009, "grad_norm": 717.0743408203125, "learning_rate": 2.904957956937366e-06, 
"loss": 23.5352, "step": 336930 }, { "epoch": 0.6806401176484848, "grad_norm": 809.09912109375, "learning_rate": 2.9046410164108883e-06, "loss": 15.4761, "step": 336940 }, { "epoch": 0.6806603182811686, "grad_norm": 352.9372863769531, "learning_rate": 2.9043240860969342e-06, "loss": 20.2224, "step": 336950 }, { "epoch": 0.6806805189138524, "grad_norm": 312.1246032714844, "learning_rate": 2.904007165997052e-06, "loss": 5.6943, "step": 336960 }, { "epoch": 0.6807007195465362, "grad_norm": 636.2717895507812, "learning_rate": 2.903690256112785e-06, "loss": 16.9237, "step": 336970 }, { "epoch": 0.68072092017922, "grad_norm": 597.7210693359375, "learning_rate": 2.9033733564456756e-06, "loss": 13.5185, "step": 336980 }, { "epoch": 0.6807411208119039, "grad_norm": 290.738037109375, "learning_rate": 2.9030564669972717e-06, "loss": 8.1657, "step": 336990 }, { "epoch": 0.6807613214445877, "grad_norm": 300.6885986328125, "learning_rate": 2.9027395877691143e-06, "loss": 29.2064, "step": 337000 }, { "epoch": 0.6807815220772715, "grad_norm": 471.7787170410156, "learning_rate": 2.902422718762752e-06, "loss": 21.3975, "step": 337010 }, { "epoch": 0.6808017227099553, "grad_norm": 415.0528259277344, "learning_rate": 2.902105859979727e-06, "loss": 12.9827, "step": 337020 }, { "epoch": 0.6808219233426391, "grad_norm": 203.2621612548828, "learning_rate": 2.9017890114215814e-06, "loss": 13.1972, "step": 337030 }, { "epoch": 0.680842123975323, "grad_norm": 408.6101379394531, "learning_rate": 2.9014721730898637e-06, "loss": 20.3462, "step": 337040 }, { "epoch": 0.6808623246080068, "grad_norm": 1091.6729736328125, "learning_rate": 2.9011553449861163e-06, "loss": 22.4188, "step": 337050 }, { "epoch": 0.6808825252406905, "grad_norm": 163.2935028076172, "learning_rate": 2.900838527111881e-06, "loss": 22.3477, "step": 337060 }, { "epoch": 0.6809027258733743, "grad_norm": 378.9201354980469, "learning_rate": 2.900521719468704e-06, "loss": 19.6525, "step": 337070 }, { "epoch": 0.6809229265060581, "grad_norm": 116.05774688720703, "learning_rate": 2.900204922058132e-06, "loss": 13.6343, "step": 337080 }, { "epoch": 0.6809431271387419, "grad_norm": 703.281982421875, "learning_rate": 2.899888134881706e-06, "loss": 21.3625, "step": 337090 }, { "epoch": 0.6809633277714258, "grad_norm": 383.28643798828125, "learning_rate": 2.899571357940969e-06, "loss": 15.0211, "step": 337100 }, { "epoch": 0.6809835284041096, "grad_norm": 258.8240051269531, "learning_rate": 2.8992545912374683e-06, "loss": 22.9595, "step": 337110 }, { "epoch": 0.6810037290367934, "grad_norm": 443.8153381347656, "learning_rate": 2.8989378347727453e-06, "loss": 15.601, "step": 337120 }, { "epoch": 0.6810239296694772, "grad_norm": 243.36021423339844, "learning_rate": 2.8986210885483436e-06, "loss": 10.958, "step": 337130 }, { "epoch": 0.681044130302161, "grad_norm": 337.3719177246094, "learning_rate": 2.8983043525658065e-06, "loss": 26.0296, "step": 337140 }, { "epoch": 0.6810643309348449, "grad_norm": 334.7502136230469, "learning_rate": 2.897987626826682e-06, "loss": 17.04, "step": 337150 }, { "epoch": 0.6810845315675287, "grad_norm": 123.75239562988281, "learning_rate": 2.8976709113325107e-06, "loss": 19.8106, "step": 337160 }, { "epoch": 0.6811047322002125, "grad_norm": 188.6690673828125, "learning_rate": 2.8973542060848347e-06, "loss": 11.9762, "step": 337170 }, { "epoch": 0.6811249328328963, "grad_norm": 618.4735107421875, "learning_rate": 2.897037511085201e-06, "loss": 24.9842, "step": 337180 }, { "epoch": 0.6811451334655801, "grad_norm": 151.5784149169922, 
"learning_rate": 2.896720826335151e-06, "loss": 14.0799, "step": 337190 }, { "epoch": 0.681165334098264, "grad_norm": 710.8343505859375, "learning_rate": 2.896404151836227e-06, "loss": 23.4284, "step": 337200 }, { "epoch": 0.6811855347309478, "grad_norm": 135.69598388671875, "learning_rate": 2.896087487589976e-06, "loss": 21.608, "step": 337210 }, { "epoch": 0.6812057353636316, "grad_norm": 280.37451171875, "learning_rate": 2.8957708335979373e-06, "loss": 17.083, "step": 337220 }, { "epoch": 0.6812259359963154, "grad_norm": 575.2173461914062, "learning_rate": 2.8954541898616585e-06, "loss": 18.1872, "step": 337230 }, { "epoch": 0.6812461366289992, "grad_norm": 486.860107421875, "learning_rate": 2.895137556382679e-06, "loss": 16.7434, "step": 337240 }, { "epoch": 0.6812663372616831, "grad_norm": 495.7618103027344, "learning_rate": 2.8948209331625454e-06, "loss": 14.0619, "step": 337250 }, { "epoch": 0.6812865378943669, "grad_norm": 402.1931457519531, "learning_rate": 2.8945043202027987e-06, "loss": 25.143, "step": 337260 }, { "epoch": 0.6813067385270507, "grad_norm": 254.796875, "learning_rate": 2.8941877175049815e-06, "loss": 20.1072, "step": 337270 }, { "epoch": 0.6813269391597345, "grad_norm": 354.0314025878906, "learning_rate": 2.8938711250706397e-06, "loss": 14.5098, "step": 337280 }, { "epoch": 0.6813471397924183, "grad_norm": 353.9878234863281, "learning_rate": 2.8935545429013123e-06, "loss": 27.2135, "step": 337290 }, { "epoch": 0.6813673404251022, "grad_norm": 344.0814208984375, "learning_rate": 2.893237970998547e-06, "loss": 11.1809, "step": 337300 }, { "epoch": 0.681387541057786, "grad_norm": 241.19207763671875, "learning_rate": 2.892921409363884e-06, "loss": 23.2927, "step": 337310 }, { "epoch": 0.6814077416904697, "grad_norm": 337.1117248535156, "learning_rate": 2.8926048579988647e-06, "loss": 18.7983, "step": 337320 }, { "epoch": 0.6814279423231535, "grad_norm": 120.28765869140625, "learning_rate": 2.8922883169050354e-06, "loss": 15.756, "step": 337330 }, { "epoch": 0.6814481429558373, "grad_norm": 196.0370635986328, "learning_rate": 2.891971786083938e-06, "loss": 19.3709, "step": 337340 }, { "epoch": 0.6814683435885212, "grad_norm": 555.6265869140625, "learning_rate": 2.8916552655371117e-06, "loss": 25.1948, "step": 337350 }, { "epoch": 0.681488544221205, "grad_norm": 597.2166748046875, "learning_rate": 2.891338755266102e-06, "loss": 19.4583, "step": 337360 }, { "epoch": 0.6815087448538888, "grad_norm": 389.1532897949219, "learning_rate": 2.8910222552724552e-06, "loss": 16.0208, "step": 337370 }, { "epoch": 0.6815289454865726, "grad_norm": 138.45277404785156, "learning_rate": 2.8907057655577066e-06, "loss": 9.7212, "step": 337380 }, { "epoch": 0.6815491461192564, "grad_norm": 447.41461181640625, "learning_rate": 2.8903892861234023e-06, "loss": 16.6015, "step": 337390 }, { "epoch": 0.6815693467519403, "grad_norm": 268.70367431640625, "learning_rate": 2.8900728169710866e-06, "loss": 7.0208, "step": 337400 }, { "epoch": 0.6815895473846241, "grad_norm": 3082.040283203125, "learning_rate": 2.8897563581023e-06, "loss": 50.2535, "step": 337410 }, { "epoch": 0.6816097480173079, "grad_norm": 273.8852844238281, "learning_rate": 2.8894399095185833e-06, "loss": 19.967, "step": 337420 }, { "epoch": 0.6816299486499917, "grad_norm": 491.9859619140625, "learning_rate": 2.8891234712214798e-06, "loss": 40.1611, "step": 337430 }, { "epoch": 0.6816501492826755, "grad_norm": 494.9411315917969, "learning_rate": 2.888807043212537e-06, "loss": 17.7439, "step": 337440 }, { "epoch": 
0.6816703499153594, "grad_norm": 145.7028350830078, "learning_rate": 2.888490625493289e-06, "loss": 10.0653, "step": 337450 }, { "epoch": 0.6816905505480432, "grad_norm": 113.2907943725586, "learning_rate": 2.8881742180652813e-06, "loss": 13.1933, "step": 337460 }, { "epoch": 0.681710751180727, "grad_norm": 562.1471557617188, "learning_rate": 2.8878578209300576e-06, "loss": 20.0976, "step": 337470 }, { "epoch": 0.6817309518134108, "grad_norm": 302.1080017089844, "learning_rate": 2.8875414340891596e-06, "loss": 17.5522, "step": 337480 }, { "epoch": 0.6817511524460946, "grad_norm": 157.92369079589844, "learning_rate": 2.887225057544126e-06, "loss": 21.4645, "step": 337490 }, { "epoch": 0.6817713530787785, "grad_norm": 583.1824340820312, "learning_rate": 2.886908691296504e-06, "loss": 9.4382, "step": 337500 }, { "epoch": 0.6817915537114623, "grad_norm": 252.84439086914062, "learning_rate": 2.886592335347832e-06, "loss": 11.8952, "step": 337510 }, { "epoch": 0.6818117543441461, "grad_norm": 287.48907470703125, "learning_rate": 2.8862759896996507e-06, "loss": 22.0179, "step": 337520 }, { "epoch": 0.6818319549768299, "grad_norm": 356.02166748046875, "learning_rate": 2.885959654353504e-06, "loss": 12.4505, "step": 337530 }, { "epoch": 0.6818521556095137, "grad_norm": 267.37646484375, "learning_rate": 2.8856433293109355e-06, "loss": 21.0376, "step": 337540 }, { "epoch": 0.6818723562421976, "grad_norm": 398.0299987792969, "learning_rate": 2.8853270145734846e-06, "loss": 20.8949, "step": 337550 }, { "epoch": 0.6818925568748814, "grad_norm": 0.0, "learning_rate": 2.8850107101426916e-06, "loss": 16.1071, "step": 337560 }, { "epoch": 0.6819127575075651, "grad_norm": 374.1020202636719, "learning_rate": 2.884694416020102e-06, "loss": 23.6504, "step": 337570 }, { "epoch": 0.6819329581402489, "grad_norm": 486.46051025390625, "learning_rate": 2.884378132207255e-06, "loss": 16.6881, "step": 337580 }, { "epoch": 0.6819531587729327, "grad_norm": 347.15631103515625, "learning_rate": 2.884061858705691e-06, "loss": 12.7797, "step": 337590 }, { "epoch": 0.6819733594056165, "grad_norm": 276.30218505859375, "learning_rate": 2.8837455955169547e-06, "loss": 15.8591, "step": 337600 }, { "epoch": 0.6819935600383004, "grad_norm": 32.22821044921875, "learning_rate": 2.883429342642583e-06, "loss": 22.1051, "step": 337610 }, { "epoch": 0.6820137606709842, "grad_norm": 624.7095336914062, "learning_rate": 2.8831131000841227e-06, "loss": 19.8468, "step": 337620 }, { "epoch": 0.682033961303668, "grad_norm": 66.20378112792969, "learning_rate": 2.882796867843112e-06, "loss": 19.7956, "step": 337630 }, { "epoch": 0.6820541619363518, "grad_norm": 572.2191772460938, "learning_rate": 2.8824806459210907e-06, "loss": 16.1606, "step": 337640 }, { "epoch": 0.6820743625690356, "grad_norm": 552.029296875, "learning_rate": 2.8821644343196042e-06, "loss": 19.6395, "step": 337650 }, { "epoch": 0.6820945632017195, "grad_norm": 209.0013427734375, "learning_rate": 2.881848233040191e-06, "loss": 27.2228, "step": 337660 }, { "epoch": 0.6821147638344033, "grad_norm": 323.4571838378906, "learning_rate": 2.8815320420843906e-06, "loss": 16.9253, "step": 337670 }, { "epoch": 0.6821349644670871, "grad_norm": 315.37335205078125, "learning_rate": 2.8812158614537465e-06, "loss": 18.8757, "step": 337680 }, { "epoch": 0.6821551650997709, "grad_norm": 407.3314208984375, "learning_rate": 2.8808996911498012e-06, "loss": 9.1621, "step": 337690 }, { "epoch": 0.6821753657324547, "grad_norm": 413.5022277832031, "learning_rate": 2.8805835311740933e-06, "loss": 
15.6154, "step": 337700 }, { "epoch": 0.6821955663651386, "grad_norm": 843.661376953125, "learning_rate": 2.8802673815281622e-06, "loss": 11.72, "step": 337710 }, { "epoch": 0.6822157669978224, "grad_norm": 352.2972412109375, "learning_rate": 2.8799512422135534e-06, "loss": 16.2007, "step": 337720 }, { "epoch": 0.6822359676305062, "grad_norm": 353.974609375, "learning_rate": 2.8796351132318047e-06, "loss": 13.8269, "step": 337730 }, { "epoch": 0.68225616826319, "grad_norm": 717.4970703125, "learning_rate": 2.879318994584456e-06, "loss": 10.8968, "step": 337740 }, { "epoch": 0.6822763688958738, "grad_norm": 193.4995574951172, "learning_rate": 2.8790028862730487e-06, "loss": 18.4877, "step": 337750 }, { "epoch": 0.6822965695285577, "grad_norm": 472.2587585449219, "learning_rate": 2.8786867882991276e-06, "loss": 17.5904, "step": 337760 }, { "epoch": 0.6823167701612415, "grad_norm": 1694.71240234375, "learning_rate": 2.8783707006642266e-06, "loss": 18.691, "step": 337770 }, { "epoch": 0.6823369707939253, "grad_norm": 531.9982299804688, "learning_rate": 2.878054623369889e-06, "loss": 15.8425, "step": 337780 }, { "epoch": 0.6823571714266091, "grad_norm": 739.2018432617188, "learning_rate": 2.8777385564176585e-06, "loss": 10.0849, "step": 337790 }, { "epoch": 0.6823773720592929, "grad_norm": 247.94149780273438, "learning_rate": 2.877422499809072e-06, "loss": 20.2294, "step": 337800 }, { "epoch": 0.6823975726919768, "grad_norm": 390.6719665527344, "learning_rate": 2.8771064535456696e-06, "loss": 20.9425, "step": 337810 }, { "epoch": 0.6824177733246606, "grad_norm": 750.8737182617188, "learning_rate": 2.876790417628994e-06, "loss": 21.3811, "step": 337820 }, { "epoch": 0.6824379739573443, "grad_norm": 2.4040181636810303, "learning_rate": 2.8764743920605855e-06, "loss": 17.6284, "step": 337830 }, { "epoch": 0.6824581745900281, "grad_norm": 563.5733642578125, "learning_rate": 2.87615837684198e-06, "loss": 16.2708, "step": 337840 }, { "epoch": 0.6824783752227119, "grad_norm": 203.45425415039062, "learning_rate": 2.875842371974722e-06, "loss": 11.3526, "step": 337850 }, { "epoch": 0.6824985758553958, "grad_norm": 465.1794128417969, "learning_rate": 2.875526377460352e-06, "loss": 26.2164, "step": 337860 }, { "epoch": 0.6825187764880796, "grad_norm": 666.3756103515625, "learning_rate": 2.8752103933004095e-06, "loss": 25.2052, "step": 337870 }, { "epoch": 0.6825389771207634, "grad_norm": 49.69120788574219, "learning_rate": 2.874894419496431e-06, "loss": 14.9313, "step": 337880 }, { "epoch": 0.6825591777534472, "grad_norm": 733.9598999023438, "learning_rate": 2.874578456049961e-06, "loss": 24.4403, "step": 337890 }, { "epoch": 0.682579378386131, "grad_norm": 180.44061279296875, "learning_rate": 2.874262502962537e-06, "loss": 17.7423, "step": 337900 }, { "epoch": 0.6825995790188149, "grad_norm": 483.427001953125, "learning_rate": 2.8739465602357014e-06, "loss": 8.3198, "step": 337910 }, { "epoch": 0.6826197796514987, "grad_norm": 579.8331909179688, "learning_rate": 2.8736306278709923e-06, "loss": 17.613, "step": 337920 }, { "epoch": 0.6826399802841825, "grad_norm": 685.6690063476562, "learning_rate": 2.8733147058699473e-06, "loss": 17.5377, "step": 337930 }, { "epoch": 0.6826601809168663, "grad_norm": 218.8058319091797, "learning_rate": 2.872998794234111e-06, "loss": 15.3337, "step": 337940 }, { "epoch": 0.6826803815495501, "grad_norm": 597.5891723632812, "learning_rate": 2.8726828929650206e-06, "loss": 19.9905, "step": 337950 }, { "epoch": 0.682700582182234, "grad_norm": 56.42866516113281, "learning_rate": 
2.8723670020642137e-06, "loss": 23.3822, "step": 337960 }, { "epoch": 0.6827207828149178, "grad_norm": 324.05364990234375, "learning_rate": 2.8720511215332314e-06, "loss": 14.599, "step": 337970 }, { "epoch": 0.6827409834476016, "grad_norm": 641.1737060546875, "learning_rate": 2.871735251373616e-06, "loss": 18.2005, "step": 337980 }, { "epoch": 0.6827611840802854, "grad_norm": 360.8026123046875, "learning_rate": 2.871419391586906e-06, "loss": 17.879, "step": 337990 }, { "epoch": 0.6827813847129692, "grad_norm": 388.5684814453125, "learning_rate": 2.871103542174637e-06, "loss": 21.4161, "step": 338000 }, { "epoch": 0.682801585345653, "grad_norm": 278.2432861328125, "learning_rate": 2.8707877031383523e-06, "loss": 19.7573, "step": 338010 }, { "epoch": 0.6828217859783369, "grad_norm": 622.8558349609375, "learning_rate": 2.870471874479591e-06, "loss": 14.9239, "step": 338020 }, { "epoch": 0.6828419866110207, "grad_norm": 3363.966796875, "learning_rate": 2.870156056199889e-06, "loss": 28.2797, "step": 338030 }, { "epoch": 0.6828621872437045, "grad_norm": 472.21014404296875, "learning_rate": 2.8698402483007885e-06, "loss": 23.524, "step": 338040 }, { "epoch": 0.6828823878763883, "grad_norm": 165.0049285888672, "learning_rate": 2.8695244507838326e-06, "loss": 15.7984, "step": 338050 }, { "epoch": 0.6829025885090722, "grad_norm": 367.30523681640625, "learning_rate": 2.869208663650551e-06, "loss": 15.7757, "step": 338060 }, { "epoch": 0.682922789141756, "grad_norm": 333.3113098144531, "learning_rate": 2.868892886902489e-06, "loss": 17.5221, "step": 338070 }, { "epoch": 0.6829429897744398, "grad_norm": 265.7660217285156, "learning_rate": 2.8685771205411862e-06, "loss": 18.43, "step": 338080 }, { "epoch": 0.6829631904071235, "grad_norm": 525.880859375, "learning_rate": 2.86826136456818e-06, "loss": 25.6477, "step": 338090 }, { "epoch": 0.6829833910398073, "grad_norm": 595.0288696289062, "learning_rate": 2.8679456189850076e-06, "loss": 22.5259, "step": 338100 }, { "epoch": 0.6830035916724911, "grad_norm": 438.26385498046875, "learning_rate": 2.8676298837932116e-06, "loss": 27.3373, "step": 338110 }, { "epoch": 0.683023792305175, "grad_norm": 1112.549560546875, "learning_rate": 2.8673141589943285e-06, "loss": 17.6199, "step": 338120 }, { "epoch": 0.6830439929378588, "grad_norm": 112.30738830566406, "learning_rate": 2.866998444589896e-06, "loss": 31.6832, "step": 338130 }, { "epoch": 0.6830641935705426, "grad_norm": 30.14986228942871, "learning_rate": 2.8666827405814535e-06, "loss": 8.3946, "step": 338140 }, { "epoch": 0.6830843942032264, "grad_norm": 616.5403442382812, "learning_rate": 2.8663670469705434e-06, "loss": 19.5467, "step": 338150 }, { "epoch": 0.6831045948359102, "grad_norm": 219.76979064941406, "learning_rate": 2.8660513637587016e-06, "loss": 15.6856, "step": 338160 }, { "epoch": 0.6831247954685941, "grad_norm": 734.9285888671875, "learning_rate": 2.8657356909474644e-06, "loss": 16.7782, "step": 338170 }, { "epoch": 0.6831449961012779, "grad_norm": 312.7208251953125, "learning_rate": 2.865420028538375e-06, "loss": 13.9156, "step": 338180 }, { "epoch": 0.6831651967339617, "grad_norm": 221.04898071289062, "learning_rate": 2.865104376532969e-06, "loss": 21.6149, "step": 338190 }, { "epoch": 0.6831853973666455, "grad_norm": 483.8321838378906, "learning_rate": 2.864788734932783e-06, "loss": 15.3848, "step": 338200 }, { "epoch": 0.6832055979993293, "grad_norm": 142.70033264160156, "learning_rate": 2.8644731037393605e-06, "loss": 11.8688, "step": 338210 }, { "epoch": 0.6832257986320132, 
"grad_norm": 1207.791015625, "learning_rate": 2.864157482954234e-06, "loss": 20.3958, "step": 338220 }, { "epoch": 0.683245999264697, "grad_norm": 204.30404663085938, "learning_rate": 2.863841872578948e-06, "loss": 20.5889, "step": 338230 }, { "epoch": 0.6832661998973808, "grad_norm": 316.9858093261719, "learning_rate": 2.863526272615037e-06, "loss": 20.7871, "step": 338240 }, { "epoch": 0.6832864005300646, "grad_norm": 245.21316528320312, "learning_rate": 2.863210683064038e-06, "loss": 11.0048, "step": 338250 }, { "epoch": 0.6833066011627484, "grad_norm": 50.83566665649414, "learning_rate": 2.8628951039274933e-06, "loss": 46.2538, "step": 338260 }, { "epoch": 0.6833268017954323, "grad_norm": 620.8017578125, "learning_rate": 2.8625795352069385e-06, "loss": 27.8345, "step": 338270 }, { "epoch": 0.6833470024281161, "grad_norm": 153.79302978515625, "learning_rate": 2.8622639769039094e-06, "loss": 18.5572, "step": 338280 }, { "epoch": 0.6833672030607999, "grad_norm": 352.8744812011719, "learning_rate": 2.861948429019947e-06, "loss": 13.159, "step": 338290 }, { "epoch": 0.6833874036934837, "grad_norm": 20.52808952331543, "learning_rate": 2.8616328915565907e-06, "loss": 8.4701, "step": 338300 }, { "epoch": 0.6834076043261675, "grad_norm": 4.267901420593262, "learning_rate": 2.861317364515376e-06, "loss": 9.4055, "step": 338310 }, { "epoch": 0.6834278049588514, "grad_norm": 82.28601837158203, "learning_rate": 2.8610018478978387e-06, "loss": 18.105, "step": 338320 }, { "epoch": 0.6834480055915352, "grad_norm": 597.6419067382812, "learning_rate": 2.8606863417055216e-06, "loss": 13.2078, "step": 338330 }, { "epoch": 0.6834682062242189, "grad_norm": 309.8203430175781, "learning_rate": 2.8603708459399594e-06, "loss": 14.3366, "step": 338340 }, { "epoch": 0.6834884068569027, "grad_norm": 11.254910469055176, "learning_rate": 2.8600553606026883e-06, "loss": 21.2411, "step": 338350 }, { "epoch": 0.6835086074895865, "grad_norm": 401.820068359375, "learning_rate": 2.8597398856952473e-06, "loss": 11.7122, "step": 338360 }, { "epoch": 0.6835288081222703, "grad_norm": 119.74920654296875, "learning_rate": 2.8594244212191796e-06, "loss": 16.6536, "step": 338370 }, { "epoch": 0.6835490087549542, "grad_norm": 805.9678955078125, "learning_rate": 2.859108967176013e-06, "loss": 24.8198, "step": 338380 }, { "epoch": 0.683569209387638, "grad_norm": 453.3701477050781, "learning_rate": 2.8587935235672897e-06, "loss": 6.5504, "step": 338390 }, { "epoch": 0.6835894100203218, "grad_norm": 621.1905517578125, "learning_rate": 2.858478090394549e-06, "loss": 20.8854, "step": 338400 }, { "epoch": 0.6836096106530056, "grad_norm": 121.0240249633789, "learning_rate": 2.8581626676593264e-06, "loss": 16.6853, "step": 338410 }, { "epoch": 0.6836298112856894, "grad_norm": 326.71929931640625, "learning_rate": 2.8578472553631577e-06, "loss": 32.0411, "step": 338420 }, { "epoch": 0.6836500119183733, "grad_norm": 388.282958984375, "learning_rate": 2.857531853507583e-06, "loss": 19.2871, "step": 338430 }, { "epoch": 0.6836702125510571, "grad_norm": 32314.8671875, "learning_rate": 2.8572164620941385e-06, "loss": 47.4179, "step": 338440 }, { "epoch": 0.6836904131837409, "grad_norm": 489.69525146484375, "learning_rate": 2.856901081124359e-06, "loss": 17.1072, "step": 338450 }, { "epoch": 0.6837106138164247, "grad_norm": 166.05245971679688, "learning_rate": 2.856585710599784e-06, "loss": 20.7168, "step": 338460 }, { "epoch": 0.6837308144491085, "grad_norm": 85.07772064208984, "learning_rate": 2.8562703505219513e-06, "loss": 18.3139, "step": 
338470 }, { "epoch": 0.6837510150817924, "grad_norm": 665.2428588867188, "learning_rate": 2.855955000892398e-06, "loss": 48.6721, "step": 338480 }, { "epoch": 0.6837712157144762, "grad_norm": 501.0298156738281, "learning_rate": 2.8556396617126575e-06, "loss": 12.1529, "step": 338490 }, { "epoch": 0.68379141634716, "grad_norm": 435.16632080078125, "learning_rate": 2.8553243329842715e-06, "loss": 28.8214, "step": 338500 }, { "epoch": 0.6838116169798438, "grad_norm": 436.9760437011719, "learning_rate": 2.855009014708774e-06, "loss": 17.9255, "step": 338510 }, { "epoch": 0.6838318176125276, "grad_norm": 133.7801971435547, "learning_rate": 2.8546937068877013e-06, "loss": 27.8965, "step": 338520 }, { "epoch": 0.6838520182452115, "grad_norm": 261.2592468261719, "learning_rate": 2.8543784095225923e-06, "loss": 14.6261, "step": 338530 }, { "epoch": 0.6838722188778953, "grad_norm": 300.118408203125, "learning_rate": 2.8540631226149813e-06, "loss": 24.2721, "step": 338540 }, { "epoch": 0.6838924195105791, "grad_norm": 1.5835719108581543, "learning_rate": 2.853747846166408e-06, "loss": 5.3972, "step": 338550 }, { "epoch": 0.6839126201432629, "grad_norm": 583.8345947265625, "learning_rate": 2.853432580178408e-06, "loss": 16.2432, "step": 338560 }, { "epoch": 0.6839328207759467, "grad_norm": 344.1213073730469, "learning_rate": 2.8531173246525144e-06, "loss": 22.5829, "step": 338570 }, { "epoch": 0.6839530214086306, "grad_norm": 1210.4813232421875, "learning_rate": 2.8528020795902672e-06, "loss": 13.6372, "step": 338580 }, { "epoch": 0.6839732220413144, "grad_norm": 337.1247253417969, "learning_rate": 2.852486844993204e-06, "loss": 23.2272, "step": 338590 }, { "epoch": 0.6839934226739981, "grad_norm": 511.3916015625, "learning_rate": 2.8521716208628597e-06, "loss": 15.6157, "step": 338600 }, { "epoch": 0.6840136233066819, "grad_norm": 309.7074279785156, "learning_rate": 2.851856407200768e-06, "loss": 13.0667, "step": 338610 }, { "epoch": 0.6840338239393657, "grad_norm": 821.8313598632812, "learning_rate": 2.85154120400847e-06, "loss": 21.2477, "step": 338620 }, { "epoch": 0.6840540245720496, "grad_norm": 121.84247589111328, "learning_rate": 2.8512260112874994e-06, "loss": 16.0241, "step": 338630 }, { "epoch": 0.6840742252047334, "grad_norm": 248.17120361328125, "learning_rate": 2.8509108290393907e-06, "loss": 21.9676, "step": 338640 }, { "epoch": 0.6840944258374172, "grad_norm": 249.0458526611328, "learning_rate": 2.850595657265682e-06, "loss": 7.1093, "step": 338650 }, { "epoch": 0.684114626470101, "grad_norm": 786.6953735351562, "learning_rate": 2.850280495967913e-06, "loss": 16.8887, "step": 338660 }, { "epoch": 0.6841348271027848, "grad_norm": 357.6971130371094, "learning_rate": 2.8499653451476126e-06, "loss": 19.3543, "step": 338670 }, { "epoch": 0.6841550277354687, "grad_norm": 285.8201599121094, "learning_rate": 2.84965020480632e-06, "loss": 17.6934, "step": 338680 }, { "epoch": 0.6841752283681525, "grad_norm": 246.18898010253906, "learning_rate": 2.8493350749455737e-06, "loss": 41.294, "step": 338690 }, { "epoch": 0.6841954290008363, "grad_norm": 84.6827392578125, "learning_rate": 2.849019955566908e-06, "loss": 25.6304, "step": 338700 }, { "epoch": 0.6842156296335201, "grad_norm": 522.9943237304688, "learning_rate": 2.848704846671856e-06, "loss": 20.3795, "step": 338710 }, { "epoch": 0.6842358302662039, "grad_norm": 348.3674011230469, "learning_rate": 2.8483897482619566e-06, "loss": 26.6569, "step": 338720 }, { "epoch": 0.6842560308988878, "grad_norm": 92.71199035644531, "learning_rate": 
2.8480746603387453e-06, "loss": 17.377, "step": 338730 }, { "epoch": 0.6842762315315716, "grad_norm": 505.63134765625, "learning_rate": 2.847759582903755e-06, "loss": 17.9122, "step": 338740 }, { "epoch": 0.6842964321642554, "grad_norm": 366.8626708984375, "learning_rate": 2.8474445159585235e-06, "loss": 20.0451, "step": 338750 }, { "epoch": 0.6843166327969392, "grad_norm": 413.54095458984375, "learning_rate": 2.8471294595045886e-06, "loss": 11.1325, "step": 338760 }, { "epoch": 0.684336833429623, "grad_norm": 396.3618469238281, "learning_rate": 2.8468144135434827e-06, "loss": 28.3938, "step": 338770 }, { "epoch": 0.6843570340623069, "grad_norm": 243.66842651367188, "learning_rate": 2.8464993780767414e-06, "loss": 32.5644, "step": 338780 }, { "epoch": 0.6843772346949907, "grad_norm": 327.5538635253906, "learning_rate": 2.846184353105902e-06, "loss": 29.411, "step": 338790 }, { "epoch": 0.6843974353276745, "grad_norm": 263.5344543457031, "learning_rate": 2.8458693386325e-06, "loss": 11.6262, "step": 338800 }, { "epoch": 0.6844176359603583, "grad_norm": 166.4638671875, "learning_rate": 2.845554334658066e-06, "loss": 21.8029, "step": 338810 }, { "epoch": 0.6844378365930421, "grad_norm": 165.18563842773438, "learning_rate": 2.8452393411841418e-06, "loss": 36.7276, "step": 338820 }, { "epoch": 0.684458037225726, "grad_norm": 85.10484313964844, "learning_rate": 2.8449243582122577e-06, "loss": 18.2525, "step": 338830 }, { "epoch": 0.6844782378584098, "grad_norm": 367.1798400878906, "learning_rate": 2.8446093857439527e-06, "loss": 17.5455, "step": 338840 }, { "epoch": 0.6844984384910935, "grad_norm": 522.3867797851562, "learning_rate": 2.84429442378076e-06, "loss": 16.7963, "step": 338850 }, { "epoch": 0.6845186391237773, "grad_norm": 301.0872802734375, "learning_rate": 2.843979472324213e-06, "loss": 42.7276, "step": 338860 }, { "epoch": 0.6845388397564611, "grad_norm": 269.10028076171875, "learning_rate": 2.843664531375851e-06, "loss": 8.065, "step": 338870 }, { "epoch": 0.684559040389145, "grad_norm": 162.30120849609375, "learning_rate": 2.8433496009372063e-06, "loss": 11.763, "step": 338880 }, { "epoch": 0.6845792410218288, "grad_norm": 270.188232421875, "learning_rate": 2.843034681009812e-06, "loss": 17.6547, "step": 338890 }, { "epoch": 0.6845994416545126, "grad_norm": 168.6224365234375, "learning_rate": 2.8427197715952047e-06, "loss": 14.2912, "step": 338900 }, { "epoch": 0.6846196422871964, "grad_norm": 388.95849609375, "learning_rate": 2.8424048726949215e-06, "loss": 15.9734, "step": 338910 }, { "epoch": 0.6846398429198802, "grad_norm": 239.84727478027344, "learning_rate": 2.842089984310496e-06, "loss": 9.5158, "step": 338920 }, { "epoch": 0.684660043552564, "grad_norm": 414.3646240234375, "learning_rate": 2.8417751064434596e-06, "loss": 23.4782, "step": 338930 }, { "epoch": 0.6846802441852479, "grad_norm": 264.4274597167969, "learning_rate": 2.8414602390953515e-06, "loss": 13.3265, "step": 338940 }, { "epoch": 0.6847004448179317, "grad_norm": 351.666015625, "learning_rate": 2.841145382267705e-06, "loss": 14.7839, "step": 338950 }, { "epoch": 0.6847206454506155, "grad_norm": 805.705810546875, "learning_rate": 2.840830535962052e-06, "loss": 22.2572, "step": 338960 }, { "epoch": 0.6847408460832993, "grad_norm": 145.11767578125, "learning_rate": 2.840515700179929e-06, "loss": 16.8044, "step": 338970 }, { "epoch": 0.6847610467159831, "grad_norm": 343.5023498535156, "learning_rate": 2.8402008749228737e-06, "loss": 42.1589, "step": 338980 }, { "epoch": 0.684781247348667, "grad_norm": 
309.49102783203125, "learning_rate": 2.8398860601924145e-06, "loss": 9.1604, "step": 338990 }, { "epoch": 0.6848014479813508, "grad_norm": 277.977783203125, "learning_rate": 2.839571255990088e-06, "loss": 9.6512, "step": 339000 }, { "epoch": 0.6848216486140346, "grad_norm": 319.27227783203125, "learning_rate": 2.8392564623174314e-06, "loss": 21.7686, "step": 339010 }, { "epoch": 0.6848418492467184, "grad_norm": 428.2383728027344, "learning_rate": 2.838941679175977e-06, "loss": 27.1311, "step": 339020 }, { "epoch": 0.6848620498794022, "grad_norm": 497.61212158203125, "learning_rate": 2.838626906567257e-06, "loss": 34.5446, "step": 339030 }, { "epoch": 0.6848822505120861, "grad_norm": 288.2490234375, "learning_rate": 2.8383121444928063e-06, "loss": 10.8883, "step": 339040 }, { "epoch": 0.6849024511447699, "grad_norm": 619.1780395507812, "learning_rate": 2.837997392954165e-06, "loss": 16.7764, "step": 339050 }, { "epoch": 0.6849226517774537, "grad_norm": 366.1655578613281, "learning_rate": 2.8376826519528572e-06, "loss": 13.2817, "step": 339060 }, { "epoch": 0.6849428524101375, "grad_norm": 352.8001708984375, "learning_rate": 2.8373679214904225e-06, "loss": 29.2796, "step": 339070 }, { "epoch": 0.6849630530428213, "grad_norm": 0.0, "learning_rate": 2.837053201568396e-06, "loss": 14.3356, "step": 339080 }, { "epoch": 0.6849832536755052, "grad_norm": 440.4238586425781, "learning_rate": 2.83673849218831e-06, "loss": 18.8858, "step": 339090 }, { "epoch": 0.685003454308189, "grad_norm": 380.555419921875, "learning_rate": 2.8364237933516964e-06, "loss": 12.4292, "step": 339100 }, { "epoch": 0.6850236549408727, "grad_norm": 613.6964111328125, "learning_rate": 2.836109105060093e-06, "loss": 18.6043, "step": 339110 }, { "epoch": 0.6850438555735565, "grad_norm": 151.81381225585938, "learning_rate": 2.8357944273150304e-06, "loss": 13.9199, "step": 339120 }, { "epoch": 0.6850640562062403, "grad_norm": 622.8221435546875, "learning_rate": 2.835479760118042e-06, "loss": 17.62, "step": 339130 }, { "epoch": 0.6850842568389242, "grad_norm": 392.4091491699219, "learning_rate": 2.835165103470665e-06, "loss": 13.5514, "step": 339140 }, { "epoch": 0.685104457471608, "grad_norm": 155.42665100097656, "learning_rate": 2.8348504573744283e-06, "loss": 10.0394, "step": 339150 }, { "epoch": 0.6851246581042918, "grad_norm": 434.490478515625, "learning_rate": 2.83453582183087e-06, "loss": 20.22, "step": 339160 }, { "epoch": 0.6851448587369756, "grad_norm": 56.256961822509766, "learning_rate": 2.834221196841521e-06, "loss": 15.547, "step": 339170 }, { "epoch": 0.6851650593696594, "grad_norm": 348.5422668457031, "learning_rate": 2.8339065824079137e-06, "loss": 13.5531, "step": 339180 }, { "epoch": 0.6851852600023433, "grad_norm": 3.973308563232422, "learning_rate": 2.8335919785315854e-06, "loss": 13.6842, "step": 339190 }, { "epoch": 0.6852054606350271, "grad_norm": 196.72682189941406, "learning_rate": 2.8332773852140644e-06, "loss": 12.7807, "step": 339200 }, { "epoch": 0.6852256612677109, "grad_norm": 295.85382080078125, "learning_rate": 2.832962802456889e-06, "loss": 12.823, "step": 339210 }, { "epoch": 0.6852458619003947, "grad_norm": 159.17343139648438, "learning_rate": 2.8326482302615875e-06, "loss": 18.5173, "step": 339220 }, { "epoch": 0.6852660625330785, "grad_norm": 367.822021484375, "learning_rate": 2.832333668629698e-06, "loss": 14.9841, "step": 339230 }, { "epoch": 0.6852862631657624, "grad_norm": 262.43560791015625, "learning_rate": 2.8320191175627517e-06, "loss": 14.2051, "step": 339240 }, { "epoch": 
0.6853064637984462, "grad_norm": 571.4912109375, "learning_rate": 2.8317045770622784e-06, "loss": 24.2336, "step": 339250 }, { "epoch": 0.68532666443113, "grad_norm": 177.7554931640625, "learning_rate": 2.831390047129815e-06, "loss": 26.9193, "step": 339260 }, { "epoch": 0.6853468650638138, "grad_norm": 156.73809814453125, "learning_rate": 2.8310755277668966e-06, "loss": 24.9445, "step": 339270 }, { "epoch": 0.6853670656964976, "grad_norm": 141.30067443847656, "learning_rate": 2.8307610189750496e-06, "loss": 19.265, "step": 339280 }, { "epoch": 0.6853872663291815, "grad_norm": 416.14892578125, "learning_rate": 2.8304465207558103e-06, "loss": 19.1132, "step": 339290 }, { "epoch": 0.6854074669618653, "grad_norm": 470.0908203125, "learning_rate": 2.830132033110713e-06, "loss": 22.0419, "step": 339300 }, { "epoch": 0.6854276675945491, "grad_norm": 431.71142578125, "learning_rate": 2.829817556041289e-06, "loss": 20.6272, "step": 339310 }, { "epoch": 0.6854478682272329, "grad_norm": 682.9428100585938, "learning_rate": 2.82950308954907e-06, "loss": 23.6525, "step": 339320 }, { "epoch": 0.6854680688599167, "grad_norm": 514.5043334960938, "learning_rate": 2.829188633635591e-06, "loss": 14.5834, "step": 339330 }, { "epoch": 0.6854882694926006, "grad_norm": 441.3464050292969, "learning_rate": 2.828874188302383e-06, "loss": 22.2951, "step": 339340 }, { "epoch": 0.6855084701252844, "grad_norm": 321.1398620605469, "learning_rate": 2.8285597535509775e-06, "loss": 23.3013, "step": 339350 }, { "epoch": 0.6855286707579682, "grad_norm": 122.99885559082031, "learning_rate": 2.8282453293829083e-06, "loss": 11.8054, "step": 339360 }, { "epoch": 0.6855488713906519, "grad_norm": 1375.604736328125, "learning_rate": 2.8279309157997093e-06, "loss": 28.3806, "step": 339370 }, { "epoch": 0.6855690720233357, "grad_norm": 684.9808349609375, "learning_rate": 2.827616512802912e-06, "loss": 13.7723, "step": 339380 }, { "epoch": 0.6855892726560195, "grad_norm": 208.48329162597656, "learning_rate": 2.8273021203940466e-06, "loss": 22.7337, "step": 339390 }, { "epoch": 0.6856094732887034, "grad_norm": 413.0373840332031, "learning_rate": 2.826987738574649e-06, "loss": 14.3434, "step": 339400 }, { "epoch": 0.6856296739213872, "grad_norm": 276.425537109375, "learning_rate": 2.8266733673462497e-06, "loss": 15.7779, "step": 339410 }, { "epoch": 0.685649874554071, "grad_norm": 8.154792785644531, "learning_rate": 2.8263590067103785e-06, "loss": 23.8365, "step": 339420 }, { "epoch": 0.6856700751867548, "grad_norm": 189.44625854492188, "learning_rate": 2.8260446566685723e-06, "loss": 16.6812, "step": 339430 }, { "epoch": 0.6856902758194386, "grad_norm": 614.949951171875, "learning_rate": 2.825730317222358e-06, "loss": 17.2176, "step": 339440 }, { "epoch": 0.6857104764521225, "grad_norm": 486.2034606933594, "learning_rate": 2.8254159883732735e-06, "loss": 13.4057, "step": 339450 }, { "epoch": 0.6857306770848063, "grad_norm": 469.6888122558594, "learning_rate": 2.8251016701228475e-06, "loss": 16.3905, "step": 339460 }, { "epoch": 0.6857508777174901, "grad_norm": 247.13253784179688, "learning_rate": 2.82478736247261e-06, "loss": 17.5554, "step": 339470 }, { "epoch": 0.6857710783501739, "grad_norm": 203.11962890625, "learning_rate": 2.824473065424096e-06, "loss": 26.9208, "step": 339480 }, { "epoch": 0.6857912789828577, "grad_norm": 152.78378295898438, "learning_rate": 2.824158778978838e-06, "loss": 27.2649, "step": 339490 }, { "epoch": 0.6858114796155416, "grad_norm": 208.7306671142578, "learning_rate": 2.8238445031383634e-06, "loss": 
16.7673, "step": 339500 }, { "epoch": 0.6858316802482254, "grad_norm": 234.26898193359375, "learning_rate": 2.823530237904207e-06, "loss": 24.3152, "step": 339510 }, { "epoch": 0.6858518808809092, "grad_norm": 342.73193359375, "learning_rate": 2.8232159832779018e-06, "loss": 16.2689, "step": 339520 }, { "epoch": 0.685872081513593, "grad_norm": 607.8458862304688, "learning_rate": 2.8229017392609782e-06, "loss": 18.2768, "step": 339530 }, { "epoch": 0.6858922821462768, "grad_norm": 385.2176208496094, "learning_rate": 2.8225875058549656e-06, "loss": 11.7633, "step": 339540 }, { "epoch": 0.6859124827789607, "grad_norm": 220.71499633789062, "learning_rate": 2.8222732830613995e-06, "loss": 12.2941, "step": 339550 }, { "epoch": 0.6859326834116445, "grad_norm": 531.9205322265625, "learning_rate": 2.821959070881809e-06, "loss": 12.5715, "step": 339560 }, { "epoch": 0.6859528840443283, "grad_norm": 166.906005859375, "learning_rate": 2.821644869317724e-06, "loss": 13.1727, "step": 339570 }, { "epoch": 0.6859730846770121, "grad_norm": 926.1018676757812, "learning_rate": 2.8213306783706774e-06, "loss": 24.542, "step": 339580 }, { "epoch": 0.6859932853096959, "grad_norm": 295.1950988769531, "learning_rate": 2.821016498042205e-06, "loss": 15.5637, "step": 339590 }, { "epoch": 0.6860134859423798, "grad_norm": 18.469051361083984, "learning_rate": 2.8207023283338304e-06, "loss": 21.2554, "step": 339600 }, { "epoch": 0.6860336865750636, "grad_norm": 210.36146545410156, "learning_rate": 2.820388169247088e-06, "loss": 10.2452, "step": 339610 }, { "epoch": 0.6860538872077473, "grad_norm": 347.700927734375, "learning_rate": 2.820074020783511e-06, "loss": 15.1436, "step": 339620 }, { "epoch": 0.6860740878404311, "grad_norm": 155.03411865234375, "learning_rate": 2.8197598829446294e-06, "loss": 12.7612, "step": 339630 }, { "epoch": 0.6860942884731149, "grad_norm": 138.85855102539062, "learning_rate": 2.819445755731971e-06, "loss": 11.6971, "step": 339640 }, { "epoch": 0.6861144891057988, "grad_norm": 423.6790466308594, "learning_rate": 2.8191316391470703e-06, "loss": 15.4166, "step": 339650 }, { "epoch": 0.6861346897384826, "grad_norm": 835.9815063476562, "learning_rate": 2.8188175331914608e-06, "loss": 30.2213, "step": 339660 }, { "epoch": 0.6861548903711664, "grad_norm": 168.22265625, "learning_rate": 2.8185034378666666e-06, "loss": 37.2645, "step": 339670 }, { "epoch": 0.6861750910038502, "grad_norm": 154.26011657714844, "learning_rate": 2.818189353174221e-06, "loss": 9.9011, "step": 339680 }, { "epoch": 0.686195291636534, "grad_norm": 899.8419799804688, "learning_rate": 2.8178752791156593e-06, "loss": 15.8846, "step": 339690 }, { "epoch": 0.6862154922692179, "grad_norm": 362.2852783203125, "learning_rate": 2.8175612156925082e-06, "loss": 21.4574, "step": 339700 }, { "epoch": 0.6862356929019017, "grad_norm": 443.1937255859375, "learning_rate": 2.817247162906297e-06, "loss": 11.8531, "step": 339710 }, { "epoch": 0.6862558935345855, "grad_norm": 359.1485290527344, "learning_rate": 2.8169331207585603e-06, "loss": 18.6241, "step": 339720 }, { "epoch": 0.6862760941672693, "grad_norm": 391.692138671875, "learning_rate": 2.816619089250827e-06, "loss": 21.1239, "step": 339730 }, { "epoch": 0.6862962947999531, "grad_norm": 635.1595458984375, "learning_rate": 2.8163050683846256e-06, "loss": 21.8796, "step": 339740 }, { "epoch": 0.686316495432637, "grad_norm": 232.27285766601562, "learning_rate": 2.8159910581614904e-06, "loss": 19.166, "step": 339750 }, { "epoch": 0.6863366960653208, "grad_norm": 766.3213500976562, 
"learning_rate": 2.8156770585829475e-06, "loss": 22.994, "step": 339760 }, { "epoch": 0.6863568966980046, "grad_norm": 437.24273681640625, "learning_rate": 2.815363069650532e-06, "loss": 8.2184, "step": 339770 }, { "epoch": 0.6863770973306884, "grad_norm": 500.9024658203125, "learning_rate": 2.8150490913657713e-06, "loss": 24.9419, "step": 339780 }, { "epoch": 0.6863972979633722, "grad_norm": 323.9875793457031, "learning_rate": 2.8147351237301957e-06, "loss": 18.5207, "step": 339790 }, { "epoch": 0.686417498596056, "grad_norm": 620.3389892578125, "learning_rate": 2.814421166745337e-06, "loss": 15.5238, "step": 339800 }, { "epoch": 0.6864376992287399, "grad_norm": 126.69654846191406, "learning_rate": 2.814107220412723e-06, "loss": 25.5543, "step": 339810 }, { "epoch": 0.6864578998614237, "grad_norm": 902.9891967773438, "learning_rate": 2.8137932847338866e-06, "loss": 9.4116, "step": 339820 }, { "epoch": 0.6864781004941075, "grad_norm": 408.55572509765625, "learning_rate": 2.813479359710355e-06, "loss": 22.7702, "step": 339830 }, { "epoch": 0.6864983011267913, "grad_norm": 293.8913879394531, "learning_rate": 2.813165445343662e-06, "loss": 26.4214, "step": 339840 }, { "epoch": 0.6865185017594752, "grad_norm": 692.0927734375, "learning_rate": 2.8128515416353345e-06, "loss": 23.5343, "step": 339850 }, { "epoch": 0.686538702392159, "grad_norm": 1028.668701171875, "learning_rate": 2.8125376485869023e-06, "loss": 18.7257, "step": 339860 }, { "epoch": 0.6865589030248428, "grad_norm": 216.99252319335938, "learning_rate": 2.812223766199898e-06, "loss": 18.569, "step": 339870 }, { "epoch": 0.6865791036575265, "grad_norm": 318.1048889160156, "learning_rate": 2.8119098944758494e-06, "loss": 11.5931, "step": 339880 }, { "epoch": 0.6865993042902103, "grad_norm": 607.7545776367188, "learning_rate": 2.811596033416285e-06, "loss": 15.6965, "step": 339890 }, { "epoch": 0.6866195049228941, "grad_norm": 173.1877899169922, "learning_rate": 2.811282183022736e-06, "loss": 18.8509, "step": 339900 }, { "epoch": 0.686639705555578, "grad_norm": 424.3025817871094, "learning_rate": 2.8109683432967346e-06, "loss": 36.455, "step": 339910 }, { "epoch": 0.6866599061882618, "grad_norm": 353.19000244140625, "learning_rate": 2.8106545142398073e-06, "loss": 34.3502, "step": 339920 }, { "epoch": 0.6866801068209456, "grad_norm": 115.36778259277344, "learning_rate": 2.810340695853483e-06, "loss": 14.4016, "step": 339930 }, { "epoch": 0.6867003074536294, "grad_norm": 226.95738220214844, "learning_rate": 2.810026888139294e-06, "loss": 29.3139, "step": 339940 }, { "epoch": 0.6867205080863132, "grad_norm": 276.2227783203125, "learning_rate": 2.809713091098768e-06, "loss": 29.8927, "step": 339950 }, { "epoch": 0.6867407087189971, "grad_norm": 444.8854064941406, "learning_rate": 2.8093993047334333e-06, "loss": 15.124, "step": 339960 }, { "epoch": 0.6867609093516809, "grad_norm": 85.50798034667969, "learning_rate": 2.809085529044821e-06, "loss": 24.085, "step": 339970 }, { "epoch": 0.6867811099843647, "grad_norm": 788.4835815429688, "learning_rate": 2.808771764034462e-06, "loss": 13.8028, "step": 339980 }, { "epoch": 0.6868013106170485, "grad_norm": 32.53948974609375, "learning_rate": 2.8084580097038834e-06, "loss": 18.2738, "step": 339990 }, { "epoch": 0.6868215112497323, "grad_norm": 44.076515197753906, "learning_rate": 2.8081442660546126e-06, "loss": 15.7829, "step": 340000 }, { "epoch": 0.6868417118824162, "grad_norm": 288.90655517578125, "learning_rate": 2.8078305330881826e-06, "loss": 24.8038, "step": 340010 }, { "epoch": 
0.6868619125151, "grad_norm": 221.90176391601562, "learning_rate": 2.8075168108061213e-06, "loss": 13.9836, "step": 340020 }, { "epoch": 0.6868821131477838, "grad_norm": 377.74688720703125, "learning_rate": 2.8072030992099552e-06, "loss": 15.0077, "step": 340030 }, { "epoch": 0.6869023137804676, "grad_norm": 825.9044799804688, "learning_rate": 2.806889398301217e-06, "loss": 29.1264, "step": 340040 }, { "epoch": 0.6869225144131514, "grad_norm": 353.9937438964844, "learning_rate": 2.8065757080814315e-06, "loss": 14.6999, "step": 340050 }, { "epoch": 0.6869427150458353, "grad_norm": 311.3169250488281, "learning_rate": 2.8062620285521325e-06, "loss": 16.7061, "step": 340060 }, { "epoch": 0.6869629156785191, "grad_norm": 561.126220703125, "learning_rate": 2.8059483597148457e-06, "loss": 15.8589, "step": 340070 }, { "epoch": 0.6869831163112029, "grad_norm": 450.1378173828125, "learning_rate": 2.8056347015710987e-06, "loss": 22.9202, "step": 340080 }, { "epoch": 0.6870033169438867, "grad_norm": 408.6428527832031, "learning_rate": 2.805321054122424e-06, "loss": 24.6382, "step": 340090 }, { "epoch": 0.6870235175765705, "grad_norm": 248.0974884033203, "learning_rate": 2.805007417370347e-06, "loss": 12.2664, "step": 340100 }, { "epoch": 0.6870437182092544, "grad_norm": 262.83172607421875, "learning_rate": 2.804693791316399e-06, "loss": 18.6025, "step": 340110 }, { "epoch": 0.6870639188419382, "grad_norm": 318.6762390136719, "learning_rate": 2.8043801759621053e-06, "loss": 17.1193, "step": 340120 }, { "epoch": 0.6870841194746219, "grad_norm": 1668.1658935546875, "learning_rate": 2.804066571308998e-06, "loss": 18.7926, "step": 340130 }, { "epoch": 0.6871043201073057, "grad_norm": 465.1991882324219, "learning_rate": 2.8037529773586047e-06, "loss": 18.1072, "step": 340140 }, { "epoch": 0.6871245207399895, "grad_norm": 276.9002380371094, "learning_rate": 2.8034393941124505e-06, "loss": 10.82, "step": 340150 }, { "epoch": 0.6871447213726734, "grad_norm": 17.017826080322266, "learning_rate": 2.803125821572068e-06, "loss": 20.1898, "step": 340160 }, { "epoch": 0.6871649220053572, "grad_norm": 163.25289916992188, "learning_rate": 2.802812259738984e-06, "loss": 16.3359, "step": 340170 }, { "epoch": 0.687185122638041, "grad_norm": 0.4149203896522522, "learning_rate": 2.8024987086147247e-06, "loss": 15.007, "step": 340180 }, { "epoch": 0.6872053232707248, "grad_norm": 103.61934661865234, "learning_rate": 2.8021851682008205e-06, "loss": 21.0726, "step": 340190 }, { "epoch": 0.6872255239034086, "grad_norm": 610.775146484375, "learning_rate": 2.8018716384988034e-06, "loss": 13.1409, "step": 340200 }, { "epoch": 0.6872457245360925, "grad_norm": 489.849853515625, "learning_rate": 2.8015581195101927e-06, "loss": 25.1898, "step": 340210 }, { "epoch": 0.6872659251687763, "grad_norm": 296.70062255859375, "learning_rate": 2.801244611236521e-06, "loss": 11.8347, "step": 340220 }, { "epoch": 0.6872861258014601, "grad_norm": 115.51083374023438, "learning_rate": 2.800931113679318e-06, "loss": 14.6181, "step": 340230 }, { "epoch": 0.6873063264341439, "grad_norm": 339.8750915527344, "learning_rate": 2.8006176268401107e-06, "loss": 14.6715, "step": 340240 }, { "epoch": 0.6873265270668277, "grad_norm": 586.9760131835938, "learning_rate": 2.800304150720424e-06, "loss": 13.9651, "step": 340250 }, { "epoch": 0.6873467276995116, "grad_norm": 333.0511779785156, "learning_rate": 2.7999906853217885e-06, "loss": 21.7675, "step": 340260 }, { "epoch": 0.6873669283321954, "grad_norm": 248.5399169921875, "learning_rate": 
2.7996772306457354e-06, "loss": 16.6357, "step": 340270 }, { "epoch": 0.6873871289648792, "grad_norm": 473.8727111816406, "learning_rate": 2.799363786693785e-06, "loss": 33.6923, "step": 340280 }, { "epoch": 0.687407329597563, "grad_norm": 327.8541259765625, "learning_rate": 2.7990503534674684e-06, "loss": 19.5511, "step": 340290 }, { "epoch": 0.6874275302302468, "grad_norm": 413.4512939453125, "learning_rate": 2.798736930968315e-06, "loss": 19.8102, "step": 340300 }, { "epoch": 0.6874477308629307, "grad_norm": 217.4813232421875, "learning_rate": 2.798423519197851e-06, "loss": 12.5471, "step": 340310 }, { "epoch": 0.6874679314956145, "grad_norm": 8.071371078491211, "learning_rate": 2.798110118157602e-06, "loss": 17.3754, "step": 340320 }, { "epoch": 0.6874881321282983, "grad_norm": 216.73049926757812, "learning_rate": 2.797796727849099e-06, "loss": 13.7105, "step": 340330 }, { "epoch": 0.6875083327609821, "grad_norm": 2.946580648422241, "learning_rate": 2.7974833482738674e-06, "loss": 9.3621, "step": 340340 }, { "epoch": 0.6875285333936659, "grad_norm": 351.96160888671875, "learning_rate": 2.7971699794334332e-06, "loss": 29.4176, "step": 340350 }, { "epoch": 0.6875487340263498, "grad_norm": 365.6743469238281, "learning_rate": 2.7968566213293276e-06, "loss": 29.0831, "step": 340360 }, { "epoch": 0.6875689346590336, "grad_norm": 264.7963562011719, "learning_rate": 2.796543273963073e-06, "loss": 14.8826, "step": 340370 }, { "epoch": 0.6875891352917174, "grad_norm": 778.5166625976562, "learning_rate": 2.796229937336202e-06, "loss": 31.3981, "step": 340380 }, { "epoch": 0.6876093359244011, "grad_norm": 146.21185302734375, "learning_rate": 2.795916611450238e-06, "loss": 13.2399, "step": 340390 }, { "epoch": 0.6876295365570849, "grad_norm": 377.69317626953125, "learning_rate": 2.795603296306708e-06, "loss": 16.3829, "step": 340400 }, { "epoch": 0.6876497371897687, "grad_norm": 90.84944915771484, "learning_rate": 2.7952899919071417e-06, "loss": 27.3867, "step": 340410 }, { "epoch": 0.6876699378224526, "grad_norm": 327.8248291015625, "learning_rate": 2.7949766982530624e-06, "loss": 12.4236, "step": 340420 }, { "epoch": 0.6876901384551364, "grad_norm": 388.8680114746094, "learning_rate": 2.7946634153460016e-06, "loss": 18.8251, "step": 340430 }, { "epoch": 0.6877103390878202, "grad_norm": 348.58453369140625, "learning_rate": 2.794350143187482e-06, "loss": 26.4771, "step": 340440 }, { "epoch": 0.687730539720504, "grad_norm": 604.9873657226562, "learning_rate": 2.794036881779034e-06, "loss": 21.413, "step": 340450 }, { "epoch": 0.6877507403531878, "grad_norm": 299.6792907714844, "learning_rate": 2.7937236311221827e-06, "loss": 17.7782, "step": 340460 }, { "epoch": 0.6877709409858717, "grad_norm": 643.5337524414062, "learning_rate": 2.793410391218453e-06, "loss": 21.1648, "step": 340470 }, { "epoch": 0.6877911416185555, "grad_norm": 114.86630249023438, "learning_rate": 2.7930971620693746e-06, "loss": 13.6656, "step": 340480 }, { "epoch": 0.6878113422512393, "grad_norm": 294.94219970703125, "learning_rate": 2.792783943676474e-06, "loss": 18.2662, "step": 340490 }, { "epoch": 0.6878315428839231, "grad_norm": 344.4489440917969, "learning_rate": 2.7924707360412743e-06, "loss": 24.6059, "step": 340500 }, { "epoch": 0.6878517435166069, "grad_norm": 1122.903076171875, "learning_rate": 2.7921575391653048e-06, "loss": 26.4641, "step": 340510 }, { "epoch": 0.6878719441492908, "grad_norm": 7706.94482421875, "learning_rate": 2.791844353050094e-06, "loss": 41.3216, "step": 340520 }, { "epoch": 0.6878921447819746, 
"grad_norm": 545.328369140625, "learning_rate": 2.7915311776971655e-06, "loss": 17.2415, "step": 340530 }, { "epoch": 0.6879123454146584, "grad_norm": 413.46563720703125, "learning_rate": 2.7912180131080434e-06, "loss": 18.0958, "step": 340540 }, { "epoch": 0.6879325460473422, "grad_norm": 567.9567260742188, "learning_rate": 2.7909048592842602e-06, "loss": 18.6867, "step": 340550 }, { "epoch": 0.687952746680026, "grad_norm": 469.08050537109375, "learning_rate": 2.7905917162273377e-06, "loss": 12.8325, "step": 340560 }, { "epoch": 0.6879729473127099, "grad_norm": 45.83987045288086, "learning_rate": 2.790278583938802e-06, "loss": 12.1036, "step": 340570 }, { "epoch": 0.6879931479453937, "grad_norm": 58.32484817504883, "learning_rate": 2.78996546242018e-06, "loss": 13.5492, "step": 340580 }, { "epoch": 0.6880133485780775, "grad_norm": 198.85121154785156, "learning_rate": 2.7896523516730005e-06, "loss": 10.6656, "step": 340590 }, { "epoch": 0.6880335492107613, "grad_norm": 343.50848388671875, "learning_rate": 2.7893392516987873e-06, "loss": 11.052, "step": 340600 }, { "epoch": 0.6880537498434451, "grad_norm": 551.748291015625, "learning_rate": 2.7890261624990643e-06, "loss": 9.662, "step": 340610 }, { "epoch": 0.688073950476129, "grad_norm": 412.3675842285156, "learning_rate": 2.788713084075362e-06, "loss": 12.7007, "step": 340620 }, { "epoch": 0.6880941511088128, "grad_norm": 630.4569091796875, "learning_rate": 2.7884000164292034e-06, "loss": 11.6983, "step": 340630 }, { "epoch": 0.6881143517414965, "grad_norm": 284.4775085449219, "learning_rate": 2.7880869595621134e-06, "loss": 17.0826, "step": 340640 }, { "epoch": 0.6881345523741803, "grad_norm": 269.10418701171875, "learning_rate": 2.787773913475621e-06, "loss": 9.4737, "step": 340650 }, { "epoch": 0.6881547530068641, "grad_norm": 339.931640625, "learning_rate": 2.7874608781712486e-06, "loss": 37.7955, "step": 340660 }, { "epoch": 0.688174953639548, "grad_norm": 777.3720092773438, "learning_rate": 2.7871478536505253e-06, "loss": 26.072, "step": 340670 }, { "epoch": 0.6881951542722318, "grad_norm": 427.119384765625, "learning_rate": 2.7868348399149747e-06, "loss": 23.7469, "step": 340680 }, { "epoch": 0.6882153549049156, "grad_norm": 266.82476806640625, "learning_rate": 2.786521836966121e-06, "loss": 19.1638, "step": 340690 }, { "epoch": 0.6882355555375994, "grad_norm": 272.7963562011719, "learning_rate": 2.7862088448054936e-06, "loss": 21.1126, "step": 340700 }, { "epoch": 0.6882557561702832, "grad_norm": 87.15125274658203, "learning_rate": 2.7858958634346132e-06, "loss": 17.4884, "step": 340710 }, { "epoch": 0.688275956802967, "grad_norm": 209.98974609375, "learning_rate": 2.78558289285501e-06, "loss": 11.1753, "step": 340720 }, { "epoch": 0.6882961574356509, "grad_norm": 1364.9686279296875, "learning_rate": 2.7852699330682056e-06, "loss": 17.0484, "step": 340730 }, { "epoch": 0.6883163580683347, "grad_norm": 993.4461059570312, "learning_rate": 2.7849569840757284e-06, "loss": 44.8279, "step": 340740 }, { "epoch": 0.6883365587010185, "grad_norm": 228.784912109375, "learning_rate": 2.7846440458791024e-06, "loss": 24.1108, "step": 340750 }, { "epoch": 0.6883567593337023, "grad_norm": 422.5013122558594, "learning_rate": 2.784331118479851e-06, "loss": 15.2329, "step": 340760 }, { "epoch": 0.6883769599663861, "grad_norm": 281.162109375, "learning_rate": 2.7840182018795025e-06, "loss": 10.155, "step": 340770 }, { "epoch": 0.68839716059907, "grad_norm": 1008.8878784179688, "learning_rate": 2.7837052960795807e-06, "loss": 24.7039, "step": 340780 
}, { "epoch": 0.6884173612317538, "grad_norm": 345.2157287597656, "learning_rate": 2.783392401081609e-06, "loss": 20.5017, "step": 340790 }, { "epoch": 0.6884375618644376, "grad_norm": 336.3854675292969, "learning_rate": 2.7830795168871127e-06, "loss": 14.436, "step": 340800 }, { "epoch": 0.6884577624971214, "grad_norm": 198.10012817382812, "learning_rate": 2.782766643497623e-06, "loss": 12.2432, "step": 340810 }, { "epoch": 0.6884779631298052, "grad_norm": 745.2477416992188, "learning_rate": 2.7824537809146555e-06, "loss": 20.2383, "step": 340820 }, { "epoch": 0.6884981637624891, "grad_norm": 218.8932342529297, "learning_rate": 2.7821409291397394e-06, "loss": 16.3388, "step": 340830 }, { "epoch": 0.6885183643951729, "grad_norm": 406.8684387207031, "learning_rate": 2.7818280881744007e-06, "loss": 22.412, "step": 340840 }, { "epoch": 0.6885385650278567, "grad_norm": 445.8983459472656, "learning_rate": 2.7815152580201637e-06, "loss": 19.376, "step": 340850 }, { "epoch": 0.6885587656605405, "grad_norm": 63.937171936035156, "learning_rate": 2.7812024386785495e-06, "loss": 15.8397, "step": 340860 }, { "epoch": 0.6885789662932243, "grad_norm": 108.40321350097656, "learning_rate": 2.7808896301510867e-06, "loss": 15.4774, "step": 340870 }, { "epoch": 0.6885991669259082, "grad_norm": 1771.914306640625, "learning_rate": 2.7805768324393017e-06, "loss": 26.9394, "step": 340880 }, { "epoch": 0.688619367558592, "grad_norm": 24.736854553222656, "learning_rate": 2.7802640455447123e-06, "loss": 8.4835, "step": 340890 }, { "epoch": 0.6886395681912757, "grad_norm": 332.3523254394531, "learning_rate": 2.779951269468847e-06, "loss": 13.3643, "step": 340900 }, { "epoch": 0.6886597688239595, "grad_norm": 195.14511108398438, "learning_rate": 2.779638504213231e-06, "loss": 9.4787, "step": 340910 }, { "epoch": 0.6886799694566433, "grad_norm": 4.786660194396973, "learning_rate": 2.7793257497793892e-06, "loss": 12.2524, "step": 340920 }, { "epoch": 0.6887001700893272, "grad_norm": 322.34686279296875, "learning_rate": 2.7790130061688416e-06, "loss": 11.9802, "step": 340930 }, { "epoch": 0.688720370722011, "grad_norm": 355.6814880371094, "learning_rate": 2.7787002733831166e-06, "loss": 18.3249, "step": 340940 }, { "epoch": 0.6887405713546948, "grad_norm": 230.53578186035156, "learning_rate": 2.7783875514237373e-06, "loss": 15.303, "step": 340950 }, { "epoch": 0.6887607719873786, "grad_norm": 258.0885009765625, "learning_rate": 2.7780748402922263e-06, "loss": 9.6966, "step": 340960 }, { "epoch": 0.6887809726200624, "grad_norm": 459.1393737792969, "learning_rate": 2.77776213999011e-06, "loss": 25.5466, "step": 340970 }, { "epoch": 0.6888011732527463, "grad_norm": 9.57157039642334, "learning_rate": 2.777449450518911e-06, "loss": 29.0427, "step": 340980 }, { "epoch": 0.6888213738854301, "grad_norm": 3.6269240379333496, "learning_rate": 2.7771367718801546e-06, "loss": 9.6223, "step": 340990 }, { "epoch": 0.6888415745181139, "grad_norm": 962.04931640625, "learning_rate": 2.776824104075364e-06, "loss": 25.6872, "step": 341000 }, { "epoch": 0.6888617751507977, "grad_norm": 0.0, "learning_rate": 2.776511447106062e-06, "loss": 3.9544, "step": 341010 }, { "epoch": 0.6888819757834815, "grad_norm": 584.7926635742188, "learning_rate": 2.7761988009737746e-06, "loss": 19.5068, "step": 341020 }, { "epoch": 0.6889021764161654, "grad_norm": 245.7073211669922, "learning_rate": 2.7758861656800227e-06, "loss": 11.2009, "step": 341030 }, { "epoch": 0.6889223770488492, "grad_norm": 553.265380859375, "learning_rate": 2.775573541226334e-06, 
"loss": 13.5094, "step": 341040 }, { "epoch": 0.688942577681533, "grad_norm": 148.35116577148438, "learning_rate": 2.7752609276142282e-06, "loss": 17.183, "step": 341050 }, { "epoch": 0.6889627783142168, "grad_norm": 292.98052978515625, "learning_rate": 2.7749483248452324e-06, "loss": 13.9737, "step": 341060 }, { "epoch": 0.6889829789469006, "grad_norm": 276.73663330078125, "learning_rate": 2.7746357329208693e-06, "loss": 8.6564, "step": 341070 }, { "epoch": 0.6890031795795845, "grad_norm": 230.5380859375, "learning_rate": 2.7743231518426594e-06, "loss": 24.8348, "step": 341080 }, { "epoch": 0.6890233802122683, "grad_norm": 314.87286376953125, "learning_rate": 2.7740105816121306e-06, "loss": 20.6187, "step": 341090 }, { "epoch": 0.6890435808449521, "grad_norm": 252.77398681640625, "learning_rate": 2.7736980222308042e-06, "loss": 14.1285, "step": 341100 }, { "epoch": 0.6890637814776359, "grad_norm": 1018.5267333984375, "learning_rate": 2.773385473700201e-06, "loss": 29.8728, "step": 341110 }, { "epoch": 0.6890839821103197, "grad_norm": 221.62344360351562, "learning_rate": 2.7730729360218478e-06, "loss": 19.5299, "step": 341120 }, { "epoch": 0.6891041827430036, "grad_norm": 212.8367919921875, "learning_rate": 2.7727604091972687e-06, "loss": 11.6628, "step": 341130 }, { "epoch": 0.6891243833756874, "grad_norm": 75.01510620117188, "learning_rate": 2.772447893227985e-06, "loss": 22.7795, "step": 341140 }, { "epoch": 0.6891445840083712, "grad_norm": 451.935791015625, "learning_rate": 2.772135388115519e-06, "loss": 15.9128, "step": 341150 }, { "epoch": 0.6891647846410549, "grad_norm": 385.2341003417969, "learning_rate": 2.7718228938613955e-06, "loss": 16.1654, "step": 341160 }, { "epoch": 0.6891849852737387, "grad_norm": 183.01031494140625, "learning_rate": 2.7715104104671377e-06, "loss": 20.1729, "step": 341170 }, { "epoch": 0.6892051859064225, "grad_norm": 433.5887756347656, "learning_rate": 2.7711979379342658e-06, "loss": 21.0788, "step": 341180 }, { "epoch": 0.6892253865391064, "grad_norm": 715.4939575195312, "learning_rate": 2.7708854762643055e-06, "loss": 36.1636, "step": 341190 }, { "epoch": 0.6892455871717902, "grad_norm": 442.5892028808594, "learning_rate": 2.7705730254587802e-06, "loss": 37.1358, "step": 341200 }, { "epoch": 0.689265787804474, "grad_norm": 147.4979248046875, "learning_rate": 2.770260585519212e-06, "loss": 26.1768, "step": 341210 }, { "epoch": 0.6892859884371578, "grad_norm": 188.3673858642578, "learning_rate": 2.769948156447121e-06, "loss": 14.2528, "step": 341220 }, { "epoch": 0.6893061890698416, "grad_norm": 719.9442138671875, "learning_rate": 2.7696357382440344e-06, "loss": 17.4133, "step": 341230 }, { "epoch": 0.6893263897025255, "grad_norm": 217.36753845214844, "learning_rate": 2.769323330911472e-06, "loss": 18.39, "step": 341240 }, { "epoch": 0.6893465903352093, "grad_norm": 645.3171997070312, "learning_rate": 2.7690109344509563e-06, "loss": 14.6812, "step": 341250 }, { "epoch": 0.6893667909678931, "grad_norm": 514.885009765625, "learning_rate": 2.768698548864012e-06, "loss": 14.1372, "step": 341260 }, { "epoch": 0.6893869916005769, "grad_norm": 0.0, "learning_rate": 2.768386174152159e-06, "loss": 15.9463, "step": 341270 }, { "epoch": 0.6894071922332607, "grad_norm": 447.8780822753906, "learning_rate": 2.7680738103169223e-06, "loss": 24.9264, "step": 341280 }, { "epoch": 0.6894273928659446, "grad_norm": 221.66900634765625, "learning_rate": 2.7677614573598232e-06, "loss": 19.5077, "step": 341290 }, { "epoch": 0.6894475934986284, "grad_norm": 137.110595703125, 
"learning_rate": 2.7674491152823825e-06, "loss": 16.2112, "step": 341300 }, { "epoch": 0.6894677941313122, "grad_norm": 224.9796142578125, "learning_rate": 2.7671367840861256e-06, "loss": 12.5398, "step": 341310 }, { "epoch": 0.689487994763996, "grad_norm": 287.8158264160156, "learning_rate": 2.766824463772572e-06, "loss": 46.7692, "step": 341320 }, { "epoch": 0.6895081953966798, "grad_norm": 142.9286346435547, "learning_rate": 2.766512154343246e-06, "loss": 18.3329, "step": 341330 }, { "epoch": 0.6895283960293637, "grad_norm": 205.15731811523438, "learning_rate": 2.766199855799667e-06, "loss": 11.1582, "step": 341340 }, { "epoch": 0.6895485966620475, "grad_norm": 496.43463134765625, "learning_rate": 2.765887568143362e-06, "loss": 19.4094, "step": 341350 }, { "epoch": 0.6895687972947313, "grad_norm": 566.0675048828125, "learning_rate": 2.7655752913758494e-06, "loss": 22.1426, "step": 341360 }, { "epoch": 0.6895889979274151, "grad_norm": 37.94282150268555, "learning_rate": 2.76526302549865e-06, "loss": 20.0147, "step": 341370 }, { "epoch": 0.689609198560099, "grad_norm": 142.8148956298828, "learning_rate": 2.7649507705132894e-06, "loss": 12.5573, "step": 341380 }, { "epoch": 0.6896293991927828, "grad_norm": 449.9120788574219, "learning_rate": 2.764638526421287e-06, "loss": 16.3284, "step": 341390 }, { "epoch": 0.6896495998254666, "grad_norm": 310.89874267578125, "learning_rate": 2.7643262932241642e-06, "loss": 17.8502, "step": 341400 }, { "epoch": 0.6896698004581503, "grad_norm": 720.1802978515625, "learning_rate": 2.7640140709234444e-06, "loss": 21.1976, "step": 341410 }, { "epoch": 0.6896900010908341, "grad_norm": 299.3088073730469, "learning_rate": 2.763701859520652e-06, "loss": 20.999, "step": 341420 }, { "epoch": 0.6897102017235179, "grad_norm": 226.9088592529297, "learning_rate": 2.7633896590173014e-06, "loss": 12.0909, "step": 341430 }, { "epoch": 0.6897304023562018, "grad_norm": 1.896621584892273, "learning_rate": 2.763077469414919e-06, "loss": 12.4256, "step": 341440 }, { "epoch": 0.6897506029888856, "grad_norm": 113.39884948730469, "learning_rate": 2.7627652907150272e-06, "loss": 15.5896, "step": 341450 }, { "epoch": 0.6897708036215694, "grad_norm": 564.82568359375, "learning_rate": 2.7624531229191453e-06, "loss": 15.1447, "step": 341460 }, { "epoch": 0.6897910042542532, "grad_norm": 170.11587524414062, "learning_rate": 2.7621409660287944e-06, "loss": 17.5078, "step": 341470 }, { "epoch": 0.689811204886937, "grad_norm": 656.7221069335938, "learning_rate": 2.7618288200454966e-06, "loss": 20.1535, "step": 341480 }, { "epoch": 0.6898314055196209, "grad_norm": 617.5283203125, "learning_rate": 2.7615166849707786e-06, "loss": 17.732, "step": 341490 }, { "epoch": 0.6898516061523047, "grad_norm": 623.5250244140625, "learning_rate": 2.761204560806152e-06, "loss": 28.6873, "step": 341500 }, { "epoch": 0.6898718067849885, "grad_norm": 537.3228759765625, "learning_rate": 2.760892447553143e-06, "loss": 27.0241, "step": 341510 }, { "epoch": 0.6898920074176723, "grad_norm": 262.24029541015625, "learning_rate": 2.7605803452132753e-06, "loss": 16.3141, "step": 341520 }, { "epoch": 0.6899122080503561, "grad_norm": 124.42293548583984, "learning_rate": 2.7602682537880663e-06, "loss": 13.0789, "step": 341530 }, { "epoch": 0.68993240868304, "grad_norm": 463.8037414550781, "learning_rate": 2.7599561732790364e-06, "loss": 16.2278, "step": 341540 }, { "epoch": 0.6899526093157238, "grad_norm": 343.3017578125, "learning_rate": 2.759644103687711e-06, "loss": 20.3726, "step": 341550 }, { "epoch": 
0.6899728099484076, "grad_norm": 0.0, "learning_rate": 2.759332045015608e-06, "loss": 17.1994, "step": 341560 }, { "epoch": 0.6899930105810914, "grad_norm": 389.5445556640625, "learning_rate": 2.759019997264247e-06, "loss": 17.1864, "step": 341570 }, { "epoch": 0.6900132112137752, "grad_norm": 986.8057861328125, "learning_rate": 2.758707960435153e-06, "loss": 25.8143, "step": 341580 }, { "epoch": 0.690033411846459, "grad_norm": 150.53372192382812, "learning_rate": 2.7583959345298416e-06, "loss": 23.3609, "step": 341590 }, { "epoch": 0.6900536124791429, "grad_norm": 324.0406188964844, "learning_rate": 2.7580839195498397e-06, "loss": 18.046, "step": 341600 }, { "epoch": 0.6900738131118267, "grad_norm": 449.5357666015625, "learning_rate": 2.757771915496662e-06, "loss": 28.5391, "step": 341610 }, { "epoch": 0.6900940137445105, "grad_norm": 548.3353881835938, "learning_rate": 2.7574599223718347e-06, "loss": 14.7917, "step": 341620 }, { "epoch": 0.6901142143771943, "grad_norm": 565.8510131835938, "learning_rate": 2.7571479401768754e-06, "loss": 27.5098, "step": 341630 }, { "epoch": 0.6901344150098782, "grad_norm": 177.2288055419922, "learning_rate": 2.7568359689133027e-06, "loss": 16.4068, "step": 341640 }, { "epoch": 0.690154615642562, "grad_norm": 750.8839721679688, "learning_rate": 2.7565240085826423e-06, "loss": 18.1508, "step": 341650 }, { "epoch": 0.6901748162752458, "grad_norm": 427.1605224609375, "learning_rate": 2.7562120591864093e-06, "loss": 14.9471, "step": 341660 }, { "epoch": 0.6901950169079295, "grad_norm": 225.49070739746094, "learning_rate": 2.755900120726128e-06, "loss": 12.9618, "step": 341670 }, { "epoch": 0.6902152175406133, "grad_norm": 139.39871215820312, "learning_rate": 2.7555881932033186e-06, "loss": 19.017, "step": 341680 }, { "epoch": 0.6902354181732971, "grad_norm": 244.59457397460938, "learning_rate": 2.7552762766194975e-06, "loss": 6.6486, "step": 341690 }, { "epoch": 0.690255618805981, "grad_norm": 188.11622619628906, "learning_rate": 2.75496437097619e-06, "loss": 16.6627, "step": 341700 }, { "epoch": 0.6902758194386648, "grad_norm": 298.0274658203125, "learning_rate": 2.7546524762749126e-06, "loss": 23.5935, "step": 341710 }, { "epoch": 0.6902960200713486, "grad_norm": 817.8543090820312, "learning_rate": 2.7543405925171855e-06, "loss": 24.2122, "step": 341720 }, { "epoch": 0.6903162207040324, "grad_norm": 497.2781677246094, "learning_rate": 2.75402871970453e-06, "loss": 22.2856, "step": 341730 }, { "epoch": 0.6903364213367162, "grad_norm": 191.90689086914062, "learning_rate": 2.7537168578384703e-06, "loss": 18.6488, "step": 341740 }, { "epoch": 0.6903566219694001, "grad_norm": 8.177273750305176, "learning_rate": 2.753405006920518e-06, "loss": 26.7123, "step": 341750 }, { "epoch": 0.6903768226020839, "grad_norm": 293.4024353027344, "learning_rate": 2.7530931669521975e-06, "loss": 25.3256, "step": 341760 }, { "epoch": 0.6903970232347677, "grad_norm": 153.3657989501953, "learning_rate": 2.75278133793503e-06, "loss": 13.5059, "step": 341770 }, { "epoch": 0.6904172238674515, "grad_norm": 767.7716674804688, "learning_rate": 2.752469519870534e-06, "loss": 28.2242, "step": 341780 }, { "epoch": 0.6904374245001353, "grad_norm": 385.3144836425781, "learning_rate": 2.752157712760226e-06, "loss": 8.7122, "step": 341790 }, { "epoch": 0.6904576251328192, "grad_norm": 358.40167236328125, "learning_rate": 2.75184591660563e-06, "loss": 16.5276, "step": 341800 }, { "epoch": 0.690477825765503, "grad_norm": 265.4300231933594, "learning_rate": 2.7515341314082657e-06, "loss": 23.2288, 
"step": 341810 }, { "epoch": 0.6904980263981868, "grad_norm": 532.1017456054688, "learning_rate": 2.7512223571696515e-06, "loss": 21.0139, "step": 341820 }, { "epoch": 0.6905182270308706, "grad_norm": 345.5724792480469, "learning_rate": 2.750910593891305e-06, "loss": 19.7294, "step": 341830 }, { "epoch": 0.6905384276635544, "grad_norm": 136.15821838378906, "learning_rate": 2.7505988415747486e-06, "loss": 12.9156, "step": 341840 }, { "epoch": 0.6905586282962383, "grad_norm": 995.5159912109375, "learning_rate": 2.7502871002215016e-06, "loss": 32.061, "step": 341850 }, { "epoch": 0.6905788289289221, "grad_norm": 1319.545654296875, "learning_rate": 2.74997536983308e-06, "loss": 33.5073, "step": 341860 }, { "epoch": 0.6905990295616059, "grad_norm": 20.535587310791016, "learning_rate": 2.7496636504110077e-06, "loss": 19.944, "step": 341870 }, { "epoch": 0.6906192301942897, "grad_norm": 411.4344787597656, "learning_rate": 2.7493519419567995e-06, "loss": 14.5516, "step": 341880 }, { "epoch": 0.6906394308269735, "grad_norm": 391.94793701171875, "learning_rate": 2.749040244471979e-06, "loss": 27.9656, "step": 341890 }, { "epoch": 0.6906596314596574, "grad_norm": 394.89111328125, "learning_rate": 2.7487285579580635e-06, "loss": 28.7569, "step": 341900 }, { "epoch": 0.6906798320923412, "grad_norm": 564.4580688476562, "learning_rate": 2.7484168824165702e-06, "loss": 28.1125, "step": 341910 }, { "epoch": 0.6907000327250249, "grad_norm": 390.86895751953125, "learning_rate": 2.748105217849022e-06, "loss": 21.425, "step": 341920 }, { "epoch": 0.6907202333577087, "grad_norm": 591.1107788085938, "learning_rate": 2.747793564256933e-06, "loss": 13.648, "step": 341930 }, { "epoch": 0.6907404339903925, "grad_norm": 362.59063720703125, "learning_rate": 2.747481921641828e-06, "loss": 14.0846, "step": 341940 }, { "epoch": 0.6907606346230764, "grad_norm": 149.49891662597656, "learning_rate": 2.7471702900052204e-06, "loss": 10.2265, "step": 341950 }, { "epoch": 0.6907808352557602, "grad_norm": 184.87611389160156, "learning_rate": 2.746858669348634e-06, "loss": 13.4705, "step": 341960 }, { "epoch": 0.690801035888444, "grad_norm": 118.86833953857422, "learning_rate": 2.7465470596735843e-06, "loss": 18.1832, "step": 341970 }, { "epoch": 0.6908212365211278, "grad_norm": 5.063994884490967, "learning_rate": 2.74623546098159e-06, "loss": 14.1036, "step": 341980 }, { "epoch": 0.6908414371538116, "grad_norm": 283.65765380859375, "learning_rate": 2.745923873274172e-06, "loss": 24.1931, "step": 341990 }, { "epoch": 0.6908616377864955, "grad_norm": 286.74346923828125, "learning_rate": 2.7456122965528475e-06, "loss": 13.6474, "step": 342000 }, { "epoch": 0.6908818384191793, "grad_norm": 207.9541778564453, "learning_rate": 2.745300730819134e-06, "loss": 18.4033, "step": 342010 }, { "epoch": 0.6909020390518631, "grad_norm": 407.3016357421875, "learning_rate": 2.7449891760745504e-06, "loss": 20.6398, "step": 342020 }, { "epoch": 0.6909222396845469, "grad_norm": 126.01081085205078, "learning_rate": 2.744677632320621e-06, "loss": 12.2805, "step": 342030 }, { "epoch": 0.6909424403172307, "grad_norm": 422.3426208496094, "learning_rate": 2.744366099558855e-06, "loss": 22.3791, "step": 342040 }, { "epoch": 0.6909626409499146, "grad_norm": 180.5189971923828, "learning_rate": 2.7440545777907747e-06, "loss": 22.0096, "step": 342050 }, { "epoch": 0.6909828415825984, "grad_norm": 1454.185302734375, "learning_rate": 2.743743067017901e-06, "loss": 35.3015, "step": 342060 }, { "epoch": 0.6910030422152822, "grad_norm": 355.3629455566406, 
"learning_rate": 2.7434315672417493e-06, "loss": 16.1543, "step": 342070 }, { "epoch": 0.691023242847966, "grad_norm": 459.5791015625, "learning_rate": 2.743120078463837e-06, "loss": 14.744, "step": 342080 }, { "epoch": 0.6910434434806498, "grad_norm": 371.49664306640625, "learning_rate": 2.7428086006856843e-06, "loss": 18.5724, "step": 342090 }, { "epoch": 0.6910636441133337, "grad_norm": 1652.5516357421875, "learning_rate": 2.742497133908812e-06, "loss": 28.9154, "step": 342100 }, { "epoch": 0.6910838447460175, "grad_norm": 390.2788391113281, "learning_rate": 2.7421856781347313e-06, "loss": 16.0531, "step": 342110 }, { "epoch": 0.6911040453787013, "grad_norm": 583.36767578125, "learning_rate": 2.7418742333649628e-06, "loss": 11.0729, "step": 342120 }, { "epoch": 0.6911242460113851, "grad_norm": 72.25563049316406, "learning_rate": 2.7415627996010287e-06, "loss": 18.3156, "step": 342130 }, { "epoch": 0.6911444466440689, "grad_norm": 434.4340515136719, "learning_rate": 2.741251376844443e-06, "loss": 31.8693, "step": 342140 }, { "epoch": 0.6911646472767528, "grad_norm": 231.30372619628906, "learning_rate": 2.7409399650967217e-06, "loss": 14.0856, "step": 342150 }, { "epoch": 0.6911848479094366, "grad_norm": 382.9341125488281, "learning_rate": 2.7406285643593875e-06, "loss": 22.5025, "step": 342160 }, { "epoch": 0.6912050485421204, "grad_norm": 463.0118103027344, "learning_rate": 2.740317174633955e-06, "loss": 11.0961, "step": 342170 }, { "epoch": 0.6912252491748041, "grad_norm": 394.51226806640625, "learning_rate": 2.7400057959219416e-06, "loss": 16.2463, "step": 342180 }, { "epoch": 0.6912454498074879, "grad_norm": 669.329833984375, "learning_rate": 2.7396944282248672e-06, "loss": 15.9909, "step": 342190 }, { "epoch": 0.6912656504401717, "grad_norm": 488.08099365234375, "learning_rate": 2.739383071544246e-06, "loss": 16.1023, "step": 342200 }, { "epoch": 0.6912858510728556, "grad_norm": 474.5091552734375, "learning_rate": 2.7390717258816003e-06, "loss": 32.0847, "step": 342210 }, { "epoch": 0.6913060517055394, "grad_norm": 410.9049987792969, "learning_rate": 2.738760391238442e-06, "loss": 19.5689, "step": 342220 }, { "epoch": 0.6913262523382232, "grad_norm": 40.76900863647461, "learning_rate": 2.7384490676162932e-06, "loss": 16.7012, "step": 342230 }, { "epoch": 0.691346452970907, "grad_norm": 129.6337432861328, "learning_rate": 2.73813775501667e-06, "loss": 12.0863, "step": 342240 }, { "epoch": 0.6913666536035908, "grad_norm": 58.73592758178711, "learning_rate": 2.7378264534410865e-06, "loss": 18.7431, "step": 342250 }, { "epoch": 0.6913868542362747, "grad_norm": 42.233219146728516, "learning_rate": 2.7375151628910645e-06, "loss": 24.7578, "step": 342260 }, { "epoch": 0.6914070548689585, "grad_norm": 504.1520080566406, "learning_rate": 2.7372038833681176e-06, "loss": 12.358, "step": 342270 }, { "epoch": 0.6914272555016423, "grad_norm": 226.17491149902344, "learning_rate": 2.7368926148737663e-06, "loss": 17.2239, "step": 342280 }, { "epoch": 0.6914474561343261, "grad_norm": 329.98089599609375, "learning_rate": 2.736581357409526e-06, "loss": 26.5297, "step": 342290 }, { "epoch": 0.6914676567670099, "grad_norm": 76.80755615234375, "learning_rate": 2.736270110976912e-06, "loss": 9.6641, "step": 342300 }, { "epoch": 0.6914878573996938, "grad_norm": 203.5906219482422, "learning_rate": 2.7359588755774437e-06, "loss": 23.3691, "step": 342310 }, { "epoch": 0.6915080580323776, "grad_norm": 378.94305419921875, "learning_rate": 2.7356476512126386e-06, "loss": 20.2183, "step": 342320 }, { "epoch": 
0.6915282586650614, "grad_norm": 243.5066375732422, "learning_rate": 2.7353364378840096e-06, "loss": 13.5426, "step": 342330 }, { "epoch": 0.6915484592977452, "grad_norm": 462.4680480957031, "learning_rate": 2.735025235593076e-06, "loss": 21.7532, "step": 342340 }, { "epoch": 0.691568659930429, "grad_norm": 440.2567138671875, "learning_rate": 2.734714044341359e-06, "loss": 26.9437, "step": 342350 }, { "epoch": 0.6915888605631129, "grad_norm": 340.2488098144531, "learning_rate": 2.7344028641303667e-06, "loss": 10.5369, "step": 342360 }, { "epoch": 0.6916090611957967, "grad_norm": 292.6058349609375, "learning_rate": 2.7340916949616204e-06, "loss": 50.0589, "step": 342370 }, { "epoch": 0.6916292618284805, "grad_norm": 706.38330078125, "learning_rate": 2.733780536836638e-06, "loss": 19.9434, "step": 342380 }, { "epoch": 0.6916494624611643, "grad_norm": 332.52581787109375, "learning_rate": 2.733469389756934e-06, "loss": 16.805, "step": 342390 }, { "epoch": 0.6916696630938481, "grad_norm": 335.47833251953125, "learning_rate": 2.7331582537240243e-06, "loss": 23.3698, "step": 342400 }, { "epoch": 0.691689863726532, "grad_norm": 650.4447021484375, "learning_rate": 2.7328471287394265e-06, "loss": 23.3412, "step": 342410 }, { "epoch": 0.6917100643592158, "grad_norm": 625.17431640625, "learning_rate": 2.7325360148046598e-06, "loss": 25.9586, "step": 342420 }, { "epoch": 0.6917302649918996, "grad_norm": 406.36944580078125, "learning_rate": 2.732224911921234e-06, "loss": 8.3115, "step": 342430 }, { "epoch": 0.6917504656245833, "grad_norm": 159.1592254638672, "learning_rate": 2.731913820090669e-06, "loss": 23.8566, "step": 342440 }, { "epoch": 0.6917706662572671, "grad_norm": 349.4911804199219, "learning_rate": 2.7316027393144827e-06, "loss": 29.5121, "step": 342450 }, { "epoch": 0.691790866889951, "grad_norm": 113.1489028930664, "learning_rate": 2.73129166959419e-06, "loss": 10.7803, "step": 342460 }, { "epoch": 0.6918110675226348, "grad_norm": 626.6707763671875, "learning_rate": 2.730980610931304e-06, "loss": 14.4416, "step": 342470 }, { "epoch": 0.6918312681553186, "grad_norm": 468.79327392578125, "learning_rate": 2.7306695633273454e-06, "loss": 8.2098, "step": 342480 }, { "epoch": 0.6918514687880024, "grad_norm": 246.52345275878906, "learning_rate": 2.7303585267838263e-06, "loss": 17.3883, "step": 342490 }, { "epoch": 0.6918716694206862, "grad_norm": 239.7262420654297, "learning_rate": 2.7300475013022666e-06, "loss": 19.8601, "step": 342500 }, { "epoch": 0.69189187005337, "grad_norm": 391.1923522949219, "learning_rate": 2.7297364868841803e-06, "loss": 19.1219, "step": 342510 }, { "epoch": 0.6919120706860539, "grad_norm": 628.7379760742188, "learning_rate": 2.72942548353108e-06, "loss": 15.5385, "step": 342520 }, { "epoch": 0.6919322713187377, "grad_norm": 211.38189697265625, "learning_rate": 2.729114491244487e-06, "loss": 14.2025, "step": 342530 }, { "epoch": 0.6919524719514215, "grad_norm": 499.23553466796875, "learning_rate": 2.728803510025913e-06, "loss": 16.8967, "step": 342540 }, { "epoch": 0.6919726725841053, "grad_norm": 363.2895202636719, "learning_rate": 2.728492539876877e-06, "loss": 13.3384, "step": 342550 }, { "epoch": 0.6919928732167892, "grad_norm": 587.162841796875, "learning_rate": 2.728181580798891e-06, "loss": 11.0642, "step": 342560 }, { "epoch": 0.692013073849473, "grad_norm": 361.87200927734375, "learning_rate": 2.727870632793474e-06, "loss": 12.462, "step": 342570 }, { "epoch": 0.6920332744821568, "grad_norm": 535.1802368164062, "learning_rate": 2.7275596958621397e-06, "loss": 
25.5524, "step": 342580 }, { "epoch": 0.6920534751148406, "grad_norm": 839.19482421875, "learning_rate": 2.7272487700064027e-06, "loss": 23.5786, "step": 342590 }, { "epoch": 0.6920736757475244, "grad_norm": 384.6982727050781, "learning_rate": 2.726937855227781e-06, "loss": 34.0722, "step": 342600 }, { "epoch": 0.6920938763802083, "grad_norm": 692.6864624023438, "learning_rate": 2.7266269515277887e-06, "loss": 13.7394, "step": 342610 }, { "epoch": 0.6921140770128921, "grad_norm": 257.4065856933594, "learning_rate": 2.7263160589079385e-06, "loss": 34.1307, "step": 342620 }, { "epoch": 0.6921342776455759, "grad_norm": 430.5124816894531, "learning_rate": 2.726005177369749e-06, "loss": 20.9934, "step": 342630 }, { "epoch": 0.6921544782782597, "grad_norm": 414.63714599609375, "learning_rate": 2.7256943069147375e-06, "loss": 3.8553, "step": 342640 }, { "epoch": 0.6921746789109435, "grad_norm": 603.077880859375, "learning_rate": 2.7253834475444126e-06, "loss": 19.9507, "step": 342650 }, { "epoch": 0.6921948795436274, "grad_norm": 500.0039978027344, "learning_rate": 2.7250725992602926e-06, "loss": 35.457, "step": 342660 }, { "epoch": 0.6922150801763112, "grad_norm": 536.0646362304688, "learning_rate": 2.724761762063895e-06, "loss": 14.2985, "step": 342670 }, { "epoch": 0.692235280808995, "grad_norm": 561.66748046875, "learning_rate": 2.724450935956733e-06, "loss": 17.293, "step": 342680 }, { "epoch": 0.6922554814416787, "grad_norm": 486.9822998046875, "learning_rate": 2.7241401209403185e-06, "loss": 20.2485, "step": 342690 }, { "epoch": 0.6922756820743625, "grad_norm": 383.0431213378906, "learning_rate": 2.723829317016169e-06, "loss": 12.8665, "step": 342700 }, { "epoch": 0.6922958827070463, "grad_norm": 338.18701171875, "learning_rate": 2.723518524185804e-06, "loss": 27.404, "step": 342710 }, { "epoch": 0.6923160833397302, "grad_norm": 38.59807205200195, "learning_rate": 2.723207742450729e-06, "loss": 7.1109, "step": 342720 }, { "epoch": 0.692336283972414, "grad_norm": 194.36668395996094, "learning_rate": 2.7228969718124638e-06, "loss": 23.9927, "step": 342730 }, { "epoch": 0.6923564846050978, "grad_norm": 262.5284423828125, "learning_rate": 2.7225862122725243e-06, "loss": 24.375, "step": 342740 }, { "epoch": 0.6923766852377816, "grad_norm": 473.5325012207031, "learning_rate": 2.7222754638324235e-06, "loss": 19.0865, "step": 342750 }, { "epoch": 0.6923968858704654, "grad_norm": 458.2725524902344, "learning_rate": 2.7219647264936733e-06, "loss": 28.6746, "step": 342760 }, { "epoch": 0.6924170865031493, "grad_norm": 5.04648494720459, "learning_rate": 2.7216540002577933e-06, "loss": 20.2492, "step": 342770 }, { "epoch": 0.6924372871358331, "grad_norm": 5.904820442199707, "learning_rate": 2.7213432851262955e-06, "loss": 10.9156, "step": 342780 }, { "epoch": 0.6924574877685169, "grad_norm": 299.0598449707031, "learning_rate": 2.721032581100692e-06, "loss": 18.7586, "step": 342790 }, { "epoch": 0.6924776884012007, "grad_norm": 481.9532165527344, "learning_rate": 2.7207218881825016e-06, "loss": 20.6702, "step": 342800 }, { "epoch": 0.6924978890338845, "grad_norm": 837.5463256835938, "learning_rate": 2.7204112063732337e-06, "loss": 12.8458, "step": 342810 }, { "epoch": 0.6925180896665684, "grad_norm": 293.55029296875, "learning_rate": 2.720100535674407e-06, "loss": 7.2574, "step": 342820 }, { "epoch": 0.6925382902992522, "grad_norm": 532.0534057617188, "learning_rate": 2.719789876087532e-06, "loss": 25.8048, "step": 342830 }, { "epoch": 0.692558490931936, "grad_norm": 1.385595440864563, "learning_rate": 
2.719479227614127e-06, "loss": 9.461, "step": 342840 }, { "epoch": 0.6925786915646198, "grad_norm": 495.5453186035156, "learning_rate": 2.719168590255703e-06, "loss": 18.9211, "step": 342850 }, { "epoch": 0.6925988921973036, "grad_norm": 656.9524536132812, "learning_rate": 2.718857964013773e-06, "loss": 24.4337, "step": 342860 }, { "epoch": 0.6926190928299875, "grad_norm": 786.6329345703125, "learning_rate": 2.7185473488898544e-06, "loss": 38.7861, "step": 342870 }, { "epoch": 0.6926392934626713, "grad_norm": 637.03076171875, "learning_rate": 2.7182367448854573e-06, "loss": 16.2283, "step": 342880 }, { "epoch": 0.6926594940953551, "grad_norm": 251.8146209716797, "learning_rate": 2.7179261520021005e-06, "loss": 12.2752, "step": 342890 }, { "epoch": 0.6926796947280389, "grad_norm": 207.01556396484375, "learning_rate": 2.717615570241294e-06, "loss": 11.8785, "step": 342900 }, { "epoch": 0.6926998953607227, "grad_norm": 193.8882293701172, "learning_rate": 2.71730499960455e-06, "loss": 13.7648, "step": 342910 }, { "epoch": 0.6927200959934066, "grad_norm": 607.9854125976562, "learning_rate": 2.7169944400933872e-06, "loss": 11.8336, "step": 342920 }, { "epoch": 0.6927402966260904, "grad_norm": 299.84722900390625, "learning_rate": 2.7166838917093163e-06, "loss": 25.2349, "step": 342930 }, { "epoch": 0.6927604972587742, "grad_norm": 350.7240905761719, "learning_rate": 2.7163733544538496e-06, "loss": 17.3845, "step": 342940 }, { "epoch": 0.6927806978914579, "grad_norm": 230.2483673095703, "learning_rate": 2.716062828328502e-06, "loss": 8.2107, "step": 342950 }, { "epoch": 0.6928008985241417, "grad_norm": 169.19032287597656, "learning_rate": 2.7157523133347906e-06, "loss": 12.2735, "step": 342960 }, { "epoch": 0.6928210991568255, "grad_norm": 297.84771728515625, "learning_rate": 2.7154418094742222e-06, "loss": 7.1174, "step": 342970 }, { "epoch": 0.6928412997895094, "grad_norm": 267.66998291015625, "learning_rate": 2.7151313167483127e-06, "loss": 20.0398, "step": 342980 }, { "epoch": 0.6928615004221932, "grad_norm": 495.4940185546875, "learning_rate": 2.714820835158579e-06, "loss": 22.8677, "step": 342990 }, { "epoch": 0.692881701054877, "grad_norm": 300.28643798828125, "learning_rate": 2.714510364706531e-06, "loss": 16.3664, "step": 343000 }, { "epoch": 0.6929019016875608, "grad_norm": 328.9463806152344, "learning_rate": 2.7141999053936795e-06, "loss": 21.4665, "step": 343010 }, { "epoch": 0.6929221023202446, "grad_norm": 421.4502258300781, "learning_rate": 2.713889457221541e-06, "loss": 24.8321, "step": 343020 }, { "epoch": 0.6929423029529285, "grad_norm": 117.91653442382812, "learning_rate": 2.713579020191632e-06, "loss": 21.2125, "step": 343030 }, { "epoch": 0.6929625035856123, "grad_norm": 330.5600280761719, "learning_rate": 2.713268594305458e-06, "loss": 23.6342, "step": 343040 }, { "epoch": 0.6929827042182961, "grad_norm": 432.38043212890625, "learning_rate": 2.712958179564535e-06, "loss": 16.9824, "step": 343050 }, { "epoch": 0.6930029048509799, "grad_norm": 393.30328369140625, "learning_rate": 2.7126477759703786e-06, "loss": 14.0953, "step": 343060 }, { "epoch": 0.6930231054836637, "grad_norm": 300.6133728027344, "learning_rate": 2.7123373835244994e-06, "loss": 16.7631, "step": 343070 }, { "epoch": 0.6930433061163476, "grad_norm": 1277.46826171875, "learning_rate": 2.712027002228408e-06, "loss": 30.9857, "step": 343080 }, { "epoch": 0.6930635067490314, "grad_norm": 453.1453857421875, "learning_rate": 2.711716632083622e-06, "loss": 10.205, "step": 343090 }, { "epoch": 0.6930837073817152, 
"grad_norm": 170.03887939453125, "learning_rate": 2.7114062730916513e-06, "loss": 14.3898, "step": 343100 }, { "epoch": 0.693103908014399, "grad_norm": 506.34442138671875, "learning_rate": 2.711095925254007e-06, "loss": 22.0371, "step": 343110 }, { "epoch": 0.6931241086470828, "grad_norm": 848.0342407226562, "learning_rate": 2.7107855885722036e-06, "loss": 15.3399, "step": 343120 }, { "epoch": 0.6931443092797667, "grad_norm": 136.1507568359375, "learning_rate": 2.710475263047756e-06, "loss": 15.8781, "step": 343130 }, { "epoch": 0.6931645099124505, "grad_norm": 319.15411376953125, "learning_rate": 2.7101649486821735e-06, "loss": 21.6083, "step": 343140 }, { "epoch": 0.6931847105451343, "grad_norm": 131.8520965576172, "learning_rate": 2.7098546454769683e-06, "loss": 10.8579, "step": 343150 }, { "epoch": 0.6932049111778181, "grad_norm": 261.65594482421875, "learning_rate": 2.7095443534336545e-06, "loss": 13.2355, "step": 343160 }, { "epoch": 0.693225111810502, "grad_norm": 152.35980224609375, "learning_rate": 2.7092340725537446e-06, "loss": 15.7499, "step": 343170 }, { "epoch": 0.6932453124431858, "grad_norm": 1050.298828125, "learning_rate": 2.708923802838748e-06, "loss": 22.562, "step": 343180 }, { "epoch": 0.6932655130758696, "grad_norm": 331.61590576171875, "learning_rate": 2.7086135442901806e-06, "loss": 14.9861, "step": 343190 }, { "epoch": 0.6932857137085533, "grad_norm": 564.1597900390625, "learning_rate": 2.708303296909551e-06, "loss": 25.4975, "step": 343200 }, { "epoch": 0.6933059143412371, "grad_norm": 433.4954833984375, "learning_rate": 2.7079930606983753e-06, "loss": 12.8945, "step": 343210 }, { "epoch": 0.6933261149739209, "grad_norm": 160.9016876220703, "learning_rate": 2.707682835658163e-06, "loss": 21.6092, "step": 343220 }, { "epoch": 0.6933463156066048, "grad_norm": 507.4945983886719, "learning_rate": 2.7073726217904247e-06, "loss": 17.8813, "step": 343230 }, { "epoch": 0.6933665162392886, "grad_norm": 208.25315856933594, "learning_rate": 2.7070624190966744e-06, "loss": 13.1097, "step": 343240 }, { "epoch": 0.6933867168719724, "grad_norm": 137.00123596191406, "learning_rate": 2.7067522275784275e-06, "loss": 14.4099, "step": 343250 }, { "epoch": 0.6934069175046562, "grad_norm": 420.7992248535156, "learning_rate": 2.7064420472371876e-06, "loss": 11.5682, "step": 343260 }, { "epoch": 0.69342711813734, "grad_norm": 290.5668640136719, "learning_rate": 2.706131878074472e-06, "loss": 14.6584, "step": 343270 }, { "epoch": 0.6934473187700239, "grad_norm": 201.93861389160156, "learning_rate": 2.7058217200917934e-06, "loss": 14.4573, "step": 343280 }, { "epoch": 0.6934675194027077, "grad_norm": 375.8645935058594, "learning_rate": 2.705511573290661e-06, "loss": 17.5655, "step": 343290 }, { "epoch": 0.6934877200353915, "grad_norm": 368.1650085449219, "learning_rate": 2.705201437672585e-06, "loss": 15.0431, "step": 343300 }, { "epoch": 0.6935079206680753, "grad_norm": 841.0086669921875, "learning_rate": 2.7048913132390787e-06, "loss": 26.5423, "step": 343310 }, { "epoch": 0.6935281213007591, "grad_norm": 496.6707763671875, "learning_rate": 2.7045811999916583e-06, "loss": 23.5289, "step": 343320 }, { "epoch": 0.693548321933443, "grad_norm": 398.8883056640625, "learning_rate": 2.7042710979318266e-06, "loss": 11.9281, "step": 343330 }, { "epoch": 0.6935685225661268, "grad_norm": 236.6054229736328, "learning_rate": 2.703961007061099e-06, "loss": 12.007, "step": 343340 }, { "epoch": 0.6935887231988106, "grad_norm": 148.3018341064453, "learning_rate": 2.7036509273809887e-06, "loss": 14.4419, 
"step": 343350 }, { "epoch": 0.6936089238314944, "grad_norm": 311.6505432128906, "learning_rate": 2.7033408588930054e-06, "loss": 24.959, "step": 343360 }, { "epoch": 0.6936291244641782, "grad_norm": 365.60302734375, "learning_rate": 2.7030308015986583e-06, "loss": 21.7858, "step": 343370 }, { "epoch": 0.6936493250968621, "grad_norm": 258.2699279785156, "learning_rate": 2.7027207554994627e-06, "loss": 31.418, "step": 343380 }, { "epoch": 0.6936695257295459, "grad_norm": 312.8044738769531, "learning_rate": 2.702410720596927e-06, "loss": 35.4766, "step": 343390 }, { "epoch": 0.6936897263622297, "grad_norm": 534.6698608398438, "learning_rate": 2.7021006968925613e-06, "loss": 18.845, "step": 343400 }, { "epoch": 0.6937099269949135, "grad_norm": 186.9039306640625, "learning_rate": 2.7017906843878795e-06, "loss": 17.7089, "step": 343410 }, { "epoch": 0.6937301276275973, "grad_norm": 207.4452362060547, "learning_rate": 2.7014806830843897e-06, "loss": 12.9498, "step": 343420 }, { "epoch": 0.6937503282602812, "grad_norm": 319.1103515625, "learning_rate": 2.7011706929836056e-06, "loss": 19.4653, "step": 343430 }, { "epoch": 0.693770528892965, "grad_norm": 443.2889099121094, "learning_rate": 2.700860714087036e-06, "loss": 12.2581, "step": 343440 }, { "epoch": 0.6937907295256488, "grad_norm": 166.7688446044922, "learning_rate": 2.700550746396193e-06, "loss": 16.6311, "step": 343450 }, { "epoch": 0.6938109301583325, "grad_norm": 348.3745422363281, "learning_rate": 2.7002407899125883e-06, "loss": 15.0211, "step": 343460 }, { "epoch": 0.6938311307910163, "grad_norm": 464.27130126953125, "learning_rate": 2.699930844637728e-06, "loss": 25.4117, "step": 343470 }, { "epoch": 0.6938513314237001, "grad_norm": 367.8930358886719, "learning_rate": 2.6996209105731273e-06, "loss": 23.6164, "step": 343480 }, { "epoch": 0.693871532056384, "grad_norm": 478.6580810546875, "learning_rate": 2.6993109877202942e-06, "loss": 7.154, "step": 343490 }, { "epoch": 0.6938917326890678, "grad_norm": 455.1324462890625, "learning_rate": 2.699001076080742e-06, "loss": 26.1042, "step": 343500 }, { "epoch": 0.6939119333217516, "grad_norm": 547.47119140625, "learning_rate": 2.6986911756559795e-06, "loss": 23.7914, "step": 343510 }, { "epoch": 0.6939321339544354, "grad_norm": 767.7376098632812, "learning_rate": 2.698381286447516e-06, "loss": 44.6646, "step": 343520 }, { "epoch": 0.6939523345871192, "grad_norm": 445.3045959472656, "learning_rate": 2.698071408456864e-06, "loss": 24.4303, "step": 343530 }, { "epoch": 0.6939725352198031, "grad_norm": 459.85736083984375, "learning_rate": 2.6977615416855325e-06, "loss": 14.9492, "step": 343540 }, { "epoch": 0.6939927358524869, "grad_norm": 566.3602905273438, "learning_rate": 2.697451686135031e-06, "loss": 20.4367, "step": 343550 }, { "epoch": 0.6940129364851707, "grad_norm": 142.90953063964844, "learning_rate": 2.6971418418068696e-06, "loss": 24.6062, "step": 343560 }, { "epoch": 0.6940331371178545, "grad_norm": 356.7073669433594, "learning_rate": 2.696832008702564e-06, "loss": 23.8435, "step": 343570 }, { "epoch": 0.6940533377505383, "grad_norm": 380.256103515625, "learning_rate": 2.6965221868236156e-06, "loss": 29.407, "step": 343580 }, { "epoch": 0.6940735383832222, "grad_norm": 238.80799865722656, "learning_rate": 2.6962123761715395e-06, "loss": 12.5504, "step": 343590 }, { "epoch": 0.694093739015906, "grad_norm": 292.9700622558594, "learning_rate": 2.6959025767478466e-06, "loss": 13.9543, "step": 343600 }, { "epoch": 0.6941139396485898, "grad_norm": 168.14193725585938, "learning_rate": 
2.6955927885540444e-06, "loss": 13.1834, "step": 343610 }, { "epoch": 0.6941341402812736, "grad_norm": 557.2796630859375, "learning_rate": 2.6952830115916417e-06, "loss": 15.7199, "step": 343620 }, { "epoch": 0.6941543409139574, "grad_norm": 348.6419982910156, "learning_rate": 2.6949732458621502e-06, "loss": 15.3388, "step": 343630 }, { "epoch": 0.6941745415466413, "grad_norm": 370.0677490234375, "learning_rate": 2.694663491367084e-06, "loss": 21.5792, "step": 343640 }, { "epoch": 0.6941947421793251, "grad_norm": 630.1292114257812, "learning_rate": 2.694353748107944e-06, "loss": 28.5514, "step": 343650 }, { "epoch": 0.6942149428120089, "grad_norm": 40.72578048706055, "learning_rate": 2.694044016086244e-06, "loss": 10.9335, "step": 343660 }, { "epoch": 0.6942351434446927, "grad_norm": 267.8238220214844, "learning_rate": 2.6937342953034963e-06, "loss": 16.2739, "step": 343670 }, { "epoch": 0.6942553440773765, "grad_norm": 513.903564453125, "learning_rate": 2.6934245857612074e-06, "loss": 21.8557, "step": 343680 }, { "epoch": 0.6942755447100604, "grad_norm": 330.5280456542969, "learning_rate": 2.6931148874608854e-06, "loss": 22.1439, "step": 343690 }, { "epoch": 0.6942957453427442, "grad_norm": 257.3049621582031, "learning_rate": 2.692805200404044e-06, "loss": 14.5472, "step": 343700 }, { "epoch": 0.6943159459754279, "grad_norm": 534.4154052734375, "learning_rate": 2.69249552459219e-06, "loss": 33.5646, "step": 343710 }, { "epoch": 0.6943361466081117, "grad_norm": 420.3919982910156, "learning_rate": 2.6921858600268304e-06, "loss": 16.5254, "step": 343720 }, { "epoch": 0.6943563472407955, "grad_norm": 475.1569519042969, "learning_rate": 2.6918762067094776e-06, "loss": 21.7642, "step": 343730 }, { "epoch": 0.6943765478734794, "grad_norm": 318.7789001464844, "learning_rate": 2.6915665646416423e-06, "loss": 22.4433, "step": 343740 }, { "epoch": 0.6943967485061632, "grad_norm": 815.83740234375, "learning_rate": 2.6912569338248317e-06, "loss": 11.2253, "step": 343750 }, { "epoch": 0.694416949138847, "grad_norm": 271.6921081542969, "learning_rate": 2.6909473142605522e-06, "loss": 13.2603, "step": 343760 }, { "epoch": 0.6944371497715308, "grad_norm": 355.1222229003906, "learning_rate": 2.6906377059503176e-06, "loss": 20.8389, "step": 343770 }, { "epoch": 0.6944573504042146, "grad_norm": 414.8699645996094, "learning_rate": 2.690328108895635e-06, "loss": 21.0342, "step": 343780 }, { "epoch": 0.6944775510368985, "grad_norm": 24.91457748413086, "learning_rate": 2.6900185230980115e-06, "loss": 14.9175, "step": 343790 }, { "epoch": 0.6944977516695823, "grad_norm": 302.8378601074219, "learning_rate": 2.6897089485589584e-06, "loss": 16.5532, "step": 343800 }, { "epoch": 0.6945179523022661, "grad_norm": 547.6952514648438, "learning_rate": 2.6893993852799827e-06, "loss": 31.2786, "step": 343810 }, { "epoch": 0.6945381529349499, "grad_norm": 455.7846984863281, "learning_rate": 2.689089833262595e-06, "loss": 14.3525, "step": 343820 }, { "epoch": 0.6945583535676337, "grad_norm": 153.60443115234375, "learning_rate": 2.688780292508304e-06, "loss": 12.6853, "step": 343830 }, { "epoch": 0.6945785542003176, "grad_norm": 671.5533447265625, "learning_rate": 2.6884707630186158e-06, "loss": 30.678, "step": 343840 }, { "epoch": 0.6945987548330014, "grad_norm": 421.84185791015625, "learning_rate": 2.6881612447950425e-06, "loss": 14.1414, "step": 343850 }, { "epoch": 0.6946189554656852, "grad_norm": 296.41375732421875, "learning_rate": 2.6878517378390906e-06, "loss": 18.8036, "step": 343860 }, { "epoch": 0.694639156098369, 
"grad_norm": 399.1142272949219, "learning_rate": 2.6875422421522667e-06, "loss": 23.7339, "step": 343870 }, { "epoch": 0.6946593567310528, "grad_norm": 164.97605895996094, "learning_rate": 2.6872327577360822e-06, "loss": 16.1473, "step": 343880 }, { "epoch": 0.6946795573637367, "grad_norm": 187.7085418701172, "learning_rate": 2.6869232845920466e-06, "loss": 16.9912, "step": 343890 }, { "epoch": 0.6946997579964205, "grad_norm": 507.7225341796875, "learning_rate": 2.686613822721666e-06, "loss": 28.3052, "step": 343900 }, { "epoch": 0.6947199586291043, "grad_norm": 281.36639404296875, "learning_rate": 2.686304372126447e-06, "loss": 17.0173, "step": 343910 }, { "epoch": 0.6947401592617881, "grad_norm": 643.44189453125, "learning_rate": 2.6859949328079005e-06, "loss": 17.3979, "step": 343920 }, { "epoch": 0.6947603598944719, "grad_norm": 78.4656753540039, "learning_rate": 2.685685504767537e-06, "loss": 13.7249, "step": 343930 }, { "epoch": 0.6947805605271558, "grad_norm": 664.6371459960938, "learning_rate": 2.6853760880068587e-06, "loss": 40.8635, "step": 343940 }, { "epoch": 0.6948007611598396, "grad_norm": 369.2539367675781, "learning_rate": 2.6850666825273762e-06, "loss": 15.5132, "step": 343950 }, { "epoch": 0.6948209617925234, "grad_norm": 163.865966796875, "learning_rate": 2.6847572883305993e-06, "loss": 20.9875, "step": 343960 }, { "epoch": 0.6948411624252071, "grad_norm": 114.55418395996094, "learning_rate": 2.6844479054180354e-06, "loss": 16.347, "step": 343970 }, { "epoch": 0.6948613630578909, "grad_norm": 505.979736328125, "learning_rate": 2.6841385337911895e-06, "loss": 17.2984, "step": 343980 }, { "epoch": 0.6948815636905747, "grad_norm": 784.9866333007812, "learning_rate": 2.683829173451573e-06, "loss": 27.368, "step": 343990 }, { "epoch": 0.6949017643232586, "grad_norm": 260.9141845703125, "learning_rate": 2.683519824400693e-06, "loss": 19.7886, "step": 344000 }, { "epoch": 0.6949219649559424, "grad_norm": 217.6226806640625, "learning_rate": 2.683210486640054e-06, "loss": 9.5596, "step": 344010 }, { "epoch": 0.6949421655886262, "grad_norm": 317.6123962402344, "learning_rate": 2.682901160171168e-06, "loss": 17.6249, "step": 344020 }, { "epoch": 0.69496236622131, "grad_norm": 202.72772216796875, "learning_rate": 2.6825918449955386e-06, "loss": 15.8701, "step": 344030 }, { "epoch": 0.6949825668539938, "grad_norm": 364.443359375, "learning_rate": 2.682282541114678e-06, "loss": 10.4873, "step": 344040 }, { "epoch": 0.6950027674866777, "grad_norm": 299.5184631347656, "learning_rate": 2.6819732485300887e-06, "loss": 19.8617, "step": 344050 }, { "epoch": 0.6950229681193615, "grad_norm": 178.6079559326172, "learning_rate": 2.6816639672432826e-06, "loss": 14.4802, "step": 344060 }, { "epoch": 0.6950431687520453, "grad_norm": 554.738525390625, "learning_rate": 2.681354697255765e-06, "loss": 14.2377, "step": 344070 }, { "epoch": 0.6950633693847291, "grad_norm": 383.87725830078125, "learning_rate": 2.681045438569042e-06, "loss": 18.7799, "step": 344080 }, { "epoch": 0.6950835700174129, "grad_norm": 129.1428985595703, "learning_rate": 2.680736191184624e-06, "loss": 17.5231, "step": 344090 }, { "epoch": 0.6951037706500968, "grad_norm": 183.20083618164062, "learning_rate": 2.680426955104014e-06, "loss": 9.6862, "step": 344100 }, { "epoch": 0.6951239712827806, "grad_norm": 206.9559783935547, "learning_rate": 2.6801177303287247e-06, "loss": 13.197, "step": 344110 }, { "epoch": 0.6951441719154644, "grad_norm": 320.4137878417969, "learning_rate": 2.6798085168602595e-06, "loss": 21.8238, "step": 
344120 }, { "epoch": 0.6951643725481482, "grad_norm": 327.3988037109375, "learning_rate": 2.6794993147001246e-06, "loss": 22.6297, "step": 344130 }, { "epoch": 0.695184573180832, "grad_norm": 263.87213134765625, "learning_rate": 2.6791901238498302e-06, "loss": 17.3819, "step": 344140 }, { "epoch": 0.6952047738135159, "grad_norm": 215.83154296875, "learning_rate": 2.678880944310882e-06, "loss": 25.1242, "step": 344150 }, { "epoch": 0.6952249744461997, "grad_norm": 817.7406005859375, "learning_rate": 2.678571776084784e-06, "loss": 14.7458, "step": 344160 }, { "epoch": 0.6952451750788835, "grad_norm": 190.51718139648438, "learning_rate": 2.6782626191730466e-06, "loss": 24.2561, "step": 344170 }, { "epoch": 0.6952653757115673, "grad_norm": 2117.508056640625, "learning_rate": 2.677953473577177e-06, "loss": 17.2426, "step": 344180 }, { "epoch": 0.6952855763442511, "grad_norm": 1233.36669921875, "learning_rate": 2.6776443392986808e-06, "loss": 42.3313, "step": 344190 }, { "epoch": 0.695305776976935, "grad_norm": 316.6412353515625, "learning_rate": 2.677335216339062e-06, "loss": 18.3216, "step": 344200 }, { "epoch": 0.6953259776096188, "grad_norm": 514.594482421875, "learning_rate": 2.6770261046998315e-06, "loss": 20.8065, "step": 344210 }, { "epoch": 0.6953461782423026, "grad_norm": 220.20916748046875, "learning_rate": 2.6767170043824942e-06, "loss": 32.971, "step": 344220 }, { "epoch": 0.6953663788749863, "grad_norm": 7.986196994781494, "learning_rate": 2.6764079153885547e-06, "loss": 15.0296, "step": 344230 }, { "epoch": 0.6953865795076701, "grad_norm": 253.78794860839844, "learning_rate": 2.6760988377195206e-06, "loss": 12.7176, "step": 344240 }, { "epoch": 0.695406780140354, "grad_norm": 332.3077392578125, "learning_rate": 2.675789771376904e-06, "loss": 27.8419, "step": 344250 }, { "epoch": 0.6954269807730378, "grad_norm": 477.7747802734375, "learning_rate": 2.6754807163622014e-06, "loss": 15.0112, "step": 344260 }, { "epoch": 0.6954471814057216, "grad_norm": 45.76880645751953, "learning_rate": 2.6751716726769237e-06, "loss": 14.3162, "step": 344270 }, { "epoch": 0.6954673820384054, "grad_norm": 211.90830993652344, "learning_rate": 2.67486264032258e-06, "loss": 21.739, "step": 344280 }, { "epoch": 0.6954875826710892, "grad_norm": 343.0636901855469, "learning_rate": 2.674553619300673e-06, "loss": 21.8155, "step": 344290 }, { "epoch": 0.695507783303773, "grad_norm": 350.2322998046875, "learning_rate": 2.6742446096127086e-06, "loss": 12.1763, "step": 344300 }, { "epoch": 0.6955279839364569, "grad_norm": 514.8064575195312, "learning_rate": 2.673935611260195e-06, "loss": 15.4479, "step": 344310 }, { "epoch": 0.6955481845691407, "grad_norm": 208.349365234375, "learning_rate": 2.6736266242446372e-06, "loss": 8.9711, "step": 344320 }, { "epoch": 0.6955683852018245, "grad_norm": 209.54495239257812, "learning_rate": 2.6733176485675396e-06, "loss": 17.0446, "step": 344330 }, { "epoch": 0.6955885858345083, "grad_norm": 585.2893676757812, "learning_rate": 2.6730086842304093e-06, "loss": 15.8475, "step": 344340 }, { "epoch": 0.6956087864671922, "grad_norm": 56.8120231628418, "learning_rate": 2.6726997312347546e-06, "loss": 24.2784, "step": 344350 }, { "epoch": 0.695628987099876, "grad_norm": 65.433349609375, "learning_rate": 2.672390789582079e-06, "loss": 19.357, "step": 344360 }, { "epoch": 0.6956491877325598, "grad_norm": 289.3479919433594, "learning_rate": 2.672081859273886e-06, "loss": 22.835, "step": 344370 }, { "epoch": 0.6956693883652436, "grad_norm": 128.19265747070312, "learning_rate": 
2.6717729403116866e-06, "loss": 8.0582, "step": 344380 }, { "epoch": 0.6956895889979274, "grad_norm": 446.22857666015625, "learning_rate": 2.671464032696982e-06, "loss": 26.8629, "step": 344390 }, { "epoch": 0.6957097896306113, "grad_norm": 1369.6859130859375, "learning_rate": 2.671155136431279e-06, "loss": 30.2338, "step": 344400 }, { "epoch": 0.6957299902632951, "grad_norm": 652.7861938476562, "learning_rate": 2.6708462515160845e-06, "loss": 30.2917, "step": 344410 }, { "epoch": 0.6957501908959789, "grad_norm": 532.6014404296875, "learning_rate": 2.670537377952901e-06, "loss": 38.7639, "step": 344420 }, { "epoch": 0.6957703915286627, "grad_norm": 59.8785285949707, "learning_rate": 2.670228515743238e-06, "loss": 13.5877, "step": 344430 }, { "epoch": 0.6957905921613465, "grad_norm": 486.962890625, "learning_rate": 2.6699196648885984e-06, "loss": 27.2722, "step": 344440 }, { "epoch": 0.6958107927940304, "grad_norm": 132.49996948242188, "learning_rate": 2.6696108253904856e-06, "loss": 16.1759, "step": 344450 }, { "epoch": 0.6958309934267142, "grad_norm": 17.91014289855957, "learning_rate": 2.669301997250409e-06, "loss": 24.9406, "step": 344460 }, { "epoch": 0.695851194059398, "grad_norm": 221.14280700683594, "learning_rate": 2.668993180469872e-06, "loss": 12.3098, "step": 344470 }, { "epoch": 0.6958713946920817, "grad_norm": 319.5102844238281, "learning_rate": 2.668684375050378e-06, "loss": 9.2963, "step": 344480 }, { "epoch": 0.6958915953247655, "grad_norm": 344.1924743652344, "learning_rate": 2.668375580993433e-06, "loss": 13.7195, "step": 344490 }, { "epoch": 0.6959117959574493, "grad_norm": 654.0018920898438, "learning_rate": 2.6680667983005446e-06, "loss": 17.6545, "step": 344500 }, { "epoch": 0.6959319965901332, "grad_norm": 421.8318786621094, "learning_rate": 2.667758026973216e-06, "loss": 21.6775, "step": 344510 }, { "epoch": 0.695952197222817, "grad_norm": 372.5625915527344, "learning_rate": 2.667449267012949e-06, "loss": 19.3328, "step": 344520 }, { "epoch": 0.6959723978555008, "grad_norm": 73.21385192871094, "learning_rate": 2.667140518421255e-06, "loss": 12.0675, "step": 344530 }, { "epoch": 0.6959925984881846, "grad_norm": 593.1376342773438, "learning_rate": 2.6668317811996342e-06, "loss": 17.3688, "step": 344540 }, { "epoch": 0.6960127991208684, "grad_norm": 272.7303161621094, "learning_rate": 2.66652305534959e-06, "loss": 21.8176, "step": 344550 }, { "epoch": 0.6960329997535523, "grad_norm": 352.7306213378906, "learning_rate": 2.6662143408726306e-06, "loss": 17.7654, "step": 344560 }, { "epoch": 0.6960532003862361, "grad_norm": 185.99951171875, "learning_rate": 2.6659056377702606e-06, "loss": 18.1175, "step": 344570 }, { "epoch": 0.6960734010189199, "grad_norm": 565.5797729492188, "learning_rate": 2.6655969460439835e-06, "loss": 23.5426, "step": 344580 }, { "epoch": 0.6960936016516037, "grad_norm": 1198.6705322265625, "learning_rate": 2.6652882656953016e-06, "loss": 30.6016, "step": 344590 }, { "epoch": 0.6961138022842875, "grad_norm": 271.77508544921875, "learning_rate": 2.6649795967257243e-06, "loss": 18.6716, "step": 344600 }, { "epoch": 0.6961340029169714, "grad_norm": 478.1930236816406, "learning_rate": 2.6646709391367524e-06, "loss": 15.9826, "step": 344610 }, { "epoch": 0.6961542035496552, "grad_norm": 493.9571838378906, "learning_rate": 2.6643622929298896e-06, "loss": 29.0761, "step": 344620 }, { "epoch": 0.696174404182339, "grad_norm": 291.86895751953125, "learning_rate": 2.6640536581066434e-06, "loss": 22.289, "step": 344630 }, { "epoch": 0.6961946048150228, 
"grad_norm": 647.7007446289062, "learning_rate": 2.6637450346685145e-06, "loss": 20.7047, "step": 344640 }, { "epoch": 0.6962148054477066, "grad_norm": 372.8083801269531, "learning_rate": 2.6634364226170105e-06, "loss": 8.2205, "step": 344650 }, { "epoch": 0.6962350060803905, "grad_norm": 396.89886474609375, "learning_rate": 2.663127821953633e-06, "loss": 12.5007, "step": 344660 }, { "epoch": 0.6962552067130743, "grad_norm": 92.28195190429688, "learning_rate": 2.6628192326798884e-06, "loss": 15.7482, "step": 344670 }, { "epoch": 0.6962754073457581, "grad_norm": 251.61859130859375, "learning_rate": 2.662510654797279e-06, "loss": 13.5063, "step": 344680 }, { "epoch": 0.6962956079784419, "grad_norm": 314.4949035644531, "learning_rate": 2.662202088307308e-06, "loss": 15.6295, "step": 344690 }, { "epoch": 0.6963158086111257, "grad_norm": 568.0778198242188, "learning_rate": 2.661893533211482e-06, "loss": 22.3975, "step": 344700 }, { "epoch": 0.6963360092438096, "grad_norm": 1040.8778076171875, "learning_rate": 2.6615849895113014e-06, "loss": 18.2789, "step": 344710 }, { "epoch": 0.6963562098764934, "grad_norm": 16.518474578857422, "learning_rate": 2.661276457208274e-06, "loss": 27.5529, "step": 344720 }, { "epoch": 0.6963764105091772, "grad_norm": 29.804773330688477, "learning_rate": 2.660967936303902e-06, "loss": 8.306, "step": 344730 }, { "epoch": 0.6963966111418609, "grad_norm": 0.0, "learning_rate": 2.6606594267996853e-06, "loss": 15.3719, "step": 344740 }, { "epoch": 0.6964168117745447, "grad_norm": 557.9322509765625, "learning_rate": 2.6603509286971342e-06, "loss": 16.6565, "step": 344750 }, { "epoch": 0.6964370124072286, "grad_norm": 569.5855712890625, "learning_rate": 2.660042441997748e-06, "loss": 19.1198, "step": 344760 }, { "epoch": 0.6964572130399124, "grad_norm": 226.26670837402344, "learning_rate": 2.6597339667030296e-06, "loss": 17.1782, "step": 344770 }, { "epoch": 0.6964774136725962, "grad_norm": 681.4418334960938, "learning_rate": 2.659425502814484e-06, "loss": 20.0888, "step": 344780 }, { "epoch": 0.69649761430528, "grad_norm": 229.27304077148438, "learning_rate": 2.659117050333616e-06, "loss": 21.8857, "step": 344790 }, { "epoch": 0.6965178149379638, "grad_norm": 235.35556030273438, "learning_rate": 2.658808609261928e-06, "loss": 9.5892, "step": 344800 }, { "epoch": 0.6965380155706477, "grad_norm": 313.0237121582031, "learning_rate": 2.658500179600921e-06, "loss": 21.6451, "step": 344810 }, { "epoch": 0.6965582162033315, "grad_norm": 36.43192672729492, "learning_rate": 2.6581917613521026e-06, "loss": 14.984, "step": 344820 }, { "epoch": 0.6965784168360153, "grad_norm": 697.332763671875, "learning_rate": 2.657883354516973e-06, "loss": 12.2152, "step": 344830 }, { "epoch": 0.6965986174686991, "grad_norm": 267.1387939453125, "learning_rate": 2.6575749590970336e-06, "loss": 17.5607, "step": 344840 }, { "epoch": 0.6966188181013829, "grad_norm": 502.197509765625, "learning_rate": 2.6572665750937898e-06, "loss": 17.2962, "step": 344850 }, { "epoch": 0.6966390187340668, "grad_norm": 291.0636291503906, "learning_rate": 2.656958202508749e-06, "loss": 21.6046, "step": 344860 }, { "epoch": 0.6966592193667506, "grad_norm": 379.7904968261719, "learning_rate": 2.656649841343406e-06, "loss": 15.628, "step": 344870 }, { "epoch": 0.6966794199994344, "grad_norm": 357.1584777832031, "learning_rate": 2.656341491599267e-06, "loss": 22.4748, "step": 344880 }, { "epoch": 0.6966996206321182, "grad_norm": 65.4222640991211, "learning_rate": 2.6560331532778373e-06, "loss": 12.0059, "step": 344890 }, { 
"epoch": 0.696719821264802, "grad_norm": 566.2289428710938, "learning_rate": 2.6557248263806175e-06, "loss": 16.9036, "step": 344900 }, { "epoch": 0.6967400218974859, "grad_norm": 344.33990478515625, "learning_rate": 2.655416510909109e-06, "loss": 12.362, "step": 344910 }, { "epoch": 0.6967602225301697, "grad_norm": 277.7760314941406, "learning_rate": 2.655108206864818e-06, "loss": 13.9431, "step": 344920 }, { "epoch": 0.6967804231628535, "grad_norm": 390.2586669921875, "learning_rate": 2.654799914249245e-06, "loss": 12.7993, "step": 344930 }, { "epoch": 0.6968006237955373, "grad_norm": 163.2816162109375, "learning_rate": 2.65449163306389e-06, "loss": 16.1431, "step": 344940 }, { "epoch": 0.6968208244282211, "grad_norm": 2.7255659103393555, "learning_rate": 2.65418336331026e-06, "loss": 22.8669, "step": 344950 }, { "epoch": 0.696841025060905, "grad_norm": 343.7413635253906, "learning_rate": 2.653875104989857e-06, "loss": 20.7097, "step": 344960 }, { "epoch": 0.6968612256935888, "grad_norm": 133.50035095214844, "learning_rate": 2.653566858104182e-06, "loss": 12.405, "step": 344970 }, { "epoch": 0.6968814263262726, "grad_norm": 454.4327392578125, "learning_rate": 2.6532586226547354e-06, "loss": 19.0066, "step": 344980 }, { "epoch": 0.6969016269589563, "grad_norm": 358.3306884765625, "learning_rate": 2.652950398643024e-06, "loss": 17.5057, "step": 344990 }, { "epoch": 0.6969218275916401, "grad_norm": 527.6987915039062, "learning_rate": 2.6526421860705474e-06, "loss": 29.4849, "step": 345000 }, { "epoch": 0.6969420282243239, "grad_norm": 429.4691162109375, "learning_rate": 2.6523339849388065e-06, "loss": 22.8431, "step": 345010 }, { "epoch": 0.6969622288570078, "grad_norm": 443.98284912109375, "learning_rate": 2.6520257952493066e-06, "loss": 19.1371, "step": 345020 }, { "epoch": 0.6969824294896916, "grad_norm": 71.12145233154297, "learning_rate": 2.6517176170035463e-06, "loss": 13.3549, "step": 345030 }, { "epoch": 0.6970026301223754, "grad_norm": 422.64208984375, "learning_rate": 2.651409450203032e-06, "loss": 12.5471, "step": 345040 }, { "epoch": 0.6970228307550592, "grad_norm": 558.01513671875, "learning_rate": 2.6511012948492625e-06, "loss": 20.7775, "step": 345050 }, { "epoch": 0.697043031387743, "grad_norm": 464.9036865234375, "learning_rate": 2.650793150943739e-06, "loss": 21.6904, "step": 345060 }, { "epoch": 0.6970632320204269, "grad_norm": 609.20849609375, "learning_rate": 2.650485018487966e-06, "loss": 15.1591, "step": 345070 }, { "epoch": 0.6970834326531107, "grad_norm": 720.4866333007812, "learning_rate": 2.6501768974834453e-06, "loss": 15.6479, "step": 345080 }, { "epoch": 0.6971036332857945, "grad_norm": 306.485107421875, "learning_rate": 2.649868787931674e-06, "loss": 11.1549, "step": 345090 }, { "epoch": 0.6971238339184783, "grad_norm": 402.8655090332031, "learning_rate": 2.649560689834158e-06, "loss": 32.3631, "step": 345100 }, { "epoch": 0.6971440345511621, "grad_norm": 440.0959777832031, "learning_rate": 2.6492526031924005e-06, "loss": 37.2021, "step": 345110 }, { "epoch": 0.697164235183846, "grad_norm": 353.9069519042969, "learning_rate": 2.6489445280078998e-06, "loss": 8.6506, "step": 345120 }, { "epoch": 0.6971844358165298, "grad_norm": 306.89111328125, "learning_rate": 2.6486364642821565e-06, "loss": 23.3063, "step": 345130 }, { "epoch": 0.6972046364492136, "grad_norm": 109.22026824951172, "learning_rate": 2.6483284120166762e-06, "loss": 7.5639, "step": 345140 }, { "epoch": 0.6972248370818974, "grad_norm": 244.5216064453125, "learning_rate": 2.6480203712129583e-06, 
"loss": 6.595, "step": 345150 }, { "epoch": 0.6972450377145812, "grad_norm": 465.40179443359375, "learning_rate": 2.647712341872501e-06, "loss": 25.7738, "step": 345160 }, { "epoch": 0.6972652383472651, "grad_norm": 604.065185546875, "learning_rate": 2.647404323996809e-06, "loss": 22.5741, "step": 345170 }, { "epoch": 0.6972854389799489, "grad_norm": 230.20556640625, "learning_rate": 2.647096317587385e-06, "loss": 10.2197, "step": 345180 }, { "epoch": 0.6973056396126327, "grad_norm": 268.8053283691406, "learning_rate": 2.646788322645728e-06, "loss": 21.6019, "step": 345190 }, { "epoch": 0.6973258402453165, "grad_norm": 301.2347717285156, "learning_rate": 2.646480339173337e-06, "loss": 20.836, "step": 345200 }, { "epoch": 0.6973460408780003, "grad_norm": 633.9385375976562, "learning_rate": 2.6461723671717177e-06, "loss": 18.6043, "step": 345210 }, { "epoch": 0.6973662415106842, "grad_norm": 0.0, "learning_rate": 2.645864406642369e-06, "loss": 18.6629, "step": 345220 }, { "epoch": 0.697386442143368, "grad_norm": 78.62548065185547, "learning_rate": 2.6455564575867893e-06, "loss": 26.5985, "step": 345230 }, { "epoch": 0.6974066427760518, "grad_norm": 211.503662109375, "learning_rate": 2.645248520006482e-06, "loss": 15.8581, "step": 345240 }, { "epoch": 0.6974268434087355, "grad_norm": 440.4510192871094, "learning_rate": 2.64494059390295e-06, "loss": 33.8293, "step": 345250 }, { "epoch": 0.6974470440414193, "grad_norm": 476.35699462890625, "learning_rate": 2.644632679277692e-06, "loss": 15.2913, "step": 345260 }, { "epoch": 0.6974672446741031, "grad_norm": 797.4821166992188, "learning_rate": 2.644324776132206e-06, "loss": 29.4788, "step": 345270 }, { "epoch": 0.697487445306787, "grad_norm": 491.3204345703125, "learning_rate": 2.6440168844679983e-06, "loss": 21.4707, "step": 345280 }, { "epoch": 0.6975076459394708, "grad_norm": 816.2991333007812, "learning_rate": 2.6437090042865655e-06, "loss": 15.4586, "step": 345290 }, { "epoch": 0.6975278465721546, "grad_norm": 30.84978675842285, "learning_rate": 2.6434011355894074e-06, "loss": 14.2834, "step": 345300 }, { "epoch": 0.6975480472048384, "grad_norm": 582.0877075195312, "learning_rate": 2.643093278378029e-06, "loss": 14.9192, "step": 345310 }, { "epoch": 0.6975682478375222, "grad_norm": 137.56689453125, "learning_rate": 2.642785432653926e-06, "loss": 11.3768, "step": 345320 }, { "epoch": 0.6975884484702061, "grad_norm": 379.98486328125, "learning_rate": 2.6424775984186024e-06, "loss": 16.0875, "step": 345330 }, { "epoch": 0.6976086491028899, "grad_norm": 195.0530242919922, "learning_rate": 2.6421697756735577e-06, "loss": 23.8646, "step": 345340 }, { "epoch": 0.6976288497355737, "grad_norm": 253.6774139404297, "learning_rate": 2.641861964420289e-06, "loss": 12.3859, "step": 345350 }, { "epoch": 0.6976490503682575, "grad_norm": 186.47940063476562, "learning_rate": 2.641554164660301e-06, "loss": 15.902, "step": 345360 }, { "epoch": 0.6976692510009413, "grad_norm": 576.4246215820312, "learning_rate": 2.6412463763950925e-06, "loss": 10.885, "step": 345370 }, { "epoch": 0.6976894516336252, "grad_norm": 4.220212936401367, "learning_rate": 2.6409385996261606e-06, "loss": 14.7602, "step": 345380 }, { "epoch": 0.697709652266309, "grad_norm": 267.7427673339844, "learning_rate": 2.640630834355008e-06, "loss": 16.3859, "step": 345390 }, { "epoch": 0.6977298528989928, "grad_norm": 441.47271728515625, "learning_rate": 2.640323080583137e-06, "loss": 22.2648, "step": 345400 }, { "epoch": 0.6977500535316766, "grad_norm": 202.620361328125, "learning_rate": 
2.640015338312044e-06, "loss": 14.9657, "step": 345410 }, { "epoch": 0.6977702541643604, "grad_norm": 353.13739013671875, "learning_rate": 2.6397076075432294e-06, "loss": 14.2941, "step": 345420 }, { "epoch": 0.6977904547970443, "grad_norm": 426.14117431640625, "learning_rate": 2.6393998882781945e-06, "loss": 17.4602, "step": 345430 }, { "epoch": 0.6978106554297281, "grad_norm": 741.8760375976562, "learning_rate": 2.6390921805184387e-06, "loss": 30.9822, "step": 345440 }, { "epoch": 0.6978308560624119, "grad_norm": 184.38121032714844, "learning_rate": 2.63878448426546e-06, "loss": 15.771, "step": 345450 }, { "epoch": 0.6978510566950957, "grad_norm": 236.48931884765625, "learning_rate": 2.6384767995207584e-06, "loss": 16.7433, "step": 345460 }, { "epoch": 0.6978712573277795, "grad_norm": 165.02105712890625, "learning_rate": 2.6381691262858385e-06, "loss": 10.6148, "step": 345470 }, { "epoch": 0.6978914579604634, "grad_norm": 153.6220703125, "learning_rate": 2.6378614645621916e-06, "loss": 28.2366, "step": 345480 }, { "epoch": 0.6979116585931472, "grad_norm": 200.2836151123047, "learning_rate": 2.6375538143513225e-06, "loss": 8.507, "step": 345490 }, { "epoch": 0.697931859225831, "grad_norm": 234.54251098632812, "learning_rate": 2.637246175654731e-06, "loss": 9.2595, "step": 345500 }, { "epoch": 0.6979520598585147, "grad_norm": 316.4593505859375, "learning_rate": 2.6369385484739143e-06, "loss": 24.5696, "step": 345510 }, { "epoch": 0.6979722604911985, "grad_norm": 107.1210708618164, "learning_rate": 2.636630932810371e-06, "loss": 18.6488, "step": 345520 }, { "epoch": 0.6979924611238824, "grad_norm": 533.1016235351562, "learning_rate": 2.6363233286656044e-06, "loss": 14.3121, "step": 345530 }, { "epoch": 0.6980126617565662, "grad_norm": 634.0641479492188, "learning_rate": 2.636015736041111e-06, "loss": 20.5269, "step": 345540 }, { "epoch": 0.69803286238925, "grad_norm": 392.26788330078125, "learning_rate": 2.6357081549383877e-06, "loss": 14.8525, "step": 345550 }, { "epoch": 0.6980530630219338, "grad_norm": 620.7987060546875, "learning_rate": 2.635400585358937e-06, "loss": 24.258, "step": 345560 }, { "epoch": 0.6980732636546176, "grad_norm": 419.3951721191406, "learning_rate": 2.6350930273042587e-06, "loss": 15.7807, "step": 345570 }, { "epoch": 0.6980934642873015, "grad_norm": 685.9359741210938, "learning_rate": 2.63478548077585e-06, "loss": 16.4399, "step": 345580 }, { "epoch": 0.6981136649199853, "grad_norm": 500.3210144042969, "learning_rate": 2.634477945775208e-06, "loss": 11.0419, "step": 345590 }, { "epoch": 0.6981338655526691, "grad_norm": 286.71722412109375, "learning_rate": 2.634170422303835e-06, "loss": 13.9834, "step": 345600 }, { "epoch": 0.6981540661853529, "grad_norm": 155.51107788085938, "learning_rate": 2.633862910363229e-06, "loss": 23.2099, "step": 345610 }, { "epoch": 0.6981742668180367, "grad_norm": 81.69243621826172, "learning_rate": 2.6335554099548865e-06, "loss": 10.7837, "step": 345620 }, { "epoch": 0.6981944674507206, "grad_norm": 412.6045837402344, "learning_rate": 2.63324792108031e-06, "loss": 28.8401, "step": 345630 }, { "epoch": 0.6982146680834044, "grad_norm": 1176.1641845703125, "learning_rate": 2.6329404437409934e-06, "loss": 20.9388, "step": 345640 }, { "epoch": 0.6982348687160882, "grad_norm": 1.3624160289764404, "learning_rate": 2.6326329779384397e-06, "loss": 13.8064, "step": 345650 }, { "epoch": 0.698255069348772, "grad_norm": 276.4120178222656, "learning_rate": 2.6323255236741465e-06, "loss": 13.0461, "step": 345660 }, { "epoch": 0.6982752699814558, 
"grad_norm": 289.1661376953125, "learning_rate": 2.63201808094961e-06, "loss": 30.3161, "step": 345670 }, { "epoch": 0.6982954706141397, "grad_norm": 206.7049560546875, "learning_rate": 2.6317106497663316e-06, "loss": 7.163, "step": 345680 }, { "epoch": 0.6983156712468235, "grad_norm": 146.7299346923828, "learning_rate": 2.6314032301258072e-06, "loss": 26.6436, "step": 345690 }, { "epoch": 0.6983358718795073, "grad_norm": 336.31182861328125, "learning_rate": 2.6310958220295356e-06, "loss": 15.1083, "step": 345700 }, { "epoch": 0.6983560725121911, "grad_norm": 124.25076293945312, "learning_rate": 2.630788425479015e-06, "loss": 16.3743, "step": 345710 }, { "epoch": 0.6983762731448749, "grad_norm": 574.3560791015625, "learning_rate": 2.6304810404757465e-06, "loss": 22.2036, "step": 345720 }, { "epoch": 0.6983964737775588, "grad_norm": 580.5028076171875, "learning_rate": 2.6301736670212263e-06, "loss": 19.0385, "step": 345730 }, { "epoch": 0.6984166744102426, "grad_norm": 41.32448196411133, "learning_rate": 2.62986630511695e-06, "loss": 26.0171, "step": 345740 }, { "epoch": 0.6984368750429264, "grad_norm": 804.4308471679688, "learning_rate": 2.6295589547644195e-06, "loss": 32.7556, "step": 345750 }, { "epoch": 0.6984570756756101, "grad_norm": 118.28096008300781, "learning_rate": 2.6292516159651317e-06, "loss": 31.4055, "step": 345760 }, { "epoch": 0.6984772763082939, "grad_norm": 291.58050537109375, "learning_rate": 2.628944288720582e-06, "loss": 23.683, "step": 345770 }, { "epoch": 0.6984974769409777, "grad_norm": 651.7496948242188, "learning_rate": 2.6286369730322693e-06, "loss": 21.9618, "step": 345780 }, { "epoch": 0.6985176775736616, "grad_norm": 316.13653564453125, "learning_rate": 2.6283296689016953e-06, "loss": 36.7003, "step": 345790 }, { "epoch": 0.6985378782063454, "grad_norm": 636.0591430664062, "learning_rate": 2.6280223763303546e-06, "loss": 13.3677, "step": 345800 }, { "epoch": 0.6985580788390292, "grad_norm": 351.2723083496094, "learning_rate": 2.6277150953197427e-06, "loss": 10.4449, "step": 345810 }, { "epoch": 0.698578279471713, "grad_norm": 187.9607391357422, "learning_rate": 2.6274078258713626e-06, "loss": 11.7961, "step": 345820 }, { "epoch": 0.6985984801043968, "grad_norm": 460.8187255859375, "learning_rate": 2.627100567986709e-06, "loss": 24.5505, "step": 345830 }, { "epoch": 0.6986186807370807, "grad_norm": 497.2703552246094, "learning_rate": 2.626793321667277e-06, "loss": 35.9925, "step": 345840 }, { "epoch": 0.6986388813697645, "grad_norm": 643.0888061523438, "learning_rate": 2.626486086914566e-06, "loss": 23.4985, "step": 345850 }, { "epoch": 0.6986590820024483, "grad_norm": 764.6570434570312, "learning_rate": 2.626178863730077e-06, "loss": 27.3257, "step": 345860 }, { "epoch": 0.6986792826351321, "grad_norm": 219.7559814453125, "learning_rate": 2.6258716521153034e-06, "loss": 12.841, "step": 345870 }, { "epoch": 0.698699483267816, "grad_norm": 286.03521728515625, "learning_rate": 2.6255644520717417e-06, "loss": 20.3383, "step": 345880 }, { "epoch": 0.6987196839004998, "grad_norm": 447.294921875, "learning_rate": 2.6252572636008934e-06, "loss": 26.421, "step": 345890 }, { "epoch": 0.6987398845331836, "grad_norm": 512.5075073242188, "learning_rate": 2.6249500867042523e-06, "loss": 14.1046, "step": 345900 }, { "epoch": 0.6987600851658674, "grad_norm": 755.806396484375, "learning_rate": 2.6246429213833146e-06, "loss": 17.2237, "step": 345910 }, { "epoch": 0.6987802857985512, "grad_norm": 284.7914123535156, "learning_rate": 2.624335767639582e-06, "loss": 12.9259, 
"step": 345920 }, { "epoch": 0.698800486431235, "grad_norm": 17.339370727539062, "learning_rate": 2.624028625474546e-06, "loss": 24.0289, "step": 345930 }, { "epoch": 0.6988206870639189, "grad_norm": 398.7443542480469, "learning_rate": 2.6237214948897084e-06, "loss": 12.7268, "step": 345940 }, { "epoch": 0.6988408876966027, "grad_norm": 144.84312438964844, "learning_rate": 2.623414375886564e-06, "loss": 18.3145, "step": 345950 }, { "epoch": 0.6988610883292865, "grad_norm": 658.30908203125, "learning_rate": 2.623107268466608e-06, "loss": 28.3886, "step": 345960 }, { "epoch": 0.6988812889619703, "grad_norm": 559.9882202148438, "learning_rate": 2.6228001726313406e-06, "loss": 18.2379, "step": 345970 }, { "epoch": 0.6989014895946541, "grad_norm": 389.85107421875, "learning_rate": 2.622493088382257e-06, "loss": 40.59, "step": 345980 }, { "epoch": 0.698921690227338, "grad_norm": 285.4831237792969, "learning_rate": 2.6221860157208516e-06, "loss": 33.7344, "step": 345990 }, { "epoch": 0.6989418908600218, "grad_norm": 296.01312255859375, "learning_rate": 2.6218789546486235e-06, "loss": 13.7665, "step": 346000 }, { "epoch": 0.6989620914927056, "grad_norm": 0.0, "learning_rate": 2.6215719051670705e-06, "loss": 8.9669, "step": 346010 }, { "epoch": 0.6989822921253893, "grad_norm": 437.0624694824219, "learning_rate": 2.6212648672776874e-06, "loss": 13.5886, "step": 346020 }, { "epoch": 0.6990024927580731, "grad_norm": 183.48011779785156, "learning_rate": 2.620957840981969e-06, "loss": 12.9059, "step": 346030 }, { "epoch": 0.699022693390757, "grad_norm": 559.4110107421875, "learning_rate": 2.6206508262814164e-06, "loss": 15.7754, "step": 346040 }, { "epoch": 0.6990428940234408, "grad_norm": 41.547645568847656, "learning_rate": 2.6203438231775224e-06, "loss": 10.2372, "step": 346050 }, { "epoch": 0.6990630946561246, "grad_norm": 202.2870635986328, "learning_rate": 2.6200368316717816e-06, "loss": 14.3801, "step": 346060 }, { "epoch": 0.6990832952888084, "grad_norm": 452.917236328125, "learning_rate": 2.6197298517656933e-06, "loss": 14.5969, "step": 346070 }, { "epoch": 0.6991034959214922, "grad_norm": 570.240478515625, "learning_rate": 2.6194228834607567e-06, "loss": 42.6146, "step": 346080 }, { "epoch": 0.699123696554176, "grad_norm": 356.0271301269531, "learning_rate": 2.6191159267584604e-06, "loss": 20.1366, "step": 346090 }, { "epoch": 0.6991438971868599, "grad_norm": 162.37998962402344, "learning_rate": 2.618808981660304e-06, "loss": 24.5838, "step": 346100 }, { "epoch": 0.6991640978195437, "grad_norm": 320.3457946777344, "learning_rate": 2.618502048167786e-06, "loss": 10.9108, "step": 346110 }, { "epoch": 0.6991842984522275, "grad_norm": 279.5674133300781, "learning_rate": 2.6181951262824e-06, "loss": 22.2888, "step": 346120 }, { "epoch": 0.6992044990849113, "grad_norm": 244.40220642089844, "learning_rate": 2.617888216005641e-06, "loss": 21.7767, "step": 346130 }, { "epoch": 0.6992246997175952, "grad_norm": 406.1513977050781, "learning_rate": 2.6175813173390063e-06, "loss": 32.9853, "step": 346140 }, { "epoch": 0.699244900350279, "grad_norm": 236.93878173828125, "learning_rate": 2.6172744302839925e-06, "loss": 25.957, "step": 346150 }, { "epoch": 0.6992651009829628, "grad_norm": 341.7259216308594, "learning_rate": 2.616967554842092e-06, "loss": 20.1887, "step": 346160 }, { "epoch": 0.6992853016156466, "grad_norm": 119.66575622558594, "learning_rate": 2.6166606910148024e-06, "loss": 13.2485, "step": 346170 }, { "epoch": 0.6993055022483304, "grad_norm": 385.6126708984375, "learning_rate": 
2.6163538388036213e-06, "loss": 22.6795, "step": 346180 }, { "epoch": 0.6993257028810143, "grad_norm": 148.49256896972656, "learning_rate": 2.6160469982100426e-06, "loss": 11.1807, "step": 346190 }, { "epoch": 0.6993459035136981, "grad_norm": 189.14205932617188, "learning_rate": 2.61574016923556e-06, "loss": 13.4204, "step": 346200 }, { "epoch": 0.6993661041463819, "grad_norm": 784.8663330078125, "learning_rate": 2.6154333518816727e-06, "loss": 32.9437, "step": 346210 }, { "epoch": 0.6993863047790657, "grad_norm": 207.66246032714844, "learning_rate": 2.6151265461498737e-06, "loss": 12.0641, "step": 346220 }, { "epoch": 0.6994065054117495, "grad_norm": 248.05140686035156, "learning_rate": 2.6148197520416567e-06, "loss": 30.4625, "step": 346230 }, { "epoch": 0.6994267060444334, "grad_norm": 375.9503173828125, "learning_rate": 2.6145129695585213e-06, "loss": 13.1731, "step": 346240 }, { "epoch": 0.6994469066771172, "grad_norm": 309.3663635253906, "learning_rate": 2.614206198701958e-06, "loss": 14.8673, "step": 346250 }, { "epoch": 0.699467107309801, "grad_norm": 114.35416412353516, "learning_rate": 2.6138994394734663e-06, "loss": 8.6712, "step": 346260 }, { "epoch": 0.6994873079424847, "grad_norm": 404.1855773925781, "learning_rate": 2.613592691874539e-06, "loss": 14.2193, "step": 346270 }, { "epoch": 0.6995075085751685, "grad_norm": 118.95695495605469, "learning_rate": 2.6132859559066704e-06, "loss": 10.8912, "step": 346280 }, { "epoch": 0.6995277092078523, "grad_norm": 329.8266906738281, "learning_rate": 2.6129792315713576e-06, "loss": 14.2744, "step": 346290 }, { "epoch": 0.6995479098405362, "grad_norm": 1208.884033203125, "learning_rate": 2.612672518870093e-06, "loss": 39.5541, "step": 346300 }, { "epoch": 0.69956811047322, "grad_norm": 714.0730590820312, "learning_rate": 2.6123658178043753e-06, "loss": 16.1393, "step": 346310 }, { "epoch": 0.6995883111059038, "grad_norm": 214.5430145263672, "learning_rate": 2.6120591283756946e-06, "loss": 29.9662, "step": 346320 }, { "epoch": 0.6996085117385876, "grad_norm": 950.339111328125, "learning_rate": 2.6117524505855507e-06, "loss": 26.5155, "step": 346330 }, { "epoch": 0.6996287123712714, "grad_norm": 319.1742858886719, "learning_rate": 2.611445784435435e-06, "loss": 18.1529, "step": 346340 }, { "epoch": 0.6996489130039553, "grad_norm": 517.3515625, "learning_rate": 2.6111391299268406e-06, "loss": 14.5742, "step": 346350 }, { "epoch": 0.6996691136366391, "grad_norm": 83.57072448730469, "learning_rate": 2.6108324870612674e-06, "loss": 11.5814, "step": 346360 }, { "epoch": 0.6996893142693229, "grad_norm": 113.90496063232422, "learning_rate": 2.610525855840206e-06, "loss": 11.7873, "step": 346370 }, { "epoch": 0.6997095149020067, "grad_norm": 337.7570495605469, "learning_rate": 2.61021923626515e-06, "loss": 34.2954, "step": 346380 }, { "epoch": 0.6997297155346905, "grad_norm": 380.3957214355469, "learning_rate": 2.609912628337596e-06, "loss": 17.7364, "step": 346390 }, { "epoch": 0.6997499161673744, "grad_norm": 61.789344787597656, "learning_rate": 2.6096060320590393e-06, "loss": 8.3513, "step": 346400 }, { "epoch": 0.6997701168000582, "grad_norm": 216.51742553710938, "learning_rate": 2.609299447430973e-06, "loss": 16.2906, "step": 346410 }, { "epoch": 0.699790317432742, "grad_norm": 595.2395629882812, "learning_rate": 2.60899287445489e-06, "loss": 16.2307, "step": 346420 }, { "epoch": 0.6998105180654258, "grad_norm": 41.3694953918457, "learning_rate": 2.608686313132287e-06, "loss": 16.0588, "step": 346430 }, { "epoch": 0.6998307186981096, 
"grad_norm": 108.62818908691406, "learning_rate": 2.6083797634646567e-06, "loss": 14.3768, "step": 346440 }, { "epoch": 0.6998509193307935, "grad_norm": 300.60406494140625, "learning_rate": 2.608073225453492e-06, "loss": 18.2154, "step": 346450 }, { "epoch": 0.6998711199634773, "grad_norm": 297.52813720703125, "learning_rate": 2.607766699100288e-06, "loss": 21.0632, "step": 346460 }, { "epoch": 0.6998913205961611, "grad_norm": 135.4776611328125, "learning_rate": 2.6074601844065407e-06, "loss": 12.4945, "step": 346470 }, { "epoch": 0.6999115212288449, "grad_norm": 260.7397155761719, "learning_rate": 2.607153681373743e-06, "loss": 26.7468, "step": 346480 }, { "epoch": 0.6999317218615287, "grad_norm": 420.61676025390625, "learning_rate": 2.6068471900033852e-06, "loss": 28.9963, "step": 346490 }, { "epoch": 0.6999519224942126, "grad_norm": 104.43257904052734, "learning_rate": 2.6065407102969664e-06, "loss": 20.6861, "step": 346500 }, { "epoch": 0.6999721231268964, "grad_norm": 306.8272399902344, "learning_rate": 2.6062342422559776e-06, "loss": 12.7433, "step": 346510 }, { "epoch": 0.6999923237595802, "grad_norm": 365.0401916503906, "learning_rate": 2.605927785881911e-06, "loss": 18.0941, "step": 346520 }, { "epoch": 0.7000125243922639, "grad_norm": 265.7608947753906, "learning_rate": 2.6056213411762645e-06, "loss": 18.0347, "step": 346530 }, { "epoch": 0.7000327250249477, "grad_norm": 504.5923156738281, "learning_rate": 2.6053149081405267e-06, "loss": 14.9236, "step": 346540 }, { "epoch": 0.7000529256576316, "grad_norm": 469.1474609375, "learning_rate": 2.6050084867761953e-06, "loss": 19.0025, "step": 346550 }, { "epoch": 0.7000731262903154, "grad_norm": 1204.1434326171875, "learning_rate": 2.6047020770847618e-06, "loss": 34.2178, "step": 346560 }, { "epoch": 0.7000933269229992, "grad_norm": 425.03106689453125, "learning_rate": 2.6043956790677195e-06, "loss": 21.6887, "step": 346570 }, { "epoch": 0.700113527555683, "grad_norm": 427.1048278808594, "learning_rate": 2.6040892927265627e-06, "loss": 20.1533, "step": 346580 }, { "epoch": 0.7001337281883668, "grad_norm": 212.57095336914062, "learning_rate": 2.603782918062784e-06, "loss": 19.1921, "step": 346590 }, { "epoch": 0.7001539288210507, "grad_norm": 226.52755737304688, "learning_rate": 2.6034765550778753e-06, "loss": 15.4578, "step": 346600 }, { "epoch": 0.7001741294537345, "grad_norm": 323.2734680175781, "learning_rate": 2.603170203773331e-06, "loss": 18.7106, "step": 346610 }, { "epoch": 0.7001943300864183, "grad_norm": 703.8001708984375, "learning_rate": 2.6028638641506464e-06, "loss": 12.5089, "step": 346620 }, { "epoch": 0.7002145307191021, "grad_norm": 203.0648956298828, "learning_rate": 2.602557536211313e-06, "loss": 25.0396, "step": 346630 }, { "epoch": 0.7002347313517859, "grad_norm": 447.15386962890625, "learning_rate": 2.6022512199568205e-06, "loss": 19.8693, "step": 346640 }, { "epoch": 0.7002549319844698, "grad_norm": 77.42908477783203, "learning_rate": 2.601944915388668e-06, "loss": 20.6841, "step": 346650 }, { "epoch": 0.7002751326171536, "grad_norm": 807.8881225585938, "learning_rate": 2.6016386225083438e-06, "loss": 17.1238, "step": 346660 }, { "epoch": 0.7002953332498374, "grad_norm": 653.5703125, "learning_rate": 2.6013323413173408e-06, "loss": 20.1987, "step": 346670 }, { "epoch": 0.7003155338825212, "grad_norm": 404.2229309082031, "learning_rate": 2.601026071817153e-06, "loss": 22.7176, "step": 346680 }, { "epoch": 0.700335734515205, "grad_norm": 227.59866333007812, "learning_rate": 2.600719814009277e-06, "loss": 20.0885, 
"step": 346690 }, { "epoch": 0.7003559351478889, "grad_norm": 349.4118957519531, "learning_rate": 2.600413567895198e-06, "loss": 14.6721, "step": 346700 }, { "epoch": 0.7003761357805727, "grad_norm": 532.9380493164062, "learning_rate": 2.6001073334764117e-06, "loss": 8.393, "step": 346710 }, { "epoch": 0.7003963364132565, "grad_norm": 193.77354431152344, "learning_rate": 2.5998011107544134e-06, "loss": 10.4868, "step": 346720 }, { "epoch": 0.7004165370459403, "grad_norm": 356.72210693359375, "learning_rate": 2.5994948997306935e-06, "loss": 34.4526, "step": 346730 }, { "epoch": 0.7004367376786241, "grad_norm": 557.6670532226562, "learning_rate": 2.599188700406743e-06, "loss": 19.6684, "step": 346740 }, { "epoch": 0.700456938311308, "grad_norm": 303.978271484375, "learning_rate": 2.5988825127840547e-06, "loss": 13.9076, "step": 346750 }, { "epoch": 0.7004771389439918, "grad_norm": 590.5808715820312, "learning_rate": 2.5985763368641253e-06, "loss": 15.3695, "step": 346760 }, { "epoch": 0.7004973395766756, "grad_norm": 279.01910400390625, "learning_rate": 2.5982701726484405e-06, "loss": 27.293, "step": 346770 }, { "epoch": 0.7005175402093593, "grad_norm": 216.29835510253906, "learning_rate": 2.5979640201384953e-06, "loss": 21.8219, "step": 346780 }, { "epoch": 0.7005377408420431, "grad_norm": 411.185302734375, "learning_rate": 2.597657879335784e-06, "loss": 24.5489, "step": 346790 }, { "epoch": 0.7005579414747269, "grad_norm": 192.6901397705078, "learning_rate": 2.5973517502417966e-06, "loss": 18.8785, "step": 346800 }, { "epoch": 0.7005781421074108, "grad_norm": 137.09548950195312, "learning_rate": 2.597045632858024e-06, "loss": 10.2786, "step": 346810 }, { "epoch": 0.7005983427400946, "grad_norm": 856.15966796875, "learning_rate": 2.5967395271859614e-06, "loss": 31.4287, "step": 346820 }, { "epoch": 0.7006185433727784, "grad_norm": 40.23459243774414, "learning_rate": 2.596433433227099e-06, "loss": 7.1841, "step": 346830 }, { "epoch": 0.7006387440054622, "grad_norm": 188.1201171875, "learning_rate": 2.596127350982926e-06, "loss": 5.4929, "step": 346840 }, { "epoch": 0.700658944638146, "grad_norm": 338.2161865234375, "learning_rate": 2.5958212804549387e-06, "loss": 17.0177, "step": 346850 }, { "epoch": 0.7006791452708299, "grad_norm": 118.79478454589844, "learning_rate": 2.5955152216446255e-06, "loss": 14.1635, "step": 346860 }, { "epoch": 0.7006993459035137, "grad_norm": 137.65213012695312, "learning_rate": 2.595209174553481e-06, "loss": 12.1935, "step": 346870 }, { "epoch": 0.7007195465361975, "grad_norm": 185.00650024414062, "learning_rate": 2.594903139182996e-06, "loss": 14.265, "step": 346880 }, { "epoch": 0.7007397471688813, "grad_norm": 545.932373046875, "learning_rate": 2.594597115534658e-06, "loss": 17.734, "step": 346890 }, { "epoch": 0.7007599478015651, "grad_norm": 1035.7293701171875, "learning_rate": 2.5942911036099657e-06, "loss": 26.7553, "step": 346900 }, { "epoch": 0.700780148434249, "grad_norm": 385.90435791015625, "learning_rate": 2.5939851034104035e-06, "loss": 11.4597, "step": 346910 }, { "epoch": 0.7008003490669328, "grad_norm": 587.3248291015625, "learning_rate": 2.5936791149374686e-06, "loss": 14.9327, "step": 346920 }, { "epoch": 0.7008205496996166, "grad_norm": 234.338134765625, "learning_rate": 2.5933731381926473e-06, "loss": 22.0711, "step": 346930 }, { "epoch": 0.7008407503323004, "grad_norm": 155.63592529296875, "learning_rate": 2.593067173177436e-06, "loss": 49.627, "step": 346940 }, { "epoch": 0.7008609509649842, "grad_norm": 52.495059967041016, "learning_rate": 
2.5927612198933237e-06, "loss": 22.2556, "step": 346950 }, { "epoch": 0.7008811515976681, "grad_norm": 363.7169494628906, "learning_rate": 2.592455278341799e-06, "loss": 19.1529, "step": 346960 }, { "epoch": 0.7009013522303519, "grad_norm": 88.28634643554688, "learning_rate": 2.5921493485243566e-06, "loss": 9.0732, "step": 346970 }, { "epoch": 0.7009215528630357, "grad_norm": 40.72396469116211, "learning_rate": 2.5918434304424867e-06, "loss": 17.0255, "step": 346980 }, { "epoch": 0.7009417534957195, "grad_norm": 554.2332763671875, "learning_rate": 2.591537524097678e-06, "loss": 21.5423, "step": 346990 }, { "epoch": 0.7009619541284033, "grad_norm": 195.70889282226562, "learning_rate": 2.5912316294914232e-06, "loss": 11.9071, "step": 347000 }, { "epoch": 0.7009821547610872, "grad_norm": 477.51763916015625, "learning_rate": 2.590925746625217e-06, "loss": 19.4229, "step": 347010 }, { "epoch": 0.701002355393771, "grad_norm": 214.69302368164062, "learning_rate": 2.590619875500543e-06, "loss": 13.2143, "step": 347020 }, { "epoch": 0.7010225560264548, "grad_norm": 521.0151977539062, "learning_rate": 2.590314016118895e-06, "loss": 16.2183, "step": 347030 }, { "epoch": 0.7010427566591385, "grad_norm": 197.3564910888672, "learning_rate": 2.5900081684817667e-06, "loss": 20.2638, "step": 347040 }, { "epoch": 0.7010629572918223, "grad_norm": 134.8383331298828, "learning_rate": 2.5897023325906458e-06, "loss": 23.2689, "step": 347050 }, { "epoch": 0.7010831579245062, "grad_norm": 341.59088134765625, "learning_rate": 2.589396508447022e-06, "loss": 16.9646, "step": 347060 }, { "epoch": 0.70110335855719, "grad_norm": 380.99041748046875, "learning_rate": 2.5890906960523865e-06, "loss": 11.1826, "step": 347070 }, { "epoch": 0.7011235591898738, "grad_norm": 302.3643493652344, "learning_rate": 2.588784895408235e-06, "loss": 17.5542, "step": 347080 }, { "epoch": 0.7011437598225576, "grad_norm": 407.2210388183594, "learning_rate": 2.58847910651605e-06, "loss": 13.7203, "step": 347090 }, { "epoch": 0.7011639604552414, "grad_norm": 1867.4381103515625, "learning_rate": 2.588173329377324e-06, "loss": 29.6082, "step": 347100 }, { "epoch": 0.7011841610879253, "grad_norm": 669.7487182617188, "learning_rate": 2.587867563993552e-06, "loss": 13.0289, "step": 347110 }, { "epoch": 0.7012043617206091, "grad_norm": 50.876983642578125, "learning_rate": 2.5875618103662204e-06, "loss": 11.7301, "step": 347120 }, { "epoch": 0.7012245623532929, "grad_norm": 49.40553665161133, "learning_rate": 2.5872560684968175e-06, "loss": 12.6641, "step": 347130 }, { "epoch": 0.7012447629859767, "grad_norm": 378.6500549316406, "learning_rate": 2.5869503383868387e-06, "loss": 44.4532, "step": 347140 }, { "epoch": 0.7012649636186605, "grad_norm": 221.18179321289062, "learning_rate": 2.5866446200377688e-06, "loss": 14.7554, "step": 347150 }, { "epoch": 0.7012851642513444, "grad_norm": 81.32159423828125, "learning_rate": 2.5863389134511024e-06, "loss": 11.0102, "step": 347160 }, { "epoch": 0.7013053648840282, "grad_norm": 567.9331665039062, "learning_rate": 2.5860332186283277e-06, "loss": 12.9765, "step": 347170 }, { "epoch": 0.701325565516712, "grad_norm": 326.4610900878906, "learning_rate": 2.5857275355709317e-06, "loss": 10.4179, "step": 347180 }, { "epoch": 0.7013457661493958, "grad_norm": 81.64095306396484, "learning_rate": 2.585421864280409e-06, "loss": 19.6015, "step": 347190 }, { "epoch": 0.7013659667820796, "grad_norm": 2042.0657958984375, "learning_rate": 2.5851162047582477e-06, "loss": 25.512, "step": 347200 }, { "epoch": 
0.7013861674147635, "grad_norm": 341.677734375, "learning_rate": 2.5848105570059346e-06, "loss": 16.6826, "step": 347210 }, { "epoch": 0.7014063680474473, "grad_norm": 276.0132751464844, "learning_rate": 2.584504921024963e-06, "loss": 18.1895, "step": 347220 }, { "epoch": 0.7014265686801311, "grad_norm": 313.1415100097656, "learning_rate": 2.5841992968168224e-06, "loss": 12.0104, "step": 347230 }, { "epoch": 0.7014467693128149, "grad_norm": 726.3991088867188, "learning_rate": 2.5838936843830015e-06, "loss": 13.0599, "step": 347240 }, { "epoch": 0.7014669699454987, "grad_norm": 335.7645568847656, "learning_rate": 2.5835880837249884e-06, "loss": 23.8675, "step": 347250 }, { "epoch": 0.7014871705781826, "grad_norm": 423.79449462890625, "learning_rate": 2.5832824948442747e-06, "loss": 13.2148, "step": 347260 }, { "epoch": 0.7015073712108664, "grad_norm": 392.97625732421875, "learning_rate": 2.5829769177423504e-06, "loss": 26.6486, "step": 347270 }, { "epoch": 0.7015275718435502, "grad_norm": 255.8606719970703, "learning_rate": 2.5826713524207e-06, "loss": 7.65, "step": 347280 }, { "epoch": 0.701547772476234, "grad_norm": 249.2724609375, "learning_rate": 2.5823657988808176e-06, "loss": 12.9573, "step": 347290 }, { "epoch": 0.7015679731089177, "grad_norm": 584.9038696289062, "learning_rate": 2.582060257124195e-06, "loss": 20.7267, "step": 347300 }, { "epoch": 0.7015881737416015, "grad_norm": 586.8082275390625, "learning_rate": 2.5817547271523124e-06, "loss": 12.8741, "step": 347310 }, { "epoch": 0.7016083743742854, "grad_norm": 324.95489501953125, "learning_rate": 2.5814492089666642e-06, "loss": 19.4545, "step": 347320 }, { "epoch": 0.7016285750069692, "grad_norm": 535.495361328125, "learning_rate": 2.581143702568742e-06, "loss": 29.9428, "step": 347330 }, { "epoch": 0.701648775639653, "grad_norm": 323.0057067871094, "learning_rate": 2.5808382079600315e-06, "loss": 13.38, "step": 347340 }, { "epoch": 0.7016689762723368, "grad_norm": 477.53656005859375, "learning_rate": 2.5805327251420205e-06, "loss": 12.4908, "step": 347350 }, { "epoch": 0.7016891769050206, "grad_norm": 0.0, "learning_rate": 2.580227254116199e-06, "loss": 14.4245, "step": 347360 }, { "epoch": 0.7017093775377045, "grad_norm": 209.73056030273438, "learning_rate": 2.5799217948840603e-06, "loss": 26.2392, "step": 347370 }, { "epoch": 0.7017295781703883, "grad_norm": 520.84716796875, "learning_rate": 2.579616347447086e-06, "loss": 24.331, "step": 347380 }, { "epoch": 0.7017497788030721, "grad_norm": 537.9876098632812, "learning_rate": 2.579310911806768e-06, "loss": 18.937, "step": 347390 }, { "epoch": 0.7017699794357559, "grad_norm": 362.0723876953125, "learning_rate": 2.5790054879645964e-06, "loss": 14.1632, "step": 347400 }, { "epoch": 0.7017901800684397, "grad_norm": 427.97357177734375, "learning_rate": 2.5787000759220592e-06, "loss": 12.2612, "step": 347410 }, { "epoch": 0.7018103807011236, "grad_norm": 450.44036865234375, "learning_rate": 2.578394675680641e-06, "loss": 19.8099, "step": 347420 }, { "epoch": 0.7018305813338074, "grad_norm": 362.4385986328125, "learning_rate": 2.578089287241836e-06, "loss": 15.9942, "step": 347430 }, { "epoch": 0.7018507819664912, "grad_norm": 404.1336975097656, "learning_rate": 2.5777839106071308e-06, "loss": 16.5669, "step": 347440 }, { "epoch": 0.701870982599175, "grad_norm": 296.78582763671875, "learning_rate": 2.5774785457780107e-06, "loss": 18.6781, "step": 347450 }, { "epoch": 0.7018911832318588, "grad_norm": 148.4103546142578, "learning_rate": 2.577173192755968e-06, "loss": 12.5277, "step": 
347460 }, { "epoch": 0.7019113838645427, "grad_norm": 533.5665283203125, "learning_rate": 2.576867851542487e-06, "loss": 20.6553, "step": 347470 }, { "epoch": 0.7019315844972265, "grad_norm": 359.1020202636719, "learning_rate": 2.576562522139061e-06, "loss": 16.1798, "step": 347480 }, { "epoch": 0.7019517851299103, "grad_norm": 115.88701629638672, "learning_rate": 2.5762572045471744e-06, "loss": 16.8293, "step": 347490 }, { "epoch": 0.7019719857625941, "grad_norm": 268.5501708984375, "learning_rate": 2.5759518987683154e-06, "loss": 19.6261, "step": 347500 }, { "epoch": 0.7019921863952779, "grad_norm": 328.0009460449219, "learning_rate": 2.575646604803974e-06, "loss": 36.7878, "step": 347510 }, { "epoch": 0.7020123870279618, "grad_norm": 827.8441162109375, "learning_rate": 2.5753413226556356e-06, "loss": 22.8495, "step": 347520 }, { "epoch": 0.7020325876606456, "grad_norm": 458.2436828613281, "learning_rate": 2.575036052324791e-06, "loss": 14.1508, "step": 347530 }, { "epoch": 0.7020527882933294, "grad_norm": 467.7879943847656, "learning_rate": 2.5747307938129245e-06, "loss": 14.7493, "step": 347540 }, { "epoch": 0.7020729889260131, "grad_norm": 622.4500122070312, "learning_rate": 2.5744255471215284e-06, "loss": 20.4477, "step": 347550 }, { "epoch": 0.7020931895586969, "grad_norm": 494.28387451171875, "learning_rate": 2.5741203122520876e-06, "loss": 21.0831, "step": 347560 }, { "epoch": 0.7021133901913807, "grad_norm": 286.85528564453125, "learning_rate": 2.573815089206089e-06, "loss": 29.851, "step": 347570 }, { "epoch": 0.7021335908240646, "grad_norm": 182.8809814453125, "learning_rate": 2.573509877985022e-06, "loss": 19.086, "step": 347580 }, { "epoch": 0.7021537914567484, "grad_norm": 284.8392639160156, "learning_rate": 2.5732046785903744e-06, "loss": 16.4531, "step": 347590 }, { "epoch": 0.7021739920894322, "grad_norm": 0.0, "learning_rate": 2.5728994910236304e-06, "loss": 16.0378, "step": 347600 }, { "epoch": 0.702194192722116, "grad_norm": 355.5620422363281, "learning_rate": 2.572594315286281e-06, "loss": 22.9755, "step": 347610 }, { "epoch": 0.7022143933547998, "grad_norm": 272.7740783691406, "learning_rate": 2.5722891513798156e-06, "loss": 16.932, "step": 347620 }, { "epoch": 0.7022345939874837, "grad_norm": 410.8348083496094, "learning_rate": 2.5719839993057143e-06, "loss": 26.2486, "step": 347630 }, { "epoch": 0.7022547946201675, "grad_norm": 197.9288787841797, "learning_rate": 2.571678859065469e-06, "loss": 35.5925, "step": 347640 }, { "epoch": 0.7022749952528513, "grad_norm": 485.03631591796875, "learning_rate": 2.571373730660568e-06, "loss": 22.3784, "step": 347650 }, { "epoch": 0.7022951958855351, "grad_norm": 501.2548522949219, "learning_rate": 2.571068614092497e-06, "loss": 15.8858, "step": 347660 }, { "epoch": 0.702315396518219, "grad_norm": 314.9706726074219, "learning_rate": 2.5707635093627415e-06, "loss": 35.8188, "step": 347670 }, { "epoch": 0.7023355971509028, "grad_norm": 250.5843505859375, "learning_rate": 2.5704584164727898e-06, "loss": 29.6213, "step": 347680 }, { "epoch": 0.7023557977835866, "grad_norm": 84.46544647216797, "learning_rate": 2.5701533354241325e-06, "loss": 14.2155, "step": 347690 }, { "epoch": 0.7023759984162704, "grad_norm": 104.89397430419922, "learning_rate": 2.5698482662182494e-06, "loss": 19.7077, "step": 347700 }, { "epoch": 0.7023961990489542, "grad_norm": 1.461729884147644, "learning_rate": 2.5695432088566313e-06, "loss": 8.5402, "step": 347710 }, { "epoch": 0.702416399681638, "grad_norm": 338.3124084472656, "learning_rate": 
2.5692381633407672e-06, "loss": 18.6766, "step": 347720 }, { "epoch": 0.7024366003143219, "grad_norm": 484.132080078125, "learning_rate": 2.568933129672141e-06, "loss": 21.331, "step": 347730 }, { "epoch": 0.7024568009470057, "grad_norm": 435.5819396972656, "learning_rate": 2.568628107852238e-06, "loss": 23.2717, "step": 347740 }, { "epoch": 0.7024770015796895, "grad_norm": 593.2225952148438, "learning_rate": 2.568323097882548e-06, "loss": 33.024, "step": 347750 }, { "epoch": 0.7024972022123733, "grad_norm": 360.1360168457031, "learning_rate": 2.5680180997645577e-06, "loss": 9.6854, "step": 347760 }, { "epoch": 0.7025174028450571, "grad_norm": 450.7495422363281, "learning_rate": 2.567713113499749e-06, "loss": 24.615, "step": 347770 }, { "epoch": 0.702537603477741, "grad_norm": 413.72882080078125, "learning_rate": 2.5674081390896146e-06, "loss": 14.1096, "step": 347780 }, { "epoch": 0.7025578041104248, "grad_norm": 130.60775756835938, "learning_rate": 2.567103176535635e-06, "loss": 10.5338, "step": 347790 }, { "epoch": 0.7025780047431086, "grad_norm": 277.84588623046875, "learning_rate": 2.5667982258393016e-06, "loss": 26.829, "step": 347800 }, { "epoch": 0.7025982053757923, "grad_norm": 211.7849578857422, "learning_rate": 2.5664932870020966e-06, "loss": 13.9786, "step": 347810 }, { "epoch": 0.7026184060084761, "grad_norm": 252.23204040527344, "learning_rate": 2.5661883600255107e-06, "loss": 10.5075, "step": 347820 }, { "epoch": 0.70263860664116, "grad_norm": 339.7162170410156, "learning_rate": 2.565883444911025e-06, "loss": 21.0565, "step": 347830 }, { "epoch": 0.7026588072738438, "grad_norm": 387.20281982421875, "learning_rate": 2.5655785416601297e-06, "loss": 20.3326, "step": 347840 }, { "epoch": 0.7026790079065276, "grad_norm": 764.5905151367188, "learning_rate": 2.5652736502743105e-06, "loss": 22.7976, "step": 347850 }, { "epoch": 0.7026992085392114, "grad_norm": 473.9588623046875, "learning_rate": 2.56496877075505e-06, "loss": 13.2835, "step": 347860 }, { "epoch": 0.7027194091718952, "grad_norm": 2.3222599029541016, "learning_rate": 2.564663903103838e-06, "loss": 10.7422, "step": 347870 }, { "epoch": 0.7027396098045791, "grad_norm": 3.153733015060425, "learning_rate": 2.564359047322158e-06, "loss": 10.4594, "step": 347880 }, { "epoch": 0.7027598104372629, "grad_norm": 310.48565673828125, "learning_rate": 2.5640542034114955e-06, "loss": 13.2866, "step": 347890 }, { "epoch": 0.7027800110699467, "grad_norm": 368.23065185546875, "learning_rate": 2.5637493713733376e-06, "loss": 20.0134, "step": 347900 }, { "epoch": 0.7028002117026305, "grad_norm": 217.38487243652344, "learning_rate": 2.5634445512091733e-06, "loss": 12.3388, "step": 347910 }, { "epoch": 0.7028204123353143, "grad_norm": 1.3076226711273193, "learning_rate": 2.563139742920481e-06, "loss": 13.5034, "step": 347920 }, { "epoch": 0.7028406129679982, "grad_norm": 269.5832824707031, "learning_rate": 2.5628349465087498e-06, "loss": 20.369, "step": 347930 }, { "epoch": 0.702860813600682, "grad_norm": 1015.3848266601562, "learning_rate": 2.5625301619754678e-06, "loss": 37.5008, "step": 347940 }, { "epoch": 0.7028810142333658, "grad_norm": 353.0274658203125, "learning_rate": 2.5622253893221176e-06, "loss": 21.5301, "step": 347950 }, { "epoch": 0.7029012148660496, "grad_norm": 160.7222900390625, "learning_rate": 2.561920628550184e-06, "loss": 16.6286, "step": 347960 }, { "epoch": 0.7029214154987334, "grad_norm": 95.90129852294922, "learning_rate": 2.5616158796611527e-06, "loss": 15.1646, "step": 347970 }, { "epoch": 0.7029416161314173, 
"grad_norm": 433.74139404296875, "learning_rate": 2.5613111426565144e-06, "loss": 21.9443, "step": 347980 }, { "epoch": 0.7029618167641011, "grad_norm": 260.3633728027344, "learning_rate": 2.5610064175377456e-06, "loss": 18.6269, "step": 347990 }, { "epoch": 0.7029820173967849, "grad_norm": 445.6225891113281, "learning_rate": 2.560701704306336e-06, "loss": 16.5316, "step": 348000 }, { "epoch": 0.7030022180294687, "grad_norm": 217.5462646484375, "learning_rate": 2.5603970029637727e-06, "loss": 12.7375, "step": 348010 }, { "epoch": 0.7030224186621525, "grad_norm": 435.4579162597656, "learning_rate": 2.5600923135115374e-06, "loss": 37.3925, "step": 348020 }, { "epoch": 0.7030426192948364, "grad_norm": 334.6219177246094, "learning_rate": 2.5597876359511153e-06, "loss": 22.5415, "step": 348030 }, { "epoch": 0.7030628199275202, "grad_norm": 241.5675811767578, "learning_rate": 2.5594829702839937e-06, "loss": 19.2447, "step": 348040 }, { "epoch": 0.703083020560204, "grad_norm": 163.2465362548828, "learning_rate": 2.5591783165116563e-06, "loss": 11.7875, "step": 348050 }, { "epoch": 0.7031032211928877, "grad_norm": 132.82749938964844, "learning_rate": 2.5588736746355858e-06, "loss": 21.8375, "step": 348060 }, { "epoch": 0.7031234218255715, "grad_norm": 150.1280975341797, "learning_rate": 2.5585690446572708e-06, "loss": 26.0591, "step": 348070 }, { "epoch": 0.7031436224582553, "grad_norm": 583.20654296875, "learning_rate": 2.558264426578192e-06, "loss": 19.3685, "step": 348080 }, { "epoch": 0.7031638230909392, "grad_norm": 589.6922607421875, "learning_rate": 2.557959820399839e-06, "loss": 24.9182, "step": 348090 }, { "epoch": 0.703184023723623, "grad_norm": 358.9769287109375, "learning_rate": 2.557655226123693e-06, "loss": 13.0542, "step": 348100 }, { "epoch": 0.7032042243563068, "grad_norm": 482.1541442871094, "learning_rate": 2.5573506437512374e-06, "loss": 32.9986, "step": 348110 }, { "epoch": 0.7032244249889906, "grad_norm": 782.5233764648438, "learning_rate": 2.55704607328396e-06, "loss": 23.4075, "step": 348120 }, { "epoch": 0.7032446256216744, "grad_norm": 537.6296997070312, "learning_rate": 2.556741514723342e-06, "loss": 31.0836, "step": 348130 }, { "epoch": 0.7032648262543583, "grad_norm": 13.034551620483398, "learning_rate": 2.556436968070872e-06, "loss": 17.012, "step": 348140 }, { "epoch": 0.7032850268870421, "grad_norm": 384.6843566894531, "learning_rate": 2.55613243332803e-06, "loss": 18.9023, "step": 348150 }, { "epoch": 0.7033052275197259, "grad_norm": 142.6124267578125, "learning_rate": 2.5558279104963037e-06, "loss": 15.5146, "step": 348160 }, { "epoch": 0.7033254281524097, "grad_norm": 686.4570922851562, "learning_rate": 2.5555233995771757e-06, "loss": 19.0457, "step": 348170 }, { "epoch": 0.7033456287850935, "grad_norm": 362.68115234375, "learning_rate": 2.555218900572128e-06, "loss": 10.2994, "step": 348180 }, { "epoch": 0.7033658294177774, "grad_norm": 10.302240371704102, "learning_rate": 2.5549144134826487e-06, "loss": 18.6784, "step": 348190 }, { "epoch": 0.7033860300504612, "grad_norm": 609.0881958007812, "learning_rate": 2.5546099383102206e-06, "loss": 16.0359, "step": 348200 }, { "epoch": 0.703406230683145, "grad_norm": 620.99609375, "learning_rate": 2.5543054750563246e-06, "loss": 13.1333, "step": 348210 }, { "epoch": 0.7034264313158288, "grad_norm": 14.392234802246094, "learning_rate": 2.5540010237224476e-06, "loss": 13.039, "step": 348220 }, { "epoch": 0.7034466319485126, "grad_norm": 644.1470947265625, "learning_rate": 2.5536965843100764e-06, "loss": 18.3325, "step": 
348230 }, { "epoch": 0.7034668325811965, "grad_norm": 750.2973022460938, "learning_rate": 2.5533921568206876e-06, "loss": 13.2997, "step": 348240 }, { "epoch": 0.7034870332138803, "grad_norm": 251.7254180908203, "learning_rate": 2.5530877412557684e-06, "loss": 12.3368, "step": 348250 }, { "epoch": 0.7035072338465641, "grad_norm": 288.3773193359375, "learning_rate": 2.5527833376168055e-06, "loss": 10.5416, "step": 348260 }, { "epoch": 0.7035274344792479, "grad_norm": 314.669189453125, "learning_rate": 2.552478945905279e-06, "loss": 14.8445, "step": 348270 }, { "epoch": 0.7035476351119317, "grad_norm": 538.0570068359375, "learning_rate": 2.5521745661226717e-06, "loss": 14.2398, "step": 348280 }, { "epoch": 0.7035678357446156, "grad_norm": 245.4825439453125, "learning_rate": 2.5518701982704684e-06, "loss": 12.9883, "step": 348290 }, { "epoch": 0.7035880363772994, "grad_norm": 530.113525390625, "learning_rate": 2.5515658423501573e-06, "loss": 14.3787, "step": 348300 }, { "epoch": 0.7036082370099832, "grad_norm": 60.87131118774414, "learning_rate": 2.551261498363213e-06, "loss": 25.7569, "step": 348310 }, { "epoch": 0.7036284376426669, "grad_norm": 302.0447692871094, "learning_rate": 2.5509571663111233e-06, "loss": 17.5409, "step": 348320 }, { "epoch": 0.7036486382753507, "grad_norm": 573.7368774414062, "learning_rate": 2.5506528461953726e-06, "loss": 7.0166, "step": 348330 }, { "epoch": 0.7036688389080346, "grad_norm": 41.724220275878906, "learning_rate": 2.5503485380174443e-06, "loss": 26.0337, "step": 348340 }, { "epoch": 0.7036890395407184, "grad_norm": 337.8447570800781, "learning_rate": 2.5500442417788176e-06, "loss": 15.9246, "step": 348350 }, { "epoch": 0.7037092401734022, "grad_norm": 222.74319458007812, "learning_rate": 2.549739957480979e-06, "loss": 25.3731, "step": 348360 }, { "epoch": 0.703729440806086, "grad_norm": 106.73396301269531, "learning_rate": 2.549435685125412e-06, "loss": 13.1698, "step": 348370 }, { "epoch": 0.7037496414387698, "grad_norm": 35.89967727661133, "learning_rate": 2.5491314247135955e-06, "loss": 19.4369, "step": 348380 }, { "epoch": 0.7037698420714537, "grad_norm": 150.50033569335938, "learning_rate": 2.5488271762470172e-06, "loss": 37.7394, "step": 348390 }, { "epoch": 0.7037900427041375, "grad_norm": 311.3075866699219, "learning_rate": 2.5485229397271567e-06, "loss": 19.6422, "step": 348400 }, { "epoch": 0.7038102433368213, "grad_norm": 756.06884765625, "learning_rate": 2.5482187151554994e-06, "loss": 18.5072, "step": 348410 }, { "epoch": 0.7038304439695051, "grad_norm": 356.7169189453125, "learning_rate": 2.547914502533525e-06, "loss": 26.5348, "step": 348420 }, { "epoch": 0.7038506446021889, "grad_norm": 1041.09521484375, "learning_rate": 2.5476103018627195e-06, "loss": 17.3727, "step": 348430 }, { "epoch": 0.7038708452348728, "grad_norm": 146.49102783203125, "learning_rate": 2.547306113144564e-06, "loss": 24.9171, "step": 348440 }, { "epoch": 0.7038910458675566, "grad_norm": 1084.015380859375, "learning_rate": 2.54700193638054e-06, "loss": 26.2919, "step": 348450 }, { "epoch": 0.7039112465002404, "grad_norm": 119.42090606689453, "learning_rate": 2.546697771572132e-06, "loss": 18.9228, "step": 348460 }, { "epoch": 0.7039314471329242, "grad_norm": 465.013427734375, "learning_rate": 2.5463936187208198e-06, "loss": 21.561, "step": 348470 }, { "epoch": 0.703951647765608, "grad_norm": 510.8338317871094, "learning_rate": 2.54608947782809e-06, "loss": 22.8123, "step": 348480 }, { "epoch": 0.7039718483982919, "grad_norm": 240.90560913085938, "learning_rate": 
2.5457853488954214e-06, "loss": 32.2432, "step": 348490 }, { "epoch": 0.7039920490309757, "grad_norm": 242.47865295410156, "learning_rate": 2.545481231924296e-06, "loss": 32.7862, "step": 348500 }, { "epoch": 0.7040122496636595, "grad_norm": 326.0067443847656, "learning_rate": 2.5451771269161996e-06, "loss": 20.4139, "step": 348510 }, { "epoch": 0.7040324502963433, "grad_norm": 292.7941589355469, "learning_rate": 2.544873033872611e-06, "loss": 21.531, "step": 348520 }, { "epoch": 0.7040526509290271, "grad_norm": 296.0098571777344, "learning_rate": 2.5445689527950135e-06, "loss": 22.2272, "step": 348530 }, { "epoch": 0.704072851561711, "grad_norm": 405.1947937011719, "learning_rate": 2.5442648836848877e-06, "loss": 17.4805, "step": 348540 }, { "epoch": 0.7040930521943948, "grad_norm": 143.13543701171875, "learning_rate": 2.5439608265437186e-06, "loss": 13.4997, "step": 348550 }, { "epoch": 0.7041132528270786, "grad_norm": 540.0049438476562, "learning_rate": 2.5436567813729877e-06, "loss": 13.2646, "step": 348560 }, { "epoch": 0.7041334534597624, "grad_norm": 531.985107421875, "learning_rate": 2.543352748174173e-06, "loss": 20.3237, "step": 348570 }, { "epoch": 0.7041536540924461, "grad_norm": 654.4515380859375, "learning_rate": 2.54304872694876e-06, "loss": 18.5625, "step": 348580 }, { "epoch": 0.7041738547251299, "grad_norm": 200.78884887695312, "learning_rate": 2.5427447176982323e-06, "loss": 24.3229, "step": 348590 }, { "epoch": 0.7041940553578138, "grad_norm": 525.7599487304688, "learning_rate": 2.5424407204240653e-06, "loss": 23.0156, "step": 348600 }, { "epoch": 0.7042142559904976, "grad_norm": 540.3980712890625, "learning_rate": 2.542136735127744e-06, "loss": 11.9227, "step": 348610 }, { "epoch": 0.7042344566231814, "grad_norm": 221.6465301513672, "learning_rate": 2.541832761810753e-06, "loss": 31.0744, "step": 348620 }, { "epoch": 0.7042546572558652, "grad_norm": 542.7943115234375, "learning_rate": 2.5415288004745697e-06, "loss": 17.2589, "step": 348630 }, { "epoch": 0.704274857888549, "grad_norm": 391.7570495605469, "learning_rate": 2.541224851120676e-06, "loss": 16.0362, "step": 348640 }, { "epoch": 0.7042950585212329, "grad_norm": 17.678213119506836, "learning_rate": 2.540920913750555e-06, "loss": 13.9695, "step": 348650 }, { "epoch": 0.7043152591539167, "grad_norm": 744.2957153320312, "learning_rate": 2.5406169883656883e-06, "loss": 25.7589, "step": 348660 }, { "epoch": 0.7043354597866005, "grad_norm": 141.6887969970703, "learning_rate": 2.5403130749675537e-06, "loss": 12.5907, "step": 348670 }, { "epoch": 0.7043556604192843, "grad_norm": 115.41643524169922, "learning_rate": 2.540009173557637e-06, "loss": 14.5657, "step": 348680 }, { "epoch": 0.7043758610519681, "grad_norm": 8.622913360595703, "learning_rate": 2.5397052841374147e-06, "loss": 10.0912, "step": 348690 }, { "epoch": 0.704396061684652, "grad_norm": 1027.541015625, "learning_rate": 2.539401406708373e-06, "loss": 32.8672, "step": 348700 }, { "epoch": 0.7044162623173358, "grad_norm": 919.50244140625, "learning_rate": 2.5390975412719897e-06, "loss": 17.1704, "step": 348710 }, { "epoch": 0.7044364629500196, "grad_norm": 239.37896728515625, "learning_rate": 2.5387936878297452e-06, "loss": 51.5828, "step": 348720 }, { "epoch": 0.7044566635827034, "grad_norm": 619.6591186523438, "learning_rate": 2.5384898463831237e-06, "loss": 20.6317, "step": 348730 }, { "epoch": 0.7044768642153872, "grad_norm": 422.139404296875, "learning_rate": 2.538186016933602e-06, "loss": 24.9099, "step": 348740 }, { "epoch": 0.7044970648480711, 
"grad_norm": 387.4186706542969, "learning_rate": 2.5378821994826654e-06, "loss": 18.9311, "step": 348750 }, { "epoch": 0.7045172654807549, "grad_norm": 963.0557250976562, "learning_rate": 2.53757839403179e-06, "loss": 23.9692, "step": 348760 }, { "epoch": 0.7045374661134387, "grad_norm": 423.4918518066406, "learning_rate": 2.5372746005824605e-06, "loss": 13.7943, "step": 348770 }, { "epoch": 0.7045576667461225, "grad_norm": 134.3823699951172, "learning_rate": 2.5369708191361565e-06, "loss": 13.4868, "step": 348780 }, { "epoch": 0.7045778673788063, "grad_norm": 156.3392791748047, "learning_rate": 2.5366670496943557e-06, "loss": 32.6647, "step": 348790 }, { "epoch": 0.7045980680114902, "grad_norm": 231.02288818359375, "learning_rate": 2.536363292258543e-06, "loss": 12.9722, "step": 348800 }, { "epoch": 0.704618268644174, "grad_norm": 361.2793884277344, "learning_rate": 2.5360595468301966e-06, "loss": 12.8922, "step": 348810 }, { "epoch": 0.7046384692768578, "grad_norm": 13.414840698242188, "learning_rate": 2.5357558134107958e-06, "loss": 13.9536, "step": 348820 }, { "epoch": 0.7046586699095415, "grad_norm": 413.43621826171875, "learning_rate": 2.5354520920018215e-06, "loss": 20.3253, "step": 348830 }, { "epoch": 0.7046788705422253, "grad_norm": 291.3251037597656, "learning_rate": 2.5351483826047595e-06, "loss": 15.2389, "step": 348840 }, { "epoch": 0.7046990711749092, "grad_norm": 378.5207214355469, "learning_rate": 2.5348446852210807e-06, "loss": 26.6477, "step": 348850 }, { "epoch": 0.704719271807593, "grad_norm": 36.3099365234375, "learning_rate": 2.5345409998522704e-06, "loss": 9.6446, "step": 348860 }, { "epoch": 0.7047394724402768, "grad_norm": 485.4751281738281, "learning_rate": 2.53423732649981e-06, "loss": 27.9962, "step": 348870 }, { "epoch": 0.7047596730729606, "grad_norm": 662.0821533203125, "learning_rate": 2.533933665165178e-06, "loss": 15.3202, "step": 348880 }, { "epoch": 0.7047798737056444, "grad_norm": 298.1793212890625, "learning_rate": 2.5336300158498518e-06, "loss": 22.0453, "step": 348890 }, { "epoch": 0.7048000743383283, "grad_norm": 718.59423828125, "learning_rate": 2.533326378555314e-06, "loss": 21.5961, "step": 348900 }, { "epoch": 0.7048202749710121, "grad_norm": 336.1101379394531, "learning_rate": 2.5330227532830483e-06, "loss": 13.5072, "step": 348910 }, { "epoch": 0.7048404756036959, "grad_norm": 305.3926696777344, "learning_rate": 2.5327191400345262e-06, "loss": 14.4775, "step": 348920 }, { "epoch": 0.7048606762363797, "grad_norm": 873.168701171875, "learning_rate": 2.5324155388112326e-06, "loss": 31.5757, "step": 348930 }, { "epoch": 0.7048808768690635, "grad_norm": 1145.8697509765625, "learning_rate": 2.5321119496146472e-06, "loss": 23.1881, "step": 348940 }, { "epoch": 0.7049010775017474, "grad_norm": 1285.3612060546875, "learning_rate": 2.5318083724462495e-06, "loss": 27.1833, "step": 348950 }, { "epoch": 0.7049212781344312, "grad_norm": 328.0639343261719, "learning_rate": 2.5315048073075166e-06, "loss": 26.5006, "step": 348960 }, { "epoch": 0.704941478767115, "grad_norm": 371.9706115722656, "learning_rate": 2.531201254199932e-06, "loss": 16.9027, "step": 348970 }, { "epoch": 0.7049616793997988, "grad_norm": 533.75927734375, "learning_rate": 2.5308977131249724e-06, "loss": 18.4212, "step": 348980 }, { "epoch": 0.7049818800324826, "grad_norm": 897.893798828125, "learning_rate": 2.5305941840841163e-06, "loss": 23.0169, "step": 348990 }, { "epoch": 0.7050020806651665, "grad_norm": 179.62530517578125, "learning_rate": 2.5302906670788463e-06, "loss": 17.0761, 
"step": 349000 }, { "epoch": 0.7050222812978503, "grad_norm": 224.842041015625, "learning_rate": 2.5299871621106387e-06, "loss": 18.5162, "step": 349010 }, { "epoch": 0.7050424819305341, "grad_norm": 158.41604614257812, "learning_rate": 2.529683669180976e-06, "loss": 13.6336, "step": 349020 }, { "epoch": 0.7050626825632179, "grad_norm": 224.61642456054688, "learning_rate": 2.5293801882913327e-06, "loss": 12.4215, "step": 349030 }, { "epoch": 0.7050828831959017, "grad_norm": 507.733642578125, "learning_rate": 2.529076719443193e-06, "loss": 9.5354, "step": 349040 }, { "epoch": 0.7051030838285856, "grad_norm": 302.4479064941406, "learning_rate": 2.528773262638034e-06, "loss": 14.3375, "step": 349050 }, { "epoch": 0.7051232844612694, "grad_norm": 642.5108642578125, "learning_rate": 2.5284698178773327e-06, "loss": 25.7173, "step": 349060 }, { "epoch": 0.7051434850939532, "grad_norm": 372.1785583496094, "learning_rate": 2.5281663851625703e-06, "loss": 20.8116, "step": 349070 }, { "epoch": 0.705163685726637, "grad_norm": 587.0077514648438, "learning_rate": 2.5278629644952245e-06, "loss": 19.7193, "step": 349080 }, { "epoch": 0.7051838863593207, "grad_norm": 301.967041015625, "learning_rate": 2.527559555876776e-06, "loss": 16.2101, "step": 349090 }, { "epoch": 0.7052040869920045, "grad_norm": 286.00799560546875, "learning_rate": 2.527256159308703e-06, "loss": 17.1478, "step": 349100 }, { "epoch": 0.7052242876246884, "grad_norm": 474.2564392089844, "learning_rate": 2.5269527747924816e-06, "loss": 38.9327, "step": 349110 }, { "epoch": 0.7052444882573722, "grad_norm": 166.05015563964844, "learning_rate": 2.526649402329594e-06, "loss": 19.6407, "step": 349120 }, { "epoch": 0.705264688890056, "grad_norm": 138.1863555908203, "learning_rate": 2.526346041921518e-06, "loss": 20.5006, "step": 349130 }, { "epoch": 0.7052848895227398, "grad_norm": 414.12799072265625, "learning_rate": 2.5260426935697286e-06, "loss": 24.341, "step": 349140 }, { "epoch": 0.7053050901554236, "grad_norm": 475.72662353515625, "learning_rate": 2.5257393572757073e-06, "loss": 19.4902, "step": 349150 }, { "epoch": 0.7053252907881075, "grad_norm": 261.77862548828125, "learning_rate": 2.5254360330409343e-06, "loss": 15.5935, "step": 349160 }, { "epoch": 0.7053454914207913, "grad_norm": 126.8353500366211, "learning_rate": 2.5251327208668856e-06, "loss": 19.4454, "step": 349170 }, { "epoch": 0.7053656920534751, "grad_norm": 1070.47802734375, "learning_rate": 2.5248294207550383e-06, "loss": 18.289, "step": 349180 }, { "epoch": 0.7053858926861589, "grad_norm": 850.1143798828125, "learning_rate": 2.5245261327068736e-06, "loss": 20.9405, "step": 349190 }, { "epoch": 0.7054060933188427, "grad_norm": 292.16888427734375, "learning_rate": 2.524222856723869e-06, "loss": 10.4333, "step": 349200 }, { "epoch": 0.7054262939515266, "grad_norm": 822.7456665039062, "learning_rate": 2.5239195928075e-06, "loss": 17.5991, "step": 349210 }, { "epoch": 0.7054464945842104, "grad_norm": 196.78997802734375, "learning_rate": 2.5236163409592464e-06, "loss": 22.1525, "step": 349220 }, { "epoch": 0.7054666952168942, "grad_norm": 224.603515625, "learning_rate": 2.523313101180588e-06, "loss": 20.539, "step": 349230 }, { "epoch": 0.705486895849578, "grad_norm": 231.63055419921875, "learning_rate": 2.5230098734730014e-06, "loss": 7.1304, "step": 349240 }, { "epoch": 0.7055070964822618, "grad_norm": 544.2684936523438, "learning_rate": 2.5227066578379624e-06, "loss": 22.3781, "step": 349250 }, { "epoch": 0.7055272971149457, "grad_norm": 375.712158203125, "learning_rate": 
2.522403454276952e-06, "loss": 16.2288, "step": 349260 }, { "epoch": 0.7055474977476295, "grad_norm": 286.46942138671875, "learning_rate": 2.522100262791447e-06, "loss": 15.4103, "step": 349270 }, { "epoch": 0.7055676983803133, "grad_norm": 307.37689208984375, "learning_rate": 2.521797083382923e-06, "loss": 23.6451, "step": 349280 }, { "epoch": 0.7055878990129971, "grad_norm": 483.400146484375, "learning_rate": 2.521493916052862e-06, "loss": 30.4754, "step": 349290 }, { "epoch": 0.7056080996456809, "grad_norm": 425.2827453613281, "learning_rate": 2.5211907608027366e-06, "loss": 23.0068, "step": 349300 }, { "epoch": 0.7056283002783648, "grad_norm": 26.249313354492188, "learning_rate": 2.5208876176340285e-06, "loss": 19.1362, "step": 349310 }, { "epoch": 0.7056485009110486, "grad_norm": 129.19873046875, "learning_rate": 2.5205844865482115e-06, "loss": 11.3333, "step": 349320 }, { "epoch": 0.7056687015437324, "grad_norm": 255.25157165527344, "learning_rate": 2.5202813675467675e-06, "loss": 49.9998, "step": 349330 }, { "epoch": 0.7056889021764161, "grad_norm": 575.8396606445312, "learning_rate": 2.5199782606311708e-06, "loss": 28.7747, "step": 349340 }, { "epoch": 0.7057091028090999, "grad_norm": 208.08485412597656, "learning_rate": 2.5196751658028972e-06, "loss": 18.6068, "step": 349350 }, { "epoch": 0.7057293034417838, "grad_norm": 50.586368560791016, "learning_rate": 2.5193720830634284e-06, "loss": 18.1747, "step": 349360 }, { "epoch": 0.7057495040744676, "grad_norm": 54.649024963378906, "learning_rate": 2.5190690124142368e-06, "loss": 9.2091, "step": 349370 }, { "epoch": 0.7057697047071514, "grad_norm": 223.4710235595703, "learning_rate": 2.5187659538568043e-06, "loss": 12.027, "step": 349380 }, { "epoch": 0.7057899053398352, "grad_norm": 315.0380554199219, "learning_rate": 2.518462907392606e-06, "loss": 15.8469, "step": 349390 }, { "epoch": 0.705810105972519, "grad_norm": 687.1735229492188, "learning_rate": 2.518159873023116e-06, "loss": 11.4713, "step": 349400 }, { "epoch": 0.7058303066052029, "grad_norm": 8.193793296813965, "learning_rate": 2.5178568507498156e-06, "loss": 19.8824, "step": 349410 }, { "epoch": 0.7058505072378867, "grad_norm": 761.634521484375, "learning_rate": 2.51755384057418e-06, "loss": 25.2532, "step": 349420 }, { "epoch": 0.7058707078705705, "grad_norm": 233.63406372070312, "learning_rate": 2.5172508424976837e-06, "loss": 25.4617, "step": 349430 }, { "epoch": 0.7058909085032543, "grad_norm": 350.4383544921875, "learning_rate": 2.5169478565218065e-06, "loss": 13.2413, "step": 349440 }, { "epoch": 0.7059111091359381, "grad_norm": 474.5752258300781, "learning_rate": 2.5166448826480274e-06, "loss": 9.202, "step": 349450 }, { "epoch": 0.705931309768622, "grad_norm": 298.2665100097656, "learning_rate": 2.516341920877816e-06, "loss": 16.7047, "step": 349460 }, { "epoch": 0.7059515104013058, "grad_norm": 440.1244201660156, "learning_rate": 2.5160389712126537e-06, "loss": 26.5362, "step": 349470 }, { "epoch": 0.7059717110339896, "grad_norm": 346.0702819824219, "learning_rate": 2.5157360336540175e-06, "loss": 18.0136, "step": 349480 }, { "epoch": 0.7059919116666734, "grad_norm": 612.431640625, "learning_rate": 2.5154331082033823e-06, "loss": 12.4774, "step": 349490 }, { "epoch": 0.7060121122993572, "grad_norm": 118.4017333984375, "learning_rate": 2.5151301948622235e-06, "loss": 17.7558, "step": 349500 }, { "epoch": 0.706032312932041, "grad_norm": 542.0604248046875, "learning_rate": 2.5148272936320186e-06, "loss": 17.6797, "step": 349510 }, { "epoch": 0.7060525135647249, 
"grad_norm": 158.3154296875, "learning_rate": 2.514524404514248e-06, "loss": 8.8993, "step": 349520 }, { "epoch": 0.7060727141974087, "grad_norm": 5338.759765625, "learning_rate": 2.51422152751038e-06, "loss": 12.7294, "step": 349530 }, { "epoch": 0.7060929148300925, "grad_norm": 98.99118041992188, "learning_rate": 2.513918662621894e-06, "loss": 26.2244, "step": 349540 }, { "epoch": 0.7061131154627763, "grad_norm": 849.1727294921875, "learning_rate": 2.51361580985027e-06, "loss": 18.898, "step": 349550 }, { "epoch": 0.7061333160954602, "grad_norm": 421.4109802246094, "learning_rate": 2.5133129691969806e-06, "loss": 15.1216, "step": 349560 }, { "epoch": 0.706153516728144, "grad_norm": 1055.987060546875, "learning_rate": 2.5130101406635e-06, "loss": 22.7242, "step": 349570 }, { "epoch": 0.7061737173608278, "grad_norm": 44.73744201660156, "learning_rate": 2.5127073242513083e-06, "loss": 37.3095, "step": 349580 }, { "epoch": 0.7061939179935116, "grad_norm": 306.37249755859375, "learning_rate": 2.5124045199618795e-06, "loss": 20.5377, "step": 349590 }, { "epoch": 0.7062141186261953, "grad_norm": 29.394678115844727, "learning_rate": 2.5121017277966875e-06, "loss": 18.109, "step": 349600 }, { "epoch": 0.7062343192588791, "grad_norm": 267.1628112792969, "learning_rate": 2.5117989477572126e-06, "loss": 14.5406, "step": 349610 }, { "epoch": 0.706254519891563, "grad_norm": 317.5898742675781, "learning_rate": 2.5114961798449245e-06, "loss": 11.5739, "step": 349620 }, { "epoch": 0.7062747205242468, "grad_norm": 359.2458801269531, "learning_rate": 2.511193424061305e-06, "loss": 33.4177, "step": 349630 }, { "epoch": 0.7062949211569306, "grad_norm": 533.362548828125, "learning_rate": 2.510890680407825e-06, "loss": 12.9816, "step": 349640 }, { "epoch": 0.7063151217896144, "grad_norm": 253.7837677001953, "learning_rate": 2.5105879488859635e-06, "loss": 16.3754, "step": 349650 }, { "epoch": 0.7063353224222982, "grad_norm": 256.9443664550781, "learning_rate": 2.510285229497195e-06, "loss": 12.2255, "step": 349660 }, { "epoch": 0.7063555230549821, "grad_norm": 338.7027282714844, "learning_rate": 2.5099825222429918e-06, "loss": 10.8429, "step": 349670 }, { "epoch": 0.7063757236876659, "grad_norm": 578.6676025390625, "learning_rate": 2.5096798271248337e-06, "loss": 14.5646, "step": 349680 }, { "epoch": 0.7063959243203497, "grad_norm": 226.984619140625, "learning_rate": 2.5093771441441923e-06, "loss": 8.6617, "step": 349690 }, { "epoch": 0.7064161249530335, "grad_norm": 283.9709777832031, "learning_rate": 2.509074473302546e-06, "loss": 8.8569, "step": 349700 }, { "epoch": 0.7064363255857173, "grad_norm": 755.0188598632812, "learning_rate": 2.5087718146013697e-06, "loss": 19.6124, "step": 349710 }, { "epoch": 0.7064565262184012, "grad_norm": 257.248046875, "learning_rate": 2.5084691680421346e-06, "loss": 29.3938, "step": 349720 }, { "epoch": 0.706476726851085, "grad_norm": 141.60836791992188, "learning_rate": 2.508166533626321e-06, "loss": 10.2731, "step": 349730 }, { "epoch": 0.7064969274837688, "grad_norm": 189.88119506835938, "learning_rate": 2.5078639113554017e-06, "loss": 15.4532, "step": 349740 }, { "epoch": 0.7065171281164526, "grad_norm": 423.1811828613281, "learning_rate": 2.507561301230849e-06, "loss": 34.6921, "step": 349750 }, { "epoch": 0.7065373287491364, "grad_norm": 60.76221466064453, "learning_rate": 2.5072587032541407e-06, "loss": 28.9149, "step": 349760 }, { "epoch": 0.7065575293818203, "grad_norm": 607.290283203125, "learning_rate": 2.5069561174267524e-06, "loss": 22.9158, "step": 349770 }, { 
"epoch": 0.7065777300145041, "grad_norm": 565.7552490234375, "learning_rate": 2.5066535437501584e-06, "loss": 15.0228, "step": 349780 }, { "epoch": 0.7065979306471879, "grad_norm": 172.1897735595703, "learning_rate": 2.50635098222583e-06, "loss": 16.5305, "step": 349790 }, { "epoch": 0.7066181312798717, "grad_norm": 397.20611572265625, "learning_rate": 2.506048432855247e-06, "loss": 27.4047, "step": 349800 }, { "epoch": 0.7066383319125555, "grad_norm": 185.18716430664062, "learning_rate": 2.5057458956398806e-06, "loss": 17.7929, "step": 349810 }, { "epoch": 0.7066585325452394, "grad_norm": 0.0, "learning_rate": 2.5054433705812054e-06, "loss": 13.086, "step": 349820 }, { "epoch": 0.7066787331779232, "grad_norm": 294.8137512207031, "learning_rate": 2.505140857680696e-06, "loss": 28.368, "step": 349830 }, { "epoch": 0.706698933810607, "grad_norm": 364.5111389160156, "learning_rate": 2.504838356939829e-06, "loss": 14.5719, "step": 349840 }, { "epoch": 0.7067191344432907, "grad_norm": 977.1616821289062, "learning_rate": 2.504535868360078e-06, "loss": 24.5785, "step": 349850 }, { "epoch": 0.7067393350759745, "grad_norm": 155.75967407226562, "learning_rate": 2.504233391942914e-06, "loss": 18.3162, "step": 349860 }, { "epoch": 0.7067595357086583, "grad_norm": 290.4385986328125, "learning_rate": 2.503930927689816e-06, "loss": 18.4517, "step": 349870 }, { "epoch": 0.7067797363413422, "grad_norm": 437.519775390625, "learning_rate": 2.503628475602256e-06, "loss": 19.7228, "step": 349880 }, { "epoch": 0.706799936974026, "grad_norm": 397.1817932128906, "learning_rate": 2.5033260356817056e-06, "loss": 18.5204, "step": 349890 }, { "epoch": 0.7068201376067098, "grad_norm": 295.46319580078125, "learning_rate": 2.5030236079296443e-06, "loss": 13.8248, "step": 349900 }, { "epoch": 0.7068403382393936, "grad_norm": 774.9948120117188, "learning_rate": 2.50272119234754e-06, "loss": 19.6042, "step": 349910 }, { "epoch": 0.7068605388720774, "grad_norm": 205.68801879882812, "learning_rate": 2.502418788936872e-06, "loss": 12.7801, "step": 349920 }, { "epoch": 0.7068807395047613, "grad_norm": 75.88280487060547, "learning_rate": 2.5021163976991103e-06, "loss": 9.6228, "step": 349930 }, { "epoch": 0.7069009401374451, "grad_norm": 474.43792724609375, "learning_rate": 2.501814018635732e-06, "loss": 23.6048, "step": 349940 }, { "epoch": 0.7069211407701289, "grad_norm": 470.50616455078125, "learning_rate": 2.5015116517482097e-06, "loss": 20.9527, "step": 349950 }, { "epoch": 0.7069413414028127, "grad_norm": 618.3411254882812, "learning_rate": 2.501209297038014e-06, "loss": 21.1948, "step": 349960 }, { "epoch": 0.7069615420354965, "grad_norm": 170.8068389892578, "learning_rate": 2.500906954506623e-06, "loss": 9.9451, "step": 349970 }, { "epoch": 0.7069817426681804, "grad_norm": 291.6849365234375, "learning_rate": 2.5006046241555073e-06, "loss": 9.2926, "step": 349980 }, { "epoch": 0.7070019433008642, "grad_norm": 490.95391845703125, "learning_rate": 2.500302305986142e-06, "loss": 26.7192, "step": 349990 }, { "epoch": 0.707022143933548, "grad_norm": 476.1736145019531, "learning_rate": 2.5000000000000015e-06, "loss": 26.904, "step": 350000 }, { "epoch": 0.7070423445662318, "grad_norm": 111.90296936035156, "learning_rate": 2.499697706198555e-06, "loss": 16.3807, "step": 350010 }, { "epoch": 0.7070625451989156, "grad_norm": 402.1435546875, "learning_rate": 2.499395424583281e-06, "loss": 15.6745, "step": 350020 }, { "epoch": 0.7070827458315995, "grad_norm": 297.2471618652344, "learning_rate": 2.49909315515565e-06, "loss": 
14.4463, "step": 350030 }, { "epoch": 0.7071029464642833, "grad_norm": 164.84974670410156, "learning_rate": 2.498790897917134e-06, "loss": 36.1733, "step": 350040 }, { "epoch": 0.7071231470969671, "grad_norm": 350.8130187988281, "learning_rate": 2.4984886528692076e-06, "loss": 18.4681, "step": 350050 }, { "epoch": 0.7071433477296509, "grad_norm": 655.9428100585938, "learning_rate": 2.4981864200133483e-06, "loss": 19.3289, "step": 350060 }, { "epoch": 0.7071635483623347, "grad_norm": 495.717041015625, "learning_rate": 2.4978841993510213e-06, "loss": 18.2208, "step": 350070 }, { "epoch": 0.7071837489950186, "grad_norm": 129.1037139892578, "learning_rate": 2.4975819908837024e-06, "loss": 12.0158, "step": 350080 }, { "epoch": 0.7072039496277024, "grad_norm": 337.0894775390625, "learning_rate": 2.497279794612868e-06, "loss": 17.966, "step": 350090 }, { "epoch": 0.7072241502603862, "grad_norm": 510.8279113769531, "learning_rate": 2.496977610539988e-06, "loss": 27.7402, "step": 350100 }, { "epoch": 0.7072443508930699, "grad_norm": 960.7432861328125, "learning_rate": 2.496675438666534e-06, "loss": 19.3872, "step": 350110 }, { "epoch": 0.7072645515257537, "grad_norm": 236.84434509277344, "learning_rate": 2.49637327899398e-06, "loss": 46.7402, "step": 350120 }, { "epoch": 0.7072847521584376, "grad_norm": 832.6741333007812, "learning_rate": 2.4960711315238027e-06, "loss": 20.6005, "step": 350130 }, { "epoch": 0.7073049527911214, "grad_norm": 924.15576171875, "learning_rate": 2.495768996257467e-06, "loss": 26.5795, "step": 350140 }, { "epoch": 0.7073251534238052, "grad_norm": 359.6117858886719, "learning_rate": 2.49546687319645e-06, "loss": 18.2028, "step": 350150 }, { "epoch": 0.707345354056489, "grad_norm": 179.73480224609375, "learning_rate": 2.4951647623422256e-06, "loss": 25.4375, "step": 350160 }, { "epoch": 0.7073655546891728, "grad_norm": 489.7877197265625, "learning_rate": 2.4948626636962643e-06, "loss": 18.0493, "step": 350170 }, { "epoch": 0.7073857553218567, "grad_norm": 301.5377197265625, "learning_rate": 2.4945605772600364e-06, "loss": 19.2083, "step": 350180 }, { "epoch": 0.7074059559545405, "grad_norm": 364.8948974609375, "learning_rate": 2.494258503035018e-06, "loss": 13.9501, "step": 350190 }, { "epoch": 0.7074261565872243, "grad_norm": 448.95379638671875, "learning_rate": 2.49395644102268e-06, "loss": 19.7066, "step": 350200 }, { "epoch": 0.7074463572199081, "grad_norm": 423.24188232421875, "learning_rate": 2.493654391224493e-06, "loss": 42.3437, "step": 350210 }, { "epoch": 0.7074665578525919, "grad_norm": 222.1471710205078, "learning_rate": 2.493352353641932e-06, "loss": 20.5775, "step": 350220 }, { "epoch": 0.7074867584852758, "grad_norm": 150.0435333251953, "learning_rate": 2.4930503282764658e-06, "loss": 13.4709, "step": 350230 }, { "epoch": 0.7075069591179596, "grad_norm": 416.463623046875, "learning_rate": 2.49274831512957e-06, "loss": 14.5652, "step": 350240 }, { "epoch": 0.7075271597506434, "grad_norm": 228.19183349609375, "learning_rate": 2.492446314202713e-06, "loss": 9.9245, "step": 350250 }, { "epoch": 0.7075473603833272, "grad_norm": 288.11798095703125, "learning_rate": 2.4921443254973705e-06, "loss": 12.6007, "step": 350260 }, { "epoch": 0.707567561016011, "grad_norm": 140.6687774658203, "learning_rate": 2.491842349015012e-06, "loss": 13.3591, "step": 350270 }, { "epoch": 0.7075877616486949, "grad_norm": 645.8302612304688, "learning_rate": 2.4915403847571083e-06, "loss": 21.0156, "step": 350280 }, { "epoch": 0.7076079622813787, "grad_norm": 14.02106761932373, 
"learning_rate": 2.4912384327251344e-06, "loss": 9.1831, "step": 350290 }, { "epoch": 0.7076281629140625, "grad_norm": 0.8506768345832825, "learning_rate": 2.4909364929205575e-06, "loss": 19.6283, "step": 350300 }, { "epoch": 0.7076483635467463, "grad_norm": 635.20361328125, "learning_rate": 2.4906345653448545e-06, "loss": 22.548, "step": 350310 }, { "epoch": 0.7076685641794301, "grad_norm": 434.19683837890625, "learning_rate": 2.490332649999494e-06, "loss": 16.8567, "step": 350320 }, { "epoch": 0.707688764812114, "grad_norm": 349.81982421875, "learning_rate": 2.490030746885946e-06, "loss": 25.0367, "step": 350330 }, { "epoch": 0.7077089654447978, "grad_norm": 188.83731079101562, "learning_rate": 2.4897288560056854e-06, "loss": 11.8076, "step": 350340 }, { "epoch": 0.7077291660774816, "grad_norm": 285.6274719238281, "learning_rate": 2.489426977360182e-06, "loss": 21.5039, "step": 350350 }, { "epoch": 0.7077493667101654, "grad_norm": 272.9427795410156, "learning_rate": 2.4891251109509053e-06, "loss": 15.4717, "step": 350360 }, { "epoch": 0.7077695673428491, "grad_norm": 132.57620239257812, "learning_rate": 2.488823256779328e-06, "loss": 8.4129, "step": 350370 }, { "epoch": 0.707789767975533, "grad_norm": 69.85224914550781, "learning_rate": 2.488521414846923e-06, "loss": 14.1976, "step": 350380 }, { "epoch": 0.7078099686082168, "grad_norm": 81.13935852050781, "learning_rate": 2.488219585155161e-06, "loss": 17.9706, "step": 350390 }, { "epoch": 0.7078301692409006, "grad_norm": 784.9371948242188, "learning_rate": 2.48791776770551e-06, "loss": 25.0036, "step": 350400 }, { "epoch": 0.7078503698735844, "grad_norm": 618.5388793945312, "learning_rate": 2.4876159624994443e-06, "loss": 16.6287, "step": 350410 }, { "epoch": 0.7078705705062682, "grad_norm": 415.8648681640625, "learning_rate": 2.4873141695384346e-06, "loss": 19.047, "step": 350420 }, { "epoch": 0.707890771138952, "grad_norm": 415.9384460449219, "learning_rate": 2.4870123888239485e-06, "loss": 17.0729, "step": 350430 }, { "epoch": 0.7079109717716359, "grad_norm": 351.33514404296875, "learning_rate": 2.486710620357459e-06, "loss": 11.8749, "step": 350440 }, { "epoch": 0.7079311724043197, "grad_norm": 288.665283203125, "learning_rate": 2.4864088641404398e-06, "loss": 13.4284, "step": 350450 }, { "epoch": 0.7079513730370035, "grad_norm": 206.8216552734375, "learning_rate": 2.4861071201743586e-06, "loss": 10.4989, "step": 350460 }, { "epoch": 0.7079715736696873, "grad_norm": 333.7101135253906, "learning_rate": 2.4858053884606843e-06, "loss": 20.4974, "step": 350470 }, { "epoch": 0.7079917743023711, "grad_norm": 258.67803955078125, "learning_rate": 2.4855036690008918e-06, "loss": 5.2349, "step": 350480 }, { "epoch": 0.708011974935055, "grad_norm": 411.38092041015625, "learning_rate": 2.485201961796449e-06, "loss": 17.3271, "step": 350490 }, { "epoch": 0.7080321755677388, "grad_norm": 316.57391357421875, "learning_rate": 2.484900266848825e-06, "loss": 21.3045, "step": 350500 }, { "epoch": 0.7080523762004226, "grad_norm": 115.9952392578125, "learning_rate": 2.484598584159494e-06, "loss": 8.3172, "step": 350510 }, { "epoch": 0.7080725768331064, "grad_norm": 404.4654541015625, "learning_rate": 2.484296913729923e-06, "loss": 12.901, "step": 350520 }, { "epoch": 0.7080927774657902, "grad_norm": 265.7141418457031, "learning_rate": 2.4839952555615846e-06, "loss": 14.5914, "step": 350530 }, { "epoch": 0.7081129780984741, "grad_norm": 41.656776428222656, "learning_rate": 2.483693609655947e-06, "loss": 12.2818, "step": 350540 }, { "epoch": 
0.7081331787311579, "grad_norm": 391.55859375, "learning_rate": 2.4833919760144838e-06, "loss": 29.0186, "step": 350550 }, { "epoch": 0.7081533793638417, "grad_norm": 244.14015197753906, "learning_rate": 2.483090354638662e-06, "loss": 12.6377, "step": 350560 }, { "epoch": 0.7081735799965255, "grad_norm": 446.8510437011719, "learning_rate": 2.4827887455299516e-06, "loss": 12.4944, "step": 350570 }, { "epoch": 0.7081937806292093, "grad_norm": 274.75274658203125, "learning_rate": 2.4824871486898244e-06, "loss": 25.0839, "step": 350580 }, { "epoch": 0.7082139812618932, "grad_norm": 234.93728637695312, "learning_rate": 2.4821855641197483e-06, "loss": 32.2213, "step": 350590 }, { "epoch": 0.708234181894577, "grad_norm": 484.5325927734375, "learning_rate": 2.4818839918211963e-06, "loss": 15.9191, "step": 350600 }, { "epoch": 0.7082543825272608, "grad_norm": 192.70309448242188, "learning_rate": 2.4815824317956363e-06, "loss": 16.686, "step": 350610 }, { "epoch": 0.7082745831599445, "grad_norm": 0.34595391154289246, "learning_rate": 2.4812808840445357e-06, "loss": 15.7636, "step": 350620 }, { "epoch": 0.7082947837926283, "grad_norm": 106.8895492553711, "learning_rate": 2.480979348569369e-06, "loss": 21.4576, "step": 350630 }, { "epoch": 0.7083149844253122, "grad_norm": 388.26007080078125, "learning_rate": 2.480677825371603e-06, "loss": 27.7207, "step": 350640 }, { "epoch": 0.708335185057996, "grad_norm": 116.71707153320312, "learning_rate": 2.480376314452706e-06, "loss": 11.684, "step": 350650 }, { "epoch": 0.7083553856906798, "grad_norm": 438.4858093261719, "learning_rate": 2.48007481581415e-06, "loss": 31.7195, "step": 350660 }, { "epoch": 0.7083755863233636, "grad_norm": 350.0809020996094, "learning_rate": 2.479773329457406e-06, "loss": 21.4821, "step": 350670 }, { "epoch": 0.7083957869560474, "grad_norm": 65.26168060302734, "learning_rate": 2.4794718553839387e-06, "loss": 25.7347, "step": 350680 }, { "epoch": 0.7084159875887313, "grad_norm": 442.9814147949219, "learning_rate": 2.4791703935952193e-06, "loss": 16.2535, "step": 350690 }, { "epoch": 0.7084361882214151, "grad_norm": 259.0112609863281, "learning_rate": 2.4788689440927193e-06, "loss": 10.223, "step": 350700 }, { "epoch": 0.7084563888540989, "grad_norm": 447.5721435546875, "learning_rate": 2.478567506877907e-06, "loss": 14.2684, "step": 350710 }, { "epoch": 0.7084765894867827, "grad_norm": 725.1121826171875, "learning_rate": 2.478266081952248e-06, "loss": 8.9663, "step": 350720 }, { "epoch": 0.7084967901194665, "grad_norm": 563.68359375, "learning_rate": 2.477964669317215e-06, "loss": 15.6711, "step": 350730 }, { "epoch": 0.7085169907521504, "grad_norm": 408.3504638671875, "learning_rate": 2.4776632689742803e-06, "loss": 10.5074, "step": 350740 }, { "epoch": 0.7085371913848342, "grad_norm": 265.1654968261719, "learning_rate": 2.4773618809249045e-06, "loss": 26.6782, "step": 350750 }, { "epoch": 0.708557392017518, "grad_norm": 548.5407104492188, "learning_rate": 2.477060505170561e-06, "loss": 35.9407, "step": 350760 }, { "epoch": 0.7085775926502018, "grad_norm": 151.9861602783203, "learning_rate": 2.4767591417127207e-06, "loss": 18.9445, "step": 350770 }, { "epoch": 0.7085977932828856, "grad_norm": 320.5681457519531, "learning_rate": 2.4764577905528503e-06, "loss": 28.4419, "step": 350780 }, { "epoch": 0.7086179939155695, "grad_norm": 472.62884521484375, "learning_rate": 2.476156451692416e-06, "loss": 31.2559, "step": 350790 }, { "epoch": 0.7086381945482533, "grad_norm": 614.879638671875, "learning_rate": 2.4758551251328923e-06, "loss": 
19.3613, "step": 350800 }, { "epoch": 0.7086583951809371, "grad_norm": 126.76094818115234, "learning_rate": 2.4755538108757436e-06, "loss": 25.4135, "step": 350810 }, { "epoch": 0.7086785958136209, "grad_norm": 384.14593505859375, "learning_rate": 2.475252508922438e-06, "loss": 12.4958, "step": 350820 }, { "epoch": 0.7086987964463047, "grad_norm": 334.7979736328125, "learning_rate": 2.4749512192744473e-06, "loss": 9.4986, "step": 350830 }, { "epoch": 0.7087189970789886, "grad_norm": 213.6591033935547, "learning_rate": 2.474649941933236e-06, "loss": 18.3898, "step": 350840 }, { "epoch": 0.7087391977116724, "grad_norm": 289.82159423828125, "learning_rate": 2.4743486769002767e-06, "loss": 14.2574, "step": 350850 }, { "epoch": 0.7087593983443562, "grad_norm": 579.4589233398438, "learning_rate": 2.4740474241770333e-06, "loss": 14.4531, "step": 350860 }, { "epoch": 0.70877959897704, "grad_norm": 111.13056182861328, "learning_rate": 2.4737461837649784e-06, "loss": 16.4243, "step": 350870 }, { "epoch": 0.7087997996097237, "grad_norm": 424.7178039550781, "learning_rate": 2.4734449556655786e-06, "loss": 15.5977, "step": 350880 }, { "epoch": 0.7088200002424075, "grad_norm": 347.6862487792969, "learning_rate": 2.4731437398802998e-06, "loss": 13.8698, "step": 350890 }, { "epoch": 0.7088402008750914, "grad_norm": 163.52963256835938, "learning_rate": 2.4728425364106136e-06, "loss": 22.4221, "step": 350900 }, { "epoch": 0.7088604015077752, "grad_norm": 200.37506103515625, "learning_rate": 2.472541345257984e-06, "loss": 12.9967, "step": 350910 }, { "epoch": 0.708880602140459, "grad_norm": 203.79859924316406, "learning_rate": 2.4722401664238837e-06, "loss": 13.5091, "step": 350920 }, { "epoch": 0.7089008027731428, "grad_norm": 481.20965576171875, "learning_rate": 2.4719389999097787e-06, "loss": 11.0495, "step": 350930 }, { "epoch": 0.7089210034058266, "grad_norm": 283.4579772949219, "learning_rate": 2.471637845717134e-06, "loss": 16.7812, "step": 350940 }, { "epoch": 0.7089412040385105, "grad_norm": 273.2008361816406, "learning_rate": 2.471336703847422e-06, "loss": 11.4263, "step": 350950 }, { "epoch": 0.7089614046711943, "grad_norm": 108.56293487548828, "learning_rate": 2.4710355743021077e-06, "loss": 18.0725, "step": 350960 }, { "epoch": 0.7089816053038781, "grad_norm": 425.48089599609375, "learning_rate": 2.4707344570826576e-06, "loss": 24.4652, "step": 350970 }, { "epoch": 0.7090018059365619, "grad_norm": 156.72557067871094, "learning_rate": 2.470433352190541e-06, "loss": 15.688, "step": 350980 }, { "epoch": 0.7090220065692457, "grad_norm": 1711.9622802734375, "learning_rate": 2.470132259627227e-06, "loss": 35.6115, "step": 350990 }, { "epoch": 0.7090422072019296, "grad_norm": 322.0758361816406, "learning_rate": 2.469831179394182e-06, "loss": 19.3249, "step": 351000 }, { "epoch": 0.7090624078346134, "grad_norm": 184.63465881347656, "learning_rate": 2.469530111492871e-06, "loss": 16.4322, "step": 351010 }, { "epoch": 0.7090826084672972, "grad_norm": 368.8414611816406, "learning_rate": 2.4692290559247652e-06, "loss": 17.1046, "step": 351020 }, { "epoch": 0.709102809099981, "grad_norm": 373.5274353027344, "learning_rate": 2.4689280126913302e-06, "loss": 23.63, "step": 351030 }, { "epoch": 0.7091230097326648, "grad_norm": 268.5332336425781, "learning_rate": 2.4686269817940306e-06, "loss": 10.7349, "step": 351040 }, { "epoch": 0.7091432103653487, "grad_norm": 293.2986145019531, "learning_rate": 2.4683259632343363e-06, "loss": 20.8799, "step": 351050 }, { "epoch": 0.7091634109980325, "grad_norm": 
174.36912536621094, "learning_rate": 2.4680249570137166e-06, "loss": 23.0473, "step": 351060 }, { "epoch": 0.7091836116307163, "grad_norm": 113.24224853515625, "learning_rate": 2.467723963133636e-06, "loss": 19.4785, "step": 351070 }, { "epoch": 0.7092038122634001, "grad_norm": 129.29562377929688, "learning_rate": 2.4674229815955596e-06, "loss": 10.8498, "step": 351080 }, { "epoch": 0.7092240128960839, "grad_norm": 103.91205596923828, "learning_rate": 2.467122012400958e-06, "loss": 33.7763, "step": 351090 }, { "epoch": 0.7092442135287678, "grad_norm": 552.5250854492188, "learning_rate": 2.4668210555512974e-06, "loss": 14.3251, "step": 351100 }, { "epoch": 0.7092644141614516, "grad_norm": 110.7344741821289, "learning_rate": 2.466520111048041e-06, "loss": 16.1519, "step": 351110 }, { "epoch": 0.7092846147941354, "grad_norm": 321.0063171386719, "learning_rate": 2.46621917889266e-06, "loss": 13.9278, "step": 351120 }, { "epoch": 0.7093048154268191, "grad_norm": 1080.12841796875, "learning_rate": 2.4659182590866183e-06, "loss": 26.4543, "step": 351130 }, { "epoch": 0.7093250160595029, "grad_norm": 423.43524169921875, "learning_rate": 2.4656173516313852e-06, "loss": 12.3552, "step": 351140 }, { "epoch": 0.7093452166921868, "grad_norm": 663.8313598632812, "learning_rate": 2.465316456528424e-06, "loss": 22.1683, "step": 351150 }, { "epoch": 0.7093654173248706, "grad_norm": 132.00119018554688, "learning_rate": 2.465015573779205e-06, "loss": 32.2573, "step": 351160 }, { "epoch": 0.7093856179575544, "grad_norm": 402.5078125, "learning_rate": 2.464714703385192e-06, "loss": 11.4204, "step": 351170 }, { "epoch": 0.7094058185902382, "grad_norm": 302.7805480957031, "learning_rate": 2.4644138453478504e-06, "loss": 17.0685, "step": 351180 }, { "epoch": 0.709426019222922, "grad_norm": 346.5506591796875, "learning_rate": 2.464112999668651e-06, "loss": 16.594, "step": 351190 }, { "epoch": 0.7094462198556059, "grad_norm": 154.2192840576172, "learning_rate": 2.4638121663490546e-06, "loss": 26.4545, "step": 351200 }, { "epoch": 0.7094664204882897, "grad_norm": 383.23046875, "learning_rate": 2.463511345390532e-06, "loss": 8.9706, "step": 351210 }, { "epoch": 0.7094866211209735, "grad_norm": 396.2514343261719, "learning_rate": 2.463210536794547e-06, "loss": 13.9642, "step": 351220 }, { "epoch": 0.7095068217536573, "grad_norm": 270.4677429199219, "learning_rate": 2.4629097405625645e-06, "loss": 18.0537, "step": 351230 }, { "epoch": 0.7095270223863411, "grad_norm": 402.54486083984375, "learning_rate": 2.4626089566960546e-06, "loss": 13.869, "step": 351240 }, { "epoch": 0.709547223019025, "grad_norm": 592.2306518554688, "learning_rate": 2.462308185196481e-06, "loss": 14.6964, "step": 351250 }, { "epoch": 0.7095674236517088, "grad_norm": 229.1131134033203, "learning_rate": 2.462007426065307e-06, "loss": 19.8668, "step": 351260 }, { "epoch": 0.7095876242843926, "grad_norm": 365.9820556640625, "learning_rate": 2.4617066793040012e-06, "loss": 24.1304, "step": 351270 }, { "epoch": 0.7096078249170764, "grad_norm": 208.26876831054688, "learning_rate": 2.461405944914033e-06, "loss": 17.9849, "step": 351280 }, { "epoch": 0.7096280255497602, "grad_norm": 27.316822052001953, "learning_rate": 2.4611052228968606e-06, "loss": 11.6133, "step": 351290 }, { "epoch": 0.709648226182444, "grad_norm": 423.08477783203125, "learning_rate": 2.4608045132539536e-06, "loss": 20.7744, "step": 351300 }, { "epoch": 0.7096684268151279, "grad_norm": 683.5103149414062, "learning_rate": 2.460503815986779e-06, "loss": 26.1462, "step": 351310 }, { 
"epoch": 0.7096886274478117, "grad_norm": 421.5254821777344, "learning_rate": 2.4602031310968013e-06, "loss": 12.5355, "step": 351320 }, { "epoch": 0.7097088280804955, "grad_norm": 373.98773193359375, "learning_rate": 2.459902458585483e-06, "loss": 9.5074, "step": 351330 }, { "epoch": 0.7097290287131793, "grad_norm": 35.10133361816406, "learning_rate": 2.459601798454292e-06, "loss": 15.7518, "step": 351340 }, { "epoch": 0.7097492293458632, "grad_norm": 270.9390869140625, "learning_rate": 2.4593011507046976e-06, "loss": 26.5067, "step": 351350 }, { "epoch": 0.709769429978547, "grad_norm": 50.27530288696289, "learning_rate": 2.459000515338158e-06, "loss": 19.8882, "step": 351360 }, { "epoch": 0.7097896306112308, "grad_norm": 496.9534912109375, "learning_rate": 2.4586998923561412e-06, "loss": 18.3404, "step": 351370 }, { "epoch": 0.7098098312439146, "grad_norm": 417.775146484375, "learning_rate": 2.458399281760115e-06, "loss": 15.2598, "step": 351380 }, { "epoch": 0.7098300318765983, "grad_norm": 707.9732666015625, "learning_rate": 2.4580986835515423e-06, "loss": 23.1386, "step": 351390 }, { "epoch": 0.7098502325092821, "grad_norm": 232.51669311523438, "learning_rate": 2.4577980977318866e-06, "loss": 17.2174, "step": 351400 }, { "epoch": 0.709870433141966, "grad_norm": 31.553802490234375, "learning_rate": 2.457497524302616e-06, "loss": 17.3653, "step": 351410 }, { "epoch": 0.7098906337746498, "grad_norm": 114.79324340820312, "learning_rate": 2.457196963265195e-06, "loss": 12.134, "step": 351420 }, { "epoch": 0.7099108344073336, "grad_norm": 601.8326416015625, "learning_rate": 2.456896414621085e-06, "loss": 24.023, "step": 351430 }, { "epoch": 0.7099310350400174, "grad_norm": 115.26791381835938, "learning_rate": 2.4565958783717534e-06, "loss": 19.1904, "step": 351440 }, { "epoch": 0.7099512356727012, "grad_norm": 409.48663330078125, "learning_rate": 2.4562953545186675e-06, "loss": 12.8175, "step": 351450 }, { "epoch": 0.7099714363053851, "grad_norm": 385.10211181640625, "learning_rate": 2.455994843063289e-06, "loss": 20.6888, "step": 351460 }, { "epoch": 0.7099916369380689, "grad_norm": 307.5181884765625, "learning_rate": 2.455694344007082e-06, "loss": 31.795, "step": 351470 }, { "epoch": 0.7100118375707527, "grad_norm": 263.39544677734375, "learning_rate": 2.455393857351513e-06, "loss": 17.189, "step": 351480 }, { "epoch": 0.7100320382034365, "grad_norm": 265.06463623046875, "learning_rate": 2.455093383098046e-06, "loss": 19.1293, "step": 351490 }, { "epoch": 0.7100522388361203, "grad_norm": 855.9002685546875, "learning_rate": 2.4547929212481436e-06, "loss": 32.2827, "step": 351500 }, { "epoch": 0.7100724394688042, "grad_norm": 422.6793212890625, "learning_rate": 2.454492471803274e-06, "loss": 39.923, "step": 351510 }, { "epoch": 0.710092640101488, "grad_norm": 90.21331787109375, "learning_rate": 2.454192034764897e-06, "loss": 16.5796, "step": 351520 }, { "epoch": 0.7101128407341718, "grad_norm": 340.8531188964844, "learning_rate": 2.4538916101344806e-06, "loss": 21.9852, "step": 351530 }, { "epoch": 0.7101330413668556, "grad_norm": 507.1401062011719, "learning_rate": 2.4535911979134884e-06, "loss": 18.3361, "step": 351540 }, { "epoch": 0.7101532419995394, "grad_norm": 130.31398010253906, "learning_rate": 2.4532907981033822e-06, "loss": 12.3717, "step": 351550 }, { "epoch": 0.7101734426322233, "grad_norm": 1278.9815673828125, "learning_rate": 2.452990410705629e-06, "loss": 24.3266, "step": 351560 }, { "epoch": 0.7101936432649071, "grad_norm": 389.86334228515625, "learning_rate": 
2.452690035721692e-06, "loss": 15.1485, "step": 351570 }, { "epoch": 0.7102138438975909, "grad_norm": 133.64479064941406, "learning_rate": 2.4523896731530327e-06, "loss": 12.0792, "step": 351580 }, { "epoch": 0.7102340445302747, "grad_norm": 331.3221435546875, "learning_rate": 2.4520893230011174e-06, "loss": 23.9486, "step": 351590 }, { "epoch": 0.7102542451629585, "grad_norm": 287.4932556152344, "learning_rate": 2.4517889852674114e-06, "loss": 17.338, "step": 351600 }, { "epoch": 0.7102744457956424, "grad_norm": 577.8561401367188, "learning_rate": 2.4514886599533773e-06, "loss": 18.4957, "step": 351610 }, { "epoch": 0.7102946464283262, "grad_norm": 81.04473114013672, "learning_rate": 2.4511883470604757e-06, "loss": 18.9512, "step": 351620 }, { "epoch": 0.71031484706101, "grad_norm": 133.50791931152344, "learning_rate": 2.450888046590175e-06, "loss": 19.3605, "step": 351630 }, { "epoch": 0.7103350476936938, "grad_norm": 211.41757202148438, "learning_rate": 2.4505877585439376e-06, "loss": 15.7611, "step": 351640 }, { "epoch": 0.7103552483263775, "grad_norm": 1024.399169921875, "learning_rate": 2.4502874829232238e-06, "loss": 23.0618, "step": 351650 }, { "epoch": 0.7103754489590614, "grad_norm": 552.9322509765625, "learning_rate": 2.4499872197294992e-06, "loss": 14.6487, "step": 351660 }, { "epoch": 0.7103956495917452, "grad_norm": 395.48712158203125, "learning_rate": 2.449686968964232e-06, "loss": 18.742, "step": 351670 }, { "epoch": 0.710415850224429, "grad_norm": 777.3709106445312, "learning_rate": 2.4493867306288772e-06, "loss": 22.8613, "step": 351680 }, { "epoch": 0.7104360508571128, "grad_norm": 526.1470947265625, "learning_rate": 2.449086504724902e-06, "loss": 42.0243, "step": 351690 }, { "epoch": 0.7104562514897966, "grad_norm": 618.8796997070312, "learning_rate": 2.448786291253772e-06, "loss": 24.4049, "step": 351700 }, { "epoch": 0.7104764521224805, "grad_norm": 248.31163024902344, "learning_rate": 2.4484860902169477e-06, "loss": 26.042, "step": 351710 }, { "epoch": 0.7104966527551643, "grad_norm": 344.298095703125, "learning_rate": 2.4481859016158913e-06, "loss": 14.6626, "step": 351720 }, { "epoch": 0.7105168533878481, "grad_norm": 600.903076171875, "learning_rate": 2.4478857254520688e-06, "loss": 17.7351, "step": 351730 }, { "epoch": 0.7105370540205319, "grad_norm": 654.8291625976562, "learning_rate": 2.44758556172694e-06, "loss": 26.584, "step": 351740 }, { "epoch": 0.7105572546532157, "grad_norm": 464.43414306640625, "learning_rate": 2.4472854104419717e-06, "loss": 18.7963, "step": 351750 }, { "epoch": 0.7105774552858996, "grad_norm": 152.92335510253906, "learning_rate": 2.4469852715986232e-06, "loss": 16.9666, "step": 351760 }, { "epoch": 0.7105976559185834, "grad_norm": 385.8680114746094, "learning_rate": 2.44668514519836e-06, "loss": 11.9277, "step": 351770 }, { "epoch": 0.7106178565512672, "grad_norm": 654.5179443359375, "learning_rate": 2.446385031242644e-06, "loss": 14.0661, "step": 351780 }, { "epoch": 0.710638057183951, "grad_norm": 781.8509521484375, "learning_rate": 2.4460849297329355e-06, "loss": 16.5214, "step": 351790 }, { "epoch": 0.7106582578166348, "grad_norm": 650.5888671875, "learning_rate": 2.4457848406707014e-06, "loss": 18.0695, "step": 351800 }, { "epoch": 0.7106784584493187, "grad_norm": 249.22930908203125, "learning_rate": 2.4454847640574004e-06, "loss": 9.1941, "step": 351810 }, { "epoch": 0.7106986590820025, "grad_norm": 281.6560974121094, "learning_rate": 2.4451846998944985e-06, "loss": 13.0573, "step": 351820 }, { "epoch": 0.7107188597146863, 
"grad_norm": 369.3673095703125, "learning_rate": 2.4448846481834566e-06, "loss": 13.1717, "step": 351830 }, { "epoch": 0.7107390603473701, "grad_norm": 397.47998046875, "learning_rate": 2.4445846089257354e-06, "loss": 14.0664, "step": 351840 }, { "epoch": 0.7107592609800539, "grad_norm": 50.95933532714844, "learning_rate": 2.4442845821228005e-06, "loss": 12.9487, "step": 351850 }, { "epoch": 0.7107794616127378, "grad_norm": 222.64434814453125, "learning_rate": 2.4439845677761124e-06, "loss": 16.0556, "step": 351860 }, { "epoch": 0.7107996622454216, "grad_norm": 381.6914367675781, "learning_rate": 2.4436845658871317e-06, "loss": 15.1181, "step": 351870 }, { "epoch": 0.7108198628781054, "grad_norm": 618.4828491210938, "learning_rate": 2.4433845764573225e-06, "loss": 24.6758, "step": 351880 }, { "epoch": 0.7108400635107892, "grad_norm": 311.1064758300781, "learning_rate": 2.4430845994881507e-06, "loss": 30.1891, "step": 351890 }, { "epoch": 0.7108602641434729, "grad_norm": 855.1177368164062, "learning_rate": 2.442784634981071e-06, "loss": 20.8685, "step": 351900 }, { "epoch": 0.7108804647761567, "grad_norm": 230.25753784179688, "learning_rate": 2.442484682937548e-06, "loss": 37.8443, "step": 351910 }, { "epoch": 0.7109006654088406, "grad_norm": 247.62557983398438, "learning_rate": 2.4421847433590466e-06, "loss": 11.3085, "step": 351920 }, { "epoch": 0.7109208660415244, "grad_norm": 690.3058471679688, "learning_rate": 2.4418848162470273e-06, "loss": 22.4823, "step": 351930 }, { "epoch": 0.7109410666742082, "grad_norm": 371.29913330078125, "learning_rate": 2.441584901602948e-06, "loss": 12.9227, "step": 351940 }, { "epoch": 0.710961267306892, "grad_norm": 197.8428192138672, "learning_rate": 2.4412849994282744e-06, "loss": 12.8529, "step": 351950 }, { "epoch": 0.7109814679395758, "grad_norm": 476.21197509765625, "learning_rate": 2.4409851097244708e-06, "loss": 21.9375, "step": 351960 }, { "epoch": 0.7110016685722597, "grad_norm": 245.45703125, "learning_rate": 2.440685232492992e-06, "loss": 15.0306, "step": 351970 }, { "epoch": 0.7110218692049435, "grad_norm": 465.3127746582031, "learning_rate": 2.440385367735303e-06, "loss": 21.7765, "step": 351980 }, { "epoch": 0.7110420698376273, "grad_norm": 300.7366638183594, "learning_rate": 2.440085515452867e-06, "loss": 20.5044, "step": 351990 }, { "epoch": 0.7110622704703111, "grad_norm": 634.2356567382812, "learning_rate": 2.4397856756471435e-06, "loss": 36.8186, "step": 352000 }, { "epoch": 0.7110824711029949, "grad_norm": 967.7387084960938, "learning_rate": 2.4394858483195923e-06, "loss": 29.0341, "step": 352010 }, { "epoch": 0.7111026717356788, "grad_norm": 313.5653381347656, "learning_rate": 2.4391860334716783e-06, "loss": 12.2251, "step": 352020 }, { "epoch": 0.7111228723683626, "grad_norm": 165.4011688232422, "learning_rate": 2.438886231104861e-06, "loss": 22.8992, "step": 352030 }, { "epoch": 0.7111430730010464, "grad_norm": 460.9947814941406, "learning_rate": 2.4385864412206e-06, "loss": 26.6358, "step": 352040 }, { "epoch": 0.7111632736337302, "grad_norm": 92.2685317993164, "learning_rate": 2.4382866638203578e-06, "loss": 13.4465, "step": 352050 }, { "epoch": 0.711183474266414, "grad_norm": 89.22782135009766, "learning_rate": 2.4379868989055976e-06, "loss": 17.6479, "step": 352060 }, { "epoch": 0.7112036748990979, "grad_norm": 95.64362335205078, "learning_rate": 2.4376871464777792e-06, "loss": 10.2265, "step": 352070 }, { "epoch": 0.7112238755317817, "grad_norm": 234.10340881347656, "learning_rate": 2.43738740653836e-06, "loss": 17.5689, 
"step": 352080 }, { "epoch": 0.7112440761644655, "grad_norm": 84.29734802246094, "learning_rate": 2.437087679088806e-06, "loss": 12.9642, "step": 352090 }, { "epoch": 0.7112642767971493, "grad_norm": 189.70095825195312, "learning_rate": 2.4367879641305757e-06, "loss": 21.265, "step": 352100 }, { "epoch": 0.7112844774298331, "grad_norm": 516.434326171875, "learning_rate": 2.4364882616651288e-06, "loss": 14.5156, "step": 352110 }, { "epoch": 0.711304678062517, "grad_norm": 394.54852294921875, "learning_rate": 2.436188571693928e-06, "loss": 18.9324, "step": 352120 }, { "epoch": 0.7113248786952008, "grad_norm": 157.44735717773438, "learning_rate": 2.4358888942184324e-06, "loss": 17.6764, "step": 352130 }, { "epoch": 0.7113450793278846, "grad_norm": 46.21310806274414, "learning_rate": 2.4355892292401044e-06, "loss": 21.4303, "step": 352140 }, { "epoch": 0.7113652799605684, "grad_norm": 199.23959350585938, "learning_rate": 2.4352895767604036e-06, "loss": 16.9316, "step": 352150 }, { "epoch": 0.7113854805932521, "grad_norm": 443.296142578125, "learning_rate": 2.4349899367807885e-06, "loss": 16.1591, "step": 352160 }, { "epoch": 0.711405681225936, "grad_norm": 1023.393310546875, "learning_rate": 2.4346903093027237e-06, "loss": 22.3192, "step": 352170 }, { "epoch": 0.7114258818586198, "grad_norm": 289.38592529296875, "learning_rate": 2.434390694327666e-06, "loss": 21.6989, "step": 352180 }, { "epoch": 0.7114460824913036, "grad_norm": 344.63568115234375, "learning_rate": 2.434091091857076e-06, "loss": 21.0206, "step": 352190 }, { "epoch": 0.7114662831239874, "grad_norm": 132.6818084716797, "learning_rate": 2.4337915018924147e-06, "loss": 13.4311, "step": 352200 }, { "epoch": 0.7114864837566712, "grad_norm": 500.07904052734375, "learning_rate": 2.433491924435144e-06, "loss": 15.8135, "step": 352210 }, { "epoch": 0.711506684389355, "grad_norm": 16.9533634185791, "learning_rate": 2.433192359486723e-06, "loss": 16.036, "step": 352220 }, { "epoch": 0.7115268850220389, "grad_norm": 554.3599243164062, "learning_rate": 2.4328928070486086e-06, "loss": 18.9378, "step": 352230 }, { "epoch": 0.7115470856547227, "grad_norm": 179.26231384277344, "learning_rate": 2.432593267122265e-06, "loss": 12.1016, "step": 352240 }, { "epoch": 0.7115672862874065, "grad_norm": 92.05696105957031, "learning_rate": 2.432293739709151e-06, "loss": 16.263, "step": 352250 }, { "epoch": 0.7115874869200903, "grad_norm": 422.00677490234375, "learning_rate": 2.4319942248107236e-06, "loss": 40.672, "step": 352260 }, { "epoch": 0.7116076875527741, "grad_norm": 447.24139404296875, "learning_rate": 2.4316947224284454e-06, "loss": 13.0432, "step": 352270 }, { "epoch": 0.711627888185458, "grad_norm": 416.1942138671875, "learning_rate": 2.431395232563779e-06, "loss": 11.9405, "step": 352280 }, { "epoch": 0.7116480888181418, "grad_norm": 649.938232421875, "learning_rate": 2.431095755218177e-06, "loss": 11.7699, "step": 352290 }, { "epoch": 0.7116682894508256, "grad_norm": 157.4993896484375, "learning_rate": 2.4307962903931025e-06, "loss": 9.6227, "step": 352300 }, { "epoch": 0.7116884900835094, "grad_norm": 280.8648376464844, "learning_rate": 2.430496838090017e-06, "loss": 28.5089, "step": 352310 }, { "epoch": 0.7117086907161932, "grad_norm": 405.6296691894531, "learning_rate": 2.4301973983103793e-06, "loss": 11.0124, "step": 352320 }, { "epoch": 0.7117288913488771, "grad_norm": 180.7757110595703, "learning_rate": 2.429897971055645e-06, "loss": 18.2415, "step": 352330 }, { "epoch": 0.7117490919815609, "grad_norm": 500.71588134765625, 
"learning_rate": 2.4295985563272785e-06, "loss": 18.0885, "step": 352340 }, { "epoch": 0.7117692926142447, "grad_norm": 383.4118347167969, "learning_rate": 2.4292991541267368e-06, "loss": 19.4345, "step": 352350 }, { "epoch": 0.7117894932469285, "grad_norm": 593.0022583007812, "learning_rate": 2.4289997644554775e-06, "loss": 46.2417, "step": 352360 }, { "epoch": 0.7118096938796123, "grad_norm": 434.1874694824219, "learning_rate": 2.428700387314961e-06, "loss": 13.0681, "step": 352370 }, { "epoch": 0.7118298945122962, "grad_norm": 185.2329864501953, "learning_rate": 2.4284010227066495e-06, "loss": 19.5999, "step": 352380 }, { "epoch": 0.71185009514498, "grad_norm": 548.0687255859375, "learning_rate": 2.4281016706319992e-06, "loss": 20.2285, "step": 352390 }, { "epoch": 0.7118702957776638, "grad_norm": 463.87506103515625, "learning_rate": 2.4278023310924676e-06, "loss": 13.8517, "step": 352400 }, { "epoch": 0.7118904964103475, "grad_norm": 875.9000244140625, "learning_rate": 2.4275030040895178e-06, "loss": 9.5436, "step": 352410 }, { "epoch": 0.7119106970430313, "grad_norm": 699.1371459960938, "learning_rate": 2.4272036896246054e-06, "loss": 29.8913, "step": 352420 }, { "epoch": 0.7119308976757152, "grad_norm": 446.48834228515625, "learning_rate": 2.4269043876991888e-06, "loss": 18.5582, "step": 352430 }, { "epoch": 0.711951098308399, "grad_norm": 636.35400390625, "learning_rate": 2.4266050983147298e-06, "loss": 22.3689, "step": 352440 }, { "epoch": 0.7119712989410828, "grad_norm": 577.2525024414062, "learning_rate": 2.4263058214726844e-06, "loss": 12.0827, "step": 352450 }, { "epoch": 0.7119914995737666, "grad_norm": 456.84130859375, "learning_rate": 2.426006557174513e-06, "loss": 17.5838, "step": 352460 }, { "epoch": 0.7120117002064504, "grad_norm": 158.64451599121094, "learning_rate": 2.425707305421674e-06, "loss": 17.0695, "step": 352470 }, { "epoch": 0.7120319008391343, "grad_norm": 328.5570373535156, "learning_rate": 2.425408066215623e-06, "loss": 16.1986, "step": 352480 }, { "epoch": 0.7120521014718181, "grad_norm": 564.2173461914062, "learning_rate": 2.4251088395578214e-06, "loss": 27.737, "step": 352490 }, { "epoch": 0.7120723021045019, "grad_norm": 866.6751708984375, "learning_rate": 2.424809625449729e-06, "loss": 29.3532, "step": 352500 }, { "epoch": 0.7120925027371857, "grad_norm": 114.92594909667969, "learning_rate": 2.424510423892802e-06, "loss": 15.4919, "step": 352510 }, { "epoch": 0.7121127033698695, "grad_norm": 201.77572631835938, "learning_rate": 2.424211234888497e-06, "loss": 12.8224, "step": 352520 }, { "epoch": 0.7121329040025534, "grad_norm": 789.9775390625, "learning_rate": 2.4239120584382757e-06, "loss": 20.3586, "step": 352530 }, { "epoch": 0.7121531046352372, "grad_norm": 274.7069396972656, "learning_rate": 2.4236128945435944e-06, "loss": 17.7998, "step": 352540 }, { "epoch": 0.712173305267921, "grad_norm": 642.5575561523438, "learning_rate": 2.42331374320591e-06, "loss": 24.558, "step": 352550 }, { "epoch": 0.7121935059006048, "grad_norm": 1134.72314453125, "learning_rate": 2.423014604426682e-06, "loss": 25.1194, "step": 352560 }, { "epoch": 0.7122137065332886, "grad_norm": 169.52781677246094, "learning_rate": 2.4227154782073716e-06, "loss": 16.6606, "step": 352570 }, { "epoch": 0.7122339071659725, "grad_norm": 86.97180938720703, "learning_rate": 2.422416364549429e-06, "loss": 14.4027, "step": 352580 }, { "epoch": 0.7122541077986563, "grad_norm": 102.85459899902344, "learning_rate": 2.4221172634543177e-06, "loss": 12.2192, "step": 352590 }, { "epoch": 
0.7122743084313401, "grad_norm": 264.3111267089844, "learning_rate": 2.4218181749234954e-06, "loss": 20.4156, "step": 352600 }, { "epoch": 0.7122945090640239, "grad_norm": 495.5534362792969, "learning_rate": 2.4215190989584187e-06, "loss": 10.5141, "step": 352610 }, { "epoch": 0.7123147096967077, "grad_norm": 79.8003921508789, "learning_rate": 2.4212200355605433e-06, "loss": 33.404, "step": 352620 }, { "epoch": 0.7123349103293916, "grad_norm": 461.2104187011719, "learning_rate": 2.4209209847313302e-06, "loss": 10.4754, "step": 352630 }, { "epoch": 0.7123551109620754, "grad_norm": 338.4543151855469, "learning_rate": 2.4206219464722356e-06, "loss": 15.9289, "step": 352640 }, { "epoch": 0.7123753115947592, "grad_norm": 372.7828674316406, "learning_rate": 2.4203229207847155e-06, "loss": 12.2598, "step": 352650 }, { "epoch": 0.712395512227443, "grad_norm": 850.4609985351562, "learning_rate": 2.420023907670228e-06, "loss": 31.5782, "step": 352660 }, { "epoch": 0.7124157128601267, "grad_norm": 354.53204345703125, "learning_rate": 2.419724907130233e-06, "loss": 18.0651, "step": 352670 }, { "epoch": 0.7124359134928105, "grad_norm": 261.14532470703125, "learning_rate": 2.4194259191661864e-06, "loss": 22.9313, "step": 352680 }, { "epoch": 0.7124561141254944, "grad_norm": 581.628662109375, "learning_rate": 2.419126943779543e-06, "loss": 26.5322, "step": 352690 }, { "epoch": 0.7124763147581782, "grad_norm": 470.56378173828125, "learning_rate": 2.418827980971763e-06, "loss": 29.114, "step": 352700 }, { "epoch": 0.712496515390862, "grad_norm": 173.2490997314453, "learning_rate": 2.4185290307443025e-06, "loss": 20.4095, "step": 352710 }, { "epoch": 0.7125167160235458, "grad_norm": 597.8363647460938, "learning_rate": 2.418230093098617e-06, "loss": 20.5333, "step": 352720 }, { "epoch": 0.7125369166562296, "grad_norm": 731.7879638671875, "learning_rate": 2.417931168036166e-06, "loss": 23.9049, "step": 352730 }, { "epoch": 0.7125571172889135, "grad_norm": 3.5769662857055664, "learning_rate": 2.417632255558404e-06, "loss": 11.3522, "step": 352740 }, { "epoch": 0.7125773179215973, "grad_norm": 54.508094787597656, "learning_rate": 2.4173333556667912e-06, "loss": 26.4764, "step": 352750 }, { "epoch": 0.7125975185542811, "grad_norm": 694.1456298828125, "learning_rate": 2.417034468362782e-06, "loss": 22.3851, "step": 352760 }, { "epoch": 0.7126177191869649, "grad_norm": 208.19479370117188, "learning_rate": 2.416735593647832e-06, "loss": 19.3907, "step": 352770 }, { "epoch": 0.7126379198196487, "grad_norm": 163.6040802001953, "learning_rate": 2.416436731523401e-06, "loss": 13.7756, "step": 352780 }, { "epoch": 0.7126581204523326, "grad_norm": 699.3252563476562, "learning_rate": 2.4161378819909444e-06, "loss": 20.9277, "step": 352790 }, { "epoch": 0.7126783210850164, "grad_norm": 131.13226318359375, "learning_rate": 2.415839045051916e-06, "loss": 19.9862, "step": 352800 }, { "epoch": 0.7126985217177002, "grad_norm": 203.00201416015625, "learning_rate": 2.415540220707775e-06, "loss": 19.0849, "step": 352810 }, { "epoch": 0.712718722350384, "grad_norm": 190.422119140625, "learning_rate": 2.4152414089599798e-06, "loss": 15.3859, "step": 352820 }, { "epoch": 0.7127389229830678, "grad_norm": 147.30853271484375, "learning_rate": 2.4149426098099836e-06, "loss": 14.347, "step": 352830 }, { "epoch": 0.7127591236157517, "grad_norm": 149.81625366210938, "learning_rate": 2.4146438232592425e-06, "loss": 12.5917, "step": 352840 }, { "epoch": 0.7127793242484355, "grad_norm": 273.17840576171875, "learning_rate": 
2.4143450493092146e-06, "loss": 9.3379, "step": 352850 }, { "epoch": 0.7127995248811193, "grad_norm": 340.0224304199219, "learning_rate": 2.414046287961356e-06, "loss": 22.2239, "step": 352860 }, { "epoch": 0.7128197255138031, "grad_norm": 0.5263413190841675, "learning_rate": 2.4137475392171204e-06, "loss": 14.0726, "step": 352870 }, { "epoch": 0.712839926146487, "grad_norm": 246.65101623535156, "learning_rate": 2.4134488030779657e-06, "loss": 12.8788, "step": 352880 }, { "epoch": 0.7128601267791708, "grad_norm": 982.08251953125, "learning_rate": 2.4131500795453515e-06, "loss": 26.5317, "step": 352890 }, { "epoch": 0.7128803274118546, "grad_norm": 252.6681365966797, "learning_rate": 2.412851368620726e-06, "loss": 23.6088, "step": 352900 }, { "epoch": 0.7129005280445384, "grad_norm": 219.7015838623047, "learning_rate": 2.41255267030555e-06, "loss": 20.815, "step": 352910 }, { "epoch": 0.7129207286772221, "grad_norm": 462.3797607421875, "learning_rate": 2.412253984601279e-06, "loss": 29.5089, "step": 352920 }, { "epoch": 0.7129409293099059, "grad_norm": 352.5789489746094, "learning_rate": 2.411955311509369e-06, "loss": 34.6314, "step": 352930 }, { "epoch": 0.7129611299425898, "grad_norm": 0.0, "learning_rate": 2.4116566510312734e-06, "loss": 22.799, "step": 352940 }, { "epoch": 0.7129813305752736, "grad_norm": 267.8687744140625, "learning_rate": 2.4113580031684487e-06, "loss": 21.5248, "step": 352950 }, { "epoch": 0.7130015312079574, "grad_norm": 264.94580078125, "learning_rate": 2.4110593679223547e-06, "loss": 20.9346, "step": 352960 }, { "epoch": 0.7130217318406412, "grad_norm": 306.8415222167969, "learning_rate": 2.41076074529444e-06, "loss": 17.5703, "step": 352970 }, { "epoch": 0.713041932473325, "grad_norm": 553.2825927734375, "learning_rate": 2.4104621352861633e-06, "loss": 19.9164, "step": 352980 }, { "epoch": 0.7130621331060089, "grad_norm": 238.46238708496094, "learning_rate": 2.4101635378989823e-06, "loss": 18.6934, "step": 352990 }, { "epoch": 0.7130823337386927, "grad_norm": 355.835205078125, "learning_rate": 2.40986495313435e-06, "loss": 20.9812, "step": 353000 }, { "epoch": 0.7131025343713765, "grad_norm": 296.1120300292969, "learning_rate": 2.4095663809937198e-06, "loss": 17.2678, "step": 353010 }, { "epoch": 0.7131227350040603, "grad_norm": 171.39669799804688, "learning_rate": 2.4092678214785508e-06, "loss": 7.5386, "step": 353020 }, { "epoch": 0.7131429356367441, "grad_norm": 459.44488525390625, "learning_rate": 2.408969274590296e-06, "loss": 17.5729, "step": 353030 }, { "epoch": 0.713163136269428, "grad_norm": 435.53857421875, "learning_rate": 2.408670740330409e-06, "loss": 14.0548, "step": 353040 }, { "epoch": 0.7131833369021118, "grad_norm": 866.0286254882812, "learning_rate": 2.4083722187003483e-06, "loss": 26.1176, "step": 353050 }, { "epoch": 0.7132035375347956, "grad_norm": 369.73272705078125, "learning_rate": 2.408073709701565e-06, "loss": 30.3595, "step": 353060 }, { "epoch": 0.7132237381674794, "grad_norm": 481.9102478027344, "learning_rate": 2.407775213335518e-06, "loss": 13.01, "step": 353070 }, { "epoch": 0.7132439388001632, "grad_norm": 276.68603515625, "learning_rate": 2.407476729603661e-06, "loss": 19.3233, "step": 353080 }, { "epoch": 0.713264139432847, "grad_norm": 318.9827575683594, "learning_rate": 2.4071782585074453e-06, "loss": 18.0887, "step": 353090 }, { "epoch": 0.7132843400655309, "grad_norm": 309.0885009765625, "learning_rate": 2.4068798000483306e-06, "loss": 28.781, "step": 353100 }, { "epoch": 0.7133045406982147, "grad_norm": 500.3667907714844, 
"learning_rate": 2.406581354227767e-06, "loss": 30.4355, "step": 353110 }, { "epoch": 0.7133247413308985, "grad_norm": 502.664306640625, "learning_rate": 2.406282921047213e-06, "loss": 22.4138, "step": 353120 }, { "epoch": 0.7133449419635823, "grad_norm": 402.8708190917969, "learning_rate": 2.40598450050812e-06, "loss": 19.5594, "step": 353130 }, { "epoch": 0.7133651425962662, "grad_norm": 581.7855834960938, "learning_rate": 2.405686092611946e-06, "loss": 10.1923, "step": 353140 }, { "epoch": 0.71338534322895, "grad_norm": 378.47686767578125, "learning_rate": 2.405387697360143e-06, "loss": 25.2502, "step": 353150 }, { "epoch": 0.7134055438616338, "grad_norm": 0.0, "learning_rate": 2.4050893147541643e-06, "loss": 15.7241, "step": 353160 }, { "epoch": 0.7134257444943176, "grad_norm": 485.62335205078125, "learning_rate": 2.4047909447954647e-06, "loss": 13.8162, "step": 353170 }, { "epoch": 0.7134459451270013, "grad_norm": 249.17730712890625, "learning_rate": 2.4044925874855035e-06, "loss": 23.7092, "step": 353180 }, { "epoch": 0.7134661457596851, "grad_norm": 52.380401611328125, "learning_rate": 2.404194242825727e-06, "loss": 8.8809, "step": 353190 }, { "epoch": 0.713486346392369, "grad_norm": 129.6929931640625, "learning_rate": 2.403895910817593e-06, "loss": 19.2553, "step": 353200 }, { "epoch": 0.7135065470250528, "grad_norm": 1098.8128662109375, "learning_rate": 2.403597591462557e-06, "loss": 16.2623, "step": 353210 }, { "epoch": 0.7135267476577366, "grad_norm": 145.89706420898438, "learning_rate": 2.403299284762071e-06, "loss": 12.9106, "step": 353220 }, { "epoch": 0.7135469482904204, "grad_norm": 347.21124267578125, "learning_rate": 2.403000990717588e-06, "loss": 12.9401, "step": 353230 }, { "epoch": 0.7135671489231042, "grad_norm": 327.05682373046875, "learning_rate": 2.4027027093305655e-06, "loss": 15.7113, "step": 353240 }, { "epoch": 0.7135873495557881, "grad_norm": 391.7794494628906, "learning_rate": 2.402404440602455e-06, "loss": 12.8125, "step": 353250 }, { "epoch": 0.7136075501884719, "grad_norm": 266.3108215332031, "learning_rate": 2.4021061845347076e-06, "loss": 28.6932, "step": 353260 }, { "epoch": 0.7136277508211557, "grad_norm": 986.6097412109375, "learning_rate": 2.40180794112878e-06, "loss": 28.6111, "step": 353270 }, { "epoch": 0.7136479514538395, "grad_norm": 476.0670471191406, "learning_rate": 2.401509710386127e-06, "loss": 12.6359, "step": 353280 }, { "epoch": 0.7136681520865233, "grad_norm": 116.15657043457031, "learning_rate": 2.4012114923082007e-06, "loss": 13.8278, "step": 353290 }, { "epoch": 0.7136883527192072, "grad_norm": 93.48078155517578, "learning_rate": 2.4009132868964525e-06, "loss": 13.8956, "step": 353300 }, { "epoch": 0.713708553351891, "grad_norm": 472.78204345703125, "learning_rate": 2.400615094152339e-06, "loss": 15.9945, "step": 353310 }, { "epoch": 0.7137287539845748, "grad_norm": 329.33526611328125, "learning_rate": 2.4003169140773132e-06, "loss": 23.1479, "step": 353320 }, { "epoch": 0.7137489546172586, "grad_norm": 490.73138427734375, "learning_rate": 2.4000187466728253e-06, "loss": 26.9908, "step": 353330 }, { "epoch": 0.7137691552499424, "grad_norm": 427.5204772949219, "learning_rate": 2.3997205919403323e-06, "loss": 11.6373, "step": 353340 }, { "epoch": 0.7137893558826263, "grad_norm": 289.8179016113281, "learning_rate": 2.399422449881284e-06, "loss": 12.7979, "step": 353350 }, { "epoch": 0.7138095565153101, "grad_norm": 429.68603515625, "learning_rate": 2.399124320497137e-06, "loss": 26.3519, "step": 353360 }, { "epoch": 0.7138297571479939, 
"grad_norm": 353.4993591308594, "learning_rate": 2.398826203789343e-06, "loss": 20.2615, "step": 353370 }, { "epoch": 0.7138499577806777, "grad_norm": 53.866371154785156, "learning_rate": 2.3985280997593523e-06, "loss": 10.5225, "step": 353380 }, { "epoch": 0.7138701584133615, "grad_norm": 235.62281799316406, "learning_rate": 2.3982300084086224e-06, "loss": 15.1585, "step": 353390 }, { "epoch": 0.7138903590460454, "grad_norm": 629.7739868164062, "learning_rate": 2.3979319297386035e-06, "loss": 23.6567, "step": 353400 }, { "epoch": 0.7139105596787292, "grad_norm": 106.13060760498047, "learning_rate": 2.397633863750747e-06, "loss": 22.0218, "step": 353410 }, { "epoch": 0.713930760311413, "grad_norm": 728.9324951171875, "learning_rate": 2.397335810446508e-06, "loss": 17.6712, "step": 353420 }, { "epoch": 0.7139509609440968, "grad_norm": 259.8564758300781, "learning_rate": 2.3970377698273396e-06, "loss": 49.6211, "step": 353430 }, { "epoch": 0.7139711615767805, "grad_norm": 410.4757995605469, "learning_rate": 2.3967397418946937e-06, "loss": 22.1843, "step": 353440 }, { "epoch": 0.7139913622094644, "grad_norm": 547.3377685546875, "learning_rate": 2.396441726650021e-06, "loss": 9.9852, "step": 353450 }, { "epoch": 0.7140115628421482, "grad_norm": 138.32223510742188, "learning_rate": 2.396143724094777e-06, "loss": 11.1795, "step": 353460 }, { "epoch": 0.714031763474832, "grad_norm": 659.0087280273438, "learning_rate": 2.395845734230413e-06, "loss": 10.0586, "step": 353470 }, { "epoch": 0.7140519641075158, "grad_norm": 173.6551513671875, "learning_rate": 2.395547757058379e-06, "loss": 20.8068, "step": 353480 }, { "epoch": 0.7140721647401996, "grad_norm": 369.30169677734375, "learning_rate": 2.395249792580129e-06, "loss": 15.9972, "step": 353490 }, { "epoch": 0.7140923653728835, "grad_norm": 230.31594848632812, "learning_rate": 2.39495184079712e-06, "loss": 22.5841, "step": 353500 }, { "epoch": 0.7141125660055673, "grad_norm": 160.00787353515625, "learning_rate": 2.3946539017107963e-06, "loss": 65.5788, "step": 353510 }, { "epoch": 0.7141327666382511, "grad_norm": 3.456495523452759, "learning_rate": 2.3943559753226124e-06, "loss": 25.4125, "step": 353520 }, { "epoch": 0.7141529672709349, "grad_norm": 109.46832275390625, "learning_rate": 2.3940580616340244e-06, "loss": 24.6163, "step": 353530 }, { "epoch": 0.7141731679036187, "grad_norm": 741.8111572265625, "learning_rate": 2.3937601606464807e-06, "loss": 23.2918, "step": 353540 }, { "epoch": 0.7141933685363026, "grad_norm": 190.29891967773438, "learning_rate": 2.393462272361432e-06, "loss": 17.0141, "step": 353550 }, { "epoch": 0.7142135691689864, "grad_norm": 151.7148895263672, "learning_rate": 2.393164396780332e-06, "loss": 19.2695, "step": 353560 }, { "epoch": 0.7142337698016702, "grad_norm": 563.07373046875, "learning_rate": 2.3928665339046363e-06, "loss": 10.5755, "step": 353570 }, { "epoch": 0.714253970434354, "grad_norm": 546.0304565429688, "learning_rate": 2.3925686837357898e-06, "loss": 10.5596, "step": 353580 }, { "epoch": 0.7142741710670378, "grad_norm": 44.742496490478516, "learning_rate": 2.3922708462752466e-06, "loss": 13.7291, "step": 353590 }, { "epoch": 0.7142943716997217, "grad_norm": 217.78912353515625, "learning_rate": 2.391973021524461e-06, "loss": 13.751, "step": 353600 }, { "epoch": 0.7143145723324055, "grad_norm": 448.3959655761719, "learning_rate": 2.391675209484883e-06, "loss": 17.5057, "step": 353610 }, { "epoch": 0.7143347729650893, "grad_norm": 6.279942989349365, "learning_rate": 2.391377410157961e-06, "loss": 18.6252, 
"step": 353620 }, { "epoch": 0.7143549735977731, "grad_norm": 85.3839340209961, "learning_rate": 2.391079623545152e-06, "loss": 12.992, "step": 353630 }, { "epoch": 0.7143751742304569, "grad_norm": 436.60076904296875, "learning_rate": 2.390781849647904e-06, "loss": 19.5454, "step": 353640 }, { "epoch": 0.7143953748631408, "grad_norm": 149.72430419921875, "learning_rate": 2.3904840884676665e-06, "loss": 16.6928, "step": 353650 }, { "epoch": 0.7144155754958246, "grad_norm": 946.8202514648438, "learning_rate": 2.3901863400058954e-06, "loss": 13.3227, "step": 353660 }, { "epoch": 0.7144357761285084, "grad_norm": 112.90775299072266, "learning_rate": 2.389888604264038e-06, "loss": 10.5937, "step": 353670 }, { "epoch": 0.7144559767611922, "grad_norm": 284.0372314453125, "learning_rate": 2.389590881243548e-06, "loss": 12.4341, "step": 353680 }, { "epoch": 0.7144761773938759, "grad_norm": 425.15069580078125, "learning_rate": 2.389293170945876e-06, "loss": 24.0845, "step": 353690 }, { "epoch": 0.7144963780265597, "grad_norm": 538.189453125, "learning_rate": 2.3889954733724708e-06, "loss": 22.6088, "step": 353700 }, { "epoch": 0.7145165786592436, "grad_norm": 515.6146240234375, "learning_rate": 2.3886977885247866e-06, "loss": 31.1808, "step": 353710 }, { "epoch": 0.7145367792919274, "grad_norm": 600.225341796875, "learning_rate": 2.388400116404271e-06, "loss": 20.5745, "step": 353720 }, { "epoch": 0.7145569799246112, "grad_norm": 92.6533432006836, "learning_rate": 2.3881024570123777e-06, "loss": 18.8627, "step": 353730 }, { "epoch": 0.714577180557295, "grad_norm": 390.71588134765625, "learning_rate": 2.387804810350555e-06, "loss": 14.4567, "step": 353740 }, { "epoch": 0.7145973811899788, "grad_norm": 395.343017578125, "learning_rate": 2.387507176420256e-06, "loss": 19.7061, "step": 353750 }, { "epoch": 0.7146175818226627, "grad_norm": 193.63609313964844, "learning_rate": 2.387209555222931e-06, "loss": 33.2489, "step": 353760 }, { "epoch": 0.7146377824553465, "grad_norm": 339.81512451171875, "learning_rate": 2.3869119467600273e-06, "loss": 23.1288, "step": 353770 }, { "epoch": 0.7146579830880303, "grad_norm": 276.13140869140625, "learning_rate": 2.3866143510329998e-06, "loss": 10.3668, "step": 353780 }, { "epoch": 0.7146781837207141, "grad_norm": 676.8049926757812, "learning_rate": 2.3863167680432975e-06, "loss": 22.302, "step": 353790 }, { "epoch": 0.7146983843533979, "grad_norm": 263.4266052246094, "learning_rate": 2.3860191977923673e-06, "loss": 18.8042, "step": 353800 }, { "epoch": 0.7147185849860818, "grad_norm": 813.533203125, "learning_rate": 2.3857216402816635e-06, "loss": 18.3054, "step": 353810 }, { "epoch": 0.7147387856187656, "grad_norm": 686.9315185546875, "learning_rate": 2.385424095512637e-06, "loss": 13.7513, "step": 353820 }, { "epoch": 0.7147589862514494, "grad_norm": 291.71368408203125, "learning_rate": 2.3851265634867358e-06, "loss": 13.1515, "step": 353830 }, { "epoch": 0.7147791868841332, "grad_norm": 699.3037109375, "learning_rate": 2.3848290442054096e-06, "loss": 30.5805, "step": 353840 }, { "epoch": 0.714799387516817, "grad_norm": 155.97952270507812, "learning_rate": 2.3845315376701112e-06, "loss": 6.559, "step": 353850 }, { "epoch": 0.7148195881495009, "grad_norm": 284.4759826660156, "learning_rate": 2.384234043882288e-06, "loss": 18.174, "step": 353860 }, { "epoch": 0.7148397887821847, "grad_norm": 75.58837890625, "learning_rate": 2.38393656284339e-06, "loss": 13.5558, "step": 353870 }, { "epoch": 0.7148599894148685, "grad_norm": 394.6120300292969, "learning_rate": 
2.3836390945548672e-06, "loss": 26.4915, "step": 353880 }, { "epoch": 0.7148801900475523, "grad_norm": 119.50384521484375, "learning_rate": 2.3833416390181723e-06, "loss": 8.0645, "step": 353890 }, { "epoch": 0.7149003906802361, "grad_norm": 141.528076171875, "learning_rate": 2.3830441962347528e-06, "loss": 27.5925, "step": 353900 }, { "epoch": 0.71492059131292, "grad_norm": 1280.6165771484375, "learning_rate": 2.3827467662060565e-06, "loss": 23.4032, "step": 353910 }, { "epoch": 0.7149407919456038, "grad_norm": 489.8740234375, "learning_rate": 2.382449348933537e-06, "loss": 11.2119, "step": 353920 }, { "epoch": 0.7149609925782876, "grad_norm": 0.8326438069343567, "learning_rate": 2.382151944418642e-06, "loss": 15.309, "step": 353930 }, { "epoch": 0.7149811932109714, "grad_norm": 364.7646789550781, "learning_rate": 2.381854552662819e-06, "loss": 17.5244, "step": 353940 }, { "epoch": 0.7150013938436551, "grad_norm": 180.71661376953125, "learning_rate": 2.3815571736675214e-06, "loss": 12.8768, "step": 353950 }, { "epoch": 0.715021594476339, "grad_norm": 326.84393310546875, "learning_rate": 2.381259807434194e-06, "loss": 14.069, "step": 353960 }, { "epoch": 0.7150417951090228, "grad_norm": 227.0348358154297, "learning_rate": 2.3809624539642913e-06, "loss": 34.223, "step": 353970 }, { "epoch": 0.7150619957417066, "grad_norm": 11.7371244430542, "learning_rate": 2.3806651132592597e-06, "loss": 13.6704, "step": 353980 }, { "epoch": 0.7150821963743904, "grad_norm": 260.1331481933594, "learning_rate": 2.3803677853205465e-06, "loss": 17.0676, "step": 353990 }, { "epoch": 0.7151023970070742, "grad_norm": 7237.70263671875, "learning_rate": 2.380070470149605e-06, "loss": 23.8909, "step": 354000 }, { "epoch": 0.715122597639758, "grad_norm": 551.85302734375, "learning_rate": 2.3797731677478808e-06, "loss": 29.154, "step": 354010 }, { "epoch": 0.7151427982724419, "grad_norm": 845.3187255859375, "learning_rate": 2.379475878116826e-06, "loss": 18.3291, "step": 354020 }, { "epoch": 0.7151629989051257, "grad_norm": 265.6539611816406, "learning_rate": 2.379178601257886e-06, "loss": 18.141, "step": 354030 }, { "epoch": 0.7151831995378095, "grad_norm": 149.31614685058594, "learning_rate": 2.3788813371725133e-06, "loss": 14.8972, "step": 354040 }, { "epoch": 0.7152034001704933, "grad_norm": 407.98004150390625, "learning_rate": 2.3785840858621556e-06, "loss": 17.6841, "step": 354050 }, { "epoch": 0.7152236008031772, "grad_norm": 3.9735329151153564, "learning_rate": 2.3782868473282587e-06, "loss": 26.4006, "step": 354060 }, { "epoch": 0.715243801435861, "grad_norm": 471.94683837890625, "learning_rate": 2.3779896215722765e-06, "loss": 15.9451, "step": 354070 }, { "epoch": 0.7152640020685448, "grad_norm": 226.0657196044922, "learning_rate": 2.3776924085956536e-06, "loss": 17.6065, "step": 354080 }, { "epoch": 0.7152842027012286, "grad_norm": 488.97723388671875, "learning_rate": 2.3773952083998392e-06, "loss": 20.6488, "step": 354090 }, { "epoch": 0.7153044033339124, "grad_norm": 128.92112731933594, "learning_rate": 2.3770980209862814e-06, "loss": 25.5519, "step": 354100 }, { "epoch": 0.7153246039665963, "grad_norm": 314.02978515625, "learning_rate": 2.376800846356434e-06, "loss": 20.6244, "step": 354110 }, { "epoch": 0.7153448045992801, "grad_norm": 359.4573059082031, "learning_rate": 2.3765036845117373e-06, "loss": 25.4475, "step": 354120 }, { "epoch": 0.7153650052319639, "grad_norm": 79.7017593383789, "learning_rate": 2.3762065354536436e-06, "loss": 6.9899, "step": 354130 }, { "epoch": 0.7153852058646477, 
"grad_norm": 251.77645874023438, "learning_rate": 2.375909399183603e-06, "loss": 23.4261, "step": 354140 }, { "epoch": 0.7154054064973315, "grad_norm": 495.43450927734375, "learning_rate": 2.3756122757030614e-06, "loss": 16.1308, "step": 354150 }, { "epoch": 0.7154256071300154, "grad_norm": 0.0, "learning_rate": 2.3753151650134655e-06, "loss": 11.1015, "step": 354160 }, { "epoch": 0.7154458077626992, "grad_norm": 213.82225036621094, "learning_rate": 2.3750180671162656e-06, "loss": 9.9914, "step": 354170 }, { "epoch": 0.715466008395383, "grad_norm": 1957.6202392578125, "learning_rate": 2.3747209820129117e-06, "loss": 29.8445, "step": 354180 }, { "epoch": 0.7154862090280668, "grad_norm": 1360.60400390625, "learning_rate": 2.3744239097048465e-06, "loss": 28.059, "step": 354190 }, { "epoch": 0.7155064096607505, "grad_norm": 716.8304443359375, "learning_rate": 2.3741268501935212e-06, "loss": 21.6099, "step": 354200 }, { "epoch": 0.7155266102934343, "grad_norm": 475.5372009277344, "learning_rate": 2.373829803480384e-06, "loss": 10.761, "step": 354210 }, { "epoch": 0.7155468109261182, "grad_norm": 5.335785865783691, "learning_rate": 2.3735327695668823e-06, "loss": 15.5514, "step": 354220 }, { "epoch": 0.715567011558802, "grad_norm": 306.8415832519531, "learning_rate": 2.3732357484544616e-06, "loss": 13.4526, "step": 354230 }, { "epoch": 0.7155872121914858, "grad_norm": 357.27764892578125, "learning_rate": 2.372938740144573e-06, "loss": 19.6147, "step": 354240 }, { "epoch": 0.7156074128241696, "grad_norm": 218.32611083984375, "learning_rate": 2.372641744638662e-06, "loss": 15.3973, "step": 354250 }, { "epoch": 0.7156276134568534, "grad_norm": 593.254638671875, "learning_rate": 2.3723447619381756e-06, "loss": 22.3607, "step": 354260 }, { "epoch": 0.7156478140895373, "grad_norm": 415.7333068847656, "learning_rate": 2.3720477920445633e-06, "loss": 11.2397, "step": 354270 }, { "epoch": 0.7156680147222211, "grad_norm": 501.7664794921875, "learning_rate": 2.3717508349592695e-06, "loss": 22.7659, "step": 354280 }, { "epoch": 0.7156882153549049, "grad_norm": 339.79876708984375, "learning_rate": 2.3714538906837452e-06, "loss": 25.0175, "step": 354290 }, { "epoch": 0.7157084159875887, "grad_norm": 362.44024658203125, "learning_rate": 2.3711569592194363e-06, "loss": 25.5803, "step": 354300 }, { "epoch": 0.7157286166202725, "grad_norm": 261.34521484375, "learning_rate": 2.370860040567787e-06, "loss": 14.9646, "step": 354310 }, { "epoch": 0.7157488172529564, "grad_norm": 75.80406188964844, "learning_rate": 2.3705631347302492e-06, "loss": 13.9618, "step": 354320 }, { "epoch": 0.7157690178856402, "grad_norm": 484.663818359375, "learning_rate": 2.3702662417082655e-06, "loss": 12.7879, "step": 354330 }, { "epoch": 0.715789218518324, "grad_norm": 824.4431762695312, "learning_rate": 2.369969361503288e-06, "loss": 13.1514, "step": 354340 }, { "epoch": 0.7158094191510078, "grad_norm": 326.220947265625, "learning_rate": 2.3696724941167583e-06, "loss": 13.5248, "step": 354350 }, { "epoch": 0.7158296197836916, "grad_norm": 577.2223510742188, "learning_rate": 2.369375639550127e-06, "loss": 15.4694, "step": 354360 }, { "epoch": 0.7158498204163755, "grad_norm": 0.0, "learning_rate": 2.369078797804841e-06, "loss": 31.32, "step": 354370 }, { "epoch": 0.7158700210490593, "grad_norm": 318.8240966796875, "learning_rate": 2.368781968882343e-06, "loss": 25.9194, "step": 354380 }, { "epoch": 0.7158902216817431, "grad_norm": 249.54876708984375, "learning_rate": 2.368485152784086e-06, "loss": 18.2879, "step": 354390 }, { "epoch": 
0.7159104223144269, "grad_norm": 357.135498046875, "learning_rate": 2.3681883495115114e-06, "loss": 17.8518, "step": 354400 }, { "epoch": 0.7159306229471107, "grad_norm": 523.0523071289062, "learning_rate": 2.3678915590660667e-06, "loss": 12.5804, "step": 354410 }, { "epoch": 0.7159508235797946, "grad_norm": 399.6500244140625, "learning_rate": 2.367594781449199e-06, "loss": 18.5693, "step": 354420 }, { "epoch": 0.7159710242124784, "grad_norm": 665.1788940429688, "learning_rate": 2.367298016662357e-06, "loss": 7.4977, "step": 354430 }, { "epoch": 0.7159912248451622, "grad_norm": 237.6899871826172, "learning_rate": 2.3670012647069852e-06, "loss": 17.5121, "step": 354440 }, { "epoch": 0.716011425477846, "grad_norm": 349.6956787109375, "learning_rate": 2.3667045255845276e-06, "loss": 18.4566, "step": 354450 }, { "epoch": 0.7160316261105297, "grad_norm": 423.8206787109375, "learning_rate": 2.3664077992964356e-06, "loss": 11.8242, "step": 354460 }, { "epoch": 0.7160518267432135, "grad_norm": 174.9776611328125, "learning_rate": 2.3661110858441517e-06, "loss": 10.2079, "step": 354470 }, { "epoch": 0.7160720273758974, "grad_norm": 612.3877563476562, "learning_rate": 2.3658143852291214e-06, "loss": 16.587, "step": 354480 }, { "epoch": 0.7160922280085812, "grad_norm": 282.3558654785156, "learning_rate": 2.3655176974527922e-06, "loss": 16.1453, "step": 354490 }, { "epoch": 0.716112428641265, "grad_norm": 438.8551940917969, "learning_rate": 2.3652210225166122e-06, "loss": 13.2628, "step": 354500 }, { "epoch": 0.7161326292739488, "grad_norm": 310.1200256347656, "learning_rate": 2.364924360422025e-06, "loss": 20.7244, "step": 354510 }, { "epoch": 0.7161528299066326, "grad_norm": 202.9417266845703, "learning_rate": 2.3646277111704756e-06, "loss": 25.7473, "step": 354520 }, { "epoch": 0.7161730305393165, "grad_norm": 156.94667053222656, "learning_rate": 2.364331074763413e-06, "loss": 17.2835, "step": 354530 }, { "epoch": 0.7161932311720003, "grad_norm": 327.7087707519531, "learning_rate": 2.3640344512022807e-06, "loss": 12.6792, "step": 354540 }, { "epoch": 0.7162134318046841, "grad_norm": 524.0986328125, "learning_rate": 2.3637378404885224e-06, "loss": 15.3804, "step": 354550 }, { "epoch": 0.7162336324373679, "grad_norm": 133.65394592285156, "learning_rate": 2.3634412426235886e-06, "loss": 19.5644, "step": 354560 }, { "epoch": 0.7162538330700517, "grad_norm": 212.6671600341797, "learning_rate": 2.3631446576089205e-06, "loss": 23.5408, "step": 354570 }, { "epoch": 0.7162740337027356, "grad_norm": 436.2182922363281, "learning_rate": 2.362848085445968e-06, "loss": 17.9776, "step": 354580 }, { "epoch": 0.7162942343354194, "grad_norm": 410.3316955566406, "learning_rate": 2.362551526136173e-06, "loss": 16.9427, "step": 354590 }, { "epoch": 0.7163144349681032, "grad_norm": 126.34648895263672, "learning_rate": 2.3622549796809807e-06, "loss": 20.3769, "step": 354600 }, { "epoch": 0.716334635600787, "grad_norm": 24.165029525756836, "learning_rate": 2.3619584460818397e-06, "loss": 15.8446, "step": 354610 }, { "epoch": 0.7163548362334708, "grad_norm": 271.6394958496094, "learning_rate": 2.3616619253401913e-06, "loss": 7.4026, "step": 354620 }, { "epoch": 0.7163750368661547, "grad_norm": 118.4200668334961, "learning_rate": 2.361365417457484e-06, "loss": 14.8887, "step": 354630 }, { "epoch": 0.7163952374988385, "grad_norm": 270.2481689453125, "learning_rate": 2.36106892243516e-06, "loss": 14.6306, "step": 354640 }, { "epoch": 0.7164154381315223, "grad_norm": 660.8930053710938, "learning_rate": 2.3607724402746685e-06, 
"loss": 12.6815, "step": 354650 }, { "epoch": 0.7164356387642061, "grad_norm": 606.572021484375, "learning_rate": 2.3604759709774514e-06, "loss": 31.5383, "step": 354660 }, { "epoch": 0.71645583939689, "grad_norm": 577.8751220703125, "learning_rate": 2.3601795145449525e-06, "loss": 14.3754, "step": 354670 }, { "epoch": 0.7164760400295738, "grad_norm": 3.1835930347442627, "learning_rate": 2.3598830709786206e-06, "loss": 19.7489, "step": 354680 }, { "epoch": 0.7164962406622576, "grad_norm": 67.04893493652344, "learning_rate": 2.3595866402798983e-06, "loss": 15.8679, "step": 354690 }, { "epoch": 0.7165164412949414, "grad_norm": 745.938232421875, "learning_rate": 2.3592902224502284e-06, "loss": 13.4207, "step": 354700 }, { "epoch": 0.7165366419276252, "grad_norm": 268.83355712890625, "learning_rate": 2.3589938174910577e-06, "loss": 19.7198, "step": 354710 }, { "epoch": 0.7165568425603089, "grad_norm": 404.059326171875, "learning_rate": 2.3586974254038347e-06, "loss": 14.0341, "step": 354720 }, { "epoch": 0.7165770431929928, "grad_norm": 218.82638549804688, "learning_rate": 2.3584010461899966e-06, "loss": 24.7808, "step": 354730 }, { "epoch": 0.7165972438256766, "grad_norm": 342.35198974609375, "learning_rate": 2.358104679850991e-06, "loss": 12.6807, "step": 354740 }, { "epoch": 0.7166174444583604, "grad_norm": 414.22064208984375, "learning_rate": 2.357808326388265e-06, "loss": 18.3535, "step": 354750 }, { "epoch": 0.7166376450910442, "grad_norm": 83.78343963623047, "learning_rate": 2.3575119858032604e-06, "loss": 30.4336, "step": 354760 }, { "epoch": 0.716657845723728, "grad_norm": 486.3948974609375, "learning_rate": 2.3572156580974205e-06, "loss": 18.1159, "step": 354770 }, { "epoch": 0.7166780463564119, "grad_norm": 1177.810791015625, "learning_rate": 2.3569193432721904e-06, "loss": 17.2594, "step": 354780 }, { "epoch": 0.7166982469890957, "grad_norm": 466.76275634765625, "learning_rate": 2.3566230413290186e-06, "loss": 24.7981, "step": 354790 }, { "epoch": 0.7167184476217795, "grad_norm": 403.2004699707031, "learning_rate": 2.356326752269342e-06, "loss": 13.501, "step": 354800 }, { "epoch": 0.7167386482544633, "grad_norm": 106.48176574707031, "learning_rate": 2.356030476094608e-06, "loss": 24.7516, "step": 354810 }, { "epoch": 0.7167588488871471, "grad_norm": 349.6374816894531, "learning_rate": 2.355734212806263e-06, "loss": 17.2748, "step": 354820 }, { "epoch": 0.716779049519831, "grad_norm": 378.016357421875, "learning_rate": 2.3554379624057485e-06, "loss": 39.0445, "step": 354830 }, { "epoch": 0.7167992501525148, "grad_norm": 666.3162231445312, "learning_rate": 2.355141724894507e-06, "loss": 18.3326, "step": 354840 }, { "epoch": 0.7168194507851986, "grad_norm": 450.85235595703125, "learning_rate": 2.354845500273985e-06, "loss": 32.1428, "step": 354850 }, { "epoch": 0.7168396514178824, "grad_norm": 335.59295654296875, "learning_rate": 2.354549288545626e-06, "loss": 17.131, "step": 354860 }, { "epoch": 0.7168598520505662, "grad_norm": 353.0129089355469, "learning_rate": 2.354253089710871e-06, "loss": 28.5203, "step": 354870 }, { "epoch": 0.7168800526832501, "grad_norm": 123.41834259033203, "learning_rate": 2.3539569037711675e-06, "loss": 16.2803, "step": 354880 }, { "epoch": 0.7169002533159339, "grad_norm": 165.06300354003906, "learning_rate": 2.3536607307279546e-06, "loss": 5.1878, "step": 354890 }, { "epoch": 0.7169204539486177, "grad_norm": 858.8455810546875, "learning_rate": 2.353364570582681e-06, "loss": 26.3914, "step": 354900 }, { "epoch": 0.7169406545813015, "grad_norm": 
135.5835418701172, "learning_rate": 2.353068423336787e-06, "loss": 11.9173, "step": 354910 }, { "epoch": 0.7169608552139853, "grad_norm": 569.444091796875, "learning_rate": 2.3527722889917147e-06, "loss": 20.383, "step": 354920 }, { "epoch": 0.7169810558466692, "grad_norm": 332.755615234375, "learning_rate": 2.352476167548911e-06, "loss": 10.1888, "step": 354930 }, { "epoch": 0.717001256479353, "grad_norm": 100.6562728881836, "learning_rate": 2.3521800590098153e-06, "loss": 12.2876, "step": 354940 }, { "epoch": 0.7170214571120368, "grad_norm": 173.51876831054688, "learning_rate": 2.351883963375875e-06, "loss": 22.0638, "step": 354950 }, { "epoch": 0.7170416577447206, "grad_norm": 368.49957275390625, "learning_rate": 2.3515878806485292e-06, "loss": 30.4389, "step": 354960 }, { "epoch": 0.7170618583774043, "grad_norm": 757.2198486328125, "learning_rate": 2.351291810829225e-06, "loss": 17.8596, "step": 354970 }, { "epoch": 0.7170820590100881, "grad_norm": 6.089599609375, "learning_rate": 2.3509957539194028e-06, "loss": 15.726, "step": 354980 }, { "epoch": 0.717102259642772, "grad_norm": 21.2546443939209, "learning_rate": 2.350699709920504e-06, "loss": 16.9506, "step": 354990 }, { "epoch": 0.7171224602754558, "grad_norm": 328.02374267578125, "learning_rate": 2.3504036788339763e-06, "loss": 17.9776, "step": 355000 }, { "epoch": 0.7171426609081396, "grad_norm": 270.3284606933594, "learning_rate": 2.3501076606612587e-06, "loss": 15.238, "step": 355010 }, { "epoch": 0.7171628615408234, "grad_norm": 303.3382263183594, "learning_rate": 2.3498116554037937e-06, "loss": 27.3261, "step": 355020 }, { "epoch": 0.7171830621735072, "grad_norm": 128.95709228515625, "learning_rate": 2.349515663063025e-06, "loss": 11.8511, "step": 355030 }, { "epoch": 0.7172032628061911, "grad_norm": 198.04559326171875, "learning_rate": 2.3492196836403974e-06, "loss": 15.5393, "step": 355040 }, { "epoch": 0.7172234634388749, "grad_norm": 123.14707946777344, "learning_rate": 2.348923717137352e-06, "loss": 15.7005, "step": 355050 }, { "epoch": 0.7172436640715587, "grad_norm": 261.7399597167969, "learning_rate": 2.3486277635553282e-06, "loss": 17.2241, "step": 355060 }, { "epoch": 0.7172638647042425, "grad_norm": 516.3812866210938, "learning_rate": 2.3483318228957734e-06, "loss": 12.9015, "step": 355070 }, { "epoch": 0.7172840653369263, "grad_norm": 69.616455078125, "learning_rate": 2.348035895160128e-06, "loss": 10.3203, "step": 355080 }, { "epoch": 0.7173042659696102, "grad_norm": 106.06146240234375, "learning_rate": 2.347739980349831e-06, "loss": 12.7461, "step": 355090 }, { "epoch": 0.717324466602294, "grad_norm": 972.2819213867188, "learning_rate": 2.3474440784663287e-06, "loss": 23.5822, "step": 355100 }, { "epoch": 0.7173446672349778, "grad_norm": 239.9462890625, "learning_rate": 2.3471481895110643e-06, "loss": 17.7041, "step": 355110 }, { "epoch": 0.7173648678676616, "grad_norm": 480.9305114746094, "learning_rate": 2.346852313485477e-06, "loss": 42.3706, "step": 355120 }, { "epoch": 0.7173850685003454, "grad_norm": 544.0484619140625, "learning_rate": 2.346556450391009e-06, "loss": 16.3635, "step": 355130 }, { "epoch": 0.7174052691330293, "grad_norm": 444.17205810546875, "learning_rate": 2.346260600229104e-06, "loss": 17.6372, "step": 355140 }, { "epoch": 0.7174254697657131, "grad_norm": 105.86084747314453, "learning_rate": 2.3459647630012026e-06, "loss": 20.5306, "step": 355150 }, { "epoch": 0.7174456703983969, "grad_norm": 58.388790130615234, "learning_rate": 2.345668938708746e-06, "loss": 17.3686, "step": 355160 }, { 
"epoch": 0.7174658710310807, "grad_norm": 141.6204071044922, "learning_rate": 2.345373127353179e-06, "loss": 10.0835, "step": 355170 }, { "epoch": 0.7174860716637645, "grad_norm": 544.3477172851562, "learning_rate": 2.345077328935939e-06, "loss": 24.7715, "step": 355180 }, { "epoch": 0.7175062722964484, "grad_norm": 74.02786254882812, "learning_rate": 2.344781543458472e-06, "loss": 18.9166, "step": 355190 }, { "epoch": 0.7175264729291322, "grad_norm": 59.02185821533203, "learning_rate": 2.344485770922218e-06, "loss": 14.772, "step": 355200 }, { "epoch": 0.717546673561816, "grad_norm": 837.1063842773438, "learning_rate": 2.3441900113286164e-06, "loss": 31.2862, "step": 355210 }, { "epoch": 0.7175668741944998, "grad_norm": 478.86492919921875, "learning_rate": 2.3438942646791125e-06, "loss": 22.5689, "step": 355220 }, { "epoch": 0.7175870748271835, "grad_norm": 133.06336975097656, "learning_rate": 2.3435985309751436e-06, "loss": 29.2806, "step": 355230 }, { "epoch": 0.7176072754598674, "grad_norm": 161.58766174316406, "learning_rate": 2.3433028102181553e-06, "loss": 24.9607, "step": 355240 }, { "epoch": 0.7176274760925512, "grad_norm": 725.4195556640625, "learning_rate": 2.3430071024095853e-06, "loss": 25.7237, "step": 355250 }, { "epoch": 0.717647676725235, "grad_norm": 288.8263244628906, "learning_rate": 2.3427114075508776e-06, "loss": 19.4521, "step": 355260 }, { "epoch": 0.7176678773579188, "grad_norm": 556.9649047851562, "learning_rate": 2.342415725643473e-06, "loss": 17.8697, "step": 355270 }, { "epoch": 0.7176880779906026, "grad_norm": 261.4717102050781, "learning_rate": 2.3421200566888096e-06, "loss": 10.0185, "step": 355280 }, { "epoch": 0.7177082786232865, "grad_norm": 201.7537384033203, "learning_rate": 2.341824400688333e-06, "loss": 22.7394, "step": 355290 }, { "epoch": 0.7177284792559703, "grad_norm": 531.0883178710938, "learning_rate": 2.3415287576434807e-06, "loss": 19.9155, "step": 355300 }, { "epoch": 0.7177486798886541, "grad_norm": 435.77392578125, "learning_rate": 2.3412331275556936e-06, "loss": 37.4234, "step": 355310 }, { "epoch": 0.7177688805213379, "grad_norm": 218.02395629882812, "learning_rate": 2.3409375104264144e-06, "loss": 33.8755, "step": 355320 }, { "epoch": 0.7177890811540217, "grad_norm": 130.5308837890625, "learning_rate": 2.340641906257086e-06, "loss": 16.9654, "step": 355330 }, { "epoch": 0.7178092817867056, "grad_norm": 613.1275634765625, "learning_rate": 2.3403463150491434e-06, "loss": 15.5211, "step": 355340 }, { "epoch": 0.7178294824193894, "grad_norm": 270.5167236328125, "learning_rate": 2.34005073680403e-06, "loss": 17.8015, "step": 355350 }, { "epoch": 0.7178496830520732, "grad_norm": 260.85516357421875, "learning_rate": 2.3397551715231887e-06, "loss": 18.6767, "step": 355360 }, { "epoch": 0.717869883684757, "grad_norm": 1112.9285888671875, "learning_rate": 2.3394596192080575e-06, "loss": 23.5197, "step": 355370 }, { "epoch": 0.7178900843174408, "grad_norm": 546.1166381835938, "learning_rate": 2.3391640798600762e-06, "loss": 20.2109, "step": 355380 }, { "epoch": 0.7179102849501247, "grad_norm": 433.3735046386719, "learning_rate": 2.3388685534806865e-06, "loss": 18.0769, "step": 355390 }, { "epoch": 0.7179304855828085, "grad_norm": 316.08782958984375, "learning_rate": 2.338573040071332e-06, "loss": 20.6479, "step": 355400 }, { "epoch": 0.7179506862154923, "grad_norm": 340.77081298828125, "learning_rate": 2.338277539633446e-06, "loss": 20.0103, "step": 355410 }, { "epoch": 0.7179708868481761, "grad_norm": 376.880126953125, "learning_rate": 
2.3379820521684727e-06, "loss": 25.7699, "step": 355420 }, { "epoch": 0.7179910874808599, "grad_norm": 290.0224304199219, "learning_rate": 2.337686577677854e-06, "loss": 12.8468, "step": 355430 }, { "epoch": 0.7180112881135438, "grad_norm": 1088.810302734375, "learning_rate": 2.3373911161630274e-06, "loss": 26.5678, "step": 355440 }, { "epoch": 0.7180314887462276, "grad_norm": 246.1015167236328, "learning_rate": 2.3370956676254327e-06, "loss": 25.9341, "step": 355450 }, { "epoch": 0.7180516893789114, "grad_norm": 282.4861755371094, "learning_rate": 2.3368002320665118e-06, "loss": 15.3598, "step": 355460 }, { "epoch": 0.7180718900115952, "grad_norm": 746.9185180664062, "learning_rate": 2.3365048094877033e-06, "loss": 25.822, "step": 355470 }, { "epoch": 0.7180920906442789, "grad_norm": 1026.7742919921875, "learning_rate": 2.336209399890446e-06, "loss": 29.8923, "step": 355480 }, { "epoch": 0.7181122912769627, "grad_norm": 192.3336639404297, "learning_rate": 2.3359140032761827e-06, "loss": 16.082, "step": 355490 }, { "epoch": 0.7181324919096466, "grad_norm": 365.6981506347656, "learning_rate": 2.3356186196463497e-06, "loss": 24.8557, "step": 355500 }, { "epoch": 0.7181526925423304, "grad_norm": 964.8790283203125, "learning_rate": 2.335323249002391e-06, "loss": 17.6356, "step": 355510 }, { "epoch": 0.7181728931750142, "grad_norm": 198.53807067871094, "learning_rate": 2.3350278913457404e-06, "loss": 14.7058, "step": 355520 }, { "epoch": 0.718193093807698, "grad_norm": 631.6527099609375, "learning_rate": 2.334732546677843e-06, "loss": 19.1467, "step": 355530 }, { "epoch": 0.7182132944403818, "grad_norm": 717.8720092773438, "learning_rate": 2.334437215000136e-06, "loss": 18.5422, "step": 355540 }, { "epoch": 0.7182334950730657, "grad_norm": 925.7899169921875, "learning_rate": 2.334141896314057e-06, "loss": 15.6616, "step": 355550 }, { "epoch": 0.7182536957057495, "grad_norm": 651.7544555664062, "learning_rate": 2.333846590621049e-06, "loss": 27.0347, "step": 355560 }, { "epoch": 0.7182738963384333, "grad_norm": 57.169681549072266, "learning_rate": 2.333551297922547e-06, "loss": 22.672, "step": 355570 }, { "epoch": 0.7182940969711171, "grad_norm": 371.8760070800781, "learning_rate": 2.333256018219995e-06, "loss": 14.3969, "step": 355580 }, { "epoch": 0.7183142976038009, "grad_norm": 2.247293472290039, "learning_rate": 2.3329607515148287e-06, "loss": 28.4106, "step": 355590 }, { "epoch": 0.7183344982364848, "grad_norm": 368.8210144042969, "learning_rate": 2.3326654978084872e-06, "loss": 20.3566, "step": 355600 }, { "epoch": 0.7183546988691686, "grad_norm": 92.35023498535156, "learning_rate": 2.3323702571024125e-06, "loss": 15.0502, "step": 355610 }, { "epoch": 0.7183748995018524, "grad_norm": 412.3029479980469, "learning_rate": 2.3320750293980416e-06, "loss": 25.6768, "step": 355620 }, { "epoch": 0.7183951001345362, "grad_norm": 662.3887939453125, "learning_rate": 2.3317798146968113e-06, "loss": 22.7219, "step": 355630 }, { "epoch": 0.71841530076722, "grad_norm": 241.14309692382812, "learning_rate": 2.3314846130001622e-06, "loss": 13.5183, "step": 355640 }, { "epoch": 0.7184355013999039, "grad_norm": 408.4259948730469, "learning_rate": 2.3311894243095363e-06, "loss": 13.5035, "step": 355650 }, { "epoch": 0.7184557020325877, "grad_norm": 262.0238342285156, "learning_rate": 2.3308942486263685e-06, "loss": 16.8217, "step": 355660 }, { "epoch": 0.7184759026652715, "grad_norm": 180.18206787109375, "learning_rate": 2.3305990859520974e-06, "loss": 12.8303, "step": 355670 }, { "epoch": 0.7184961032979553, 
"grad_norm": 157.4781494140625, "learning_rate": 2.3303039362881634e-06, "loss": 20.4091, "step": 355680 }, { "epoch": 0.7185163039306391, "grad_norm": 311.36297607421875, "learning_rate": 2.3300087996360053e-06, "loss": 21.4781, "step": 355690 }, { "epoch": 0.718536504563323, "grad_norm": 801.7392578125, "learning_rate": 2.329713675997058e-06, "loss": 22.1854, "step": 355700 }, { "epoch": 0.7185567051960068, "grad_norm": 546.9130859375, "learning_rate": 2.3294185653727623e-06, "loss": 36.6917, "step": 355710 }, { "epoch": 0.7185769058286906, "grad_norm": 107.47648620605469, "learning_rate": 2.329123467764559e-06, "loss": 8.9922, "step": 355720 }, { "epoch": 0.7185971064613744, "grad_norm": 405.75994873046875, "learning_rate": 2.3288283831738834e-06, "loss": 23.6942, "step": 355730 }, { "epoch": 0.7186173070940581, "grad_norm": 392.80364990234375, "learning_rate": 2.328533311602173e-06, "loss": 25.28, "step": 355740 }, { "epoch": 0.718637507726742, "grad_norm": 146.98622131347656, "learning_rate": 2.3282382530508683e-06, "loss": 17.9209, "step": 355750 }, { "epoch": 0.7186577083594258, "grad_norm": 250.06829833984375, "learning_rate": 2.327943207521407e-06, "loss": 18.7913, "step": 355760 }, { "epoch": 0.7186779089921096, "grad_norm": 395.7095947265625, "learning_rate": 2.3276481750152245e-06, "loss": 14.4532, "step": 355770 }, { "epoch": 0.7186981096247934, "grad_norm": 83.01255798339844, "learning_rate": 2.3273531555337624e-06, "loss": 17.7567, "step": 355780 }, { "epoch": 0.7187183102574772, "grad_norm": 613.611572265625, "learning_rate": 2.327058149078455e-06, "loss": 20.1002, "step": 355790 }, { "epoch": 0.718738510890161, "grad_norm": 208.4181365966797, "learning_rate": 2.3267631556507443e-06, "loss": 22.4576, "step": 355800 }, { "epoch": 0.7187587115228449, "grad_norm": 419.4081726074219, "learning_rate": 2.326468175252065e-06, "loss": 17.6618, "step": 355810 }, { "epoch": 0.7187789121555287, "grad_norm": 224.1723175048828, "learning_rate": 2.326173207883854e-06, "loss": 21.159, "step": 355820 }, { "epoch": 0.7187991127882125, "grad_norm": 557.3604125976562, "learning_rate": 2.325878253547552e-06, "loss": 20.6774, "step": 355830 }, { "epoch": 0.7188193134208963, "grad_norm": 222.83775329589844, "learning_rate": 2.3255833122445937e-06, "loss": 22.6103, "step": 355840 }, { "epoch": 0.7188395140535802, "grad_norm": 745.8064575195312, "learning_rate": 2.32528838397642e-06, "loss": 23.5769, "step": 355850 }, { "epoch": 0.718859714686264, "grad_norm": 553.0786743164062, "learning_rate": 2.3249934687444642e-06, "loss": 19.6189, "step": 355860 }, { "epoch": 0.7188799153189478, "grad_norm": 283.5133056640625, "learning_rate": 2.3246985665501674e-06, "loss": 22.1916, "step": 355870 }, { "epoch": 0.7189001159516316, "grad_norm": 569.7648315429688, "learning_rate": 2.3244036773949658e-06, "loss": 20.4876, "step": 355880 }, { "epoch": 0.7189203165843154, "grad_norm": 474.4637451171875, "learning_rate": 2.3241088012802938e-06, "loss": 15.6569, "step": 355890 }, { "epoch": 0.7189405172169993, "grad_norm": 564.4053955078125, "learning_rate": 2.323813938207593e-06, "loss": 17.7916, "step": 355900 }, { "epoch": 0.7189607178496831, "grad_norm": 130.00132751464844, "learning_rate": 2.323519088178299e-06, "loss": 17.8169, "step": 355910 }, { "epoch": 0.7189809184823669, "grad_norm": 610.6986083984375, "learning_rate": 2.3232242511938452e-06, "loss": 20.7983, "step": 355920 }, { "epoch": 0.7190011191150507, "grad_norm": 3521.993896484375, "learning_rate": 2.322929427255673e-06, "loss": 53.2133, "step": 
355930 }, { "epoch": 0.7190213197477345, "grad_norm": 74.32054901123047, "learning_rate": 2.3226346163652213e-06, "loss": 15.5488, "step": 355940 }, { "epoch": 0.7190415203804184, "grad_norm": 316.9515380859375, "learning_rate": 2.32233981852392e-06, "loss": 17.8092, "step": 355950 }, { "epoch": 0.7190617210131022, "grad_norm": 1.8933401107788086, "learning_rate": 2.3220450337332097e-06, "loss": 14.9994, "step": 355960 }, { "epoch": 0.719081921645786, "grad_norm": 56.94375228881836, "learning_rate": 2.321750261994529e-06, "loss": 13.7345, "step": 355970 }, { "epoch": 0.7191021222784698, "grad_norm": 478.0467834472656, "learning_rate": 2.321455503309313e-06, "loss": 11.1437, "step": 355980 }, { "epoch": 0.7191223229111535, "grad_norm": 171.91444396972656, "learning_rate": 2.3211607576789958e-06, "loss": 17.4037, "step": 355990 }, { "epoch": 0.7191425235438373, "grad_norm": 87.4278335571289, "learning_rate": 2.320866025105016e-06, "loss": 20.4756, "step": 356000 }, { "epoch": 0.7191627241765212, "grad_norm": 380.328125, "learning_rate": 2.320571305588814e-06, "loss": 22.9658, "step": 356010 }, { "epoch": 0.719182924809205, "grad_norm": 368.05047607421875, "learning_rate": 2.3202765991318195e-06, "loss": 11.3421, "step": 356020 }, { "epoch": 0.7192031254418888, "grad_norm": 47.237091064453125, "learning_rate": 2.3199819057354712e-06, "loss": 23.8725, "step": 356030 }, { "epoch": 0.7192233260745726, "grad_norm": 213.0360565185547, "learning_rate": 2.3196872254012075e-06, "loss": 30.4471, "step": 356040 }, { "epoch": 0.7192435267072564, "grad_norm": 340.695556640625, "learning_rate": 2.319392558130464e-06, "loss": 23.5536, "step": 356050 }, { "epoch": 0.7192637273399403, "grad_norm": 348.7161865234375, "learning_rate": 2.3190979039246738e-06, "loss": 25.9111, "step": 356060 }, { "epoch": 0.7192839279726241, "grad_norm": 517.1141357421875, "learning_rate": 2.318803262785277e-06, "loss": 27.4346, "step": 356070 }, { "epoch": 0.7193041286053079, "grad_norm": 408.94342041015625, "learning_rate": 2.318508634713708e-06, "loss": 18.317, "step": 356080 }, { "epoch": 0.7193243292379917, "grad_norm": 936.9793090820312, "learning_rate": 2.318214019711401e-06, "loss": 20.7434, "step": 356090 }, { "epoch": 0.7193445298706755, "grad_norm": 680.93896484375, "learning_rate": 2.3179194177797954e-06, "loss": 12.71, "step": 356100 }, { "epoch": 0.7193647305033594, "grad_norm": 309.80169677734375, "learning_rate": 2.3176248289203237e-06, "loss": 21.2214, "step": 356110 }, { "epoch": 0.7193849311360432, "grad_norm": 296.0082702636719, "learning_rate": 2.3173302531344243e-06, "loss": 17.1504, "step": 356120 }, { "epoch": 0.719405131768727, "grad_norm": 426.7873840332031, "learning_rate": 2.317035690423531e-06, "loss": 22.7068, "step": 356130 }, { "epoch": 0.7194253324014108, "grad_norm": 324.6080627441406, "learning_rate": 2.3167411407890814e-06, "loss": 22.9607, "step": 356140 }, { "epoch": 0.7194455330340946, "grad_norm": 696.2737426757812, "learning_rate": 2.3164466042325106e-06, "loss": 17.5741, "step": 356150 }, { "epoch": 0.7194657336667785, "grad_norm": 359.4756774902344, "learning_rate": 2.316152080755251e-06, "loss": 22.4907, "step": 356160 }, { "epoch": 0.7194859342994623, "grad_norm": 251.14215087890625, "learning_rate": 2.315857570358743e-06, "loss": 16.7082, "step": 356170 }, { "epoch": 0.7195061349321461, "grad_norm": 295.7928161621094, "learning_rate": 2.3155630730444185e-06, "loss": 11.9237, "step": 356180 }, { "epoch": 0.7195263355648299, "grad_norm": 336.5198974609375, "learning_rate": 
2.315268588813715e-06, "loss": 10.258, "step": 356190 }, { "epoch": 0.7195465361975137, "grad_norm": 1.7450218200683594, "learning_rate": 2.3149741176680666e-06, "loss": 8.8111, "step": 356200 }, { "epoch": 0.7195667368301976, "grad_norm": 376.4080810546875, "learning_rate": 2.314679659608907e-06, "loss": 34.4667, "step": 356210 }, { "epoch": 0.7195869374628814, "grad_norm": 273.2095031738281, "learning_rate": 2.3143852146376754e-06, "loss": 30.1214, "step": 356220 }, { "epoch": 0.7196071380955652, "grad_norm": 1135.8089599609375, "learning_rate": 2.314090782755804e-06, "loss": 16.1298, "step": 356230 }, { "epoch": 0.719627338728249, "grad_norm": 1103.915283203125, "learning_rate": 2.313796363964727e-06, "loss": 38.4009, "step": 356240 }, { "epoch": 0.7196475393609327, "grad_norm": 333.146728515625, "learning_rate": 2.3135019582658803e-06, "loss": 20.5211, "step": 356250 }, { "epoch": 0.7196677399936166, "grad_norm": 269.69256591796875, "learning_rate": 2.3132075656607034e-06, "loss": 11.4717, "step": 356260 }, { "epoch": 0.7196879406263004, "grad_norm": 143.51089477539062, "learning_rate": 2.3129131861506225e-06, "loss": 25.2597, "step": 356270 }, { "epoch": 0.7197081412589842, "grad_norm": 412.6779479980469, "learning_rate": 2.3126188197370773e-06, "loss": 15.8539, "step": 356280 }, { "epoch": 0.719728341891668, "grad_norm": 358.5265808105469, "learning_rate": 2.312324466421504e-06, "loss": 28.5921, "step": 356290 }, { "epoch": 0.7197485425243518, "grad_norm": 256.109375, "learning_rate": 2.312030126205335e-06, "loss": 8.2917, "step": 356300 }, { "epoch": 0.7197687431570357, "grad_norm": 477.0332336425781, "learning_rate": 2.3117357990900034e-06, "loss": 27.9492, "step": 356310 }, { "epoch": 0.7197889437897195, "grad_norm": 81.21957397460938, "learning_rate": 2.3114414850769458e-06, "loss": 14.0278, "step": 356320 }, { "epoch": 0.7198091444224033, "grad_norm": 209.64715576171875, "learning_rate": 2.3111471841675993e-06, "loss": 26.2375, "step": 356330 }, { "epoch": 0.7198293450550871, "grad_norm": 479.06219482421875, "learning_rate": 2.310852896363392e-06, "loss": 12.472, "step": 356340 }, { "epoch": 0.7198495456877709, "grad_norm": 509.81658935546875, "learning_rate": 2.3105586216657616e-06, "loss": 27.761, "step": 356350 }, { "epoch": 0.7198697463204548, "grad_norm": 616.83984375, "learning_rate": 2.3102643600761445e-06, "loss": 38.2269, "step": 356360 }, { "epoch": 0.7198899469531386, "grad_norm": 180.7018585205078, "learning_rate": 2.3099701115959715e-06, "loss": 10.2843, "step": 356370 }, { "epoch": 0.7199101475858224, "grad_norm": 259.7471618652344, "learning_rate": 2.309675876226677e-06, "loss": 12.6841, "step": 356380 }, { "epoch": 0.7199303482185062, "grad_norm": 483.06158447265625, "learning_rate": 2.309381653969698e-06, "loss": 32.0829, "step": 356390 }, { "epoch": 0.71995054885119, "grad_norm": 139.95729064941406, "learning_rate": 2.309087444826464e-06, "loss": 10.3258, "step": 356400 }, { "epoch": 0.7199707494838739, "grad_norm": 650.1048583984375, "learning_rate": 2.308793248798414e-06, "loss": 20.2942, "step": 356410 }, { "epoch": 0.7199909501165577, "grad_norm": 389.9107971191406, "learning_rate": 2.308499065886978e-06, "loss": 24.8322, "step": 356420 }, { "epoch": 0.7200111507492415, "grad_norm": 207.32176208496094, "learning_rate": 2.3082048960935905e-06, "loss": 8.6192, "step": 356430 }, { "epoch": 0.7200313513819253, "grad_norm": 474.74658203125, "learning_rate": 2.3079107394196875e-06, "loss": 6.9978, "step": 356440 }, { "epoch": 0.7200515520146091, "grad_norm": 
248.63880920410156, "learning_rate": 2.3076165958666992e-06, "loss": 9.2451, "step": 356450 }, { "epoch": 0.720071752647293, "grad_norm": 478.6060791015625, "learning_rate": 2.3073224654360627e-06, "loss": 20.6712, "step": 356460 }, { "epoch": 0.7200919532799768, "grad_norm": 191.9722900390625, "learning_rate": 2.3070283481292077e-06, "loss": 12.5131, "step": 356470 }, { "epoch": 0.7201121539126606, "grad_norm": 671.8963623046875, "learning_rate": 2.306734243947572e-06, "loss": 25.6819, "step": 356480 }, { "epoch": 0.7201323545453444, "grad_norm": 217.21632385253906, "learning_rate": 2.3064401528925872e-06, "loss": 14.4012, "step": 356490 }, { "epoch": 0.7201525551780282, "grad_norm": 164.06991577148438, "learning_rate": 2.3061460749656844e-06, "loss": 12.9598, "step": 356500 }, { "epoch": 0.7201727558107119, "grad_norm": 129.89657592773438, "learning_rate": 2.3058520101683012e-06, "loss": 26.4054, "step": 356510 }, { "epoch": 0.7201929564433958, "grad_norm": 463.0583190917969, "learning_rate": 2.3055579585018685e-06, "loss": 19.5261, "step": 356520 }, { "epoch": 0.7202131570760796, "grad_norm": 328.9757995605469, "learning_rate": 2.3052639199678167e-06, "loss": 13.6301, "step": 356530 }, { "epoch": 0.7202333577087634, "grad_norm": 440.01763916015625, "learning_rate": 2.3049698945675826e-06, "loss": 13.7399, "step": 356540 }, { "epoch": 0.7202535583414472, "grad_norm": 149.9425811767578, "learning_rate": 2.3046758823026018e-06, "loss": 14.2513, "step": 356550 }, { "epoch": 0.720273758974131, "grad_norm": 842.3128662109375, "learning_rate": 2.3043818831743003e-06, "loss": 14.4759, "step": 356560 }, { "epoch": 0.7202939596068149, "grad_norm": 432.9392395019531, "learning_rate": 2.304087897184114e-06, "loss": 24.7468, "step": 356570 }, { "epoch": 0.7203141602394987, "grad_norm": 89.6991958618164, "learning_rate": 2.303793924333479e-06, "loss": 23.3948, "step": 356580 }, { "epoch": 0.7203343608721825, "grad_norm": 412.6732482910156, "learning_rate": 2.303499964623825e-06, "loss": 17.0822, "step": 356590 }, { "epoch": 0.7203545615048663, "grad_norm": 328.019287109375, "learning_rate": 2.303206018056583e-06, "loss": 13.4234, "step": 356600 }, { "epoch": 0.7203747621375501, "grad_norm": 299.4187927246094, "learning_rate": 2.3029120846331883e-06, "loss": 16.7432, "step": 356610 }, { "epoch": 0.720394962770234, "grad_norm": 35.62937927246094, "learning_rate": 2.3026181643550767e-06, "loss": 25.7069, "step": 356620 }, { "epoch": 0.7204151634029178, "grad_norm": 350.29095458984375, "learning_rate": 2.3023242572236728e-06, "loss": 21.3234, "step": 356630 }, { "epoch": 0.7204353640356016, "grad_norm": 261.7138671875, "learning_rate": 2.3020303632404132e-06, "loss": 15.6534, "step": 356640 }, { "epoch": 0.7204555646682854, "grad_norm": 1417.159423828125, "learning_rate": 2.301736482406733e-06, "loss": 26.2665, "step": 356650 }, { "epoch": 0.7204757653009692, "grad_norm": 655.8534545898438, "learning_rate": 2.3014426147240614e-06, "loss": 12.1236, "step": 356660 }, { "epoch": 0.7204959659336531, "grad_norm": 349.3835754394531, "learning_rate": 2.3011487601938292e-06, "loss": 16.5535, "step": 356670 }, { "epoch": 0.7205161665663369, "grad_norm": 194.57681274414062, "learning_rate": 2.3008549188174728e-06, "loss": 17.0723, "step": 356680 }, { "epoch": 0.7205363671990207, "grad_norm": 254.3366241455078, "learning_rate": 2.300561090596422e-06, "loss": 13.1158, "step": 356690 }, { "epoch": 0.7205565678317045, "grad_norm": 190.81695556640625, "learning_rate": 2.3002672755321076e-06, "loss": 15.1789, "step": 
356700 }, { "epoch": 0.7205767684643883, "grad_norm": 307.341064453125, "learning_rate": 2.2999734736259644e-06, "loss": 12.2914, "step": 356710 }, { "epoch": 0.7205969690970722, "grad_norm": 644.7877807617188, "learning_rate": 2.299679684879421e-06, "loss": 31.196, "step": 356720 }, { "epoch": 0.720617169729756, "grad_norm": 31.456466674804688, "learning_rate": 2.2993859092939136e-06, "loss": 13.6381, "step": 356730 }, { "epoch": 0.7206373703624398, "grad_norm": 724.866455078125, "learning_rate": 2.29909214687087e-06, "loss": 17.2696, "step": 356740 }, { "epoch": 0.7206575709951236, "grad_norm": 454.0261535644531, "learning_rate": 2.298798397611725e-06, "loss": 18.3462, "step": 356750 }, { "epoch": 0.7206777716278073, "grad_norm": 8.363052368164062, "learning_rate": 2.2985046615179098e-06, "loss": 4.4754, "step": 356760 }, { "epoch": 0.7206979722604911, "grad_norm": 264.83489990234375, "learning_rate": 2.2982109385908524e-06, "loss": 15.6975, "step": 356770 }, { "epoch": 0.720718172893175, "grad_norm": 393.46734619140625, "learning_rate": 2.29791722883199e-06, "loss": 18.1938, "step": 356780 }, { "epoch": 0.7207383735258588, "grad_norm": 801.3164672851562, "learning_rate": 2.2976235322427487e-06, "loss": 41.4712, "step": 356790 }, { "epoch": 0.7207585741585426, "grad_norm": 150.0250244140625, "learning_rate": 2.297329848824565e-06, "loss": 8.4385, "step": 356800 }, { "epoch": 0.7207787747912264, "grad_norm": 398.1314697265625, "learning_rate": 2.2970361785788673e-06, "loss": 17.7207, "step": 356810 }, { "epoch": 0.7207989754239102, "grad_norm": 170.1879119873047, "learning_rate": 2.296742521507086e-06, "loss": 9.9429, "step": 356820 }, { "epoch": 0.7208191760565941, "grad_norm": 645.7498779296875, "learning_rate": 2.296448877610655e-06, "loss": 18.6266, "step": 356830 }, { "epoch": 0.7208393766892779, "grad_norm": 266.1501159667969, "learning_rate": 2.2961552468910048e-06, "loss": 16.7124, "step": 356840 }, { "epoch": 0.7208595773219617, "grad_norm": 222.91485595703125, "learning_rate": 2.295861629349564e-06, "loss": 14.8486, "step": 356850 }, { "epoch": 0.7208797779546455, "grad_norm": 614.7936401367188, "learning_rate": 2.295568024987766e-06, "loss": 42.9427, "step": 356860 }, { "epoch": 0.7208999785873293, "grad_norm": 453.291015625, "learning_rate": 2.2952744338070447e-06, "loss": 18.9652, "step": 356870 }, { "epoch": 0.7209201792200132, "grad_norm": 555.583984375, "learning_rate": 2.2949808558088243e-06, "loss": 21.7545, "step": 356880 }, { "epoch": 0.720940379852697, "grad_norm": 117.07242584228516, "learning_rate": 2.2946872909945387e-06, "loss": 64.4816, "step": 356890 }, { "epoch": 0.7209605804853808, "grad_norm": 117.32300567626953, "learning_rate": 2.294393739365621e-06, "loss": 31.0427, "step": 356900 }, { "epoch": 0.7209807811180646, "grad_norm": 314.8080139160156, "learning_rate": 2.294100200923501e-06, "loss": 21.1378, "step": 356910 }, { "epoch": 0.7210009817507484, "grad_norm": 287.3998107910156, "learning_rate": 2.293806675669606e-06, "loss": 18.6618, "step": 356920 }, { "epoch": 0.7210211823834323, "grad_norm": 179.3436737060547, "learning_rate": 2.2935131636053687e-06, "loss": 12.9096, "step": 356930 }, { "epoch": 0.7210413830161161, "grad_norm": 181.98316955566406, "learning_rate": 2.293219664732224e-06, "loss": 18.5185, "step": 356940 }, { "epoch": 0.7210615836487999, "grad_norm": 148.46475219726562, "learning_rate": 2.2929261790515944e-06, "loss": 21.5742, "step": 356950 }, { "epoch": 0.7210817842814837, "grad_norm": 298.6468200683594, "learning_rate": 
2.2926327065649144e-06, "loss": 9.8013, "step": 356960 }, { "epoch": 0.7211019849141675, "grad_norm": 2026.063232421875, "learning_rate": 2.292339247273617e-06, "loss": 35.6442, "step": 356970 }, { "epoch": 0.7211221855468514, "grad_norm": 202.90513610839844, "learning_rate": 2.2920458011791287e-06, "loss": 19.6064, "step": 356980 }, { "epoch": 0.7211423861795352, "grad_norm": 276.17608642578125, "learning_rate": 2.291752368282879e-06, "loss": 19.9596, "step": 356990 }, { "epoch": 0.721162586812219, "grad_norm": 287.6712951660156, "learning_rate": 2.2914589485863015e-06, "loss": 27.0526, "step": 357000 }, { "epoch": 0.7211827874449028, "grad_norm": 560.3438110351562, "learning_rate": 2.2911655420908247e-06, "loss": 19.6648, "step": 357010 }, { "epoch": 0.7212029880775865, "grad_norm": 320.8586120605469, "learning_rate": 2.290872148797877e-06, "loss": 25.0655, "step": 357020 }, { "epoch": 0.7212231887102704, "grad_norm": 209.60411071777344, "learning_rate": 2.2905787687088914e-06, "loss": 14.534, "step": 357030 }, { "epoch": 0.7212433893429542, "grad_norm": 598.6663208007812, "learning_rate": 2.2902854018252945e-06, "loss": 17.7608, "step": 357040 }, { "epoch": 0.721263589975638, "grad_norm": 455.5414123535156, "learning_rate": 2.2899920481485192e-06, "loss": 27.8769, "step": 357050 }, { "epoch": 0.7212837906083218, "grad_norm": 809.207275390625, "learning_rate": 2.2896987076799933e-06, "loss": 20.7547, "step": 357060 }, { "epoch": 0.7213039912410056, "grad_norm": 378.81134033203125, "learning_rate": 2.289405380421148e-06, "loss": 20.7372, "step": 357070 }, { "epoch": 0.7213241918736895, "grad_norm": 579.6322631835938, "learning_rate": 2.289112066373411e-06, "loss": 14.606, "step": 357080 }, { "epoch": 0.7213443925063733, "grad_norm": 318.2368469238281, "learning_rate": 2.2888187655382143e-06, "loss": 9.5947, "step": 357090 }, { "epoch": 0.7213645931390571, "grad_norm": 452.0503845214844, "learning_rate": 2.288525477916986e-06, "loss": 15.6629, "step": 357100 }, { "epoch": 0.7213847937717409, "grad_norm": 696.1923217773438, "learning_rate": 2.2882322035111543e-06, "loss": 14.907, "step": 357110 }, { "epoch": 0.7214049944044247, "grad_norm": 836.78564453125, "learning_rate": 2.2879389423221514e-06, "loss": 16.9761, "step": 357120 }, { "epoch": 0.7214251950371086, "grad_norm": 494.79388427734375, "learning_rate": 2.287645694351406e-06, "loss": 29.4495, "step": 357130 }, { "epoch": 0.7214453956697924, "grad_norm": 474.1503601074219, "learning_rate": 2.287352459600344e-06, "loss": 9.4509, "step": 357140 }, { "epoch": 0.7214655963024762, "grad_norm": 395.12396240234375, "learning_rate": 2.287059238070397e-06, "loss": 11.9516, "step": 357150 }, { "epoch": 0.72148579693516, "grad_norm": 8.561695098876953, "learning_rate": 2.2867660297629977e-06, "loss": 13.3588, "step": 357160 }, { "epoch": 0.7215059975678438, "grad_norm": 93.96231079101562, "learning_rate": 2.2864728346795686e-06, "loss": 14.2886, "step": 357170 }, { "epoch": 0.7215261982005277, "grad_norm": 215.66087341308594, "learning_rate": 2.286179652821542e-06, "loss": 23.7562, "step": 357180 }, { "epoch": 0.7215463988332115, "grad_norm": 438.2885437011719, "learning_rate": 2.285886484190348e-06, "loss": 19.0968, "step": 357190 }, { "epoch": 0.7215665994658953, "grad_norm": 21.059423446655273, "learning_rate": 2.285593328787414e-06, "loss": 13.9654, "step": 357200 }, { "epoch": 0.7215868000985791, "grad_norm": 185.17214965820312, "learning_rate": 2.285300186614167e-06, "loss": 12.5266, "step": 357210 }, { "epoch": 0.7216070007312629, 
"grad_norm": 395.4156494140625, "learning_rate": 2.285007057672038e-06, "loss": 20.2223, "step": 357220 }, { "epoch": 0.7216272013639468, "grad_norm": 411.3515319824219, "learning_rate": 2.2847139419624585e-06, "loss": 21.688, "step": 357230 }, { "epoch": 0.7216474019966306, "grad_norm": 241.63412475585938, "learning_rate": 2.2844208394868504e-06, "loss": 14.3528, "step": 357240 }, { "epoch": 0.7216676026293144, "grad_norm": 587.6914672851562, "learning_rate": 2.284127750246646e-06, "loss": 14.3501, "step": 357250 }, { "epoch": 0.7216878032619982, "grad_norm": 674.5878295898438, "learning_rate": 2.2838346742432753e-06, "loss": 24.0324, "step": 357260 }, { "epoch": 0.7217080038946819, "grad_norm": 461.4089050292969, "learning_rate": 2.283541611478165e-06, "loss": 10.5924, "step": 357270 }, { "epoch": 0.7217282045273657, "grad_norm": 278.19781494140625, "learning_rate": 2.2832485619527417e-06, "loss": 15.6594, "step": 357280 }, { "epoch": 0.7217484051600496, "grad_norm": 623.696044921875, "learning_rate": 2.2829555256684372e-06, "loss": 22.9163, "step": 357290 }, { "epoch": 0.7217686057927334, "grad_norm": 394.54296875, "learning_rate": 2.282662502626678e-06, "loss": 12.9196, "step": 357300 }, { "epoch": 0.7217888064254172, "grad_norm": 155.50816345214844, "learning_rate": 2.2823694928288897e-06, "loss": 20.3786, "step": 357310 }, { "epoch": 0.721809007058101, "grad_norm": 265.4114685058594, "learning_rate": 2.282076496276506e-06, "loss": 15.7187, "step": 357320 }, { "epoch": 0.7218292076907848, "grad_norm": 697.023193359375, "learning_rate": 2.2817835129709486e-06, "loss": 14.364, "step": 357330 }, { "epoch": 0.7218494083234687, "grad_norm": 487.0653381347656, "learning_rate": 2.2814905429136515e-06, "loss": 17.4101, "step": 357340 }, { "epoch": 0.7218696089561525, "grad_norm": 276.76165771484375, "learning_rate": 2.281197586106037e-06, "loss": 37.9181, "step": 357350 }, { "epoch": 0.7218898095888363, "grad_norm": 136.1702117919922, "learning_rate": 2.2809046425495386e-06, "loss": 21.9253, "step": 357360 }, { "epoch": 0.7219100102215201, "grad_norm": 11.655019760131836, "learning_rate": 2.2806117122455806e-06, "loss": 13.4374, "step": 357370 }, { "epoch": 0.721930210854204, "grad_norm": 251.75360107421875, "learning_rate": 2.280318795195589e-06, "loss": 23.9527, "step": 357380 }, { "epoch": 0.7219504114868878, "grad_norm": 801.7538452148438, "learning_rate": 2.2800258914009966e-06, "loss": 26.2478, "step": 357390 }, { "epoch": 0.7219706121195716, "grad_norm": 335.84063720703125, "learning_rate": 2.2797330008632255e-06, "loss": 22.8577, "step": 357400 }, { "epoch": 0.7219908127522554, "grad_norm": 40.592529296875, "learning_rate": 2.2794401235837083e-06, "loss": 11.7794, "step": 357410 }, { "epoch": 0.7220110133849392, "grad_norm": 244.24241638183594, "learning_rate": 2.2791472595638693e-06, "loss": 19.2623, "step": 357420 }, { "epoch": 0.722031214017623, "grad_norm": 183.85824584960938, "learning_rate": 2.278854408805135e-06, "loss": 13.9928, "step": 357430 }, { "epoch": 0.7220514146503069, "grad_norm": 282.90216064453125, "learning_rate": 2.2785615713089363e-06, "loss": 20.9458, "step": 357440 }, { "epoch": 0.7220716152829907, "grad_norm": 418.7989807128906, "learning_rate": 2.2782687470766985e-06, "loss": 21.4213, "step": 357450 }, { "epoch": 0.7220918159156745, "grad_norm": 579.4332885742188, "learning_rate": 2.277975936109846e-06, "loss": 15.887, "step": 357460 }, { "epoch": 0.7221120165483583, "grad_norm": 184.46786499023438, "learning_rate": 2.2776831384098096e-06, "loss": 15.6152, 
"step": 357470 }, { "epoch": 0.7221322171810421, "grad_norm": 233.37539672851562, "learning_rate": 2.277390353978019e-06, "loss": 13.5625, "step": 357480 }, { "epoch": 0.722152417813726, "grad_norm": 301.2912902832031, "learning_rate": 2.2770975828158936e-06, "loss": 19.0677, "step": 357490 }, { "epoch": 0.7221726184464098, "grad_norm": 203.8992462158203, "learning_rate": 2.2768048249248648e-06, "loss": 9.9747, "step": 357500 }, { "epoch": 0.7221928190790936, "grad_norm": 56.02370071411133, "learning_rate": 2.27651208030636e-06, "loss": 20.9339, "step": 357510 }, { "epoch": 0.7222130197117774, "grad_norm": 509.4747619628906, "learning_rate": 2.2762193489618057e-06, "loss": 26.1781, "step": 357520 }, { "epoch": 0.7222332203444611, "grad_norm": 324.1914367675781, "learning_rate": 2.2759266308926257e-06, "loss": 14.2268, "step": 357530 }, { "epoch": 0.722253420977145, "grad_norm": 175.71090698242188, "learning_rate": 2.275633926100249e-06, "loss": 17.0575, "step": 357540 }, { "epoch": 0.7222736216098288, "grad_norm": 3198.631103515625, "learning_rate": 2.2753412345861065e-06, "loss": 31.1765, "step": 357550 }, { "epoch": 0.7222938222425126, "grad_norm": 219.0963592529297, "learning_rate": 2.2750485563516154e-06, "loss": 20.2615, "step": 357560 }, { "epoch": 0.7223140228751964, "grad_norm": 77.74398040771484, "learning_rate": 2.2747558913982084e-06, "loss": 22.1748, "step": 357570 }, { "epoch": 0.7223342235078802, "grad_norm": 1779.8966064453125, "learning_rate": 2.2744632397273113e-06, "loss": 34.767, "step": 357580 }, { "epoch": 0.722354424140564, "grad_norm": 331.18829345703125, "learning_rate": 2.2741706013403507e-06, "loss": 21.3225, "step": 357590 }, { "epoch": 0.7223746247732479, "grad_norm": 14.230195045471191, "learning_rate": 2.27387797623875e-06, "loss": 10.3357, "step": 357600 }, { "epoch": 0.7223948254059317, "grad_norm": 434.1004943847656, "learning_rate": 2.273585364423939e-06, "loss": 22.1532, "step": 357610 }, { "epoch": 0.7224150260386155, "grad_norm": 384.7958068847656, "learning_rate": 2.2732927658973427e-06, "loss": 23.2884, "step": 357620 }, { "epoch": 0.7224352266712993, "grad_norm": 1325.3875732421875, "learning_rate": 2.273000180660384e-06, "loss": 32.3256, "step": 357630 }, { "epoch": 0.7224554273039832, "grad_norm": 320.1987609863281, "learning_rate": 2.272707608714493e-06, "loss": 33.4472, "step": 357640 }, { "epoch": 0.722475627936667, "grad_norm": 0.4604489505290985, "learning_rate": 2.2724150500610946e-06, "loss": 26.7996, "step": 357650 }, { "epoch": 0.7224958285693508, "grad_norm": 269.8821716308594, "learning_rate": 2.2721225047016153e-06, "loss": 14.3756, "step": 357660 }, { "epoch": 0.7225160292020346, "grad_norm": 134.71096801757812, "learning_rate": 2.2718299726374786e-06, "loss": 11.6502, "step": 357670 }, { "epoch": 0.7225362298347184, "grad_norm": 215.3944091796875, "learning_rate": 2.271537453870113e-06, "loss": 17.0625, "step": 357680 }, { "epoch": 0.7225564304674023, "grad_norm": 436.587646484375, "learning_rate": 2.271244948400943e-06, "loss": 18.9358, "step": 357690 }, { "epoch": 0.7225766311000861, "grad_norm": 388.4676513671875, "learning_rate": 2.2709524562313923e-06, "loss": 26.0947, "step": 357700 }, { "epoch": 0.7225968317327699, "grad_norm": 602.5151977539062, "learning_rate": 2.2706599773628906e-06, "loss": 13.6967, "step": 357710 }, { "epoch": 0.7226170323654537, "grad_norm": 531.5065307617188, "learning_rate": 2.270367511796859e-06, "loss": 22.3433, "step": 357720 }, { "epoch": 0.7226372329981375, "grad_norm": 413.5867004394531, 
"learning_rate": 2.2700750595347263e-06, "loss": 14.5211, "step": 357730 }, { "epoch": 0.7226574336308214, "grad_norm": 352.24969482421875, "learning_rate": 2.2697826205779178e-06, "loss": 8.6732, "step": 357740 }, { "epoch": 0.7226776342635052, "grad_norm": 385.8419494628906, "learning_rate": 2.2694901949278554e-06, "loss": 9.7671, "step": 357750 }, { "epoch": 0.722697834896189, "grad_norm": 489.5587158203125, "learning_rate": 2.269197782585968e-06, "loss": 11.0093, "step": 357760 }, { "epoch": 0.7227180355288728, "grad_norm": 330.181396484375, "learning_rate": 2.26890538355368e-06, "loss": 15.1593, "step": 357770 }, { "epoch": 0.7227382361615566, "grad_norm": 253.468994140625, "learning_rate": 2.2686129978324134e-06, "loss": 13.8927, "step": 357780 }, { "epoch": 0.7227584367942403, "grad_norm": 261.2122802734375, "learning_rate": 2.2683206254235962e-06, "loss": 16.3722, "step": 357790 }, { "epoch": 0.7227786374269242, "grad_norm": 259.9643249511719, "learning_rate": 2.268028266328655e-06, "loss": 14.7177, "step": 357800 }, { "epoch": 0.722798838059608, "grad_norm": 372.77593994140625, "learning_rate": 2.2677359205490122e-06, "loss": 20.6044, "step": 357810 }, { "epoch": 0.7228190386922918, "grad_norm": 205.45460510253906, "learning_rate": 2.267443588086092e-06, "loss": 14.7403, "step": 357820 }, { "epoch": 0.7228392393249756, "grad_norm": 9.897441864013672, "learning_rate": 2.26715126894132e-06, "loss": 13.5756, "step": 357830 }, { "epoch": 0.7228594399576594, "grad_norm": 275.323486328125, "learning_rate": 2.2668589631161246e-06, "loss": 21.7044, "step": 357840 }, { "epoch": 0.7228796405903433, "grad_norm": 136.56597900390625, "learning_rate": 2.2665666706119237e-06, "loss": 20.5228, "step": 357850 }, { "epoch": 0.7228998412230271, "grad_norm": 436.6207580566406, "learning_rate": 2.2662743914301455e-06, "loss": 15.2756, "step": 357860 }, { "epoch": 0.7229200418557109, "grad_norm": 663.1101684570312, "learning_rate": 2.265982125572216e-06, "loss": 23.2945, "step": 357870 }, { "epoch": 0.7229402424883947, "grad_norm": 528.8939819335938, "learning_rate": 2.2656898730395575e-06, "loss": 30.845, "step": 357880 }, { "epoch": 0.7229604431210785, "grad_norm": 165.55613708496094, "learning_rate": 2.2653976338335936e-06, "loss": 17.8566, "step": 357890 }, { "epoch": 0.7229806437537624, "grad_norm": 754.408935546875, "learning_rate": 2.265105407955752e-06, "loss": 45.743, "step": 357900 }, { "epoch": 0.7230008443864462, "grad_norm": 56.507568359375, "learning_rate": 2.2648131954074546e-06, "loss": 30.7881, "step": 357910 }, { "epoch": 0.72302104501913, "grad_norm": 204.65957641601562, "learning_rate": 2.264520996190124e-06, "loss": 12.5634, "step": 357920 }, { "epoch": 0.7230412456518138, "grad_norm": 152.5059356689453, "learning_rate": 2.264228810305189e-06, "loss": 7.7773, "step": 357930 }, { "epoch": 0.7230614462844976, "grad_norm": 373.3321228027344, "learning_rate": 2.2639366377540684e-06, "loss": 17.3394, "step": 357940 }, { "epoch": 0.7230816469171815, "grad_norm": 493.6985168457031, "learning_rate": 2.263644478538191e-06, "loss": 32.958, "step": 357950 }, { "epoch": 0.7231018475498653, "grad_norm": 1540.5264892578125, "learning_rate": 2.263352332658976e-06, "loss": 24.3123, "step": 357960 }, { "epoch": 0.7231220481825491, "grad_norm": 121.92477416992188, "learning_rate": 2.2630602001178524e-06, "loss": 20.463, "step": 357970 }, { "epoch": 0.7231422488152329, "grad_norm": 182.40499877929688, "learning_rate": 2.262768080916241e-06, "loss": 17.9135, "step": 357980 }, { "epoch": 
0.7231624494479167, "grad_norm": 377.84173583984375, "learning_rate": 2.2624759750555642e-06, "loss": 14.76, "step": 357990 }, { "epoch": 0.7231826500806006, "grad_norm": 338.09686279296875, "learning_rate": 2.2621838825372496e-06, "loss": 9.3154, "step": 358000 }, { "epoch": 0.7232028507132844, "grad_norm": 426.4371337890625, "learning_rate": 2.2618918033627168e-06, "loss": 25.7333, "step": 358010 }, { "epoch": 0.7232230513459682, "grad_norm": 198.6202392578125, "learning_rate": 2.2615997375333926e-06, "loss": 8.7386, "step": 358020 }, { "epoch": 0.723243251978652, "grad_norm": 73.05398559570312, "learning_rate": 2.2613076850506997e-06, "loss": 17.6739, "step": 358030 }, { "epoch": 0.7232634526113357, "grad_norm": 131.81849670410156, "learning_rate": 2.261015645916059e-06, "loss": 12.5919, "step": 358040 }, { "epoch": 0.7232836532440196, "grad_norm": 698.3875122070312, "learning_rate": 2.2607236201308974e-06, "loss": 23.4516, "step": 358050 }, { "epoch": 0.7233038538767034, "grad_norm": 333.82537841796875, "learning_rate": 2.260431607696637e-06, "loss": 17.0055, "step": 358060 }, { "epoch": 0.7233240545093872, "grad_norm": 268.1274108886719, "learning_rate": 2.260139608614699e-06, "loss": 24.7118, "step": 358070 }, { "epoch": 0.723344255142071, "grad_norm": 542.8685302734375, "learning_rate": 2.2598476228865078e-06, "loss": 19.3529, "step": 358080 }, { "epoch": 0.7233644557747548, "grad_norm": 201.8508758544922, "learning_rate": 2.2595556505134885e-06, "loss": 11.3707, "step": 358090 }, { "epoch": 0.7233846564074387, "grad_norm": 434.41180419921875, "learning_rate": 2.2592636914970633e-06, "loss": 14.799, "step": 358100 }, { "epoch": 0.7234048570401225, "grad_norm": 118.51235961914062, "learning_rate": 2.258971745838652e-06, "loss": 12.1212, "step": 358110 }, { "epoch": 0.7234250576728063, "grad_norm": 507.77349853515625, "learning_rate": 2.2586798135396824e-06, "loss": 15.2521, "step": 358120 }, { "epoch": 0.7234452583054901, "grad_norm": 77.28484344482422, "learning_rate": 2.258387894601575e-06, "loss": 7.3264, "step": 358130 }, { "epoch": 0.7234654589381739, "grad_norm": 337.06976318359375, "learning_rate": 2.2580959890257496e-06, "loss": 17.3473, "step": 358140 }, { "epoch": 0.7234856595708578, "grad_norm": 400.00372314453125, "learning_rate": 2.2578040968136326e-06, "loss": 26.0037, "step": 358150 }, { "epoch": 0.7235058602035416, "grad_norm": 315.7028503417969, "learning_rate": 2.25751221796665e-06, "loss": 22.5793, "step": 358160 }, { "epoch": 0.7235260608362254, "grad_norm": 118.08807373046875, "learning_rate": 2.257220352486216e-06, "loss": 18.7497, "step": 358170 }, { "epoch": 0.7235462614689092, "grad_norm": 493.3076171875, "learning_rate": 2.2569285003737567e-06, "loss": 15.3999, "step": 358180 }, { "epoch": 0.723566462101593, "grad_norm": 480.72161865234375, "learning_rate": 2.256636661630698e-06, "loss": 10.4665, "step": 358190 }, { "epoch": 0.7235866627342769, "grad_norm": 428.39788818359375, "learning_rate": 2.256344836258459e-06, "loss": 17.419, "step": 358200 }, { "epoch": 0.7236068633669607, "grad_norm": 467.6040954589844, "learning_rate": 2.2560530242584604e-06, "loss": 16.9549, "step": 358210 }, { "epoch": 0.7236270639996445, "grad_norm": 719.1596069335938, "learning_rate": 2.255761225632129e-06, "loss": 28.8495, "step": 358220 }, { "epoch": 0.7236472646323283, "grad_norm": 287.503173828125, "learning_rate": 2.255469440380885e-06, "loss": 19.0372, "step": 358230 }, { "epoch": 0.7236674652650121, "grad_norm": 181.43899536132812, "learning_rate": 2.255177668506147e-06, 
"loss": 12.2521, "step": 358240 }, { "epoch": 0.723687665897696, "grad_norm": 32.46775817871094, "learning_rate": 2.254885910009341e-06, "loss": 10.1338, "step": 358250 }, { "epoch": 0.7237078665303798, "grad_norm": 760.5326538085938, "learning_rate": 2.2545941648918897e-06, "loss": 23.1847, "step": 358260 }, { "epoch": 0.7237280671630636, "grad_norm": 41.78430938720703, "learning_rate": 2.2543024331552133e-06, "loss": 19.854, "step": 358270 }, { "epoch": 0.7237482677957474, "grad_norm": 402.6669006347656, "learning_rate": 2.2540107148007316e-06, "loss": 20.7295, "step": 358280 }, { "epoch": 0.7237684684284312, "grad_norm": 19.97368812561035, "learning_rate": 2.253719009829871e-06, "loss": 8.8038, "step": 358290 }, { "epoch": 0.7237886690611149, "grad_norm": 168.602294921875, "learning_rate": 2.2534273182440515e-06, "loss": 12.4717, "step": 358300 }, { "epoch": 0.7238088696937988, "grad_norm": 44.03022003173828, "learning_rate": 2.2531356400446913e-06, "loss": 12.1233, "step": 358310 }, { "epoch": 0.7238290703264826, "grad_norm": 20.101224899291992, "learning_rate": 2.252843975233217e-06, "loss": 13.4733, "step": 358320 }, { "epoch": 0.7238492709591664, "grad_norm": 644.1679077148438, "learning_rate": 2.2525523238110465e-06, "loss": 17.2313, "step": 358330 }, { "epoch": 0.7238694715918502, "grad_norm": 230.2259979248047, "learning_rate": 2.2522606857796036e-06, "loss": 12.693, "step": 358340 }, { "epoch": 0.723889672224534, "grad_norm": 1635.599853515625, "learning_rate": 2.25196906114031e-06, "loss": 17.1778, "step": 358350 }, { "epoch": 0.7239098728572179, "grad_norm": 440.3619689941406, "learning_rate": 2.251677449894583e-06, "loss": 21.9231, "step": 358360 }, { "epoch": 0.7239300734899017, "grad_norm": 32.30497741699219, "learning_rate": 2.2513858520438497e-06, "loss": 9.506, "step": 358370 }, { "epoch": 0.7239502741225855, "grad_norm": 501.3995361328125, "learning_rate": 2.2510942675895277e-06, "loss": 14.3097, "step": 358380 }, { "epoch": 0.7239704747552693, "grad_norm": 821.6167602539062, "learning_rate": 2.250802696533037e-06, "loss": 24.4095, "step": 358390 }, { "epoch": 0.7239906753879531, "grad_norm": 449.1909484863281, "learning_rate": 2.250511138875801e-06, "loss": 7.4873, "step": 358400 }, { "epoch": 0.724010876020637, "grad_norm": 615.5599975585938, "learning_rate": 2.250219594619242e-06, "loss": 17.3436, "step": 358410 }, { "epoch": 0.7240310766533208, "grad_norm": 159.68350219726562, "learning_rate": 2.2499280637647785e-06, "loss": 22.6658, "step": 358420 }, { "epoch": 0.7240512772860046, "grad_norm": 319.6164855957031, "learning_rate": 2.249636546313831e-06, "loss": 19.9201, "step": 358430 }, { "epoch": 0.7240714779186884, "grad_norm": 166.0934295654297, "learning_rate": 2.2493450422678224e-06, "loss": 25.8329, "step": 358440 }, { "epoch": 0.7240916785513722, "grad_norm": 441.7103576660156, "learning_rate": 2.249053551628173e-06, "loss": 22.1586, "step": 358450 }, { "epoch": 0.7241118791840561, "grad_norm": 322.5674743652344, "learning_rate": 2.248762074396301e-06, "loss": 27.2304, "step": 358460 }, { "epoch": 0.7241320798167399, "grad_norm": 541.81787109375, "learning_rate": 2.2484706105736294e-06, "loss": 12.4397, "step": 358470 }, { "epoch": 0.7241522804494237, "grad_norm": 408.0065612792969, "learning_rate": 2.2481791601615797e-06, "loss": 12.977, "step": 358480 }, { "epoch": 0.7241724810821075, "grad_norm": 417.0727844238281, "learning_rate": 2.247887723161571e-06, "loss": 30.8202, "step": 358490 }, { "epoch": 0.7241926817147913, "grad_norm": 809.7229614257812, 
"learning_rate": 2.2475962995750224e-06, "loss": 18.3792, "step": 358500 }, { "epoch": 0.7242128823474752, "grad_norm": 109.99610900878906, "learning_rate": 2.2473048894033566e-06, "loss": 8.9393, "step": 358510 }, { "epoch": 0.724233082980159, "grad_norm": 638.1373901367188, "learning_rate": 2.247013492647994e-06, "loss": 20.4283, "step": 358520 }, { "epoch": 0.7242532836128428, "grad_norm": 411.397216796875, "learning_rate": 2.246722109310351e-06, "loss": 17.646, "step": 358530 }, { "epoch": 0.7242734842455266, "grad_norm": 500.1414489746094, "learning_rate": 2.2464307393918523e-06, "loss": 26.2394, "step": 358540 }, { "epoch": 0.7242936848782103, "grad_norm": 370.89263916015625, "learning_rate": 2.246139382893915e-06, "loss": 26.8334, "step": 358550 }, { "epoch": 0.7243138855108942, "grad_norm": 463.3133850097656, "learning_rate": 2.2458480398179615e-06, "loss": 37.4234, "step": 358560 }, { "epoch": 0.724334086143578, "grad_norm": 316.4405212402344, "learning_rate": 2.245556710165409e-06, "loss": 13.3972, "step": 358570 }, { "epoch": 0.7243542867762618, "grad_norm": 553.1693115234375, "learning_rate": 2.245265393937681e-06, "loss": 22.1872, "step": 358580 }, { "epoch": 0.7243744874089456, "grad_norm": 591.702392578125, "learning_rate": 2.2449740911361955e-06, "loss": 20.8448, "step": 358590 }, { "epoch": 0.7243946880416294, "grad_norm": 276.9403991699219, "learning_rate": 2.24468280176237e-06, "loss": 18.095, "step": 358600 }, { "epoch": 0.7244148886743133, "grad_norm": 455.9407653808594, "learning_rate": 2.2443915258176283e-06, "loss": 18.1615, "step": 358610 }, { "epoch": 0.7244350893069971, "grad_norm": 66.06970977783203, "learning_rate": 2.2441002633033865e-06, "loss": 12.1589, "step": 358620 }, { "epoch": 0.7244552899396809, "grad_norm": 451.0813293457031, "learning_rate": 2.243809014221068e-06, "loss": 19.9946, "step": 358630 }, { "epoch": 0.7244754905723647, "grad_norm": 7.21042013168335, "learning_rate": 2.243517778572089e-06, "loss": 28.7723, "step": 358640 }, { "epoch": 0.7244956912050485, "grad_norm": 191.8756866455078, "learning_rate": 2.2432265563578686e-06, "loss": 7.0977, "step": 358650 }, { "epoch": 0.7245158918377324, "grad_norm": 216.77684020996094, "learning_rate": 2.2429353475798298e-06, "loss": 7.8373, "step": 358660 }, { "epoch": 0.7245360924704162, "grad_norm": 332.76300048828125, "learning_rate": 2.2426441522393893e-06, "loss": 15.3892, "step": 358670 }, { "epoch": 0.7245562931031, "grad_norm": 317.4374084472656, "learning_rate": 2.2423529703379646e-06, "loss": 23.7859, "step": 358680 }, { "epoch": 0.7245764937357838, "grad_norm": 66.8053970336914, "learning_rate": 2.242061801876978e-06, "loss": 9.3861, "step": 358690 }, { "epoch": 0.7245966943684676, "grad_norm": 214.3506317138672, "learning_rate": 2.2417706468578495e-06, "loss": 16.2656, "step": 358700 }, { "epoch": 0.7246168950011515, "grad_norm": 537.5006103515625, "learning_rate": 2.2414795052819956e-06, "loss": 32.6843, "step": 358710 }, { "epoch": 0.7246370956338353, "grad_norm": 456.9745178222656, "learning_rate": 2.241188377150834e-06, "loss": 24.5563, "step": 358720 }, { "epoch": 0.7246572962665191, "grad_norm": 153.896484375, "learning_rate": 2.240897262465788e-06, "loss": 22.6297, "step": 358730 }, { "epoch": 0.7246774968992029, "grad_norm": 14.554478645324707, "learning_rate": 2.240606161228274e-06, "loss": 12.3954, "step": 358740 }, { "epoch": 0.7246976975318867, "grad_norm": 398.4756164550781, "learning_rate": 2.2403150734397095e-06, "loss": 16.2786, "step": 358750 }, { "epoch": 0.7247178981645706, 
"grad_norm": 197.67860412597656, "learning_rate": 2.2400239991015144e-06, "loss": 15.9143, "step": 358760 }, { "epoch": 0.7247380987972544, "grad_norm": 453.6308288574219, "learning_rate": 2.239732938215111e-06, "loss": 10.0401, "step": 358770 }, { "epoch": 0.7247582994299382, "grad_norm": 549.3597412109375, "learning_rate": 2.239441890781911e-06, "loss": 14.2409, "step": 358780 }, { "epoch": 0.724778500062622, "grad_norm": 312.85858154296875, "learning_rate": 2.239150856803336e-06, "loss": 16.799, "step": 358790 }, { "epoch": 0.7247987006953058, "grad_norm": 590.366943359375, "learning_rate": 2.2388598362808074e-06, "loss": 27.8185, "step": 358800 }, { "epoch": 0.7248189013279895, "grad_norm": 219.06785583496094, "learning_rate": 2.2385688292157405e-06, "loss": 10.0919, "step": 358810 }, { "epoch": 0.7248391019606734, "grad_norm": 148.28330993652344, "learning_rate": 2.2382778356095524e-06, "loss": 14.3372, "step": 358820 }, { "epoch": 0.7248593025933572, "grad_norm": 195.47874450683594, "learning_rate": 2.2379868554636653e-06, "loss": 15.591, "step": 358830 }, { "epoch": 0.724879503226041, "grad_norm": 206.07144165039062, "learning_rate": 2.2376958887794953e-06, "loss": 18.501, "step": 358840 }, { "epoch": 0.7248997038587248, "grad_norm": 447.396728515625, "learning_rate": 2.2374049355584583e-06, "loss": 9.9212, "step": 358850 }, { "epoch": 0.7249199044914086, "grad_norm": 368.84735107421875, "learning_rate": 2.237113995801975e-06, "loss": 36.9193, "step": 358860 }, { "epoch": 0.7249401051240925, "grad_norm": 2.169508695602417, "learning_rate": 2.2368230695114644e-06, "loss": 29.4848, "step": 358870 }, { "epoch": 0.7249603057567763, "grad_norm": 60.03493118286133, "learning_rate": 2.2365321566883437e-06, "loss": 28.9763, "step": 358880 }, { "epoch": 0.7249805063894601, "grad_norm": 682.3919067382812, "learning_rate": 2.2362412573340274e-06, "loss": 22.0694, "step": 358890 }, { "epoch": 0.7250007070221439, "grad_norm": 59.70888137817383, "learning_rate": 2.235950371449938e-06, "loss": 13.3987, "step": 358900 }, { "epoch": 0.7250209076548277, "grad_norm": 578.2684936523438, "learning_rate": 2.235659499037492e-06, "loss": 13.9095, "step": 358910 }, { "epoch": 0.7250411082875116, "grad_norm": 189.75296020507812, "learning_rate": 2.2353686400981038e-06, "loss": 22.0163, "step": 358920 }, { "epoch": 0.7250613089201954, "grad_norm": 378.1733703613281, "learning_rate": 2.235077794633196e-06, "loss": 16.2982, "step": 358930 }, { "epoch": 0.7250815095528792, "grad_norm": 368.48858642578125, "learning_rate": 2.234786962644181e-06, "loss": 15.0167, "step": 358940 }, { "epoch": 0.725101710185563, "grad_norm": 871.95166015625, "learning_rate": 2.2344961441324814e-06, "loss": 21.6351, "step": 358950 }, { "epoch": 0.7251219108182468, "grad_norm": 389.7361755371094, "learning_rate": 2.2342053390995117e-06, "loss": 16.9612, "step": 358960 }, { "epoch": 0.7251421114509307, "grad_norm": 15.658724784851074, "learning_rate": 2.2339145475466885e-06, "loss": 15.0011, "step": 358970 }, { "epoch": 0.7251623120836145, "grad_norm": 202.39169311523438, "learning_rate": 2.2336237694754314e-06, "loss": 24.985, "step": 358980 }, { "epoch": 0.7251825127162983, "grad_norm": 297.51947021484375, "learning_rate": 2.233333004887157e-06, "loss": 15.1063, "step": 358990 }, { "epoch": 0.7252027133489821, "grad_norm": 93.51701354980469, "learning_rate": 2.23304225378328e-06, "loss": 8.9163, "step": 359000 }, { "epoch": 0.7252229139816659, "grad_norm": 467.2501220703125, "learning_rate": 2.2327515161652196e-06, "loss": 15.1077, 
"step": 359010 }, { "epoch": 0.7252431146143498, "grad_norm": 326.148681640625, "learning_rate": 2.232460792034395e-06, "loss": 15.2704, "step": 359020 }, { "epoch": 0.7252633152470336, "grad_norm": 526.8595581054688, "learning_rate": 2.2321700813922205e-06, "loss": 12.8883, "step": 359030 }, { "epoch": 0.7252835158797174, "grad_norm": 31.045886993408203, "learning_rate": 2.231879384240111e-06, "loss": 22.5578, "step": 359040 }, { "epoch": 0.7253037165124012, "grad_norm": 115.10821533203125, "learning_rate": 2.231588700579488e-06, "loss": 5.4195, "step": 359050 }, { "epoch": 0.7253239171450849, "grad_norm": 341.2144775390625, "learning_rate": 2.2312980304117656e-06, "loss": 13.2967, "step": 359060 }, { "epoch": 0.7253441177777687, "grad_norm": 7.237173557281494, "learning_rate": 2.2310073737383593e-06, "loss": 15.3578, "step": 359070 }, { "epoch": 0.7253643184104526, "grad_norm": 742.6315307617188, "learning_rate": 2.230716730560687e-06, "loss": 21.257, "step": 359080 }, { "epoch": 0.7253845190431364, "grad_norm": 143.5435333251953, "learning_rate": 2.230426100880167e-06, "loss": 19.4306, "step": 359090 }, { "epoch": 0.7254047196758202, "grad_norm": 376.4156494140625, "learning_rate": 2.2301354846982148e-06, "loss": 16.1877, "step": 359100 }, { "epoch": 0.725424920308504, "grad_norm": 538.2612915039062, "learning_rate": 2.2298448820162438e-06, "loss": 12.3151, "step": 359110 }, { "epoch": 0.7254451209411878, "grad_norm": 255.2987518310547, "learning_rate": 2.2295542928356755e-06, "loss": 18.9394, "step": 359120 }, { "epoch": 0.7254653215738717, "grad_norm": 330.2919616699219, "learning_rate": 2.229263717157923e-06, "loss": 12.4454, "step": 359130 }, { "epoch": 0.7254855222065555, "grad_norm": 315.2470397949219, "learning_rate": 2.2289731549844018e-06, "loss": 42.2872, "step": 359140 }, { "epoch": 0.7255057228392393, "grad_norm": 715.8504638671875, "learning_rate": 2.228682606316529e-06, "loss": 18.0506, "step": 359150 }, { "epoch": 0.7255259234719231, "grad_norm": 201.93016052246094, "learning_rate": 2.2283920711557226e-06, "loss": 17.994, "step": 359160 }, { "epoch": 0.725546124104607, "grad_norm": 697.1846923828125, "learning_rate": 2.2281015495033975e-06, "loss": 16.3907, "step": 359170 }, { "epoch": 0.7255663247372908, "grad_norm": 544.1822509765625, "learning_rate": 2.227811041360967e-06, "loss": 25.6777, "step": 359180 }, { "epoch": 0.7255865253699746, "grad_norm": 347.1976318359375, "learning_rate": 2.2275205467298515e-06, "loss": 24.672, "step": 359190 }, { "epoch": 0.7256067260026584, "grad_norm": 760.2865600585938, "learning_rate": 2.2272300656114648e-06, "loss": 18.438, "step": 359200 }, { "epoch": 0.7256269266353422, "grad_norm": 488.0180969238281, "learning_rate": 2.2269395980072206e-06, "loss": 21.7027, "step": 359210 }, { "epoch": 0.725647127268026, "grad_norm": 682.90966796875, "learning_rate": 2.226649143918538e-06, "loss": 17.3114, "step": 359220 }, { "epoch": 0.7256673279007099, "grad_norm": 1414.949462890625, "learning_rate": 2.2263587033468293e-06, "loss": 18.8927, "step": 359230 }, { "epoch": 0.7256875285333937, "grad_norm": 313.8021240234375, "learning_rate": 2.2260682762935137e-06, "loss": 14.85, "step": 359240 }, { "epoch": 0.7257077291660775, "grad_norm": 1083.0970458984375, "learning_rate": 2.2257778627600044e-06, "loss": 21.5205, "step": 359250 }, { "epoch": 0.7257279297987613, "grad_norm": 270.201416015625, "learning_rate": 2.2254874627477164e-06, "loss": 14.2119, "step": 359260 }, { "epoch": 0.7257481304314451, "grad_norm": 263.2742004394531, "learning_rate": 
2.2251970762580675e-06, "loss": 19.2875, "step": 359270 }, { "epoch": 0.725768331064129, "grad_norm": 40.12912368774414, "learning_rate": 2.2249067032924715e-06, "loss": 23.795, "step": 359280 }, { "epoch": 0.7257885316968128, "grad_norm": 365.149658203125, "learning_rate": 2.2246163438523417e-06, "loss": 17.3139, "step": 359290 }, { "epoch": 0.7258087323294966, "grad_norm": 222.59262084960938, "learning_rate": 2.224325997939095e-06, "loss": 12.1256, "step": 359300 }, { "epoch": 0.7258289329621804, "grad_norm": 289.6069030761719, "learning_rate": 2.2240356655541488e-06, "loss": 19.4232, "step": 359310 }, { "epoch": 0.7258491335948641, "grad_norm": 27.476863861083984, "learning_rate": 2.223745346698917e-06, "loss": 10.5382, "step": 359320 }, { "epoch": 0.725869334227548, "grad_norm": 269.6813659667969, "learning_rate": 2.2234550413748106e-06, "loss": 17.7383, "step": 359330 }, { "epoch": 0.7258895348602318, "grad_norm": 1166.08056640625, "learning_rate": 2.2231647495832496e-06, "loss": 22.4043, "step": 359340 }, { "epoch": 0.7259097354929156, "grad_norm": 1.4344022274017334, "learning_rate": 2.222874471325647e-06, "loss": 14.3152, "step": 359350 }, { "epoch": 0.7259299361255994, "grad_norm": 540.9111328125, "learning_rate": 2.222584206603416e-06, "loss": 16.838, "step": 359360 }, { "epoch": 0.7259501367582832, "grad_norm": 253.33688354492188, "learning_rate": 2.222293955417972e-06, "loss": 16.5393, "step": 359370 }, { "epoch": 0.7259703373909671, "grad_norm": 393.6908874511719, "learning_rate": 2.2220037177707342e-06, "loss": 21.5705, "step": 359380 }, { "epoch": 0.7259905380236509, "grad_norm": 423.1177062988281, "learning_rate": 2.2217134936631095e-06, "loss": 16.9682, "step": 359390 }, { "epoch": 0.7260107386563347, "grad_norm": 285.3522644042969, "learning_rate": 2.221423283096517e-06, "loss": 13.07, "step": 359400 }, { "epoch": 0.7260309392890185, "grad_norm": 474.0448303222656, "learning_rate": 2.221133086072372e-06, "loss": 23.2813, "step": 359410 }, { "epoch": 0.7260511399217023, "grad_norm": 299.09234619140625, "learning_rate": 2.220842902592087e-06, "loss": 16.1896, "step": 359420 }, { "epoch": 0.7260713405543862, "grad_norm": 10.753003120422363, "learning_rate": 2.220552732657075e-06, "loss": 17.9483, "step": 359430 }, { "epoch": 0.72609154118707, "grad_norm": 603.6778564453125, "learning_rate": 2.2202625762687533e-06, "loss": 14.1939, "step": 359440 }, { "epoch": 0.7261117418197538, "grad_norm": 158.3985595703125, "learning_rate": 2.219972433428535e-06, "loss": 15.2814, "step": 359450 }, { "epoch": 0.7261319424524376, "grad_norm": 170.3075408935547, "learning_rate": 2.2196823041378325e-06, "loss": 12.2966, "step": 359460 }, { "epoch": 0.7261521430851214, "grad_norm": 190.19564819335938, "learning_rate": 2.21939218839806e-06, "loss": 7.367, "step": 359470 }, { "epoch": 0.7261723437178053, "grad_norm": 199.4613037109375, "learning_rate": 2.2191020862106353e-06, "loss": 15.0406, "step": 359480 }, { "epoch": 0.7261925443504891, "grad_norm": 362.06243896484375, "learning_rate": 2.21881199757697e-06, "loss": 25.7381, "step": 359490 }, { "epoch": 0.7262127449831729, "grad_norm": 168.42156982421875, "learning_rate": 2.218521922498476e-06, "loss": 20.4386, "step": 359500 }, { "epoch": 0.7262329456158567, "grad_norm": 173.8197784423828, "learning_rate": 2.2182318609765703e-06, "loss": 13.2371, "step": 359510 }, { "epoch": 0.7262531462485405, "grad_norm": 264.0271301269531, "learning_rate": 2.217941813012665e-06, "loss": 32.4108, "step": 359520 }, { "epoch": 0.7262733468812244, 
"grad_norm": 69.82245635986328, "learning_rate": 2.217651778608172e-06, "loss": 15.0553, "step": 359530 }, { "epoch": 0.7262935475139082, "grad_norm": 291.6565246582031, "learning_rate": 2.217361757764509e-06, "loss": 6.4934, "step": 359540 }, { "epoch": 0.726313748146592, "grad_norm": 569.568359375, "learning_rate": 2.217071750483085e-06, "loss": 14.8901, "step": 359550 }, { "epoch": 0.7263339487792758, "grad_norm": 269.9482727050781, "learning_rate": 2.2167817567653176e-06, "loss": 15.6459, "step": 359560 }, { "epoch": 0.7263541494119596, "grad_norm": 1034.3734130859375, "learning_rate": 2.216491776612619e-06, "loss": 29.7321, "step": 359570 }, { "epoch": 0.7263743500446433, "grad_norm": 213.13063049316406, "learning_rate": 2.2162018100263995e-06, "loss": 57.3121, "step": 359580 }, { "epoch": 0.7263945506773272, "grad_norm": 59.16593551635742, "learning_rate": 2.215911857008077e-06, "loss": 11.3962, "step": 359590 }, { "epoch": 0.726414751310011, "grad_norm": 550.7051391601562, "learning_rate": 2.2156219175590623e-06, "loss": 19.9069, "step": 359600 }, { "epoch": 0.7264349519426948, "grad_norm": 348.9745788574219, "learning_rate": 2.215331991680766e-06, "loss": 11.6251, "step": 359610 }, { "epoch": 0.7264551525753786, "grad_norm": 405.37017822265625, "learning_rate": 2.215042079374605e-06, "loss": 12.169, "step": 359620 }, { "epoch": 0.7264753532080624, "grad_norm": 200.28878784179688, "learning_rate": 2.214752180641992e-06, "loss": 17.7441, "step": 359630 }, { "epoch": 0.7264955538407463, "grad_norm": 612.6473388671875, "learning_rate": 2.2144622954843396e-06, "loss": 24.8813, "step": 359640 }, { "epoch": 0.7265157544734301, "grad_norm": 311.94775390625, "learning_rate": 2.214172423903058e-06, "loss": 19.8329, "step": 359650 }, { "epoch": 0.7265359551061139, "grad_norm": 314.4627990722656, "learning_rate": 2.2138825658995645e-06, "loss": 20.2682, "step": 359660 }, { "epoch": 0.7265561557387977, "grad_norm": 598.5054931640625, "learning_rate": 2.213592721475269e-06, "loss": 17.5923, "step": 359670 }, { "epoch": 0.7265763563714815, "grad_norm": 406.8619384765625, "learning_rate": 2.213302890631583e-06, "loss": 14.9745, "step": 359680 }, { "epoch": 0.7265965570041654, "grad_norm": 597.0632934570312, "learning_rate": 2.2130130733699206e-06, "loss": 24.9578, "step": 359690 }, { "epoch": 0.7266167576368492, "grad_norm": 425.6470947265625, "learning_rate": 2.212723269691697e-06, "loss": 22.5632, "step": 359700 }, { "epoch": 0.726636958269533, "grad_norm": 408.3787536621094, "learning_rate": 2.212433479598321e-06, "loss": 10.5233, "step": 359710 }, { "epoch": 0.7266571589022168, "grad_norm": 617.8192138671875, "learning_rate": 2.2121437030912045e-06, "loss": 19.0784, "step": 359720 }, { "epoch": 0.7266773595349006, "grad_norm": 407.09674072265625, "learning_rate": 2.2118539401717636e-06, "loss": 28.2398, "step": 359730 }, { "epoch": 0.7266975601675845, "grad_norm": 458.1068420410156, "learning_rate": 2.2115641908414087e-06, "loss": 22.6853, "step": 359740 }, { "epoch": 0.7267177608002683, "grad_norm": 517.1664428710938, "learning_rate": 2.2112744551015496e-06, "loss": 13.1747, "step": 359750 }, { "epoch": 0.7267379614329521, "grad_norm": 1.197109341621399, "learning_rate": 2.2109847329536005e-06, "loss": 10.3271, "step": 359760 }, { "epoch": 0.7267581620656359, "grad_norm": 245.50851440429688, "learning_rate": 2.2106950243989754e-06, "loss": 12.7855, "step": 359770 }, { "epoch": 0.7267783626983197, "grad_norm": 318.2196960449219, "learning_rate": 2.2104053294390847e-06, "loss": 22.7982, "step": 
359780 }, { "epoch": 0.7267985633310036, "grad_norm": 89.09252166748047, "learning_rate": 2.210115648075338e-06, "loss": 13.4377, "step": 359790 }, { "epoch": 0.7268187639636874, "grad_norm": 324.0706787109375, "learning_rate": 2.209825980309151e-06, "loss": 19.3334, "step": 359800 }, { "epoch": 0.7268389645963712, "grad_norm": 443.2936706542969, "learning_rate": 2.209536326141934e-06, "loss": 14.9058, "step": 359810 }, { "epoch": 0.726859165229055, "grad_norm": 1070.38720703125, "learning_rate": 2.2092466855750966e-06, "loss": 23.2808, "step": 359820 }, { "epoch": 0.7268793658617387, "grad_norm": 216.96498107910156, "learning_rate": 2.2089570586100545e-06, "loss": 14.0101, "step": 359830 }, { "epoch": 0.7268995664944226, "grad_norm": 445.51409912109375, "learning_rate": 2.208667445248215e-06, "loss": 11.5647, "step": 359840 }, { "epoch": 0.7269197671271064, "grad_norm": 454.36480712890625, "learning_rate": 2.208377845490994e-06, "loss": 28.9318, "step": 359850 }, { "epoch": 0.7269399677597902, "grad_norm": 401.3663635253906, "learning_rate": 2.2080882593398e-06, "loss": 24.741, "step": 359860 }, { "epoch": 0.726960168392474, "grad_norm": 186.1900634765625, "learning_rate": 2.2077986867960436e-06, "loss": 16.7676, "step": 359870 }, { "epoch": 0.7269803690251578, "grad_norm": 250.18862915039062, "learning_rate": 2.20750912786114e-06, "loss": 15.4508, "step": 359880 }, { "epoch": 0.7270005696578417, "grad_norm": 360.76416015625, "learning_rate": 2.2072195825364983e-06, "loss": 23.8939, "step": 359890 }, { "epoch": 0.7270207702905255, "grad_norm": 297.14190673828125, "learning_rate": 2.2069300508235273e-06, "loss": 15.5774, "step": 359900 }, { "epoch": 0.7270409709232093, "grad_norm": 65.08010864257812, "learning_rate": 2.2066405327236413e-06, "loss": 10.4941, "step": 359910 }, { "epoch": 0.7270611715558931, "grad_norm": 192.5146942138672, "learning_rate": 2.2063510282382517e-06, "loss": 21.6264, "step": 359920 }, { "epoch": 0.7270813721885769, "grad_norm": 142.77598571777344, "learning_rate": 2.206061537368768e-06, "loss": 17.3337, "step": 359930 }, { "epoch": 0.7271015728212608, "grad_norm": 297.79437255859375, "learning_rate": 2.2057720601166004e-06, "loss": 18.7999, "step": 359940 }, { "epoch": 0.7271217734539446, "grad_norm": 270.64678955078125, "learning_rate": 2.2054825964831627e-06, "loss": 30.1399, "step": 359950 }, { "epoch": 0.7271419740866284, "grad_norm": 384.5345153808594, "learning_rate": 2.2051931464698636e-06, "loss": 21.4343, "step": 359960 }, { "epoch": 0.7271621747193122, "grad_norm": 163.68165588378906, "learning_rate": 2.2049037100781125e-06, "loss": 8.9799, "step": 359970 }, { "epoch": 0.727182375351996, "grad_norm": 129.2692413330078, "learning_rate": 2.204614287309321e-06, "loss": 21.4616, "step": 359980 }, { "epoch": 0.7272025759846799, "grad_norm": 555.4015502929688, "learning_rate": 2.204324878164905e-06, "loss": 17.6524, "step": 359990 }, { "epoch": 0.7272227766173637, "grad_norm": 461.436279296875, "learning_rate": 2.204035482646267e-06, "loss": 16.2615, "step": 360000 }, { "epoch": 0.7272429772500475, "grad_norm": 596.1917724609375, "learning_rate": 2.20374610075482e-06, "loss": 30.5351, "step": 360010 }, { "epoch": 0.7272631778827313, "grad_norm": 264.5061340332031, "learning_rate": 2.2034567324919774e-06, "loss": 5.905, "step": 360020 }, { "epoch": 0.7272833785154151, "grad_norm": 446.2514953613281, "learning_rate": 2.2031673778591477e-06, "loss": 20.922, "step": 360030 }, { "epoch": 0.727303579148099, "grad_norm": 235.1972198486328, "learning_rate": 
2.2028780368577395e-06, "loss": 22.4989, "step": 360040 }, { "epoch": 0.7273237797807828, "grad_norm": 25.34808921813965, "learning_rate": 2.2025887094891657e-06, "loss": 19.9513, "step": 360050 }, { "epoch": 0.7273439804134666, "grad_norm": 333.74322509765625, "learning_rate": 2.202299395754836e-06, "loss": 13.5665, "step": 360060 }, { "epoch": 0.7273641810461504, "grad_norm": 451.43218994140625, "learning_rate": 2.2020100956561576e-06, "loss": 23.9477, "step": 360070 }, { "epoch": 0.7273843816788342, "grad_norm": 320.87115478515625, "learning_rate": 2.201720809194542e-06, "loss": 11.5491, "step": 360080 }, { "epoch": 0.7274045823115179, "grad_norm": 331.8887023925781, "learning_rate": 2.201431536371402e-06, "loss": 13.0425, "step": 360090 }, { "epoch": 0.7274247829442018, "grad_norm": 295.6164245605469, "learning_rate": 2.201142277188146e-06, "loss": 21.2351, "step": 360100 }, { "epoch": 0.7274449835768856, "grad_norm": 327.21002197265625, "learning_rate": 2.20085303164618e-06, "loss": 36.5531, "step": 360110 }, { "epoch": 0.7274651842095694, "grad_norm": 538.5135498046875, "learning_rate": 2.2005637997469194e-06, "loss": 23.5763, "step": 360120 }, { "epoch": 0.7274853848422532, "grad_norm": 529.24951171875, "learning_rate": 2.2002745814917716e-06, "loss": 21.429, "step": 360130 }, { "epoch": 0.727505585474937, "grad_norm": 602.7332763671875, "learning_rate": 2.1999853768821433e-06, "loss": 18.329, "step": 360140 }, { "epoch": 0.7275257861076209, "grad_norm": 626.5838623046875, "learning_rate": 2.1996961859194487e-06, "loss": 12.5332, "step": 360150 }, { "epoch": 0.7275459867403047, "grad_norm": 685.1061401367188, "learning_rate": 2.1994070086050937e-06, "loss": 18.306, "step": 360160 }, { "epoch": 0.7275661873729885, "grad_norm": 506.77349853515625, "learning_rate": 2.199117844940491e-06, "loss": 13.3728, "step": 360170 }, { "epoch": 0.7275863880056723, "grad_norm": 92.83052062988281, "learning_rate": 2.198828694927048e-06, "loss": 18.4025, "step": 360180 }, { "epoch": 0.7276065886383561, "grad_norm": 318.7611083984375, "learning_rate": 2.198539558566173e-06, "loss": 29.3137, "step": 360190 }, { "epoch": 0.72762678927104, "grad_norm": 338.2325744628906, "learning_rate": 2.1982504358592777e-06, "loss": 28.2511, "step": 360200 }, { "epoch": 0.7276469899037238, "grad_norm": 525.8775024414062, "learning_rate": 2.1979613268077684e-06, "loss": 10.6956, "step": 360210 }, { "epoch": 0.7276671905364076, "grad_norm": 461.5638122558594, "learning_rate": 2.1976722314130576e-06, "loss": 11.5493, "step": 360220 }, { "epoch": 0.7276873911690914, "grad_norm": 217.5858917236328, "learning_rate": 2.1973831496765503e-06, "loss": 16.0767, "step": 360230 }, { "epoch": 0.7277075918017752, "grad_norm": 566.3673095703125, "learning_rate": 2.1970940815996592e-06, "loss": 17.7488, "step": 360240 }, { "epoch": 0.7277277924344591, "grad_norm": 407.6756286621094, "learning_rate": 2.1968050271837926e-06, "loss": 15.9521, "step": 360250 }, { "epoch": 0.7277479930671429, "grad_norm": 502.3351135253906, "learning_rate": 2.196515986430356e-06, "loss": 34.51, "step": 360260 }, { "epoch": 0.7277681936998267, "grad_norm": 275.16143798828125, "learning_rate": 2.196226959340762e-06, "loss": 6.6301, "step": 360270 }, { "epoch": 0.7277883943325105, "grad_norm": 381.4097595214844, "learning_rate": 2.195937945916418e-06, "loss": 19.1713, "step": 360280 }, { "epoch": 0.7278085949651943, "grad_norm": 1045.807861328125, "learning_rate": 2.1956489461587307e-06, "loss": 24.8398, "step": 360290 }, { "epoch": 0.7278287955978782, 
"grad_norm": 420.18475341796875, "learning_rate": 2.19535996006911e-06, "loss": 22.2127, "step": 360300 }, { "epoch": 0.727848996230562, "grad_norm": 378.78338623046875, "learning_rate": 2.195070987648966e-06, "loss": 22.7336, "step": 360310 }, { "epoch": 0.7278691968632458, "grad_norm": 132.0506134033203, "learning_rate": 2.1947820288997067e-06, "loss": 8.3921, "step": 360320 }, { "epoch": 0.7278893974959296, "grad_norm": 358.63775634765625, "learning_rate": 2.1944930838227374e-06, "loss": 12.0968, "step": 360330 }, { "epoch": 0.7279095981286133, "grad_norm": 411.3817443847656, "learning_rate": 2.1942041524194705e-06, "loss": 19.8867, "step": 360340 }, { "epoch": 0.7279297987612972, "grad_norm": 130.55499267578125, "learning_rate": 2.193915234691312e-06, "loss": 12.3914, "step": 360350 }, { "epoch": 0.727949999393981, "grad_norm": 60.1270637512207, "learning_rate": 2.1936263306396688e-06, "loss": 14.733, "step": 360360 }, { "epoch": 0.7279702000266648, "grad_norm": 4.043726921081543, "learning_rate": 2.1933374402659502e-06, "loss": 27.5651, "step": 360370 }, { "epoch": 0.7279904006593486, "grad_norm": 363.79461669921875, "learning_rate": 2.1930485635715665e-06, "loss": 19.1083, "step": 360380 }, { "epoch": 0.7280106012920324, "grad_norm": 241.27272033691406, "learning_rate": 2.1927597005579236e-06, "loss": 19.0199, "step": 360390 }, { "epoch": 0.7280308019247163, "grad_norm": 1405.54638671875, "learning_rate": 2.192470851226428e-06, "loss": 20.4921, "step": 360400 }, { "epoch": 0.7280510025574001, "grad_norm": 268.7628479003906, "learning_rate": 2.19218201557849e-06, "loss": 13.2207, "step": 360410 }, { "epoch": 0.7280712031900839, "grad_norm": 589.1351318359375, "learning_rate": 2.1918931936155167e-06, "loss": 22.0355, "step": 360420 }, { "epoch": 0.7280914038227677, "grad_norm": 483.664794921875, "learning_rate": 2.191604385338914e-06, "loss": 16.1735, "step": 360430 }, { "epoch": 0.7281116044554515, "grad_norm": 0.7184778451919556, "learning_rate": 2.1913155907500923e-06, "loss": 17.3139, "step": 360440 }, { "epoch": 0.7281318050881354, "grad_norm": 99.97135162353516, "learning_rate": 2.1910268098504562e-06, "loss": 8.8662, "step": 360450 }, { "epoch": 0.7281520057208192, "grad_norm": 220.3773956298828, "learning_rate": 2.190738042641416e-06, "loss": 10.7225, "step": 360460 }, { "epoch": 0.728172206353503, "grad_norm": 187.96905517578125, "learning_rate": 2.1904492891243785e-06, "loss": 26.9043, "step": 360470 }, { "epoch": 0.7281924069861868, "grad_norm": 525.996337890625, "learning_rate": 2.190160549300748e-06, "loss": 21.2937, "step": 360480 }, { "epoch": 0.7282126076188706, "grad_norm": 152.04421997070312, "learning_rate": 2.189871823171936e-06, "loss": 10.7987, "step": 360490 }, { "epoch": 0.7282328082515545, "grad_norm": 235.4153594970703, "learning_rate": 2.1895831107393485e-06, "loss": 17.4104, "step": 360500 }, { "epoch": 0.7282530088842383, "grad_norm": 248.13519287109375, "learning_rate": 2.18929441200439e-06, "loss": 24.46, "step": 360510 }, { "epoch": 0.7282732095169221, "grad_norm": 198.44969177246094, "learning_rate": 2.1890057269684695e-06, "loss": 22.6974, "step": 360520 }, { "epoch": 0.7282934101496059, "grad_norm": 25.584314346313477, "learning_rate": 2.1887170556329962e-06, "loss": 9.6163, "step": 360530 }, { "epoch": 0.7283136107822897, "grad_norm": 116.18987274169922, "learning_rate": 2.188428397999375e-06, "loss": 13.5682, "step": 360540 }, { "epoch": 0.7283338114149736, "grad_norm": 2.7935242652893066, "learning_rate": 2.1881397540690106e-06, "loss": 7.1686, 
"step": 360550 }, { "epoch": 0.7283540120476574, "grad_norm": 591.6446533203125, "learning_rate": 2.187851123843314e-06, "loss": 26.2548, "step": 360560 }, { "epoch": 0.7283742126803412, "grad_norm": 102.64051055908203, "learning_rate": 2.18756250732369e-06, "loss": 13.3199, "step": 360570 }, { "epoch": 0.728394413313025, "grad_norm": 532.3738403320312, "learning_rate": 2.187273904511544e-06, "loss": 17.5984, "step": 360580 }, { "epoch": 0.7284146139457088, "grad_norm": 400.3085021972656, "learning_rate": 2.1869853154082828e-06, "loss": 12.8936, "step": 360590 }, { "epoch": 0.7284348145783925, "grad_norm": 470.28582763671875, "learning_rate": 2.1866967400153184e-06, "loss": 15.0249, "step": 360600 }, { "epoch": 0.7284550152110764, "grad_norm": 550.6588745117188, "learning_rate": 2.1864081783340484e-06, "loss": 16.8647, "step": 360610 }, { "epoch": 0.7284752158437602, "grad_norm": 639.104248046875, "learning_rate": 2.1861196303658843e-06, "loss": 12.5242, "step": 360620 }, { "epoch": 0.728495416476444, "grad_norm": 295.1681823730469, "learning_rate": 2.1858310961122336e-06, "loss": 10.8901, "step": 360630 }, { "epoch": 0.7285156171091278, "grad_norm": 416.6802062988281, "learning_rate": 2.185542575574501e-06, "loss": 18.2717, "step": 360640 }, { "epoch": 0.7285358177418116, "grad_norm": 258.6806640625, "learning_rate": 2.18525406875409e-06, "loss": 15.2738, "step": 360650 }, { "epoch": 0.7285560183744955, "grad_norm": 222.70498657226562, "learning_rate": 2.184965575652412e-06, "loss": 13.6236, "step": 360660 }, { "epoch": 0.7285762190071793, "grad_norm": 530.6907958984375, "learning_rate": 2.18467709627087e-06, "loss": 13.6382, "step": 360670 }, { "epoch": 0.7285964196398631, "grad_norm": 36.37629318237305, "learning_rate": 2.1843886306108686e-06, "loss": 11.1797, "step": 360680 }, { "epoch": 0.7286166202725469, "grad_norm": 540.2681884765625, "learning_rate": 2.184100178673815e-06, "loss": 9.2794, "step": 360690 }, { "epoch": 0.7286368209052307, "grad_norm": 260.7170104980469, "learning_rate": 2.183811740461118e-06, "loss": 32.9272, "step": 360700 }, { "epoch": 0.7286570215379146, "grad_norm": 446.9761962890625, "learning_rate": 2.183523315974181e-06, "loss": 19.565, "step": 360710 }, { "epoch": 0.7286772221705984, "grad_norm": 366.071533203125, "learning_rate": 2.183234905214408e-06, "loss": 24.6809, "step": 360720 }, { "epoch": 0.7286974228032822, "grad_norm": 342.06671142578125, "learning_rate": 2.182946508183208e-06, "loss": 31.346, "step": 360730 }, { "epoch": 0.728717623435966, "grad_norm": 393.7168273925781, "learning_rate": 2.182658124881985e-06, "loss": 23.4002, "step": 360740 }, { "epoch": 0.7287378240686498, "grad_norm": 369.3158874511719, "learning_rate": 2.1823697553121432e-06, "loss": 15.1723, "step": 360750 }, { "epoch": 0.7287580247013337, "grad_norm": 240.69386291503906, "learning_rate": 2.1820813994750904e-06, "loss": 16.7641, "step": 360760 }, { "epoch": 0.7287782253340175, "grad_norm": 238.80050659179688, "learning_rate": 2.18179305737223e-06, "loss": 12.4301, "step": 360770 }, { "epoch": 0.7287984259667013, "grad_norm": 618.7152099609375, "learning_rate": 2.1815047290049707e-06, "loss": 15.9741, "step": 360780 }, { "epoch": 0.7288186265993851, "grad_norm": 65.00995635986328, "learning_rate": 2.1812164143747143e-06, "loss": 12.7006, "step": 360790 }, { "epoch": 0.7288388272320689, "grad_norm": 265.7799987792969, "learning_rate": 2.1809281134828663e-06, "loss": 17.9888, "step": 360800 }, { "epoch": 0.7288590278647528, "grad_norm": 147.77239990234375, "learning_rate": 
2.1806398263308343e-06, "loss": 14.6698, "step": 360810 }, { "epoch": 0.7288792284974366, "grad_norm": 270.9167175292969, "learning_rate": 2.1803515529200204e-06, "loss": 26.5043, "step": 360820 }, { "epoch": 0.7288994291301204, "grad_norm": 265.6896057128906, "learning_rate": 2.1800632932518325e-06, "loss": 17.3283, "step": 360830 }, { "epoch": 0.7289196297628042, "grad_norm": 1087.7618408203125, "learning_rate": 2.179775047327672e-06, "loss": 26.0571, "step": 360840 }, { "epoch": 0.728939830395488, "grad_norm": 304.3948974609375, "learning_rate": 2.179486815148948e-06, "loss": 14.0474, "step": 360850 }, { "epoch": 0.7289600310281718, "grad_norm": 428.5285949707031, "learning_rate": 2.179198596717063e-06, "loss": 32.7469, "step": 360860 }, { "epoch": 0.7289802316608556, "grad_norm": 1042.8258056640625, "learning_rate": 2.1789103920334205e-06, "loss": 29.2223, "step": 360870 }, { "epoch": 0.7290004322935394, "grad_norm": 143.35415649414062, "learning_rate": 2.178622201099428e-06, "loss": 34.5574, "step": 360880 }, { "epoch": 0.7290206329262232, "grad_norm": 880.0578002929688, "learning_rate": 2.178334023916489e-06, "loss": 26.2843, "step": 360890 }, { "epoch": 0.729040833558907, "grad_norm": 395.4434509277344, "learning_rate": 2.1780458604860056e-06, "loss": 14.6299, "step": 360900 }, { "epoch": 0.7290610341915909, "grad_norm": 1124.6776123046875, "learning_rate": 2.1777577108093843e-06, "loss": 13.5058, "step": 360910 }, { "epoch": 0.7290812348242747, "grad_norm": 241.42478942871094, "learning_rate": 2.177469574888034e-06, "loss": 16.9511, "step": 360920 }, { "epoch": 0.7291014354569585, "grad_norm": 59.13896179199219, "learning_rate": 2.17718145272335e-06, "loss": 16.9234, "step": 360930 }, { "epoch": 0.7291216360896423, "grad_norm": 430.2388916015625, "learning_rate": 2.1768933443167423e-06, "loss": 23.9968, "step": 360940 }, { "epoch": 0.7291418367223261, "grad_norm": 301.6485900878906, "learning_rate": 2.1766052496696155e-06, "loss": 28.9483, "step": 360950 }, { "epoch": 0.72916203735501, "grad_norm": 409.0451965332031, "learning_rate": 2.176317168783372e-06, "loss": 11.3576, "step": 360960 }, { "epoch": 0.7291822379876938, "grad_norm": 58.25111770629883, "learning_rate": 2.1760291016594143e-06, "loss": 10.1496, "step": 360970 }, { "epoch": 0.7292024386203776, "grad_norm": 538.2581787109375, "learning_rate": 2.1757410482991488e-06, "loss": 12.4929, "step": 360980 }, { "epoch": 0.7292226392530614, "grad_norm": 107.94898986816406, "learning_rate": 2.17545300870398e-06, "loss": 24.9767, "step": 360990 }, { "epoch": 0.7292428398857452, "grad_norm": 119.3606185913086, "learning_rate": 2.175164982875311e-06, "loss": 11.8385, "step": 361000 }, { "epoch": 0.729263040518429, "grad_norm": 326.6532287597656, "learning_rate": 2.1748769708145435e-06, "loss": 14.3606, "step": 361010 }, { "epoch": 0.7292832411511129, "grad_norm": 335.1154479980469, "learning_rate": 2.1745889725230845e-06, "loss": 20.6894, "step": 361020 }, { "epoch": 0.7293034417837967, "grad_norm": 300.31842041015625, "learning_rate": 2.1743009880023364e-06, "loss": 13.3313, "step": 361030 }, { "epoch": 0.7293236424164805, "grad_norm": 244.2138671875, "learning_rate": 2.174013017253701e-06, "loss": 14.7413, "step": 361040 }, { "epoch": 0.7293438430491643, "grad_norm": 961.990234375, "learning_rate": 2.173725060278585e-06, "loss": 18.6023, "step": 361050 }, { "epoch": 0.7293640436818482, "grad_norm": 275.3308410644531, "learning_rate": 2.1734371170783888e-06, "loss": 17.4297, "step": 361060 }, { "epoch": 0.729384244314532, 
"grad_norm": 465.2654724121094, "learning_rate": 2.173149187654518e-06, "loss": 23.1155, "step": 361070 }, { "epoch": 0.7294044449472158, "grad_norm": 352.1817626953125, "learning_rate": 2.1728612720083764e-06, "loss": 24.5433, "step": 361080 }, { "epoch": 0.7294246455798996, "grad_norm": 684.2299194335938, "learning_rate": 2.172573370141364e-06, "loss": 17.5574, "step": 361090 }, { "epoch": 0.7294448462125834, "grad_norm": 357.49310302734375, "learning_rate": 2.1722854820548873e-06, "loss": 17.079, "step": 361100 }, { "epoch": 0.7294650468452671, "grad_norm": 167.45614624023438, "learning_rate": 2.1719976077503484e-06, "loss": 16.0663, "step": 361110 }, { "epoch": 0.729485247477951, "grad_norm": 86.17201232910156, "learning_rate": 2.171709747229149e-06, "loss": 15.0983, "step": 361120 }, { "epoch": 0.7295054481106348, "grad_norm": 363.0920104980469, "learning_rate": 2.1714219004926923e-06, "loss": 13.2577, "step": 361130 }, { "epoch": 0.7295256487433186, "grad_norm": 263.7752685546875, "learning_rate": 2.1711340675423847e-06, "loss": 14.69, "step": 361140 }, { "epoch": 0.7295458493760024, "grad_norm": 576.8699951171875, "learning_rate": 2.1708462483796263e-06, "loss": 18.0602, "step": 361150 }, { "epoch": 0.7295660500086862, "grad_norm": 64.69669342041016, "learning_rate": 2.170558443005818e-06, "loss": 26.0218, "step": 361160 }, { "epoch": 0.7295862506413701, "grad_norm": 292.1713562011719, "learning_rate": 2.170270651422367e-06, "loss": 12.5334, "step": 361170 }, { "epoch": 0.7296064512740539, "grad_norm": 402.4986877441406, "learning_rate": 2.1699828736306736e-06, "loss": 22.3744, "step": 361180 }, { "epoch": 0.7296266519067377, "grad_norm": 252.6857452392578, "learning_rate": 2.1696951096321383e-06, "loss": 16.728, "step": 361190 }, { "epoch": 0.7296468525394215, "grad_norm": 362.59912109375, "learning_rate": 2.1694073594281663e-06, "loss": 36.7554, "step": 361200 }, { "epoch": 0.7296670531721053, "grad_norm": 179.68382263183594, "learning_rate": 2.1691196230201626e-06, "loss": 12.6639, "step": 361210 }, { "epoch": 0.7296872538047892, "grad_norm": 3.6797828674316406, "learning_rate": 2.168831900409523e-06, "loss": 12.1016, "step": 361220 }, { "epoch": 0.729707454437473, "grad_norm": 505.0819091796875, "learning_rate": 2.1685441915976537e-06, "loss": 20.3073, "step": 361230 }, { "epoch": 0.7297276550701568, "grad_norm": 261.9149169921875, "learning_rate": 2.168256496585958e-06, "loss": 15.1085, "step": 361240 }, { "epoch": 0.7297478557028406, "grad_norm": 434.857177734375, "learning_rate": 2.1679688153758373e-06, "loss": 16.0192, "step": 361250 }, { "epoch": 0.7297680563355244, "grad_norm": 522.893310546875, "learning_rate": 2.1676811479686905e-06, "loss": 9.4449, "step": 361260 }, { "epoch": 0.7297882569682083, "grad_norm": 710.4201049804688, "learning_rate": 2.1673934943659226e-06, "loss": 27.2911, "step": 361270 }, { "epoch": 0.7298084576008921, "grad_norm": 2422.255126953125, "learning_rate": 2.1671058545689387e-06, "loss": 42.0225, "step": 361280 }, { "epoch": 0.7298286582335759, "grad_norm": 645.8966064453125, "learning_rate": 2.166818228579134e-06, "loss": 28.2227, "step": 361290 }, { "epoch": 0.7298488588662597, "grad_norm": 327.0567321777344, "learning_rate": 2.1665306163979132e-06, "loss": 6.7001, "step": 361300 }, { "epoch": 0.7298690594989435, "grad_norm": 793.0493774414062, "learning_rate": 2.1662430180266808e-06, "loss": 14.0256, "step": 361310 }, { "epoch": 0.7298892601316274, "grad_norm": 216.51014709472656, "learning_rate": 2.1659554334668364e-06, "loss": 26.4455, 
"step": 361320 }, { "epoch": 0.7299094607643112, "grad_norm": 245.2882080078125, "learning_rate": 2.1656678627197793e-06, "loss": 6.7295, "step": 361330 }, { "epoch": 0.729929661396995, "grad_norm": 0.20949004590511322, "learning_rate": 2.165380305786915e-06, "loss": 24.76, "step": 361340 }, { "epoch": 0.7299498620296788, "grad_norm": 953.6806640625, "learning_rate": 2.165092762669643e-06, "loss": 26.3631, "step": 361350 }, { "epoch": 0.7299700626623626, "grad_norm": 348.4391174316406, "learning_rate": 2.164805233369364e-06, "loss": 18.4093, "step": 361360 }, { "epoch": 0.7299902632950463, "grad_norm": 468.8824768066406, "learning_rate": 2.1645177178874817e-06, "loss": 19.1577, "step": 361370 }, { "epoch": 0.7300104639277302, "grad_norm": 79.99201965332031, "learning_rate": 2.164230216225395e-06, "loss": 26.3549, "step": 361380 }, { "epoch": 0.730030664560414, "grad_norm": 448.7281188964844, "learning_rate": 2.163942728384507e-06, "loss": 16.6217, "step": 361390 }, { "epoch": 0.7300508651930978, "grad_norm": 101.54920959472656, "learning_rate": 2.1636552543662187e-06, "loss": 13.2712, "step": 361400 }, { "epoch": 0.7300710658257816, "grad_norm": 374.1964416503906, "learning_rate": 2.163367794171929e-06, "loss": 13.4904, "step": 361410 }, { "epoch": 0.7300912664584654, "grad_norm": 597.1946411132812, "learning_rate": 2.1630803478030428e-06, "loss": 17.7211, "step": 361420 }, { "epoch": 0.7301114670911493, "grad_norm": 320.4565734863281, "learning_rate": 2.162792915260956e-06, "loss": 21.6194, "step": 361430 }, { "epoch": 0.7301316677238331, "grad_norm": 114.46507263183594, "learning_rate": 2.1625054965470754e-06, "loss": 22.1658, "step": 361440 }, { "epoch": 0.7301518683565169, "grad_norm": 415.8811340332031, "learning_rate": 2.1622180916627964e-06, "loss": 10.7178, "step": 361450 }, { "epoch": 0.7301720689892007, "grad_norm": 271.2681884765625, "learning_rate": 2.161930700609524e-06, "loss": 23.4488, "step": 361460 }, { "epoch": 0.7301922696218845, "grad_norm": 103.7387466430664, "learning_rate": 2.1616433233886576e-06, "loss": 30.7911, "step": 361470 }, { "epoch": 0.7302124702545684, "grad_norm": 404.3741455078125, "learning_rate": 2.1613559600015955e-06, "loss": 21.6233, "step": 361480 }, { "epoch": 0.7302326708872522, "grad_norm": 82.00901794433594, "learning_rate": 2.1610686104497413e-06, "loss": 14.4869, "step": 361490 }, { "epoch": 0.730252871519936, "grad_norm": 433.1382141113281, "learning_rate": 2.1607812747344955e-06, "loss": 21.366, "step": 361500 }, { "epoch": 0.7302730721526198, "grad_norm": 121.11111450195312, "learning_rate": 2.160493952857255e-06, "loss": 10.8664, "step": 361510 }, { "epoch": 0.7302932727853036, "grad_norm": 890.6771850585938, "learning_rate": 2.160206644819422e-06, "loss": 24.7509, "step": 361520 }, { "epoch": 0.7303134734179875, "grad_norm": 10.892768859863281, "learning_rate": 2.159919350622402e-06, "loss": 9.2625, "step": 361530 }, { "epoch": 0.7303336740506713, "grad_norm": 637.7189331054688, "learning_rate": 2.1596320702675867e-06, "loss": 9.0058, "step": 361540 }, { "epoch": 0.7303538746833551, "grad_norm": 342.6564636230469, "learning_rate": 2.1593448037563795e-06, "loss": 26.8792, "step": 361550 }, { "epoch": 0.7303740753160389, "grad_norm": 457.1528625488281, "learning_rate": 2.159057551090184e-06, "loss": 20.4412, "step": 361560 }, { "epoch": 0.7303942759487227, "grad_norm": 440.42083740234375, "learning_rate": 2.158770312270397e-06, "loss": 28.5649, "step": 361570 }, { "epoch": 0.7304144765814066, "grad_norm": 130.75962829589844, 
"learning_rate": 2.158483087298417e-06, "loss": 28.6687, "step": 361580 }, { "epoch": 0.7304346772140904, "grad_norm": 759.0496215820312, "learning_rate": 2.158195876175646e-06, "loss": 22.2783, "step": 361590 }, { "epoch": 0.7304548778467742, "grad_norm": 350.0136413574219, "learning_rate": 2.157908678903487e-06, "loss": 15.8889, "step": 361600 }, { "epoch": 0.730475078479458, "grad_norm": 213.92550659179688, "learning_rate": 2.157621495483333e-06, "loss": 11.6819, "step": 361610 }, { "epoch": 0.7304952791121417, "grad_norm": 368.8323669433594, "learning_rate": 2.157334325916587e-06, "loss": 19.16, "step": 361620 }, { "epoch": 0.7305154797448256, "grad_norm": 870.4979858398438, "learning_rate": 2.1570471702046504e-06, "loss": 20.1488, "step": 361630 }, { "epoch": 0.7305356803775094, "grad_norm": 259.45025634765625, "learning_rate": 2.1567600283489213e-06, "loss": 17.0631, "step": 361640 }, { "epoch": 0.7305558810101932, "grad_norm": 109.738037109375, "learning_rate": 2.1564729003507974e-06, "loss": 16.7987, "step": 361650 }, { "epoch": 0.730576081642877, "grad_norm": 443.196044921875, "learning_rate": 2.156185786211681e-06, "loss": 20.6028, "step": 361660 }, { "epoch": 0.7305962822755608, "grad_norm": 176.04074096679688, "learning_rate": 2.15589868593297e-06, "loss": 20.3453, "step": 361670 }, { "epoch": 0.7306164829082447, "grad_norm": 333.56732177734375, "learning_rate": 2.1556115995160624e-06, "loss": 15.0055, "step": 361680 }, { "epoch": 0.7306366835409285, "grad_norm": 559.19970703125, "learning_rate": 2.155324526962361e-06, "loss": 29.2269, "step": 361690 }, { "epoch": 0.7306568841736123, "grad_norm": 230.17852783203125, "learning_rate": 2.1550374682732605e-06, "loss": 32.9297, "step": 361700 }, { "epoch": 0.7306770848062961, "grad_norm": 135.4713134765625, "learning_rate": 2.154750423450165e-06, "loss": 13.8467, "step": 361710 }, { "epoch": 0.7306972854389799, "grad_norm": 223.9093475341797, "learning_rate": 2.154463392494468e-06, "loss": 14.1095, "step": 361720 }, { "epoch": 0.7307174860716638, "grad_norm": 262.5910949707031, "learning_rate": 2.1541763754075732e-06, "loss": 19.8624, "step": 361730 }, { "epoch": 0.7307376867043476, "grad_norm": 146.00656127929688, "learning_rate": 2.1538893721908766e-06, "loss": 12.8752, "step": 361740 }, { "epoch": 0.7307578873370314, "grad_norm": 350.9228210449219, "learning_rate": 2.1536023828457793e-06, "loss": 12.8664, "step": 361750 }, { "epoch": 0.7307780879697152, "grad_norm": 281.30767822265625, "learning_rate": 2.153315407373679e-06, "loss": 20.9679, "step": 361760 }, { "epoch": 0.730798288602399, "grad_norm": 817.6254272460938, "learning_rate": 2.153028445775972e-06, "loss": 34.3722, "step": 361770 }, { "epoch": 0.7308184892350829, "grad_norm": 308.9070739746094, "learning_rate": 2.1527414980540607e-06, "loss": 34.6558, "step": 361780 }, { "epoch": 0.7308386898677667, "grad_norm": 254.76809692382812, "learning_rate": 2.1524545642093426e-06, "loss": 42.2831, "step": 361790 }, { "epoch": 0.7308588905004505, "grad_norm": 476.56787109375, "learning_rate": 2.152167644243213e-06, "loss": 12.8899, "step": 361800 }, { "epoch": 0.7308790911331343, "grad_norm": 294.74993896484375, "learning_rate": 2.1518807381570737e-06, "loss": 12.2191, "step": 361810 }, { "epoch": 0.7308992917658181, "grad_norm": 232.15695190429688, "learning_rate": 2.1515938459523254e-06, "loss": 15.1084, "step": 361820 }, { "epoch": 0.730919492398502, "grad_norm": 174.69699096679688, "learning_rate": 2.15130696763036e-06, "loss": 8.9743, "step": 361830 }, { "epoch": 
0.7309396930311858, "grad_norm": 249.19667053222656, "learning_rate": 2.151020103192579e-06, "loss": 9.6197, "step": 361840 }, { "epoch": 0.7309598936638696, "grad_norm": 425.8226318359375, "learning_rate": 2.1507332526403814e-06, "loss": 31.1412, "step": 361850 }, { "epoch": 0.7309800942965534, "grad_norm": 325.6577453613281, "learning_rate": 2.1504464159751646e-06, "loss": 12.526, "step": 361860 }, { "epoch": 0.7310002949292372, "grad_norm": 501.7792053222656, "learning_rate": 2.1501595931983256e-06, "loss": 13.087, "step": 361870 }, { "epoch": 0.731020495561921, "grad_norm": 319.7770080566406, "learning_rate": 2.149872784311262e-06, "loss": 16.9793, "step": 361880 }, { "epoch": 0.7310406961946048, "grad_norm": 593.9952392578125, "learning_rate": 2.149585989315377e-06, "loss": 21.6624, "step": 361890 }, { "epoch": 0.7310608968272886, "grad_norm": 138.48013305664062, "learning_rate": 2.14929920821206e-06, "loss": 32.0402, "step": 361900 }, { "epoch": 0.7310810974599724, "grad_norm": 433.41741943359375, "learning_rate": 2.1490124410027137e-06, "loss": 20.2227, "step": 361910 }, { "epoch": 0.7311012980926562, "grad_norm": 589.3394165039062, "learning_rate": 2.1487256876887356e-06, "loss": 20.6278, "step": 361920 }, { "epoch": 0.73112149872534, "grad_norm": 243.6151123046875, "learning_rate": 2.148438948271524e-06, "loss": 12.6422, "step": 361930 }, { "epoch": 0.7311416993580239, "grad_norm": 405.2738037109375, "learning_rate": 2.1481522227524725e-06, "loss": 10.863, "step": 361940 }, { "epoch": 0.7311618999907077, "grad_norm": 325.8308410644531, "learning_rate": 2.147865511132983e-06, "loss": 19.621, "step": 361950 }, { "epoch": 0.7311821006233915, "grad_norm": 265.0723571777344, "learning_rate": 2.1475788134144516e-06, "loss": 22.2872, "step": 361960 }, { "epoch": 0.7312023012560753, "grad_norm": 210.59898376464844, "learning_rate": 2.147292129598273e-06, "loss": 13.0862, "step": 361970 }, { "epoch": 0.7312225018887591, "grad_norm": 188.1290740966797, "learning_rate": 2.147005459685848e-06, "loss": 18.2052, "step": 361980 }, { "epoch": 0.731242702521443, "grad_norm": 324.91693115234375, "learning_rate": 2.1467188036785706e-06, "loss": 11.8528, "step": 361990 }, { "epoch": 0.7312629031541268, "grad_norm": 15.264313697814941, "learning_rate": 2.146432161577842e-06, "loss": 8.4361, "step": 362000 }, { "epoch": 0.7312831037868106, "grad_norm": 483.80743408203125, "learning_rate": 2.146145533385057e-06, "loss": 12.2032, "step": 362010 }, { "epoch": 0.7313033044194944, "grad_norm": 517.9437255859375, "learning_rate": 2.1458589191016103e-06, "loss": 47.8655, "step": 362020 }, { "epoch": 0.7313235050521782, "grad_norm": 259.2720642089844, "learning_rate": 2.1455723187289028e-06, "loss": 18.0934, "step": 362030 }, { "epoch": 0.7313437056848621, "grad_norm": 319.8258056640625, "learning_rate": 2.1452857322683285e-06, "loss": 17.4515, "step": 362040 }, { "epoch": 0.7313639063175459, "grad_norm": 0.0, "learning_rate": 2.1449991597212865e-06, "loss": 29.6921, "step": 362050 }, { "epoch": 0.7313841069502297, "grad_norm": 54.035953521728516, "learning_rate": 2.1447126010891704e-06, "loss": 15.0086, "step": 362060 }, { "epoch": 0.7314043075829135, "grad_norm": 148.7391815185547, "learning_rate": 2.144426056373381e-06, "loss": 14.8681, "step": 362070 }, { "epoch": 0.7314245082155973, "grad_norm": 387.33441162109375, "learning_rate": 2.144139525575313e-06, "loss": 23.0641, "step": 362080 }, { "epoch": 0.7314447088482812, "grad_norm": 139.35191345214844, "learning_rate": 2.14385300869636e-06, "loss": 23.601, 
"step": 362090 }, { "epoch": 0.731464909480965, "grad_norm": 433.0995178222656, "learning_rate": 2.1435665057379233e-06, "loss": 24.6062, "step": 362100 }, { "epoch": 0.7314851101136488, "grad_norm": 56.32212448120117, "learning_rate": 2.143280016701397e-06, "loss": 18.4387, "step": 362110 }, { "epoch": 0.7315053107463326, "grad_norm": 640.1629638671875, "learning_rate": 2.1429935415881753e-06, "loss": 24.2324, "step": 362120 }, { "epoch": 0.7315255113790163, "grad_norm": 150.6722869873047, "learning_rate": 2.1427070803996565e-06, "loss": 7.2841, "step": 362130 }, { "epoch": 0.7315457120117002, "grad_norm": 277.1022644042969, "learning_rate": 2.142420633137241e-06, "loss": 22.3922, "step": 362140 }, { "epoch": 0.731565912644384, "grad_norm": 275.8918151855469, "learning_rate": 2.1421341998023167e-06, "loss": 13.4643, "step": 362150 }, { "epoch": 0.7315861132770678, "grad_norm": 332.7549133300781, "learning_rate": 2.141847780396284e-06, "loss": 9.5429, "step": 362160 }, { "epoch": 0.7316063139097516, "grad_norm": 1105.1734619140625, "learning_rate": 2.14156137492054e-06, "loss": 18.1477, "step": 362170 }, { "epoch": 0.7316265145424354, "grad_norm": 191.01303100585938, "learning_rate": 2.141274983376479e-06, "loss": 7.7928, "step": 362180 }, { "epoch": 0.7316467151751193, "grad_norm": 261.5278015136719, "learning_rate": 2.1409886057654963e-06, "loss": 15.5697, "step": 362190 }, { "epoch": 0.7316669158078031, "grad_norm": 4180.95361328125, "learning_rate": 2.140702242088987e-06, "loss": 34.039, "step": 362200 }, { "epoch": 0.7316871164404869, "grad_norm": 341.9481201171875, "learning_rate": 2.1404158923483524e-06, "loss": 23.2512, "step": 362210 }, { "epoch": 0.7317073170731707, "grad_norm": 1053.8857421875, "learning_rate": 2.1401295565449803e-06, "loss": 29.5673, "step": 362220 }, { "epoch": 0.7317275177058545, "grad_norm": 314.30804443359375, "learning_rate": 2.13984323468027e-06, "loss": 29.4899, "step": 362230 }, { "epoch": 0.7317477183385384, "grad_norm": 8.269257545471191, "learning_rate": 2.1395569267556187e-06, "loss": 24.012, "step": 362240 }, { "epoch": 0.7317679189712222, "grad_norm": 237.9320526123047, "learning_rate": 2.13927063277242e-06, "loss": 17.2456, "step": 362250 }, { "epoch": 0.731788119603906, "grad_norm": 124.94193267822266, "learning_rate": 2.1389843527320675e-06, "loss": 12.9626, "step": 362260 }, { "epoch": 0.7318083202365898, "grad_norm": 341.7077331542969, "learning_rate": 2.1386980866359595e-06, "loss": 44.6113, "step": 362270 }, { "epoch": 0.7318285208692736, "grad_norm": 854.4281005859375, "learning_rate": 2.1384118344854906e-06, "loss": 14.9032, "step": 362280 }, { "epoch": 0.7318487215019575, "grad_norm": 444.77606201171875, "learning_rate": 2.1381255962820535e-06, "loss": 12.3953, "step": 362290 }, { "epoch": 0.7318689221346413, "grad_norm": 666.7606811523438, "learning_rate": 2.137839372027047e-06, "loss": 19.9273, "step": 362300 }, { "epoch": 0.7318891227673251, "grad_norm": 670.42431640625, "learning_rate": 2.137553161721862e-06, "loss": 29.0313, "step": 362310 }, { "epoch": 0.7319093234000089, "grad_norm": 217.4117889404297, "learning_rate": 2.137266965367898e-06, "loss": 11.4348, "step": 362320 }, { "epoch": 0.7319295240326927, "grad_norm": 450.91351318359375, "learning_rate": 2.1369807829665455e-06, "loss": 18.4128, "step": 362330 }, { "epoch": 0.7319497246653766, "grad_norm": 628.2463989257812, "learning_rate": 2.136694614519203e-06, "loss": 18.8765, "step": 362340 }, { "epoch": 0.7319699252980604, "grad_norm": 302.05169677734375, "learning_rate": 
2.1364084600272645e-06, "loss": 10.7668, "step": 362350 }, { "epoch": 0.7319901259307442, "grad_norm": 264.4158935546875, "learning_rate": 2.1361223194921214e-06, "loss": 31.4827, "step": 362360 }, { "epoch": 0.732010326563428, "grad_norm": 285.2078552246094, "learning_rate": 2.135836192915173e-06, "loss": 19.0411, "step": 362370 }, { "epoch": 0.7320305271961118, "grad_norm": 193.60736083984375, "learning_rate": 2.1355500802978093e-06, "loss": 20.2884, "step": 362380 }, { "epoch": 0.7320507278287955, "grad_norm": 283.0826110839844, "learning_rate": 2.135263981641429e-06, "loss": 16.9832, "step": 362390 }, { "epoch": 0.7320709284614794, "grad_norm": 211.36553955078125, "learning_rate": 2.134977896947425e-06, "loss": 9.025, "step": 362400 }, { "epoch": 0.7320911290941632, "grad_norm": 319.20904541015625, "learning_rate": 2.134691826217189e-06, "loss": 27.5033, "step": 362410 }, { "epoch": 0.732111329726847, "grad_norm": 214.47494506835938, "learning_rate": 2.1344057694521177e-06, "loss": 6.9164, "step": 362420 }, { "epoch": 0.7321315303595308, "grad_norm": 302.7760925292969, "learning_rate": 2.1341197266536085e-06, "loss": 22.4382, "step": 362430 }, { "epoch": 0.7321517309922146, "grad_norm": 530.4078979492188, "learning_rate": 2.1338336978230487e-06, "loss": 20.3697, "step": 362440 }, { "epoch": 0.7321719316248985, "grad_norm": 113.6771240234375, "learning_rate": 2.1335476829618364e-06, "loss": 16.3201, "step": 362450 }, { "epoch": 0.7321921322575823, "grad_norm": 25.262041091918945, "learning_rate": 2.133261682071366e-06, "loss": 15.1948, "step": 362460 }, { "epoch": 0.7322123328902661, "grad_norm": 303.3648986816406, "learning_rate": 2.1329756951530307e-06, "loss": 10.07, "step": 362470 }, { "epoch": 0.7322325335229499, "grad_norm": 554.2640380859375, "learning_rate": 2.132689722208223e-06, "loss": 20.4004, "step": 362480 }, { "epoch": 0.7322527341556337, "grad_norm": 342.9158935546875, "learning_rate": 2.132403763238337e-06, "loss": 10.8332, "step": 362490 }, { "epoch": 0.7322729347883176, "grad_norm": 274.4956359863281, "learning_rate": 2.132117818244771e-06, "loss": 23.1871, "step": 362500 }, { "epoch": 0.7322931354210014, "grad_norm": 234.41029357910156, "learning_rate": 2.1318318872289117e-06, "loss": 25.0619, "step": 362510 }, { "epoch": 0.7323133360536852, "grad_norm": 304.1642150878906, "learning_rate": 2.1315459701921553e-06, "loss": 13.4672, "step": 362520 }, { "epoch": 0.732333536686369, "grad_norm": 323.67913818359375, "learning_rate": 2.1312600671358983e-06, "loss": 22.5587, "step": 362530 }, { "epoch": 0.7323537373190528, "grad_norm": 316.3114013671875, "learning_rate": 2.1309741780615316e-06, "loss": 25.9001, "step": 362540 }, { "epoch": 0.7323739379517367, "grad_norm": 398.9940490722656, "learning_rate": 2.1306883029704472e-06, "loss": 12.2283, "step": 362550 }, { "epoch": 0.7323941385844205, "grad_norm": 121.77845764160156, "learning_rate": 2.130402441864041e-06, "loss": 8.5003, "step": 362560 }, { "epoch": 0.7324143392171043, "grad_norm": 956.2951049804688, "learning_rate": 2.1301165947437064e-06, "loss": 27.1598, "step": 362570 }, { "epoch": 0.7324345398497881, "grad_norm": 247.68553161621094, "learning_rate": 2.129830761610833e-06, "loss": 27.7892, "step": 362580 }, { "epoch": 0.7324547404824719, "grad_norm": 239.00120544433594, "learning_rate": 2.1295449424668184e-06, "loss": 23.0604, "step": 362590 }, { "epoch": 0.7324749411151558, "grad_norm": 707.9892578125, "learning_rate": 2.1292591373130515e-06, "loss": 17.0722, "step": 362600 }, { "epoch": 0.7324951417478396, 
"grad_norm": 485.00299072265625, "learning_rate": 2.1289733461509294e-06, "loss": 26.1827, "step": 362610 }, { "epoch": 0.7325153423805234, "grad_norm": 385.3713684082031, "learning_rate": 2.128687568981843e-06, "loss": 17.5272, "step": 362620 }, { "epoch": 0.7325355430132072, "grad_norm": 668.2521362304688, "learning_rate": 2.1284018058071833e-06, "loss": 24.3644, "step": 362630 }, { "epoch": 0.732555743645891, "grad_norm": 293.8020324707031, "learning_rate": 2.1281160566283466e-06, "loss": 27.0207, "step": 362640 }, { "epoch": 0.7325759442785748, "grad_norm": 271.657958984375, "learning_rate": 2.127830321446722e-06, "loss": 7.6152, "step": 362650 }, { "epoch": 0.7325961449112586, "grad_norm": 633.6864624023438, "learning_rate": 2.1275446002637063e-06, "loss": 28.675, "step": 362660 }, { "epoch": 0.7326163455439424, "grad_norm": 360.24310302734375, "learning_rate": 2.127258893080688e-06, "loss": 14.7315, "step": 362670 }, { "epoch": 0.7326365461766262, "grad_norm": 375.8663635253906, "learning_rate": 2.126973199899063e-06, "loss": 12.8284, "step": 362680 }, { "epoch": 0.73265674680931, "grad_norm": 423.78326416015625, "learning_rate": 2.126687520720222e-06, "loss": 15.7296, "step": 362690 }, { "epoch": 0.7326769474419939, "grad_norm": 321.8089599609375, "learning_rate": 2.1264018555455563e-06, "loss": 26.7895, "step": 362700 }, { "epoch": 0.7326971480746777, "grad_norm": 260.6068115234375, "learning_rate": 2.1261162043764606e-06, "loss": 20.3698, "step": 362710 }, { "epoch": 0.7327173487073615, "grad_norm": 484.1304931640625, "learning_rate": 2.1258305672143265e-06, "loss": 24.1116, "step": 362720 }, { "epoch": 0.7327375493400453, "grad_norm": 525.1082763671875, "learning_rate": 2.1255449440605436e-06, "loss": 17.53, "step": 362730 }, { "epoch": 0.7327577499727291, "grad_norm": 357.7300720214844, "learning_rate": 2.1252593349165056e-06, "loss": 13.1004, "step": 362740 }, { "epoch": 0.732777950605413, "grad_norm": 305.9688415527344, "learning_rate": 2.124973739783609e-06, "loss": 5.4386, "step": 362750 }, { "epoch": 0.7327981512380968, "grad_norm": 309.69061279296875, "learning_rate": 2.1246881586632384e-06, "loss": 23.2326, "step": 362760 }, { "epoch": 0.7328183518707806, "grad_norm": 311.6143493652344, "learning_rate": 2.1244025915567883e-06, "loss": 17.7003, "step": 362770 }, { "epoch": 0.7328385525034644, "grad_norm": 166.311279296875, "learning_rate": 2.1241170384656533e-06, "loss": 11.4186, "step": 362780 }, { "epoch": 0.7328587531361482, "grad_norm": 181.48782348632812, "learning_rate": 2.123831499391223e-06, "loss": 32.3941, "step": 362790 }, { "epoch": 0.732878953768832, "grad_norm": 193.65078735351562, "learning_rate": 2.1235459743348874e-06, "loss": 14.3278, "step": 362800 }, { "epoch": 0.7328991544015159, "grad_norm": 165.1879425048828, "learning_rate": 2.12326046329804e-06, "loss": 8.6508, "step": 362810 }, { "epoch": 0.7329193550341997, "grad_norm": 749.2440185546875, "learning_rate": 2.1229749662820754e-06, "loss": 27.1321, "step": 362820 }, { "epoch": 0.7329395556668835, "grad_norm": 734.5176391601562, "learning_rate": 2.122689483288379e-06, "loss": 22.8381, "step": 362830 }, { "epoch": 0.7329597562995673, "grad_norm": 287.7867736816406, "learning_rate": 2.1224040143183444e-06, "loss": 10.9977, "step": 362840 }, { "epoch": 0.7329799569322512, "grad_norm": 639.458984375, "learning_rate": 2.122118559373366e-06, "loss": 17.4001, "step": 362850 }, { "epoch": 0.733000157564935, "grad_norm": 136.86932373046875, "learning_rate": 2.121833118454832e-06, "loss": 15.538, "step": 
362860 }, { "epoch": 0.7330203581976188, "grad_norm": 598.6109619140625, "learning_rate": 2.1215476915641327e-06, "loss": 27.0943, "step": 362870 }, { "epoch": 0.7330405588303026, "grad_norm": 202.90081787109375, "learning_rate": 2.1212622787026626e-06, "loss": 13.3531, "step": 362880 }, { "epoch": 0.7330607594629864, "grad_norm": 148.5986785888672, "learning_rate": 2.120976879871811e-06, "loss": 9.3221, "step": 362890 }, { "epoch": 0.7330809600956701, "grad_norm": 447.45245361328125, "learning_rate": 2.1206914950729673e-06, "loss": 15.6156, "step": 362900 }, { "epoch": 0.733101160728354, "grad_norm": 339.8567810058594, "learning_rate": 2.1204061243075257e-06, "loss": 14.7264, "step": 362910 }, { "epoch": 0.7331213613610378, "grad_norm": 191.8043212890625, "learning_rate": 2.1201207675768738e-06, "loss": 13.6121, "step": 362920 }, { "epoch": 0.7331415619937216, "grad_norm": 382.62396240234375, "learning_rate": 2.1198354248824057e-06, "loss": 15.7879, "step": 362930 }, { "epoch": 0.7331617626264054, "grad_norm": 206.1188201904297, "learning_rate": 2.1195500962255084e-06, "loss": 10.5715, "step": 362940 }, { "epoch": 0.7331819632590892, "grad_norm": 211.1554718017578, "learning_rate": 2.119264781607577e-06, "loss": 21.8638, "step": 362950 }, { "epoch": 0.7332021638917731, "grad_norm": 245.4528350830078, "learning_rate": 2.118979481029999e-06, "loss": 11.091, "step": 362960 }, { "epoch": 0.7332223645244569, "grad_norm": 1.5007383823394775, "learning_rate": 2.118694194494164e-06, "loss": 17.3465, "step": 362970 }, { "epoch": 0.7332425651571407, "grad_norm": 504.3922424316406, "learning_rate": 2.1184089220014657e-06, "loss": 24.4486, "step": 362980 }, { "epoch": 0.7332627657898245, "grad_norm": 268.60662841796875, "learning_rate": 2.1181236635532913e-06, "loss": 15.5253, "step": 362990 }, { "epoch": 0.7332829664225083, "grad_norm": 218.5037384033203, "learning_rate": 2.1178384191510344e-06, "loss": 18.9683, "step": 363000 }, { "epoch": 0.7333031670551922, "grad_norm": 459.7041931152344, "learning_rate": 2.1175531887960834e-06, "loss": 17.4342, "step": 363010 }, { "epoch": 0.733323367687876, "grad_norm": 352.9919738769531, "learning_rate": 2.1172679724898264e-06, "loss": 20.0649, "step": 363020 }, { "epoch": 0.7333435683205598, "grad_norm": 272.325439453125, "learning_rate": 2.116982770233658e-06, "loss": 16.1177, "step": 363030 }, { "epoch": 0.7333637689532436, "grad_norm": 20.618610382080078, "learning_rate": 2.116697582028966e-06, "loss": 16.507, "step": 363040 }, { "epoch": 0.7333839695859274, "grad_norm": 419.63336181640625, "learning_rate": 2.116412407877138e-06, "loss": 21.0784, "step": 363050 }, { "epoch": 0.7334041702186113, "grad_norm": 742.9035034179688, "learning_rate": 2.116127247779566e-06, "loss": 18.4874, "step": 363060 }, { "epoch": 0.7334243708512951, "grad_norm": 20.84160804748535, "learning_rate": 2.1158421017376423e-06, "loss": 27.8615, "step": 363070 }, { "epoch": 0.7334445714839789, "grad_norm": 535.6408081054688, "learning_rate": 2.1155569697527546e-06, "loss": 20.284, "step": 363080 }, { "epoch": 0.7334647721166627, "grad_norm": 193.92379760742188, "learning_rate": 2.1152718518262903e-06, "loss": 19.2419, "step": 363090 }, { "epoch": 0.7334849727493465, "grad_norm": 211.2849578857422, "learning_rate": 2.114986747959643e-06, "loss": 17.1762, "step": 363100 }, { "epoch": 0.7335051733820304, "grad_norm": 306.9668273925781, "learning_rate": 2.1147016581542e-06, "loss": 16.8532, "step": 363110 }, { "epoch": 0.7335253740147142, "grad_norm": 67.09207916259766, "learning_rate": 
2.11441658241135e-06, "loss": 19.1871, "step": 363120 }, { "epoch": 0.733545574647398, "grad_norm": 190.48162841796875, "learning_rate": 2.114131520732483e-06, "loss": 17.9457, "step": 363130 }, { "epoch": 0.7335657752800818, "grad_norm": 506.0332336425781, "learning_rate": 2.113846473118991e-06, "loss": 9.4889, "step": 363140 }, { "epoch": 0.7335859759127656, "grad_norm": 303.1412048339844, "learning_rate": 2.1135614395722613e-06, "loss": 26.3246, "step": 363150 }, { "epoch": 0.7336061765454494, "grad_norm": 370.9874267578125, "learning_rate": 2.113276420093681e-06, "loss": 17.4242, "step": 363160 }, { "epoch": 0.7336263771781332, "grad_norm": 296.4694519042969, "learning_rate": 2.1129914146846435e-06, "loss": 27.9499, "step": 363170 }, { "epoch": 0.733646577810817, "grad_norm": 622.138916015625, "learning_rate": 2.1127064233465354e-06, "loss": 23.5313, "step": 363180 }, { "epoch": 0.7336667784435008, "grad_norm": 292.4410400390625, "learning_rate": 2.1124214460807446e-06, "loss": 22.4486, "step": 363190 }, { "epoch": 0.7336869790761846, "grad_norm": 380.1524963378906, "learning_rate": 2.112136482888663e-06, "loss": 34.3623, "step": 363200 }, { "epoch": 0.7337071797088685, "grad_norm": 230.6260223388672, "learning_rate": 2.111851533771676e-06, "loss": 12.5961, "step": 363210 }, { "epoch": 0.7337273803415523, "grad_norm": 473.07080078125, "learning_rate": 2.111566598731176e-06, "loss": 15.4202, "step": 363220 }, { "epoch": 0.7337475809742361, "grad_norm": 317.4713439941406, "learning_rate": 2.1112816777685506e-06, "loss": 20.3979, "step": 363230 }, { "epoch": 0.7337677816069199, "grad_norm": 322.885498046875, "learning_rate": 2.110996770885186e-06, "loss": 14.5344, "step": 363240 }, { "epoch": 0.7337879822396037, "grad_norm": 0.08826649934053421, "learning_rate": 2.1107118780824744e-06, "loss": 21.5315, "step": 363250 }, { "epoch": 0.7338081828722876, "grad_norm": 552.6737060546875, "learning_rate": 2.1104269993618008e-06, "loss": 20.2855, "step": 363260 }, { "epoch": 0.7338283835049714, "grad_norm": 497.2496643066406, "learning_rate": 2.1101421347245576e-06, "loss": 22.2619, "step": 363270 }, { "epoch": 0.7338485841376552, "grad_norm": 0.0, "learning_rate": 2.109857284172129e-06, "loss": 23.4409, "step": 363280 }, { "epoch": 0.733868784770339, "grad_norm": 398.0042724609375, "learning_rate": 2.1095724477059077e-06, "loss": 12.7938, "step": 363290 }, { "epoch": 0.7338889854030228, "grad_norm": 451.1884765625, "learning_rate": 2.1092876253272793e-06, "loss": 13.8234, "step": 363300 }, { "epoch": 0.7339091860357067, "grad_norm": 298.0521545410156, "learning_rate": 2.1090028170376307e-06, "loss": 8.3172, "step": 363310 }, { "epoch": 0.7339293866683905, "grad_norm": 160.68760681152344, "learning_rate": 2.1087180228383536e-06, "loss": 17.8757, "step": 363320 }, { "epoch": 0.7339495873010743, "grad_norm": 760.4774169921875, "learning_rate": 2.108433242730834e-06, "loss": 18.7513, "step": 363330 }, { "epoch": 0.7339697879337581, "grad_norm": 243.92681884765625, "learning_rate": 2.1081484767164584e-06, "loss": 10.6218, "step": 363340 }, { "epoch": 0.7339899885664419, "grad_norm": 377.17681884765625, "learning_rate": 2.1078637247966166e-06, "loss": 13.2281, "step": 363350 }, { "epoch": 0.7340101891991258, "grad_norm": 127.73228454589844, "learning_rate": 2.1075789869726998e-06, "loss": 19.4942, "step": 363360 }, { "epoch": 0.7340303898318096, "grad_norm": 378.4305725097656, "learning_rate": 2.1072942632460887e-06, "loss": 17.1761, "step": 363370 }, { "epoch": 0.7340505904644934, "grad_norm": 
493.0738830566406, "learning_rate": 2.107009553618174e-06, "loss": 10.5615, "step": 363380 }, { "epoch": 0.7340707910971772, "grad_norm": 651.5400390625, "learning_rate": 2.106724858090346e-06, "loss": 26.7237, "step": 363390 }, { "epoch": 0.734090991729861, "grad_norm": 356.9836120605469, "learning_rate": 2.10644017666399e-06, "loss": 15.6841, "step": 363400 }, { "epoch": 0.7341111923625447, "grad_norm": 338.3780212402344, "learning_rate": 2.1061555093404917e-06, "loss": 7.3456, "step": 363410 }, { "epoch": 0.7341313929952286, "grad_norm": 175.104248046875, "learning_rate": 2.105870856121241e-06, "loss": 20.5585, "step": 363420 }, { "epoch": 0.7341515936279124, "grad_norm": 449.11956787109375, "learning_rate": 2.105586217007628e-06, "loss": 14.4432, "step": 363430 }, { "epoch": 0.7341717942605962, "grad_norm": 171.7938232421875, "learning_rate": 2.1053015920010328e-06, "loss": 5.9366, "step": 363440 }, { "epoch": 0.73419199489328, "grad_norm": 489.88165283203125, "learning_rate": 2.105016981102847e-06, "loss": 10.2844, "step": 363450 }, { "epoch": 0.7342121955259638, "grad_norm": 585.947265625, "learning_rate": 2.104732384314459e-06, "loss": 12.4221, "step": 363460 }, { "epoch": 0.7342323961586477, "grad_norm": 564.4567260742188, "learning_rate": 2.1044478016372544e-06, "loss": 25.8418, "step": 363470 }, { "epoch": 0.7342525967913315, "grad_norm": 870.10400390625, "learning_rate": 2.104163233072618e-06, "loss": 28.1137, "step": 363480 }, { "epoch": 0.7342727974240153, "grad_norm": 273.01324462890625, "learning_rate": 2.1038786786219405e-06, "loss": 11.7085, "step": 363490 }, { "epoch": 0.7342929980566991, "grad_norm": 37.50141906738281, "learning_rate": 2.103594138286607e-06, "loss": 31.0749, "step": 363500 }, { "epoch": 0.7343131986893829, "grad_norm": 134.88909912109375, "learning_rate": 2.103309612068003e-06, "loss": 8.8806, "step": 363510 }, { "epoch": 0.7343333993220668, "grad_norm": 263.0030212402344, "learning_rate": 2.1030250999675184e-06, "loss": 8.9125, "step": 363520 }, { "epoch": 0.7343535999547506, "grad_norm": 207.8386993408203, "learning_rate": 2.102740601986536e-06, "loss": 12.5714, "step": 363530 }, { "epoch": 0.7343738005874344, "grad_norm": 197.8450164794922, "learning_rate": 2.1024561181264464e-06, "loss": 22.7507, "step": 363540 }, { "epoch": 0.7343940012201182, "grad_norm": 496.8077087402344, "learning_rate": 2.1021716483886323e-06, "loss": 17.176, "step": 363550 }, { "epoch": 0.734414201852802, "grad_norm": 425.3866882324219, "learning_rate": 2.1018871927744844e-06, "loss": 11.5271, "step": 363560 }, { "epoch": 0.7344344024854859, "grad_norm": 545.3555908203125, "learning_rate": 2.1016027512853864e-06, "loss": 12.7917, "step": 363570 }, { "epoch": 0.7344546031181697, "grad_norm": 65.3341293334961, "learning_rate": 2.101318323922723e-06, "loss": 15.5178, "step": 363580 }, { "epoch": 0.7344748037508535, "grad_norm": 304.2625732421875, "learning_rate": 2.1010339106878846e-06, "loss": 25.6953, "step": 363590 }, { "epoch": 0.7344950043835373, "grad_norm": 9.351652145385742, "learning_rate": 2.100749511582254e-06, "loss": 14.0082, "step": 363600 }, { "epoch": 0.7345152050162211, "grad_norm": 312.57421875, "learning_rate": 2.10046512660722e-06, "loss": 20.842, "step": 363610 }, { "epoch": 0.734535405648905, "grad_norm": 342.7440490722656, "learning_rate": 2.1001807557641673e-06, "loss": 18.5986, "step": 363620 }, { "epoch": 0.7345556062815888, "grad_norm": 0.0, "learning_rate": 2.09989639905448e-06, "loss": 13.1004, "step": 363630 }, { "epoch": 0.7345758069142726, 
"grad_norm": 324.0922546386719, "learning_rate": 2.099612056479548e-06, "loss": 14.762, "step": 363640 }, { "epoch": 0.7345960075469564, "grad_norm": 384.5039978027344, "learning_rate": 2.0993277280407547e-06, "loss": 11.5096, "step": 363650 }, { "epoch": 0.7346162081796402, "grad_norm": 225.64239501953125, "learning_rate": 2.099043413739485e-06, "loss": 21.6839, "step": 363660 }, { "epoch": 0.734636408812324, "grad_norm": 271.57293701171875, "learning_rate": 2.098759113577125e-06, "loss": 22.0194, "step": 363670 }, { "epoch": 0.7346566094450078, "grad_norm": 186.3390350341797, "learning_rate": 2.098474827555064e-06, "loss": 35.961, "step": 363680 }, { "epoch": 0.7346768100776916, "grad_norm": 1.4856864213943481, "learning_rate": 2.098190555674684e-06, "loss": 20.7479, "step": 363690 }, { "epoch": 0.7346970107103754, "grad_norm": 263.3894348144531, "learning_rate": 2.09790629793737e-06, "loss": 13.7656, "step": 363700 }, { "epoch": 0.7347172113430592, "grad_norm": 338.7405090332031, "learning_rate": 2.09762205434451e-06, "loss": 20.4562, "step": 363710 }, { "epoch": 0.734737411975743, "grad_norm": 268.4193115234375, "learning_rate": 2.0973378248974884e-06, "loss": 12.3929, "step": 363720 }, { "epoch": 0.7347576126084269, "grad_norm": 332.6508483886719, "learning_rate": 2.0970536095976884e-06, "loss": 24.0123, "step": 363730 }, { "epoch": 0.7347778132411107, "grad_norm": 111.83285522460938, "learning_rate": 2.0967694084464973e-06, "loss": 23.1527, "step": 363740 }, { "epoch": 0.7347980138737945, "grad_norm": 655.0184326171875, "learning_rate": 2.096485221445301e-06, "loss": 31.2206, "step": 363750 }, { "epoch": 0.7348182145064783, "grad_norm": 398.7709045410156, "learning_rate": 2.0962010485954844e-06, "loss": 14.0068, "step": 363760 }, { "epoch": 0.7348384151391621, "grad_norm": 366.68621826171875, "learning_rate": 2.0959168898984295e-06, "loss": 16.8575, "step": 363770 }, { "epoch": 0.734858615771846, "grad_norm": 261.1799621582031, "learning_rate": 2.095632745355525e-06, "loss": 19.9537, "step": 363780 }, { "epoch": 0.7348788164045298, "grad_norm": 368.545654296875, "learning_rate": 2.0953486149681553e-06, "loss": 15.8502, "step": 363790 }, { "epoch": 0.7348990170372136, "grad_norm": 387.75653076171875, "learning_rate": 2.095064498737701e-06, "loss": 21.5372, "step": 363800 }, { "epoch": 0.7349192176698974, "grad_norm": 573.4590454101562, "learning_rate": 2.0947803966655526e-06, "loss": 19.9252, "step": 363810 }, { "epoch": 0.7349394183025812, "grad_norm": 515.4793701171875, "learning_rate": 2.09449630875309e-06, "loss": 12.6202, "step": 363820 }, { "epoch": 0.7349596189352651, "grad_norm": 206.53895568847656, "learning_rate": 2.0942122350017023e-06, "loss": 9.7034, "step": 363830 }, { "epoch": 0.7349798195679489, "grad_norm": 666.141357421875, "learning_rate": 2.0939281754127695e-06, "loss": 28.0815, "step": 363840 }, { "epoch": 0.7350000202006327, "grad_norm": 162.98211669921875, "learning_rate": 2.0936441299876803e-06, "loss": 9.8799, "step": 363850 }, { "epoch": 0.7350202208333165, "grad_norm": 344.7902526855469, "learning_rate": 2.093360098727817e-06, "loss": 12.8903, "step": 363860 }, { "epoch": 0.7350404214660003, "grad_norm": 506.5223388671875, "learning_rate": 2.0930760816345626e-06, "loss": 18.2629, "step": 363870 }, { "epoch": 0.7350606220986842, "grad_norm": 480.6791687011719, "learning_rate": 2.092792078709304e-06, "loss": 21.4682, "step": 363880 }, { "epoch": 0.735080822731368, "grad_norm": 44.390289306640625, "learning_rate": 2.0925080899534227e-06, "loss": 15.0543, 
"step": 363890 }, { "epoch": 0.7351010233640518, "grad_norm": 616.2040405273438, "learning_rate": 2.0922241153683064e-06, "loss": 13.2332, "step": 363900 }, { "epoch": 0.7351212239967356, "grad_norm": 359.86737060546875, "learning_rate": 2.091940154955337e-06, "loss": 13.5108, "step": 363910 }, { "epoch": 0.7351414246294194, "grad_norm": 0.5225976705551147, "learning_rate": 2.0916562087158964e-06, "loss": 14.2642, "step": 363920 }, { "epoch": 0.7351616252621032, "grad_norm": 285.5023193359375, "learning_rate": 2.091372276651373e-06, "loss": 34.4661, "step": 363930 }, { "epoch": 0.735181825894787, "grad_norm": 525.5093994140625, "learning_rate": 2.0910883587631476e-06, "loss": 13.6851, "step": 363940 }, { "epoch": 0.7352020265274708, "grad_norm": 472.0049743652344, "learning_rate": 2.0908044550526034e-06, "loss": 16.949, "step": 363950 }, { "epoch": 0.7352222271601546, "grad_norm": 181.69915771484375, "learning_rate": 2.0905205655211257e-06, "loss": 19.0975, "step": 363960 }, { "epoch": 0.7352424277928384, "grad_norm": 816.1010131835938, "learning_rate": 2.090236690170101e-06, "loss": 23.5879, "step": 363970 }, { "epoch": 0.7352626284255223, "grad_norm": 320.40216064453125, "learning_rate": 2.0899528290009065e-06, "loss": 16.6658, "step": 363980 }, { "epoch": 0.7352828290582061, "grad_norm": 337.8548583984375, "learning_rate": 2.089668982014929e-06, "loss": 16.0121, "step": 363990 }, { "epoch": 0.7353030296908899, "grad_norm": 363.7285461425781, "learning_rate": 2.0893851492135536e-06, "loss": 24.7558, "step": 364000 }, { "epoch": 0.7353232303235737, "grad_norm": 506.2754211425781, "learning_rate": 2.0891013305981615e-06, "loss": 16.5288, "step": 364010 }, { "epoch": 0.7353434309562575, "grad_norm": 606.0123901367188, "learning_rate": 2.0888175261701355e-06, "loss": 22.4118, "step": 364020 }, { "epoch": 0.7353636315889414, "grad_norm": 555.2738647460938, "learning_rate": 2.0885337359308592e-06, "loss": 27.5469, "step": 364030 }, { "epoch": 0.7353838322216252, "grad_norm": 167.02308654785156, "learning_rate": 2.08824995988172e-06, "loss": 7.4998, "step": 364040 }, { "epoch": 0.735404032854309, "grad_norm": 167.9142608642578, "learning_rate": 2.087966198024094e-06, "loss": 20.0514, "step": 364050 }, { "epoch": 0.7354242334869928, "grad_norm": 50.869422912597656, "learning_rate": 2.0876824503593673e-06, "loss": 23.0109, "step": 364060 }, { "epoch": 0.7354444341196766, "grad_norm": 720.871826171875, "learning_rate": 2.087398716888925e-06, "loss": 22.7871, "step": 364070 }, { "epoch": 0.7354646347523605, "grad_norm": 174.04391479492188, "learning_rate": 2.0871149976141484e-06, "loss": 12.0311, "step": 364080 }, { "epoch": 0.7354848353850443, "grad_norm": 84.24659729003906, "learning_rate": 2.086831292536418e-06, "loss": 17.644, "step": 364090 }, { "epoch": 0.7355050360177281, "grad_norm": 522.3529663085938, "learning_rate": 2.0865476016571206e-06, "loss": 13.5571, "step": 364100 }, { "epoch": 0.7355252366504119, "grad_norm": 631.71826171875, "learning_rate": 2.0862639249776364e-06, "loss": 16.4983, "step": 364110 }, { "epoch": 0.7355454372830957, "grad_norm": 1105.38330078125, "learning_rate": 2.085980262499347e-06, "loss": 34.7617, "step": 364120 }, { "epoch": 0.7355656379157796, "grad_norm": 649.3748168945312, "learning_rate": 2.085696614223638e-06, "loss": 12.8657, "step": 364130 }, { "epoch": 0.7355858385484634, "grad_norm": 285.0609130859375, "learning_rate": 2.085412980151888e-06, "loss": 13.6714, "step": 364140 }, { "epoch": 0.7356060391811472, "grad_norm": 337.8122253417969, 
"learning_rate": 2.085129360285484e-06, "loss": 13.7497, "step": 364150 }, { "epoch": 0.735626239813831, "grad_norm": 132.0164337158203, "learning_rate": 2.0848457546258037e-06, "loss": 18.5715, "step": 364160 }, { "epoch": 0.7356464404465148, "grad_norm": 1003.1087036132812, "learning_rate": 2.0845621631742342e-06, "loss": 45.9939, "step": 364170 }, { "epoch": 0.7356666410791985, "grad_norm": 329.656982421875, "learning_rate": 2.0842785859321545e-06, "loss": 16.9075, "step": 364180 }, { "epoch": 0.7356868417118824, "grad_norm": 368.9480895996094, "learning_rate": 2.083995022900946e-06, "loss": 13.125, "step": 364190 }, { "epoch": 0.7357070423445662, "grad_norm": 306.953369140625, "learning_rate": 2.083711474081993e-06, "loss": 15.056, "step": 364200 }, { "epoch": 0.73572724297725, "grad_norm": 574.099365234375, "learning_rate": 2.0834279394766755e-06, "loss": 17.3683, "step": 364210 }, { "epoch": 0.7357474436099338, "grad_norm": 419.91485595703125, "learning_rate": 2.083144419086378e-06, "loss": 22.846, "step": 364220 }, { "epoch": 0.7357676442426176, "grad_norm": 361.4034423828125, "learning_rate": 2.082860912912481e-06, "loss": 15.6339, "step": 364230 }, { "epoch": 0.7357878448753015, "grad_norm": 498.6043395996094, "learning_rate": 2.082577420956364e-06, "loss": 10.3416, "step": 364240 }, { "epoch": 0.7358080455079853, "grad_norm": 689.240478515625, "learning_rate": 2.0822939432194134e-06, "loss": 15.2039, "step": 364250 }, { "epoch": 0.7358282461406691, "grad_norm": 245.316650390625, "learning_rate": 2.082010479703008e-06, "loss": 12.0396, "step": 364260 }, { "epoch": 0.7358484467733529, "grad_norm": 293.8541564941406, "learning_rate": 2.0817270304085273e-06, "loss": 14.1903, "step": 364270 }, { "epoch": 0.7358686474060367, "grad_norm": 469.2142333984375, "learning_rate": 2.0814435953373554e-06, "loss": 43.1208, "step": 364280 }, { "epoch": 0.7358888480387206, "grad_norm": 59.95088577270508, "learning_rate": 2.081160174490875e-06, "loss": 19.9318, "step": 364290 }, { "epoch": 0.7359090486714044, "grad_norm": 199.74713134765625, "learning_rate": 2.080876767870466e-06, "loss": 17.9222, "step": 364300 }, { "epoch": 0.7359292493040882, "grad_norm": 863.7758178710938, "learning_rate": 2.0805933754775083e-06, "loss": 26.1903, "step": 364310 }, { "epoch": 0.735949449936772, "grad_norm": 372.6961669921875, "learning_rate": 2.0803099973133856e-06, "loss": 10.2383, "step": 364320 }, { "epoch": 0.7359696505694558, "grad_norm": 341.9979553222656, "learning_rate": 2.080026633379478e-06, "loss": 25.43, "step": 364330 }, { "epoch": 0.7359898512021397, "grad_norm": 58.08280944824219, "learning_rate": 2.079743283677165e-06, "loss": 15.3824, "step": 364340 }, { "epoch": 0.7360100518348235, "grad_norm": 293.8395080566406, "learning_rate": 2.079459948207828e-06, "loss": 15.1824, "step": 364350 }, { "epoch": 0.7360302524675073, "grad_norm": 176.7348175048828, "learning_rate": 2.079176626972852e-06, "loss": 15.4581, "step": 364360 }, { "epoch": 0.7360504531001911, "grad_norm": 0.0, "learning_rate": 2.0788933199736145e-06, "loss": 17.6577, "step": 364370 }, { "epoch": 0.736070653732875, "grad_norm": 269.6431884765625, "learning_rate": 2.0786100272114943e-06, "loss": 13.2815, "step": 364380 }, { "epoch": 0.7360908543655588, "grad_norm": 343.29443359375, "learning_rate": 2.0783267486878773e-06, "loss": 13.7139, "step": 364390 }, { "epoch": 0.7361110549982426, "grad_norm": 25.994674682617188, "learning_rate": 2.07804348440414e-06, "loss": 11.1712, "step": 364400 }, { "epoch": 0.7361312556309264, "grad_norm": 
241.93959045410156, "learning_rate": 2.077760234361664e-06, "loss": 11.0591, "step": 364410 }, { "epoch": 0.7361514562636102, "grad_norm": 423.5716552734375, "learning_rate": 2.0774769985618317e-06, "loss": 15.3195, "step": 364420 }, { "epoch": 0.736171656896294, "grad_norm": 230.12428283691406, "learning_rate": 2.07719377700602e-06, "loss": 34.281, "step": 364430 }, { "epoch": 0.7361918575289778, "grad_norm": 215.46192932128906, "learning_rate": 2.0769105696956128e-06, "loss": 21.9807, "step": 364440 }, { "epoch": 0.7362120581616616, "grad_norm": 353.5723571777344, "learning_rate": 2.0766273766319873e-06, "loss": 18.0947, "step": 364450 }, { "epoch": 0.7362322587943454, "grad_norm": 421.5323791503906, "learning_rate": 2.076344197816527e-06, "loss": 26.1596, "step": 364460 }, { "epoch": 0.7362524594270292, "grad_norm": 398.92974853515625, "learning_rate": 2.076061033250611e-06, "loss": 23.5638, "step": 364470 }, { "epoch": 0.736272660059713, "grad_norm": 1340.0908203125, "learning_rate": 2.0757778829356175e-06, "loss": 16.1693, "step": 364480 }, { "epoch": 0.7362928606923969, "grad_norm": 318.86724853515625, "learning_rate": 2.0754947468729285e-06, "loss": 15.0761, "step": 364490 }, { "epoch": 0.7363130613250807, "grad_norm": 371.62493896484375, "learning_rate": 2.075211625063923e-06, "loss": 22.8919, "step": 364500 }, { "epoch": 0.7363332619577645, "grad_norm": 436.4801330566406, "learning_rate": 2.074928517509982e-06, "loss": 23.7027, "step": 364510 }, { "epoch": 0.7363534625904483, "grad_norm": 197.2120819091797, "learning_rate": 2.0746454242124846e-06, "loss": 12.4145, "step": 364520 }, { "epoch": 0.7363736632231321, "grad_norm": 721.41357421875, "learning_rate": 2.0743623451728096e-06, "loss": 15.5012, "step": 364530 }, { "epoch": 0.736393863855816, "grad_norm": 423.0234069824219, "learning_rate": 2.074079280392339e-06, "loss": 18.1207, "step": 364540 }, { "epoch": 0.7364140644884998, "grad_norm": 756.3760375976562, "learning_rate": 2.0737962298724513e-06, "loss": 25.2786, "step": 364550 }, { "epoch": 0.7364342651211836, "grad_norm": 466.7777404785156, "learning_rate": 2.0735131936145237e-06, "loss": 9.0022, "step": 364560 }, { "epoch": 0.7364544657538674, "grad_norm": 189.84327697753906, "learning_rate": 2.073230171619938e-06, "loss": 19.0023, "step": 364570 }, { "epoch": 0.7364746663865512, "grad_norm": 412.6581115722656, "learning_rate": 2.0729471638900772e-06, "loss": 27.9567, "step": 364580 }, { "epoch": 0.736494867019235, "grad_norm": 321.45556640625, "learning_rate": 2.0726641704263133e-06, "loss": 25.5634, "step": 364590 }, { "epoch": 0.7365150676519189, "grad_norm": 626.9800415039062, "learning_rate": 2.0723811912300295e-06, "loss": 11.3805, "step": 364600 }, { "epoch": 0.7365352682846027, "grad_norm": 456.0321044921875, "learning_rate": 2.072098226302606e-06, "loss": 15.3584, "step": 364610 }, { "epoch": 0.7365554689172865, "grad_norm": 230.5832977294922, "learning_rate": 2.0718152756454207e-06, "loss": 21.3752, "step": 364620 }, { "epoch": 0.7365756695499703, "grad_norm": 338.4997253417969, "learning_rate": 2.071532339259851e-06, "loss": 24.3353, "step": 364630 }, { "epoch": 0.7365958701826542, "grad_norm": 361.087646484375, "learning_rate": 2.0712494171472776e-06, "loss": 15.6216, "step": 364640 }, { "epoch": 0.736616070815338, "grad_norm": 315.47674560546875, "learning_rate": 2.0709665093090824e-06, "loss": 14.0099, "step": 364650 }, { "epoch": 0.7366362714480218, "grad_norm": 89.35796356201172, "learning_rate": 2.0706836157466383e-06, "loss": 13.5254, "step": 364660 }, { 
"epoch": 0.7366564720807056, "grad_norm": 202.1490936279297, "learning_rate": 2.0704007364613266e-06, "loss": 14.4107, "step": 364670 }, { "epoch": 0.7366766727133894, "grad_norm": 467.12591552734375, "learning_rate": 2.0701178714545285e-06, "loss": 18.407, "step": 364680 }, { "epoch": 0.7366968733460731, "grad_norm": 582.572998046875, "learning_rate": 2.0698350207276204e-06, "loss": 15.5343, "step": 364690 }, { "epoch": 0.736717073978757, "grad_norm": 25.415082931518555, "learning_rate": 2.0695521842819788e-06, "loss": 17.5226, "step": 364700 }, { "epoch": 0.7367372746114408, "grad_norm": 229.2875518798828, "learning_rate": 2.0692693621189864e-06, "loss": 14.4092, "step": 364710 }, { "epoch": 0.7367574752441246, "grad_norm": 126.3668212890625, "learning_rate": 2.0689865542400196e-06, "loss": 25.4265, "step": 364720 }, { "epoch": 0.7367776758768084, "grad_norm": 288.700927734375, "learning_rate": 2.0687037606464554e-06, "loss": 20.3068, "step": 364730 }, { "epoch": 0.7367978765094922, "grad_norm": 22.80805206298828, "learning_rate": 2.0684209813396748e-06, "loss": 14.9529, "step": 364740 }, { "epoch": 0.7368180771421761, "grad_norm": 362.77783203125, "learning_rate": 2.0681382163210533e-06, "loss": 16.4827, "step": 364750 }, { "epoch": 0.7368382777748599, "grad_norm": 321.43145751953125, "learning_rate": 2.0678554655919725e-06, "loss": 15.38, "step": 364760 }, { "epoch": 0.7368584784075437, "grad_norm": 318.1789245605469, "learning_rate": 2.0675727291538068e-06, "loss": 20.9938, "step": 364770 }, { "epoch": 0.7368786790402275, "grad_norm": 430.7816467285156, "learning_rate": 2.0672900070079375e-06, "loss": 14.727, "step": 364780 }, { "epoch": 0.7368988796729113, "grad_norm": 599.7792358398438, "learning_rate": 2.067007299155741e-06, "loss": 18.0324, "step": 364790 }, { "epoch": 0.7369190803055952, "grad_norm": 183.4662628173828, "learning_rate": 2.066724605598594e-06, "loss": 13.3307, "step": 364800 }, { "epoch": 0.736939280938279, "grad_norm": 332.3937683105469, "learning_rate": 2.0664419263378764e-06, "loss": 19.6866, "step": 364810 }, { "epoch": 0.7369594815709628, "grad_norm": 208.60269165039062, "learning_rate": 2.066159261374964e-06, "loss": 21.2156, "step": 364820 }, { "epoch": 0.7369796822036466, "grad_norm": 124.0291519165039, "learning_rate": 2.0658766107112367e-06, "loss": 25.1262, "step": 364830 }, { "epoch": 0.7369998828363304, "grad_norm": 454.86767578125, "learning_rate": 2.0655939743480714e-06, "loss": 15.669, "step": 364840 }, { "epoch": 0.7370200834690143, "grad_norm": 172.296875, "learning_rate": 2.0653113522868427e-06, "loss": 11.7845, "step": 364850 }, { "epoch": 0.7370402841016981, "grad_norm": 840.0567626953125, "learning_rate": 2.065028744528933e-06, "loss": 29.7223, "step": 364860 }, { "epoch": 0.7370604847343819, "grad_norm": 548.4918823242188, "learning_rate": 2.0647461510757173e-06, "loss": 21.1387, "step": 364870 }, { "epoch": 0.7370806853670657, "grad_norm": 306.392333984375, "learning_rate": 2.0644635719285704e-06, "loss": 18.6181, "step": 364880 }, { "epoch": 0.7371008859997495, "grad_norm": 270.8236999511719, "learning_rate": 2.064181007088873e-06, "loss": 15.3122, "step": 364890 }, { "epoch": 0.7371210866324334, "grad_norm": 476.0078430175781, "learning_rate": 2.063898456558002e-06, "loss": 27.3251, "step": 364900 }, { "epoch": 0.7371412872651172, "grad_norm": 406.9973449707031, "learning_rate": 2.0636159203373344e-06, "loss": 29.6369, "step": 364910 }, { "epoch": 0.737161487897801, "grad_norm": 354.7281494140625, "learning_rate": 2.063333398428245e-06, 
"loss": 15.3554, "step": 364920 }, { "epoch": 0.7371816885304848, "grad_norm": 301.2372131347656, "learning_rate": 2.063050890832114e-06, "loss": 14.0986, "step": 364930 }, { "epoch": 0.7372018891631686, "grad_norm": 245.56309509277344, "learning_rate": 2.0627683975503165e-06, "loss": 12.8766, "step": 364940 }, { "epoch": 0.7372220897958524, "grad_norm": 196.98167419433594, "learning_rate": 2.0624859185842284e-06, "loss": 21.5888, "step": 364950 }, { "epoch": 0.7372422904285362, "grad_norm": 216.22962951660156, "learning_rate": 2.062203453935227e-06, "loss": 12.4488, "step": 364960 }, { "epoch": 0.73726249106122, "grad_norm": 132.98841857910156, "learning_rate": 2.0619210036046923e-06, "loss": 14.3631, "step": 364970 }, { "epoch": 0.7372826916939038, "grad_norm": 459.241943359375, "learning_rate": 2.0616385675939977e-06, "loss": 14.7004, "step": 364980 }, { "epoch": 0.7373028923265876, "grad_norm": 532.1847534179688, "learning_rate": 2.0613561459045184e-06, "loss": 30.1397, "step": 364990 }, { "epoch": 0.7373230929592715, "grad_norm": 301.25482177734375, "learning_rate": 2.061073738537635e-06, "loss": 20.9194, "step": 365000 }, { "epoch": 0.7373432935919553, "grad_norm": 62.32251739501953, "learning_rate": 2.0607913454947215e-06, "loss": 16.0964, "step": 365010 }, { "epoch": 0.7373634942246391, "grad_norm": 180.23123168945312, "learning_rate": 2.060508966777153e-06, "loss": 9.9189, "step": 365020 }, { "epoch": 0.7373836948573229, "grad_norm": 269.3304443359375, "learning_rate": 2.0602266023863088e-06, "loss": 16.3484, "step": 365030 }, { "epoch": 0.7374038954900067, "grad_norm": 101.0495376586914, "learning_rate": 2.059944252323562e-06, "loss": 13.0723, "step": 365040 }, { "epoch": 0.7374240961226906, "grad_norm": 83.1161880493164, "learning_rate": 2.0596619165902916e-06, "loss": 17.9313, "step": 365050 }, { "epoch": 0.7374442967553744, "grad_norm": 159.0005645751953, "learning_rate": 2.059379595187871e-06, "loss": 25.5723, "step": 365060 }, { "epoch": 0.7374644973880582, "grad_norm": 489.9853515625, "learning_rate": 2.0590972881176798e-06, "loss": 17.5896, "step": 365070 }, { "epoch": 0.737484698020742, "grad_norm": 276.6214599609375, "learning_rate": 2.058814995381091e-06, "loss": 11.981, "step": 365080 }, { "epoch": 0.7375048986534258, "grad_norm": 334.879150390625, "learning_rate": 2.0585327169794796e-06, "loss": 25.8994, "step": 365090 }, { "epoch": 0.7375250992861097, "grad_norm": 153.3126678466797, "learning_rate": 2.0582504529142248e-06, "loss": 25.3136, "step": 365100 }, { "epoch": 0.7375452999187935, "grad_norm": 860.970947265625, "learning_rate": 2.0579682031866988e-06, "loss": 23.6348, "step": 365110 }, { "epoch": 0.7375655005514773, "grad_norm": 4.6487836837768555, "learning_rate": 2.057685967798281e-06, "loss": 14.9108, "step": 365120 }, { "epoch": 0.7375857011841611, "grad_norm": 63.38954162597656, "learning_rate": 2.0574037467503444e-06, "loss": 17.4898, "step": 365130 }, { "epoch": 0.7376059018168449, "grad_norm": 324.40863037109375, "learning_rate": 2.0571215400442634e-06, "loss": 11.4311, "step": 365140 }, { "epoch": 0.7376261024495288, "grad_norm": 185.480224609375, "learning_rate": 2.056839347681417e-06, "loss": 17.5421, "step": 365150 }, { "epoch": 0.7376463030822126, "grad_norm": 262.0285949707031, "learning_rate": 2.056557169663179e-06, "loss": 16.7033, "step": 365160 }, { "epoch": 0.7376665037148964, "grad_norm": 415.1700744628906, "learning_rate": 2.056275005990922e-06, "loss": 20.0006, "step": 365170 }, { "epoch": 0.7376867043475802, "grad_norm": 
502.0487060546875, "learning_rate": 2.0559928566660235e-06, "loss": 17.7509, "step": 365180 }, { "epoch": 0.737706904980264, "grad_norm": 340.950439453125, "learning_rate": 2.055710721689863e-06, "loss": 31.2926, "step": 365190 }, { "epoch": 0.7377271056129477, "grad_norm": 87.57659149169922, "learning_rate": 2.0554286010638076e-06, "loss": 28.3744, "step": 365200 }, { "epoch": 0.7377473062456316, "grad_norm": 794.2094116210938, "learning_rate": 2.055146494789236e-06, "loss": 24.5646, "step": 365210 }, { "epoch": 0.7377675068783154, "grad_norm": 286.29437255859375, "learning_rate": 2.0548644028675246e-06, "loss": 18.8776, "step": 365220 }, { "epoch": 0.7377877075109992, "grad_norm": 427.6775817871094, "learning_rate": 2.054582325300047e-06, "loss": 13.636, "step": 365230 }, { "epoch": 0.737807908143683, "grad_norm": 364.1160888671875, "learning_rate": 2.0543002620881764e-06, "loss": 12.5557, "step": 365240 }, { "epoch": 0.7378281087763668, "grad_norm": 641.0657958984375, "learning_rate": 2.0540182132332886e-06, "loss": 19.4537, "step": 365250 }, { "epoch": 0.7378483094090507, "grad_norm": 264.384765625, "learning_rate": 2.0537361787367625e-06, "loss": 15.7444, "step": 365260 }, { "epoch": 0.7378685100417345, "grad_norm": 192.15878295898438, "learning_rate": 2.0534541585999656e-06, "loss": 6.6334, "step": 365270 }, { "epoch": 0.7378887106744183, "grad_norm": 156.77110290527344, "learning_rate": 2.053172152824276e-06, "loss": 15.3243, "step": 365280 }, { "epoch": 0.7379089113071021, "grad_norm": 263.1060485839844, "learning_rate": 2.052890161411069e-06, "loss": 26.4381, "step": 365290 }, { "epoch": 0.7379291119397859, "grad_norm": 848.5113525390625, "learning_rate": 2.0526081843617183e-06, "loss": 20.6396, "step": 365300 }, { "epoch": 0.7379493125724698, "grad_norm": 642.378173828125, "learning_rate": 2.0523262216775962e-06, "loss": 21.4196, "step": 365310 }, { "epoch": 0.7379695132051536, "grad_norm": 191.56842041015625, "learning_rate": 2.0520442733600805e-06, "loss": 20.8087, "step": 365320 }, { "epoch": 0.7379897138378374, "grad_norm": 317.2579650878906, "learning_rate": 2.0517623394105427e-06, "loss": 22.4094, "step": 365330 }, { "epoch": 0.7380099144705212, "grad_norm": 251.9071807861328, "learning_rate": 2.0514804198303565e-06, "loss": 19.7969, "step": 365340 }, { "epoch": 0.738030115103205, "grad_norm": 276.1114501953125, "learning_rate": 2.0511985146208966e-06, "loss": 14.7182, "step": 365350 }, { "epoch": 0.7380503157358889, "grad_norm": 352.9045715332031, "learning_rate": 2.0509166237835398e-06, "loss": 43.914, "step": 365360 }, { "epoch": 0.7380705163685727, "grad_norm": 1177.640380859375, "learning_rate": 2.0506347473196582e-06, "loss": 23.1351, "step": 365370 }, { "epoch": 0.7380907170012565, "grad_norm": 346.06512451171875, "learning_rate": 2.0503528852306225e-06, "loss": 17.196, "step": 365380 }, { "epoch": 0.7381109176339403, "grad_norm": 154.27084350585938, "learning_rate": 2.0500710375178107e-06, "loss": 8.2563, "step": 365390 }, { "epoch": 0.7381311182666241, "grad_norm": 439.40386962890625, "learning_rate": 2.049789204182596e-06, "loss": 16.077, "step": 365400 }, { "epoch": 0.738151318899308, "grad_norm": 498.5280456542969, "learning_rate": 2.0495073852263476e-06, "loss": 20.0574, "step": 365410 }, { "epoch": 0.7381715195319918, "grad_norm": 197.1608428955078, "learning_rate": 2.0492255806504453e-06, "loss": 27.3639, "step": 365420 }, { "epoch": 0.7381917201646756, "grad_norm": 387.186767578125, "learning_rate": 2.048943790456257e-06, "loss": 27.2629, "step": 365430 }, { 
"epoch": 0.7382119207973594, "grad_norm": 679.3992919921875, "learning_rate": 2.0486620146451613e-06, "loss": 27.7594, "step": 365440 }, { "epoch": 0.7382321214300432, "grad_norm": 132.18710327148438, "learning_rate": 2.0483802532185286e-06, "loss": 15.0273, "step": 365450 }, { "epoch": 0.738252322062727, "grad_norm": 401.27386474609375, "learning_rate": 2.048098506177731e-06, "loss": 12.9366, "step": 365460 }, { "epoch": 0.7382725226954108, "grad_norm": 429.3953857421875, "learning_rate": 2.0478167735241443e-06, "loss": 21.7013, "step": 365470 }, { "epoch": 0.7382927233280946, "grad_norm": 186.17623901367188, "learning_rate": 2.0475350552591418e-06, "loss": 24.4907, "step": 365480 }, { "epoch": 0.7383129239607784, "grad_norm": 295.16973876953125, "learning_rate": 2.0472533513840923e-06, "loss": 14.8181, "step": 365490 }, { "epoch": 0.7383331245934622, "grad_norm": 18.06108283996582, "learning_rate": 2.046971661900373e-06, "loss": 14.5095, "step": 365500 }, { "epoch": 0.738353325226146, "grad_norm": 492.65283203125, "learning_rate": 2.0466899868093566e-06, "loss": 18.0839, "step": 365510 }, { "epoch": 0.7383735258588299, "grad_norm": 302.4225158691406, "learning_rate": 2.0464083261124156e-06, "loss": 24.0759, "step": 365520 }, { "epoch": 0.7383937264915137, "grad_norm": 238.61460876464844, "learning_rate": 2.04612667981092e-06, "loss": 27.7483, "step": 365530 }, { "epoch": 0.7384139271241975, "grad_norm": 437.5618896484375, "learning_rate": 2.0458450479062465e-06, "loss": 11.6448, "step": 365540 }, { "epoch": 0.7384341277568813, "grad_norm": 511.740234375, "learning_rate": 2.045563430399766e-06, "loss": 18.7123, "step": 365550 }, { "epoch": 0.7384543283895652, "grad_norm": 397.40032958984375, "learning_rate": 2.0452818272928493e-06, "loss": 14.0414, "step": 365560 }, { "epoch": 0.738474529022249, "grad_norm": 583.4635009765625, "learning_rate": 2.0450002385868707e-06, "loss": 24.7034, "step": 365570 }, { "epoch": 0.7384947296549328, "grad_norm": 377.3338928222656, "learning_rate": 2.0447186642832063e-06, "loss": 11.6895, "step": 365580 }, { "epoch": 0.7385149302876166, "grad_norm": 451.6911926269531, "learning_rate": 2.0444371043832208e-06, "loss": 15.0496, "step": 365590 }, { "epoch": 0.7385351309203004, "grad_norm": 454.59991455078125, "learning_rate": 2.04415555888829e-06, "loss": 23.482, "step": 365600 }, { "epoch": 0.7385553315529843, "grad_norm": 329.60504150390625, "learning_rate": 2.0438740277997888e-06, "loss": 21.0239, "step": 365610 }, { "epoch": 0.7385755321856681, "grad_norm": 72.90430450439453, "learning_rate": 2.0435925111190867e-06, "loss": 9.1965, "step": 365620 }, { "epoch": 0.7385957328183519, "grad_norm": 332.781494140625, "learning_rate": 2.043311008847555e-06, "loss": 26.3267, "step": 365630 }, { "epoch": 0.7386159334510357, "grad_norm": 383.88885498046875, "learning_rate": 2.043029520986568e-06, "loss": 16.9502, "step": 365640 }, { "epoch": 0.7386361340837195, "grad_norm": 175.68109130859375, "learning_rate": 2.042748047537495e-06, "loss": 21.7168, "step": 365650 }, { "epoch": 0.7386563347164034, "grad_norm": 461.905517578125, "learning_rate": 2.0424665885017114e-06, "loss": 19.2489, "step": 365660 }, { "epoch": 0.7386765353490872, "grad_norm": 205.0062255859375, "learning_rate": 2.0421851438805845e-06, "loss": 14.8898, "step": 365670 }, { "epoch": 0.738696735981771, "grad_norm": 146.28482055664062, "learning_rate": 2.0419037136754903e-06, "loss": 13.7219, "step": 365680 }, { "epoch": 0.7387169366144548, "grad_norm": 561.8257446289062, "learning_rate": 
2.0416222978877993e-06, "loss": 22.6266, "step": 365690 }, { "epoch": 0.7387371372471386, "grad_norm": 231.55657958984375, "learning_rate": 2.04134089651888e-06, "loss": 21.4549, "step": 365700 }, { "epoch": 0.7387573378798225, "grad_norm": 353.3453369140625, "learning_rate": 2.0410595095701084e-06, "loss": 23.9026, "step": 365710 }, { "epoch": 0.7387775385125062, "grad_norm": 359.21295166015625, "learning_rate": 2.040778137042852e-06, "loss": 21.8223, "step": 365720 }, { "epoch": 0.73879773914519, "grad_norm": 386.9476318359375, "learning_rate": 2.040496778938486e-06, "loss": 16.4884, "step": 365730 }, { "epoch": 0.7388179397778738, "grad_norm": 222.1886444091797, "learning_rate": 2.04021543525838e-06, "loss": 26.9491, "step": 365740 }, { "epoch": 0.7388381404105576, "grad_norm": 301.1086120605469, "learning_rate": 2.0399341060039023e-06, "loss": 25.0354, "step": 365750 }, { "epoch": 0.7388583410432414, "grad_norm": 253.30914306640625, "learning_rate": 2.039652791176429e-06, "loss": 9.767, "step": 365760 }, { "epoch": 0.7388785416759253, "grad_norm": 424.1557922363281, "learning_rate": 2.0393714907773294e-06, "loss": 20.6632, "step": 365770 }, { "epoch": 0.7388987423086091, "grad_norm": 509.7571105957031, "learning_rate": 2.0390902048079717e-06, "loss": 19.995, "step": 365780 }, { "epoch": 0.7389189429412929, "grad_norm": 571.0275268554688, "learning_rate": 2.038808933269729e-06, "loss": 16.7485, "step": 365790 }, { "epoch": 0.7389391435739767, "grad_norm": 301.386962890625, "learning_rate": 2.0385276761639768e-06, "loss": 6.9186, "step": 365800 }, { "epoch": 0.7389593442066605, "grad_norm": 545.816650390625, "learning_rate": 2.0382464334920774e-06, "loss": 16.2437, "step": 365810 }, { "epoch": 0.7389795448393444, "grad_norm": 138.1280975341797, "learning_rate": 2.037965205255406e-06, "loss": 10.7938, "step": 365820 }, { "epoch": 0.7389997454720282, "grad_norm": 185.7974395751953, "learning_rate": 2.037683991455334e-06, "loss": 28.174, "step": 365830 }, { "epoch": 0.739019946104712, "grad_norm": 184.17898559570312, "learning_rate": 2.037402792093232e-06, "loss": 13.9978, "step": 365840 }, { "epoch": 0.7390401467373958, "grad_norm": 367.27703857421875, "learning_rate": 2.0371216071704667e-06, "loss": 18.0003, "step": 365850 }, { "epoch": 0.7390603473700796, "grad_norm": 599.73974609375, "learning_rate": 2.036840436688412e-06, "loss": 21.7555, "step": 365860 }, { "epoch": 0.7390805480027635, "grad_norm": 321.1237487792969, "learning_rate": 2.036559280648441e-06, "loss": 24.0683, "step": 365870 }, { "epoch": 0.7391007486354473, "grad_norm": 313.18206787109375, "learning_rate": 2.036278139051917e-06, "loss": 19.0138, "step": 365880 }, { "epoch": 0.7391209492681311, "grad_norm": 298.0772399902344, "learning_rate": 2.0359970119002143e-06, "loss": 26.005, "step": 365890 }, { "epoch": 0.7391411499008149, "grad_norm": 270.0126647949219, "learning_rate": 2.035715899194704e-06, "loss": 17.485, "step": 365900 }, { "epoch": 0.7391613505334987, "grad_norm": 308.9616394042969, "learning_rate": 2.0354348009367554e-06, "loss": 13.9394, "step": 365910 }, { "epoch": 0.7391815511661826, "grad_norm": 398.1065368652344, "learning_rate": 2.0351537171277353e-06, "loss": 10.6025, "step": 365920 }, { "epoch": 0.7392017517988664, "grad_norm": 622.6244506835938, "learning_rate": 2.034872647769019e-06, "loss": 27.7901, "step": 365930 }, { "epoch": 0.7392219524315502, "grad_norm": 437.3424072265625, "learning_rate": 2.0345915928619737e-06, "loss": 19.379, "step": 365940 }, { "epoch": 0.739242153064234, "grad_norm": 
9.678370475769043, "learning_rate": 2.034310552407967e-06, "loss": 13.0048, "step": 365950 }, { "epoch": 0.7392623536969178, "grad_norm": 586.5205078125, "learning_rate": 2.0340295264083716e-06, "loss": 17.0872, "step": 365960 }, { "epoch": 0.7392825543296015, "grad_norm": 1070.67431640625, "learning_rate": 2.033748514864558e-06, "loss": 28.4841, "step": 365970 }, { "epoch": 0.7393027549622854, "grad_norm": 292.5230712890625, "learning_rate": 2.0334675177778938e-06, "loss": 27.6959, "step": 365980 }, { "epoch": 0.7393229555949692, "grad_norm": 60.216163635253906, "learning_rate": 2.033186535149748e-06, "loss": 25.453, "step": 365990 }, { "epoch": 0.739343156227653, "grad_norm": 558.8899536132812, "learning_rate": 2.0329055669814936e-06, "loss": 17.6868, "step": 366000 }, { "epoch": 0.7393633568603368, "grad_norm": 97.86579895019531, "learning_rate": 2.0326246132744963e-06, "loss": 17.3967, "step": 366010 }, { "epoch": 0.7393835574930206, "grad_norm": 756.5426025390625, "learning_rate": 2.0323436740301262e-06, "loss": 36.4764, "step": 366020 }, { "epoch": 0.7394037581257045, "grad_norm": 248.0166473388672, "learning_rate": 2.0320627492497543e-06, "loss": 8.8571, "step": 366030 }, { "epoch": 0.7394239587583883, "grad_norm": 334.6541442871094, "learning_rate": 2.0317818389347468e-06, "loss": 21.99, "step": 366040 }, { "epoch": 0.7394441593910721, "grad_norm": 0.0, "learning_rate": 2.0315009430864762e-06, "loss": 23.731, "step": 366050 }, { "epoch": 0.7394643600237559, "grad_norm": 157.05633544921875, "learning_rate": 2.03122006170631e-06, "loss": 15.738, "step": 366060 }, { "epoch": 0.7394845606564397, "grad_norm": 419.2681884765625, "learning_rate": 2.030939194795616e-06, "loss": 20.4409, "step": 366070 }, { "epoch": 0.7395047612891236, "grad_norm": 427.7028503417969, "learning_rate": 2.030658342355765e-06, "loss": 14.2418, "step": 366080 }, { "epoch": 0.7395249619218074, "grad_norm": 109.60071563720703, "learning_rate": 2.030377504388126e-06, "loss": 21.0627, "step": 366090 }, { "epoch": 0.7395451625544912, "grad_norm": 201.05209350585938, "learning_rate": 2.030096680894065e-06, "loss": 14.6214, "step": 366100 }, { "epoch": 0.739565363187175, "grad_norm": 97.4200210571289, "learning_rate": 2.029815871874952e-06, "loss": 11.6279, "step": 366110 }, { "epoch": 0.7395855638198588, "grad_norm": 242.57472229003906, "learning_rate": 2.0295350773321583e-06, "loss": 16.6682, "step": 366120 }, { "epoch": 0.7396057644525427, "grad_norm": 204.07839965820312, "learning_rate": 2.0292542972670503e-06, "loss": 31.5801, "step": 366130 }, { "epoch": 0.7396259650852265, "grad_norm": 18.23077392578125, "learning_rate": 2.0289735316809948e-06, "loss": 30.5035, "step": 366140 }, { "epoch": 0.7396461657179103, "grad_norm": 265.7449645996094, "learning_rate": 2.028692780575364e-06, "loss": 23.7089, "step": 366150 }, { "epoch": 0.7396663663505941, "grad_norm": 210.06321716308594, "learning_rate": 2.028412043951524e-06, "loss": 16.4642, "step": 366160 }, { "epoch": 0.739686566983278, "grad_norm": 519.0916137695312, "learning_rate": 2.028131321810841e-06, "loss": 23.075, "step": 366170 }, { "epoch": 0.7397067676159618, "grad_norm": 759.4010620117188, "learning_rate": 2.027850614154686e-06, "loss": 24.0642, "step": 366180 }, { "epoch": 0.7397269682486456, "grad_norm": 335.1924133300781, "learning_rate": 2.0275699209844306e-06, "loss": 19.3043, "step": 366190 }, { "epoch": 0.7397471688813294, "grad_norm": 234.74412536621094, "learning_rate": 2.027289242301435e-06, "loss": 15.7694, "step": 366200 }, { "epoch": 
0.7397673695140132, "grad_norm": 66.03064727783203, "learning_rate": 2.0270085781070707e-06, "loss": 15.6784, "step": 366210 }, { "epoch": 0.739787570146697, "grad_norm": 200.4514617919922, "learning_rate": 2.026727928402708e-06, "loss": 11.6553, "step": 366220 }, { "epoch": 0.7398077707793808, "grad_norm": 1297.374755859375, "learning_rate": 2.0264472931897135e-06, "loss": 23.5428, "step": 366230 }, { "epoch": 0.7398279714120646, "grad_norm": 452.533935546875, "learning_rate": 2.026166672469451e-06, "loss": 19.6167, "step": 366240 }, { "epoch": 0.7398481720447484, "grad_norm": 71.55281066894531, "learning_rate": 2.0258860662432946e-06, "loss": 21.2165, "step": 366250 }, { "epoch": 0.7398683726774322, "grad_norm": 15.032710075378418, "learning_rate": 2.025605474512608e-06, "loss": 13.4593, "step": 366260 }, { "epoch": 0.739888573310116, "grad_norm": 171.6467742919922, "learning_rate": 2.025324897278758e-06, "loss": 18.3105, "step": 366270 }, { "epoch": 0.7399087739427999, "grad_norm": 635.5283203125, "learning_rate": 2.0250443345431135e-06, "loss": 35.5155, "step": 366280 }, { "epoch": 0.7399289745754837, "grad_norm": 21.338153839111328, "learning_rate": 2.0247637863070446e-06, "loss": 17.7918, "step": 366290 }, { "epoch": 0.7399491752081675, "grad_norm": 310.5945129394531, "learning_rate": 2.0244832525719155e-06, "loss": 11.9054, "step": 366300 }, { "epoch": 0.7399693758408513, "grad_norm": 327.890625, "learning_rate": 2.0242027333390924e-06, "loss": 40.5949, "step": 366310 }, { "epoch": 0.7399895764735351, "grad_norm": 215.30043029785156, "learning_rate": 2.0239222286099463e-06, "loss": 16.9784, "step": 366320 }, { "epoch": 0.740009777106219, "grad_norm": 324.63348388671875, "learning_rate": 2.0236417383858404e-06, "loss": 25.1119, "step": 366330 }, { "epoch": 0.7400299777389028, "grad_norm": 218.94415283203125, "learning_rate": 2.0233612626681454e-06, "loss": 20.3333, "step": 366340 }, { "epoch": 0.7400501783715866, "grad_norm": 830.9730224609375, "learning_rate": 2.0230808014582265e-06, "loss": 19.89, "step": 366350 }, { "epoch": 0.7400703790042704, "grad_norm": 100.47584533691406, "learning_rate": 2.0228003547574488e-06, "loss": 26.5495, "step": 366360 }, { "epoch": 0.7400905796369542, "grad_norm": 0.0, "learning_rate": 2.022519922567183e-06, "loss": 8.8323, "step": 366370 }, { "epoch": 0.7401107802696381, "grad_norm": 463.47674560546875, "learning_rate": 2.0222395048887942e-06, "loss": 13.7695, "step": 366380 }, { "epoch": 0.7401309809023219, "grad_norm": 197.24440002441406, "learning_rate": 2.0219591017236474e-06, "loss": 22.9665, "step": 366390 }, { "epoch": 0.7401511815350057, "grad_norm": 345.9573974609375, "learning_rate": 2.02167871307311e-06, "loss": 18.8581, "step": 366400 }, { "epoch": 0.7401713821676895, "grad_norm": 442.7614440917969, "learning_rate": 2.021398338938552e-06, "loss": 23.2895, "step": 366410 }, { "epoch": 0.7401915828003733, "grad_norm": 760.4876708984375, "learning_rate": 2.021117979321336e-06, "loss": 19.8918, "step": 366420 }, { "epoch": 0.7402117834330572, "grad_norm": 165.46810913085938, "learning_rate": 2.0208376342228287e-06, "loss": 20.7642, "step": 366430 }, { "epoch": 0.740231984065741, "grad_norm": 604.820556640625, "learning_rate": 2.0205573036443994e-06, "loss": 19.4534, "step": 366440 }, { "epoch": 0.7402521846984248, "grad_norm": 214.176025390625, "learning_rate": 2.020276987587412e-06, "loss": 15.5619, "step": 366450 }, { "epoch": 0.7402723853311086, "grad_norm": 376.1199951171875, "learning_rate": 2.019996686053231e-06, "loss": 22.0531, 
"step": 366460 }, { "epoch": 0.7402925859637924, "grad_norm": 159.35194396972656, "learning_rate": 2.0197163990432256e-06, "loss": 21.4652, "step": 366470 }, { "epoch": 0.7403127865964761, "grad_norm": 170.47312927246094, "learning_rate": 2.0194361265587644e-06, "loss": 7.2859, "step": 366480 }, { "epoch": 0.74033298722916, "grad_norm": 204.39385986328125, "learning_rate": 2.019155868601206e-06, "loss": 8.9417, "step": 366490 }, { "epoch": 0.7403531878618438, "grad_norm": 403.3724365234375, "learning_rate": 2.0188756251719204e-06, "loss": 19.4494, "step": 366500 }, { "epoch": 0.7403733884945276, "grad_norm": 56.86149215698242, "learning_rate": 2.018595396272275e-06, "loss": 18.7177, "step": 366510 }, { "epoch": 0.7403935891272114, "grad_norm": 418.9256591796875, "learning_rate": 2.018315181903635e-06, "loss": 19.4897, "step": 366520 }, { "epoch": 0.7404137897598952, "grad_norm": 246.70790100097656, "learning_rate": 2.018034982067363e-06, "loss": 25.4961, "step": 366530 }, { "epoch": 0.7404339903925791, "grad_norm": 89.4341049194336, "learning_rate": 2.0177547967648283e-06, "loss": 19.213, "step": 366540 }, { "epoch": 0.7404541910252629, "grad_norm": 323.2135009765625, "learning_rate": 2.0174746259973956e-06, "loss": 9.7069, "step": 366550 }, { "epoch": 0.7404743916579467, "grad_norm": 690.9506225585938, "learning_rate": 2.0171944697664277e-06, "loss": 16.364, "step": 366560 }, { "epoch": 0.7404945922906305, "grad_norm": 481.0787658691406, "learning_rate": 2.0169143280732916e-06, "loss": 14.3124, "step": 366570 }, { "epoch": 0.7405147929233143, "grad_norm": 379.80987548828125, "learning_rate": 2.016634200919356e-06, "loss": 14.8808, "step": 366580 }, { "epoch": 0.7405349935559982, "grad_norm": 745.8098754882812, "learning_rate": 2.016354088305983e-06, "loss": 15.8693, "step": 366590 }, { "epoch": 0.740555194188682, "grad_norm": 520.9889526367188, "learning_rate": 2.016073990234536e-06, "loss": 32.575, "step": 366600 }, { "epoch": 0.7405753948213658, "grad_norm": 59.87366485595703, "learning_rate": 2.0157939067063848e-06, "loss": 21.0093, "step": 366610 }, { "epoch": 0.7405955954540496, "grad_norm": 469.46917724609375, "learning_rate": 2.0155138377228924e-06, "loss": 23.9371, "step": 366620 }, { "epoch": 0.7406157960867334, "grad_norm": 12.194167137145996, "learning_rate": 2.0152337832854213e-06, "loss": 13.9133, "step": 366630 }, { "epoch": 0.7406359967194173, "grad_norm": 0.0, "learning_rate": 2.014953743395341e-06, "loss": 10.9898, "step": 366640 }, { "epoch": 0.7406561973521011, "grad_norm": 320.4013366699219, "learning_rate": 2.014673718054012e-06, "loss": 42.721, "step": 366650 }, { "epoch": 0.7406763979847849, "grad_norm": 73.00108337402344, "learning_rate": 2.0143937072628033e-06, "loss": 13.0563, "step": 366660 }, { "epoch": 0.7406965986174687, "grad_norm": 440.5721740722656, "learning_rate": 2.014113711023077e-06, "loss": 17.716, "step": 366670 }, { "epoch": 0.7407167992501525, "grad_norm": 623.513427734375, "learning_rate": 2.013833729336197e-06, "loss": 30.0989, "step": 366680 }, { "epoch": 0.7407369998828364, "grad_norm": 161.8341522216797, "learning_rate": 2.0135537622035313e-06, "loss": 13.7036, "step": 366690 }, { "epoch": 0.7407572005155202, "grad_norm": 262.3466796875, "learning_rate": 2.0132738096264415e-06, "loss": 12.5189, "step": 366700 }, { "epoch": 0.740777401148204, "grad_norm": 335.18231201171875, "learning_rate": 2.0129938716062917e-06, "loss": 21.4074, "step": 366710 }, { "epoch": 0.7407976017808878, "grad_norm": 95.01959991455078, "learning_rate": 
2.0127139481444475e-06, "loss": 20.8021, "step": 366720 }, { "epoch": 0.7408178024135716, "grad_norm": 374.58447265625, "learning_rate": 2.0124340392422746e-06, "loss": 20.3548, "step": 366730 }, { "epoch": 0.7408380030462554, "grad_norm": 525.8010864257812, "learning_rate": 2.012154144901136e-06, "loss": 32.7077, "step": 366740 }, { "epoch": 0.7408582036789392, "grad_norm": 623.1885986328125, "learning_rate": 2.0118742651223944e-06, "loss": 19.2108, "step": 366750 }, { "epoch": 0.740878404311623, "grad_norm": 284.1596984863281, "learning_rate": 2.0115943999074167e-06, "loss": 14.0548, "step": 366760 }, { "epoch": 0.7408986049443068, "grad_norm": 253.09112548828125, "learning_rate": 2.011314549257565e-06, "loss": 19.0957, "step": 366770 }, { "epoch": 0.7409188055769906, "grad_norm": 166.9971466064453, "learning_rate": 2.0110347131742024e-06, "loss": 6.3661, "step": 366780 }, { "epoch": 0.7409390062096745, "grad_norm": 408.9407958984375, "learning_rate": 2.0107548916586946e-06, "loss": 16.7069, "step": 366790 }, { "epoch": 0.7409592068423583, "grad_norm": 455.688720703125, "learning_rate": 2.0104750847124075e-06, "loss": 15.5309, "step": 366800 }, { "epoch": 0.7409794074750421, "grad_norm": 351.8607482910156, "learning_rate": 2.010195292336699e-06, "loss": 23.2376, "step": 366810 }, { "epoch": 0.7409996081077259, "grad_norm": 279.016357421875, "learning_rate": 2.0099155145329364e-06, "loss": 19.2799, "step": 366820 }, { "epoch": 0.7410198087404097, "grad_norm": 470.3839416503906, "learning_rate": 2.009635751302484e-06, "loss": 15.1497, "step": 366830 }, { "epoch": 0.7410400093730936, "grad_norm": 163.67852783203125, "learning_rate": 2.0093560026467046e-06, "loss": 11.5575, "step": 366840 }, { "epoch": 0.7410602100057774, "grad_norm": 179.66949462890625, "learning_rate": 2.0090762685669597e-06, "loss": 17.2575, "step": 366850 }, { "epoch": 0.7410804106384612, "grad_norm": 490.8962097167969, "learning_rate": 2.0087965490646144e-06, "loss": 21.9387, "step": 366860 }, { "epoch": 0.741100611271145, "grad_norm": 519.0337524414062, "learning_rate": 2.0085168441410352e-06, "loss": 13.6265, "step": 366870 }, { "epoch": 0.7411208119038288, "grad_norm": 381.9383850097656, "learning_rate": 2.0082371537975786e-06, "loss": 17.2182, "step": 366880 }, { "epoch": 0.7411410125365127, "grad_norm": 292.76849365234375, "learning_rate": 2.0079574780356116e-06, "loss": 22.6855, "step": 366890 }, { "epoch": 0.7411612131691965, "grad_norm": 453.6347961425781, "learning_rate": 2.007677816856498e-06, "loss": 13.1982, "step": 366900 }, { "epoch": 0.7411814138018803, "grad_norm": 824.3630981445312, "learning_rate": 2.0073981702616e-06, "loss": 27.8966, "step": 366910 }, { "epoch": 0.7412016144345641, "grad_norm": 304.8612976074219, "learning_rate": 2.007118538252279e-06, "loss": 23.3396, "step": 366920 }, { "epoch": 0.7412218150672479, "grad_norm": 638.951416015625, "learning_rate": 2.006838920829901e-06, "loss": 20.9227, "step": 366930 }, { "epoch": 0.7412420156999318, "grad_norm": 14.768926620483398, "learning_rate": 2.0065593179958267e-06, "loss": 31.2564, "step": 366940 }, { "epoch": 0.7412622163326156, "grad_norm": 358.9869384765625, "learning_rate": 2.0062797297514176e-06, "loss": 11.3001, "step": 366950 }, { "epoch": 0.7412824169652994, "grad_norm": 147.42005920410156, "learning_rate": 2.0060001560980395e-06, "loss": 23.4147, "step": 366960 }, { "epoch": 0.7413026175979832, "grad_norm": 247.12164306640625, "learning_rate": 2.0057205970370524e-06, "loss": 26.8701, "step": 366970 }, { "epoch": 0.741322818230667, 
"grad_norm": 462.36322021484375, "learning_rate": 2.0054410525698217e-06, "loss": 21.2175, "step": 366980 }, { "epoch": 0.7413430188633509, "grad_norm": 263.4853210449219, "learning_rate": 2.0051615226977072e-06, "loss": 8.8489, "step": 366990 }, { "epoch": 0.7413632194960346, "grad_norm": 752.6406860351562, "learning_rate": 2.0048820074220716e-06, "loss": 28.5969, "step": 367000 }, { "epoch": 0.7413834201287184, "grad_norm": 470.063232421875, "learning_rate": 2.0046025067442788e-06, "loss": 22.4267, "step": 367010 }, { "epoch": 0.7414036207614022, "grad_norm": 632.08837890625, "learning_rate": 2.0043230206656884e-06, "loss": 26.7387, "step": 367020 }, { "epoch": 0.741423821394086, "grad_norm": 281.9903259277344, "learning_rate": 2.0040435491876666e-06, "loss": 14.0005, "step": 367030 }, { "epoch": 0.7414440220267698, "grad_norm": 371.42437744140625, "learning_rate": 2.0037640923115704e-06, "loss": 39.7928, "step": 367040 }, { "epoch": 0.7414642226594537, "grad_norm": 402.6060485839844, "learning_rate": 2.0034846500387674e-06, "loss": 12.355, "step": 367050 }, { "epoch": 0.7414844232921375, "grad_norm": 42.73507308959961, "learning_rate": 2.003205222370616e-06, "loss": 25.2388, "step": 367060 }, { "epoch": 0.7415046239248213, "grad_norm": 271.7084045410156, "learning_rate": 2.0029258093084774e-06, "loss": 19.5723, "step": 367070 }, { "epoch": 0.7415248245575051, "grad_norm": 407.57977294921875, "learning_rate": 2.0026464108537153e-06, "loss": 19.6693, "step": 367080 }, { "epoch": 0.7415450251901889, "grad_norm": 506.03558349609375, "learning_rate": 2.002367027007694e-06, "loss": 8.3654, "step": 367090 }, { "epoch": 0.7415652258228728, "grad_norm": 222.3409881591797, "learning_rate": 2.002087657771769e-06, "loss": 14.7323, "step": 367100 }, { "epoch": 0.7415854264555566, "grad_norm": 387.82733154296875, "learning_rate": 2.001808303147305e-06, "loss": 28.677, "step": 367110 }, { "epoch": 0.7416056270882404, "grad_norm": 324.7660217285156, "learning_rate": 2.0015289631356654e-06, "loss": 11.4134, "step": 367120 }, { "epoch": 0.7416258277209242, "grad_norm": 710.8705444335938, "learning_rate": 2.00124963773821e-06, "loss": 17.5392, "step": 367130 }, { "epoch": 0.741646028353608, "grad_norm": 384.54461669921875, "learning_rate": 2.000970326956299e-06, "loss": 15.3054, "step": 367140 }, { "epoch": 0.7416662289862919, "grad_norm": 347.185791015625, "learning_rate": 2.0006910307912965e-06, "loss": 32.0539, "step": 367150 }, { "epoch": 0.7416864296189757, "grad_norm": 236.717529296875, "learning_rate": 2.0004117492445614e-06, "loss": 13.4359, "step": 367160 }, { "epoch": 0.7417066302516595, "grad_norm": 206.33102416992188, "learning_rate": 2.0001324823174544e-06, "loss": 26.4592, "step": 367170 }, { "epoch": 0.7417268308843433, "grad_norm": 247.7039031982422, "learning_rate": 1.9998532300113376e-06, "loss": 17.3294, "step": 367180 }, { "epoch": 0.7417470315170271, "grad_norm": 389.9203186035156, "learning_rate": 1.9995739923275743e-06, "loss": 22.1714, "step": 367190 }, { "epoch": 0.741767232149711, "grad_norm": 388.1050109863281, "learning_rate": 1.999294769267523e-06, "loss": 12.2662, "step": 367200 }, { "epoch": 0.7417874327823948, "grad_norm": 601.4808959960938, "learning_rate": 1.999015560832544e-06, "loss": 22.3924, "step": 367210 }, { "epoch": 0.7418076334150786, "grad_norm": 716.5521850585938, "learning_rate": 1.9987363670240006e-06, "loss": 18.4349, "step": 367220 }, { "epoch": 0.7418278340477624, "grad_norm": 840.4054565429688, "learning_rate": 1.998457187843252e-06, "loss": 19.8814, 
"step": 367230 }, { "epoch": 0.7418480346804462, "grad_norm": 443.71624755859375, "learning_rate": 1.998178023291657e-06, "loss": 14.4573, "step": 367240 }, { "epoch": 0.74186823531313, "grad_norm": 293.313232421875, "learning_rate": 1.9978988733705807e-06, "loss": 16.8452, "step": 367250 }, { "epoch": 0.7418884359458138, "grad_norm": 1023.288330078125, "learning_rate": 1.997619738081379e-06, "loss": 21.4529, "step": 367260 }, { "epoch": 0.7419086365784976, "grad_norm": 309.85845947265625, "learning_rate": 1.997340617425416e-06, "loss": 10.65, "step": 367270 }, { "epoch": 0.7419288372111814, "grad_norm": 870.9706420898438, "learning_rate": 1.9970615114040514e-06, "loss": 16.24, "step": 367280 }, { "epoch": 0.7419490378438652, "grad_norm": 375.987060546875, "learning_rate": 1.9967824200186426e-06, "loss": 18.7024, "step": 367290 }, { "epoch": 0.741969238476549, "grad_norm": 365.5154724121094, "learning_rate": 1.996503343270554e-06, "loss": 11.1557, "step": 367300 }, { "epoch": 0.7419894391092329, "grad_norm": 272.2825012207031, "learning_rate": 1.9962242811611437e-06, "loss": 13.8497, "step": 367310 }, { "epoch": 0.7420096397419167, "grad_norm": 450.99810791015625, "learning_rate": 1.99594523369177e-06, "loss": 17.0733, "step": 367320 }, { "epoch": 0.7420298403746005, "grad_norm": 144.34202575683594, "learning_rate": 1.995666200863795e-06, "loss": 21.3699, "step": 367330 }, { "epoch": 0.7420500410072843, "grad_norm": 823.3612670898438, "learning_rate": 1.9953871826785804e-06, "loss": 18.1909, "step": 367340 }, { "epoch": 0.7420702416399682, "grad_norm": 48.882633209228516, "learning_rate": 1.9951081791374843e-06, "loss": 18.089, "step": 367350 }, { "epoch": 0.742090442272652, "grad_norm": 25.4761905670166, "learning_rate": 1.994829190241865e-06, "loss": 10.5992, "step": 367360 }, { "epoch": 0.7421106429053358, "grad_norm": 531.5303344726562, "learning_rate": 1.9945502159930846e-06, "loss": 11.2596, "step": 367370 }, { "epoch": 0.7421308435380196, "grad_norm": 352.27239990234375, "learning_rate": 1.994271256392503e-06, "loss": 11.6738, "step": 367380 }, { "epoch": 0.7421510441707034, "grad_norm": 214.4779052734375, "learning_rate": 1.993992311441476e-06, "loss": 16.8358, "step": 367390 }, { "epoch": 0.7421712448033873, "grad_norm": 144.89553833007812, "learning_rate": 1.9937133811413666e-06, "loss": 18.4599, "step": 367400 }, { "epoch": 0.7421914454360711, "grad_norm": 260.8878479003906, "learning_rate": 1.9934344654935367e-06, "loss": 11.9296, "step": 367410 }, { "epoch": 0.7422116460687549, "grad_norm": 366.73394775390625, "learning_rate": 1.9931555644993395e-06, "loss": 20.6212, "step": 367420 }, { "epoch": 0.7422318467014387, "grad_norm": 654.520751953125, "learning_rate": 1.9928766781601366e-06, "loss": 21.1741, "step": 367430 }, { "epoch": 0.7422520473341225, "grad_norm": 323.9236755371094, "learning_rate": 1.9925978064772904e-06, "loss": 31.874, "step": 367440 }, { "epoch": 0.7422722479668064, "grad_norm": 132.22869873046875, "learning_rate": 1.9923189494521576e-06, "loss": 11.6857, "step": 367450 }, { "epoch": 0.7422924485994902, "grad_norm": 245.091552734375, "learning_rate": 1.9920401070860955e-06, "loss": 17.0472, "step": 367460 }, { "epoch": 0.742312649232174, "grad_norm": 216.7820281982422, "learning_rate": 1.991761279380466e-06, "loss": 9.716, "step": 367470 }, { "epoch": 0.7423328498648578, "grad_norm": 432.0198669433594, "learning_rate": 1.9914824663366296e-06, "loss": 22.8366, "step": 367480 }, { "epoch": 0.7423530504975416, "grad_norm": 383.17138671875, "learning_rate": 
1.9912036679559397e-06, "loss": 20.7813, "step": 367490 }, { "epoch": 0.7423732511302255, "grad_norm": 346.9214172363281, "learning_rate": 1.990924884239758e-06, "loss": 22.4922, "step": 367500 }, { "epoch": 0.7423934517629092, "grad_norm": 435.1873474121094, "learning_rate": 1.990646115189446e-06, "loss": 20.0185, "step": 367510 }, { "epoch": 0.742413652395593, "grad_norm": 106.90283203125, "learning_rate": 1.990367360806359e-06, "loss": 12.1977, "step": 367520 }, { "epoch": 0.7424338530282768, "grad_norm": 413.74432373046875, "learning_rate": 1.9900886210918547e-06, "loss": 19.9012, "step": 367530 }, { "epoch": 0.7424540536609606, "grad_norm": 185.722412109375, "learning_rate": 1.989809896047295e-06, "loss": 26.3624, "step": 367540 }, { "epoch": 0.7424742542936444, "grad_norm": 444.6708068847656, "learning_rate": 1.989531185674037e-06, "loss": 26.142, "step": 367550 }, { "epoch": 0.7424944549263283, "grad_norm": 334.2047424316406, "learning_rate": 1.989252489973438e-06, "loss": 9.756, "step": 367560 }, { "epoch": 0.7425146555590121, "grad_norm": 1283.1856689453125, "learning_rate": 1.988973808946858e-06, "loss": 12.4231, "step": 367570 }, { "epoch": 0.7425348561916959, "grad_norm": 350.5517883300781, "learning_rate": 1.988695142595653e-06, "loss": 7.4296, "step": 367580 }, { "epoch": 0.7425550568243797, "grad_norm": 188.34580993652344, "learning_rate": 1.988416490921184e-06, "loss": 11.0043, "step": 367590 }, { "epoch": 0.7425752574570635, "grad_norm": 588.7089233398438, "learning_rate": 1.988137853924808e-06, "loss": 31.801, "step": 367600 }, { "epoch": 0.7425954580897474, "grad_norm": 527.1619873046875, "learning_rate": 1.9878592316078813e-06, "loss": 22.5599, "step": 367610 }, { "epoch": 0.7426156587224312, "grad_norm": 412.13873291015625, "learning_rate": 1.987580623971765e-06, "loss": 34.7493, "step": 367620 }, { "epoch": 0.742635859355115, "grad_norm": 441.67449951171875, "learning_rate": 1.987302031017814e-06, "loss": 21.1823, "step": 367630 }, { "epoch": 0.7426560599877988, "grad_norm": 319.75970458984375, "learning_rate": 1.9870234527473886e-06, "loss": 29.5131, "step": 367640 }, { "epoch": 0.7426762606204826, "grad_norm": 506.9483337402344, "learning_rate": 1.986744889161844e-06, "loss": 16.4514, "step": 367650 }, { "epoch": 0.7426964612531665, "grad_norm": 471.54632568359375, "learning_rate": 1.986466340262541e-06, "loss": 20.7602, "step": 367660 }, { "epoch": 0.7427166618858503, "grad_norm": 56.814937591552734, "learning_rate": 1.9861878060508357e-06, "loss": 30.1332, "step": 367670 }, { "epoch": 0.7427368625185341, "grad_norm": 0.0, "learning_rate": 1.985909286528084e-06, "loss": 16.3546, "step": 367680 }, { "epoch": 0.7427570631512179, "grad_norm": 160.3397216796875, "learning_rate": 1.985630781695646e-06, "loss": 16.0622, "step": 367690 }, { "epoch": 0.7427772637839017, "grad_norm": 320.552490234375, "learning_rate": 1.9853522915548777e-06, "loss": 28.14, "step": 367700 }, { "epoch": 0.7427974644165856, "grad_norm": 378.26947021484375, "learning_rate": 1.985073816107136e-06, "loss": 27.7358, "step": 367710 }, { "epoch": 0.7428176650492694, "grad_norm": 176.5840606689453, "learning_rate": 1.984795355353778e-06, "loss": 24.6097, "step": 367720 }, { "epoch": 0.7428378656819532, "grad_norm": 1436.84619140625, "learning_rate": 1.9845169092961643e-06, "loss": 22.231, "step": 367730 }, { "epoch": 0.742858066314637, "grad_norm": 200.1486358642578, "learning_rate": 1.984238477935649e-06, "loss": 17.1255, "step": 367740 }, { "epoch": 0.7428782669473208, "grad_norm": 
455.57196044921875, "learning_rate": 1.9839600612735877e-06, "loss": 16.9428, "step": 367750 }, { "epoch": 0.7428984675800046, "grad_norm": 479.2783203125, "learning_rate": 1.983681659311341e-06, "loss": 26.5434, "step": 367760 }, { "epoch": 0.7429186682126884, "grad_norm": 207.67135620117188, "learning_rate": 1.9834032720502646e-06, "loss": 22.968, "step": 367770 }, { "epoch": 0.7429388688453722, "grad_norm": 313.4549560546875, "learning_rate": 1.9831248994917123e-06, "loss": 23.2637, "step": 367780 }, { "epoch": 0.742959069478056, "grad_norm": 177.2661590576172, "learning_rate": 1.9828465416370434e-06, "loss": 9.194, "step": 367790 }, { "epoch": 0.7429792701107398, "grad_norm": 293.1524353027344, "learning_rate": 1.9825681984876173e-06, "loss": 23.7921, "step": 367800 }, { "epoch": 0.7429994707434237, "grad_norm": 463.06427001953125, "learning_rate": 1.982289870044787e-06, "loss": 24.1851, "step": 367810 }, { "epoch": 0.7430196713761075, "grad_norm": 150.6734161376953, "learning_rate": 1.982011556309908e-06, "loss": 9.0601, "step": 367820 }, { "epoch": 0.7430398720087913, "grad_norm": 360.19970703125, "learning_rate": 1.9817332572843408e-06, "loss": 22.3678, "step": 367830 }, { "epoch": 0.7430600726414751, "grad_norm": 94.46986389160156, "learning_rate": 1.9814549729694395e-06, "loss": 12.3882, "step": 367840 }, { "epoch": 0.7430802732741589, "grad_norm": 250.29745483398438, "learning_rate": 1.9811767033665587e-06, "loss": 44.3132, "step": 367850 }, { "epoch": 0.7431004739068428, "grad_norm": 233.59835815429688, "learning_rate": 1.9808984484770577e-06, "loss": 18.3321, "step": 367860 }, { "epoch": 0.7431206745395266, "grad_norm": 373.7940368652344, "learning_rate": 1.9806202083022906e-06, "loss": 13.86, "step": 367870 }, { "epoch": 0.7431408751722104, "grad_norm": 178.30247497558594, "learning_rate": 1.980341982843616e-06, "loss": 17.0042, "step": 367880 }, { "epoch": 0.7431610758048942, "grad_norm": 291.4422912597656, "learning_rate": 1.980063772102388e-06, "loss": 10.8339, "step": 367890 }, { "epoch": 0.743181276437578, "grad_norm": 419.87493896484375, "learning_rate": 1.979785576079961e-06, "loss": 13.6699, "step": 367900 }, { "epoch": 0.7432014770702619, "grad_norm": 219.6775360107422, "learning_rate": 1.9795073947776955e-06, "loss": 18.5805, "step": 367910 }, { "epoch": 0.7432216777029457, "grad_norm": 129.4747314453125, "learning_rate": 1.979229228196942e-06, "loss": 7.3325, "step": 367920 }, { "epoch": 0.7432418783356295, "grad_norm": 263.1779479980469, "learning_rate": 1.9789510763390605e-06, "loss": 17.6758, "step": 367930 }, { "epoch": 0.7432620789683133, "grad_norm": 149.30722045898438, "learning_rate": 1.978672939205404e-06, "loss": 12.421, "step": 367940 }, { "epoch": 0.7432822796009971, "grad_norm": 261.9346008300781, "learning_rate": 1.9783948167973306e-06, "loss": 21.0661, "step": 367950 }, { "epoch": 0.743302480233681, "grad_norm": 276.88958740234375, "learning_rate": 1.9781167091161944e-06, "loss": 19.6049, "step": 367960 }, { "epoch": 0.7433226808663648, "grad_norm": 366.97808837890625, "learning_rate": 1.977838616163349e-06, "loss": 27.8706, "step": 367970 }, { "epoch": 0.7433428814990486, "grad_norm": 335.1759948730469, "learning_rate": 1.9775605379401534e-06, "loss": 15.9381, "step": 367980 }, { "epoch": 0.7433630821317324, "grad_norm": 501.2055969238281, "learning_rate": 1.9772824744479613e-06, "loss": 26.8819, "step": 367990 }, { "epoch": 0.7433832827644162, "grad_norm": 185.55311584472656, "learning_rate": 1.977004425688126e-06, "loss": 6.9506, "step": 368000 }, 
{ "epoch": 0.7434034833971, "grad_norm": 314.1372375488281, "learning_rate": 1.9767263916620043e-06, "loss": 15.9037, "step": 368010 }, { "epoch": 0.7434236840297838, "grad_norm": 596.9950561523438, "learning_rate": 1.9764483723709555e-06, "loss": 18.8883, "step": 368020 }, { "epoch": 0.7434438846624676, "grad_norm": 353.4117736816406, "learning_rate": 1.9761703678163267e-06, "loss": 14.5118, "step": 368030 }, { "epoch": 0.7434640852951514, "grad_norm": 300.2447509765625, "learning_rate": 1.975892377999477e-06, "loss": 7.5033, "step": 368040 }, { "epoch": 0.7434842859278352, "grad_norm": 43.761016845703125, "learning_rate": 1.9756144029217626e-06, "loss": 15.808, "step": 368050 }, { "epoch": 0.743504486560519, "grad_norm": 447.7980041503906, "learning_rate": 1.975336442584537e-06, "loss": 17.3498, "step": 368060 }, { "epoch": 0.7435246871932029, "grad_norm": 431.0140075683594, "learning_rate": 1.975058496989153e-06, "loss": 9.2232, "step": 368070 }, { "epoch": 0.7435448878258867, "grad_norm": 271.3089904785156, "learning_rate": 1.9747805661369662e-06, "loss": 24.2961, "step": 368080 }, { "epoch": 0.7435650884585705, "grad_norm": 271.5911560058594, "learning_rate": 1.974502650029336e-06, "loss": 26.4251, "step": 368090 }, { "epoch": 0.7435852890912543, "grad_norm": 13.378753662109375, "learning_rate": 1.97422474866761e-06, "loss": 19.3268, "step": 368100 }, { "epoch": 0.7436054897239381, "grad_norm": 551.256103515625, "learning_rate": 1.9739468620531448e-06, "loss": 15.8235, "step": 368110 }, { "epoch": 0.743625690356622, "grad_norm": 234.52906799316406, "learning_rate": 1.973668990187298e-06, "loss": 16.2694, "step": 368120 }, { "epoch": 0.7436458909893058, "grad_norm": 319.56573486328125, "learning_rate": 1.9733911330714213e-06, "loss": 15.377, "step": 368130 }, { "epoch": 0.7436660916219896, "grad_norm": 472.1600036621094, "learning_rate": 1.973113290706867e-06, "loss": 13.2866, "step": 368140 }, { "epoch": 0.7436862922546734, "grad_norm": 90.01313781738281, "learning_rate": 1.9728354630949935e-06, "loss": 6.5488, "step": 368150 }, { "epoch": 0.7437064928873572, "grad_norm": 265.7916259765625, "learning_rate": 1.972557650237153e-06, "loss": 14.189, "step": 368160 }, { "epoch": 0.7437266935200411, "grad_norm": 367.3469543457031, "learning_rate": 1.972279852134697e-06, "loss": 19.1696, "step": 368170 }, { "epoch": 0.7437468941527249, "grad_norm": 360.18450927734375, "learning_rate": 1.972002068788984e-06, "loss": 9.7707, "step": 368180 }, { "epoch": 0.7437670947854087, "grad_norm": 333.741455078125, "learning_rate": 1.9717243002013636e-06, "loss": 20.8472, "step": 368190 }, { "epoch": 0.7437872954180925, "grad_norm": 368.5290832519531, "learning_rate": 1.9714465463731934e-06, "loss": 26.3683, "step": 368200 }, { "epoch": 0.7438074960507763, "grad_norm": 574.5621337890625, "learning_rate": 1.9711688073058262e-06, "loss": 21.3149, "step": 368210 }, { "epoch": 0.7438276966834602, "grad_norm": 483.143798828125, "learning_rate": 1.9708910830006124e-06, "loss": 32.7721, "step": 368220 }, { "epoch": 0.743847897316144, "grad_norm": 59.498416900634766, "learning_rate": 1.97061337345891e-06, "loss": 16.5077, "step": 368230 }, { "epoch": 0.7438680979488278, "grad_norm": 526.3450317382812, "learning_rate": 1.9703356786820687e-06, "loss": 16.4204, "step": 368240 }, { "epoch": 0.7438882985815116, "grad_norm": 532.1583862304688, "learning_rate": 1.970057998671446e-06, "loss": 11.2544, "step": 368250 }, { "epoch": 0.7439084992141954, "grad_norm": 916.2235717773438, "learning_rate": 1.9697803334283906e-06, 
"loss": 21.3499, "step": 368260 }, { "epoch": 0.7439286998468791, "grad_norm": 188.19058227539062, "learning_rate": 1.9695026829542607e-06, "loss": 14.5245, "step": 368270 }, { "epoch": 0.743948900479563, "grad_norm": 342.7704772949219, "learning_rate": 1.969225047250407e-06, "loss": 18.4819, "step": 368280 }, { "epoch": 0.7439691011122468, "grad_norm": 296.64520263671875, "learning_rate": 1.9689474263181814e-06, "loss": 17.9191, "step": 368290 }, { "epoch": 0.7439893017449306, "grad_norm": 271.92340087890625, "learning_rate": 1.9686698201589395e-06, "loss": 17.2526, "step": 368300 }, { "epoch": 0.7440095023776144, "grad_norm": 653.4396362304688, "learning_rate": 1.968392228774034e-06, "loss": 16.0667, "step": 368310 }, { "epoch": 0.7440297030102982, "grad_norm": 137.953857421875, "learning_rate": 1.968114652164815e-06, "loss": 11.8034, "step": 368320 }, { "epoch": 0.7440499036429821, "grad_norm": 158.80421447753906, "learning_rate": 1.967837090332637e-06, "loss": 12.5297, "step": 368330 }, { "epoch": 0.7440701042756659, "grad_norm": 225.25830078125, "learning_rate": 1.967559543278856e-06, "loss": 23.5659, "step": 368340 }, { "epoch": 0.7440903049083497, "grad_norm": 403.22662353515625, "learning_rate": 1.9672820110048207e-06, "loss": 14.903, "step": 368350 }, { "epoch": 0.7441105055410335, "grad_norm": 486.71124267578125, "learning_rate": 1.967004493511884e-06, "loss": 28.5848, "step": 368360 }, { "epoch": 0.7441307061737173, "grad_norm": 417.70623779296875, "learning_rate": 1.966726990801402e-06, "loss": 13.3004, "step": 368370 }, { "epoch": 0.7441509068064012, "grad_norm": 31.580720901489258, "learning_rate": 1.966449502874724e-06, "loss": 9.2592, "step": 368380 }, { "epoch": 0.744171107439085, "grad_norm": 6.129324913024902, "learning_rate": 1.9661720297332014e-06, "loss": 28.3694, "step": 368390 }, { "epoch": 0.7441913080717688, "grad_norm": 278.25164794921875, "learning_rate": 1.9658945713781883e-06, "loss": 20.7193, "step": 368400 }, { "epoch": 0.7442115087044526, "grad_norm": 237.9478302001953, "learning_rate": 1.9656171278110394e-06, "loss": 22.9121, "step": 368410 }, { "epoch": 0.7442317093371364, "grad_norm": 745.4280395507812, "learning_rate": 1.9653396990331043e-06, "loss": 48.5694, "step": 368420 }, { "epoch": 0.7442519099698203, "grad_norm": 570.2730102539062, "learning_rate": 1.965062285045733e-06, "loss": 27.6345, "step": 368430 }, { "epoch": 0.7442721106025041, "grad_norm": 141.58511352539062, "learning_rate": 1.9647848858502825e-06, "loss": 21.2795, "step": 368440 }, { "epoch": 0.7442923112351879, "grad_norm": 430.83642578125, "learning_rate": 1.9645075014481024e-06, "loss": 14.071, "step": 368450 }, { "epoch": 0.7443125118678717, "grad_norm": 197.9840545654297, "learning_rate": 1.964230131840543e-06, "loss": 12.0083, "step": 368460 }, { "epoch": 0.7443327125005555, "grad_norm": 186.36111450195312, "learning_rate": 1.9639527770289586e-06, "loss": 12.0492, "step": 368470 }, { "epoch": 0.7443529131332394, "grad_norm": 369.485107421875, "learning_rate": 1.9636754370146987e-06, "loss": 15.3095, "step": 368480 }, { "epoch": 0.7443731137659232, "grad_norm": 205.3795928955078, "learning_rate": 1.9633981117991186e-06, "loss": 20.2946, "step": 368490 }, { "epoch": 0.744393314398607, "grad_norm": 236.73345947265625, "learning_rate": 1.9631208013835677e-06, "loss": 10.602, "step": 368500 }, { "epoch": 0.7444135150312908, "grad_norm": 22.172441482543945, "learning_rate": 1.9628435057693963e-06, "loss": 10.0532, "step": 368510 }, { "epoch": 0.7444337156639746, "grad_norm": 
456.3912658691406, "learning_rate": 1.9625662249579586e-06, "loss": 21.0028, "step": 368520 }, { "epoch": 0.7444539162966584, "grad_norm": 620.5680541992188, "learning_rate": 1.962288958950603e-06, "loss": 12.2597, "step": 368530 }, { "epoch": 0.7444741169293422, "grad_norm": 368.8133544921875, "learning_rate": 1.9620117077486838e-06, "loss": 15.5148, "step": 368540 }, { "epoch": 0.744494317562026, "grad_norm": 612.2443237304688, "learning_rate": 1.9617344713535503e-06, "loss": 11.3829, "step": 368550 }, { "epoch": 0.7445145181947098, "grad_norm": 190.12802124023438, "learning_rate": 1.9614572497665555e-06, "loss": 40.4976, "step": 368560 }, { "epoch": 0.7445347188273936, "grad_norm": 697.5142211914062, "learning_rate": 1.9611800429890497e-06, "loss": 19.6238, "step": 368570 }, { "epoch": 0.7445549194600775, "grad_norm": 64.01460266113281, "learning_rate": 1.960902851022382e-06, "loss": 25.6799, "step": 368580 }, { "epoch": 0.7445751200927613, "grad_norm": 388.7540283203125, "learning_rate": 1.9606256738679074e-06, "loss": 17.4432, "step": 368590 }, { "epoch": 0.7445953207254451, "grad_norm": 560.4470825195312, "learning_rate": 1.9603485115269743e-06, "loss": 26.4945, "step": 368600 }, { "epoch": 0.7446155213581289, "grad_norm": 796.9616088867188, "learning_rate": 1.960071364000932e-06, "loss": 13.8728, "step": 368610 }, { "epoch": 0.7446357219908127, "grad_norm": 342.3143310546875, "learning_rate": 1.959794231291134e-06, "loss": 13.0166, "step": 368620 }, { "epoch": 0.7446559226234966, "grad_norm": 156.66897583007812, "learning_rate": 1.959517113398933e-06, "loss": 15.2025, "step": 368630 }, { "epoch": 0.7446761232561804, "grad_norm": 993.8450927734375, "learning_rate": 1.959240010325673e-06, "loss": 23.725, "step": 368640 }, { "epoch": 0.7446963238888642, "grad_norm": 696.0406494140625, "learning_rate": 1.958962922072709e-06, "loss": 21.7202, "step": 368650 }, { "epoch": 0.744716524521548, "grad_norm": 275.1782531738281, "learning_rate": 1.9586858486413923e-06, "loss": 15.1027, "step": 368660 }, { "epoch": 0.7447367251542318, "grad_norm": 611.896484375, "learning_rate": 1.958408790033072e-06, "loss": 34.5304, "step": 368670 }, { "epoch": 0.7447569257869157, "grad_norm": 401.76611328125, "learning_rate": 1.958131746249097e-06, "loss": 16.009, "step": 368680 }, { "epoch": 0.7447771264195995, "grad_norm": 310.48065185546875, "learning_rate": 1.9578547172908185e-06, "loss": 25.6982, "step": 368690 }, { "epoch": 0.7447973270522833, "grad_norm": 470.43499755859375, "learning_rate": 1.9575777031595906e-06, "loss": 19.3149, "step": 368700 }, { "epoch": 0.7448175276849671, "grad_norm": 282.6390686035156, "learning_rate": 1.9573007038567565e-06, "loss": 12.1436, "step": 368710 }, { "epoch": 0.7448377283176509, "grad_norm": 0.0, "learning_rate": 1.95702371938367e-06, "loss": 14.1782, "step": 368720 }, { "epoch": 0.7448579289503348, "grad_norm": 705.910400390625, "learning_rate": 1.956746749741682e-06, "loss": 11.0068, "step": 368730 }, { "epoch": 0.7448781295830186, "grad_norm": 953.3394165039062, "learning_rate": 1.9564697949321417e-06, "loss": 15.2012, "step": 368740 }, { "epoch": 0.7448983302157024, "grad_norm": 622.295654296875, "learning_rate": 1.956192854956397e-06, "loss": 31.6843, "step": 368750 }, { "epoch": 0.7449185308483862, "grad_norm": 204.8721466064453, "learning_rate": 1.9559159298158e-06, "loss": 25.308, "step": 368760 }, { "epoch": 0.74493873148107, "grad_norm": 1993.9295654296875, "learning_rate": 1.9556390195117004e-06, "loss": 18.91, "step": 368770 }, { "epoch": 
0.7449589321137539, "grad_norm": 74.58755493164062, "learning_rate": 1.955362124045445e-06, "loss": 23.2488, "step": 368780 }, { "epoch": 0.7449791327464376, "grad_norm": 392.92547607421875, "learning_rate": 1.955085243418387e-06, "loss": 25.7019, "step": 368790 }, { "epoch": 0.7449993333791214, "grad_norm": 441.37969970703125, "learning_rate": 1.9548083776318727e-06, "loss": 10.7071, "step": 368800 }, { "epoch": 0.7450195340118052, "grad_norm": 597.580078125, "learning_rate": 1.9545315266872545e-06, "loss": 16.4676, "step": 368810 }, { "epoch": 0.745039734644489, "grad_norm": 325.194091796875, "learning_rate": 1.95425469058588e-06, "loss": 16.3694, "step": 368820 }, { "epoch": 0.7450599352771728, "grad_norm": 1044.6522216796875, "learning_rate": 1.9539778693290976e-06, "loss": 11.8188, "step": 368830 }, { "epoch": 0.7450801359098567, "grad_norm": 442.4576416015625, "learning_rate": 1.953701062918259e-06, "loss": 20.3352, "step": 368840 }, { "epoch": 0.7451003365425405, "grad_norm": 1567.712158203125, "learning_rate": 1.95342427135471e-06, "loss": 33.4006, "step": 368850 }, { "epoch": 0.7451205371752243, "grad_norm": 390.2557373046875, "learning_rate": 1.953147494639804e-06, "loss": 16.9342, "step": 368860 }, { "epoch": 0.7451407378079081, "grad_norm": 1089.302978515625, "learning_rate": 1.9528707327748853e-06, "loss": 39.3832, "step": 368870 }, { "epoch": 0.745160938440592, "grad_norm": 56.755516052246094, "learning_rate": 1.9525939857613075e-06, "loss": 15.0005, "step": 368880 }, { "epoch": 0.7451811390732758, "grad_norm": 568.4422607421875, "learning_rate": 1.9523172536004165e-06, "loss": 10.7274, "step": 368890 }, { "epoch": 0.7452013397059596, "grad_norm": 51.04914093017578, "learning_rate": 1.95204053629356e-06, "loss": 17.0731, "step": 368900 }, { "epoch": 0.7452215403386434, "grad_norm": 449.2515869140625, "learning_rate": 1.9517638338420898e-06, "loss": 15.5755, "step": 368910 }, { "epoch": 0.7452417409713272, "grad_norm": 463.6322021484375, "learning_rate": 1.9514871462473527e-06, "loss": 19.8048, "step": 368920 }, { "epoch": 0.745261941604011, "grad_norm": 82.99703979492188, "learning_rate": 1.951210473510696e-06, "loss": 19.7288, "step": 368930 }, { "epoch": 0.7452821422366949, "grad_norm": 536.100830078125, "learning_rate": 1.9509338156334695e-06, "loss": 26.1445, "step": 368940 }, { "epoch": 0.7453023428693787, "grad_norm": 776.7899169921875, "learning_rate": 1.9506571726170236e-06, "loss": 15.4703, "step": 368950 }, { "epoch": 0.7453225435020625, "grad_norm": 735.1712036132812, "learning_rate": 1.9503805444627054e-06, "loss": 21.861, "step": 368960 }, { "epoch": 0.7453427441347463, "grad_norm": 1005.1720581054688, "learning_rate": 1.95010393117186e-06, "loss": 33.4139, "step": 368970 }, { "epoch": 0.7453629447674301, "grad_norm": 311.2734069824219, "learning_rate": 1.9498273327458405e-06, "loss": 14.2049, "step": 368980 }, { "epoch": 0.745383145400114, "grad_norm": 22.720521926879883, "learning_rate": 1.9495507491859922e-06, "loss": 21.7195, "step": 368990 }, { "epoch": 0.7454033460327978, "grad_norm": 203.66107177734375, "learning_rate": 1.9492741804936623e-06, "loss": 9.2814, "step": 369000 }, { "epoch": 0.7454235466654816, "grad_norm": 160.48106384277344, "learning_rate": 1.9489976266702e-06, "loss": 17.9409, "step": 369010 }, { "epoch": 0.7454437472981654, "grad_norm": 332.17327880859375, "learning_rate": 1.9487210877169545e-06, "loss": 14.8505, "step": 369020 }, { "epoch": 0.7454639479308492, "grad_norm": 408.85784912109375, "learning_rate": 1.9484445636352724e-06, 
"loss": 14.3842, "step": 369030 }, { "epoch": 0.745484148563533, "grad_norm": 713.8599853515625, "learning_rate": 1.9481680544264995e-06, "loss": 23.6859, "step": 369040 }, { "epoch": 0.7455043491962168, "grad_norm": 602.9070434570312, "learning_rate": 1.9478915600919877e-06, "loss": 18.816, "step": 369050 }, { "epoch": 0.7455245498289006, "grad_norm": 171.35777282714844, "learning_rate": 1.9476150806330816e-06, "loss": 9.5252, "step": 369060 }, { "epoch": 0.7455447504615844, "grad_norm": 36.31989288330078, "learning_rate": 1.947338616051129e-06, "loss": 11.3571, "step": 369070 }, { "epoch": 0.7455649510942682, "grad_norm": 386.3040771484375, "learning_rate": 1.947062166347478e-06, "loss": 32.9928, "step": 369080 }, { "epoch": 0.7455851517269521, "grad_norm": 853.2674560546875, "learning_rate": 1.9467857315234746e-06, "loss": 23.3887, "step": 369090 }, { "epoch": 0.7456053523596359, "grad_norm": 237.78518676757812, "learning_rate": 1.946509311580469e-06, "loss": 20.5703, "step": 369100 }, { "epoch": 0.7456255529923197, "grad_norm": 693.06396484375, "learning_rate": 1.946232906519806e-06, "loss": 24.2468, "step": 369110 }, { "epoch": 0.7456457536250035, "grad_norm": 171.52392578125, "learning_rate": 1.9459565163428322e-06, "loss": 21.2309, "step": 369120 }, { "epoch": 0.7456659542576873, "grad_norm": 453.17706298828125, "learning_rate": 1.945680141050898e-06, "loss": 17.6323, "step": 369130 }, { "epoch": 0.7456861548903712, "grad_norm": 78.51905059814453, "learning_rate": 1.945403780645346e-06, "loss": 4.4487, "step": 369140 }, { "epoch": 0.745706355523055, "grad_norm": 19.49560546875, "learning_rate": 1.945127435127528e-06, "loss": 21.8167, "step": 369150 }, { "epoch": 0.7457265561557388, "grad_norm": 395.67755126953125, "learning_rate": 1.9448511044987862e-06, "loss": 17.1365, "step": 369160 }, { "epoch": 0.7457467567884226, "grad_norm": 508.52960205078125, "learning_rate": 1.944574788760471e-06, "loss": 27.2976, "step": 369170 }, { "epoch": 0.7457669574211064, "grad_norm": 603.98095703125, "learning_rate": 1.944298487913928e-06, "loss": 62.7787, "step": 369180 }, { "epoch": 0.7457871580537903, "grad_norm": 387.5748596191406, "learning_rate": 1.9440222019605022e-06, "loss": 22.3551, "step": 369190 }, { "epoch": 0.7458073586864741, "grad_norm": 0.17242459952831268, "learning_rate": 1.9437459309015426e-06, "loss": 6.1827, "step": 369200 }, { "epoch": 0.7458275593191579, "grad_norm": 0.0, "learning_rate": 1.9434696747383946e-06, "loss": 21.0599, "step": 369210 }, { "epoch": 0.7458477599518417, "grad_norm": 272.6597595214844, "learning_rate": 1.9431934334724035e-06, "loss": 20.5769, "step": 369220 }, { "epoch": 0.7458679605845255, "grad_norm": 90.93065643310547, "learning_rate": 1.942917207104917e-06, "loss": 19.9338, "step": 369230 }, { "epoch": 0.7458881612172094, "grad_norm": 374.5639953613281, "learning_rate": 1.942640995637284e-06, "loss": 18.8268, "step": 369240 }, { "epoch": 0.7459083618498932, "grad_norm": 261.09100341796875, "learning_rate": 1.942364799070845e-06, "loss": 32.9503, "step": 369250 }, { "epoch": 0.745928562482577, "grad_norm": 687.7623901367188, "learning_rate": 1.9420886174069486e-06, "loss": 18.6524, "step": 369260 }, { "epoch": 0.7459487631152608, "grad_norm": 505.5316467285156, "learning_rate": 1.9418124506469437e-06, "loss": 13.9148, "step": 369270 }, { "epoch": 0.7459689637479446, "grad_norm": 334.58233642578125, "learning_rate": 1.9415362987921737e-06, "loss": 12.9861, "step": 369280 }, { "epoch": 0.7459891643806285, "grad_norm": 99.5759048461914, 
"learning_rate": 1.9412601618439834e-06, "loss": 19.2453, "step": 369290 }, { "epoch": 0.7460093650133122, "grad_norm": 322.212890625, "learning_rate": 1.94098403980372e-06, "loss": 21.7992, "step": 369300 }, { "epoch": 0.746029565645996, "grad_norm": 528.5283203125, "learning_rate": 1.940707932672733e-06, "loss": 8.8681, "step": 369310 }, { "epoch": 0.7460497662786798, "grad_norm": 255.1359405517578, "learning_rate": 1.9404318404523605e-06, "loss": 15.9722, "step": 369320 }, { "epoch": 0.7460699669113636, "grad_norm": 46.65135192871094, "learning_rate": 1.9401557631439526e-06, "loss": 27.8944, "step": 369330 }, { "epoch": 0.7460901675440474, "grad_norm": 284.31573486328125, "learning_rate": 1.939879700748856e-06, "loss": 16.722, "step": 369340 }, { "epoch": 0.7461103681767313, "grad_norm": 125.75949096679688, "learning_rate": 1.939603653268414e-06, "loss": 73.6868, "step": 369350 }, { "epoch": 0.7461305688094151, "grad_norm": 289.5494689941406, "learning_rate": 1.9393276207039717e-06, "loss": 17.8466, "step": 369360 }, { "epoch": 0.7461507694420989, "grad_norm": 419.5796203613281, "learning_rate": 1.9390516030568767e-06, "loss": 19.4291, "step": 369370 }, { "epoch": 0.7461709700747827, "grad_norm": 228.23182678222656, "learning_rate": 1.938775600328473e-06, "loss": 15.3995, "step": 369380 }, { "epoch": 0.7461911707074665, "grad_norm": 398.65203857421875, "learning_rate": 1.9384996125201045e-06, "loss": 23.2901, "step": 369390 }, { "epoch": 0.7462113713401504, "grad_norm": 32.43281555175781, "learning_rate": 1.938223639633119e-06, "loss": 25.4916, "step": 369400 }, { "epoch": 0.7462315719728342, "grad_norm": 576.2562866210938, "learning_rate": 1.937947681668858e-06, "loss": 17.5294, "step": 369410 }, { "epoch": 0.746251772605518, "grad_norm": 129.07254028320312, "learning_rate": 1.9376717386286703e-06, "loss": 17.2375, "step": 369420 }, { "epoch": 0.7462719732382018, "grad_norm": 769.473388671875, "learning_rate": 1.9373958105138997e-06, "loss": 14.399, "step": 369430 }, { "epoch": 0.7462921738708856, "grad_norm": 538.255126953125, "learning_rate": 1.937119897325889e-06, "loss": 16.4073, "step": 369440 }, { "epoch": 0.7463123745035695, "grad_norm": 267.184326171875, "learning_rate": 1.936843999065985e-06, "loss": 30.4707, "step": 369450 }, { "epoch": 0.7463325751362533, "grad_norm": 5.306746482849121, "learning_rate": 1.936568115735531e-06, "loss": 27.5811, "step": 369460 }, { "epoch": 0.7463527757689371, "grad_norm": 548.7133178710938, "learning_rate": 1.9362922473358735e-06, "loss": 22.3497, "step": 369470 }, { "epoch": 0.7463729764016209, "grad_norm": 367.3851013183594, "learning_rate": 1.936016393868355e-06, "loss": 35.1907, "step": 369480 }, { "epoch": 0.7463931770343047, "grad_norm": 699.6803588867188, "learning_rate": 1.9357405553343224e-06, "loss": 14.7062, "step": 369490 }, { "epoch": 0.7464133776669886, "grad_norm": 171.78594970703125, "learning_rate": 1.9354647317351187e-06, "loss": 10.5206, "step": 369500 }, { "epoch": 0.7464335782996724, "grad_norm": 186.3427276611328, "learning_rate": 1.9351889230720866e-06, "loss": 7.8387, "step": 369510 }, { "epoch": 0.7464537789323562, "grad_norm": 393.7889404296875, "learning_rate": 1.9349131293465732e-06, "loss": 12.3001, "step": 369520 }, { "epoch": 0.74647397956504, "grad_norm": 475.2576599121094, "learning_rate": 1.934637350559922e-06, "loss": 30.4372, "step": 369530 }, { "epoch": 0.7464941801977238, "grad_norm": 401.1710510253906, "learning_rate": 1.9343615867134748e-06, "loss": 44.3436, "step": 369540 }, { "epoch": 
0.7465143808304076, "grad_norm": 312.4425354003906, "learning_rate": 1.9340858378085777e-06, "loss": 19.5031, "step": 369550 }, { "epoch": 0.7465345814630914, "grad_norm": 565.4642333984375, "learning_rate": 1.933810103846575e-06, "loss": 24.1692, "step": 369560 }, { "epoch": 0.7465547820957752, "grad_norm": 597.1283569335938, "learning_rate": 1.933534384828811e-06, "loss": 21.642, "step": 369570 }, { "epoch": 0.746574982728459, "grad_norm": 151.046875, "learning_rate": 1.933258680756627e-06, "loss": 12.984, "step": 369580 }, { "epoch": 0.7465951833611428, "grad_norm": 240.71826171875, "learning_rate": 1.9329829916313684e-06, "loss": 8.2591, "step": 369590 }, { "epoch": 0.7466153839938267, "grad_norm": 202.32093811035156, "learning_rate": 1.93270731745438e-06, "loss": 25.7117, "step": 369600 }, { "epoch": 0.7466355846265105, "grad_norm": 500.51055908203125, "learning_rate": 1.9324316582270025e-06, "loss": 11.8643, "step": 369610 }, { "epoch": 0.7466557852591943, "grad_norm": 385.1941833496094, "learning_rate": 1.93215601395058e-06, "loss": 20.8431, "step": 369620 }, { "epoch": 0.7466759858918781, "grad_norm": 513.0110473632812, "learning_rate": 1.93188038462646e-06, "loss": 18.9674, "step": 369630 }, { "epoch": 0.7466961865245619, "grad_norm": 902.2584228515625, "learning_rate": 1.931604770255982e-06, "loss": 22.0942, "step": 369640 }, { "epoch": 0.7467163871572458, "grad_norm": 372.42059326171875, "learning_rate": 1.9313291708404885e-06, "loss": 12.7421, "step": 369650 }, { "epoch": 0.7467365877899296, "grad_norm": 527.2333984375, "learning_rate": 1.9310535863813266e-06, "loss": 20.7038, "step": 369660 }, { "epoch": 0.7467567884226134, "grad_norm": 530.1795043945312, "learning_rate": 1.9307780168798374e-06, "loss": 17.0725, "step": 369670 }, { "epoch": 0.7467769890552972, "grad_norm": 121.6954116821289, "learning_rate": 1.930502462337362e-06, "loss": 16.0094, "step": 369680 }, { "epoch": 0.746797189687981, "grad_norm": 450.1278991699219, "learning_rate": 1.9302269227552465e-06, "loss": 22.1541, "step": 369690 }, { "epoch": 0.7468173903206649, "grad_norm": 278.0421142578125, "learning_rate": 1.929951398134832e-06, "loss": 19.6431, "step": 369700 }, { "epoch": 0.7468375909533487, "grad_norm": 3.5465619564056396, "learning_rate": 1.9296758884774624e-06, "loss": 15.6716, "step": 369710 }, { "epoch": 0.7468577915860325, "grad_norm": 555.057861328125, "learning_rate": 1.9294003937844806e-06, "loss": 19.467, "step": 369720 }, { "epoch": 0.7468779922187163, "grad_norm": 412.9222412109375, "learning_rate": 1.9291249140572275e-06, "loss": 16.6906, "step": 369730 }, { "epoch": 0.7468981928514001, "grad_norm": 943.5767822265625, "learning_rate": 1.9288494492970487e-06, "loss": 17.9049, "step": 369740 }, { "epoch": 0.746918393484084, "grad_norm": 188.947265625, "learning_rate": 1.928573999505284e-06, "loss": 19.5368, "step": 369750 }, { "epoch": 0.7469385941167678, "grad_norm": 436.8934326171875, "learning_rate": 1.928298564683278e-06, "loss": 10.6879, "step": 369760 }, { "epoch": 0.7469587947494516, "grad_norm": 188.24301147460938, "learning_rate": 1.928023144832371e-06, "loss": 12.9682, "step": 369770 }, { "epoch": 0.7469789953821354, "grad_norm": 294.8847961425781, "learning_rate": 1.927747739953908e-06, "loss": 17.8445, "step": 369780 }, { "epoch": 0.7469991960148192, "grad_norm": 155.76507568359375, "learning_rate": 1.9274723500492304e-06, "loss": 11.2441, "step": 369790 }, { "epoch": 0.747019396647503, "grad_norm": 534.6624145507812, "learning_rate": 1.927196975119678e-06, "loss": 13.1074, 
"step": 369800 }, { "epoch": 0.7470395972801868, "grad_norm": 279.3785400390625, "learning_rate": 1.926921615166596e-06, "loss": 17.9435, "step": 369810 }, { "epoch": 0.7470597979128706, "grad_norm": 627.2294921875, "learning_rate": 1.926646270191326e-06, "loss": 16.5889, "step": 369820 }, { "epoch": 0.7470799985455544, "grad_norm": 327.37646484375, "learning_rate": 1.9263709401952076e-06, "loss": 25.8833, "step": 369830 }, { "epoch": 0.7471001991782382, "grad_norm": 108.8963394165039, "learning_rate": 1.926095625179584e-06, "loss": 16.5024, "step": 369840 }, { "epoch": 0.747120399810922, "grad_norm": 233.37591552734375, "learning_rate": 1.9258203251458012e-06, "loss": 17.9614, "step": 369850 }, { "epoch": 0.7471406004436059, "grad_norm": 440.4516906738281, "learning_rate": 1.9255450400951937e-06, "loss": 21.2581, "step": 369860 }, { "epoch": 0.7471608010762897, "grad_norm": 380.8348693847656, "learning_rate": 1.925269770029107e-06, "loss": 9.627, "step": 369870 }, { "epoch": 0.7471810017089735, "grad_norm": 127.53816986083984, "learning_rate": 1.924994514948884e-06, "loss": 8.6012, "step": 369880 }, { "epoch": 0.7472012023416573, "grad_norm": 287.0458984375, "learning_rate": 1.9247192748558648e-06, "loss": 14.8855, "step": 369890 }, { "epoch": 0.7472214029743411, "grad_norm": 413.32171630859375, "learning_rate": 1.9244440497513895e-06, "loss": 21.4359, "step": 369900 }, { "epoch": 0.747241603607025, "grad_norm": 420.88275146484375, "learning_rate": 1.9241688396368e-06, "loss": 16.8756, "step": 369910 }, { "epoch": 0.7472618042397088, "grad_norm": 814.9009399414062, "learning_rate": 1.923893644513443e-06, "loss": 17.2182, "step": 369920 }, { "epoch": 0.7472820048723926, "grad_norm": 73.95588684082031, "learning_rate": 1.9236184643826515e-06, "loss": 22.9281, "step": 369930 }, { "epoch": 0.7473022055050764, "grad_norm": 147.46900939941406, "learning_rate": 1.9233432992457708e-06, "loss": 15.6789, "step": 369940 }, { "epoch": 0.7473224061377602, "grad_norm": 506.54510498046875, "learning_rate": 1.9230681491041425e-06, "loss": 28.0695, "step": 369950 }, { "epoch": 0.7473426067704441, "grad_norm": 422.76824951171875, "learning_rate": 1.9227930139591077e-06, "loss": 21.3278, "step": 369960 }, { "epoch": 0.7473628074031279, "grad_norm": 919.2847290039062, "learning_rate": 1.922517893812004e-06, "loss": 16.2156, "step": 369970 }, { "epoch": 0.7473830080358117, "grad_norm": 392.3123779296875, "learning_rate": 1.9222427886641774e-06, "loss": 16.2128, "step": 369980 }, { "epoch": 0.7474032086684955, "grad_norm": 311.8834228515625, "learning_rate": 1.921967698516966e-06, "loss": 23.6828, "step": 369990 }, { "epoch": 0.7474234093011793, "grad_norm": 256.5654296875, "learning_rate": 1.9216926233717087e-06, "loss": 21.8324, "step": 370000 }, { "epoch": 0.7474436099338632, "grad_norm": 71.87821960449219, "learning_rate": 1.9214175632297503e-06, "loss": 13.2317, "step": 370010 }, { "epoch": 0.747463810566547, "grad_norm": 28.057043075561523, "learning_rate": 1.9211425180924274e-06, "loss": 19.1946, "step": 370020 }, { "epoch": 0.7474840111992308, "grad_norm": 113.76498413085938, "learning_rate": 1.920867487961084e-06, "loss": 12.4828, "step": 370030 }, { "epoch": 0.7475042118319146, "grad_norm": 341.0687561035156, "learning_rate": 1.920592472837057e-06, "loss": 27.8353, "step": 370040 }, { "epoch": 0.7475244124645984, "grad_norm": 912.1026000976562, "learning_rate": 1.920317472721691e-06, "loss": 31.9558, "step": 370050 }, { "epoch": 0.7475446130972823, "grad_norm": 287.0536193847656, "learning_rate": 
1.9200424876163244e-06, "loss": 10.8399, "step": 370060 }, { "epoch": 0.747564813729966, "grad_norm": 523.6887817382812, "learning_rate": 1.9197675175222954e-06, "loss": 17.1597, "step": 370070 }, { "epoch": 0.7475850143626498, "grad_norm": 408.5846862792969, "learning_rate": 1.919492562440947e-06, "loss": 10.8312, "step": 370080 }, { "epoch": 0.7476052149953336, "grad_norm": 1629.015380859375, "learning_rate": 1.919217622373617e-06, "loss": 33.6798, "step": 370090 }, { "epoch": 0.7476254156280174, "grad_norm": 375.0655212402344, "learning_rate": 1.9189426973216478e-06, "loss": 20.4349, "step": 370100 }, { "epoch": 0.7476456162607013, "grad_norm": 109.400146484375, "learning_rate": 1.918667787286379e-06, "loss": 20.1551, "step": 370110 }, { "epoch": 0.7476658168933851, "grad_norm": 301.20184326171875, "learning_rate": 1.9183928922691474e-06, "loss": 19.1174, "step": 370120 }, { "epoch": 0.7476860175260689, "grad_norm": 139.5936279296875, "learning_rate": 1.918118012271297e-06, "loss": 10.2495, "step": 370130 }, { "epoch": 0.7477062181587527, "grad_norm": 35.07410430908203, "learning_rate": 1.917843147294166e-06, "loss": 10.8955, "step": 370140 }, { "epoch": 0.7477264187914365, "grad_norm": 687.4677734375, "learning_rate": 1.917568297339091e-06, "loss": 24.3905, "step": 370150 }, { "epoch": 0.7477466194241204, "grad_norm": 777.4478149414062, "learning_rate": 1.9172934624074153e-06, "loss": 28.4684, "step": 370160 }, { "epoch": 0.7477668200568042, "grad_norm": 251.4214324951172, "learning_rate": 1.9170186425004805e-06, "loss": 12.5224, "step": 370170 }, { "epoch": 0.747787020689488, "grad_norm": 622.1026000976562, "learning_rate": 1.916743837619619e-06, "loss": 19.8279, "step": 370180 }, { "epoch": 0.7478072213221718, "grad_norm": 658.2227783203125, "learning_rate": 1.9164690477661746e-06, "loss": 24.3781, "step": 370190 }, { "epoch": 0.7478274219548556, "grad_norm": 0.4346986413002014, "learning_rate": 1.9161942729414876e-06, "loss": 28.2475, "step": 370200 }, { "epoch": 0.7478476225875395, "grad_norm": 338.2659912109375, "learning_rate": 1.9159195131468955e-06, "loss": 13.7039, "step": 370210 }, { "epoch": 0.7478678232202233, "grad_norm": 345.5289001464844, "learning_rate": 1.9156447683837365e-06, "loss": 15.8404, "step": 370220 }, { "epoch": 0.7478880238529071, "grad_norm": 229.8165740966797, "learning_rate": 1.9153700386533502e-06, "loss": 24.3089, "step": 370230 }, { "epoch": 0.7479082244855909, "grad_norm": 400.5947265625, "learning_rate": 1.9150953239570784e-06, "loss": 11.5835, "step": 370240 }, { "epoch": 0.7479284251182747, "grad_norm": 289.86627197265625, "learning_rate": 1.9148206242962575e-06, "loss": 7.1895, "step": 370250 }, { "epoch": 0.7479486257509586, "grad_norm": 267.56402587890625, "learning_rate": 1.9145459396722248e-06, "loss": 21.1082, "step": 370260 }, { "epoch": 0.7479688263836424, "grad_norm": 563.6002197265625, "learning_rate": 1.914271270086323e-06, "loss": 13.0804, "step": 370270 }, { "epoch": 0.7479890270163262, "grad_norm": 114.8333740234375, "learning_rate": 1.9139966155398894e-06, "loss": 13.9258, "step": 370280 }, { "epoch": 0.74800922764901, "grad_norm": 452.27496337890625, "learning_rate": 1.913721976034259e-06, "loss": 21.8796, "step": 370290 }, { "epoch": 0.7480294282816938, "grad_norm": 278.9789733886719, "learning_rate": 1.913447351570776e-06, "loss": 30.854, "step": 370300 }, { "epoch": 0.7480496289143777, "grad_norm": 602.8372802734375, "learning_rate": 1.913172742150774e-06, "loss": 19.3653, "step": 370310 }, { "epoch": 0.7480698295470614, 
"grad_norm": 317.3916931152344, "learning_rate": 1.912898147775596e-06, "loss": 11.7023, "step": 370320 }, { "epoch": 0.7480900301797452, "grad_norm": 201.62997436523438, "learning_rate": 1.912623568446578e-06, "loss": 17.2515, "step": 370330 }, { "epoch": 0.748110230812429, "grad_norm": 344.6565246582031, "learning_rate": 1.9123490041650556e-06, "loss": 11.0799, "step": 370340 }, { "epoch": 0.7481304314451128, "grad_norm": 315.00152587890625, "learning_rate": 1.912074454932372e-06, "loss": 24.2449, "step": 370350 }, { "epoch": 0.7481506320777966, "grad_norm": 229.8583984375, "learning_rate": 1.911799920749861e-06, "loss": 8.638, "step": 370360 }, { "epoch": 0.7481708327104805, "grad_norm": 397.9170837402344, "learning_rate": 1.911525401618865e-06, "loss": 25.7534, "step": 370370 }, { "epoch": 0.7481910333431643, "grad_norm": 218.24066162109375, "learning_rate": 1.9112508975407173e-06, "loss": 32.6129, "step": 370380 }, { "epoch": 0.7482112339758481, "grad_norm": 260.3525390625, "learning_rate": 1.9109764085167604e-06, "loss": 22.2521, "step": 370390 }, { "epoch": 0.7482314346085319, "grad_norm": 249.40843200683594, "learning_rate": 1.910701934548329e-06, "loss": 18.8474, "step": 370400 }, { "epoch": 0.7482516352412157, "grad_norm": 347.7652282714844, "learning_rate": 1.9104274756367606e-06, "loss": 14.7549, "step": 370410 }, { "epoch": 0.7482718358738996, "grad_norm": 585.7051391601562, "learning_rate": 1.9101530317833957e-06, "loss": 17.4191, "step": 370420 }, { "epoch": 0.7482920365065834, "grad_norm": 986.23291015625, "learning_rate": 1.9098786029895698e-06, "loss": 27.654, "step": 370430 }, { "epoch": 0.7483122371392672, "grad_norm": 522.16796875, "learning_rate": 1.909604189256619e-06, "loss": 28.5049, "step": 370440 }, { "epoch": 0.748332437771951, "grad_norm": 580.5750122070312, "learning_rate": 1.9093297905858833e-06, "loss": 14.9824, "step": 370450 }, { "epoch": 0.7483526384046348, "grad_norm": 461.26007080078125, "learning_rate": 1.909055406978702e-06, "loss": 14.1847, "step": 370460 }, { "epoch": 0.7483728390373187, "grad_norm": 438.3565368652344, "learning_rate": 1.908781038436407e-06, "loss": 20.7148, "step": 370470 }, { "epoch": 0.7483930396700025, "grad_norm": 304.63580322265625, "learning_rate": 1.9085066849603377e-06, "loss": 14.8347, "step": 370480 }, { "epoch": 0.7484132403026863, "grad_norm": 596.1948852539062, "learning_rate": 1.908232346551834e-06, "loss": 19.4836, "step": 370490 }, { "epoch": 0.7484334409353701, "grad_norm": 506.2995910644531, "learning_rate": 1.90795802321223e-06, "loss": 24.1062, "step": 370500 }, { "epoch": 0.7484536415680539, "grad_norm": 343.956787109375, "learning_rate": 1.907683714942863e-06, "loss": 16.1206, "step": 370510 }, { "epoch": 0.7484738422007378, "grad_norm": 139.79502868652344, "learning_rate": 1.90740942174507e-06, "loss": 23.2229, "step": 370520 }, { "epoch": 0.7484940428334216, "grad_norm": 329.6191711425781, "learning_rate": 1.9071351436201918e-06, "loss": 12.6936, "step": 370530 }, { "epoch": 0.7485142434661054, "grad_norm": 618.066162109375, "learning_rate": 1.9068608805695588e-06, "loss": 16.9842, "step": 370540 }, { "epoch": 0.7485344440987892, "grad_norm": 103.03219604492188, "learning_rate": 1.9065866325945099e-06, "loss": 28.5514, "step": 370550 }, { "epoch": 0.748554644731473, "grad_norm": 645.9698486328125, "learning_rate": 1.906312399696385e-06, "loss": 19.0781, "step": 370560 }, { "epoch": 0.7485748453641569, "grad_norm": 230.70558166503906, "learning_rate": 1.9060381818765177e-06, "loss": 17.82, "step": 370570 }, { 
"epoch": 0.7485950459968406, "grad_norm": 216.55380249023438, "learning_rate": 1.9057639791362437e-06, "loss": 18.2331, "step": 370580 }, { "epoch": 0.7486152466295244, "grad_norm": 80.94849395751953, "learning_rate": 1.9054897914769028e-06, "loss": 20.7204, "step": 370590 }, { "epoch": 0.7486354472622082, "grad_norm": 348.4039611816406, "learning_rate": 1.9052156188998284e-06, "loss": 8.6221, "step": 370600 }, { "epoch": 0.748655647894892, "grad_norm": 256.8065185546875, "learning_rate": 1.9049414614063566e-06, "loss": 18.6516, "step": 370610 }, { "epoch": 0.7486758485275758, "grad_norm": 596.1468505859375, "learning_rate": 1.9046673189978266e-06, "loss": 17.3199, "step": 370620 }, { "epoch": 0.7486960491602597, "grad_norm": 148.25144958496094, "learning_rate": 1.904393191675571e-06, "loss": 22.5702, "step": 370630 }, { "epoch": 0.7487162497929435, "grad_norm": 331.7828674316406, "learning_rate": 1.9041190794409287e-06, "loss": 16.0017, "step": 370640 }, { "epoch": 0.7487364504256273, "grad_norm": 85.73043823242188, "learning_rate": 1.9038449822952331e-06, "loss": 10.475, "step": 370650 }, { "epoch": 0.7487566510583111, "grad_norm": 2970.264404296875, "learning_rate": 1.9035709002398234e-06, "loss": 36.6707, "step": 370660 }, { "epoch": 0.748776851690995, "grad_norm": 318.8187561035156, "learning_rate": 1.9032968332760331e-06, "loss": 14.7505, "step": 370670 }, { "epoch": 0.7487970523236788, "grad_norm": 275.8929748535156, "learning_rate": 1.903022781405197e-06, "loss": 10.1959, "step": 370680 }, { "epoch": 0.7488172529563626, "grad_norm": 53.88206100463867, "learning_rate": 1.902748744628654e-06, "loss": 18.7687, "step": 370690 }, { "epoch": 0.7488374535890464, "grad_norm": 460.0052185058594, "learning_rate": 1.9024747229477365e-06, "loss": 20.5616, "step": 370700 }, { "epoch": 0.7488576542217302, "grad_norm": 275.89794921875, "learning_rate": 1.9022007163637829e-06, "loss": 19.8112, "step": 370710 }, { "epoch": 0.748877854854414, "grad_norm": 557.7904052734375, "learning_rate": 1.9019267248781276e-06, "loss": 12.6103, "step": 370720 }, { "epoch": 0.7488980554870979, "grad_norm": 845.3037109375, "learning_rate": 1.9016527484921037e-06, "loss": 20.1345, "step": 370730 }, { "epoch": 0.7489182561197817, "grad_norm": 370.7670593261719, "learning_rate": 1.9013787872070506e-06, "loss": 25.4725, "step": 370740 }, { "epoch": 0.7489384567524655, "grad_norm": 131.9552764892578, "learning_rate": 1.9011048410243011e-06, "loss": 36.2493, "step": 370750 }, { "epoch": 0.7489586573851493, "grad_norm": 355.9578552246094, "learning_rate": 1.900830909945189e-06, "loss": 21.0548, "step": 370760 }, { "epoch": 0.7489788580178331, "grad_norm": 73.87676239013672, "learning_rate": 1.900556993971051e-06, "loss": 10.1273, "step": 370770 }, { "epoch": 0.748999058650517, "grad_norm": 938.6024780273438, "learning_rate": 1.9002830931032262e-06, "loss": 20.8951, "step": 370780 }, { "epoch": 0.7490192592832008, "grad_norm": 505.7022705078125, "learning_rate": 1.900009207343042e-06, "loss": 18.4569, "step": 370790 }, { "epoch": 0.7490394599158846, "grad_norm": 200.5887451171875, "learning_rate": 1.8997353366918369e-06, "loss": 19.4817, "step": 370800 }, { "epoch": 0.7490596605485684, "grad_norm": 267.5477600097656, "learning_rate": 1.8994614811509475e-06, "loss": 17.4729, "step": 370810 }, { "epoch": 0.7490798611812522, "grad_norm": 203.84954833984375, "learning_rate": 1.8991876407217068e-06, "loss": 15.9631, "step": 370820 }, { "epoch": 0.749100061813936, "grad_norm": 203.7384796142578, "learning_rate": 
1.8989138154054482e-06, "loss": 33.4527, "step": 370830 }, { "epoch": 0.7491202624466198, "grad_norm": 657.7221069335938, "learning_rate": 1.898640005203507e-06, "loss": 26.5684, "step": 370840 }, { "epoch": 0.7491404630793036, "grad_norm": 270.6231384277344, "learning_rate": 1.8983662101172217e-06, "loss": 7.4513, "step": 370850 }, { "epoch": 0.7491606637119874, "grad_norm": 1.5963205099105835, "learning_rate": 1.8980924301479199e-06, "loss": 25.2977, "step": 370860 }, { "epoch": 0.7491808643446712, "grad_norm": 360.6601867675781, "learning_rate": 1.8978186652969394e-06, "loss": 25.598, "step": 370870 }, { "epoch": 0.7492010649773551, "grad_norm": 296.4714050292969, "learning_rate": 1.8975449155656162e-06, "loss": 19.0337, "step": 370880 }, { "epoch": 0.7492212656100389, "grad_norm": 253.92222595214844, "learning_rate": 1.897271180955283e-06, "loss": 15.944, "step": 370890 }, { "epoch": 0.7492414662427227, "grad_norm": 1114.2794189453125, "learning_rate": 1.896997461467272e-06, "loss": 18.0087, "step": 370900 }, { "epoch": 0.7492616668754065, "grad_norm": 29.295988082885742, "learning_rate": 1.8967237571029207e-06, "loss": 12.9869, "step": 370910 }, { "epoch": 0.7492818675080903, "grad_norm": 71.8803482055664, "learning_rate": 1.896450067863561e-06, "loss": 10.9449, "step": 370920 }, { "epoch": 0.7493020681407742, "grad_norm": 293.2554016113281, "learning_rate": 1.8961763937505262e-06, "loss": 12.5658, "step": 370930 }, { "epoch": 0.749322268773458, "grad_norm": 281.9087219238281, "learning_rate": 1.8959027347651527e-06, "loss": 11.3054, "step": 370940 }, { "epoch": 0.7493424694061418, "grad_norm": 805.2517700195312, "learning_rate": 1.895629090908771e-06, "loss": 19.4157, "step": 370950 }, { "epoch": 0.7493626700388256, "grad_norm": 286.0586242675781, "learning_rate": 1.895355462182718e-06, "loss": 21.2542, "step": 370960 }, { "epoch": 0.7493828706715094, "grad_norm": 423.8516845703125, "learning_rate": 1.8950818485883248e-06, "loss": 12.2588, "step": 370970 }, { "epoch": 0.7494030713041933, "grad_norm": 0.0, "learning_rate": 1.8948082501269272e-06, "loss": 12.0584, "step": 370980 }, { "epoch": 0.7494232719368771, "grad_norm": 943.635498046875, "learning_rate": 1.8945346667998566e-06, "loss": 12.7964, "step": 370990 }, { "epoch": 0.7494434725695609, "grad_norm": 670.3473510742188, "learning_rate": 1.8942610986084487e-06, "loss": 16.2354, "step": 371000 }, { "epoch": 0.7494636732022447, "grad_norm": 621.8496704101562, "learning_rate": 1.8939875455540352e-06, "loss": 26.1257, "step": 371010 }, { "epoch": 0.7494838738349285, "grad_norm": 114.41722106933594, "learning_rate": 1.8937140076379484e-06, "loss": 33.2578, "step": 371020 }, { "epoch": 0.7495040744676124, "grad_norm": 399.6213684082031, "learning_rate": 1.8934404848615245e-06, "loss": 16.3593, "step": 371030 }, { "epoch": 0.7495242751002962, "grad_norm": 555.8201293945312, "learning_rate": 1.8931669772260946e-06, "loss": 12.3215, "step": 371040 }, { "epoch": 0.74954447573298, "grad_norm": 191.23582458496094, "learning_rate": 1.8928934847329905e-06, "loss": 15.6306, "step": 371050 }, { "epoch": 0.7495646763656638, "grad_norm": 353.723876953125, "learning_rate": 1.8926200073835466e-06, "loss": 32.8761, "step": 371060 }, { "epoch": 0.7495848769983476, "grad_norm": 422.55938720703125, "learning_rate": 1.8923465451790997e-06, "loss": 18.5121, "step": 371070 }, { "epoch": 0.7496050776310315, "grad_norm": 154.19549560546875, "learning_rate": 1.892073098120975e-06, "loss": 11.1005, "step": 371080 }, { "epoch": 0.7496252782637152, 
"grad_norm": 859.382080078125, "learning_rate": 1.8917996662105092e-06, "loss": 25.7056, "step": 371090 }, { "epoch": 0.749645478896399, "grad_norm": 477.7334899902344, "learning_rate": 1.8915262494490366e-06, "loss": 19.7303, "step": 371100 }, { "epoch": 0.7496656795290828, "grad_norm": 570.6571655273438, "learning_rate": 1.8912528478378877e-06, "loss": 33.5792, "step": 371110 }, { "epoch": 0.7496858801617666, "grad_norm": 196.02267456054688, "learning_rate": 1.8909794613783943e-06, "loss": 27.0937, "step": 371120 }, { "epoch": 0.7497060807944504, "grad_norm": 53.689823150634766, "learning_rate": 1.8907060900718894e-06, "loss": 16.336, "step": 371130 }, { "epoch": 0.7497262814271343, "grad_norm": 1805.9793701171875, "learning_rate": 1.8904327339197098e-06, "loss": 11.4277, "step": 371140 }, { "epoch": 0.7497464820598181, "grad_norm": 316.41802978515625, "learning_rate": 1.8901593929231804e-06, "loss": 27.9684, "step": 371150 }, { "epoch": 0.7497666826925019, "grad_norm": 128.99745178222656, "learning_rate": 1.8898860670836367e-06, "loss": 14.9042, "step": 371160 }, { "epoch": 0.7497868833251857, "grad_norm": 23.5936222076416, "learning_rate": 1.8896127564024124e-06, "loss": 19.2889, "step": 371170 }, { "epoch": 0.7498070839578695, "grad_norm": 121.90223693847656, "learning_rate": 1.8893394608808391e-06, "loss": 27.0936, "step": 371180 }, { "epoch": 0.7498272845905534, "grad_norm": 451.2678527832031, "learning_rate": 1.889066180520246e-06, "loss": 17.8427, "step": 371190 }, { "epoch": 0.7498474852232372, "grad_norm": 367.1835021972656, "learning_rate": 1.8887929153219687e-06, "loss": 22.2723, "step": 371200 }, { "epoch": 0.749867685855921, "grad_norm": 175.74130249023438, "learning_rate": 1.8885196652873372e-06, "loss": 18.2782, "step": 371210 }, { "epoch": 0.7498878864886048, "grad_norm": 141.18226623535156, "learning_rate": 1.8882464304176817e-06, "loss": 27.077, "step": 371220 }, { "epoch": 0.7499080871212886, "grad_norm": 253.47525024414062, "learning_rate": 1.8879732107143378e-06, "loss": 28.0933, "step": 371230 }, { "epoch": 0.7499282877539725, "grad_norm": 446.2903747558594, "learning_rate": 1.8877000061786333e-06, "loss": 20.4453, "step": 371240 }, { "epoch": 0.7499484883866563, "grad_norm": 182.23312377929688, "learning_rate": 1.887426816811903e-06, "loss": 13.7468, "step": 371250 }, { "epoch": 0.7499686890193401, "grad_norm": 179.2864227294922, "learning_rate": 1.8871536426154752e-06, "loss": 17.0753, "step": 371260 }, { "epoch": 0.7499888896520239, "grad_norm": 485.5817565917969, "learning_rate": 1.8868804835906845e-06, "loss": 21.7351, "step": 371270 }, { "epoch": 0.7500090902847077, "grad_norm": 865.7108154296875, "learning_rate": 1.8866073397388612e-06, "loss": 15.9035, "step": 371280 }, { "epoch": 0.7500292909173916, "grad_norm": 118.02152252197266, "learning_rate": 1.8863342110613342e-06, "loss": 13.5667, "step": 371290 }, { "epoch": 0.7500494915500754, "grad_norm": 93.32388305664062, "learning_rate": 1.8860610975594384e-06, "loss": 12.3167, "step": 371300 }, { "epoch": 0.7500696921827592, "grad_norm": 0.05210625007748604, "learning_rate": 1.8857879992345013e-06, "loss": 16.3564, "step": 371310 }, { "epoch": 0.750089892815443, "grad_norm": 540.0073852539062, "learning_rate": 1.8855149160878571e-06, "loss": 18.4973, "step": 371320 }, { "epoch": 0.7501100934481268, "grad_norm": 225.62303161621094, "learning_rate": 1.8852418481208362e-06, "loss": 14.9685, "step": 371330 }, { "epoch": 0.7501302940808106, "grad_norm": 257.6683654785156, "learning_rate": 1.8849687953347666e-06, 
"loss": 24.1979, "step": 371340 }, { "epoch": 0.7501504947134944, "grad_norm": 608.3507080078125, "learning_rate": 1.8846957577309832e-06, "loss": 16.5048, "step": 371350 }, { "epoch": 0.7501706953461782, "grad_norm": 158.879150390625, "learning_rate": 1.8844227353108146e-06, "loss": 21.7614, "step": 371360 }, { "epoch": 0.750190895978862, "grad_norm": 449.2908020019531, "learning_rate": 1.8841497280755906e-06, "loss": 26.0557, "step": 371370 }, { "epoch": 0.7502110966115458, "grad_norm": 344.0711364746094, "learning_rate": 1.8838767360266425e-06, "loss": 20.2887, "step": 371380 }, { "epoch": 0.7502312972442297, "grad_norm": 637.5467529296875, "learning_rate": 1.8836037591653044e-06, "loss": 18.1995, "step": 371390 }, { "epoch": 0.7502514978769135, "grad_norm": 267.91094970703125, "learning_rate": 1.8833307974929006e-06, "loss": 18.5801, "step": 371400 }, { "epoch": 0.7502716985095973, "grad_norm": 685.9423217773438, "learning_rate": 1.8830578510107638e-06, "loss": 33.9838, "step": 371410 }, { "epoch": 0.7502918991422811, "grad_norm": 621.2180786132812, "learning_rate": 1.8827849197202275e-06, "loss": 29.6183, "step": 371420 }, { "epoch": 0.7503120997749649, "grad_norm": 369.6567687988281, "learning_rate": 1.8825120036226192e-06, "loss": 9.9979, "step": 371430 }, { "epoch": 0.7503323004076488, "grad_norm": 165.22293090820312, "learning_rate": 1.8822391027192677e-06, "loss": 23.3534, "step": 371440 }, { "epoch": 0.7503525010403326, "grad_norm": 528.44873046875, "learning_rate": 1.8819662170115043e-06, "loss": 12.3577, "step": 371450 }, { "epoch": 0.7503727016730164, "grad_norm": 627.9049682617188, "learning_rate": 1.881693346500663e-06, "loss": 19.3407, "step": 371460 }, { "epoch": 0.7503929023057002, "grad_norm": 310.8790588378906, "learning_rate": 1.8814204911880667e-06, "loss": 22.8446, "step": 371470 }, { "epoch": 0.750413102938384, "grad_norm": 421.85400390625, "learning_rate": 1.8811476510750486e-06, "loss": 14.8666, "step": 371480 }, { "epoch": 0.7504333035710679, "grad_norm": 563.498779296875, "learning_rate": 1.8808748261629406e-06, "loss": 17.9732, "step": 371490 }, { "epoch": 0.7504535042037517, "grad_norm": 248.99783325195312, "learning_rate": 1.8806020164530702e-06, "loss": 19.4519, "step": 371500 }, { "epoch": 0.7504737048364355, "grad_norm": 622.2474365234375, "learning_rate": 1.8803292219467656e-06, "loss": 32.7135, "step": 371510 }, { "epoch": 0.7504939054691193, "grad_norm": 436.3412170410156, "learning_rate": 1.8800564426453595e-06, "loss": 22.4725, "step": 371520 }, { "epoch": 0.7505141061018031, "grad_norm": 659.15234375, "learning_rate": 1.87978367855018e-06, "loss": 15.2118, "step": 371530 }, { "epoch": 0.750534306734487, "grad_norm": 39.72431945800781, "learning_rate": 1.8795109296625546e-06, "loss": 5.0009, "step": 371540 }, { "epoch": 0.7505545073671708, "grad_norm": 325.27874755859375, "learning_rate": 1.8792381959838147e-06, "loss": 13.0658, "step": 371550 }, { "epoch": 0.7505747079998546, "grad_norm": 302.402099609375, "learning_rate": 1.878965477515291e-06, "loss": 18.2643, "step": 371560 }, { "epoch": 0.7505949086325384, "grad_norm": 243.29498291015625, "learning_rate": 1.8786927742583111e-06, "loss": 17.85, "step": 371570 }, { "epoch": 0.7506151092652222, "grad_norm": 321.30810546875, "learning_rate": 1.878420086214202e-06, "loss": 20.9506, "step": 371580 }, { "epoch": 0.7506353098979061, "grad_norm": 5.587765216827393, "learning_rate": 1.8781474133842963e-06, "loss": 19.0845, "step": 371590 }, { "epoch": 0.7506555105305898, "grad_norm": 417.1700439453125, 
"learning_rate": 1.8778747557699223e-06, "loss": 26.0506, "step": 371600 }, { "epoch": 0.7506757111632736, "grad_norm": 0.44337430596351624, "learning_rate": 1.877602113372406e-06, "loss": 15.0031, "step": 371610 }, { "epoch": 0.7506959117959574, "grad_norm": 190.23757934570312, "learning_rate": 1.8773294861930797e-06, "loss": 15.9604, "step": 371620 }, { "epoch": 0.7507161124286412, "grad_norm": 186.6510772705078, "learning_rate": 1.8770568742332695e-06, "loss": 18.3207, "step": 371630 }, { "epoch": 0.750736313061325, "grad_norm": 498.6097412109375, "learning_rate": 1.8767842774943068e-06, "loss": 18.6611, "step": 371640 }, { "epoch": 0.7507565136940089, "grad_norm": 0.26976367831230164, "learning_rate": 1.8765116959775187e-06, "loss": 17.8622, "step": 371650 }, { "epoch": 0.7507767143266927, "grad_norm": 404.4729309082031, "learning_rate": 1.876239129684232e-06, "loss": 8.5176, "step": 371660 }, { "epoch": 0.7507969149593765, "grad_norm": 457.6100769042969, "learning_rate": 1.875966578615777e-06, "loss": 19.4443, "step": 371670 }, { "epoch": 0.7508171155920603, "grad_norm": 556.7343139648438, "learning_rate": 1.8756940427734854e-06, "loss": 21.3292, "step": 371680 }, { "epoch": 0.7508373162247441, "grad_norm": 671.5337524414062, "learning_rate": 1.8754215221586785e-06, "loss": 32.8127, "step": 371690 }, { "epoch": 0.750857516857428, "grad_norm": 243.16249084472656, "learning_rate": 1.8751490167726888e-06, "loss": 15.6399, "step": 371700 }, { "epoch": 0.7508777174901118, "grad_norm": 540.6301879882812, "learning_rate": 1.874876526616845e-06, "loss": 18.4024, "step": 371710 }, { "epoch": 0.7508979181227956, "grad_norm": 226.1687774658203, "learning_rate": 1.874604051692474e-06, "loss": 19.5006, "step": 371720 }, { "epoch": 0.7509181187554794, "grad_norm": 560.4127807617188, "learning_rate": 1.874331592000902e-06, "loss": 16.1051, "step": 371730 }, { "epoch": 0.7509383193881632, "grad_norm": 698.4486083984375, "learning_rate": 1.8740591475434588e-06, "loss": 20.4588, "step": 371740 }, { "epoch": 0.7509585200208471, "grad_norm": 267.2878723144531, "learning_rate": 1.873786718321476e-06, "loss": 14.6198, "step": 371750 }, { "epoch": 0.7509787206535309, "grad_norm": 133.45802307128906, "learning_rate": 1.8735143043362735e-06, "loss": 27.8448, "step": 371760 }, { "epoch": 0.7509989212862147, "grad_norm": 689.1096801757812, "learning_rate": 1.8732419055891832e-06, "loss": 31.4137, "step": 371770 }, { "epoch": 0.7510191219188985, "grad_norm": 214.43348693847656, "learning_rate": 1.8729695220815346e-06, "loss": 12.4934, "step": 371780 }, { "epoch": 0.7510393225515823, "grad_norm": 236.0517120361328, "learning_rate": 1.8726971538146532e-06, "loss": 15.2071, "step": 371790 }, { "epoch": 0.7510595231842662, "grad_norm": 236.72003173828125, "learning_rate": 1.8724248007898648e-06, "loss": 25.1316, "step": 371800 }, { "epoch": 0.75107972381695, "grad_norm": 2.4329068660736084, "learning_rate": 1.8721524630085003e-06, "loss": 9.839, "step": 371810 }, { "epoch": 0.7510999244496338, "grad_norm": 401.3538513183594, "learning_rate": 1.8718801404718856e-06, "loss": 15.5504, "step": 371820 }, { "epoch": 0.7511201250823176, "grad_norm": 290.0550231933594, "learning_rate": 1.8716078331813459e-06, "loss": 21.7786, "step": 371830 }, { "epoch": 0.7511403257150014, "grad_norm": 801.5999755859375, "learning_rate": 1.8713355411382117e-06, "loss": 20.3351, "step": 371840 }, { "epoch": 0.7511605263476853, "grad_norm": 56.34940719604492, "learning_rate": 1.871063264343807e-06, "loss": 14.2141, "step": 371850 }, { 
"epoch": 0.751180726980369, "grad_norm": 272.6626281738281, "learning_rate": 1.870791002799462e-06, "loss": 19.8764, "step": 371860 }, { "epoch": 0.7512009276130528, "grad_norm": 316.6409606933594, "learning_rate": 1.8705187565065003e-06, "loss": 19.6626, "step": 371870 }, { "epoch": 0.7512211282457366, "grad_norm": 503.186767578125, "learning_rate": 1.8702465254662527e-06, "loss": 13.1025, "step": 371880 }, { "epoch": 0.7512413288784204, "grad_norm": 190.72361755371094, "learning_rate": 1.8699743096800438e-06, "loss": 18.0219, "step": 371890 }, { "epoch": 0.7512615295111043, "grad_norm": 1024.67822265625, "learning_rate": 1.8697021091491991e-06, "loss": 26.6758, "step": 371900 }, { "epoch": 0.7512817301437881, "grad_norm": 238.12718200683594, "learning_rate": 1.869429923875048e-06, "loss": 19.7015, "step": 371910 }, { "epoch": 0.7513019307764719, "grad_norm": 681.9209594726562, "learning_rate": 1.869157753858914e-06, "loss": 23.4534, "step": 371920 }, { "epoch": 0.7513221314091557, "grad_norm": 86.79539489746094, "learning_rate": 1.8688855991021272e-06, "loss": 18.3068, "step": 371930 }, { "epoch": 0.7513423320418395, "grad_norm": 628.2562866210938, "learning_rate": 1.8686134596060123e-06, "loss": 14.6956, "step": 371940 }, { "epoch": 0.7513625326745234, "grad_norm": 146.2157745361328, "learning_rate": 1.8683413353718937e-06, "loss": 18.6938, "step": 371950 }, { "epoch": 0.7513827333072072, "grad_norm": 1092.339111328125, "learning_rate": 1.8680692264011014e-06, "loss": 20.8303, "step": 371960 }, { "epoch": 0.751402933939891, "grad_norm": 400.3479919433594, "learning_rate": 1.8677971326949602e-06, "loss": 13.7188, "step": 371970 }, { "epoch": 0.7514231345725748, "grad_norm": 281.03680419921875, "learning_rate": 1.867525054254794e-06, "loss": 10.1167, "step": 371980 }, { "epoch": 0.7514433352052586, "grad_norm": 443.2901611328125, "learning_rate": 1.8672529910819305e-06, "loss": 32.2682, "step": 371990 }, { "epoch": 0.7514635358379425, "grad_norm": 372.9542236328125, "learning_rate": 1.8669809431776991e-06, "loss": 31.8958, "step": 372000 }, { "epoch": 0.7514837364706263, "grad_norm": 306.41961669921875, "learning_rate": 1.86670891054342e-06, "loss": 13.3648, "step": 372010 }, { "epoch": 0.7515039371033101, "grad_norm": 335.14239501953125, "learning_rate": 1.8664368931804211e-06, "loss": 10.7412, "step": 372020 }, { "epoch": 0.7515241377359939, "grad_norm": 165.44140625, "learning_rate": 1.8661648910900303e-06, "loss": 24.0779, "step": 372030 }, { "epoch": 0.7515443383686777, "grad_norm": 384.6484069824219, "learning_rate": 1.8658929042735725e-06, "loss": 16.3811, "step": 372040 }, { "epoch": 0.7515645390013616, "grad_norm": 221.1503143310547, "learning_rate": 1.8656209327323704e-06, "loss": 24.0299, "step": 372050 }, { "epoch": 0.7515847396340454, "grad_norm": 505.26861572265625, "learning_rate": 1.8653489764677512e-06, "loss": 13.4609, "step": 372060 }, { "epoch": 0.7516049402667292, "grad_norm": 340.41748046875, "learning_rate": 1.865077035481045e-06, "loss": 15.3707, "step": 372070 }, { "epoch": 0.751625140899413, "grad_norm": 709.1183471679688, "learning_rate": 1.8648051097735697e-06, "loss": 17.3134, "step": 372080 }, { "epoch": 0.7516453415320968, "grad_norm": 372.26153564453125, "learning_rate": 1.8645331993466537e-06, "loss": 11.1038, "step": 372090 }, { "epoch": 0.7516655421647807, "grad_norm": 358.8562927246094, "learning_rate": 1.8642613042016245e-06, "loss": 26.0, "step": 372100 }, { "epoch": 0.7516857427974644, "grad_norm": 435.4578552246094, "learning_rate": 
1.8639894243398055e-06, "loss": 13.9236, "step": 372110 }, { "epoch": 0.7517059434301482, "grad_norm": 1.341683030128479, "learning_rate": 1.8637175597625195e-06, "loss": 15.2352, "step": 372120 }, { "epoch": 0.751726144062832, "grad_norm": 226.56320190429688, "learning_rate": 1.8634457104710956e-06, "loss": 11.6611, "step": 372130 }, { "epoch": 0.7517463446955158, "grad_norm": 437.05535888671875, "learning_rate": 1.8631738764668571e-06, "loss": 17.6793, "step": 372140 }, { "epoch": 0.7517665453281996, "grad_norm": 255.4073944091797, "learning_rate": 1.862902057751127e-06, "loss": 8.3148, "step": 372150 }, { "epoch": 0.7517867459608835, "grad_norm": 225.7238006591797, "learning_rate": 1.8626302543252317e-06, "loss": 11.3706, "step": 372160 }, { "epoch": 0.7518069465935673, "grad_norm": 28.877674102783203, "learning_rate": 1.8623584661904976e-06, "loss": 10.5569, "step": 372170 }, { "epoch": 0.7518271472262511, "grad_norm": 248.7672576904297, "learning_rate": 1.862086693348248e-06, "loss": 17.5141, "step": 372180 }, { "epoch": 0.7518473478589349, "grad_norm": 244.1647186279297, "learning_rate": 1.8618149357998055e-06, "loss": 8.8961, "step": 372190 }, { "epoch": 0.7518675484916187, "grad_norm": 394.9271240234375, "learning_rate": 1.8615431935464984e-06, "loss": 17.3364, "step": 372200 }, { "epoch": 0.7518877491243026, "grad_norm": 102.46836853027344, "learning_rate": 1.8612714665896486e-06, "loss": 10.7787, "step": 372210 }, { "epoch": 0.7519079497569864, "grad_norm": 137.36032104492188, "learning_rate": 1.8609997549305792e-06, "loss": 17.4078, "step": 372220 }, { "epoch": 0.7519281503896702, "grad_norm": 0.998528242111206, "learning_rate": 1.8607280585706183e-06, "loss": 25.5399, "step": 372230 }, { "epoch": 0.751948351022354, "grad_norm": 1166.321533203125, "learning_rate": 1.8604563775110868e-06, "loss": 28.0414, "step": 372240 }, { "epoch": 0.7519685516550378, "grad_norm": 163.6189727783203, "learning_rate": 1.8601847117533112e-06, "loss": 15.5508, "step": 372250 }, { "epoch": 0.7519887522877217, "grad_norm": 167.6587677001953, "learning_rate": 1.859913061298615e-06, "loss": 11.8528, "step": 372260 }, { "epoch": 0.7520089529204055, "grad_norm": 169.41229248046875, "learning_rate": 1.8596414261483192e-06, "loss": 14.1934, "step": 372270 }, { "epoch": 0.7520291535530893, "grad_norm": 794.1710815429688, "learning_rate": 1.8593698063037525e-06, "loss": 23.826, "step": 372280 }, { "epoch": 0.7520493541857731, "grad_norm": 183.20425415039062, "learning_rate": 1.8590982017662362e-06, "loss": 24.6968, "step": 372290 }, { "epoch": 0.7520695548184569, "grad_norm": 659.823974609375, "learning_rate": 1.8588266125370929e-06, "loss": 23.0166, "step": 372300 }, { "epoch": 0.7520897554511408, "grad_norm": 427.4971008300781, "learning_rate": 1.8585550386176476e-06, "loss": 14.9007, "step": 372310 }, { "epoch": 0.7521099560838246, "grad_norm": 486.5269775390625, "learning_rate": 1.858283480009226e-06, "loss": 19.578, "step": 372320 }, { "epoch": 0.7521301567165084, "grad_norm": 605.6756591796875, "learning_rate": 1.8580119367131487e-06, "loss": 19.1646, "step": 372330 }, { "epoch": 0.7521503573491922, "grad_norm": 253.02023315429688, "learning_rate": 1.8577404087307394e-06, "loss": 11.5323, "step": 372340 }, { "epoch": 0.752170557981876, "grad_norm": 584.7437133789062, "learning_rate": 1.8574688960633236e-06, "loss": 20.9569, "step": 372350 }, { "epoch": 0.7521907586145599, "grad_norm": 744.9182739257812, "learning_rate": 1.8571973987122233e-06, "loss": 13.9706, "step": 372360 }, { "epoch": 
0.7522109592472436, "grad_norm": 220.01039123535156, "learning_rate": 1.85692591667876e-06, "loss": 20.734, "step": 372370 }, { "epoch": 0.7522311598799274, "grad_norm": 663.4364624023438, "learning_rate": 1.8566544499642587e-06, "loss": 22.673, "step": 372380 }, { "epoch": 0.7522513605126112, "grad_norm": 220.8157958984375, "learning_rate": 1.8563829985700444e-06, "loss": 16.6396, "step": 372390 }, { "epoch": 0.752271561145295, "grad_norm": 304.0716247558594, "learning_rate": 1.8561115624974374e-06, "loss": 12.1302, "step": 372400 }, { "epoch": 0.7522917617779789, "grad_norm": 390.99932861328125, "learning_rate": 1.8558401417477602e-06, "loss": 22.9299, "step": 372410 }, { "epoch": 0.7523119624106627, "grad_norm": 851.5274658203125, "learning_rate": 1.855568736322338e-06, "loss": 21.7617, "step": 372420 }, { "epoch": 0.7523321630433465, "grad_norm": 474.37847900390625, "learning_rate": 1.8552973462224926e-06, "loss": 34.2556, "step": 372430 }, { "epoch": 0.7523523636760303, "grad_norm": 908.5614624023438, "learning_rate": 1.8550259714495444e-06, "loss": 26.9689, "step": 372440 }, { "epoch": 0.7523725643087141, "grad_norm": 505.4935302734375, "learning_rate": 1.8547546120048204e-06, "loss": 16.8324, "step": 372450 }, { "epoch": 0.752392764941398, "grad_norm": 653.8145751953125, "learning_rate": 1.854483267889639e-06, "loss": 8.4588, "step": 372460 }, { "epoch": 0.7524129655740818, "grad_norm": 447.92877197265625, "learning_rate": 1.854211939105327e-06, "loss": 18.8001, "step": 372470 }, { "epoch": 0.7524331662067656, "grad_norm": 584.7274169921875, "learning_rate": 1.8539406256532022e-06, "loss": 12.1158, "step": 372480 }, { "epoch": 0.7524533668394494, "grad_norm": 372.8336486816406, "learning_rate": 1.8536693275345908e-06, "loss": 26.4126, "step": 372490 }, { "epoch": 0.7524735674721332, "grad_norm": 447.24200439453125, "learning_rate": 1.8533980447508138e-06, "loss": 36.0696, "step": 372500 }, { "epoch": 0.752493768104817, "grad_norm": 353.8642578125, "learning_rate": 1.8531267773031913e-06, "loss": 16.9937, "step": 372510 }, { "epoch": 0.7525139687375009, "grad_norm": 451.4334411621094, "learning_rate": 1.8528555251930492e-06, "loss": 22.1797, "step": 372520 }, { "epoch": 0.7525341693701847, "grad_norm": 643.447998046875, "learning_rate": 1.8525842884217055e-06, "loss": 21.9101, "step": 372530 }, { "epoch": 0.7525543700028685, "grad_norm": 0.0, "learning_rate": 1.852313066990486e-06, "loss": 13.9883, "step": 372540 }, { "epoch": 0.7525745706355523, "grad_norm": 420.5641784667969, "learning_rate": 1.8520418609007107e-06, "loss": 14.4659, "step": 372550 }, { "epoch": 0.7525947712682362, "grad_norm": 444.2103576660156, "learning_rate": 1.8517706701536998e-06, "loss": 12.8427, "step": 372560 }, { "epoch": 0.75261497190092, "grad_norm": 232.41436767578125, "learning_rate": 1.8514994947507787e-06, "loss": 15.8466, "step": 372570 }, { "epoch": 0.7526351725336038, "grad_norm": 533.5691528320312, "learning_rate": 1.8512283346932675e-06, "loss": 16.6797, "step": 372580 }, { "epoch": 0.7526553731662876, "grad_norm": 678.6180419921875, "learning_rate": 1.8509571899824851e-06, "loss": 26.8828, "step": 372590 }, { "epoch": 0.7526755737989714, "grad_norm": 181.0961151123047, "learning_rate": 1.8506860606197564e-06, "loss": 15.2489, "step": 372600 }, { "epoch": 0.7526957744316553, "grad_norm": 133.0625457763672, "learning_rate": 1.850414946606403e-06, "loss": 19.4113, "step": 372610 }, { "epoch": 0.752715975064339, "grad_norm": 516.4111938476562, "learning_rate": 1.850143847943745e-06, "loss": 
25.9688, "step": 372620 }, { "epoch": 0.7527361756970228, "grad_norm": 473.9225769042969, "learning_rate": 1.8498727646331022e-06, "loss": 24.7476, "step": 372630 }, { "epoch": 0.7527563763297066, "grad_norm": 908.9474487304688, "learning_rate": 1.8496016966757996e-06, "loss": 27.9903, "step": 372640 }, { "epoch": 0.7527765769623904, "grad_norm": 126.64356231689453, "learning_rate": 1.8493306440731557e-06, "loss": 14.6101, "step": 372650 }, { "epoch": 0.7527967775950742, "grad_norm": 174.29595947265625, "learning_rate": 1.849059606826491e-06, "loss": 7.9262, "step": 372660 }, { "epoch": 0.7528169782277581, "grad_norm": 580.3689575195312, "learning_rate": 1.8487885849371268e-06, "loss": 22.3703, "step": 372670 }, { "epoch": 0.7528371788604419, "grad_norm": 543.1556396484375, "learning_rate": 1.848517578406389e-06, "loss": 20.5304, "step": 372680 }, { "epoch": 0.7528573794931257, "grad_norm": 374.93121337890625, "learning_rate": 1.8482465872355904e-06, "loss": 22.1513, "step": 372690 }, { "epoch": 0.7528775801258095, "grad_norm": 551.6804809570312, "learning_rate": 1.8479756114260562e-06, "loss": 19.926, "step": 372700 }, { "epoch": 0.7528977807584933, "grad_norm": 397.40899658203125, "learning_rate": 1.847704650979108e-06, "loss": 15.0318, "step": 372710 }, { "epoch": 0.7529179813911772, "grad_norm": 358.8426818847656, "learning_rate": 1.8474337058960646e-06, "loss": 23.3054, "step": 372720 }, { "epoch": 0.752938182023861, "grad_norm": 432.9877014160156, "learning_rate": 1.8471627761782457e-06, "loss": 13.7821, "step": 372730 }, { "epoch": 0.7529583826565448, "grad_norm": 602.4999389648438, "learning_rate": 1.8468918618269749e-06, "loss": 21.9916, "step": 372740 }, { "epoch": 0.7529785832892286, "grad_norm": 267.88726806640625, "learning_rate": 1.8466209628435705e-06, "loss": 14.1686, "step": 372750 }, { "epoch": 0.7529987839219124, "grad_norm": 152.61849975585938, "learning_rate": 1.846350079229351e-06, "loss": 21.7504, "step": 372760 }, { "epoch": 0.7530189845545963, "grad_norm": 404.3066711425781, "learning_rate": 1.846079210985639e-06, "loss": 42.8575, "step": 372770 }, { "epoch": 0.7530391851872801, "grad_norm": 1.7670810222625732, "learning_rate": 1.8458083581137565e-06, "loss": 10.9234, "step": 372780 }, { "epoch": 0.7530593858199639, "grad_norm": 480.45086669921875, "learning_rate": 1.8455375206150212e-06, "loss": 19.2333, "step": 372790 }, { "epoch": 0.7530795864526477, "grad_norm": 336.90460205078125, "learning_rate": 1.8452666984907519e-06, "loss": 11.0934, "step": 372800 }, { "epoch": 0.7530997870853315, "grad_norm": 764.4038696289062, "learning_rate": 1.8449958917422712e-06, "loss": 18.3545, "step": 372810 }, { "epoch": 0.7531199877180154, "grad_norm": 1.3576780557632446, "learning_rate": 1.8447251003708982e-06, "loss": 22.2995, "step": 372820 }, { "epoch": 0.7531401883506992, "grad_norm": 146.26312255859375, "learning_rate": 1.8444543243779512e-06, "loss": 35.3476, "step": 372830 }, { "epoch": 0.753160388983383, "grad_norm": 342.9661865234375, "learning_rate": 1.844183563764752e-06, "loss": 12.6528, "step": 372840 }, { "epoch": 0.7531805896160668, "grad_norm": 553.3638916015625, "learning_rate": 1.8439128185326183e-06, "loss": 22.5289, "step": 372850 }, { "epoch": 0.7532007902487506, "grad_norm": 85.49042510986328, "learning_rate": 1.843642088682872e-06, "loss": 24.871, "step": 372860 }, { "epoch": 0.7532209908814345, "grad_norm": 363.81671142578125, "learning_rate": 1.8433713742168313e-06, "loss": 13.3764, "step": 372870 }, { "epoch": 0.7532411915141182, "grad_norm": 
103.99618530273438, "learning_rate": 1.8431006751358143e-06, "loss": 14.6951, "step": 372880 }, { "epoch": 0.753261392146802, "grad_norm": 136.24386596679688, "learning_rate": 1.8428299914411435e-06, "loss": 16.6602, "step": 372890 }, { "epoch": 0.7532815927794858, "grad_norm": 310.4085998535156, "learning_rate": 1.842559323134136e-06, "loss": 15.7493, "step": 372900 }, { "epoch": 0.7533017934121696, "grad_norm": 249.01113891601562, "learning_rate": 1.8422886702161098e-06, "loss": 24.468, "step": 372910 }, { "epoch": 0.7533219940448534, "grad_norm": 352.1138000488281, "learning_rate": 1.8420180326883857e-06, "loss": 31.5453, "step": 372920 }, { "epoch": 0.7533421946775373, "grad_norm": 254.39141845703125, "learning_rate": 1.8417474105522849e-06, "loss": 20.3214, "step": 372930 }, { "epoch": 0.7533623953102211, "grad_norm": 494.2662658691406, "learning_rate": 1.8414768038091235e-06, "loss": 9.3419, "step": 372940 }, { "epoch": 0.7533825959429049, "grad_norm": 141.86717224121094, "learning_rate": 1.8412062124602192e-06, "loss": 19.459, "step": 372950 }, { "epoch": 0.7534027965755887, "grad_norm": 585.3483276367188, "learning_rate": 1.8409356365068947e-06, "loss": 30.1718, "step": 372960 }, { "epoch": 0.7534229972082725, "grad_norm": 661.2196044921875, "learning_rate": 1.8406650759504667e-06, "loss": 13.9914, "step": 372970 }, { "epoch": 0.7534431978409564, "grad_norm": 122.52154541015625, "learning_rate": 1.8403945307922526e-06, "loss": 13.9282, "step": 372980 }, { "epoch": 0.7534633984736402, "grad_norm": 907.5955810546875, "learning_rate": 1.8401240010335725e-06, "loss": 19.1787, "step": 372990 }, { "epoch": 0.753483599106324, "grad_norm": 313.92523193359375, "learning_rate": 1.8398534866757455e-06, "loss": 15.3005, "step": 373000 }, { "epoch": 0.7535037997390078, "grad_norm": 64.90787506103516, "learning_rate": 1.8395829877200904e-06, "loss": 15.5811, "step": 373010 }, { "epoch": 0.7535240003716916, "grad_norm": 997.3680419921875, "learning_rate": 1.8393125041679221e-06, "loss": 15.6607, "step": 373020 }, { "epoch": 0.7535442010043755, "grad_norm": 480.28204345703125, "learning_rate": 1.8390420360205635e-06, "loss": 13.6431, "step": 373030 }, { "epoch": 0.7535644016370593, "grad_norm": 125.03921508789062, "learning_rate": 1.83877158327933e-06, "loss": 7.5408, "step": 373040 }, { "epoch": 0.7535846022697431, "grad_norm": 580.4336547851562, "learning_rate": 1.8385011459455394e-06, "loss": 15.5241, "step": 373050 }, { "epoch": 0.7536048029024269, "grad_norm": 108.73404693603516, "learning_rate": 1.83823072402051e-06, "loss": 17.1433, "step": 373060 }, { "epoch": 0.7536250035351107, "grad_norm": 793.8583984375, "learning_rate": 1.8379603175055628e-06, "loss": 18.2797, "step": 373070 }, { "epoch": 0.7536452041677946, "grad_norm": 4.538880348205566, "learning_rate": 1.8376899264020138e-06, "loss": 34.743, "step": 373080 }, { "epoch": 0.7536654048004784, "grad_norm": 658.1224365234375, "learning_rate": 1.8374195507111781e-06, "loss": 19.0254, "step": 373090 }, { "epoch": 0.7536856054331622, "grad_norm": 216.00830078125, "learning_rate": 1.837149190434378e-06, "loss": 18.4801, "step": 373100 }, { "epoch": 0.753705806065846, "grad_norm": 600.862060546875, "learning_rate": 1.8368788455729292e-06, "loss": 34.0035, "step": 373110 }, { "epoch": 0.7537260066985298, "grad_norm": 495.9053649902344, "learning_rate": 1.8366085161281477e-06, "loss": 16.4508, "step": 373120 }, { "epoch": 0.7537462073312137, "grad_norm": 495.1671447753906, "learning_rate": 1.8363382021013536e-06, "loss": 12.3908, "step": 
373130 }, { "epoch": 0.7537664079638974, "grad_norm": 1.0381710529327393, "learning_rate": 1.8360679034938628e-06, "loss": 21.47, "step": 373140 }, { "epoch": 0.7537866085965812, "grad_norm": 365.1124267578125, "learning_rate": 1.8357976203069943e-06, "loss": 18.5855, "step": 373150 }, { "epoch": 0.753806809229265, "grad_norm": 541.2918090820312, "learning_rate": 1.8355273525420642e-06, "loss": 26.7612, "step": 373160 }, { "epoch": 0.7538270098619488, "grad_norm": 779.7386474609375, "learning_rate": 1.8352571002003888e-06, "loss": 22.4891, "step": 373170 }, { "epoch": 0.7538472104946327, "grad_norm": 611.7274169921875, "learning_rate": 1.834986863283288e-06, "loss": 11.2856, "step": 373180 }, { "epoch": 0.7538674111273165, "grad_norm": 85.95925903320312, "learning_rate": 1.8347166417920776e-06, "loss": 20.2332, "step": 373190 }, { "epoch": 0.7538876117600003, "grad_norm": 201.36541748046875, "learning_rate": 1.8344464357280722e-06, "loss": 11.7059, "step": 373200 }, { "epoch": 0.7539078123926841, "grad_norm": 306.60211181640625, "learning_rate": 1.834176245092591e-06, "loss": 17.1396, "step": 373210 }, { "epoch": 0.7539280130253679, "grad_norm": 374.44140625, "learning_rate": 1.8339060698869526e-06, "loss": 25.0919, "step": 373220 }, { "epoch": 0.7539482136580518, "grad_norm": 293.8785400390625, "learning_rate": 1.8336359101124724e-06, "loss": 8.6249, "step": 373230 }, { "epoch": 0.7539684142907356, "grad_norm": 530.725341796875, "learning_rate": 1.8333657657704645e-06, "loss": 13.0651, "step": 373240 }, { "epoch": 0.7539886149234194, "grad_norm": 324.19317626953125, "learning_rate": 1.8330956368622498e-06, "loss": 27.7408, "step": 373250 }, { "epoch": 0.7540088155561032, "grad_norm": 556.4000244140625, "learning_rate": 1.8328255233891428e-06, "loss": 12.6205, "step": 373260 }, { "epoch": 0.754029016188787, "grad_norm": 425.33197021484375, "learning_rate": 1.8325554253524585e-06, "loss": 16.1365, "step": 373270 }, { "epoch": 0.7540492168214709, "grad_norm": 401.85260009765625, "learning_rate": 1.8322853427535148e-06, "loss": 12.5885, "step": 373280 }, { "epoch": 0.7540694174541547, "grad_norm": 236.56997680664062, "learning_rate": 1.832015275593631e-06, "loss": 8.9223, "step": 373290 }, { "epoch": 0.7540896180868385, "grad_norm": 170.80523681640625, "learning_rate": 1.831745223874118e-06, "loss": 17.5782, "step": 373300 }, { "epoch": 0.7541098187195223, "grad_norm": 1027.2984619140625, "learning_rate": 1.8314751875962939e-06, "loss": 34.7551, "step": 373310 }, { "epoch": 0.7541300193522061, "grad_norm": 741.0579223632812, "learning_rate": 1.8312051667614772e-06, "loss": 27.0635, "step": 373320 }, { "epoch": 0.75415021998489, "grad_norm": 385.8459777832031, "learning_rate": 1.8309351613709825e-06, "loss": 25.9867, "step": 373330 }, { "epoch": 0.7541704206175738, "grad_norm": 126.38776397705078, "learning_rate": 1.8306651714261237e-06, "loss": 22.6648, "step": 373340 }, { "epoch": 0.7541906212502576, "grad_norm": 412.8068542480469, "learning_rate": 1.8303951969282202e-06, "loss": 17.6643, "step": 373350 }, { "epoch": 0.7542108218829414, "grad_norm": 544.8564453125, "learning_rate": 1.8301252378785856e-06, "loss": 15.0653, "step": 373360 }, { "epoch": 0.7542310225156252, "grad_norm": 284.7385559082031, "learning_rate": 1.8298552942785352e-06, "loss": 27.7315, "step": 373370 }, { "epoch": 0.7542512231483091, "grad_norm": 386.16339111328125, "learning_rate": 1.829585366129385e-06, "loss": 14.34, "step": 373380 }, { "epoch": 0.7542714237809928, "grad_norm": 339.692138671875, "learning_rate": 
1.8293154534324531e-06, "loss": 19.847, "step": 373390 }, { "epoch": 0.7542916244136766, "grad_norm": 148.24313354492188, "learning_rate": 1.829045556189053e-06, "loss": 26.0274, "step": 373400 }, { "epoch": 0.7543118250463604, "grad_norm": 588.754638671875, "learning_rate": 1.8287756744004986e-06, "loss": 42.3484, "step": 373410 }, { "epoch": 0.7543320256790442, "grad_norm": 226.2706298828125, "learning_rate": 1.828505808068109e-06, "loss": 9.4534, "step": 373420 }, { "epoch": 0.754352226311728, "grad_norm": 75.25081634521484, "learning_rate": 1.8282359571931968e-06, "loss": 7.8139, "step": 373430 }, { "epoch": 0.7543724269444119, "grad_norm": 184.63485717773438, "learning_rate": 1.8279661217770766e-06, "loss": 12.659, "step": 373440 }, { "epoch": 0.7543926275770957, "grad_norm": 299.3465270996094, "learning_rate": 1.8276963018210664e-06, "loss": 15.0745, "step": 373450 }, { "epoch": 0.7544128282097795, "grad_norm": 85.56561279296875, "learning_rate": 1.8274264973264782e-06, "loss": 20.9809, "step": 373460 }, { "epoch": 0.7544330288424633, "grad_norm": 418.154296875, "learning_rate": 1.8271567082946302e-06, "loss": 23.4097, "step": 373470 }, { "epoch": 0.7544532294751471, "grad_norm": 223.2666015625, "learning_rate": 1.8268869347268348e-06, "loss": 16.1004, "step": 373480 }, { "epoch": 0.754473430107831, "grad_norm": 652.4942626953125, "learning_rate": 1.8266171766244067e-06, "loss": 21.3322, "step": 373490 }, { "epoch": 0.7544936307405148, "grad_norm": 722.2883911132812, "learning_rate": 1.8263474339886628e-06, "loss": 15.3794, "step": 373500 }, { "epoch": 0.7545138313731986, "grad_norm": 6.032987594604492, "learning_rate": 1.8260777068209168e-06, "loss": 8.9061, "step": 373510 }, { "epoch": 0.7545340320058824, "grad_norm": 2.141902446746826, "learning_rate": 1.8258079951224816e-06, "loss": 13.8676, "step": 373520 }, { "epoch": 0.7545542326385662, "grad_norm": 54.33201599121094, "learning_rate": 1.825538298894673e-06, "loss": 23.2468, "step": 373530 }, { "epoch": 0.7545744332712501, "grad_norm": 623.4483032226562, "learning_rate": 1.825268618138808e-06, "loss": 20.8951, "step": 373540 }, { "epoch": 0.7545946339039339, "grad_norm": 263.5401916503906, "learning_rate": 1.824998952856198e-06, "loss": 17.8034, "step": 373550 }, { "epoch": 0.7546148345366177, "grad_norm": 540.0736694335938, "learning_rate": 1.8247293030481568e-06, "loss": 22.1899, "step": 373560 }, { "epoch": 0.7546350351693015, "grad_norm": 550.5327758789062, "learning_rate": 1.824459668716001e-06, "loss": 25.1992, "step": 373570 }, { "epoch": 0.7546552358019853, "grad_norm": 517.9869995117188, "learning_rate": 1.8241900498610438e-06, "loss": 20.0201, "step": 373580 }, { "epoch": 0.7546754364346692, "grad_norm": 290.59112548828125, "learning_rate": 1.8239204464845978e-06, "loss": 15.7157, "step": 373590 }, { "epoch": 0.754695637067353, "grad_norm": 101.77208709716797, "learning_rate": 1.8236508585879781e-06, "loss": 19.5796, "step": 373600 }, { "epoch": 0.7547158377000368, "grad_norm": 542.2976684570312, "learning_rate": 1.8233812861725002e-06, "loss": 11.8657, "step": 373610 }, { "epoch": 0.7547360383327206, "grad_norm": 47.22188949584961, "learning_rate": 1.8231117292394772e-06, "loss": 16.9453, "step": 373620 }, { "epoch": 0.7547562389654044, "grad_norm": 200.20924377441406, "learning_rate": 1.8228421877902203e-06, "loss": 19.2468, "step": 373630 }, { "epoch": 0.7547764395980883, "grad_norm": 533.7518310546875, "learning_rate": 1.822572661826047e-06, "loss": 26.7474, "step": 373640 }, { "epoch": 0.754796640230772, 
"grad_norm": 244.7967529296875, "learning_rate": 1.8223031513482692e-06, "loss": 10.8895, "step": 373650 }, { "epoch": 0.7548168408634558, "grad_norm": 307.4630432128906, "learning_rate": 1.8220336563581986e-06, "loss": 18.4101, "step": 373660 }, { "epoch": 0.7548370414961396, "grad_norm": 408.6343688964844, "learning_rate": 1.821764176857151e-06, "loss": 23.4212, "step": 373670 }, { "epoch": 0.7548572421288234, "grad_norm": 285.11029052734375, "learning_rate": 1.8214947128464406e-06, "loss": 12.0743, "step": 373680 }, { "epoch": 0.7548774427615073, "grad_norm": 551.1350708007812, "learning_rate": 1.8212252643273797e-06, "loss": 29.6394, "step": 373690 }, { "epoch": 0.7548976433941911, "grad_norm": 254.38385009765625, "learning_rate": 1.8209558313012792e-06, "loss": 23.6601, "step": 373700 }, { "epoch": 0.7549178440268749, "grad_norm": 602.5098266601562, "learning_rate": 1.8206864137694563e-06, "loss": 13.4353, "step": 373710 }, { "epoch": 0.7549380446595587, "grad_norm": 372.99005126953125, "learning_rate": 1.8204170117332226e-06, "loss": 20.7797, "step": 373720 }, { "epoch": 0.7549582452922425, "grad_norm": 300.43499755859375, "learning_rate": 1.8201476251938888e-06, "loss": 26.6204, "step": 373730 }, { "epoch": 0.7549784459249264, "grad_norm": 646.2213134765625, "learning_rate": 1.8198782541527715e-06, "loss": 12.8761, "step": 373740 }, { "epoch": 0.7549986465576102, "grad_norm": 294.9193420410156, "learning_rate": 1.8196088986111798e-06, "loss": 20.0258, "step": 373750 }, { "epoch": 0.755018847190294, "grad_norm": 283.4877624511719, "learning_rate": 1.819339558570431e-06, "loss": 6.8631, "step": 373760 }, { "epoch": 0.7550390478229778, "grad_norm": 209.648681640625, "learning_rate": 1.819070234031835e-06, "loss": 15.8996, "step": 373770 }, { "epoch": 0.7550592484556616, "grad_norm": 34.476966857910156, "learning_rate": 1.818800924996703e-06, "loss": 18.4766, "step": 373780 }, { "epoch": 0.7550794490883455, "grad_norm": 311.11376953125, "learning_rate": 1.8185316314663515e-06, "loss": 26.3973, "step": 373790 }, { "epoch": 0.7550996497210293, "grad_norm": 318.308349609375, "learning_rate": 1.8182623534420906e-06, "loss": 21.0494, "step": 373800 }, { "epoch": 0.7551198503537131, "grad_norm": 307.3672790527344, "learning_rate": 1.817993090925232e-06, "loss": 7.7701, "step": 373810 }, { "epoch": 0.7551400509863969, "grad_norm": 565.0277709960938, "learning_rate": 1.8177238439170885e-06, "loss": 11.7383, "step": 373820 }, { "epoch": 0.7551602516190807, "grad_norm": 573.1658325195312, "learning_rate": 1.8174546124189752e-06, "loss": 16.0305, "step": 373830 }, { "epoch": 0.7551804522517646, "grad_norm": 95.29529571533203, "learning_rate": 1.8171853964322016e-06, "loss": 9.8451, "step": 373840 }, { "epoch": 0.7552006528844484, "grad_norm": 429.97845458984375, "learning_rate": 1.8169161959580795e-06, "loss": 17.8444, "step": 373850 }, { "epoch": 0.7552208535171322, "grad_norm": 385.33087158203125, "learning_rate": 1.816647010997923e-06, "loss": 18.6532, "step": 373860 }, { "epoch": 0.755241054149816, "grad_norm": 267.79779052734375, "learning_rate": 1.8163778415530425e-06, "loss": 22.2014, "step": 373870 }, { "epoch": 0.7552612547824998, "grad_norm": 199.0067596435547, "learning_rate": 1.8161086876247492e-06, "loss": 29.8439, "step": 373880 }, { "epoch": 0.7552814554151837, "grad_norm": 215.98484802246094, "learning_rate": 1.8158395492143555e-06, "loss": 18.1653, "step": 373890 }, { "epoch": 0.7553016560478674, "grad_norm": 755.5444946289062, "learning_rate": 1.8155704263231777e-06, "loss": 
36.9149, "step": 373900 }, { "epoch": 0.7553218566805512, "grad_norm": 54.62074661254883, "learning_rate": 1.8153013189525192e-06, "loss": 23.4045, "step": 373910 }, { "epoch": 0.755342057313235, "grad_norm": 573.7791748046875, "learning_rate": 1.8150322271036962e-06, "loss": 42.773, "step": 373920 }, { "epoch": 0.7553622579459188, "grad_norm": 394.81207275390625, "learning_rate": 1.8147631507780217e-06, "loss": 23.2269, "step": 373930 }, { "epoch": 0.7553824585786026, "grad_norm": 72.78643035888672, "learning_rate": 1.814494089976805e-06, "loss": 19.492, "step": 373940 }, { "epoch": 0.7554026592112865, "grad_norm": 352.05078125, "learning_rate": 1.8142250447013566e-06, "loss": 18.0564, "step": 373950 }, { "epoch": 0.7554228598439703, "grad_norm": 692.9324951171875, "learning_rate": 1.81395601495299e-06, "loss": 17.2259, "step": 373960 }, { "epoch": 0.7554430604766541, "grad_norm": 429.7369384765625, "learning_rate": 1.8136870007330155e-06, "loss": 9.6317, "step": 373970 }, { "epoch": 0.7554632611093379, "grad_norm": 206.9267120361328, "learning_rate": 1.8134180020427423e-06, "loss": 37.7886, "step": 373980 }, { "epoch": 0.7554834617420217, "grad_norm": 0.6846423149108887, "learning_rate": 1.8131490188834837e-06, "loss": 8.6686, "step": 373990 }, { "epoch": 0.7555036623747056, "grad_norm": 151.581298828125, "learning_rate": 1.8128800512565514e-06, "loss": 10.8003, "step": 374000 }, { "epoch": 0.7555238630073894, "grad_norm": 380.9773254394531, "learning_rate": 1.8126110991632556e-06, "loss": 12.3034, "step": 374010 }, { "epoch": 0.7555440636400732, "grad_norm": 272.1925354003906, "learning_rate": 1.8123421626049048e-06, "loss": 14.0785, "step": 374020 }, { "epoch": 0.755564264272757, "grad_norm": 874.1373901367188, "learning_rate": 1.8120732415828135e-06, "loss": 14.6445, "step": 374030 }, { "epoch": 0.7555844649054408, "grad_norm": 188.32449340820312, "learning_rate": 1.8118043360982906e-06, "loss": 14.4632, "step": 374040 }, { "epoch": 0.7556046655381247, "grad_norm": 297.5085144042969, "learning_rate": 1.8115354461526453e-06, "loss": 26.6521, "step": 374050 }, { "epoch": 0.7556248661708085, "grad_norm": 246.0274658203125, "learning_rate": 1.8112665717471905e-06, "loss": 23.6608, "step": 374060 }, { "epoch": 0.7556450668034923, "grad_norm": 280.06146240234375, "learning_rate": 1.8109977128832346e-06, "loss": 27.8095, "step": 374070 }, { "epoch": 0.7556652674361761, "grad_norm": 387.3278503417969, "learning_rate": 1.8107288695620905e-06, "loss": 21.2384, "step": 374080 }, { "epoch": 0.75568546806886, "grad_norm": 631.174072265625, "learning_rate": 1.810460041785067e-06, "loss": 9.841, "step": 374090 }, { "epoch": 0.7557056687015438, "grad_norm": 481.58599853515625, "learning_rate": 1.810191229553473e-06, "loss": 21.0722, "step": 374100 }, { "epoch": 0.7557258693342276, "grad_norm": 424.12042236328125, "learning_rate": 1.8099224328686216e-06, "loss": 30.1242, "step": 374110 }, { "epoch": 0.7557460699669114, "grad_norm": 471.8075256347656, "learning_rate": 1.8096536517318196e-06, "loss": 20.2933, "step": 374120 }, { "epoch": 0.7557662705995952, "grad_norm": 582.2421875, "learning_rate": 1.8093848861443802e-06, "loss": 11.7692, "step": 374130 }, { "epoch": 0.755786471232279, "grad_norm": 490.2405090332031, "learning_rate": 1.809116136107611e-06, "loss": 17.1522, "step": 374140 }, { "epoch": 0.7558066718649629, "grad_norm": 642.3689575195312, "learning_rate": 1.8088474016228236e-06, "loss": 38.366, "step": 374150 }, { "epoch": 0.7558268724976466, "grad_norm": 207.1797637939453, 
"learning_rate": 1.808578682691327e-06, "loss": 9.4646, "step": 374160 }, { "epoch": 0.7558470731303304, "grad_norm": 464.04241943359375, "learning_rate": 1.8083099793144299e-06, "loss": 34.519, "step": 374170 }, { "epoch": 0.7558672737630142, "grad_norm": 336.1421203613281, "learning_rate": 1.8080412914934436e-06, "loss": 11.8767, "step": 374180 }, { "epoch": 0.755887474395698, "grad_norm": 266.2442626953125, "learning_rate": 1.8077726192296774e-06, "loss": 10.2188, "step": 374190 }, { "epoch": 0.7559076750283819, "grad_norm": 140.14678955078125, "learning_rate": 1.807503962524439e-06, "loss": 10.7977, "step": 374200 }, { "epoch": 0.7559278756610657, "grad_norm": 249.89939880371094, "learning_rate": 1.8072353213790383e-06, "loss": 17.3283, "step": 374210 }, { "epoch": 0.7559480762937495, "grad_norm": 184.39439392089844, "learning_rate": 1.8069666957947873e-06, "loss": 13.4435, "step": 374220 }, { "epoch": 0.7559682769264333, "grad_norm": 99.02581024169922, "learning_rate": 1.8066980857729937e-06, "loss": 16.3724, "step": 374230 }, { "epoch": 0.7559884775591171, "grad_norm": 76.87078094482422, "learning_rate": 1.8064294913149645e-06, "loss": 14.3106, "step": 374240 }, { "epoch": 0.756008678191801, "grad_norm": 746.1737060546875, "learning_rate": 1.806160912422012e-06, "loss": 17.5373, "step": 374250 }, { "epoch": 0.7560288788244848, "grad_norm": 147.80593872070312, "learning_rate": 1.8058923490954443e-06, "loss": 9.2528, "step": 374260 }, { "epoch": 0.7560490794571686, "grad_norm": 408.75677490234375, "learning_rate": 1.8056238013365679e-06, "loss": 22.3378, "step": 374270 }, { "epoch": 0.7560692800898524, "grad_norm": 53.84675979614258, "learning_rate": 1.8053552691466936e-06, "loss": 15.5796, "step": 374280 }, { "epoch": 0.7560894807225362, "grad_norm": 541.00830078125, "learning_rate": 1.805086752527132e-06, "loss": 14.2719, "step": 374290 }, { "epoch": 0.75610968135522, "grad_norm": 1040.6136474609375, "learning_rate": 1.8048182514791901e-06, "loss": 26.303, "step": 374300 }, { "epoch": 0.7561298819879039, "grad_norm": 5.465455532073975, "learning_rate": 1.804549766004175e-06, "loss": 20.0012, "step": 374310 }, { "epoch": 0.7561500826205877, "grad_norm": 313.8343200683594, "learning_rate": 1.8042812961033983e-06, "loss": 12.539, "step": 374320 }, { "epoch": 0.7561702832532715, "grad_norm": 0.7055712342262268, "learning_rate": 1.8040128417781672e-06, "loss": 17.5081, "step": 374330 }, { "epoch": 0.7561904838859553, "grad_norm": 81.33342742919922, "learning_rate": 1.8037444030297878e-06, "loss": 25.3508, "step": 374340 }, { "epoch": 0.7562106845186392, "grad_norm": 764.7306518554688, "learning_rate": 1.8034759798595724e-06, "loss": 17.5529, "step": 374350 }, { "epoch": 0.756230885151323, "grad_norm": 490.69293212890625, "learning_rate": 1.803207572268826e-06, "loss": 10.7631, "step": 374360 }, { "epoch": 0.7562510857840068, "grad_norm": 157.39866638183594, "learning_rate": 1.8029391802588598e-06, "loss": 17.3602, "step": 374370 }, { "epoch": 0.7562712864166906, "grad_norm": 1128.905029296875, "learning_rate": 1.8026708038309797e-06, "loss": 16.8336, "step": 374380 }, { "epoch": 0.7562914870493744, "grad_norm": 389.259033203125, "learning_rate": 1.8024024429864928e-06, "loss": 10.3319, "step": 374390 }, { "epoch": 0.7563116876820583, "grad_norm": 313.4371032714844, "learning_rate": 1.8021340977267104e-06, "loss": 15.9083, "step": 374400 }, { "epoch": 0.756331888314742, "grad_norm": 1055.1304931640625, "learning_rate": 1.801865768052939e-06, "loss": 29.6975, "step": 374410 }, { "epoch": 
0.7563520889474258, "grad_norm": 490.5217590332031, "learning_rate": 1.8015974539664839e-06, "loss": 18.6402, "step": 374420 }, { "epoch": 0.7563722895801096, "grad_norm": 279.38360595703125, "learning_rate": 1.8013291554686547e-06, "loss": 14.5644, "step": 374430 }, { "epoch": 0.7563924902127934, "grad_norm": 394.5645446777344, "learning_rate": 1.801060872560761e-06, "loss": 13.438, "step": 374440 }, { "epoch": 0.7564126908454772, "grad_norm": 100.2540054321289, "learning_rate": 1.800792605244109e-06, "loss": 13.5408, "step": 374450 }, { "epoch": 0.7564328914781611, "grad_norm": 129.83828735351562, "learning_rate": 1.8005243535200034e-06, "loss": 22.0487, "step": 374460 }, { "epoch": 0.7564530921108449, "grad_norm": 660.2085571289062, "learning_rate": 1.8002561173897564e-06, "loss": 12.8716, "step": 374470 }, { "epoch": 0.7564732927435287, "grad_norm": 0.35584282875061035, "learning_rate": 1.7999878968546724e-06, "loss": 17.5631, "step": 374480 }, { "epoch": 0.7564934933762125, "grad_norm": 427.03399658203125, "learning_rate": 1.7997196919160582e-06, "loss": 16.2899, "step": 374490 }, { "epoch": 0.7565136940088963, "grad_norm": 77.68339538574219, "learning_rate": 1.799451502575222e-06, "loss": 12.8195, "step": 374500 }, { "epoch": 0.7565338946415802, "grad_norm": 712.9091796875, "learning_rate": 1.7991833288334742e-06, "loss": 18.7816, "step": 374510 }, { "epoch": 0.756554095274264, "grad_norm": 238.63438415527344, "learning_rate": 1.7989151706921155e-06, "loss": 12.6859, "step": 374520 }, { "epoch": 0.7565742959069478, "grad_norm": 777.8014526367188, "learning_rate": 1.7986470281524555e-06, "loss": 18.9677, "step": 374530 }, { "epoch": 0.7565944965396316, "grad_norm": 615.5677490234375, "learning_rate": 1.7983789012158037e-06, "loss": 16.7531, "step": 374540 }, { "epoch": 0.7566146971723154, "grad_norm": 430.4071044921875, "learning_rate": 1.798110789883465e-06, "loss": 20.0846, "step": 374550 }, { "epoch": 0.7566348978049993, "grad_norm": 11.2094087600708, "learning_rate": 1.7978426941567435e-06, "loss": 11.201, "step": 374560 }, { "epoch": 0.7566550984376831, "grad_norm": 267.1214599609375, "learning_rate": 1.7975746140369505e-06, "loss": 16.2487, "step": 374570 }, { "epoch": 0.7566752990703669, "grad_norm": 454.36309814453125, "learning_rate": 1.7973065495253905e-06, "loss": 21.9082, "step": 374580 }, { "epoch": 0.7566954997030507, "grad_norm": 214.96356201171875, "learning_rate": 1.7970385006233682e-06, "loss": 15.3024, "step": 374590 }, { "epoch": 0.7567157003357345, "grad_norm": 288.4180908203125, "learning_rate": 1.7967704673321917e-06, "loss": 25.1004, "step": 374600 }, { "epoch": 0.7567359009684184, "grad_norm": 213.95083618164062, "learning_rate": 1.796502449653169e-06, "loss": 11.2068, "step": 374610 }, { "epoch": 0.7567561016011022, "grad_norm": 280.2714538574219, "learning_rate": 1.7962344475876054e-06, "loss": 13.2888, "step": 374620 }, { "epoch": 0.756776302233786, "grad_norm": 158.43423461914062, "learning_rate": 1.7959664611368043e-06, "loss": 20.3864, "step": 374630 }, { "epoch": 0.7567965028664698, "grad_norm": 419.7962951660156, "learning_rate": 1.7956984903020757e-06, "loss": 23.4935, "step": 374640 }, { "epoch": 0.7568167034991536, "grad_norm": 701.8889770507812, "learning_rate": 1.7954305350847246e-06, "loss": 27.9078, "step": 374650 }, { "epoch": 0.7568369041318375, "grad_norm": 245.12318420410156, "learning_rate": 1.7951625954860541e-06, "loss": 18.8585, "step": 374660 }, { "epoch": 0.7568571047645212, "grad_norm": 487.855224609375, "learning_rate": 
1.7948946715073744e-06, "loss": 19.2059, "step": 374670 }, { "epoch": 0.756877305397205, "grad_norm": 0.0, "learning_rate": 1.7946267631499874e-06, "loss": 29.6986, "step": 374680 }, { "epoch": 0.7568975060298888, "grad_norm": 488.7209777832031, "learning_rate": 1.7943588704152033e-06, "loss": 17.4271, "step": 374690 }, { "epoch": 0.7569177066625726, "grad_norm": 276.59039306640625, "learning_rate": 1.7940909933043243e-06, "loss": 15.1514, "step": 374700 }, { "epoch": 0.7569379072952565, "grad_norm": 526.3798828125, "learning_rate": 1.7938231318186555e-06, "loss": 18.1403, "step": 374710 }, { "epoch": 0.7569581079279403, "grad_norm": 0.0, "learning_rate": 1.7935552859595058e-06, "loss": 14.2099, "step": 374720 }, { "epoch": 0.7569783085606241, "grad_norm": 476.27899169921875, "learning_rate": 1.793287455728177e-06, "loss": 31.5622, "step": 374730 }, { "epoch": 0.7569985091933079, "grad_norm": 487.2760009765625, "learning_rate": 1.7930196411259782e-06, "loss": 23.4855, "step": 374740 }, { "epoch": 0.7570187098259917, "grad_norm": 281.9328918457031, "learning_rate": 1.7927518421542106e-06, "loss": 18.0875, "step": 374750 }, { "epoch": 0.7570389104586756, "grad_norm": 311.88751220703125, "learning_rate": 1.7924840588141829e-06, "loss": 9.8626, "step": 374760 }, { "epoch": 0.7570591110913594, "grad_norm": 582.0269775390625, "learning_rate": 1.7922162911071993e-06, "loss": 18.8568, "step": 374770 }, { "epoch": 0.7570793117240432, "grad_norm": 459.2454833984375, "learning_rate": 1.7919485390345631e-06, "loss": 22.8015, "step": 374780 }, { "epoch": 0.757099512356727, "grad_norm": 343.797607421875, "learning_rate": 1.791680802597582e-06, "loss": 13.3459, "step": 374790 }, { "epoch": 0.7571197129894108, "grad_norm": 302.8917236328125, "learning_rate": 1.7914130817975595e-06, "loss": 18.7183, "step": 374800 }, { "epoch": 0.7571399136220947, "grad_norm": 764.393798828125, "learning_rate": 1.7911453766357983e-06, "loss": 28.0872, "step": 374810 }, { "epoch": 0.7571601142547785, "grad_norm": 26.692785263061523, "learning_rate": 1.7908776871136063e-06, "loss": 30.2047, "step": 374820 }, { "epoch": 0.7571803148874623, "grad_norm": 113.09803771972656, "learning_rate": 1.7906100132322902e-06, "loss": 12.2912, "step": 374830 }, { "epoch": 0.7572005155201461, "grad_norm": 238.6380615234375, "learning_rate": 1.7903423549931482e-06, "loss": 15.993, "step": 374840 }, { "epoch": 0.7572207161528299, "grad_norm": 149.5492401123047, "learning_rate": 1.7900747123974882e-06, "loss": 37.5172, "step": 374850 }, { "epoch": 0.7572409167855138, "grad_norm": 377.97052001953125, "learning_rate": 1.7898070854466165e-06, "loss": 17.2467, "step": 374860 }, { "epoch": 0.7572611174181976, "grad_norm": 868.7412109375, "learning_rate": 1.7895394741418355e-06, "loss": 32.1399, "step": 374870 }, { "epoch": 0.7572813180508814, "grad_norm": 453.3160705566406, "learning_rate": 1.7892718784844482e-06, "loss": 14.5478, "step": 374880 }, { "epoch": 0.7573015186835652, "grad_norm": 242.47076416015625, "learning_rate": 1.7890042984757605e-06, "loss": 31.7516, "step": 374890 }, { "epoch": 0.757321719316249, "grad_norm": 257.50445556640625, "learning_rate": 1.7887367341170781e-06, "loss": 14.959, "step": 374900 }, { "epoch": 0.7573419199489329, "grad_norm": 208.0048065185547, "learning_rate": 1.788469185409703e-06, "loss": 20.22, "step": 374910 }, { "epoch": 0.7573621205816167, "grad_norm": 97.57205200195312, "learning_rate": 1.7882016523549383e-06, "loss": 12.312, "step": 374920 }, { "epoch": 0.7573823212143004, "grad_norm": 
493.3318786621094, "learning_rate": 1.7879341349540907e-06, "loss": 23.5748, "step": 374930 }, { "epoch": 0.7574025218469842, "grad_norm": 191.0437469482422, "learning_rate": 1.787666633208463e-06, "loss": 13.13, "step": 374940 }, { "epoch": 0.757422722479668, "grad_norm": 291.8238525390625, "learning_rate": 1.7873991471193563e-06, "loss": 34.0035, "step": 374950 }, { "epoch": 0.7574429231123518, "grad_norm": 245.30130004882812, "learning_rate": 1.787131676688078e-06, "loss": 28.5045, "step": 374960 }, { "epoch": 0.7574631237450357, "grad_norm": 2033.75244140625, "learning_rate": 1.7868642219159292e-06, "loss": 32.8364, "step": 374970 }, { "epoch": 0.7574833243777195, "grad_norm": 80.31974792480469, "learning_rate": 1.7865967828042158e-06, "loss": 13.944, "step": 374980 }, { "epoch": 0.7575035250104033, "grad_norm": 995.2206420898438, "learning_rate": 1.7863293593542402e-06, "loss": 31.8257, "step": 374990 }, { "epoch": 0.7575237256430871, "grad_norm": 381.6286315917969, "learning_rate": 1.7860619515673034e-06, "loss": 17.0038, "step": 375000 }, { "epoch": 0.7575439262757709, "grad_norm": 260.1957092285156, "learning_rate": 1.7857945594447128e-06, "loss": 18.8672, "step": 375010 }, { "epoch": 0.7575641269084548, "grad_norm": 89.29084014892578, "learning_rate": 1.7855271829877696e-06, "loss": 16.7167, "step": 375020 }, { "epoch": 0.7575843275411386, "grad_norm": 548.8855590820312, "learning_rate": 1.7852598221977757e-06, "loss": 25.9282, "step": 375030 }, { "epoch": 0.7576045281738224, "grad_norm": 1157.1932373046875, "learning_rate": 1.7849924770760352e-06, "loss": 15.5239, "step": 375040 }, { "epoch": 0.7576247288065062, "grad_norm": 375.1731262207031, "learning_rate": 1.784725147623853e-06, "loss": 13.8027, "step": 375050 }, { "epoch": 0.75764492943919, "grad_norm": 384.16510009765625, "learning_rate": 1.7844578338425306e-06, "loss": 17.4332, "step": 375060 }, { "epoch": 0.7576651300718739, "grad_norm": 388.6846923828125, "learning_rate": 1.7841905357333688e-06, "loss": 9.5122, "step": 375070 }, { "epoch": 0.7576853307045577, "grad_norm": 149.29364013671875, "learning_rate": 1.7839232532976746e-06, "loss": 21.0735, "step": 375080 }, { "epoch": 0.7577055313372415, "grad_norm": 122.9616470336914, "learning_rate": 1.783655986536748e-06, "loss": 14.2662, "step": 375090 }, { "epoch": 0.7577257319699253, "grad_norm": 224.22518920898438, "learning_rate": 1.7833887354518902e-06, "loss": 9.137, "step": 375100 }, { "epoch": 0.7577459326026091, "grad_norm": 309.8247375488281, "learning_rate": 1.7831215000444057e-06, "loss": 10.1294, "step": 375110 }, { "epoch": 0.757766133235293, "grad_norm": 232.92469787597656, "learning_rate": 1.7828542803156007e-06, "loss": 17.5009, "step": 375120 }, { "epoch": 0.7577863338679768, "grad_norm": 265.9322204589844, "learning_rate": 1.7825870762667696e-06, "loss": 15.1142, "step": 375130 }, { "epoch": 0.7578065345006606, "grad_norm": 513.8374633789062, "learning_rate": 1.7823198878992192e-06, "loss": 12.3842, "step": 375140 }, { "epoch": 0.7578267351333444, "grad_norm": 223.05540466308594, "learning_rate": 1.7820527152142531e-06, "loss": 17.299, "step": 375150 }, { "epoch": 0.7578469357660282, "grad_norm": 260.6461181640625, "learning_rate": 1.781785558213172e-06, "loss": 23.935, "step": 375160 }, { "epoch": 0.7578671363987121, "grad_norm": 231.95819091796875, "learning_rate": 1.781518416897276e-06, "loss": 24.2113, "step": 375170 }, { "epoch": 0.7578873370313958, "grad_norm": 597.959228515625, "learning_rate": 1.7812512912678687e-06, "loss": 16.3873, "step": 375180 
}, { "epoch": 0.7579075376640796, "grad_norm": 210.48451232910156, "learning_rate": 1.7809841813262558e-06, "loss": 10.5464, "step": 375190 }, { "epoch": 0.7579277382967634, "grad_norm": 129.9409942626953, "learning_rate": 1.7807170870737317e-06, "loss": 14.1919, "step": 375200 }, { "epoch": 0.7579479389294472, "grad_norm": 534.8091430664062, "learning_rate": 1.7804500085116022e-06, "loss": 12.0085, "step": 375210 }, { "epoch": 0.757968139562131, "grad_norm": 338.99273681640625, "learning_rate": 1.7801829456411713e-06, "loss": 10.6398, "step": 375220 }, { "epoch": 0.7579883401948149, "grad_norm": 183.349365234375, "learning_rate": 1.7799158984637372e-06, "loss": 23.207, "step": 375230 }, { "epoch": 0.7580085408274987, "grad_norm": 426.0802307128906, "learning_rate": 1.779648866980601e-06, "loss": 9.9525, "step": 375240 }, { "epoch": 0.7580287414601825, "grad_norm": 308.3338317871094, "learning_rate": 1.7793818511930678e-06, "loss": 24.7034, "step": 375250 }, { "epoch": 0.7580489420928663, "grad_norm": 926.7468872070312, "learning_rate": 1.779114851102437e-06, "loss": 28.6146, "step": 375260 }, { "epoch": 0.7580691427255501, "grad_norm": 465.4128112792969, "learning_rate": 1.7788478667100074e-06, "loss": 12.0828, "step": 375270 }, { "epoch": 0.758089343358234, "grad_norm": 27.61420440673828, "learning_rate": 1.7785808980170848e-06, "loss": 11.1147, "step": 375280 }, { "epoch": 0.7581095439909178, "grad_norm": 559.3772583007812, "learning_rate": 1.7783139450249664e-06, "loss": 24.2235, "step": 375290 }, { "epoch": 0.7581297446236016, "grad_norm": 297.36480712890625, "learning_rate": 1.7780470077349566e-06, "loss": 20.765, "step": 375300 }, { "epoch": 0.7581499452562854, "grad_norm": 486.7535095214844, "learning_rate": 1.7777800861483552e-06, "loss": 13.895, "step": 375310 }, { "epoch": 0.7581701458889692, "grad_norm": 242.015380859375, "learning_rate": 1.7775131802664608e-06, "loss": 17.063, "step": 375320 }, { "epoch": 0.7581903465216531, "grad_norm": 957.19189453125, "learning_rate": 1.777246290090578e-06, "loss": 49.9769, "step": 375330 }, { "epoch": 0.7582105471543369, "grad_norm": 338.21856689453125, "learning_rate": 1.7769794156220043e-06, "loss": 24.6517, "step": 375340 }, { "epoch": 0.7582307477870207, "grad_norm": 163.7085723876953, "learning_rate": 1.7767125568620442e-06, "loss": 27.5348, "step": 375350 }, { "epoch": 0.7582509484197045, "grad_norm": 382.12322998046875, "learning_rate": 1.776445713811994e-06, "loss": 12.7062, "step": 375360 }, { "epoch": 0.7582711490523883, "grad_norm": 244.8220977783203, "learning_rate": 1.7761788864731582e-06, "loss": 20.3112, "step": 375370 }, { "epoch": 0.7582913496850722, "grad_norm": 492.9038391113281, "learning_rate": 1.7759120748468356e-06, "loss": 20.0269, "step": 375380 }, { "epoch": 0.758311550317756, "grad_norm": 348.23773193359375, "learning_rate": 1.7756452789343243e-06, "loss": 27.5935, "step": 375390 }, { "epoch": 0.7583317509504398, "grad_norm": 266.7994689941406, "learning_rate": 1.7753784987369287e-06, "loss": 24.2155, "step": 375400 }, { "epoch": 0.7583519515831236, "grad_norm": 279.92987060546875, "learning_rate": 1.7751117342559477e-06, "loss": 12.4588, "step": 375410 }, { "epoch": 0.7583721522158074, "grad_norm": 1162.434814453125, "learning_rate": 1.7748449854926792e-06, "loss": 40.0468, "step": 375420 }, { "epoch": 0.7583923528484913, "grad_norm": 31.6923770904541, "learning_rate": 1.774578252448425e-06, "loss": 8.9493, "step": 375430 }, { "epoch": 0.758412553481175, "grad_norm": 194.1206512451172, "learning_rate": 
1.7743115351244883e-06, "loss": 14.6561, "step": 375440 }, { "epoch": 0.7584327541138588, "grad_norm": 337.4588317871094, "learning_rate": 1.7740448335221628e-06, "loss": 9.4775, "step": 375450 }, { "epoch": 0.7584529547465426, "grad_norm": 322.32781982421875, "learning_rate": 1.7737781476427511e-06, "loss": 29.0225, "step": 375460 }, { "epoch": 0.7584731553792264, "grad_norm": 420.2108459472656, "learning_rate": 1.7735114774875556e-06, "loss": 19.4391, "step": 375470 }, { "epoch": 0.7584933560119103, "grad_norm": 3.5831351280212402, "learning_rate": 1.7732448230578743e-06, "loss": 9.7425, "step": 375480 }, { "epoch": 0.7585135566445941, "grad_norm": 258.252197265625, "learning_rate": 1.7729781843550036e-06, "loss": 12.9734, "step": 375490 }, { "epoch": 0.7585337572772779, "grad_norm": 553.4739990234375, "learning_rate": 1.7727115613802465e-06, "loss": 30.479, "step": 375500 }, { "epoch": 0.7585539579099617, "grad_norm": 39.61473083496094, "learning_rate": 1.7724449541349048e-06, "loss": 17.0065, "step": 375510 }, { "epoch": 0.7585741585426455, "grad_norm": 770.8367309570312, "learning_rate": 1.772178362620272e-06, "loss": 22.7148, "step": 375520 }, { "epoch": 0.7585943591753294, "grad_norm": 1040.59619140625, "learning_rate": 1.77191178683765e-06, "loss": 27.546, "step": 375530 }, { "epoch": 0.7586145598080132, "grad_norm": 78.16218566894531, "learning_rate": 1.7716452267883404e-06, "loss": 33.4048, "step": 375540 }, { "epoch": 0.758634760440697, "grad_norm": 380.9717712402344, "learning_rate": 1.7713786824736406e-06, "loss": 32.5716, "step": 375550 }, { "epoch": 0.7586549610733808, "grad_norm": 368.9980163574219, "learning_rate": 1.7711121538948473e-06, "loss": 17.0607, "step": 375560 }, { "epoch": 0.7586751617060646, "grad_norm": 460.2774353027344, "learning_rate": 1.7708456410532637e-06, "loss": 22.7964, "step": 375570 }, { "epoch": 0.7586953623387485, "grad_norm": 50.77473831176758, "learning_rate": 1.7705791439501851e-06, "loss": 15.9386, "step": 375580 }, { "epoch": 0.7587155629714323, "grad_norm": 324.2077331542969, "learning_rate": 1.7703126625869138e-06, "loss": 14.2888, "step": 375590 }, { "epoch": 0.7587357636041161, "grad_norm": 667.908935546875, "learning_rate": 1.770046196964747e-06, "loss": 29.7749, "step": 375600 }, { "epoch": 0.7587559642367999, "grad_norm": 496.64434814453125, "learning_rate": 1.769779747084981e-06, "loss": 17.4577, "step": 375610 }, { "epoch": 0.7587761648694837, "grad_norm": 305.63262939453125, "learning_rate": 1.769513312948919e-06, "loss": 13.8207, "step": 375620 }, { "epoch": 0.7587963655021676, "grad_norm": 618.8668212890625, "learning_rate": 1.7692468945578572e-06, "loss": 18.8828, "step": 375630 }, { "epoch": 0.7588165661348514, "grad_norm": 278.47723388671875, "learning_rate": 1.768980491913092e-06, "loss": 15.5401, "step": 375640 }, { "epoch": 0.7588367667675352, "grad_norm": 236.46444702148438, "learning_rate": 1.7687141050159246e-06, "loss": 20.985, "step": 375650 }, { "epoch": 0.758856967400219, "grad_norm": 313.394287109375, "learning_rate": 1.7684477338676543e-06, "loss": 13.4524, "step": 375660 }, { "epoch": 0.7588771680329028, "grad_norm": 161.9228057861328, "learning_rate": 1.7681813784695778e-06, "loss": 14.2453, "step": 375670 }, { "epoch": 0.7588973686655867, "grad_norm": 340.1613464355469, "learning_rate": 1.7679150388229916e-06, "loss": 11.2734, "step": 375680 }, { "epoch": 0.7589175692982704, "grad_norm": 270.6866760253906, "learning_rate": 1.7676487149291972e-06, "loss": 21.4311, "step": 375690 }, { "epoch": 0.7589377699309542, 
"grad_norm": 413.8176574707031, "learning_rate": 1.7673824067894912e-06, "loss": 13.3455, "step": 375700 }, { "epoch": 0.758957970563638, "grad_norm": 400.6026306152344, "learning_rate": 1.767116114405169e-06, "loss": 20.54, "step": 375710 }, { "epoch": 0.7589781711963218, "grad_norm": 476.71063232421875, "learning_rate": 1.7668498377775312e-06, "loss": 15.2724, "step": 375720 }, { "epoch": 0.7589983718290056, "grad_norm": 417.1393127441406, "learning_rate": 1.7665835769078782e-06, "loss": 12.251, "step": 375730 }, { "epoch": 0.7590185724616895, "grad_norm": 2.2373547554016113, "learning_rate": 1.7663173317975012e-06, "loss": 9.704, "step": 375740 }, { "epoch": 0.7590387730943733, "grad_norm": 412.61566162109375, "learning_rate": 1.7660511024477018e-06, "loss": 29.7417, "step": 375750 }, { "epoch": 0.7590589737270571, "grad_norm": 271.6612854003906, "learning_rate": 1.765784888859779e-06, "loss": 17.1338, "step": 375760 }, { "epoch": 0.7590791743597409, "grad_norm": 358.768310546875, "learning_rate": 1.7655186910350276e-06, "loss": 22.6152, "step": 375770 }, { "epoch": 0.7590993749924247, "grad_norm": 253.42044067382812, "learning_rate": 1.7652525089747447e-06, "loss": 21.7026, "step": 375780 }, { "epoch": 0.7591195756251086, "grad_norm": 601.6935424804688, "learning_rate": 1.7649863426802283e-06, "loss": 28.4537, "step": 375790 }, { "epoch": 0.7591397762577924, "grad_norm": 317.54400634765625, "learning_rate": 1.7647201921527802e-06, "loss": 14.1507, "step": 375800 }, { "epoch": 0.7591599768904762, "grad_norm": 226.88931274414062, "learning_rate": 1.7644540573936892e-06, "loss": 10.0781, "step": 375810 }, { "epoch": 0.75918017752316, "grad_norm": 469.7894592285156, "learning_rate": 1.7641879384042571e-06, "loss": 23.0613, "step": 375820 }, { "epoch": 0.7592003781558438, "grad_norm": 657.5131225585938, "learning_rate": 1.7639218351857824e-06, "loss": 26.1201, "step": 375830 }, { "epoch": 0.7592205787885277, "grad_norm": 550.1307373046875, "learning_rate": 1.76365574773956e-06, "loss": 33.5979, "step": 375840 }, { "epoch": 0.7592407794212115, "grad_norm": 411.6669616699219, "learning_rate": 1.763389676066885e-06, "loss": 15.1043, "step": 375850 }, { "epoch": 0.7592609800538953, "grad_norm": 618.7662963867188, "learning_rate": 1.7631236201690583e-06, "loss": 10.8901, "step": 375860 }, { "epoch": 0.7592811806865791, "grad_norm": 263.6335144042969, "learning_rate": 1.7628575800473747e-06, "loss": 23.4085, "step": 375870 }, { "epoch": 0.759301381319263, "grad_norm": 565.1019287109375, "learning_rate": 1.7625915557031287e-06, "loss": 19.3755, "step": 375880 }, { "epoch": 0.7593215819519468, "grad_norm": 341.90289306640625, "learning_rate": 1.7623255471376199e-06, "loss": 17.2587, "step": 375890 }, { "epoch": 0.7593417825846306, "grad_norm": 203.76864624023438, "learning_rate": 1.762059554352143e-06, "loss": 34.235, "step": 375900 }, { "epoch": 0.7593619832173144, "grad_norm": 605.4048461914062, "learning_rate": 1.761793577347996e-06, "loss": 20.0809, "step": 375910 }, { "epoch": 0.7593821838499982, "grad_norm": 321.06439208984375, "learning_rate": 1.761527616126475e-06, "loss": 17.8851, "step": 375920 }, { "epoch": 0.759402384482682, "grad_norm": 319.451416015625, "learning_rate": 1.7612616706888735e-06, "loss": 5.3906, "step": 375930 }, { "epoch": 0.7594225851153659, "grad_norm": 418.46124267578125, "learning_rate": 1.7609957410364913e-06, "loss": 17.0868, "step": 375940 }, { "epoch": 0.7594427857480496, "grad_norm": 216.66879272460938, "learning_rate": 1.760729827170622e-06, "loss": 19.261, 
"step": 375950 }, { "epoch": 0.7594629863807334, "grad_norm": 174.04296875, "learning_rate": 1.760463929092564e-06, "loss": 10.1454, "step": 375960 }, { "epoch": 0.7594831870134172, "grad_norm": 330.9078674316406, "learning_rate": 1.76019804680361e-06, "loss": 24.3022, "step": 375970 }, { "epoch": 0.759503387646101, "grad_norm": 453.86749267578125, "learning_rate": 1.7599321803050595e-06, "loss": 13.8887, "step": 375980 }, { "epoch": 0.7595235882787849, "grad_norm": 210.9562530517578, "learning_rate": 1.7596663295982069e-06, "loss": 29.3843, "step": 375990 }, { "epoch": 0.7595437889114687, "grad_norm": 324.6178894042969, "learning_rate": 1.7594004946843458e-06, "loss": 13.822, "step": 376000 }, { "epoch": 0.7595639895441525, "grad_norm": 1.545017123222351, "learning_rate": 1.7591346755647754e-06, "loss": 11.5382, "step": 376010 }, { "epoch": 0.7595841901768363, "grad_norm": 298.6878356933594, "learning_rate": 1.7588688722407892e-06, "loss": 19.7182, "step": 376020 }, { "epoch": 0.7596043908095201, "grad_norm": 495.82452392578125, "learning_rate": 1.7586030847136815e-06, "loss": 20.0121, "step": 376030 }, { "epoch": 0.759624591442204, "grad_norm": 501.1104431152344, "learning_rate": 1.7583373129847493e-06, "loss": 41.7348, "step": 376040 }, { "epoch": 0.7596447920748878, "grad_norm": 964.5734252929688, "learning_rate": 1.758071557055291e-06, "loss": 32.0864, "step": 376050 }, { "epoch": 0.7596649927075716, "grad_norm": 415.458251953125, "learning_rate": 1.7578058169265954e-06, "loss": 34.0849, "step": 376060 }, { "epoch": 0.7596851933402554, "grad_norm": 24.043859481811523, "learning_rate": 1.7575400925999613e-06, "loss": 10.3754, "step": 376070 }, { "epoch": 0.7597053939729392, "grad_norm": 319.2466125488281, "learning_rate": 1.7572743840766854e-06, "loss": 19.9899, "step": 376080 }, { "epoch": 0.7597255946056231, "grad_norm": 253.73214721679688, "learning_rate": 1.7570086913580603e-06, "loss": 13.9276, "step": 376090 }, { "epoch": 0.7597457952383069, "grad_norm": 637.227294921875, "learning_rate": 1.7567430144453801e-06, "loss": 13.6044, "step": 376100 }, { "epoch": 0.7597659958709907, "grad_norm": 288.0482177734375, "learning_rate": 1.756477353339941e-06, "loss": 15.9136, "step": 376110 }, { "epoch": 0.7597861965036745, "grad_norm": 709.585205078125, "learning_rate": 1.7562117080430413e-06, "loss": 35.3148, "step": 376120 }, { "epoch": 0.7598063971363583, "grad_norm": 401.94012451171875, "learning_rate": 1.755946078555969e-06, "loss": 15.7747, "step": 376130 }, { "epoch": 0.7598265977690422, "grad_norm": 281.07598876953125, "learning_rate": 1.755680464880022e-06, "loss": 8.5536, "step": 376140 }, { "epoch": 0.759846798401726, "grad_norm": 445.91845703125, "learning_rate": 1.7554148670164966e-06, "loss": 22.7897, "step": 376150 }, { "epoch": 0.7598669990344098, "grad_norm": 166.84934997558594, "learning_rate": 1.7551492849666857e-06, "loss": 15.9211, "step": 376160 }, { "epoch": 0.7598871996670936, "grad_norm": 587.87841796875, "learning_rate": 1.7548837187318817e-06, "loss": 21.8101, "step": 376170 }, { "epoch": 0.7599074002997774, "grad_norm": 66.2043228149414, "learning_rate": 1.7546181683133829e-06, "loss": 18.6671, "step": 376180 }, { "epoch": 0.7599276009324613, "grad_norm": 522.960693359375, "learning_rate": 1.7543526337124817e-06, "loss": 21.857, "step": 376190 }, { "epoch": 0.7599478015651451, "grad_norm": 322.857666015625, "learning_rate": 1.75408711493047e-06, "loss": 19.3971, "step": 376200 }, { "epoch": 0.7599680021978288, "grad_norm": 423.8174133300781, "learning_rate": 
1.7538216119686457e-06, "loss": 16.8876, "step": 376210 }, { "epoch": 0.7599882028305126, "grad_norm": 687.6094970703125, "learning_rate": 1.7535561248282994e-06, "loss": 15.8927, "step": 376220 }, { "epoch": 0.7600084034631964, "grad_norm": 347.0122375488281, "learning_rate": 1.7532906535107286e-06, "loss": 8.7866, "step": 376230 }, { "epoch": 0.7600286040958802, "grad_norm": 1143.4398193359375, "learning_rate": 1.7530251980172241e-06, "loss": 44.6102, "step": 376240 }, { "epoch": 0.7600488047285641, "grad_norm": 329.41644287109375, "learning_rate": 1.7527597583490825e-06, "loss": 11.3206, "step": 376250 }, { "epoch": 0.7600690053612479, "grad_norm": 250.16851806640625, "learning_rate": 1.7524943345075957e-06, "loss": 12.0135, "step": 376260 }, { "epoch": 0.7600892059939317, "grad_norm": 74.37361145019531, "learning_rate": 1.7522289264940557e-06, "loss": 8.2757, "step": 376270 }, { "epoch": 0.7601094066266155, "grad_norm": 785.9454345703125, "learning_rate": 1.7519635343097601e-06, "loss": 23.9677, "step": 376280 }, { "epoch": 0.7601296072592993, "grad_norm": 468.9040832519531, "learning_rate": 1.7516981579559987e-06, "loss": 17.7546, "step": 376290 }, { "epoch": 0.7601498078919832, "grad_norm": 215.666748046875, "learning_rate": 1.751432797434068e-06, "loss": 19.3954, "step": 376300 }, { "epoch": 0.760170008524667, "grad_norm": 315.82989501953125, "learning_rate": 1.75116745274526e-06, "loss": 24.7293, "step": 376310 }, { "epoch": 0.7601902091573508, "grad_norm": 281.9214782714844, "learning_rate": 1.7509021238908659e-06, "loss": 12.1708, "step": 376320 }, { "epoch": 0.7602104097900346, "grad_norm": 243.76895141601562, "learning_rate": 1.750636810872181e-06, "loss": 7.8583, "step": 376330 }, { "epoch": 0.7602306104227184, "grad_norm": 484.5269775390625, "learning_rate": 1.7503715136905014e-06, "loss": 19.7257, "step": 376340 }, { "epoch": 0.7602508110554023, "grad_norm": 266.2414245605469, "learning_rate": 1.7501062323471136e-06, "loss": 11.5703, "step": 376350 }, { "epoch": 0.7602710116880861, "grad_norm": 462.5458068847656, "learning_rate": 1.7498409668433135e-06, "loss": 12.6849, "step": 376360 }, { "epoch": 0.7602912123207699, "grad_norm": 376.05426025390625, "learning_rate": 1.7495757171803967e-06, "loss": 7.2245, "step": 376370 }, { "epoch": 0.7603114129534537, "grad_norm": 179.6309051513672, "learning_rate": 1.749310483359653e-06, "loss": 25.1848, "step": 376380 }, { "epoch": 0.7603316135861375, "grad_norm": 282.795654296875, "learning_rate": 1.7490452653823747e-06, "loss": 18.6978, "step": 376390 }, { "epoch": 0.7603518142188214, "grad_norm": 239.79541015625, "learning_rate": 1.7487800632498547e-06, "loss": 17.3848, "step": 376400 }, { "epoch": 0.7603720148515052, "grad_norm": 274.5397644042969, "learning_rate": 1.7485148769633903e-06, "loss": 23.4847, "step": 376410 }, { "epoch": 0.760392215484189, "grad_norm": 0.0, "learning_rate": 1.7482497065242665e-06, "loss": 14.6895, "step": 376420 }, { "epoch": 0.7604124161168728, "grad_norm": 1420.82470703125, "learning_rate": 1.7479845519337795e-06, "loss": 29.4314, "step": 376430 }, { "epoch": 0.7604326167495566, "grad_norm": 713.2938232421875, "learning_rate": 1.7477194131932229e-06, "loss": 22.3533, "step": 376440 }, { "epoch": 0.7604528173822405, "grad_norm": 261.40106201171875, "learning_rate": 1.747454290303887e-06, "loss": 20.2105, "step": 376450 }, { "epoch": 0.7604730180149242, "grad_norm": 211.13002014160156, "learning_rate": 1.747189183267063e-06, "loss": 16.6662, "step": 376460 }, { "epoch": 0.760493218647608, "grad_norm": 
264.2223205566406, "learning_rate": 1.7469240920840463e-06, "loss": 22.8406, "step": 376470 }, { "epoch": 0.7605134192802918, "grad_norm": 301.1449279785156, "learning_rate": 1.746659016756127e-06, "loss": 23.6323, "step": 376480 }, { "epoch": 0.7605336199129756, "grad_norm": 300.9630126953125, "learning_rate": 1.7463939572845951e-06, "loss": 13.9824, "step": 376490 }, { "epoch": 0.7605538205456595, "grad_norm": 218.0428009033203, "learning_rate": 1.746128913670746e-06, "loss": 13.2248, "step": 376500 }, { "epoch": 0.7605740211783433, "grad_norm": 198.65106201171875, "learning_rate": 1.7458638859158688e-06, "loss": 12.9668, "step": 376510 }, { "epoch": 0.7605942218110271, "grad_norm": 931.1880493164062, "learning_rate": 1.7455988740212576e-06, "loss": 23.6093, "step": 376520 }, { "epoch": 0.7606144224437109, "grad_norm": 88.32315826416016, "learning_rate": 1.7453338779882029e-06, "loss": 24.4658, "step": 376530 }, { "epoch": 0.7606346230763947, "grad_norm": 170.65892028808594, "learning_rate": 1.7450688978179947e-06, "loss": 20.2341, "step": 376540 }, { "epoch": 0.7606548237090786, "grad_norm": 787.7627563476562, "learning_rate": 1.7448039335119272e-06, "loss": 24.3571, "step": 376550 }, { "epoch": 0.7606750243417624, "grad_norm": 154.41603088378906, "learning_rate": 1.744538985071289e-06, "loss": 18.6799, "step": 376560 }, { "epoch": 0.7606952249744462, "grad_norm": 308.78009033203125, "learning_rate": 1.7442740524973744e-06, "loss": 21.146, "step": 376570 }, { "epoch": 0.76071542560713, "grad_norm": 6.902546405792236, "learning_rate": 1.7440091357914718e-06, "loss": 20.52, "step": 376580 }, { "epoch": 0.7607356262398138, "grad_norm": 789.2552490234375, "learning_rate": 1.7437442349548756e-06, "loss": 18.2658, "step": 376590 }, { "epoch": 0.7607558268724977, "grad_norm": 231.39454650878906, "learning_rate": 1.7434793499888746e-06, "loss": 15.8216, "step": 376600 }, { "epoch": 0.7607760275051815, "grad_norm": 601.401123046875, "learning_rate": 1.7432144808947587e-06, "loss": 18.9456, "step": 376610 }, { "epoch": 0.7607962281378653, "grad_norm": 235.45130920410156, "learning_rate": 1.7429496276738223e-06, "loss": 17.1497, "step": 376620 }, { "epoch": 0.7608164287705491, "grad_norm": 443.2958679199219, "learning_rate": 1.7426847903273547e-06, "loss": 25.5497, "step": 376630 }, { "epoch": 0.7608366294032329, "grad_norm": 429.2977600097656, "learning_rate": 1.742419968856644e-06, "loss": 19.2875, "step": 376640 }, { "epoch": 0.7608568300359168, "grad_norm": 333.5958557128906, "learning_rate": 1.7421551632629835e-06, "loss": 15.7969, "step": 376650 }, { "epoch": 0.7608770306686006, "grad_norm": 301.61822509765625, "learning_rate": 1.7418903735476673e-06, "loss": 27.5749, "step": 376660 }, { "epoch": 0.7608972313012844, "grad_norm": 244.7122344970703, "learning_rate": 1.7416255997119786e-06, "loss": 18.7674, "step": 376670 }, { "epoch": 0.7609174319339682, "grad_norm": 261.3179016113281, "learning_rate": 1.7413608417572114e-06, "loss": 16.8346, "step": 376680 }, { "epoch": 0.760937632566652, "grad_norm": 485.1816711425781, "learning_rate": 1.7410960996846583e-06, "loss": 13.3663, "step": 376690 }, { "epoch": 0.7609578331993359, "grad_norm": 878.5574340820312, "learning_rate": 1.7408313734956074e-06, "loss": 40.5127, "step": 376700 }, { "epoch": 0.7609780338320197, "grad_norm": 110.62535095214844, "learning_rate": 1.7405666631913475e-06, "loss": 21.6779, "step": 376710 }, { "epoch": 0.7609982344647034, "grad_norm": 265.9751281738281, "learning_rate": 1.7403019687731704e-06, "loss": 12.9539, 
"step": 376720 }, { "epoch": 0.7610184350973872, "grad_norm": 473.6266174316406, "learning_rate": 1.740037290242369e-06, "loss": 12.9548, "step": 376730 }, { "epoch": 0.761038635730071, "grad_norm": 351.79290771484375, "learning_rate": 1.7397726276002274e-06, "loss": 13.2225, "step": 376740 }, { "epoch": 0.7610588363627548, "grad_norm": 320.6704406738281, "learning_rate": 1.7395079808480386e-06, "loss": 28.4213, "step": 376750 }, { "epoch": 0.7610790369954387, "grad_norm": 175.11500549316406, "learning_rate": 1.7392433499870941e-06, "loss": 28.7987, "step": 376760 }, { "epoch": 0.7610992376281225, "grad_norm": 798.2296142578125, "learning_rate": 1.738978735018682e-06, "loss": 15.7704, "step": 376770 }, { "epoch": 0.7611194382608063, "grad_norm": 272.8622741699219, "learning_rate": 1.7387141359440907e-06, "loss": 26.9554, "step": 376780 }, { "epoch": 0.7611396388934901, "grad_norm": 130.2759552001953, "learning_rate": 1.7384495527646127e-06, "loss": 19.8157, "step": 376790 }, { "epoch": 0.7611598395261739, "grad_norm": 867.0805053710938, "learning_rate": 1.738184985481536e-06, "loss": 18.2362, "step": 376800 }, { "epoch": 0.7611800401588578, "grad_norm": 538.44482421875, "learning_rate": 1.7379204340961481e-06, "loss": 15.0707, "step": 376810 }, { "epoch": 0.7612002407915416, "grad_norm": 425.69573974609375, "learning_rate": 1.7376558986097424e-06, "loss": 20.5225, "step": 376820 }, { "epoch": 0.7612204414242254, "grad_norm": 251.30618286132812, "learning_rate": 1.737391379023604e-06, "loss": 11.0112, "step": 376830 }, { "epoch": 0.7612406420569092, "grad_norm": 557.7199096679688, "learning_rate": 1.7371268753390265e-06, "loss": 35.7515, "step": 376840 }, { "epoch": 0.761260842689593, "grad_norm": 102.01097869873047, "learning_rate": 1.7368623875572948e-06, "loss": 15.3064, "step": 376850 }, { "epoch": 0.7612810433222769, "grad_norm": 234.31521606445312, "learning_rate": 1.736597915679702e-06, "loss": 16.7485, "step": 376860 }, { "epoch": 0.7613012439549607, "grad_norm": 334.3034362792969, "learning_rate": 1.7363334597075354e-06, "loss": 29.7615, "step": 376870 }, { "epoch": 0.7613214445876445, "grad_norm": 175.65499877929688, "learning_rate": 1.7360690196420816e-06, "loss": 11.9786, "step": 376880 }, { "epoch": 0.7613416452203283, "grad_norm": 273.6047668457031, "learning_rate": 1.735804595484633e-06, "loss": 16.8305, "step": 376890 }, { "epoch": 0.7613618458530121, "grad_norm": 356.8277282714844, "learning_rate": 1.7355401872364759e-06, "loss": 28.5677, "step": 376900 }, { "epoch": 0.761382046485696, "grad_norm": 453.5097961425781, "learning_rate": 1.7352757948989012e-06, "loss": 34.673, "step": 376910 }, { "epoch": 0.7614022471183798, "grad_norm": 114.28955078125, "learning_rate": 1.7350114184731965e-06, "loss": 24.7503, "step": 376920 }, { "epoch": 0.7614224477510636, "grad_norm": 123.94576263427734, "learning_rate": 1.7347470579606478e-06, "loss": 15.0611, "step": 376930 }, { "epoch": 0.7614426483837474, "grad_norm": 51.32515335083008, "learning_rate": 1.7344827133625487e-06, "loss": 13.312, "step": 376940 }, { "epoch": 0.7614628490164312, "grad_norm": 0.0, "learning_rate": 1.734218384680184e-06, "loss": 18.5802, "step": 376950 }, { "epoch": 0.7614830496491151, "grad_norm": 9.909598350524902, "learning_rate": 1.7339540719148417e-06, "loss": 16.478, "step": 376960 }, { "epoch": 0.7615032502817988, "grad_norm": 215.57704162597656, "learning_rate": 1.7336897750678106e-06, "loss": 15.0508, "step": 376970 }, { "epoch": 0.7615234509144826, "grad_norm": 612.6788330078125, "learning_rate": 
1.733425494140381e-06, "loss": 13.8191, "step": 376980 }, { "epoch": 0.7615436515471664, "grad_norm": 387.7210388183594, "learning_rate": 1.7331612291338402e-06, "loss": 19.9209, "step": 376990 }, { "epoch": 0.7615638521798502, "grad_norm": 377.4043884277344, "learning_rate": 1.7328969800494727e-06, "loss": 15.2365, "step": 377000 }, { "epoch": 0.761584052812534, "grad_norm": 31.66712188720703, "learning_rate": 1.7326327468885695e-06, "loss": 21.8383, "step": 377010 }, { "epoch": 0.7616042534452179, "grad_norm": 58.340267181396484, "learning_rate": 1.7323685296524212e-06, "loss": 18.2436, "step": 377020 }, { "epoch": 0.7616244540779017, "grad_norm": 673.8753662109375, "learning_rate": 1.7321043283423094e-06, "loss": 20.8677, "step": 377030 }, { "epoch": 0.7616446547105855, "grad_norm": 205.27183532714844, "learning_rate": 1.7318401429595244e-06, "loss": 16.2658, "step": 377040 }, { "epoch": 0.7616648553432693, "grad_norm": 0.0, "learning_rate": 1.7315759735053562e-06, "loss": 11.151, "step": 377050 }, { "epoch": 0.7616850559759532, "grad_norm": 1154.3646240234375, "learning_rate": 1.7313118199810897e-06, "loss": 21.8191, "step": 377060 }, { "epoch": 0.761705256608637, "grad_norm": 54.69978332519531, "learning_rate": 1.7310476823880118e-06, "loss": 27.5796, "step": 377070 }, { "epoch": 0.7617254572413208, "grad_norm": 659.6768798828125, "learning_rate": 1.7307835607274125e-06, "loss": 22.9341, "step": 377080 }, { "epoch": 0.7617456578740046, "grad_norm": 41.843936920166016, "learning_rate": 1.7305194550005776e-06, "loss": 14.992, "step": 377090 }, { "epoch": 0.7617658585066884, "grad_norm": 483.52044677734375, "learning_rate": 1.7302553652087927e-06, "loss": 10.2882, "step": 377100 }, { "epoch": 0.7617860591393723, "grad_norm": 670.2805786132812, "learning_rate": 1.7299912913533485e-06, "loss": 15.0992, "step": 377110 }, { "epoch": 0.7618062597720561, "grad_norm": 184.13381958007812, "learning_rate": 1.729727233435528e-06, "loss": 23.6193, "step": 377120 }, { "epoch": 0.7618264604047399, "grad_norm": 127.78020477294922, "learning_rate": 1.7294631914566222e-06, "loss": 17.8214, "step": 377130 }, { "epoch": 0.7618466610374237, "grad_norm": 945.5842895507812, "learning_rate": 1.7291991654179163e-06, "loss": 21.2076, "step": 377140 }, { "epoch": 0.7618668616701075, "grad_norm": 12.364981651306152, "learning_rate": 1.7289351553206952e-06, "loss": 19.2048, "step": 377150 }, { "epoch": 0.7618870623027914, "grad_norm": 207.98306274414062, "learning_rate": 1.7286711611662488e-06, "loss": 9.4154, "step": 377160 }, { "epoch": 0.7619072629354752, "grad_norm": 261.1643371582031, "learning_rate": 1.7284071829558606e-06, "loss": 10.8977, "step": 377170 }, { "epoch": 0.761927463568159, "grad_norm": 807.3040161132812, "learning_rate": 1.7281432206908211e-06, "loss": 25.6677, "step": 377180 }, { "epoch": 0.7619476642008428, "grad_norm": 552.6361694335938, "learning_rate": 1.7278792743724133e-06, "loss": 32.2756, "step": 377190 }, { "epoch": 0.7619678648335266, "grad_norm": 669.0811767578125, "learning_rate": 1.727615344001926e-06, "loss": 16.0056, "step": 377200 }, { "epoch": 0.7619880654662105, "grad_norm": 229.4485626220703, "learning_rate": 1.7273514295806454e-06, "loss": 18.1232, "step": 377210 }, { "epoch": 0.7620082660988943, "grad_norm": 67.4647216796875, "learning_rate": 1.727087531109855e-06, "loss": 19.872, "step": 377220 }, { "epoch": 0.762028466731578, "grad_norm": 313.5301513671875, "learning_rate": 1.7268236485908446e-06, "loss": 21.8244, "step": 377230 }, { "epoch": 0.7620486673642618, 
"grad_norm": 387.4441223144531, "learning_rate": 1.7265597820248987e-06, "loss": 15.3841, "step": 377240 }, { "epoch": 0.7620688679969456, "grad_norm": 537.0117797851562, "learning_rate": 1.7262959314133015e-06, "loss": 24.9926, "step": 377250 }, { "epoch": 0.7620890686296294, "grad_norm": 375.2696533203125, "learning_rate": 1.7260320967573413e-06, "loss": 17.9223, "step": 377260 }, { "epoch": 0.7621092692623133, "grad_norm": 92.41923522949219, "learning_rate": 1.7257682780583068e-06, "loss": 21.5852, "step": 377270 }, { "epoch": 0.7621294698949971, "grad_norm": 501.506591796875, "learning_rate": 1.7255044753174778e-06, "loss": 26.0706, "step": 377280 }, { "epoch": 0.7621496705276809, "grad_norm": 665.3644409179688, "learning_rate": 1.7252406885361416e-06, "loss": 16.7149, "step": 377290 }, { "epoch": 0.7621698711603647, "grad_norm": 637.6961059570312, "learning_rate": 1.7249769177155879e-06, "loss": 30.2187, "step": 377300 }, { "epoch": 0.7621900717930485, "grad_norm": 745.5427856445312, "learning_rate": 1.724713162857099e-06, "loss": 17.7424, "step": 377310 }, { "epoch": 0.7622102724257324, "grad_norm": 108.52301025390625, "learning_rate": 1.7244494239619592e-06, "loss": 17.9354, "step": 377320 }, { "epoch": 0.7622304730584162, "grad_norm": 512.7685546875, "learning_rate": 1.7241857010314555e-06, "loss": 11.6889, "step": 377330 }, { "epoch": 0.7622506736911, "grad_norm": 328.822265625, "learning_rate": 1.7239219940668771e-06, "loss": 16.8154, "step": 377340 }, { "epoch": 0.7622708743237838, "grad_norm": 314.33148193359375, "learning_rate": 1.723658303069502e-06, "loss": 19.154, "step": 377350 }, { "epoch": 0.7622910749564676, "grad_norm": 368.0884704589844, "learning_rate": 1.7233946280406193e-06, "loss": 24.4742, "step": 377360 }, { "epoch": 0.7623112755891515, "grad_norm": 575.9612426757812, "learning_rate": 1.723130968981515e-06, "loss": 23.5788, "step": 377370 }, { "epoch": 0.7623314762218353, "grad_norm": 393.7576904296875, "learning_rate": 1.722867325893473e-06, "loss": 22.748, "step": 377380 }, { "epoch": 0.7623516768545191, "grad_norm": 506.5479736328125, "learning_rate": 1.7226036987777767e-06, "loss": 16.1577, "step": 377390 }, { "epoch": 0.7623718774872029, "grad_norm": 405.98748779296875, "learning_rate": 1.7223400876357144e-06, "loss": 21.2211, "step": 377400 }, { "epoch": 0.7623920781198867, "grad_norm": 191.85047912597656, "learning_rate": 1.7220764924685685e-06, "loss": 18.3298, "step": 377410 }, { "epoch": 0.7624122787525706, "grad_norm": 398.3681640625, "learning_rate": 1.7218129132776224e-06, "loss": 25.5017, "step": 377420 }, { "epoch": 0.7624324793852544, "grad_norm": 109.55448150634766, "learning_rate": 1.7215493500641645e-06, "loss": 12.3465, "step": 377430 }, { "epoch": 0.7624526800179382, "grad_norm": 247.6683349609375, "learning_rate": 1.721285802829476e-06, "loss": 23.2204, "step": 377440 }, { "epoch": 0.762472880650622, "grad_norm": 323.5118408203125, "learning_rate": 1.7210222715748443e-06, "loss": 21.2569, "step": 377450 }, { "epoch": 0.7624930812833058, "grad_norm": 557.070556640625, "learning_rate": 1.7207587563015505e-06, "loss": 35.0821, "step": 377460 }, { "epoch": 0.7625132819159897, "grad_norm": 469.9717102050781, "learning_rate": 1.720495257010883e-06, "loss": 14.0372, "step": 377470 }, { "epoch": 0.7625334825486734, "grad_norm": 512.7113037109375, "learning_rate": 1.7202317737041235e-06, "loss": 19.707, "step": 377480 }, { "epoch": 0.7625536831813572, "grad_norm": 335.61767578125, "learning_rate": 1.7199683063825544e-06, "loss": 20.3734, "step": 
377490 }, { "epoch": 0.762573883814041, "grad_norm": 0.8463869690895081, "learning_rate": 1.7197048550474643e-06, "loss": 11.2797, "step": 377500 }, { "epoch": 0.7625940844467248, "grad_norm": 324.1897888183594, "learning_rate": 1.719441419700133e-06, "loss": 28.4015, "step": 377510 }, { "epoch": 0.7626142850794086, "grad_norm": 509.4043273925781, "learning_rate": 1.7191780003418485e-06, "loss": 24.1996, "step": 377520 }, { "epoch": 0.7626344857120925, "grad_norm": 212.92823791503906, "learning_rate": 1.7189145969738918e-06, "loss": 21.1147, "step": 377530 }, { "epoch": 0.7626546863447763, "grad_norm": 132.7948455810547, "learning_rate": 1.718651209597546e-06, "loss": 14.4377, "step": 377540 }, { "epoch": 0.7626748869774601, "grad_norm": 620.8007202148438, "learning_rate": 1.7183878382140978e-06, "loss": 17.0333, "step": 377550 }, { "epoch": 0.7626950876101439, "grad_norm": 89.54065704345703, "learning_rate": 1.7181244828248294e-06, "loss": 10.121, "step": 377560 }, { "epoch": 0.7627152882428277, "grad_norm": 311.8543701171875, "learning_rate": 1.7178611434310221e-06, "loss": 36.6704, "step": 377570 }, { "epoch": 0.7627354888755116, "grad_norm": 1.616960048675537, "learning_rate": 1.7175978200339622e-06, "loss": 9.524, "step": 377580 }, { "epoch": 0.7627556895081954, "grad_norm": 124.14556121826172, "learning_rate": 1.7173345126349339e-06, "loss": 8.1849, "step": 377590 }, { "epoch": 0.7627758901408792, "grad_norm": 258.3683776855469, "learning_rate": 1.7170712212352187e-06, "loss": 17.2788, "step": 377600 }, { "epoch": 0.762796090773563, "grad_norm": 400.57159423828125, "learning_rate": 1.7168079458360987e-06, "loss": 26.6816, "step": 377610 }, { "epoch": 0.7628162914062468, "grad_norm": 417.3874206542969, "learning_rate": 1.7165446864388608e-06, "loss": 14.7793, "step": 377620 }, { "epoch": 0.7628364920389307, "grad_norm": 473.7869567871094, "learning_rate": 1.7162814430447849e-06, "loss": 18.1507, "step": 377630 }, { "epoch": 0.7628566926716145, "grad_norm": 734.55078125, "learning_rate": 1.7160182156551542e-06, "loss": 16.1951, "step": 377640 }, { "epoch": 0.7628768933042983, "grad_norm": 353.12628173828125, "learning_rate": 1.7157550042712517e-06, "loss": 22.855, "step": 377650 }, { "epoch": 0.7628970939369821, "grad_norm": 220.4140167236328, "learning_rate": 1.7154918088943629e-06, "loss": 11.4678, "step": 377660 }, { "epoch": 0.762917294569666, "grad_norm": 664.710205078125, "learning_rate": 1.7152286295257687e-06, "loss": 31.2423, "step": 377670 }, { "epoch": 0.7629374952023498, "grad_norm": 313.5198059082031, "learning_rate": 1.7149654661667503e-06, "loss": 15.2182, "step": 377680 }, { "epoch": 0.7629576958350336, "grad_norm": 550.048583984375, "learning_rate": 1.714702318818593e-06, "loss": 20.6036, "step": 377690 }, { "epoch": 0.7629778964677174, "grad_norm": 634.5236206054688, "learning_rate": 1.7144391874825784e-06, "loss": 22.9805, "step": 377700 }, { "epoch": 0.7629980971004012, "grad_norm": 112.58702850341797, "learning_rate": 1.714176072159987e-06, "loss": 34.8407, "step": 377710 }, { "epoch": 0.763018297733085, "grad_norm": 234.19915771484375, "learning_rate": 1.7139129728521048e-06, "loss": 16.3787, "step": 377720 }, { "epoch": 0.7630384983657689, "grad_norm": 320.8688049316406, "learning_rate": 1.7136498895602105e-06, "loss": 17.2428, "step": 377730 }, { "epoch": 0.7630586989984526, "grad_norm": 265.7731628417969, "learning_rate": 1.7133868222855893e-06, "loss": 16.528, "step": 377740 }, { "epoch": 0.7630788996311364, "grad_norm": 323.55340576171875, "learning_rate": 
1.7131237710295207e-06, "loss": 22.871, "step": 377750 }, { "epoch": 0.7630991002638202, "grad_norm": 351.5572814941406, "learning_rate": 1.7128607357932903e-06, "loss": 17.6728, "step": 377760 }, { "epoch": 0.763119300896504, "grad_norm": 440.6859436035156, "learning_rate": 1.7125977165781772e-06, "loss": 19.3434, "step": 377770 }, { "epoch": 0.7631395015291879, "grad_norm": 445.04156494140625, "learning_rate": 1.7123347133854628e-06, "loss": 19.0785, "step": 377780 }, { "epoch": 0.7631597021618717, "grad_norm": 332.620361328125, "learning_rate": 1.7120717262164322e-06, "loss": 20.1409, "step": 377790 }, { "epoch": 0.7631799027945555, "grad_norm": 518.7366333007812, "learning_rate": 1.7118087550723633e-06, "loss": 14.5072, "step": 377800 }, { "epoch": 0.7632001034272393, "grad_norm": 320.44488525390625, "learning_rate": 1.711545799954541e-06, "loss": 30.7372, "step": 377810 }, { "epoch": 0.7632203040599231, "grad_norm": 953.9003295898438, "learning_rate": 1.7112828608642463e-06, "loss": 12.0028, "step": 377820 }, { "epoch": 0.763240504692607, "grad_norm": 184.9840850830078, "learning_rate": 1.7110199378027581e-06, "loss": 22.6699, "step": 377830 }, { "epoch": 0.7632607053252908, "grad_norm": 618.4388427734375, "learning_rate": 1.7107570307713618e-06, "loss": 16.9055, "step": 377840 }, { "epoch": 0.7632809059579746, "grad_norm": 95.88978576660156, "learning_rate": 1.710494139771336e-06, "loss": 11.2177, "step": 377850 }, { "epoch": 0.7633011065906584, "grad_norm": 228.05349731445312, "learning_rate": 1.7102312648039616e-06, "loss": 20.6369, "step": 377860 }, { "epoch": 0.7633213072233422, "grad_norm": 472.37579345703125, "learning_rate": 1.7099684058705212e-06, "loss": 22.444, "step": 377870 }, { "epoch": 0.7633415078560261, "grad_norm": 130.0430450439453, "learning_rate": 1.7097055629722991e-06, "loss": 5.8776, "step": 377880 }, { "epoch": 0.7633617084887099, "grad_norm": 333.94732666015625, "learning_rate": 1.7094427361105693e-06, "loss": 13.7401, "step": 377890 }, { "epoch": 0.7633819091213937, "grad_norm": 137.74929809570312, "learning_rate": 1.709179925286617e-06, "loss": 26.0631, "step": 377900 }, { "epoch": 0.7634021097540775, "grad_norm": 956.1805419921875, "learning_rate": 1.7089171305017238e-06, "loss": 19.5247, "step": 377910 }, { "epoch": 0.7634223103867613, "grad_norm": 302.4202575683594, "learning_rate": 1.7086543517571697e-06, "loss": 27.5805, "step": 377920 }, { "epoch": 0.7634425110194452, "grad_norm": 483.156005859375, "learning_rate": 1.7083915890542329e-06, "loss": 25.8959, "step": 377930 }, { "epoch": 0.763462711652129, "grad_norm": 574.2993774414062, "learning_rate": 1.7081288423941967e-06, "loss": 34.5771, "step": 377940 }, { "epoch": 0.7634829122848128, "grad_norm": 16.331708908081055, "learning_rate": 1.7078661117783447e-06, "loss": 16.448, "step": 377950 }, { "epoch": 0.7635031129174966, "grad_norm": 415.71783447265625, "learning_rate": 1.7076033972079503e-06, "loss": 17.2632, "step": 377960 }, { "epoch": 0.7635233135501804, "grad_norm": 156.38693237304688, "learning_rate": 1.7073406986842982e-06, "loss": 17.7233, "step": 377970 }, { "epoch": 0.7635435141828643, "grad_norm": 158.8320770263672, "learning_rate": 1.7070780162086691e-06, "loss": 19.0591, "step": 377980 }, { "epoch": 0.7635637148155481, "grad_norm": 412.1049499511719, "learning_rate": 1.7068153497823431e-06, "loss": 16.6338, "step": 377990 }, { "epoch": 0.7635839154482318, "grad_norm": 572.3352661132812, "learning_rate": 1.7065526994065973e-06, "loss": 8.5546, "step": 378000 }, { "epoch": 
0.7636041160809156, "grad_norm": 261.773193359375, "learning_rate": 1.7062900650827163e-06, "loss": 13.3251, "step": 378010 }, { "epoch": 0.7636243167135994, "grad_norm": 413.029541015625, "learning_rate": 1.7060274468119782e-06, "loss": 31.3703, "step": 378020 }, { "epoch": 0.7636445173462832, "grad_norm": 229.0946044921875, "learning_rate": 1.7057648445956609e-06, "loss": 14.7214, "step": 378030 }, { "epoch": 0.7636647179789671, "grad_norm": 355.132080078125, "learning_rate": 1.7055022584350477e-06, "loss": 11.482, "step": 378040 }, { "epoch": 0.7636849186116509, "grad_norm": 211.0098876953125, "learning_rate": 1.7052396883314154e-06, "loss": 16.8089, "step": 378050 }, { "epoch": 0.7637051192443347, "grad_norm": 96.66175079345703, "learning_rate": 1.704977134286047e-06, "loss": 10.922, "step": 378060 }, { "epoch": 0.7637253198770185, "grad_norm": 57.73271942138672, "learning_rate": 1.7047145963002187e-06, "loss": 15.9487, "step": 378070 }, { "epoch": 0.7637455205097023, "grad_norm": 756.5963745117188, "learning_rate": 1.7044520743752135e-06, "loss": 20.7602, "step": 378080 }, { "epoch": 0.7637657211423862, "grad_norm": 596.1071166992188, "learning_rate": 1.7041895685123088e-06, "loss": 25.3244, "step": 378090 }, { "epoch": 0.76378592177507, "grad_norm": 689.9220581054688, "learning_rate": 1.7039270787127832e-06, "loss": 24.2241, "step": 378100 }, { "epoch": 0.7638061224077538, "grad_norm": 252.2925567626953, "learning_rate": 1.7036646049779188e-06, "loss": 24.4682, "step": 378110 }, { "epoch": 0.7638263230404376, "grad_norm": 465.9823913574219, "learning_rate": 1.7034021473089918e-06, "loss": 17.5219, "step": 378120 }, { "epoch": 0.7638465236731214, "grad_norm": 342.3565368652344, "learning_rate": 1.7031397057072846e-06, "loss": 13.4319, "step": 378130 }, { "epoch": 0.7638667243058053, "grad_norm": 216.5574188232422, "learning_rate": 1.702877280174074e-06, "loss": 12.3569, "step": 378140 }, { "epoch": 0.7638869249384891, "grad_norm": 269.8787536621094, "learning_rate": 1.7026148707106388e-06, "loss": 14.3361, "step": 378150 }, { "epoch": 0.7639071255711729, "grad_norm": 672.4157104492188, "learning_rate": 1.7023524773182598e-06, "loss": 10.6181, "step": 378160 }, { "epoch": 0.7639273262038567, "grad_norm": 133.22813415527344, "learning_rate": 1.7020900999982154e-06, "loss": 9.1915, "step": 378170 }, { "epoch": 0.7639475268365405, "grad_norm": 669.6143188476562, "learning_rate": 1.7018277387517817e-06, "loss": 21.729, "step": 378180 }, { "epoch": 0.7639677274692244, "grad_norm": 131.20468139648438, "learning_rate": 1.70156539358024e-06, "loss": 18.019, "step": 378190 }, { "epoch": 0.7639879281019082, "grad_norm": 433.8664855957031, "learning_rate": 1.7013030644848698e-06, "loss": 17.6022, "step": 378200 }, { "epoch": 0.764008128734592, "grad_norm": 238.54808044433594, "learning_rate": 1.7010407514669485e-06, "loss": 32.016, "step": 378210 }, { "epoch": 0.7640283293672758, "grad_norm": 586.1495971679688, "learning_rate": 1.7007784545277528e-06, "loss": 11.9752, "step": 378220 }, { "epoch": 0.7640485299999596, "grad_norm": 231.04721069335938, "learning_rate": 1.700516173668565e-06, "loss": 19.4538, "step": 378230 }, { "epoch": 0.7640687306326435, "grad_norm": 713.0658569335938, "learning_rate": 1.7002539088906606e-06, "loss": 32.0958, "step": 378240 }, { "epoch": 0.7640889312653272, "grad_norm": 42.475868225097656, "learning_rate": 1.6999916601953165e-06, "loss": 23.6006, "step": 378250 }, { "epoch": 0.764109131898011, "grad_norm": 836.8502197265625, "learning_rate": 1.6997294275838134e-06, 
"loss": 32.8616, "step": 378260 }, { "epoch": 0.7641293325306948, "grad_norm": 311.5104675292969, "learning_rate": 1.69946721105743e-06, "loss": 14.5967, "step": 378270 }, { "epoch": 0.7641495331633786, "grad_norm": 401.9920654296875, "learning_rate": 1.6992050106174435e-06, "loss": 15.822, "step": 378280 }, { "epoch": 0.7641697337960625, "grad_norm": 743.8467407226562, "learning_rate": 1.6989428262651296e-06, "loss": 27.7331, "step": 378290 }, { "epoch": 0.7641899344287463, "grad_norm": 199.25067138671875, "learning_rate": 1.6986806580017695e-06, "loss": 11.5302, "step": 378300 }, { "epoch": 0.7642101350614301, "grad_norm": 1117.6463623046875, "learning_rate": 1.6984185058286396e-06, "loss": 26.7681, "step": 378310 }, { "epoch": 0.7642303356941139, "grad_norm": 425.97314453125, "learning_rate": 1.698156369747016e-06, "loss": 22.1579, "step": 378320 }, { "epoch": 0.7642505363267977, "grad_norm": 516.744140625, "learning_rate": 1.6978942497581797e-06, "loss": 21.7841, "step": 378330 }, { "epoch": 0.7642707369594816, "grad_norm": 193.90664672851562, "learning_rate": 1.6976321458634036e-06, "loss": 12.7344, "step": 378340 }, { "epoch": 0.7642909375921654, "grad_norm": 492.00347900390625, "learning_rate": 1.6973700580639707e-06, "loss": 16.3136, "step": 378350 }, { "epoch": 0.7643111382248492, "grad_norm": 647.6111450195312, "learning_rate": 1.6971079863611534e-06, "loss": 19.5634, "step": 378360 }, { "epoch": 0.764331338857533, "grad_norm": 588.0462036132812, "learning_rate": 1.6968459307562329e-06, "loss": 17.8978, "step": 378370 }, { "epoch": 0.7643515394902168, "grad_norm": 337.4934387207031, "learning_rate": 1.6965838912504845e-06, "loss": 20.2251, "step": 378380 }, { "epoch": 0.7643717401229007, "grad_norm": 49.08926010131836, "learning_rate": 1.6963218678451843e-06, "loss": 26.6575, "step": 378390 }, { "epoch": 0.7643919407555845, "grad_norm": 596.4000854492188, "learning_rate": 1.6960598605416117e-06, "loss": 14.4334, "step": 378400 }, { "epoch": 0.7644121413882683, "grad_norm": 162.82354736328125, "learning_rate": 1.6957978693410414e-06, "loss": 20.7228, "step": 378410 }, { "epoch": 0.7644323420209521, "grad_norm": 285.4637756347656, "learning_rate": 1.695535894244753e-06, "loss": 15.7035, "step": 378420 }, { "epoch": 0.7644525426536359, "grad_norm": 299.3540954589844, "learning_rate": 1.695273935254022e-06, "loss": 20.7704, "step": 378430 }, { "epoch": 0.7644727432863198, "grad_norm": 165.3634796142578, "learning_rate": 1.6950119923701235e-06, "loss": 18.8815, "step": 378440 }, { "epoch": 0.7644929439190036, "grad_norm": 269.2787780761719, "learning_rate": 1.6947500655943373e-06, "loss": 19.3063, "step": 378450 }, { "epoch": 0.7645131445516874, "grad_norm": 425.3699035644531, "learning_rate": 1.6944881549279384e-06, "loss": 23.9079, "step": 378460 }, { "epoch": 0.7645333451843712, "grad_norm": 564.7520751953125, "learning_rate": 1.6942262603722015e-06, "loss": 25.971, "step": 378470 }, { "epoch": 0.764553545817055, "grad_norm": 308.5755615234375, "learning_rate": 1.6939643819284051e-06, "loss": 12.3883, "step": 378480 }, { "epoch": 0.7645737464497389, "grad_norm": 349.83050537109375, "learning_rate": 1.6937025195978286e-06, "loss": 15.6415, "step": 378490 }, { "epoch": 0.7645939470824227, "grad_norm": 453.6837158203125, "learning_rate": 1.6934406733817417e-06, "loss": 13.7756, "step": 378500 }, { "epoch": 0.7646141477151064, "grad_norm": 269.7435607910156, "learning_rate": 1.6931788432814233e-06, "loss": 26.2361, "step": 378510 }, { "epoch": 0.7646343483477902, "grad_norm": 
241.135986328125, "learning_rate": 1.6929170292981528e-06, "loss": 18.1771, "step": 378520 }, { "epoch": 0.764654548980474, "grad_norm": 360.90704345703125, "learning_rate": 1.692655231433203e-06, "loss": 16.855, "step": 378530 }, { "epoch": 0.7646747496131578, "grad_norm": 507.8138427734375, "learning_rate": 1.6923934496878485e-06, "loss": 11.4303, "step": 378540 }, { "epoch": 0.7646949502458417, "grad_norm": 195.2571258544922, "learning_rate": 1.6921316840633678e-06, "loss": 14.8914, "step": 378550 }, { "epoch": 0.7647151508785255, "grad_norm": 1136.0404052734375, "learning_rate": 1.6918699345610395e-06, "loss": 22.6831, "step": 378560 }, { "epoch": 0.7647353515112093, "grad_norm": 444.76324462890625, "learning_rate": 1.6916082011821322e-06, "loss": 19.5697, "step": 378570 }, { "epoch": 0.7647555521438931, "grad_norm": 633.3739624023438, "learning_rate": 1.6913464839279254e-06, "loss": 19.0122, "step": 378580 }, { "epoch": 0.764775752776577, "grad_norm": 669.9867553710938, "learning_rate": 1.6910847827996961e-06, "loss": 16.7808, "step": 378590 }, { "epoch": 0.7647959534092608, "grad_norm": 761.240478515625, "learning_rate": 1.6908230977987184e-06, "loss": 19.101, "step": 378600 }, { "epoch": 0.7648161540419446, "grad_norm": 447.00750732421875, "learning_rate": 1.6905614289262657e-06, "loss": 21.746, "step": 378610 }, { "epoch": 0.7648363546746284, "grad_norm": 213.79307556152344, "learning_rate": 1.690299776183617e-06, "loss": 19.0353, "step": 378620 }, { "epoch": 0.7648565553073122, "grad_norm": 227.6842498779297, "learning_rate": 1.6900381395720455e-06, "loss": 12.6426, "step": 378630 }, { "epoch": 0.764876755939996, "grad_norm": 142.15248107910156, "learning_rate": 1.6897765190928257e-06, "loss": 17.1625, "step": 378640 }, { "epoch": 0.7648969565726799, "grad_norm": 210.0847625732422, "learning_rate": 1.6895149147472344e-06, "loss": 13.9529, "step": 378650 }, { "epoch": 0.7649171572053637, "grad_norm": 459.7899169921875, "learning_rate": 1.6892533265365445e-06, "loss": 13.3628, "step": 378660 }, { "epoch": 0.7649373578380475, "grad_norm": 865.4264526367188, "learning_rate": 1.6889917544620342e-06, "loss": 23.15, "step": 378670 }, { "epoch": 0.7649575584707313, "grad_norm": 523.2117309570312, "learning_rate": 1.6887301985249754e-06, "loss": 14.8688, "step": 378680 }, { "epoch": 0.7649777591034151, "grad_norm": 149.56378173828125, "learning_rate": 1.6884686587266446e-06, "loss": 19.8172, "step": 378690 }, { "epoch": 0.764997959736099, "grad_norm": 671.8781127929688, "learning_rate": 1.6882071350683165e-06, "loss": 16.2385, "step": 378700 }, { "epoch": 0.7650181603687828, "grad_norm": 197.93997192382812, "learning_rate": 1.6879456275512634e-06, "loss": 12.6514, "step": 378710 }, { "epoch": 0.7650383610014666, "grad_norm": 417.5447998046875, "learning_rate": 1.6876841361767637e-06, "loss": 20.2361, "step": 378720 }, { "epoch": 0.7650585616341504, "grad_norm": 414.91107177734375, "learning_rate": 1.6874226609460875e-06, "loss": 16.3534, "step": 378730 }, { "epoch": 0.7650787622668342, "grad_norm": 372.59368896484375, "learning_rate": 1.6871612018605131e-06, "loss": 35.1785, "step": 378740 }, { "epoch": 0.7650989628995181, "grad_norm": 215.13145446777344, "learning_rate": 1.6868997589213138e-06, "loss": 14.9734, "step": 378750 }, { "epoch": 0.7651191635322018, "grad_norm": 149.23243713378906, "learning_rate": 1.6866383321297614e-06, "loss": 21.2211, "step": 378760 }, { "epoch": 0.7651393641648856, "grad_norm": 367.0497741699219, "learning_rate": 1.6863769214871334e-06, "loss": 16.1369, 
"step": 378770 }, { "epoch": 0.7651595647975694, "grad_norm": 1.4534255266189575, "learning_rate": 1.6861155269947022e-06, "loss": 19.1285, "step": 378780 }, { "epoch": 0.7651797654302532, "grad_norm": 639.5684814453125, "learning_rate": 1.6858541486537406e-06, "loss": 21.6637, "step": 378790 }, { "epoch": 0.765199966062937, "grad_norm": 999.3773803710938, "learning_rate": 1.6855927864655241e-06, "loss": 21.8322, "step": 378800 }, { "epoch": 0.7652201666956209, "grad_norm": 87.37020111083984, "learning_rate": 1.6853314404313275e-06, "loss": 13.0347, "step": 378810 }, { "epoch": 0.7652403673283047, "grad_norm": 579.7806396484375, "learning_rate": 1.6850701105524236e-06, "loss": 34.897, "step": 378820 }, { "epoch": 0.7652605679609885, "grad_norm": 232.5875701904297, "learning_rate": 1.6848087968300848e-06, "loss": 16.0784, "step": 378830 }, { "epoch": 0.7652807685936723, "grad_norm": 61.81998825073242, "learning_rate": 1.684547499265587e-06, "loss": 18.8138, "step": 378840 }, { "epoch": 0.7653009692263562, "grad_norm": 482.0547180175781, "learning_rate": 1.6842862178602026e-06, "loss": 12.2082, "step": 378850 }, { "epoch": 0.76532116985904, "grad_norm": 55.65410614013672, "learning_rate": 1.6840249526152036e-06, "loss": 13.8418, "step": 378860 }, { "epoch": 0.7653413704917238, "grad_norm": 249.66575622558594, "learning_rate": 1.6837637035318643e-06, "loss": 14.0964, "step": 378870 }, { "epoch": 0.7653615711244076, "grad_norm": 510.0167541503906, "learning_rate": 1.6835024706114605e-06, "loss": 25.2258, "step": 378880 }, { "epoch": 0.7653817717570914, "grad_norm": 573.1727905273438, "learning_rate": 1.6832412538552634e-06, "loss": 12.4214, "step": 378890 }, { "epoch": 0.7654019723897753, "grad_norm": 451.53851318359375, "learning_rate": 1.6829800532645447e-06, "loss": 6.745, "step": 378900 }, { "epoch": 0.7654221730224591, "grad_norm": 620.3411865234375, "learning_rate": 1.6827188688405805e-06, "loss": 31.8535, "step": 378910 }, { "epoch": 0.7654423736551429, "grad_norm": 703.267578125, "learning_rate": 1.6824577005846421e-06, "loss": 17.4161, "step": 378920 }, { "epoch": 0.7654625742878267, "grad_norm": 76.35845184326172, "learning_rate": 1.6821965484980007e-06, "loss": 15.2875, "step": 378930 }, { "epoch": 0.7654827749205105, "grad_norm": 672.7890014648438, "learning_rate": 1.6819354125819327e-06, "loss": 19.937, "step": 378940 }, { "epoch": 0.7655029755531944, "grad_norm": 501.7395324707031, "learning_rate": 1.6816742928377072e-06, "loss": 8.8874, "step": 378950 }, { "epoch": 0.7655231761858782, "grad_norm": 189.4306182861328, "learning_rate": 1.6814131892666009e-06, "loss": 13.2997, "step": 378960 }, { "epoch": 0.765543376818562, "grad_norm": 522.3626098632812, "learning_rate": 1.6811521018698824e-06, "loss": 25.9449, "step": 378970 }, { "epoch": 0.7655635774512458, "grad_norm": 445.63043212890625, "learning_rate": 1.6808910306488274e-06, "loss": 15.6408, "step": 378980 }, { "epoch": 0.7655837780839296, "grad_norm": 310.6912536621094, "learning_rate": 1.6806299756047068e-06, "loss": 17.2769, "step": 378990 }, { "epoch": 0.7656039787166135, "grad_norm": 781.8789672851562, "learning_rate": 1.680368936738792e-06, "loss": 12.3257, "step": 379000 }, { "epoch": 0.7656241793492973, "grad_norm": 924.9375610351562, "learning_rate": 1.680107914052358e-06, "loss": 24.7111, "step": 379010 }, { "epoch": 0.765644379981981, "grad_norm": 133.7320098876953, "learning_rate": 1.6798469075466734e-06, "loss": 12.1958, "step": 379020 }, { "epoch": 0.7656645806146648, "grad_norm": 1912.49951171875, 
"learning_rate": 1.6795859172230139e-06, "loss": 40.6826, "step": 379030 }, { "epoch": 0.7656847812473486, "grad_norm": 36.367393493652344, "learning_rate": 1.6793249430826502e-06, "loss": 22.006, "step": 379040 }, { "epoch": 0.7657049818800324, "grad_norm": 163.1468963623047, "learning_rate": 1.679063985126852e-06, "loss": 15.4548, "step": 379050 }, { "epoch": 0.7657251825127163, "grad_norm": 444.4681396484375, "learning_rate": 1.6788030433568953e-06, "loss": 10.0607, "step": 379060 }, { "epoch": 0.7657453831454001, "grad_norm": 319.3157653808594, "learning_rate": 1.678542117774049e-06, "loss": 14.2482, "step": 379070 }, { "epoch": 0.7657655837780839, "grad_norm": 244.03370666503906, "learning_rate": 1.6782812083795846e-06, "loss": 34.7048, "step": 379080 }, { "epoch": 0.7657857844107677, "grad_norm": 257.67388916015625, "learning_rate": 1.6780203151747742e-06, "loss": 8.6153, "step": 379090 }, { "epoch": 0.7658059850434515, "grad_norm": 389.9267883300781, "learning_rate": 1.6777594381608936e-06, "loss": 26.8459, "step": 379100 }, { "epoch": 0.7658261856761354, "grad_norm": 391.3517150878906, "learning_rate": 1.6774985773392071e-06, "loss": 23.6509, "step": 379110 }, { "epoch": 0.7658463863088192, "grad_norm": 363.4808349609375, "learning_rate": 1.6772377327109896e-06, "loss": 11.718, "step": 379120 }, { "epoch": 0.765866586941503, "grad_norm": 535.1600341796875, "learning_rate": 1.6769769042775141e-06, "loss": 20.8023, "step": 379130 }, { "epoch": 0.7658867875741868, "grad_norm": 458.2337646484375, "learning_rate": 1.67671609204005e-06, "loss": 25.0361, "step": 379140 }, { "epoch": 0.7659069882068706, "grad_norm": 545.01904296875, "learning_rate": 1.676455295999867e-06, "loss": 15.2905, "step": 379150 }, { "epoch": 0.7659271888395545, "grad_norm": 502.02215576171875, "learning_rate": 1.6761945161582382e-06, "loss": 22.2201, "step": 379160 }, { "epoch": 0.7659473894722383, "grad_norm": 293.74493408203125, "learning_rate": 1.675933752516437e-06, "loss": 24.9691, "step": 379170 }, { "epoch": 0.7659675901049221, "grad_norm": 410.0480041503906, "learning_rate": 1.675673005075728e-06, "loss": 18.7837, "step": 379180 }, { "epoch": 0.7659877907376059, "grad_norm": 381.4063720703125, "learning_rate": 1.6754122738373863e-06, "loss": 31.3284, "step": 379190 }, { "epoch": 0.7660079913702897, "grad_norm": 576.6478881835938, "learning_rate": 1.6751515588026828e-06, "loss": 17.7734, "step": 379200 }, { "epoch": 0.7660281920029736, "grad_norm": 307.06597900390625, "learning_rate": 1.674890859972888e-06, "loss": 16.9801, "step": 379210 }, { "epoch": 0.7660483926356574, "grad_norm": 795.3487548828125, "learning_rate": 1.6746301773492701e-06, "loss": 23.2835, "step": 379220 }, { "epoch": 0.7660685932683412, "grad_norm": 300.02984619140625, "learning_rate": 1.6743695109331027e-06, "loss": 12.2691, "step": 379230 }, { "epoch": 0.766088793901025, "grad_norm": 522.2974243164062, "learning_rate": 1.6741088607256556e-06, "loss": 25.5154, "step": 379240 }, { "epoch": 0.7661089945337088, "grad_norm": 554.88232421875, "learning_rate": 1.6738482267281963e-06, "loss": 22.2439, "step": 379250 }, { "epoch": 0.7661291951663927, "grad_norm": 266.3728332519531, "learning_rate": 1.6735876089419973e-06, "loss": 16.9184, "step": 379260 }, { "epoch": 0.7661493957990765, "grad_norm": 210.74923706054688, "learning_rate": 1.6733270073683305e-06, "loss": 14.001, "step": 379270 }, { "epoch": 0.7661695964317602, "grad_norm": 94.46199798583984, "learning_rate": 1.6730664220084647e-06, "loss": 16.1184, "step": 379280 }, { "epoch": 
0.766189797064444, "grad_norm": 274.54150390625, "learning_rate": 1.6728058528636682e-06, "loss": 15.9896, "step": 379290 }, { "epoch": 0.7662099976971278, "grad_norm": 341.462646484375, "learning_rate": 1.6725452999352137e-06, "loss": 35.1971, "step": 379300 }, { "epoch": 0.7662301983298117, "grad_norm": 339.0533142089844, "learning_rate": 1.6722847632243699e-06, "loss": 11.7799, "step": 379310 }, { "epoch": 0.7662503989624955, "grad_norm": 208.22216796875, "learning_rate": 1.6720242427324045e-06, "loss": 14.5783, "step": 379320 }, { "epoch": 0.7662705995951793, "grad_norm": 232.9193572998047, "learning_rate": 1.6717637384605917e-06, "loss": 11.5119, "step": 379330 }, { "epoch": 0.7662908002278631, "grad_norm": 208.89015197753906, "learning_rate": 1.6715032504101968e-06, "loss": 10.4471, "step": 379340 }, { "epoch": 0.7663110008605469, "grad_norm": 184.51535034179688, "learning_rate": 1.671242778582493e-06, "loss": 22.6559, "step": 379350 }, { "epoch": 0.7663312014932308, "grad_norm": 313.58050537109375, "learning_rate": 1.670982322978748e-06, "loss": 9.7866, "step": 379360 }, { "epoch": 0.7663514021259146, "grad_norm": 938.3388671875, "learning_rate": 1.6707218836002298e-06, "loss": 19.2327, "step": 379370 }, { "epoch": 0.7663716027585984, "grad_norm": 487.086181640625, "learning_rate": 1.6704614604482112e-06, "loss": 18.2963, "step": 379380 }, { "epoch": 0.7663918033912822, "grad_norm": 617.3723754882812, "learning_rate": 1.67020105352396e-06, "loss": 23.3614, "step": 379390 }, { "epoch": 0.766412004023966, "grad_norm": 363.2884826660156, "learning_rate": 1.6699406628287423e-06, "loss": 15.7168, "step": 379400 }, { "epoch": 0.7664322046566499, "grad_norm": 303.956298828125, "learning_rate": 1.6696802883638309e-06, "loss": 23.3308, "step": 379410 }, { "epoch": 0.7664524052893337, "grad_norm": 553.9864501953125, "learning_rate": 1.6694199301304947e-06, "loss": 12.3935, "step": 379420 }, { "epoch": 0.7664726059220175, "grad_norm": 535.25537109375, "learning_rate": 1.6691595881300026e-06, "loss": 18.6105, "step": 379430 }, { "epoch": 0.7664928065547013, "grad_norm": 86.4391098022461, "learning_rate": 1.6688992623636201e-06, "loss": 25.8113, "step": 379440 }, { "epoch": 0.7665130071873851, "grad_norm": 404.76690673828125, "learning_rate": 1.6686389528326214e-06, "loss": 16.7689, "step": 379450 }, { "epoch": 0.766533207820069, "grad_norm": 226.09596252441406, "learning_rate": 1.6683786595382716e-06, "loss": 15.676, "step": 379460 }, { "epoch": 0.7665534084527528, "grad_norm": 346.31243896484375, "learning_rate": 1.6681183824818387e-06, "loss": 8.5206, "step": 379470 }, { "epoch": 0.7665736090854366, "grad_norm": 447.7514343261719, "learning_rate": 1.6678581216645928e-06, "loss": 17.0264, "step": 379480 }, { "epoch": 0.7665938097181204, "grad_norm": 532.4463500976562, "learning_rate": 1.6675978770878055e-06, "loss": 19.6064, "step": 379490 }, { "epoch": 0.7666140103508042, "grad_norm": 216.76478576660156, "learning_rate": 1.6673376487527382e-06, "loss": 19.8939, "step": 379500 }, { "epoch": 0.766634210983488, "grad_norm": 447.941162109375, "learning_rate": 1.6670774366606635e-06, "loss": 12.3378, "step": 379510 }, { "epoch": 0.7666544116161719, "grad_norm": 737.269775390625, "learning_rate": 1.6668172408128509e-06, "loss": 18.2964, "step": 379520 }, { "epoch": 0.7666746122488556, "grad_norm": 260.05401611328125, "learning_rate": 1.6665570612105663e-06, "loss": 9.79, "step": 379530 }, { "epoch": 0.7666948128815394, "grad_norm": 428.9274597167969, "learning_rate": 1.666296897855077e-06, "loss": 
16.6628, "step": 379540 }, { "epoch": 0.7667150135142232, "grad_norm": 343.9048767089844, "learning_rate": 1.6660367507476539e-06, "loss": 19.8294, "step": 379550 }, { "epoch": 0.766735214146907, "grad_norm": 892.6444702148438, "learning_rate": 1.665776619889562e-06, "loss": 28.0487, "step": 379560 }, { "epoch": 0.7667554147795909, "grad_norm": 508.3505554199219, "learning_rate": 1.6655165052820715e-06, "loss": 9.3646, "step": 379570 }, { "epoch": 0.7667756154122747, "grad_norm": 148.50218200683594, "learning_rate": 1.6652564069264476e-06, "loss": 9.8464, "step": 379580 }, { "epoch": 0.7667958160449585, "grad_norm": 116.51488494873047, "learning_rate": 1.6649963248239614e-06, "loss": 26.0876, "step": 379590 }, { "epoch": 0.7668160166776423, "grad_norm": 708.1141357421875, "learning_rate": 1.6647362589758787e-06, "loss": 28.6497, "step": 379600 }, { "epoch": 0.7668362173103261, "grad_norm": 371.03057861328125, "learning_rate": 1.6644762093834648e-06, "loss": 11.6496, "step": 379610 }, { "epoch": 0.76685641794301, "grad_norm": 274.0343017578125, "learning_rate": 1.6642161760479913e-06, "loss": 14.4446, "step": 379620 }, { "epoch": 0.7668766185756938, "grad_norm": 322.36651611328125, "learning_rate": 1.663956158970722e-06, "loss": 16.6299, "step": 379630 }, { "epoch": 0.7668968192083776, "grad_norm": 183.26901245117188, "learning_rate": 1.6636961581529277e-06, "loss": 17.6881, "step": 379640 }, { "epoch": 0.7669170198410614, "grad_norm": 788.52392578125, "learning_rate": 1.6634361735958731e-06, "loss": 17.3198, "step": 379650 }, { "epoch": 0.7669372204737452, "grad_norm": 446.07098388671875, "learning_rate": 1.6631762053008244e-06, "loss": 15.1415, "step": 379660 }, { "epoch": 0.7669574211064291, "grad_norm": 570.2052001953125, "learning_rate": 1.6629162532690517e-06, "loss": 32.3281, "step": 379670 }, { "epoch": 0.7669776217391129, "grad_norm": 355.68701171875, "learning_rate": 1.6626563175018207e-06, "loss": 10.5786, "step": 379680 }, { "epoch": 0.7669978223717967, "grad_norm": 441.5621032714844, "learning_rate": 1.6623963980003966e-06, "loss": 16.6183, "step": 379690 }, { "epoch": 0.7670180230044805, "grad_norm": 197.07032775878906, "learning_rate": 1.6621364947660472e-06, "loss": 12.0965, "step": 379700 }, { "epoch": 0.7670382236371643, "grad_norm": 339.8711242675781, "learning_rate": 1.661876607800043e-06, "loss": 39.5871, "step": 379710 }, { "epoch": 0.7670584242698482, "grad_norm": 241.8334197998047, "learning_rate": 1.6616167371036446e-06, "loss": 15.9962, "step": 379720 }, { "epoch": 0.767078624902532, "grad_norm": 411.3114013671875, "learning_rate": 1.6613568826781208e-06, "loss": 22.2917, "step": 379730 }, { "epoch": 0.7670988255352158, "grad_norm": 161.6168975830078, "learning_rate": 1.6610970445247404e-06, "loss": 27.7665, "step": 379740 }, { "epoch": 0.7671190261678996, "grad_norm": 987.0467529296875, "learning_rate": 1.6608372226447678e-06, "loss": 20.4386, "step": 379750 }, { "epoch": 0.7671392268005834, "grad_norm": 280.5501708984375, "learning_rate": 1.6605774170394683e-06, "loss": 24.2737, "step": 379760 }, { "epoch": 0.7671594274332673, "grad_norm": 765.9490966796875, "learning_rate": 1.6603176277101095e-06, "loss": 24.2523, "step": 379770 }, { "epoch": 0.7671796280659511, "grad_norm": 178.56707763671875, "learning_rate": 1.6600578546579604e-06, "loss": 19.8372, "step": 379780 }, { "epoch": 0.7671998286986348, "grad_norm": 148.49171447753906, "learning_rate": 1.6597980978842814e-06, "loss": 23.115, "step": 379790 }, { "epoch": 0.7672200293313186, "grad_norm": 
148.5966033935547, "learning_rate": 1.6595383573903412e-06, "loss": 18.8011, "step": 379800 }, { "epoch": 0.7672402299640024, "grad_norm": 203.92120361328125, "learning_rate": 1.6592786331774075e-06, "loss": 17.3271, "step": 379810 }, { "epoch": 0.7672604305966862, "grad_norm": 636.1489868164062, "learning_rate": 1.6590189252467448e-06, "loss": 9.946, "step": 379820 }, { "epoch": 0.7672806312293701, "grad_norm": 320.523193359375, "learning_rate": 1.6587592335996167e-06, "loss": 14.2966, "step": 379830 }, { "epoch": 0.7673008318620539, "grad_norm": 268.8244934082031, "learning_rate": 1.6584995582372926e-06, "loss": 24.7064, "step": 379840 }, { "epoch": 0.7673210324947377, "grad_norm": 533.8139038085938, "learning_rate": 1.658239899161036e-06, "loss": 10.481, "step": 379850 }, { "epoch": 0.7673412331274215, "grad_norm": 655.6635131835938, "learning_rate": 1.6579802563721115e-06, "loss": 16.7972, "step": 379860 }, { "epoch": 0.7673614337601053, "grad_norm": 763.1399536132812, "learning_rate": 1.6577206298717862e-06, "loss": 31.9017, "step": 379870 }, { "epoch": 0.7673816343927892, "grad_norm": 176.9679718017578, "learning_rate": 1.657461019661326e-06, "loss": 24.1936, "step": 379880 }, { "epoch": 0.767401835025473, "grad_norm": 108.568115234375, "learning_rate": 1.657201425741996e-06, "loss": 12.1127, "step": 379890 }, { "epoch": 0.7674220356581568, "grad_norm": 214.27066040039062, "learning_rate": 1.6569418481150596e-06, "loss": 14.8013, "step": 379900 }, { "epoch": 0.7674422362908406, "grad_norm": 370.8332824707031, "learning_rate": 1.656682286781784e-06, "loss": 33.4163, "step": 379910 }, { "epoch": 0.7674624369235244, "grad_norm": 223.8822021484375, "learning_rate": 1.6564227417434336e-06, "loss": 16.8652, "step": 379920 }, { "epoch": 0.7674826375562083, "grad_norm": 254.4141082763672, "learning_rate": 1.6561632130012716e-06, "loss": 15.5237, "step": 379930 }, { "epoch": 0.7675028381888921, "grad_norm": 177.2603302001953, "learning_rate": 1.6559037005565665e-06, "loss": 17.4667, "step": 379940 }, { "epoch": 0.7675230388215759, "grad_norm": 701.9732666015625, "learning_rate": 1.6556442044105797e-06, "loss": 18.3868, "step": 379950 }, { "epoch": 0.7675432394542597, "grad_norm": 306.0867614746094, "learning_rate": 1.6553847245645787e-06, "loss": 19.5346, "step": 379960 }, { "epoch": 0.7675634400869435, "grad_norm": 286.0669250488281, "learning_rate": 1.6551252610198266e-06, "loss": 18.5203, "step": 379970 }, { "epoch": 0.7675836407196274, "grad_norm": 222.2674102783203, "learning_rate": 1.6548658137775868e-06, "loss": 22.9019, "step": 379980 }, { "epoch": 0.7676038413523112, "grad_norm": 333.3114013671875, "learning_rate": 1.6546063828391272e-06, "loss": 22.5526, "step": 379990 }, { "epoch": 0.767624041984995, "grad_norm": 362.8835144042969, "learning_rate": 1.6543469682057105e-06, "loss": 11.9528, "step": 380000 }, { "epoch": 0.7676442426176788, "grad_norm": 427.2572326660156, "learning_rate": 1.654087569878599e-06, "loss": 21.9404, "step": 380010 }, { "epoch": 0.7676644432503626, "grad_norm": 424.8553466796875, "learning_rate": 1.653828187859059e-06, "loss": 12.2714, "step": 380020 }, { "epoch": 0.7676846438830465, "grad_norm": 209.9966278076172, "learning_rate": 1.6535688221483558e-06, "loss": 28.7189, "step": 380030 }, { "epoch": 0.7677048445157302, "grad_norm": 169.88417053222656, "learning_rate": 1.6533094727477528e-06, "loss": 12.2698, "step": 380040 }, { "epoch": 0.767725045148414, "grad_norm": 308.2015075683594, "learning_rate": 1.653050139658512e-06, "loss": 35.0971, "step": 380050 
}, { "epoch": 0.7677452457810978, "grad_norm": 8.098689079284668, "learning_rate": 1.6527908228819e-06, "loss": 10.1755, "step": 380060 }, { "epoch": 0.7677654464137816, "grad_norm": 228.56390380859375, "learning_rate": 1.6525315224191795e-06, "loss": 16.6736, "step": 380070 }, { "epoch": 0.7677856470464655, "grad_norm": 865.73388671875, "learning_rate": 1.6522722382716133e-06, "loss": 10.5984, "step": 380080 }, { "epoch": 0.7678058476791493, "grad_norm": 351.1080322265625, "learning_rate": 1.6520129704404659e-06, "loss": 23.2793, "step": 380090 }, { "epoch": 0.7678260483118331, "grad_norm": 366.80743408203125, "learning_rate": 1.6517537189270043e-06, "loss": 19.1603, "step": 380100 }, { "epoch": 0.7678462489445169, "grad_norm": 536.4520874023438, "learning_rate": 1.651494483732486e-06, "loss": 27.78, "step": 380110 }, { "epoch": 0.7678664495772007, "grad_norm": 366.5112609863281, "learning_rate": 1.651235264858177e-06, "loss": 16.3296, "step": 380120 }, { "epoch": 0.7678866502098846, "grad_norm": 89.81643676757812, "learning_rate": 1.6509760623053435e-06, "loss": 20.7632, "step": 380130 }, { "epoch": 0.7679068508425684, "grad_norm": 779.8565063476562, "learning_rate": 1.6507168760752457e-06, "loss": 33.6351, "step": 380140 }, { "epoch": 0.7679270514752522, "grad_norm": 1.249252438545227, "learning_rate": 1.6504577061691468e-06, "loss": 11.8843, "step": 380150 }, { "epoch": 0.767947252107936, "grad_norm": 348.9931640625, "learning_rate": 1.650198552588312e-06, "loss": 20.5949, "step": 380160 }, { "epoch": 0.7679674527406198, "grad_norm": 340.8260803222656, "learning_rate": 1.649939415334003e-06, "loss": 6.7435, "step": 380170 }, { "epoch": 0.7679876533733037, "grad_norm": 313.790771484375, "learning_rate": 1.6496802944074814e-06, "loss": 19.5737, "step": 380180 }, { "epoch": 0.7680078540059875, "grad_norm": 6.209090709686279, "learning_rate": 1.649421189810012e-06, "loss": 19.2419, "step": 380190 }, { "epoch": 0.7680280546386713, "grad_norm": 490.5793151855469, "learning_rate": 1.6491621015428588e-06, "loss": 17.369, "step": 380200 }, { "epoch": 0.7680482552713551, "grad_norm": 500.19744873046875, "learning_rate": 1.6489030296072833e-06, "loss": 12.8155, "step": 380210 }, { "epoch": 0.7680684559040389, "grad_norm": 331.2500305175781, "learning_rate": 1.6486439740045463e-06, "loss": 20.4228, "step": 380220 }, { "epoch": 0.7680886565367228, "grad_norm": 141.11647033691406, "learning_rate": 1.6483849347359132e-06, "loss": 17.5376, "step": 380230 }, { "epoch": 0.7681088571694066, "grad_norm": 797.0460815429688, "learning_rate": 1.6481259118026444e-06, "loss": 21.028, "step": 380240 }, { "epoch": 0.7681290578020904, "grad_norm": 476.4696960449219, "learning_rate": 1.6478669052060048e-06, "loss": 16.271, "step": 380250 }, { "epoch": 0.7681492584347742, "grad_norm": 290.9534606933594, "learning_rate": 1.6476079149472552e-06, "loss": 12.3833, "step": 380260 }, { "epoch": 0.768169459067458, "grad_norm": 324.8664245605469, "learning_rate": 1.6473489410276565e-06, "loss": 30.8366, "step": 380270 }, { "epoch": 0.7681896597001419, "grad_norm": 280.6238098144531, "learning_rate": 1.6470899834484744e-06, "loss": 9.0797, "step": 380280 }, { "epoch": 0.7682098603328257, "grad_norm": 107.61328887939453, "learning_rate": 1.6468310422109684e-06, "loss": 8.9617, "step": 380290 }, { "epoch": 0.7682300609655094, "grad_norm": 29.13918113708496, "learning_rate": 1.6465721173164e-06, "loss": 20.003, "step": 380300 }, { "epoch": 0.7682502615981932, "grad_norm": 337.44024658203125, "learning_rate": 
1.6463132087660327e-06, "loss": 9.134, "step": 380310 }, { "epoch": 0.768270462230877, "grad_norm": 230.2394256591797, "learning_rate": 1.6460543165611291e-06, "loss": 15.5104, "step": 380320 }, { "epoch": 0.7682906628635608, "grad_norm": 584.089111328125, "learning_rate": 1.6457954407029503e-06, "loss": 19.6054, "step": 380330 }, { "epoch": 0.7683108634962447, "grad_norm": 259.95806884765625, "learning_rate": 1.6455365811927559e-06, "loss": 8.7815, "step": 380340 }, { "epoch": 0.7683310641289285, "grad_norm": 358.2709045410156, "learning_rate": 1.6452777380318112e-06, "loss": 16.9212, "step": 380350 }, { "epoch": 0.7683512647616123, "grad_norm": 525.129638671875, "learning_rate": 1.645018911221376e-06, "loss": 17.4771, "step": 380360 }, { "epoch": 0.7683714653942961, "grad_norm": 358.9599914550781, "learning_rate": 1.6447601007627096e-06, "loss": 14.5307, "step": 380370 }, { "epoch": 0.76839166602698, "grad_norm": 914.122802734375, "learning_rate": 1.6445013066570758e-06, "loss": 12.692, "step": 380380 }, { "epoch": 0.7684118666596638, "grad_norm": 220.31948852539062, "learning_rate": 1.644242528905739e-06, "loss": 8.3485, "step": 380390 }, { "epoch": 0.7684320672923476, "grad_norm": 612.853515625, "learning_rate": 1.643983767509954e-06, "loss": 10.4811, "step": 380400 }, { "epoch": 0.7684522679250314, "grad_norm": 105.07130432128906, "learning_rate": 1.6437250224709844e-06, "loss": 12.8862, "step": 380410 }, { "epoch": 0.7684724685577152, "grad_norm": 395.0177307128906, "learning_rate": 1.6434662937900942e-06, "loss": 17.6527, "step": 380420 }, { "epoch": 0.768492669190399, "grad_norm": 437.62164306640625, "learning_rate": 1.6432075814685423e-06, "loss": 24.6856, "step": 380430 }, { "epoch": 0.7685128698230829, "grad_norm": 233.72462463378906, "learning_rate": 1.6429488855075876e-06, "loss": 18.4488, "step": 380440 }, { "epoch": 0.7685330704557667, "grad_norm": 281.45330810546875, "learning_rate": 1.6426902059084942e-06, "loss": 16.5913, "step": 380450 }, { "epoch": 0.7685532710884505, "grad_norm": 188.50828552246094, "learning_rate": 1.6424315426725224e-06, "loss": 12.7214, "step": 380460 }, { "epoch": 0.7685734717211343, "grad_norm": 336.333740234375, "learning_rate": 1.6421728958009298e-06, "loss": 22.2692, "step": 380470 }, { "epoch": 0.7685936723538181, "grad_norm": 510.2742919921875, "learning_rate": 1.6419142652949793e-06, "loss": 18.998, "step": 380480 }, { "epoch": 0.768613872986502, "grad_norm": 558.2223510742188, "learning_rate": 1.6416556511559329e-06, "loss": 10.7758, "step": 380490 }, { "epoch": 0.7686340736191858, "grad_norm": 375.12060546875, "learning_rate": 1.6413970533850498e-06, "loss": 14.4149, "step": 380500 }, { "epoch": 0.7686542742518696, "grad_norm": 0.0005101134884171188, "learning_rate": 1.6411384719835882e-06, "loss": 12.5642, "step": 380510 }, { "epoch": 0.7686744748845534, "grad_norm": 486.0578918457031, "learning_rate": 1.6408799069528119e-06, "loss": 16.8116, "step": 380520 }, { "epoch": 0.7686946755172372, "grad_norm": 0.0, "learning_rate": 1.6406213582939801e-06, "loss": 12.1538, "step": 380530 }, { "epoch": 0.7687148761499211, "grad_norm": 258.78338623046875, "learning_rate": 1.6403628260083503e-06, "loss": 19.9973, "step": 380540 }, { "epoch": 0.7687350767826048, "grad_norm": 405.3760070800781, "learning_rate": 1.6401043100971864e-06, "loss": 16.9124, "step": 380550 }, { "epoch": 0.7687552774152886, "grad_norm": 446.0802917480469, "learning_rate": 1.639845810561745e-06, "loss": 12.1335, "step": 380560 }, { "epoch": 0.7687754780479724, "grad_norm": 
613.1295776367188, "learning_rate": 1.6395873274032887e-06, "loss": 12.5113, "step": 380570 }, { "epoch": 0.7687956786806562, "grad_norm": 649.5859375, "learning_rate": 1.6393288606230768e-06, "loss": 29.3049, "step": 380580 }, { "epoch": 0.7688158793133401, "grad_norm": 1147.98486328125, "learning_rate": 1.6390704102223664e-06, "loss": 31.9163, "step": 380590 }, { "epoch": 0.7688360799460239, "grad_norm": 648.0134887695312, "learning_rate": 1.6388119762024213e-06, "loss": 20.687, "step": 380600 }, { "epoch": 0.7688562805787077, "grad_norm": 368.980224609375, "learning_rate": 1.6385535585644985e-06, "loss": 14.665, "step": 380610 }, { "epoch": 0.7688764812113915, "grad_norm": 139.56597900390625, "learning_rate": 1.6382951573098565e-06, "loss": 23.6349, "step": 380620 }, { "epoch": 0.7688966818440753, "grad_norm": 420.3999938964844, "learning_rate": 1.638036772439756e-06, "loss": 15.8837, "step": 380630 }, { "epoch": 0.7689168824767592, "grad_norm": 353.9872131347656, "learning_rate": 1.6377784039554584e-06, "loss": 20.016, "step": 380640 }, { "epoch": 0.768937083109443, "grad_norm": 84.67881774902344, "learning_rate": 1.6375200518582208e-06, "loss": 24.0127, "step": 380650 }, { "epoch": 0.7689572837421268, "grad_norm": 771.5448608398438, "learning_rate": 1.6372617161493014e-06, "loss": 15.1039, "step": 380660 }, { "epoch": 0.7689774843748106, "grad_norm": 893.9058837890625, "learning_rate": 1.637003396829962e-06, "loss": 27.4231, "step": 380670 }, { "epoch": 0.7689976850074944, "grad_norm": 719.2028198242188, "learning_rate": 1.63674509390146e-06, "loss": 34.8571, "step": 380680 }, { "epoch": 0.7690178856401783, "grad_norm": 360.4425964355469, "learning_rate": 1.636486807365053e-06, "loss": 25.9847, "step": 380690 }, { "epoch": 0.7690380862728621, "grad_norm": 483.6988220214844, "learning_rate": 1.6362285372220016e-06, "loss": 29.8262, "step": 380700 }, { "epoch": 0.7690582869055459, "grad_norm": 199.2152099609375, "learning_rate": 1.635970283473567e-06, "loss": 16.3838, "step": 380710 }, { "epoch": 0.7690784875382297, "grad_norm": 431.31427001953125, "learning_rate": 1.6357120461210024e-06, "loss": 21.6255, "step": 380720 }, { "epoch": 0.7690986881709135, "grad_norm": 322.01214599609375, "learning_rate": 1.6354538251655695e-06, "loss": 15.2622, "step": 380730 }, { "epoch": 0.7691188888035974, "grad_norm": 432.5312194824219, "learning_rate": 1.6351956206085273e-06, "loss": 19.7821, "step": 380740 }, { "epoch": 0.7691390894362812, "grad_norm": 98.79090881347656, "learning_rate": 1.6349374324511347e-06, "loss": 16.8088, "step": 380750 }, { "epoch": 0.769159290068965, "grad_norm": 501.28057861328125, "learning_rate": 1.6346792606946466e-06, "loss": 21.1545, "step": 380760 }, { "epoch": 0.7691794907016488, "grad_norm": 439.8757629394531, "learning_rate": 1.6344211053403253e-06, "loss": 20.0453, "step": 380770 }, { "epoch": 0.7691996913343326, "grad_norm": 334.0589599609375, "learning_rate": 1.634162966389427e-06, "loss": 12.4049, "step": 380780 }, { "epoch": 0.7692198919670165, "grad_norm": 742.2249755859375, "learning_rate": 1.6339048438432093e-06, "loss": 22.8836, "step": 380790 }, { "epoch": 0.7692400925997003, "grad_norm": 34.85905838012695, "learning_rate": 1.6336467377029308e-06, "loss": 12.9863, "step": 380800 }, { "epoch": 0.769260293232384, "grad_norm": 215.7017822265625, "learning_rate": 1.6333886479698514e-06, "loss": 13.1861, "step": 380810 }, { "epoch": 0.7692804938650678, "grad_norm": 849.7139892578125, "learning_rate": 1.6331305746452276e-06, "loss": 15.6368, "step": 380820 }, { 
"epoch": 0.7693006944977516, "grad_norm": 118.31912231445312, "learning_rate": 1.632872517730315e-06, "loss": 16.8557, "step": 380830 }, { "epoch": 0.7693208951304354, "grad_norm": 290.2739562988281, "learning_rate": 1.6326144772263752e-06, "loss": 22.0026, "step": 380840 }, { "epoch": 0.7693410957631193, "grad_norm": 5.083776950836182, "learning_rate": 1.6323564531346642e-06, "loss": 9.5606, "step": 380850 }, { "epoch": 0.7693612963958031, "grad_norm": 138.90684509277344, "learning_rate": 1.6320984454564377e-06, "loss": 21.7563, "step": 380860 }, { "epoch": 0.7693814970284869, "grad_norm": 304.3311767578125, "learning_rate": 1.6318404541929562e-06, "loss": 22.3836, "step": 380870 }, { "epoch": 0.7694016976611707, "grad_norm": 664.1461791992188, "learning_rate": 1.6315824793454743e-06, "loss": 23.9397, "step": 380880 }, { "epoch": 0.7694218982938545, "grad_norm": 3.81365704536438, "learning_rate": 1.631324520915253e-06, "loss": 5.6078, "step": 380890 }, { "epoch": 0.7694420989265384, "grad_norm": 230.58444213867188, "learning_rate": 1.6310665789035468e-06, "loss": 12.9682, "step": 380900 }, { "epoch": 0.7694622995592222, "grad_norm": 395.6536560058594, "learning_rate": 1.6308086533116125e-06, "loss": 21.0332, "step": 380910 }, { "epoch": 0.769482500191906, "grad_norm": 605.6498413085938, "learning_rate": 1.6305507441407076e-06, "loss": 34.0215, "step": 380920 }, { "epoch": 0.7695027008245898, "grad_norm": 110.5271987915039, "learning_rate": 1.6302928513920912e-06, "loss": 17.813, "step": 380930 }, { "epoch": 0.7695229014572736, "grad_norm": 1174.549560546875, "learning_rate": 1.6300349750670191e-06, "loss": 13.342, "step": 380940 }, { "epoch": 0.7695431020899575, "grad_norm": 299.632568359375, "learning_rate": 1.6297771151667462e-06, "loss": 20.369, "step": 380950 }, { "epoch": 0.7695633027226413, "grad_norm": 354.7998962402344, "learning_rate": 1.6295192716925324e-06, "loss": 17.0283, "step": 380960 }, { "epoch": 0.7695835033553251, "grad_norm": 389.1010437011719, "learning_rate": 1.6292614446456328e-06, "loss": 13.1237, "step": 380970 }, { "epoch": 0.7696037039880089, "grad_norm": 343.6322021484375, "learning_rate": 1.629003634027303e-06, "loss": 12.6781, "step": 380980 }, { "epoch": 0.7696239046206927, "grad_norm": 459.3533630371094, "learning_rate": 1.6287458398388001e-06, "loss": 13.2048, "step": 380990 }, { "epoch": 0.7696441052533766, "grad_norm": 340.8312072753906, "learning_rate": 1.6284880620813847e-06, "loss": 21.0156, "step": 381000 }, { "epoch": 0.7696643058860604, "grad_norm": 93.18062591552734, "learning_rate": 1.6282303007563061e-06, "loss": 16.2557, "step": 381010 }, { "epoch": 0.7696845065187442, "grad_norm": 328.4632568359375, "learning_rate": 1.627972555864824e-06, "loss": 11.7841, "step": 381020 }, { "epoch": 0.769704707151428, "grad_norm": 454.50469970703125, "learning_rate": 1.6277148274081962e-06, "loss": 29.7739, "step": 381030 }, { "epoch": 0.7697249077841118, "grad_norm": 40.51005172729492, "learning_rate": 1.6274571153876777e-06, "loss": 15.5865, "step": 381040 }, { "epoch": 0.7697451084167957, "grad_norm": 316.1906433105469, "learning_rate": 1.627199419804522e-06, "loss": 11.8209, "step": 381050 }, { "epoch": 0.7697653090494795, "grad_norm": 547.84765625, "learning_rate": 1.6269417406599897e-06, "loss": 17.5443, "step": 381060 }, { "epoch": 0.7697855096821632, "grad_norm": 252.12637329101562, "learning_rate": 1.626684077955334e-06, "loss": 13.6713, "step": 381070 }, { "epoch": 0.769805710314847, "grad_norm": 535.2305908203125, "learning_rate": 
1.6264264316918087e-06, "loss": 21.1895, "step": 381080 }, { "epoch": 0.7698259109475308, "grad_norm": 164.35350036621094, "learning_rate": 1.6261688018706724e-06, "loss": 8.5325, "step": 381090 }, { "epoch": 0.7698461115802147, "grad_norm": 255.1110076904297, "learning_rate": 1.6259111884931817e-06, "loss": 18.8768, "step": 381100 }, { "epoch": 0.7698663122128985, "grad_norm": 0.0, "learning_rate": 1.6256535915605904e-06, "loss": 9.0382, "step": 381110 }, { "epoch": 0.7698865128455823, "grad_norm": 249.6778106689453, "learning_rate": 1.6253960110741528e-06, "loss": 17.7898, "step": 381120 }, { "epoch": 0.7699067134782661, "grad_norm": 361.3498229980469, "learning_rate": 1.6251384470351272e-06, "loss": 21.6081, "step": 381130 }, { "epoch": 0.7699269141109499, "grad_norm": 233.3412322998047, "learning_rate": 1.6248808994447678e-06, "loss": 17.5962, "step": 381140 }, { "epoch": 0.7699471147436338, "grad_norm": 228.55316162109375, "learning_rate": 1.6246233683043279e-06, "loss": 17.8167, "step": 381150 }, { "epoch": 0.7699673153763176, "grad_norm": 189.47850036621094, "learning_rate": 1.6243658536150657e-06, "loss": 15.8199, "step": 381160 }, { "epoch": 0.7699875160090014, "grad_norm": 747.3308715820312, "learning_rate": 1.6241083553782332e-06, "loss": 17.8983, "step": 381170 }, { "epoch": 0.7700077166416852, "grad_norm": 310.81427001953125, "learning_rate": 1.6238508735950892e-06, "loss": 19.3162, "step": 381180 }, { "epoch": 0.770027917274369, "grad_norm": 432.24334716796875, "learning_rate": 1.6235934082668863e-06, "loss": 15.3876, "step": 381190 }, { "epoch": 0.7700481179070529, "grad_norm": 334.8131103515625, "learning_rate": 1.6233359593948777e-06, "loss": 19.6963, "step": 381200 }, { "epoch": 0.7700683185397367, "grad_norm": 135.47760009765625, "learning_rate": 1.6230785269803223e-06, "loss": 11.7463, "step": 381210 }, { "epoch": 0.7700885191724205, "grad_norm": 184.69654846191406, "learning_rate": 1.6228211110244725e-06, "loss": 18.422, "step": 381220 }, { "epoch": 0.7701087198051043, "grad_norm": 190.71502685546875, "learning_rate": 1.6225637115285809e-06, "loss": 19.1578, "step": 381230 }, { "epoch": 0.7701289204377881, "grad_norm": 502.76531982421875, "learning_rate": 1.6223063284939045e-06, "loss": 24.0089, "step": 381240 }, { "epoch": 0.770149121070472, "grad_norm": 576.7030029296875, "learning_rate": 1.6220489619216988e-06, "loss": 39.1933, "step": 381250 }, { "epoch": 0.7701693217031558, "grad_norm": 834.5010986328125, "learning_rate": 1.621791611813217e-06, "loss": 19.1321, "step": 381260 }, { "epoch": 0.7701895223358396, "grad_norm": 410.09454345703125, "learning_rate": 1.6215342781697118e-06, "loss": 24.6805, "step": 381270 }, { "epoch": 0.7702097229685234, "grad_norm": 625.7750244140625, "learning_rate": 1.62127696099244e-06, "loss": 38.776, "step": 381280 }, { "epoch": 0.7702299236012072, "grad_norm": 405.533935546875, "learning_rate": 1.6210196602826544e-06, "loss": 13.8247, "step": 381290 }, { "epoch": 0.770250124233891, "grad_norm": 301.3874816894531, "learning_rate": 1.6207623760416074e-06, "loss": 24.191, "step": 381300 }, { "epoch": 0.7702703248665749, "grad_norm": 414.2416687011719, "learning_rate": 1.6205051082705553e-06, "loss": 23.5549, "step": 381310 }, { "epoch": 0.7702905254992586, "grad_norm": 305.6679992675781, "learning_rate": 1.620247856970754e-06, "loss": 14.1989, "step": 381320 }, { "epoch": 0.7703107261319424, "grad_norm": 180.5402374267578, "learning_rate": 1.6199906221434525e-06, "loss": 11.7204, "step": 381330 }, { "epoch": 0.7703309267646262, 
"grad_norm": 360.4750671386719, "learning_rate": 1.619733403789906e-06, "loss": 9.5748, "step": 381340 }, { "epoch": 0.77035112739731, "grad_norm": 525.697021484375, "learning_rate": 1.6194762019113703e-06, "loss": 14.7872, "step": 381350 }, { "epoch": 0.7703713280299939, "grad_norm": 315.2054748535156, "learning_rate": 1.6192190165090982e-06, "loss": 13.9068, "step": 381360 }, { "epoch": 0.7703915286626777, "grad_norm": 762.55419921875, "learning_rate": 1.6189618475843406e-06, "loss": 15.1304, "step": 381370 }, { "epoch": 0.7704117292953615, "grad_norm": 140.41734313964844, "learning_rate": 1.618704695138353e-06, "loss": 17.7757, "step": 381380 }, { "epoch": 0.7704319299280453, "grad_norm": 732.903076171875, "learning_rate": 1.618447559172392e-06, "loss": 23.0551, "step": 381390 }, { "epoch": 0.7704521305607291, "grad_norm": 18.876867294311523, "learning_rate": 1.6181904396877041e-06, "loss": 9.4914, "step": 381400 }, { "epoch": 0.770472331193413, "grad_norm": 252.22230529785156, "learning_rate": 1.6179333366855455e-06, "loss": 24.7016, "step": 381410 }, { "epoch": 0.7704925318260968, "grad_norm": 225.93679809570312, "learning_rate": 1.6176762501671717e-06, "loss": 17.5837, "step": 381420 }, { "epoch": 0.7705127324587806, "grad_norm": 394.24365234375, "learning_rate": 1.6174191801338334e-06, "loss": 13.2283, "step": 381430 }, { "epoch": 0.7705329330914644, "grad_norm": 106.54386138916016, "learning_rate": 1.617162126586782e-06, "loss": 17.2904, "step": 381440 }, { "epoch": 0.7705531337241482, "grad_norm": 46.939537048339844, "learning_rate": 1.6169050895272743e-06, "loss": 19.5087, "step": 381450 }, { "epoch": 0.7705733343568321, "grad_norm": 132.36495971679688, "learning_rate": 1.61664806895656e-06, "loss": 9.7853, "step": 381460 }, { "epoch": 0.7705935349895159, "grad_norm": 78.6220932006836, "learning_rate": 1.6163910648758919e-06, "loss": 18.6722, "step": 381470 }, { "epoch": 0.7706137356221997, "grad_norm": 182.31996154785156, "learning_rate": 1.6161340772865247e-06, "loss": 10.6614, "step": 381480 }, { "epoch": 0.7706339362548835, "grad_norm": 625.3024291992188, "learning_rate": 1.6158771061897077e-06, "loss": 17.6466, "step": 381490 }, { "epoch": 0.7706541368875673, "grad_norm": 312.5849609375, "learning_rate": 1.6156201515866971e-06, "loss": 16.7991, "step": 381500 }, { "epoch": 0.7706743375202512, "grad_norm": 249.2038116455078, "learning_rate": 1.6153632134787433e-06, "loss": 36.6219, "step": 381510 }, { "epoch": 0.770694538152935, "grad_norm": 327.1910095214844, "learning_rate": 1.6151062918670973e-06, "loss": 11.6358, "step": 381520 }, { "epoch": 0.7707147387856188, "grad_norm": 197.6422119140625, "learning_rate": 1.614849386753014e-06, "loss": 15.3233, "step": 381530 }, { "epoch": 0.7707349394183026, "grad_norm": 249.66806030273438, "learning_rate": 1.6145924981377424e-06, "loss": 7.9539, "step": 381540 }, { "epoch": 0.7707551400509864, "grad_norm": 342.3058166503906, "learning_rate": 1.6143356260225385e-06, "loss": 14.0132, "step": 381550 }, { "epoch": 0.7707753406836703, "grad_norm": 243.70208740234375, "learning_rate": 1.6140787704086502e-06, "loss": 18.7993, "step": 381560 }, { "epoch": 0.7707955413163541, "grad_norm": 20.913881301879883, "learning_rate": 1.6138219312973335e-06, "loss": 19.2215, "step": 381570 }, { "epoch": 0.7708157419490378, "grad_norm": 637.6466064453125, "learning_rate": 1.6135651086898373e-06, "loss": 21.3557, "step": 381580 }, { "epoch": 0.7708359425817216, "grad_norm": 280.07177734375, "learning_rate": 1.613308302587413e-06, "loss": 19.861, "step": 
381590 }, { "epoch": 0.7708561432144054, "grad_norm": 442.05072021484375, "learning_rate": 1.6130515129913144e-06, "loss": 10.3194, "step": 381600 }, { "epoch": 0.7708763438470893, "grad_norm": 229.8391876220703, "learning_rate": 1.6127947399027922e-06, "loss": 11.2043, "step": 381610 }, { "epoch": 0.7708965444797731, "grad_norm": 403.2405700683594, "learning_rate": 1.612537983323096e-06, "loss": 22.6117, "step": 381620 }, { "epoch": 0.7709167451124569, "grad_norm": 362.3747863769531, "learning_rate": 1.6122812432534785e-06, "loss": 22.793, "step": 381630 }, { "epoch": 0.7709369457451407, "grad_norm": 135.65130615234375, "learning_rate": 1.6120245196951933e-06, "loss": 11.5768, "step": 381640 }, { "epoch": 0.7709571463778245, "grad_norm": 551.9156494140625, "learning_rate": 1.6117678126494895e-06, "loss": 19.9291, "step": 381650 }, { "epoch": 0.7709773470105084, "grad_norm": 398.4827575683594, "learning_rate": 1.6115111221176171e-06, "loss": 13.0153, "step": 381660 }, { "epoch": 0.7709975476431922, "grad_norm": 250.5609588623047, "learning_rate": 1.6112544481008296e-06, "loss": 22.9284, "step": 381670 }, { "epoch": 0.771017748275876, "grad_norm": 494.46868896484375, "learning_rate": 1.6109977906003777e-06, "loss": 22.3495, "step": 381680 }, { "epoch": 0.7710379489085598, "grad_norm": 163.58030700683594, "learning_rate": 1.610741149617509e-06, "loss": 9.6822, "step": 381690 }, { "epoch": 0.7710581495412436, "grad_norm": 190.7425537109375, "learning_rate": 1.6104845251534772e-06, "loss": 14.6259, "step": 381700 }, { "epoch": 0.7710783501739275, "grad_norm": 387.62591552734375, "learning_rate": 1.6102279172095348e-06, "loss": 20.8189, "step": 381710 }, { "epoch": 0.7710985508066113, "grad_norm": 175.18040466308594, "learning_rate": 1.6099713257869303e-06, "loss": 10.4614, "step": 381720 }, { "epoch": 0.7711187514392951, "grad_norm": 746.3145141601562, "learning_rate": 1.6097147508869127e-06, "loss": 19.7552, "step": 381730 }, { "epoch": 0.7711389520719789, "grad_norm": 204.37940979003906, "learning_rate": 1.6094581925107356e-06, "loss": 9.6557, "step": 381740 }, { "epoch": 0.7711591527046627, "grad_norm": 1955.2236328125, "learning_rate": 1.6092016506596481e-06, "loss": 21.758, "step": 381750 }, { "epoch": 0.7711793533373466, "grad_norm": 298.4073181152344, "learning_rate": 1.6089451253348987e-06, "loss": 16.5147, "step": 381760 }, { "epoch": 0.7711995539700304, "grad_norm": 454.9640808105469, "learning_rate": 1.6086886165377414e-06, "loss": 16.8713, "step": 381770 }, { "epoch": 0.7712197546027142, "grad_norm": 171.67300415039062, "learning_rate": 1.608432124269423e-06, "loss": 13.2809, "step": 381780 }, { "epoch": 0.771239955235398, "grad_norm": 196.25167846679688, "learning_rate": 1.608175648531196e-06, "loss": 16.3012, "step": 381790 }, { "epoch": 0.7712601558680818, "grad_norm": 339.40155029296875, "learning_rate": 1.6079191893243102e-06, "loss": 17.3038, "step": 381800 }, { "epoch": 0.7712803565007657, "grad_norm": 143.8892059326172, "learning_rate": 1.6076627466500127e-06, "loss": 17.3237, "step": 381810 }, { "epoch": 0.7713005571334495, "grad_norm": 389.0728454589844, "learning_rate": 1.6074063205095576e-06, "loss": 41.0029, "step": 381820 }, { "epoch": 0.7713207577661332, "grad_norm": 1105.4664306640625, "learning_rate": 1.607149910904191e-06, "loss": 30.0587, "step": 381830 }, { "epoch": 0.771340958398817, "grad_norm": 548.2081909179688, "learning_rate": 1.6068935178351657e-06, "loss": 11.7212, "step": 381840 }, { "epoch": 0.7713611590315008, "grad_norm": 386.83685302734375, 
"learning_rate": 1.6066371413037286e-06, "loss": 22.2225, "step": 381850 }, { "epoch": 0.7713813596641846, "grad_norm": 316.70318603515625, "learning_rate": 1.6063807813111315e-06, "loss": 29.086, "step": 381860 }, { "epoch": 0.7714015602968685, "grad_norm": 367.0750427246094, "learning_rate": 1.6061244378586233e-06, "loss": 19.2121, "step": 381870 }, { "epoch": 0.7714217609295523, "grad_norm": 345.0139465332031, "learning_rate": 1.6058681109474511e-06, "loss": 16.8491, "step": 381880 }, { "epoch": 0.7714419615622361, "grad_norm": 375.4194030761719, "learning_rate": 1.605611800578868e-06, "loss": 19.4328, "step": 381890 }, { "epoch": 0.7714621621949199, "grad_norm": 1006.6027221679688, "learning_rate": 1.605355506754121e-06, "loss": 20.4447, "step": 381900 }, { "epoch": 0.7714823628276037, "grad_norm": 578.6243286132812, "learning_rate": 1.6050992294744578e-06, "loss": 29.9668, "step": 381910 }, { "epoch": 0.7715025634602876, "grad_norm": 136.8943328857422, "learning_rate": 1.6048429687411294e-06, "loss": 11.5305, "step": 381920 }, { "epoch": 0.7715227640929714, "grad_norm": 386.48602294921875, "learning_rate": 1.604586724555388e-06, "loss": 20.3794, "step": 381930 }, { "epoch": 0.7715429647256552, "grad_norm": 41.99283218383789, "learning_rate": 1.6043304969184752e-06, "loss": 18.3726, "step": 381940 }, { "epoch": 0.771563165358339, "grad_norm": 658.3291015625, "learning_rate": 1.6040742858316443e-06, "loss": 22.7599, "step": 381950 }, { "epoch": 0.7715833659910228, "grad_norm": 555.6497802734375, "learning_rate": 1.6038180912961455e-06, "loss": 57.4267, "step": 381960 }, { "epoch": 0.7716035666237067, "grad_norm": 280.7648620605469, "learning_rate": 1.6035619133132247e-06, "loss": 13.9776, "step": 381970 }, { "epoch": 0.7716237672563905, "grad_norm": 430.380859375, "learning_rate": 1.6033057518841294e-06, "loss": 28.2771, "step": 381980 }, { "epoch": 0.7716439678890743, "grad_norm": 596.4095458984375, "learning_rate": 1.6030496070101104e-06, "loss": 29.4352, "step": 381990 }, { "epoch": 0.7716641685217581, "grad_norm": 219.3242645263672, "learning_rate": 1.6027934786924187e-06, "loss": 24.9992, "step": 382000 }, { "epoch": 0.7716843691544419, "grad_norm": 126.5769271850586, "learning_rate": 1.6025373669322963e-06, "loss": 16.7645, "step": 382010 }, { "epoch": 0.7717045697871258, "grad_norm": 350.49432373046875, "learning_rate": 1.602281271730995e-06, "loss": 7.0701, "step": 382020 }, { "epoch": 0.7717247704198096, "grad_norm": 416.9051208496094, "learning_rate": 1.602025193089764e-06, "loss": 10.584, "step": 382030 }, { "epoch": 0.7717449710524934, "grad_norm": 414.9043273925781, "learning_rate": 1.60176913100985e-06, "loss": 24.8061, "step": 382040 }, { "epoch": 0.7717651716851772, "grad_norm": 322.49114990234375, "learning_rate": 1.6015130854924999e-06, "loss": 16.9898, "step": 382050 }, { "epoch": 0.771785372317861, "grad_norm": 488.6578369140625, "learning_rate": 1.601257056538964e-06, "loss": 15.9868, "step": 382060 }, { "epoch": 0.7718055729505449, "grad_norm": 138.75418090820312, "learning_rate": 1.6010010441504891e-06, "loss": 16.8504, "step": 382070 }, { "epoch": 0.7718257735832287, "grad_norm": 693.740478515625, "learning_rate": 1.6007450483283215e-06, "loss": 25.4663, "step": 382080 }, { "epoch": 0.7718459742159124, "grad_norm": 400.11407470703125, "learning_rate": 1.6004890690737114e-06, "loss": 26.5421, "step": 382090 }, { "epoch": 0.7718661748485962, "grad_norm": 314.84747314453125, "learning_rate": 1.600233106387904e-06, "loss": 26.856, "step": 382100 }, { "epoch": 
0.77188637548128, "grad_norm": 316.89752197265625, "learning_rate": 1.5999771602721499e-06, "loss": 14.2165, "step": 382110 }, { "epoch": 0.7719065761139638, "grad_norm": 109.03117370605469, "learning_rate": 1.5997212307276943e-06, "loss": 13.3175, "step": 382120 }, { "epoch": 0.7719267767466477, "grad_norm": 645.1715698242188, "learning_rate": 1.5994653177557833e-06, "loss": 14.0254, "step": 382130 }, { "epoch": 0.7719469773793315, "grad_norm": 871.7144775390625, "learning_rate": 1.5992094213576682e-06, "loss": 32.4052, "step": 382140 }, { "epoch": 0.7719671780120153, "grad_norm": 479.74798583984375, "learning_rate": 1.598953541534592e-06, "loss": 24.3621, "step": 382150 }, { "epoch": 0.7719873786446991, "grad_norm": 464.97760009765625, "learning_rate": 1.598697678287805e-06, "loss": 19.9426, "step": 382160 }, { "epoch": 0.772007579277383, "grad_norm": 371.6749572753906, "learning_rate": 1.5984418316185518e-06, "loss": 16.0154, "step": 382170 }, { "epoch": 0.7720277799100668, "grad_norm": 300.82476806640625, "learning_rate": 1.598186001528082e-06, "loss": 15.7633, "step": 382180 }, { "epoch": 0.7720479805427506, "grad_norm": 1.938651442527771, "learning_rate": 1.5979301880176407e-06, "loss": 9.9504, "step": 382190 }, { "epoch": 0.7720681811754344, "grad_norm": 88.23099517822266, "learning_rate": 1.597674391088474e-06, "loss": 13.4447, "step": 382200 }, { "epoch": 0.7720883818081182, "grad_norm": 342.0703430175781, "learning_rate": 1.597418610741831e-06, "loss": 32.9549, "step": 382210 }, { "epoch": 0.772108582440802, "grad_norm": 208.0787353515625, "learning_rate": 1.5971628469789569e-06, "loss": 11.0357, "step": 382220 }, { "epoch": 0.7721287830734859, "grad_norm": 478.9021301269531, "learning_rate": 1.5969070998010972e-06, "loss": 16.869, "step": 382230 }, { "epoch": 0.7721489837061697, "grad_norm": 797.6223754882812, "learning_rate": 1.5966513692094992e-06, "loss": 13.592, "step": 382240 }, { "epoch": 0.7721691843388535, "grad_norm": 244.73097229003906, "learning_rate": 1.596395655205411e-06, "loss": 14.8758, "step": 382250 }, { "epoch": 0.7721893849715373, "grad_norm": 2272.3330078125, "learning_rate": 1.5961399577900783e-06, "loss": 37.7566, "step": 382260 }, { "epoch": 0.7722095856042211, "grad_norm": 439.3791809082031, "learning_rate": 1.5958842769647442e-06, "loss": 17.8415, "step": 382270 }, { "epoch": 0.772229786236905, "grad_norm": 284.2250061035156, "learning_rate": 1.5956286127306591e-06, "loss": 17.0185, "step": 382280 }, { "epoch": 0.7722499868695888, "grad_norm": 421.5446472167969, "learning_rate": 1.5953729650890675e-06, "loss": 16.382, "step": 382290 }, { "epoch": 0.7722701875022726, "grad_norm": 405.8324279785156, "learning_rate": 1.5951173340412134e-06, "loss": 20.0054, "step": 382300 }, { "epoch": 0.7722903881349564, "grad_norm": 276.3622741699219, "learning_rate": 1.5948617195883448e-06, "loss": 10.8424, "step": 382310 }, { "epoch": 0.7723105887676402, "grad_norm": 543.3141479492188, "learning_rate": 1.5946061217317082e-06, "loss": 22.2879, "step": 382320 }, { "epoch": 0.7723307894003241, "grad_norm": 1.156139612197876, "learning_rate": 1.594350540472549e-06, "loss": 17.0557, "step": 382330 }, { "epoch": 0.7723509900330079, "grad_norm": 267.58477783203125, "learning_rate": 1.59409497581211e-06, "loss": 13.4345, "step": 382340 }, { "epoch": 0.7723711906656916, "grad_norm": 72.86398315429688, "learning_rate": 1.5938394277516412e-06, "loss": 26.052, "step": 382350 }, { "epoch": 0.7723913912983754, "grad_norm": 144.9212188720703, "learning_rate": 1.5935838962923849e-06, 
"loss": 13.0571, "step": 382360 }, { "epoch": 0.7724115919310592, "grad_norm": 351.2877197265625, "learning_rate": 1.5933283814355871e-06, "loss": 22.0361, "step": 382370 }, { "epoch": 0.7724317925637431, "grad_norm": 434.0843200683594, "learning_rate": 1.5930728831824943e-06, "loss": 13.3632, "step": 382380 }, { "epoch": 0.7724519931964269, "grad_norm": 218.42372131347656, "learning_rate": 1.59281740153435e-06, "loss": 16.7192, "step": 382390 }, { "epoch": 0.7724721938291107, "grad_norm": 431.15924072265625, "learning_rate": 1.5925619364924016e-06, "loss": 13.0559, "step": 382400 }, { "epoch": 0.7724923944617945, "grad_norm": 329.8361511230469, "learning_rate": 1.5923064880578937e-06, "loss": 19.9663, "step": 382410 }, { "epoch": 0.7725125950944783, "grad_norm": 403.38616943359375, "learning_rate": 1.5920510562320685e-06, "loss": 26.0965, "step": 382420 }, { "epoch": 0.7725327957271622, "grad_norm": 388.4576416015625, "learning_rate": 1.591795641016175e-06, "loss": 9.0168, "step": 382430 }, { "epoch": 0.772552996359846, "grad_norm": 1017.5739135742188, "learning_rate": 1.5915402424114545e-06, "loss": 29.5508, "step": 382440 }, { "epoch": 0.7725731969925298, "grad_norm": 560.5558471679688, "learning_rate": 1.591284860419155e-06, "loss": 11.9871, "step": 382450 }, { "epoch": 0.7725933976252136, "grad_norm": 292.2740173339844, "learning_rate": 1.591029495040518e-06, "loss": 25.0928, "step": 382460 }, { "epoch": 0.7726135982578974, "grad_norm": 1157.902587890625, "learning_rate": 1.5907741462767916e-06, "loss": 22.0831, "step": 382470 }, { "epoch": 0.7726337988905813, "grad_norm": 1321.1202392578125, "learning_rate": 1.5905188141292182e-06, "loss": 22.8953, "step": 382480 }, { "epoch": 0.7726539995232651, "grad_norm": 271.35601806640625, "learning_rate": 1.5902634985990412e-06, "loss": 15.5145, "step": 382490 }, { "epoch": 0.7726742001559489, "grad_norm": 388.1890563964844, "learning_rate": 1.5900081996875083e-06, "loss": 18.603, "step": 382500 }, { "epoch": 0.7726944007886327, "grad_norm": 155.25045776367188, "learning_rate": 1.5897529173958615e-06, "loss": 19.8483, "step": 382510 }, { "epoch": 0.7727146014213165, "grad_norm": 203.2943572998047, "learning_rate": 1.5894976517253436e-06, "loss": 44.4506, "step": 382520 }, { "epoch": 0.7727348020540004, "grad_norm": 518.5247192382812, "learning_rate": 1.5892424026772008e-06, "loss": 30.9917, "step": 382530 }, { "epoch": 0.7727550026866842, "grad_norm": 248.99508666992188, "learning_rate": 1.5889871702526799e-06, "loss": 20.7534, "step": 382540 }, { "epoch": 0.772775203319368, "grad_norm": 134.00927734375, "learning_rate": 1.5887319544530182e-06, "loss": 19.9531, "step": 382550 }, { "epoch": 0.7727954039520518, "grad_norm": 527.314697265625, "learning_rate": 1.5884767552794639e-06, "loss": 15.5636, "step": 382560 }, { "epoch": 0.7728156045847356, "grad_norm": 302.09124755859375, "learning_rate": 1.5882215727332618e-06, "loss": 18.4815, "step": 382570 }, { "epoch": 0.7728358052174195, "grad_norm": 298.60162353515625, "learning_rate": 1.5879664068156535e-06, "loss": 16.8549, "step": 382580 }, { "epoch": 0.7728560058501033, "grad_norm": 423.8826599121094, "learning_rate": 1.5877112575278819e-06, "loss": 10.2502, "step": 382590 }, { "epoch": 0.772876206482787, "grad_norm": 211.8774871826172, "learning_rate": 1.587456124871191e-06, "loss": 24.0772, "step": 382600 }, { "epoch": 0.7728964071154708, "grad_norm": 162.92193603515625, "learning_rate": 1.5872010088468293e-06, "loss": 10.396, "step": 382610 }, { "epoch": 0.7729166077481546, "grad_norm": 
704.2095336914062, "learning_rate": 1.586945909456032e-06, "loss": 10.3541, "step": 382620 }, { "epoch": 0.7729368083808384, "grad_norm": 1019.6995239257812, "learning_rate": 1.5866908267000464e-06, "loss": 20.1296, "step": 382630 }, { "epoch": 0.7729570090135223, "grad_norm": 48.223819732666016, "learning_rate": 1.586435760580118e-06, "loss": 14.7224, "step": 382640 }, { "epoch": 0.7729772096462061, "grad_norm": 319.72552490234375, "learning_rate": 1.5861807110974869e-06, "loss": 18.7333, "step": 382650 }, { "epoch": 0.7729974102788899, "grad_norm": 380.0537109375, "learning_rate": 1.585925678253396e-06, "loss": 27.573, "step": 382660 }, { "epoch": 0.7730176109115737, "grad_norm": 983.9434814453125, "learning_rate": 1.5856706620490902e-06, "loss": 22.5923, "step": 382670 }, { "epoch": 0.7730378115442575, "grad_norm": 382.8108215332031, "learning_rate": 1.5854156624858119e-06, "loss": 21.1205, "step": 382680 }, { "epoch": 0.7730580121769414, "grad_norm": 390.1148681640625, "learning_rate": 1.5851606795648023e-06, "loss": 18.5207, "step": 382690 }, { "epoch": 0.7730782128096252, "grad_norm": 496.3921203613281, "learning_rate": 1.5849057132873063e-06, "loss": 13.3286, "step": 382700 }, { "epoch": 0.773098413442309, "grad_norm": 454.15509033203125, "learning_rate": 1.5846507636545645e-06, "loss": 8.9551, "step": 382710 }, { "epoch": 0.7731186140749928, "grad_norm": 617.4522094726562, "learning_rate": 1.5843958306678219e-06, "loss": 16.2409, "step": 382720 }, { "epoch": 0.7731388147076766, "grad_norm": 58.4134407043457, "learning_rate": 1.58414091432832e-06, "loss": 11.9742, "step": 382730 }, { "epoch": 0.7731590153403605, "grad_norm": 44.10279846191406, "learning_rate": 1.5838860146372992e-06, "loss": 12.1516, "step": 382740 }, { "epoch": 0.7731792159730443, "grad_norm": 242.874755859375, "learning_rate": 1.5836311315960051e-06, "loss": 14.0989, "step": 382750 }, { "epoch": 0.7731994166057281, "grad_norm": 386.6486511230469, "learning_rate": 1.5833762652056773e-06, "loss": 29.4466, "step": 382760 }, { "epoch": 0.7732196172384119, "grad_norm": 244.56224060058594, "learning_rate": 1.5831214154675605e-06, "loss": 12.7304, "step": 382770 }, { "epoch": 0.7732398178710957, "grad_norm": 321.93145751953125, "learning_rate": 1.5828665823828943e-06, "loss": 16.6896, "step": 382780 }, { "epoch": 0.7732600185037796, "grad_norm": 521.9586181640625, "learning_rate": 1.5826117659529234e-06, "loss": 13.361, "step": 382790 }, { "epoch": 0.7732802191364634, "grad_norm": 635.6339111328125, "learning_rate": 1.582356966178888e-06, "loss": 28.9973, "step": 382800 }, { "epoch": 0.7733004197691472, "grad_norm": 109.14483642578125, "learning_rate": 1.5821021830620287e-06, "loss": 13.728, "step": 382810 }, { "epoch": 0.773320620401831, "grad_norm": 430.0278625488281, "learning_rate": 1.5818474166035907e-06, "loss": 15.9968, "step": 382820 }, { "epoch": 0.7733408210345148, "grad_norm": 169.47763061523438, "learning_rate": 1.5815926668048138e-06, "loss": 14.3138, "step": 382830 }, { "epoch": 0.7733610216671987, "grad_norm": 941.049560546875, "learning_rate": 1.5813379336669377e-06, "loss": 18.2106, "step": 382840 }, { "epoch": 0.7733812222998825, "grad_norm": 530.6898193359375, "learning_rate": 1.581083217191206e-06, "loss": 17.7321, "step": 382850 }, { "epoch": 0.7734014229325662, "grad_norm": 179.96353149414062, "learning_rate": 1.5808285173788617e-06, "loss": 13.7731, "step": 382860 }, { "epoch": 0.77342162356525, "grad_norm": 335.7576904296875, "learning_rate": 1.5805738342311444e-06, "loss": 9.9771, "step": 382870 
}, { "epoch": 0.7734418241979338, "grad_norm": 1528.3177490234375, "learning_rate": 1.5803191677492941e-06, "loss": 24.7554, "step": 382880 }, { "epoch": 0.7734620248306177, "grad_norm": 286.49993896484375, "learning_rate": 1.5800645179345548e-06, "loss": 23.7924, "step": 382890 }, { "epoch": 0.7734822254633015, "grad_norm": 299.359130859375, "learning_rate": 1.5798098847881664e-06, "loss": 24.5248, "step": 382900 }, { "epoch": 0.7735024260959853, "grad_norm": 167.763427734375, "learning_rate": 1.5795552683113679e-06, "loss": 20.9832, "step": 382910 }, { "epoch": 0.7735226267286691, "grad_norm": 340.8807067871094, "learning_rate": 1.579300668505403e-06, "loss": 19.0091, "step": 382920 }, { "epoch": 0.7735428273613529, "grad_norm": 749.7567749023438, "learning_rate": 1.5790460853715123e-06, "loss": 21.8559, "step": 382930 }, { "epoch": 0.7735630279940368, "grad_norm": 55.97519302368164, "learning_rate": 1.578791518910937e-06, "loss": 12.544, "step": 382940 }, { "epoch": 0.7735832286267206, "grad_norm": 268.6002197265625, "learning_rate": 1.5785369691249147e-06, "loss": 40.3558, "step": 382950 }, { "epoch": 0.7736034292594044, "grad_norm": 138.2102508544922, "learning_rate": 1.5782824360146897e-06, "loss": 39.5704, "step": 382960 }, { "epoch": 0.7736236298920882, "grad_norm": 119.10359954833984, "learning_rate": 1.5780279195815018e-06, "loss": 18.7883, "step": 382970 }, { "epoch": 0.773643830524772, "grad_norm": 506.84320068359375, "learning_rate": 1.5777734198265887e-06, "loss": 22.4223, "step": 382980 }, { "epoch": 0.7736640311574559, "grad_norm": 374.88555908203125, "learning_rate": 1.5775189367511946e-06, "loss": 15.4941, "step": 382990 }, { "epoch": 0.7736842317901397, "grad_norm": 389.50445556640625, "learning_rate": 1.5772644703565564e-06, "loss": 12.4242, "step": 383000 }, { "epoch": 0.7737044324228235, "grad_norm": 238.9445037841797, "learning_rate": 1.5770100206439177e-06, "loss": 17.647, "step": 383010 }, { "epoch": 0.7737246330555073, "grad_norm": 309.0486145019531, "learning_rate": 1.576755587614517e-06, "loss": 28.6423, "step": 383020 }, { "epoch": 0.7737448336881911, "grad_norm": 172.6853485107422, "learning_rate": 1.5765011712695928e-06, "loss": 11.7171, "step": 383030 }, { "epoch": 0.773765034320875, "grad_norm": 702.5298461914062, "learning_rate": 1.5762467716103884e-06, "loss": 21.4869, "step": 383040 }, { "epoch": 0.7737852349535588, "grad_norm": 339.43353271484375, "learning_rate": 1.5759923886381402e-06, "loss": 19.1555, "step": 383050 }, { "epoch": 0.7738054355862426, "grad_norm": 338.2257995605469, "learning_rate": 1.5757380223540914e-06, "loss": 20.7537, "step": 383060 }, { "epoch": 0.7738256362189264, "grad_norm": 78.8567886352539, "learning_rate": 1.5754836727594786e-06, "loss": 11.621, "step": 383070 }, { "epoch": 0.7738458368516102, "grad_norm": 433.5685119628906, "learning_rate": 1.5752293398555446e-06, "loss": 14.2845, "step": 383080 }, { "epoch": 0.7738660374842941, "grad_norm": 183.4577178955078, "learning_rate": 1.5749750236435279e-06, "loss": 11.6864, "step": 383090 }, { "epoch": 0.7738862381169779, "grad_norm": 1075.549072265625, "learning_rate": 1.5747207241246654e-06, "loss": 23.7103, "step": 383100 }, { "epoch": 0.7739064387496616, "grad_norm": 190.39039611816406, "learning_rate": 1.5744664413002004e-06, "loss": 12.5373, "step": 383110 }, { "epoch": 0.7739266393823454, "grad_norm": 687.258056640625, "learning_rate": 1.5742121751713708e-06, "loss": 20.1562, "step": 383120 }, { "epoch": 0.7739468400150292, "grad_norm": 233.72378540039062, "learning_rate": 
1.5739579257394132e-06, "loss": 18.9227, "step": 383130 }, { "epoch": 0.773967040647713, "grad_norm": 2.5841612815856934, "learning_rate": 1.57370369300557e-06, "loss": 14.079, "step": 383140 }, { "epoch": 0.7739872412803969, "grad_norm": 424.8268737792969, "learning_rate": 1.5734494769710817e-06, "loss": 12.2895, "step": 383150 }, { "epoch": 0.7740074419130807, "grad_norm": 736.3742065429688, "learning_rate": 1.5731952776371828e-06, "loss": 15.886, "step": 383160 }, { "epoch": 0.7740276425457645, "grad_norm": 557.7459716796875, "learning_rate": 1.5729410950051139e-06, "loss": 19.9959, "step": 383170 }, { "epoch": 0.7740478431784483, "grad_norm": 377.92144775390625, "learning_rate": 1.572686929076116e-06, "loss": 19.8222, "step": 383180 }, { "epoch": 0.7740680438111321, "grad_norm": 218.2261962890625, "learning_rate": 1.5724327798514267e-06, "loss": 22.5208, "step": 383190 }, { "epoch": 0.774088244443816, "grad_norm": 151.34141540527344, "learning_rate": 1.5721786473322825e-06, "loss": 11.3797, "step": 383200 }, { "epoch": 0.7741084450764998, "grad_norm": 483.9348449707031, "learning_rate": 1.571924531519924e-06, "loss": 20.1426, "step": 383210 }, { "epoch": 0.7741286457091836, "grad_norm": 32.00010681152344, "learning_rate": 1.571670432415593e-06, "loss": 14.2402, "step": 383220 }, { "epoch": 0.7741488463418674, "grad_norm": 31.30953598022461, "learning_rate": 1.5714163500205203e-06, "loss": 24.5089, "step": 383230 }, { "epoch": 0.7741690469745512, "grad_norm": 777.2874145507812, "learning_rate": 1.5711622843359492e-06, "loss": 40.0035, "step": 383240 }, { "epoch": 0.7741892476072351, "grad_norm": 375.6747131347656, "learning_rate": 1.5709082353631188e-06, "loss": 23.9119, "step": 383250 }, { "epoch": 0.7742094482399189, "grad_norm": 434.52178955078125, "learning_rate": 1.5706542031032663e-06, "loss": 21.5206, "step": 383260 }, { "epoch": 0.7742296488726027, "grad_norm": 461.241943359375, "learning_rate": 1.570400187557627e-06, "loss": 10.6992, "step": 383270 }, { "epoch": 0.7742498495052865, "grad_norm": 490.8486022949219, "learning_rate": 1.5701461887274428e-06, "loss": 21.7874, "step": 383280 }, { "epoch": 0.7742700501379703, "grad_norm": 435.8128662109375, "learning_rate": 1.5698922066139505e-06, "loss": 21.2518, "step": 383290 }, { "epoch": 0.7742902507706542, "grad_norm": 376.1637878417969, "learning_rate": 1.5696382412183853e-06, "loss": 17.9095, "step": 383300 }, { "epoch": 0.774310451403338, "grad_norm": 61.54003143310547, "learning_rate": 1.5693842925419894e-06, "loss": 17.7064, "step": 383310 }, { "epoch": 0.7743306520360218, "grad_norm": 1618.9508056640625, "learning_rate": 1.5691303605859964e-06, "loss": 38.0497, "step": 383320 }, { "epoch": 0.7743508526687056, "grad_norm": 594.95849609375, "learning_rate": 1.568876445351648e-06, "loss": 17.9375, "step": 383330 }, { "epoch": 0.7743710533013894, "grad_norm": 187.2743377685547, "learning_rate": 1.5686225468401795e-06, "loss": 31.4732, "step": 383340 }, { "epoch": 0.7743912539340733, "grad_norm": 350.3153381347656, "learning_rate": 1.5683686650528267e-06, "loss": 14.1288, "step": 383350 }, { "epoch": 0.7744114545667571, "grad_norm": 310.48394775390625, "learning_rate": 1.5681147999908308e-06, "loss": 24.6163, "step": 383360 }, { "epoch": 0.7744316551994408, "grad_norm": 636.476806640625, "learning_rate": 1.567860951655425e-06, "loss": 20.272, "step": 383370 }, { "epoch": 0.7744518558321246, "grad_norm": 602.2848510742188, "learning_rate": 1.5676071200478504e-06, "loss": 30.9711, "step": 383380 }, { "epoch": 0.7744720564648084, 
"grad_norm": 381.50213623046875, "learning_rate": 1.5673533051693413e-06, "loss": 18.0637, "step": 383390 }, { "epoch": 0.7744922570974923, "grad_norm": 1670.898681640625, "learning_rate": 1.567099507021137e-06, "loss": 38.0073, "step": 383400 }, { "epoch": 0.7745124577301761, "grad_norm": 166.84677124023438, "learning_rate": 1.5668457256044733e-06, "loss": 22.5448, "step": 383410 }, { "epoch": 0.7745326583628599, "grad_norm": 5.103789806365967, "learning_rate": 1.566591960920586e-06, "loss": 23.9697, "step": 383420 }, { "epoch": 0.7745528589955437, "grad_norm": 874.5416259765625, "learning_rate": 1.5663382129707144e-06, "loss": 24.9892, "step": 383430 }, { "epoch": 0.7745730596282275, "grad_norm": 49.70535659790039, "learning_rate": 1.5660844817560939e-06, "loss": 12.3777, "step": 383440 }, { "epoch": 0.7745932602609114, "grad_norm": 645.6976318359375, "learning_rate": 1.5658307672779594e-06, "loss": 17.1051, "step": 383450 }, { "epoch": 0.7746134608935952, "grad_norm": 410.8448486328125, "learning_rate": 1.5655770695375494e-06, "loss": 10.888, "step": 383460 }, { "epoch": 0.774633661526279, "grad_norm": 938.0996704101562, "learning_rate": 1.5653233885361013e-06, "loss": 24.6534, "step": 383470 }, { "epoch": 0.7746538621589628, "grad_norm": 339.14263916015625, "learning_rate": 1.5650697242748513e-06, "loss": 15.8851, "step": 383480 }, { "epoch": 0.7746740627916466, "grad_norm": 380.4107666015625, "learning_rate": 1.5648160767550324e-06, "loss": 16.784, "step": 383490 }, { "epoch": 0.7746942634243305, "grad_norm": 259.8949890136719, "learning_rate": 1.5645624459778858e-06, "loss": 10.5215, "step": 383500 }, { "epoch": 0.7747144640570143, "grad_norm": 1726.8370361328125, "learning_rate": 1.5643088319446441e-06, "loss": 26.8748, "step": 383510 }, { "epoch": 0.7747346646896981, "grad_norm": 353.5360107421875, "learning_rate": 1.5640552346565441e-06, "loss": 24.7704, "step": 383520 }, { "epoch": 0.7747548653223819, "grad_norm": 384.0005187988281, "learning_rate": 1.563801654114821e-06, "loss": 18.4352, "step": 383530 }, { "epoch": 0.7747750659550657, "grad_norm": 416.81451416015625, "learning_rate": 1.5635480903207139e-06, "loss": 17.7697, "step": 383540 }, { "epoch": 0.7747952665877496, "grad_norm": 829.3290405273438, "learning_rate": 1.563294543275457e-06, "loss": 21.636, "step": 383550 }, { "epoch": 0.7748154672204334, "grad_norm": 925.5904541015625, "learning_rate": 1.5630410129802837e-06, "loss": 36.7419, "step": 383560 }, { "epoch": 0.7748356678531172, "grad_norm": 170.02854919433594, "learning_rate": 1.5627874994364335e-06, "loss": 10.2682, "step": 383570 }, { "epoch": 0.774855868485801, "grad_norm": 68.25321197509766, "learning_rate": 1.5625340026451396e-06, "loss": 12.093, "step": 383580 }, { "epoch": 0.7748760691184848, "grad_norm": 423.32275390625, "learning_rate": 1.562280522607637e-06, "loss": 14.1, "step": 383590 }, { "epoch": 0.7748962697511687, "grad_norm": 518.2012939453125, "learning_rate": 1.5620270593251635e-06, "loss": 18.0794, "step": 383600 }, { "epoch": 0.7749164703838525, "grad_norm": 171.8751220703125, "learning_rate": 1.561773612798952e-06, "loss": 12.9444, "step": 383610 }, { "epoch": 0.7749366710165362, "grad_norm": 553.6185302734375, "learning_rate": 1.5615201830302402e-06, "loss": 13.3761, "step": 383620 }, { "epoch": 0.77495687164922, "grad_norm": 387.4228210449219, "learning_rate": 1.5612667700202616e-06, "loss": 17.978, "step": 383630 }, { "epoch": 0.7749770722819038, "grad_norm": 317.31097412109375, "learning_rate": 1.5610133737702503e-06, "loss": 25.5296, 
"step": 383640 }, { "epoch": 0.7749972729145876, "grad_norm": 90.6382827758789, "learning_rate": 1.560759994281445e-06, "loss": 13.9602, "step": 383650 }, { "epoch": 0.7750174735472715, "grad_norm": 312.0516052246094, "learning_rate": 1.5605066315550759e-06, "loss": 13.112, "step": 383660 }, { "epoch": 0.7750376741799553, "grad_norm": 305.4877624511719, "learning_rate": 1.5602532855923824e-06, "loss": 15.4547, "step": 383670 }, { "epoch": 0.7750578748126391, "grad_norm": 603.0599975585938, "learning_rate": 1.5599999563945955e-06, "loss": 11.5011, "step": 383680 }, { "epoch": 0.7750780754453229, "grad_norm": 338.0648498535156, "learning_rate": 1.5597466439629532e-06, "loss": 16.2301, "step": 383690 }, { "epoch": 0.7750982760780067, "grad_norm": 606.4363403320312, "learning_rate": 1.5594933482986885e-06, "loss": 31.9384, "step": 383700 }, { "epoch": 0.7751184767106906, "grad_norm": 1.41245436668396, "learning_rate": 1.5592400694030342e-06, "loss": 15.2708, "step": 383710 }, { "epoch": 0.7751386773433744, "grad_norm": 676.3465576171875, "learning_rate": 1.5589868072772279e-06, "loss": 51.5045, "step": 383720 }, { "epoch": 0.7751588779760582, "grad_norm": 7.440090179443359, "learning_rate": 1.558733561922503e-06, "loss": 7.3795, "step": 383730 }, { "epoch": 0.775179078608742, "grad_norm": 759.7951049804688, "learning_rate": 1.5584803333400917e-06, "loss": 21.857, "step": 383740 }, { "epoch": 0.7751992792414258, "grad_norm": 178.63719177246094, "learning_rate": 1.5582271215312294e-06, "loss": 15.4087, "step": 383750 }, { "epoch": 0.7752194798741097, "grad_norm": 778.220947265625, "learning_rate": 1.5579739264971544e-06, "loss": 25.2869, "step": 383760 }, { "epoch": 0.7752396805067935, "grad_norm": 338.1744079589844, "learning_rate": 1.5577207482390933e-06, "loss": 14.539, "step": 383770 }, { "epoch": 0.7752598811394773, "grad_norm": 483.3032531738281, "learning_rate": 1.5574675867582845e-06, "loss": 17.1825, "step": 383780 }, { "epoch": 0.7752800817721611, "grad_norm": 305.2230529785156, "learning_rate": 1.557214442055962e-06, "loss": 14.487, "step": 383790 }, { "epoch": 0.7753002824048449, "grad_norm": 85.836669921875, "learning_rate": 1.556961314133359e-06, "loss": 16.7747, "step": 383800 }, { "epoch": 0.7753204830375288, "grad_norm": 368.25970458984375, "learning_rate": 1.5567082029917074e-06, "loss": 12.5381, "step": 383810 }, { "epoch": 0.7753406836702126, "grad_norm": 348.8280334472656, "learning_rate": 1.5564551086322428e-06, "loss": 21.189, "step": 383820 }, { "epoch": 0.7753608843028964, "grad_norm": 331.0599365234375, "learning_rate": 1.556202031056201e-06, "loss": 8.9081, "step": 383830 }, { "epoch": 0.7753810849355802, "grad_norm": 248.07205200195312, "learning_rate": 1.5559489702648096e-06, "loss": 18.0033, "step": 383840 }, { "epoch": 0.775401285568264, "grad_norm": 461.08135986328125, "learning_rate": 1.5556959262593058e-06, "loss": 15.9705, "step": 383850 }, { "epoch": 0.7754214862009479, "grad_norm": 298.7308044433594, "learning_rate": 1.5554428990409232e-06, "loss": 13.7456, "step": 383860 }, { "epoch": 0.7754416868336317, "grad_norm": 529.7130737304688, "learning_rate": 1.5551898886108947e-06, "loss": 15.5866, "step": 383870 }, { "epoch": 0.7754618874663154, "grad_norm": 293.4848327636719, "learning_rate": 1.5549368949704507e-06, "loss": 18.0192, "step": 383880 }, { "epoch": 0.7754820880989992, "grad_norm": 474.0185852050781, "learning_rate": 1.5546839181208284e-06, "loss": 14.8301, "step": 383890 }, { "epoch": 0.775502288731683, "grad_norm": 489.7498474121094, 
"learning_rate": 1.554430958063259e-06, "loss": 25.3866, "step": 383900 }, { "epoch": 0.7755224893643669, "grad_norm": 499.2673645019531, "learning_rate": 1.5541780147989733e-06, "loss": 16.6086, "step": 383910 }, { "epoch": 0.7755426899970507, "grad_norm": 118.5300521850586, "learning_rate": 1.5539250883292078e-06, "loss": 20.088, "step": 383920 }, { "epoch": 0.7755628906297345, "grad_norm": 288.1347351074219, "learning_rate": 1.5536721786551918e-06, "loss": 25.9228, "step": 383930 }, { "epoch": 0.7755830912624183, "grad_norm": 574.680419921875, "learning_rate": 1.5534192857781611e-06, "loss": 11.4823, "step": 383940 }, { "epoch": 0.7756032918951021, "grad_norm": 302.4960632324219, "learning_rate": 1.5531664096993454e-06, "loss": 11.9024, "step": 383950 }, { "epoch": 0.775623492527786, "grad_norm": 5.106573581695557, "learning_rate": 1.55291355041998e-06, "loss": 14.0364, "step": 383960 }, { "epoch": 0.7756436931604698, "grad_norm": 287.447265625, "learning_rate": 1.552660707941296e-06, "loss": 12.1343, "step": 383970 }, { "epoch": 0.7756638937931536, "grad_norm": 295.5054016113281, "learning_rate": 1.552407882264524e-06, "loss": 18.9641, "step": 383980 }, { "epoch": 0.7756840944258374, "grad_norm": 318.2136535644531, "learning_rate": 1.552155073390899e-06, "loss": 33.7459, "step": 383990 }, { "epoch": 0.7757042950585212, "grad_norm": 103.75553131103516, "learning_rate": 1.551902281321651e-06, "loss": 16.6785, "step": 384000 }, { "epoch": 0.775724495691205, "grad_norm": 128.6370849609375, "learning_rate": 1.5516495060580145e-06, "loss": 12.6248, "step": 384010 }, { "epoch": 0.7757446963238889, "grad_norm": 266.0093688964844, "learning_rate": 1.5513967476012198e-06, "loss": 22.2741, "step": 384020 }, { "epoch": 0.7757648969565727, "grad_norm": 451.62847900390625, "learning_rate": 1.551144005952498e-06, "loss": 18.3263, "step": 384030 }, { "epoch": 0.7757850975892565, "grad_norm": 306.1957092285156, "learning_rate": 1.5508912811130832e-06, "loss": 8.4984, "step": 384040 }, { "epoch": 0.7758052982219403, "grad_norm": 294.24359130859375, "learning_rate": 1.5506385730842062e-06, "loss": 21.9452, "step": 384050 }, { "epoch": 0.7758254988546242, "grad_norm": 214.49560546875, "learning_rate": 1.5503858818670963e-06, "loss": 11.1492, "step": 384060 }, { "epoch": 0.775845699487308, "grad_norm": 338.5537109375, "learning_rate": 1.5501332074629876e-06, "loss": 24.4503, "step": 384070 }, { "epoch": 0.7758659001199918, "grad_norm": 360.83349609375, "learning_rate": 1.5498805498731146e-06, "loss": 15.6625, "step": 384080 }, { "epoch": 0.7758861007526756, "grad_norm": 462.2666931152344, "learning_rate": 1.549627909098702e-06, "loss": 19.1801, "step": 384090 }, { "epoch": 0.7759063013853594, "grad_norm": 611.6724243164062, "learning_rate": 1.5493752851409844e-06, "loss": 31.4426, "step": 384100 }, { "epoch": 0.7759265020180433, "grad_norm": 395.310546875, "learning_rate": 1.5491226780011954e-06, "loss": 37.8498, "step": 384110 }, { "epoch": 0.7759467026507271, "grad_norm": 191.07583618164062, "learning_rate": 1.548870087680563e-06, "loss": 17.6473, "step": 384120 }, { "epoch": 0.7759669032834109, "grad_norm": 781.8247680664062, "learning_rate": 1.5486175141803177e-06, "loss": 25.2453, "step": 384130 }, { "epoch": 0.7759871039160946, "grad_norm": 204.58645629882812, "learning_rate": 1.5483649575016929e-06, "loss": 19.3628, "step": 384140 }, { "epoch": 0.7760073045487784, "grad_norm": 261.19061279296875, "learning_rate": 1.5481124176459195e-06, "loss": 25.1665, "step": 384150 }, { "epoch": 
0.7760275051814622, "grad_norm": 277.91497802734375, "learning_rate": 1.5478598946142277e-06, "loss": 9.8039, "step": 384160 }, { "epoch": 0.7760477058141461, "grad_norm": 472.0810241699219, "learning_rate": 1.5476073884078463e-06, "loss": 26.7061, "step": 384170 }, { "epoch": 0.7760679064468299, "grad_norm": 609.7418212890625, "learning_rate": 1.5473548990280097e-06, "loss": 16.6024, "step": 384180 }, { "epoch": 0.7760881070795137, "grad_norm": 554.5270385742188, "learning_rate": 1.5471024264759466e-06, "loss": 22.243, "step": 384190 }, { "epoch": 0.7761083077121975, "grad_norm": 230.99478149414062, "learning_rate": 1.5468499707528856e-06, "loss": 13.1364, "step": 384200 }, { "epoch": 0.7761285083448813, "grad_norm": 371.1297912597656, "learning_rate": 1.5465975318600607e-06, "loss": 21.091, "step": 384210 }, { "epoch": 0.7761487089775652, "grad_norm": 273.14007568359375, "learning_rate": 1.5463451097986993e-06, "loss": 24.4958, "step": 384220 }, { "epoch": 0.776168909610249, "grad_norm": 440.2649841308594, "learning_rate": 1.5460927045700342e-06, "loss": 26.5645, "step": 384230 }, { "epoch": 0.7761891102429328, "grad_norm": 220.97445678710938, "learning_rate": 1.5458403161752943e-06, "loss": 18.2712, "step": 384240 }, { "epoch": 0.7762093108756166, "grad_norm": 714.91845703125, "learning_rate": 1.5455879446157084e-06, "loss": 36.6765, "step": 384250 }, { "epoch": 0.7762295115083004, "grad_norm": 690.603515625, "learning_rate": 1.5453355898925094e-06, "loss": 25.6641, "step": 384260 }, { "epoch": 0.7762497121409843, "grad_norm": 524.8508911132812, "learning_rate": 1.5450832520069241e-06, "loss": 17.8045, "step": 384270 }, { "epoch": 0.7762699127736681, "grad_norm": 715.39306640625, "learning_rate": 1.5448309309601855e-06, "loss": 26.0652, "step": 384280 }, { "epoch": 0.7762901134063519, "grad_norm": 221.43211364746094, "learning_rate": 1.5445786267535207e-06, "loss": 12.6999, "step": 384290 }, { "epoch": 0.7763103140390357, "grad_norm": 348.0744323730469, "learning_rate": 1.5443263393881619e-06, "loss": 19.6884, "step": 384300 }, { "epoch": 0.7763305146717195, "grad_norm": 226.06593322753906, "learning_rate": 1.5440740688653372e-06, "loss": 17.7141, "step": 384310 }, { "epoch": 0.7763507153044034, "grad_norm": 483.87689208984375, "learning_rate": 1.543821815186275e-06, "loss": 22.7634, "step": 384320 }, { "epoch": 0.7763709159370872, "grad_norm": 778.535400390625, "learning_rate": 1.5435695783522076e-06, "loss": 24.2931, "step": 384330 }, { "epoch": 0.776391116569771, "grad_norm": 217.14146423339844, "learning_rate": 1.5433173583643628e-06, "loss": 12.3832, "step": 384340 }, { "epoch": 0.7764113172024548, "grad_norm": 433.1095886230469, "learning_rate": 1.5430651552239684e-06, "loss": 20.548, "step": 384350 }, { "epoch": 0.7764315178351386, "grad_norm": 288.0685729980469, "learning_rate": 1.5428129689322552e-06, "loss": 20.827, "step": 384360 }, { "epoch": 0.7764517184678225, "grad_norm": 240.14483642578125, "learning_rate": 1.5425607994904552e-06, "loss": 12.2251, "step": 384370 }, { "epoch": 0.7764719191005063, "grad_norm": 120.36453247070312, "learning_rate": 1.5423086468997917e-06, "loss": 14.1546, "step": 384380 }, { "epoch": 0.77649211973319, "grad_norm": 304.1021728515625, "learning_rate": 1.5420565111614965e-06, "loss": 17.2286, "step": 384390 }, { "epoch": 0.7765123203658738, "grad_norm": 324.0277099609375, "learning_rate": 1.5418043922768e-06, "loss": 12.4004, "step": 384400 }, { "epoch": 0.7765325209985576, "grad_norm": 408.1206359863281, "learning_rate": 
1.5415522902469293e-06, "loss": 20.7216, "step": 384410 }, { "epoch": 0.7765527216312414, "grad_norm": 309.0054931640625, "learning_rate": 1.5413002050731118e-06, "loss": 21.8756, "step": 384420 }, { "epoch": 0.7765729222639253, "grad_norm": 208.31134033203125, "learning_rate": 1.5410481367565777e-06, "loss": 16.1911, "step": 384430 }, { "epoch": 0.7765931228966091, "grad_norm": 348.1766052246094, "learning_rate": 1.5407960852985582e-06, "loss": 17.2752, "step": 384440 }, { "epoch": 0.7766133235292929, "grad_norm": 481.5426025390625, "learning_rate": 1.540544050700276e-06, "loss": 7.244, "step": 384450 }, { "epoch": 0.7766335241619767, "grad_norm": 551.3931274414062, "learning_rate": 1.5402920329629627e-06, "loss": 18.0558, "step": 384460 }, { "epoch": 0.7766537247946605, "grad_norm": 240.8903350830078, "learning_rate": 1.5400400320878484e-06, "loss": 10.1459, "step": 384470 }, { "epoch": 0.7766739254273444, "grad_norm": 313.7499084472656, "learning_rate": 1.539788048076159e-06, "loss": 20.1553, "step": 384480 }, { "epoch": 0.7766941260600282, "grad_norm": 238.50909423828125, "learning_rate": 1.539536080929121e-06, "loss": 45.4269, "step": 384490 }, { "epoch": 0.776714326692712, "grad_norm": 214.79942321777344, "learning_rate": 1.5392841306479667e-06, "loss": 5.5206, "step": 384500 }, { "epoch": 0.7767345273253958, "grad_norm": 2.047757863998413, "learning_rate": 1.539032197233921e-06, "loss": 10.5082, "step": 384510 }, { "epoch": 0.7767547279580796, "grad_norm": 163.90113830566406, "learning_rate": 1.5387802806882118e-06, "loss": 16.4249, "step": 384520 }, { "epoch": 0.7767749285907635, "grad_norm": 77.99993133544922, "learning_rate": 1.5385283810120688e-06, "loss": 26.0398, "step": 384530 }, { "epoch": 0.7767951292234473, "grad_norm": 662.6762084960938, "learning_rate": 1.5382764982067172e-06, "loss": 24.6799, "step": 384540 }, { "epoch": 0.7768153298561311, "grad_norm": 535.010498046875, "learning_rate": 1.5380246322733883e-06, "loss": 43.2987, "step": 384550 }, { "epoch": 0.7768355304888149, "grad_norm": 92.85782623291016, "learning_rate": 1.5377727832133049e-06, "loss": 16.2858, "step": 384560 }, { "epoch": 0.7768557311214987, "grad_norm": 374.7579040527344, "learning_rate": 1.537520951027699e-06, "loss": 17.3287, "step": 384570 }, { "epoch": 0.7768759317541826, "grad_norm": 161.86851501464844, "learning_rate": 1.537269135717796e-06, "loss": 16.0337, "step": 384580 }, { "epoch": 0.7768961323868664, "grad_norm": 99.89845275878906, "learning_rate": 1.5370173372848218e-06, "loss": 8.8479, "step": 384590 }, { "epoch": 0.7769163330195502, "grad_norm": 339.26947021484375, "learning_rate": 1.5367655557300066e-06, "loss": 10.8958, "step": 384600 }, { "epoch": 0.776936533652234, "grad_norm": 823.4581298828125, "learning_rate": 1.5365137910545747e-06, "loss": 20.2525, "step": 384610 }, { "epoch": 0.7769567342849178, "grad_norm": 163.99513244628906, "learning_rate": 1.5362620432597559e-06, "loss": 25.8464, "step": 384620 }, { "epoch": 0.7769769349176017, "grad_norm": 422.450927734375, "learning_rate": 1.5360103123467756e-06, "loss": 31.5294, "step": 384630 }, { "epoch": 0.7769971355502855, "grad_norm": 609.9090576171875, "learning_rate": 1.5357585983168593e-06, "loss": 32.4314, "step": 384640 }, { "epoch": 0.7770173361829692, "grad_norm": 528.384765625, "learning_rate": 1.5355069011712376e-06, "loss": 18.6976, "step": 384650 }, { "epoch": 0.777037536815653, "grad_norm": 425.56732177734375, "learning_rate": 1.5352552209111344e-06, "loss": 16.283, "step": 384660 }, { "epoch": 0.7770577374483368, 
"grad_norm": 169.77854919433594, "learning_rate": 1.535003557537776e-06, "loss": 16.551, "step": 384670 }, { "epoch": 0.7770779380810207, "grad_norm": 0.015986911952495575, "learning_rate": 1.5347519110523895e-06, "loss": 19.9182, "step": 384680 }, { "epoch": 0.7770981387137045, "grad_norm": 245.8568572998047, "learning_rate": 1.5345002814562055e-06, "loss": 45.4147, "step": 384690 }, { "epoch": 0.7771183393463883, "grad_norm": 205.8431854248047, "learning_rate": 1.5342486687504432e-06, "loss": 12.4595, "step": 384700 }, { "epoch": 0.7771385399790721, "grad_norm": 4.2626519203186035, "learning_rate": 1.533997072936333e-06, "loss": 15.0956, "step": 384710 }, { "epoch": 0.7771587406117559, "grad_norm": 430.7354431152344, "learning_rate": 1.533745494015102e-06, "loss": 13.0182, "step": 384720 }, { "epoch": 0.7771789412444398, "grad_norm": 200.21856689453125, "learning_rate": 1.533493931987975e-06, "loss": 17.5419, "step": 384730 }, { "epoch": 0.7771991418771236, "grad_norm": 34.58863067626953, "learning_rate": 1.5332423868561769e-06, "loss": 27.4933, "step": 384740 }, { "epoch": 0.7772193425098074, "grad_norm": 474.97015380859375, "learning_rate": 1.5329908586209347e-06, "loss": 10.175, "step": 384750 }, { "epoch": 0.7772395431424912, "grad_norm": 428.1702575683594, "learning_rate": 1.5327393472834772e-06, "loss": 19.8074, "step": 384760 }, { "epoch": 0.777259743775175, "grad_norm": 457.3119812011719, "learning_rate": 1.5324878528450254e-06, "loss": 19.01, "step": 384770 }, { "epoch": 0.7772799444078589, "grad_norm": 192.80319213867188, "learning_rate": 1.5322363753068064e-06, "loss": 16.6181, "step": 384780 }, { "epoch": 0.7773001450405427, "grad_norm": 33.13753128051758, "learning_rate": 1.5319849146700488e-06, "loss": 23.758, "step": 384790 }, { "epoch": 0.7773203456732265, "grad_norm": 576.9069213867188, "learning_rate": 1.531733470935976e-06, "loss": 11.0886, "step": 384800 }, { "epoch": 0.7773405463059103, "grad_norm": 564.5203857421875, "learning_rate": 1.531482044105812e-06, "loss": 16.9974, "step": 384810 }, { "epoch": 0.7773607469385941, "grad_norm": 279.1705322265625, "learning_rate": 1.5312306341807858e-06, "loss": 28.6219, "step": 384820 }, { "epoch": 0.777380947571278, "grad_norm": 271.1581726074219, "learning_rate": 1.5309792411621204e-06, "loss": 26.1332, "step": 384830 }, { "epoch": 0.7774011482039618, "grad_norm": 542.3341064453125, "learning_rate": 1.5307278650510399e-06, "loss": 18.0816, "step": 384840 }, { "epoch": 0.7774213488366456, "grad_norm": 387.3022155761719, "learning_rate": 1.5304765058487725e-06, "loss": 13.1619, "step": 384850 }, { "epoch": 0.7774415494693294, "grad_norm": 269.654541015625, "learning_rate": 1.5302251635565401e-06, "loss": 41.7115, "step": 384860 }, { "epoch": 0.7774617501020132, "grad_norm": 140.29644775390625, "learning_rate": 1.5299738381755712e-06, "loss": 15.5364, "step": 384870 }, { "epoch": 0.7774819507346971, "grad_norm": 170.4852294921875, "learning_rate": 1.5297225297070879e-06, "loss": 23.2477, "step": 384880 }, { "epoch": 0.7775021513673809, "grad_norm": 316.4628601074219, "learning_rate": 1.5294712381523168e-06, "loss": 15.9037, "step": 384890 }, { "epoch": 0.7775223520000646, "grad_norm": 744.5531616210938, "learning_rate": 1.529219963512481e-06, "loss": 12.7941, "step": 384900 }, { "epoch": 0.7775425526327484, "grad_norm": 254.7901153564453, "learning_rate": 1.5289687057888075e-06, "loss": 15.0023, "step": 384910 }, { "epoch": 0.7775627532654322, "grad_norm": 443.2149963378906, "learning_rate": 1.5287174649825194e-06, "loss": 
22.0366, "step": 384920 }, { "epoch": 0.777582953898116, "grad_norm": 307.50421142578125, "learning_rate": 1.5284662410948398e-06, "loss": 13.2912, "step": 384930 }, { "epoch": 0.7776031545307999, "grad_norm": 322.15985107421875, "learning_rate": 1.5282150341269964e-06, "loss": 15.5182, "step": 384940 }, { "epoch": 0.7776233551634837, "grad_norm": 413.22955322265625, "learning_rate": 1.5279638440802118e-06, "loss": 11.9651, "step": 384950 }, { "epoch": 0.7776435557961675, "grad_norm": 682.2948608398438, "learning_rate": 1.5277126709557088e-06, "loss": 13.074, "step": 384960 }, { "epoch": 0.7776637564288513, "grad_norm": 397.0071716308594, "learning_rate": 1.5274615147547128e-06, "loss": 19.5731, "step": 384970 }, { "epoch": 0.7776839570615351, "grad_norm": 140.2388916015625, "learning_rate": 1.5272103754784517e-06, "loss": 32.3869, "step": 384980 }, { "epoch": 0.777704157694219, "grad_norm": 374.5721435546875, "learning_rate": 1.526959253128143e-06, "loss": 12.7778, "step": 384990 }, { "epoch": 0.7777243583269028, "grad_norm": 431.6618957519531, "learning_rate": 1.5267081477050132e-06, "loss": 21.2579, "step": 385000 }, { "epoch": 0.7777445589595866, "grad_norm": 398.0082702636719, "learning_rate": 1.5264570592102883e-06, "loss": 21.9021, "step": 385010 }, { "epoch": 0.7777647595922704, "grad_norm": 436.4931945800781, "learning_rate": 1.5262059876451906e-06, "loss": 22.1284, "step": 385020 }, { "epoch": 0.7777849602249542, "grad_norm": 451.14666748046875, "learning_rate": 1.5259549330109424e-06, "loss": 12.1658, "step": 385030 }, { "epoch": 0.7778051608576381, "grad_norm": 284.5849914550781, "learning_rate": 1.5257038953087678e-06, "loss": 16.0711, "step": 385040 }, { "epoch": 0.7778253614903219, "grad_norm": 284.7397155761719, "learning_rate": 1.5254528745398943e-06, "loss": 20.7581, "step": 385050 }, { "epoch": 0.7778455621230057, "grad_norm": 468.49114990234375, "learning_rate": 1.5252018707055393e-06, "loss": 21.8534, "step": 385060 }, { "epoch": 0.7778657627556895, "grad_norm": 525.685791015625, "learning_rate": 1.5249508838069287e-06, "loss": 10.5381, "step": 385070 }, { "epoch": 0.7778859633883733, "grad_norm": 178.24122619628906, "learning_rate": 1.5246999138452878e-06, "loss": 16.3678, "step": 385080 }, { "epoch": 0.7779061640210572, "grad_norm": 438.45404052734375, "learning_rate": 1.5244489608218376e-06, "loss": 18.4635, "step": 385090 }, { "epoch": 0.777926364653741, "grad_norm": 578.2142333984375, "learning_rate": 1.5241980247378008e-06, "loss": 23.6082, "step": 385100 }, { "epoch": 0.7779465652864248, "grad_norm": 335.78887939453125, "learning_rate": 1.5239471055944022e-06, "loss": 13.7291, "step": 385110 }, { "epoch": 0.7779667659191086, "grad_norm": 316.18280029296875, "learning_rate": 1.5236962033928636e-06, "loss": 21.4164, "step": 385120 }, { "epoch": 0.7779869665517924, "grad_norm": 638.5242919921875, "learning_rate": 1.5234453181344071e-06, "loss": 23.4461, "step": 385130 }, { "epoch": 0.7780071671844763, "grad_norm": 255.30441284179688, "learning_rate": 1.5231944498202578e-06, "loss": 18.7992, "step": 385140 }, { "epoch": 0.7780273678171601, "grad_norm": 230.3295135498047, "learning_rate": 1.5229435984516355e-06, "loss": 24.4938, "step": 385150 }, { "epoch": 0.7780475684498438, "grad_norm": 443.8564758300781, "learning_rate": 1.5226927640297663e-06, "loss": 35.4686, "step": 385160 }, { "epoch": 0.7780677690825276, "grad_norm": 829.7617797851562, "learning_rate": 1.5224419465558687e-06, "loss": 18.2524, "step": 385170 }, { "epoch": 0.7780879697152114, "grad_norm": 
299.2265625, "learning_rate": 1.522191146031169e-06, "loss": 22.1634, "step": 385180 }, { "epoch": 0.7781081703478953, "grad_norm": 668.59521484375, "learning_rate": 1.521940362456888e-06, "loss": 27.8544, "step": 385190 }, { "epoch": 0.7781283709805791, "grad_norm": 44.44142150878906, "learning_rate": 1.521689595834246e-06, "loss": 30.3538, "step": 385200 }, { "epoch": 0.7781485716132629, "grad_norm": 154.08029174804688, "learning_rate": 1.521438846164469e-06, "loss": 16.6562, "step": 385210 }, { "epoch": 0.7781687722459467, "grad_norm": 655.61328125, "learning_rate": 1.5211881134487755e-06, "loss": 22.0776, "step": 385220 }, { "epoch": 0.7781889728786305, "grad_norm": 195.63526916503906, "learning_rate": 1.5209373976883906e-06, "loss": 30.8979, "step": 385230 }, { "epoch": 0.7782091735113144, "grad_norm": 293.49652099609375, "learning_rate": 1.5206866988845348e-06, "loss": 15.782, "step": 385240 }, { "epoch": 0.7782293741439982, "grad_norm": 424.705078125, "learning_rate": 1.5204360170384286e-06, "loss": 14.5477, "step": 385250 }, { "epoch": 0.778249574776682, "grad_norm": 2079.281982421875, "learning_rate": 1.5201853521512967e-06, "loss": 50.4745, "step": 385260 }, { "epoch": 0.7782697754093658, "grad_norm": 310.99786376953125, "learning_rate": 1.5199347042243595e-06, "loss": 19.3026, "step": 385270 }, { "epoch": 0.7782899760420496, "grad_norm": 245.80108642578125, "learning_rate": 1.519684073258837e-06, "loss": 23.6941, "step": 385280 }, { "epoch": 0.7783101766747335, "grad_norm": 693.7467041015625, "learning_rate": 1.5194334592559517e-06, "loss": 21.5978, "step": 385290 }, { "epoch": 0.7783303773074173, "grad_norm": 882.8553466796875, "learning_rate": 1.519182862216929e-06, "loss": 18.9106, "step": 385300 }, { "epoch": 0.7783505779401011, "grad_norm": 227.40785217285156, "learning_rate": 1.5189322821429842e-06, "loss": 9.4096, "step": 385310 }, { "epoch": 0.7783707785727849, "grad_norm": 302.2127380371094, "learning_rate": 1.5186817190353404e-06, "loss": 19.1014, "step": 385320 }, { "epoch": 0.7783909792054687, "grad_norm": 514.7554931640625, "learning_rate": 1.5184311728952216e-06, "loss": 17.937, "step": 385330 }, { "epoch": 0.7784111798381526, "grad_norm": 492.3889465332031, "learning_rate": 1.5181806437238472e-06, "loss": 12.4642, "step": 385340 }, { "epoch": 0.7784313804708364, "grad_norm": 475.7500305175781, "learning_rate": 1.5179301315224364e-06, "loss": 24.705, "step": 385350 }, { "epoch": 0.7784515811035202, "grad_norm": 249.72171020507812, "learning_rate": 1.5176796362922119e-06, "loss": 11.6964, "step": 385360 }, { "epoch": 0.778471781736204, "grad_norm": 240.2904815673828, "learning_rate": 1.5174291580343976e-06, "loss": 18.1363, "step": 385370 }, { "epoch": 0.7784919823688878, "grad_norm": 279.27685546875, "learning_rate": 1.5171786967502078e-06, "loss": 24.9632, "step": 385380 }, { "epoch": 0.7785121830015717, "grad_norm": 449.6131286621094, "learning_rate": 1.516928252440867e-06, "loss": 34.3537, "step": 385390 }, { "epoch": 0.7785323836342555, "grad_norm": 571.5137939453125, "learning_rate": 1.5166778251075964e-06, "loss": 16.6113, "step": 385400 }, { "epoch": 0.7785525842669393, "grad_norm": 620.7178344726562, "learning_rate": 1.516427414751616e-06, "loss": 15.0394, "step": 385410 }, { "epoch": 0.778572784899623, "grad_norm": 346.1143798828125, "learning_rate": 1.5161770213741444e-06, "loss": 18.6634, "step": 385420 }, { "epoch": 0.7785929855323068, "grad_norm": 218.16488647460938, "learning_rate": 1.5159266449764048e-06, "loss": 14.5162, "step": 385430 }, { "epoch": 
0.7786131861649906, "grad_norm": 337.9759521484375, "learning_rate": 1.5156762855596162e-06, "loss": 12.06, "step": 385440 }, { "epoch": 0.7786333867976745, "grad_norm": 527.7807006835938, "learning_rate": 1.5154259431249978e-06, "loss": 17.1289, "step": 385450 }, { "epoch": 0.7786535874303583, "grad_norm": 303.9666748046875, "learning_rate": 1.5151756176737703e-06, "loss": 15.2366, "step": 385460 }, { "epoch": 0.7786737880630421, "grad_norm": 77.94291687011719, "learning_rate": 1.5149253092071554e-06, "loss": 16.9323, "step": 385470 }, { "epoch": 0.7786939886957259, "grad_norm": 517.1173706054688, "learning_rate": 1.5146750177263725e-06, "loss": 21.6196, "step": 385480 }, { "epoch": 0.7787141893284097, "grad_norm": 981.0882568359375, "learning_rate": 1.5144247432326386e-06, "loss": 22.6039, "step": 385490 }, { "epoch": 0.7787343899610936, "grad_norm": 427.5642395019531, "learning_rate": 1.514174485727178e-06, "loss": 15.4875, "step": 385500 }, { "epoch": 0.7787545905937774, "grad_norm": 326.907958984375, "learning_rate": 1.5139242452112074e-06, "loss": 24.2637, "step": 385510 }, { "epoch": 0.7787747912264612, "grad_norm": 19.355066299438477, "learning_rate": 1.5136740216859464e-06, "loss": 12.2308, "step": 385520 }, { "epoch": 0.778794991859145, "grad_norm": 416.51190185546875, "learning_rate": 1.5134238151526166e-06, "loss": 21.3602, "step": 385530 }, { "epoch": 0.7788151924918288, "grad_norm": 213.76942443847656, "learning_rate": 1.5131736256124346e-06, "loss": 8.8589, "step": 385540 }, { "epoch": 0.7788353931245127, "grad_norm": 50.654388427734375, "learning_rate": 1.5129234530666232e-06, "loss": 19.1779, "step": 385550 }, { "epoch": 0.7788555937571965, "grad_norm": 325.6395263671875, "learning_rate": 1.5126732975164e-06, "loss": 12.2903, "step": 385560 }, { "epoch": 0.7788757943898803, "grad_norm": 256.3102111816406, "learning_rate": 1.5124231589629823e-06, "loss": 6.6157, "step": 385570 }, { "epoch": 0.7788959950225641, "grad_norm": 99.91606140136719, "learning_rate": 1.5121730374075916e-06, "loss": 14.7548, "step": 385580 }, { "epoch": 0.778916195655248, "grad_norm": 195.95484924316406, "learning_rate": 1.511922932851449e-06, "loss": 6.047, "step": 385590 }, { "epoch": 0.7789363962879318, "grad_norm": 527.3798217773438, "learning_rate": 1.5116728452957686e-06, "loss": 10.9641, "step": 385600 }, { "epoch": 0.7789565969206156, "grad_norm": 1464.2406005859375, "learning_rate": 1.511422774741771e-06, "loss": 23.2647, "step": 385610 }, { "epoch": 0.7789767975532994, "grad_norm": 194.63536071777344, "learning_rate": 1.5111727211906774e-06, "loss": 13.5937, "step": 385620 }, { "epoch": 0.7789969981859832, "grad_norm": 174.94775390625, "learning_rate": 1.5109226846437054e-06, "loss": 29.5315, "step": 385630 }, { "epoch": 0.779017198818667, "grad_norm": 402.3554382324219, "learning_rate": 1.510672665102071e-06, "loss": 20.5427, "step": 385640 }, { "epoch": 0.7790373994513509, "grad_norm": 363.8941650390625, "learning_rate": 1.5104226625669943e-06, "loss": 9.9553, "step": 385650 }, { "epoch": 0.7790576000840347, "grad_norm": 502.1727294921875, "learning_rate": 1.5101726770396986e-06, "loss": 28.6017, "step": 385660 }, { "epoch": 0.7790778007167184, "grad_norm": 90.51630401611328, "learning_rate": 1.509922708521394e-06, "loss": 12.2599, "step": 385670 }, { "epoch": 0.7790980013494022, "grad_norm": 494.1336975097656, "learning_rate": 1.509672757013303e-06, "loss": 28.2401, "step": 385680 }, { "epoch": 0.779118201982086, "grad_norm": 391.7030944824219, "learning_rate": 1.509422822516645e-06, 
"loss": 15.7195, "step": 385690 }, { "epoch": 0.7791384026147699, "grad_norm": 270.3523864746094, "learning_rate": 1.5091729050326376e-06, "loss": 14.2068, "step": 385700 }, { "epoch": 0.7791586032474537, "grad_norm": 157.23414611816406, "learning_rate": 1.5089230045624958e-06, "loss": 27.5659, "step": 385710 }, { "epoch": 0.7791788038801375, "grad_norm": 335.717041015625, "learning_rate": 1.5086731211074418e-06, "loss": 13.2512, "step": 385720 }, { "epoch": 0.7791990045128213, "grad_norm": 418.17840576171875, "learning_rate": 1.5084232546686911e-06, "loss": 14.8485, "step": 385730 }, { "epoch": 0.7792192051455051, "grad_norm": 560.2449340820312, "learning_rate": 1.508173405247461e-06, "loss": 25.812, "step": 385740 }, { "epoch": 0.779239405778189, "grad_norm": 686.2391357421875, "learning_rate": 1.5079235728449714e-06, "loss": 16.8938, "step": 385750 }, { "epoch": 0.7792596064108728, "grad_norm": 175.08425903320312, "learning_rate": 1.5076737574624372e-06, "loss": 14.4246, "step": 385760 }, { "epoch": 0.7792798070435566, "grad_norm": 417.5024719238281, "learning_rate": 1.5074239591010791e-06, "loss": 6.5063, "step": 385770 }, { "epoch": 0.7793000076762404, "grad_norm": 395.2223205566406, "learning_rate": 1.507174177762112e-06, "loss": 36.0661, "step": 385780 }, { "epoch": 0.7793202083089242, "grad_norm": 490.0069580078125, "learning_rate": 1.5069244134467553e-06, "loss": 21.2514, "step": 385790 }, { "epoch": 0.779340408941608, "grad_norm": 1034.65966796875, "learning_rate": 1.5066746661562254e-06, "loss": 22.4074, "step": 385800 }, { "epoch": 0.7793606095742919, "grad_norm": 654.3142700195312, "learning_rate": 1.5064249358917383e-06, "loss": 21.8747, "step": 385810 }, { "epoch": 0.7793808102069757, "grad_norm": 331.18170166015625, "learning_rate": 1.5061752226545134e-06, "loss": 8.6893, "step": 385820 }, { "epoch": 0.7794010108396595, "grad_norm": 621.185546875, "learning_rate": 1.5059255264457656e-06, "loss": 20.6916, "step": 385830 }, { "epoch": 0.7794212114723433, "grad_norm": 464.4343566894531, "learning_rate": 1.5056758472667144e-06, "loss": 23.6093, "step": 385840 }, { "epoch": 0.7794414121050272, "grad_norm": 682.4192504882812, "learning_rate": 1.5054261851185753e-06, "loss": 18.9779, "step": 385850 }, { "epoch": 0.779461612737711, "grad_norm": 190.57699584960938, "learning_rate": 1.5051765400025636e-06, "loss": 13.4317, "step": 385860 }, { "epoch": 0.7794818133703948, "grad_norm": 3304.729248046875, "learning_rate": 1.5049269119198988e-06, "loss": 29.976, "step": 385870 }, { "epoch": 0.7795020140030786, "grad_norm": 254.01058959960938, "learning_rate": 1.5046773008717968e-06, "loss": 21.1212, "step": 385880 }, { "epoch": 0.7795222146357624, "grad_norm": 709.68115234375, "learning_rate": 1.5044277068594721e-06, "loss": 21.5708, "step": 385890 }, { "epoch": 0.7795424152684463, "grad_norm": 734.3587646484375, "learning_rate": 1.5041781298841424e-06, "loss": 29.4084, "step": 385900 }, { "epoch": 0.7795626159011301, "grad_norm": 330.8517150878906, "learning_rate": 1.503928569947028e-06, "loss": 23.806, "step": 385910 }, { "epoch": 0.7795828165338139, "grad_norm": 359.6206970214844, "learning_rate": 1.5036790270493383e-06, "loss": 16.4389, "step": 385920 }, { "epoch": 0.7796030171664976, "grad_norm": 381.5948181152344, "learning_rate": 1.5034295011922933e-06, "loss": 15.3177, "step": 385930 }, { "epoch": 0.7796232177991814, "grad_norm": 31.452110290527344, "learning_rate": 1.5031799923771102e-06, "loss": 22.716, "step": 385940 }, { "epoch": 0.7796434184318652, "grad_norm": 
238.20335388183594, "learning_rate": 1.5029305006050038e-06, "loss": 15.4614, "step": 385950 }, { "epoch": 0.7796636190645491, "grad_norm": 5.884660720825195, "learning_rate": 1.5026810258771885e-06, "loss": 20.7957, "step": 385960 }, { "epoch": 0.7796838196972329, "grad_norm": 679.9158935546875, "learning_rate": 1.5024315681948815e-06, "loss": 14.5647, "step": 385970 }, { "epoch": 0.7797040203299167, "grad_norm": 1149.2484130859375, "learning_rate": 1.5021821275593018e-06, "loss": 32.3723, "step": 385980 }, { "epoch": 0.7797242209626005, "grad_norm": 590.5807495117188, "learning_rate": 1.5019327039716598e-06, "loss": 21.5006, "step": 385990 }, { "epoch": 0.7797444215952843, "grad_norm": 231.70526123046875, "learning_rate": 1.5016832974331725e-06, "loss": 31.0076, "step": 386000 }, { "epoch": 0.7797646222279682, "grad_norm": 424.45361328125, "learning_rate": 1.5014339079450586e-06, "loss": 21.9043, "step": 386010 }, { "epoch": 0.779784822860652, "grad_norm": 130.33021545410156, "learning_rate": 1.501184535508532e-06, "loss": 13.1588, "step": 386020 }, { "epoch": 0.7798050234933358, "grad_norm": 269.3244323730469, "learning_rate": 1.500935180124805e-06, "loss": 13.8246, "step": 386030 }, { "epoch": 0.7798252241260196, "grad_norm": 513.9899291992188, "learning_rate": 1.500685841795098e-06, "loss": 33.1769, "step": 386040 }, { "epoch": 0.7798454247587034, "grad_norm": 424.0687255859375, "learning_rate": 1.5004365205206235e-06, "loss": 27.0779, "step": 386050 }, { "epoch": 0.7798656253913873, "grad_norm": 343.50421142578125, "learning_rate": 1.5001872163025954e-06, "loss": 19.2403, "step": 386060 }, { "epoch": 0.7798858260240711, "grad_norm": 28.880369186401367, "learning_rate": 1.49993792914223e-06, "loss": 22.104, "step": 386070 }, { "epoch": 0.7799060266567549, "grad_norm": 223.69723510742188, "learning_rate": 1.4996886590407445e-06, "loss": 14.1746, "step": 386080 }, { "epoch": 0.7799262272894387, "grad_norm": 277.2913818359375, "learning_rate": 1.4994394059993522e-06, "loss": 11.7292, "step": 386090 }, { "epoch": 0.7799464279221225, "grad_norm": 345.3890075683594, "learning_rate": 1.4991901700192657e-06, "loss": 23.5399, "step": 386100 }, { "epoch": 0.7799666285548064, "grad_norm": 256.9220275878906, "learning_rate": 1.4989409511017034e-06, "loss": 14.9563, "step": 386110 }, { "epoch": 0.7799868291874902, "grad_norm": 107.40361785888672, "learning_rate": 1.498691749247878e-06, "loss": 13.2521, "step": 386120 }, { "epoch": 0.780007029820174, "grad_norm": 168.9518280029297, "learning_rate": 1.4984425644590033e-06, "loss": 20.286, "step": 386130 }, { "epoch": 0.7800272304528578, "grad_norm": 406.5779724121094, "learning_rate": 1.498193396736296e-06, "loss": 18.0538, "step": 386140 }, { "epoch": 0.7800474310855416, "grad_norm": 163.77590942382812, "learning_rate": 1.4979442460809684e-06, "loss": 6.0938, "step": 386150 }, { "epoch": 0.7800676317182255, "grad_norm": 270.00030517578125, "learning_rate": 1.4976951124942369e-06, "loss": 15.4553, "step": 386160 }, { "epoch": 0.7800878323509093, "grad_norm": 323.8747253417969, "learning_rate": 1.4974459959773146e-06, "loss": 12.7156, "step": 386170 }, { "epoch": 0.780108032983593, "grad_norm": 144.15640258789062, "learning_rate": 1.4971968965314143e-06, "loss": 16.3609, "step": 386180 }, { "epoch": 0.7801282336162768, "grad_norm": 542.04833984375, "learning_rate": 1.4969478141577531e-06, "loss": 12.2939, "step": 386190 }, { "epoch": 0.7801484342489606, "grad_norm": 417.11761474609375, "learning_rate": 1.496698748857543e-06, "loss": 21.2529, "step": 
386200 }, { "epoch": 0.7801686348816445, "grad_norm": 515.2703247070312, "learning_rate": 1.4964497006319972e-06, "loss": 14.6671, "step": 386210 }, { "epoch": 0.7801888355143283, "grad_norm": 909.078125, "learning_rate": 1.4962006694823306e-06, "loss": 25.3786, "step": 386220 }, { "epoch": 0.7802090361470121, "grad_norm": 400.2494812011719, "learning_rate": 1.4959516554097581e-06, "loss": 14.9983, "step": 386230 }, { "epoch": 0.7802292367796959, "grad_norm": 176.0259552001953, "learning_rate": 1.4957026584154926e-06, "loss": 15.1697, "step": 386240 }, { "epoch": 0.7802494374123797, "grad_norm": 901.3413696289062, "learning_rate": 1.4954536785007456e-06, "loss": 25.1786, "step": 386250 }, { "epoch": 0.7802696380450636, "grad_norm": 471.90386962890625, "learning_rate": 1.4952047156667326e-06, "loss": 24.8239, "step": 386260 }, { "epoch": 0.7802898386777474, "grad_norm": 585.814697265625, "learning_rate": 1.4949557699146694e-06, "loss": 21.5982, "step": 386270 }, { "epoch": 0.7803100393104312, "grad_norm": 512.4572143554688, "learning_rate": 1.4947068412457639e-06, "loss": 23.7617, "step": 386280 }, { "epoch": 0.780330239943115, "grad_norm": 464.33660888671875, "learning_rate": 1.4944579296612323e-06, "loss": 20.042, "step": 386290 }, { "epoch": 0.7803504405757988, "grad_norm": 496.6575927734375, "learning_rate": 1.4942090351622884e-06, "loss": 17.0011, "step": 386300 }, { "epoch": 0.7803706412084827, "grad_norm": 361.2125549316406, "learning_rate": 1.493960157750145e-06, "loss": 15.9416, "step": 386310 }, { "epoch": 0.7803908418411665, "grad_norm": 205.79136657714844, "learning_rate": 1.493711297426013e-06, "loss": 16.0879, "step": 386320 }, { "epoch": 0.7804110424738503, "grad_norm": 500.2698974609375, "learning_rate": 1.4934624541911086e-06, "loss": 14.078, "step": 386330 }, { "epoch": 0.7804312431065341, "grad_norm": 298.42779541015625, "learning_rate": 1.4932136280466426e-06, "loss": 23.3748, "step": 386340 }, { "epoch": 0.7804514437392179, "grad_norm": 249.26150512695312, "learning_rate": 1.492964818993826e-06, "loss": 18.0938, "step": 386350 }, { "epoch": 0.7804716443719018, "grad_norm": 0.031371522694826126, "learning_rate": 1.492716027033876e-06, "loss": 18.898, "step": 386360 }, { "epoch": 0.7804918450045856, "grad_norm": 463.8792724609375, "learning_rate": 1.4924672521680006e-06, "loss": 14.4938, "step": 386370 }, { "epoch": 0.7805120456372694, "grad_norm": 416.21453857421875, "learning_rate": 1.4922184943974167e-06, "loss": 20.2173, "step": 386380 }, { "epoch": 0.7805322462699532, "grad_norm": 966.876953125, "learning_rate": 1.4919697537233318e-06, "loss": 28.3099, "step": 386390 }, { "epoch": 0.780552446902637, "grad_norm": 530.0706787109375, "learning_rate": 1.491721030146963e-06, "loss": 18.5152, "step": 386400 }, { "epoch": 0.7805726475353209, "grad_norm": 160.47454833984375, "learning_rate": 1.4914723236695206e-06, "loss": 11.2257, "step": 386410 }, { "epoch": 0.7805928481680047, "grad_norm": 191.1073760986328, "learning_rate": 1.4912236342922143e-06, "loss": 21.4983, "step": 386420 }, { "epoch": 0.7806130488006885, "grad_norm": 625.6028442382812, "learning_rate": 1.4909749620162605e-06, "loss": 22.6572, "step": 386430 }, { "epoch": 0.7806332494333722, "grad_norm": 236.0618438720703, "learning_rate": 1.4907263068428673e-06, "loss": 13.1335, "step": 386440 }, { "epoch": 0.780653450066056, "grad_norm": 285.98828125, "learning_rate": 1.4904776687732503e-06, "loss": 5.3353, "step": 386450 }, { "epoch": 0.7806736506987398, "grad_norm": 480.0706481933594, "learning_rate": 
1.4902290478086195e-06, "loss": 19.6177, "step": 386460 }, { "epoch": 0.7806938513314237, "grad_norm": 268.5168762207031, "learning_rate": 1.4899804439501853e-06, "loss": 34.3631, "step": 386470 }, { "epoch": 0.7807140519641075, "grad_norm": 252.37997436523438, "learning_rate": 1.4897318571991615e-06, "loss": 21.8165, "step": 386480 }, { "epoch": 0.7807342525967913, "grad_norm": 204.5614471435547, "learning_rate": 1.4894832875567593e-06, "loss": 11.2927, "step": 386490 }, { "epoch": 0.7807544532294751, "grad_norm": 536.2254028320312, "learning_rate": 1.489234735024188e-06, "loss": 12.7074, "step": 386500 }, { "epoch": 0.7807746538621589, "grad_norm": 90.27301788330078, "learning_rate": 1.4889861996026617e-06, "loss": 19.6775, "step": 386510 }, { "epoch": 0.7807948544948428, "grad_norm": 606.7857055664062, "learning_rate": 1.4887376812933913e-06, "loss": 18.9317, "step": 386520 }, { "epoch": 0.7808150551275266, "grad_norm": 4.590639591217041, "learning_rate": 1.488489180097588e-06, "loss": 33.1678, "step": 386530 }, { "epoch": 0.7808352557602104, "grad_norm": 573.2119140625, "learning_rate": 1.4882406960164615e-06, "loss": 10.8807, "step": 386540 }, { "epoch": 0.7808554563928942, "grad_norm": 573.2291870117188, "learning_rate": 1.4879922290512244e-06, "loss": 20.2384, "step": 386550 }, { "epoch": 0.780875657025578, "grad_norm": 368.7428894042969, "learning_rate": 1.487743779203088e-06, "loss": 14.5099, "step": 386560 }, { "epoch": 0.7808958576582619, "grad_norm": 234.43643188476562, "learning_rate": 1.4874953464732606e-06, "loss": 21.0713, "step": 386570 }, { "epoch": 0.7809160582909457, "grad_norm": 647.460205078125, "learning_rate": 1.487246930862955e-06, "loss": 14.3458, "step": 386580 }, { "epoch": 0.7809362589236295, "grad_norm": 195.44033813476562, "learning_rate": 1.486998532373385e-06, "loss": 12.5954, "step": 386590 }, { "epoch": 0.7809564595563133, "grad_norm": 79.07245635986328, "learning_rate": 1.4867501510057548e-06, "loss": 18.258, "step": 386600 }, { "epoch": 0.7809766601889971, "grad_norm": 403.3853454589844, "learning_rate": 1.486501786761278e-06, "loss": 19.6852, "step": 386610 }, { "epoch": 0.780996860821681, "grad_norm": 313.54241943359375, "learning_rate": 1.4862534396411671e-06, "loss": 14.3152, "step": 386620 }, { "epoch": 0.7810170614543648, "grad_norm": 557.8474731445312, "learning_rate": 1.486005109646631e-06, "loss": 15.661, "step": 386630 }, { "epoch": 0.7810372620870486, "grad_norm": 203.18238830566406, "learning_rate": 1.4857567967788784e-06, "loss": 18.5647, "step": 386640 }, { "epoch": 0.7810574627197324, "grad_norm": 359.0729675292969, "learning_rate": 1.4855085010391217e-06, "loss": 20.258, "step": 386650 }, { "epoch": 0.7810776633524162, "grad_norm": 720.0196533203125, "learning_rate": 1.485260222428571e-06, "loss": 17.099, "step": 386660 }, { "epoch": 0.7810978639851001, "grad_norm": 574.5145874023438, "learning_rate": 1.4850119609484342e-06, "loss": 27.3897, "step": 386670 }, { "epoch": 0.7811180646177839, "grad_norm": 474.090087890625, "learning_rate": 1.4847637165999224e-06, "loss": 18.5943, "step": 386680 }, { "epoch": 0.7811382652504676, "grad_norm": 129.98526000976562, "learning_rate": 1.4845154893842473e-06, "loss": 18.4085, "step": 386690 }, { "epoch": 0.7811584658831514, "grad_norm": 1041.7506103515625, "learning_rate": 1.484267279302618e-06, "loss": 20.6864, "step": 386700 }, { "epoch": 0.7811786665158352, "grad_norm": 122.86719512939453, "learning_rate": 1.4840190863562414e-06, "loss": 15.1946, "step": 386710 }, { "epoch": 0.781198867148519, 
"grad_norm": 701.8041381835938, "learning_rate": 1.483770910546331e-06, "loss": 18.744, "step": 386720 }, { "epoch": 0.7812190677812029, "grad_norm": 99.49020385742188, "learning_rate": 1.4835227518740951e-06, "loss": 10.8952, "step": 386730 }, { "epoch": 0.7812392684138867, "grad_norm": 263.1635437011719, "learning_rate": 1.4832746103407409e-06, "loss": 13.1393, "step": 386740 }, { "epoch": 0.7812594690465705, "grad_norm": 142.5100555419922, "learning_rate": 1.4830264859474814e-06, "loss": 13.7558, "step": 386750 }, { "epoch": 0.7812796696792543, "grad_norm": 924.5542602539062, "learning_rate": 1.4827783786955224e-06, "loss": 20.6192, "step": 386760 }, { "epoch": 0.7812998703119381, "grad_norm": 235.34300231933594, "learning_rate": 1.482530288586077e-06, "loss": 16.9288, "step": 386770 }, { "epoch": 0.781320070944622, "grad_norm": 28.140670776367188, "learning_rate": 1.482282215620352e-06, "loss": 13.6741, "step": 386780 }, { "epoch": 0.7813402715773058, "grad_norm": 13.361457824707031, "learning_rate": 1.4820341597995558e-06, "loss": 14.6516, "step": 386790 }, { "epoch": 0.7813604722099896, "grad_norm": 84.14933013916016, "learning_rate": 1.4817861211248996e-06, "loss": 24.9434, "step": 386800 }, { "epoch": 0.7813806728426734, "grad_norm": 637.1309204101562, "learning_rate": 1.4815380995975908e-06, "loss": 21.3062, "step": 386810 }, { "epoch": 0.7814008734753572, "grad_norm": 445.86187744140625, "learning_rate": 1.4812900952188374e-06, "loss": 14.363, "step": 386820 }, { "epoch": 0.7814210741080411, "grad_norm": 1048.857177734375, "learning_rate": 1.4810421079898495e-06, "loss": 27.6814, "step": 386830 }, { "epoch": 0.7814412747407249, "grad_norm": 641.3052978515625, "learning_rate": 1.4807941379118368e-06, "loss": 16.5968, "step": 386840 }, { "epoch": 0.7814614753734087, "grad_norm": 583.0075073242188, "learning_rate": 1.480546184986007e-06, "loss": 14.5565, "step": 386850 }, { "epoch": 0.7814816760060925, "grad_norm": 204.2652587890625, "learning_rate": 1.4802982492135664e-06, "loss": 13.9256, "step": 386860 }, { "epoch": 0.7815018766387763, "grad_norm": 338.5167236328125, "learning_rate": 1.4800503305957264e-06, "loss": 11.404, "step": 386870 }, { "epoch": 0.7815220772714602, "grad_norm": 240.81396484375, "learning_rate": 1.4798024291336949e-06, "loss": 25.4999, "step": 386880 }, { "epoch": 0.781542277904144, "grad_norm": 380.2215881347656, "learning_rate": 1.4795545448286774e-06, "loss": 29.0816, "step": 386890 }, { "epoch": 0.7815624785368278, "grad_norm": 214.4364776611328, "learning_rate": 1.4793066776818843e-06, "loss": 13.0321, "step": 386900 }, { "epoch": 0.7815826791695116, "grad_norm": 515.7333374023438, "learning_rate": 1.479058827694525e-06, "loss": 11.2031, "step": 386910 }, { "epoch": 0.7816028798021954, "grad_norm": 532.2525024414062, "learning_rate": 1.4788109948678058e-06, "loss": 15.7329, "step": 386920 }, { "epoch": 0.7816230804348793, "grad_norm": 111.3807144165039, "learning_rate": 1.478563179202933e-06, "loss": 52.7665, "step": 386930 }, { "epoch": 0.7816432810675631, "grad_norm": 270.2540283203125, "learning_rate": 1.4783153807011186e-06, "loss": 18.9392, "step": 386940 }, { "epoch": 0.7816634817002468, "grad_norm": 309.8558654785156, "learning_rate": 1.4780675993635668e-06, "loss": 15.1325, "step": 386950 }, { "epoch": 0.7816836823329306, "grad_norm": 738.7531127929688, "learning_rate": 1.4778198351914853e-06, "loss": 14.0651, "step": 386960 }, { "epoch": 0.7817038829656144, "grad_norm": 247.15045166015625, "learning_rate": 1.4775720881860845e-06, "loss": 
15.0891, "step": 386970 }, { "epoch": 0.7817240835982983, "grad_norm": 500.20452880859375, "learning_rate": 1.4773243583485681e-06, "loss": 21.4067, "step": 386980 }, { "epoch": 0.7817442842309821, "grad_norm": 371.3633117675781, "learning_rate": 1.4770766456801477e-06, "loss": 25.5898, "step": 386990 }, { "epoch": 0.7817644848636659, "grad_norm": 318.5390319824219, "learning_rate": 1.4768289501820265e-06, "loss": 14.3243, "step": 387000 }, { "epoch": 0.7817846854963497, "grad_norm": 131.52696228027344, "learning_rate": 1.476581271855415e-06, "loss": 23.0765, "step": 387010 }, { "epoch": 0.7818048861290335, "grad_norm": 1093.0257568359375, "learning_rate": 1.4763336107015192e-06, "loss": 11.2298, "step": 387020 }, { "epoch": 0.7818250867617174, "grad_norm": 17.26053237915039, "learning_rate": 1.4760859667215449e-06, "loss": 20.0986, "step": 387030 }, { "epoch": 0.7818452873944012, "grad_norm": 432.4856262207031, "learning_rate": 1.4758383399167014e-06, "loss": 16.8963, "step": 387040 }, { "epoch": 0.781865488027085, "grad_norm": 344.38140869140625, "learning_rate": 1.4755907302881927e-06, "loss": 11.4162, "step": 387050 }, { "epoch": 0.7818856886597688, "grad_norm": 882.4750366210938, "learning_rate": 1.4753431378372291e-06, "loss": 18.6084, "step": 387060 }, { "epoch": 0.7819058892924526, "grad_norm": 471.34710693359375, "learning_rate": 1.4750955625650153e-06, "loss": 13.8958, "step": 387070 }, { "epoch": 0.7819260899251365, "grad_norm": 5.232560634613037, "learning_rate": 1.474848004472757e-06, "loss": 14.3645, "step": 387080 }, { "epoch": 0.7819462905578203, "grad_norm": 269.0277404785156, "learning_rate": 1.4746004635616634e-06, "loss": 9.4807, "step": 387090 }, { "epoch": 0.7819664911905041, "grad_norm": 645.8562622070312, "learning_rate": 1.4743529398329393e-06, "loss": 21.5985, "step": 387100 }, { "epoch": 0.7819866918231879, "grad_norm": 218.50521850585938, "learning_rate": 1.4741054332877902e-06, "loss": 21.1489, "step": 387110 }, { "epoch": 0.7820068924558717, "grad_norm": 216.4319610595703, "learning_rate": 1.4738579439274236e-06, "loss": 32.7325, "step": 387120 }, { "epoch": 0.7820270930885556, "grad_norm": 581.9794921875, "learning_rate": 1.473610471753047e-06, "loss": 17.003, "step": 387130 }, { "epoch": 0.7820472937212394, "grad_norm": 358.75482177734375, "learning_rate": 1.4733630167658652e-06, "loss": 19.3711, "step": 387140 }, { "epoch": 0.7820674943539232, "grad_norm": 456.4960021972656, "learning_rate": 1.473115578967083e-06, "loss": 17.8683, "step": 387150 }, { "epoch": 0.782087694986607, "grad_norm": 763.3374633789062, "learning_rate": 1.4728681583579091e-06, "loss": 16.0971, "step": 387160 }, { "epoch": 0.7821078956192908, "grad_norm": 301.2882995605469, "learning_rate": 1.4726207549395482e-06, "loss": 16.7476, "step": 387170 }, { "epoch": 0.7821280962519747, "grad_norm": 360.88348388671875, "learning_rate": 1.4723733687132041e-06, "loss": 43.6114, "step": 387180 }, { "epoch": 0.7821482968846585, "grad_norm": 505.7632751464844, "learning_rate": 1.4721259996800847e-06, "loss": 17.4158, "step": 387190 }, { "epoch": 0.7821684975173423, "grad_norm": 147.3037109375, "learning_rate": 1.4718786478413983e-06, "loss": 30.0771, "step": 387200 }, { "epoch": 0.782188698150026, "grad_norm": 31.770463943481445, "learning_rate": 1.471631313198344e-06, "loss": 20.0511, "step": 387210 }, { "epoch": 0.7822088987827098, "grad_norm": 451.4087219238281, "learning_rate": 1.4713839957521315e-06, "loss": 17.5128, "step": 387220 }, { "epoch": 0.7822290994153936, "grad_norm": 
6.938021183013916, "learning_rate": 1.4711366955039664e-06, "loss": 15.298, "step": 387230 }, { "epoch": 0.7822493000480775, "grad_norm": 572.2969360351562, "learning_rate": 1.4708894124550527e-06, "loss": 21.4585, "step": 387240 }, { "epoch": 0.7822695006807613, "grad_norm": 547.682861328125, "learning_rate": 1.4706421466065952e-06, "loss": 14.4308, "step": 387250 }, { "epoch": 0.7822897013134451, "grad_norm": 713.3699340820312, "learning_rate": 1.470394897959801e-06, "loss": 15.0501, "step": 387260 }, { "epoch": 0.7823099019461289, "grad_norm": 713.3102416992188, "learning_rate": 1.4701476665158738e-06, "loss": 26.4644, "step": 387270 }, { "epoch": 0.7823301025788127, "grad_norm": 433.7578430175781, "learning_rate": 1.4699004522760174e-06, "loss": 17.473, "step": 387280 }, { "epoch": 0.7823503032114966, "grad_norm": 315.8951110839844, "learning_rate": 1.4696532552414383e-06, "loss": 14.5305, "step": 387290 }, { "epoch": 0.7823705038441804, "grad_norm": 400.2901611328125, "learning_rate": 1.469406075413342e-06, "loss": 10.748, "step": 387300 }, { "epoch": 0.7823907044768642, "grad_norm": 160.92108154296875, "learning_rate": 1.4691589127929328e-06, "loss": 17.2667, "step": 387310 }, { "epoch": 0.782410905109548, "grad_norm": 266.0498046875, "learning_rate": 1.4689117673814135e-06, "loss": 30.5759, "step": 387320 }, { "epoch": 0.7824311057422318, "grad_norm": 95.8498306274414, "learning_rate": 1.4686646391799909e-06, "loss": 14.7226, "step": 387330 }, { "epoch": 0.7824513063749157, "grad_norm": 464.59521484375, "learning_rate": 1.4684175281898688e-06, "loss": 10.7037, "step": 387340 }, { "epoch": 0.7824715070075995, "grad_norm": 242.17062377929688, "learning_rate": 1.46817043441225e-06, "loss": 16.1035, "step": 387350 }, { "epoch": 0.7824917076402833, "grad_norm": 220.4538116455078, "learning_rate": 1.4679233578483415e-06, "loss": 21.0202, "step": 387360 }, { "epoch": 0.7825119082729671, "grad_norm": 1055.5733642578125, "learning_rate": 1.4676762984993443e-06, "loss": 20.5439, "step": 387370 }, { "epoch": 0.782532108905651, "grad_norm": 1321.3682861328125, "learning_rate": 1.467429256366466e-06, "loss": 17.3385, "step": 387380 }, { "epoch": 0.7825523095383348, "grad_norm": 349.6588134765625, "learning_rate": 1.4671822314509099e-06, "loss": 23.1099, "step": 387390 }, { "epoch": 0.7825725101710186, "grad_norm": 152.19427490234375, "learning_rate": 1.4669352237538763e-06, "loss": 18.292, "step": 387400 }, { "epoch": 0.7825927108037024, "grad_norm": 261.75164794921875, "learning_rate": 1.4666882332765747e-06, "loss": 28.4234, "step": 387410 }, { "epoch": 0.7826129114363862, "grad_norm": 257.6233825683594, "learning_rate": 1.4664412600202056e-06, "loss": 7.0353, "step": 387420 }, { "epoch": 0.78263311206907, "grad_norm": 204.0417938232422, "learning_rate": 1.4661943039859716e-06, "loss": 9.2934, "step": 387430 }, { "epoch": 0.7826533127017539, "grad_norm": 187.93104553222656, "learning_rate": 1.4659473651750777e-06, "loss": 15.1497, "step": 387440 }, { "epoch": 0.7826735133344377, "grad_norm": 489.39453125, "learning_rate": 1.4657004435887296e-06, "loss": 18.4237, "step": 387450 }, { "epoch": 0.7826937139671214, "grad_norm": 254.7746124267578, "learning_rate": 1.4654535392281287e-06, "loss": 18.6789, "step": 387460 }, { "epoch": 0.7827139145998052, "grad_norm": 470.4129943847656, "learning_rate": 1.4652066520944774e-06, "loss": 26.8949, "step": 387470 }, { "epoch": 0.782734115232489, "grad_norm": 369.17156982421875, "learning_rate": 1.4649597821889817e-06, "loss": 16.8451, "step": 387480 }, { 
"epoch": 0.7827543158651729, "grad_norm": 56.73841094970703, "learning_rate": 1.4647129295128426e-06, "loss": 23.2344, "step": 387490 }, { "epoch": 0.7827745164978567, "grad_norm": 490.67572021484375, "learning_rate": 1.4644660940672628e-06, "loss": 22.2702, "step": 387500 }, { "epoch": 0.7827947171305405, "grad_norm": 12.69742202758789, "learning_rate": 1.4642192758534463e-06, "loss": 8.2552, "step": 387510 }, { "epoch": 0.7828149177632243, "grad_norm": 294.6560363769531, "learning_rate": 1.463972474872598e-06, "loss": 16.5292, "step": 387520 }, { "epoch": 0.7828351183959081, "grad_norm": 278.0723571777344, "learning_rate": 1.463725691125919e-06, "loss": 30.3822, "step": 387530 }, { "epoch": 0.782855319028592, "grad_norm": 716.5742797851562, "learning_rate": 1.4634789246146103e-06, "loss": 25.5102, "step": 387540 }, { "epoch": 0.7828755196612758, "grad_norm": 356.6044616699219, "learning_rate": 1.463232175339878e-06, "loss": 14.8314, "step": 387550 }, { "epoch": 0.7828957202939596, "grad_norm": 451.978271484375, "learning_rate": 1.4629854433029234e-06, "loss": 30.8221, "step": 387560 }, { "epoch": 0.7829159209266434, "grad_norm": 394.9523620605469, "learning_rate": 1.4627387285049465e-06, "loss": 12.3614, "step": 387570 }, { "epoch": 0.7829361215593272, "grad_norm": 239.18336486816406, "learning_rate": 1.462492030947153e-06, "loss": 15.4607, "step": 387580 }, { "epoch": 0.7829563221920111, "grad_norm": 403.53228759765625, "learning_rate": 1.462245350630745e-06, "loss": 10.6075, "step": 387590 }, { "epoch": 0.7829765228246949, "grad_norm": 783.1351318359375, "learning_rate": 1.4619986875569247e-06, "loss": 21.1282, "step": 387600 }, { "epoch": 0.7829967234573787, "grad_norm": 482.3123474121094, "learning_rate": 1.4617520417268916e-06, "loss": 12.1442, "step": 387610 }, { "epoch": 0.7830169240900625, "grad_norm": 422.8743591308594, "learning_rate": 1.4615054131418521e-06, "loss": 10.1425, "step": 387620 }, { "epoch": 0.7830371247227463, "grad_norm": 251.38690185546875, "learning_rate": 1.4612588018030055e-06, "loss": 18.1368, "step": 387630 }, { "epoch": 0.7830573253554302, "grad_norm": 333.3215637207031, "learning_rate": 1.461012207711553e-06, "loss": 29.8625, "step": 387640 }, { "epoch": 0.783077525988114, "grad_norm": 687.0955810546875, "learning_rate": 1.460765630868699e-06, "loss": 25.649, "step": 387650 }, { "epoch": 0.7830977266207978, "grad_norm": 12.569586753845215, "learning_rate": 1.4605190712756428e-06, "loss": 12.0924, "step": 387660 }, { "epoch": 0.7831179272534816, "grad_norm": 559.3131103515625, "learning_rate": 1.460272528933589e-06, "loss": 28.3499, "step": 387670 }, { "epoch": 0.7831381278861654, "grad_norm": 230.3157196044922, "learning_rate": 1.4600260038437376e-06, "loss": 17.6607, "step": 387680 }, { "epoch": 0.7831583285188493, "grad_norm": 731.9522705078125, "learning_rate": 1.459779496007288e-06, "loss": 16.4776, "step": 387690 }, { "epoch": 0.7831785291515331, "grad_norm": 290.20904541015625, "learning_rate": 1.459533005425446e-06, "loss": 11.4024, "step": 387700 }, { "epoch": 0.7831987297842169, "grad_norm": 209.58937072753906, "learning_rate": 1.4592865320994103e-06, "loss": 17.2254, "step": 387710 }, { "epoch": 0.7832189304169006, "grad_norm": 534.1865844726562, "learning_rate": 1.4590400760303814e-06, "loss": 16.533, "step": 387720 }, { "epoch": 0.7832391310495844, "grad_norm": 245.14344787597656, "learning_rate": 1.4587936372195611e-06, "loss": 19.2951, "step": 387730 }, { "epoch": 0.7832593316822682, "grad_norm": 13.814839363098145, "learning_rate": 
1.4585472156681535e-06, "loss": 10.595, "step": 387740 }, { "epoch": 0.7832795323149521, "grad_norm": 10.924476623535156, "learning_rate": 1.4583008113773567e-06, "loss": 17.6907, "step": 387750 }, { "epoch": 0.7832997329476359, "grad_norm": 408.18524169921875, "learning_rate": 1.4580544243483708e-06, "loss": 17.6132, "step": 387760 }, { "epoch": 0.7833199335803197, "grad_norm": 0.14816100895404816, "learning_rate": 1.4578080545823991e-06, "loss": 14.5498, "step": 387770 }, { "epoch": 0.7833401342130035, "grad_norm": 450.8954162597656, "learning_rate": 1.457561702080642e-06, "loss": 13.728, "step": 387780 }, { "epoch": 0.7833603348456873, "grad_norm": 244.66517639160156, "learning_rate": 1.457315366844298e-06, "loss": 12.0544, "step": 387790 }, { "epoch": 0.7833805354783712, "grad_norm": 418.8738708496094, "learning_rate": 1.4570690488745687e-06, "loss": 18.9703, "step": 387800 }, { "epoch": 0.783400736111055, "grad_norm": 631.1736450195312, "learning_rate": 1.4568227481726589e-06, "loss": 10.4646, "step": 387810 }, { "epoch": 0.7834209367437388, "grad_norm": 506.9795227050781, "learning_rate": 1.4565764647397612e-06, "loss": 11.6902, "step": 387820 }, { "epoch": 0.7834411373764226, "grad_norm": 116.03436279296875, "learning_rate": 1.4563301985770812e-06, "loss": 17.3234, "step": 387830 }, { "epoch": 0.7834613380091064, "grad_norm": 295.0504455566406, "learning_rate": 1.4560839496858187e-06, "loss": 13.5444, "step": 387840 }, { "epoch": 0.7834815386417903, "grad_norm": 575.4901123046875, "learning_rate": 1.4558377180671734e-06, "loss": 18.5078, "step": 387850 }, { "epoch": 0.7835017392744741, "grad_norm": 170.54981994628906, "learning_rate": 1.4555915037223438e-06, "loss": 16.1026, "step": 387860 }, { "epoch": 0.7835219399071579, "grad_norm": 1.215683937072754, "learning_rate": 1.455345306652533e-06, "loss": 11.0265, "step": 387870 }, { "epoch": 0.7835421405398417, "grad_norm": 47.566925048828125, "learning_rate": 1.4550991268589393e-06, "loss": 36.6043, "step": 387880 }, { "epoch": 0.7835623411725255, "grad_norm": 127.637939453125, "learning_rate": 1.4548529643427607e-06, "loss": 15.5971, "step": 387890 }, { "epoch": 0.7835825418052094, "grad_norm": 454.849853515625, "learning_rate": 1.4546068191051988e-06, "loss": 15.9606, "step": 387900 }, { "epoch": 0.7836027424378932, "grad_norm": 167.77975463867188, "learning_rate": 1.4543606911474545e-06, "loss": 19.1838, "step": 387910 }, { "epoch": 0.783622943070577, "grad_norm": 528.1384887695312, "learning_rate": 1.4541145804707268e-06, "loss": 27.7355, "step": 387920 }, { "epoch": 0.7836431437032608, "grad_norm": 602.1045532226562, "learning_rate": 1.4538684870762127e-06, "loss": 18.1397, "step": 387930 }, { "epoch": 0.7836633443359446, "grad_norm": 252.25518798828125, "learning_rate": 1.4536224109651148e-06, "loss": 13.3719, "step": 387940 }, { "epoch": 0.7836835449686285, "grad_norm": 66.79181671142578, "learning_rate": 1.4533763521386319e-06, "loss": 21.1371, "step": 387950 }, { "epoch": 0.7837037456013123, "grad_norm": 61.031314849853516, "learning_rate": 1.4531303105979605e-06, "loss": 6.9965, "step": 387960 }, { "epoch": 0.783723946233996, "grad_norm": 180.56610107421875, "learning_rate": 1.4528842863443033e-06, "loss": 14.9373, "step": 387970 }, { "epoch": 0.7837441468666798, "grad_norm": 278.0017395019531, "learning_rate": 1.4526382793788564e-06, "loss": 14.2005, "step": 387980 }, { "epoch": 0.7837643474993636, "grad_norm": 169.85379028320312, "learning_rate": 1.4523922897028215e-06, "loss": 18.7049, "step": 387990 }, { "epoch": 
0.7837845481320475, "grad_norm": 412.85125732421875, "learning_rate": 1.4521463173173966e-06, "loss": 13.6385, "step": 388000 }, { "epoch": 0.7838047487647313, "grad_norm": 386.0448913574219, "learning_rate": 1.4519003622237788e-06, "loss": 13.6984, "step": 388010 }, { "epoch": 0.7838249493974151, "grad_norm": 409.82012939453125, "learning_rate": 1.4516544244231695e-06, "loss": 18.8317, "step": 388020 }, { "epoch": 0.7838451500300989, "grad_norm": 405.0011291503906, "learning_rate": 1.4514085039167652e-06, "loss": 5.4395, "step": 388030 }, { "epoch": 0.7838653506627827, "grad_norm": 489.4347839355469, "learning_rate": 1.4511626007057667e-06, "loss": 11.3004, "step": 388040 }, { "epoch": 0.7838855512954666, "grad_norm": 592.531982421875, "learning_rate": 1.4509167147913693e-06, "loss": 16.8958, "step": 388050 }, { "epoch": 0.7839057519281504, "grad_norm": 428.1918029785156, "learning_rate": 1.4506708461747754e-06, "loss": 18.5203, "step": 388060 }, { "epoch": 0.7839259525608342, "grad_norm": 538.6543579101562, "learning_rate": 1.4504249948571814e-06, "loss": 24.9611, "step": 388070 }, { "epoch": 0.783946153193518, "grad_norm": 274.9072570800781, "learning_rate": 1.4501791608397835e-06, "loss": 28.5931, "step": 388080 }, { "epoch": 0.7839663538262018, "grad_norm": 420.49224853515625, "learning_rate": 1.449933344123784e-06, "loss": 16.1609, "step": 388090 }, { "epoch": 0.7839865544588857, "grad_norm": 643.5992431640625, "learning_rate": 1.4496875447103781e-06, "loss": 25.3927, "step": 388100 }, { "epoch": 0.7840067550915695, "grad_norm": 307.52093505859375, "learning_rate": 1.4494417626007633e-06, "loss": 17.6272, "step": 388110 }, { "epoch": 0.7840269557242533, "grad_norm": 335.6693115234375, "learning_rate": 1.449195997796139e-06, "loss": 12.8733, "step": 388120 }, { "epoch": 0.7840471563569371, "grad_norm": 224.05914306640625, "learning_rate": 1.4489502502977037e-06, "loss": 12.8672, "step": 388130 }, { "epoch": 0.7840673569896209, "grad_norm": 314.70001220703125, "learning_rate": 1.4487045201066547e-06, "loss": 19.8743, "step": 388140 }, { "epoch": 0.7840875576223048, "grad_norm": 1050.0281982421875, "learning_rate": 1.4484588072241873e-06, "loss": 23.1158, "step": 388150 }, { "epoch": 0.7841077582549886, "grad_norm": 0.7938860058784485, "learning_rate": 1.4482131116515026e-06, "loss": 10.3835, "step": 388160 }, { "epoch": 0.7841279588876724, "grad_norm": 558.4970092773438, "learning_rate": 1.4479674333897964e-06, "loss": 15.0136, "step": 388170 }, { "epoch": 0.7841481595203562, "grad_norm": 923.7531127929688, "learning_rate": 1.4477217724402643e-06, "loss": 17.374, "step": 388180 }, { "epoch": 0.78416836015304, "grad_norm": 273.45819091796875, "learning_rate": 1.4474761288041057e-06, "loss": 13.9706, "step": 388190 }, { "epoch": 0.7841885607857239, "grad_norm": 1157.492919921875, "learning_rate": 1.4472305024825189e-06, "loss": 25.9856, "step": 388200 }, { "epoch": 0.7842087614184077, "grad_norm": 218.44546508789062, "learning_rate": 1.4469848934767e-06, "loss": 10.8597, "step": 388210 }, { "epoch": 0.7842289620510915, "grad_norm": 193.62757873535156, "learning_rate": 1.4467393017878444e-06, "loss": 23.0175, "step": 388220 }, { "epoch": 0.7842491626837752, "grad_norm": 452.9956970214844, "learning_rate": 1.446493727417152e-06, "loss": 27.1394, "step": 388230 }, { "epoch": 0.784269363316459, "grad_norm": 159.25376892089844, "learning_rate": 1.4462481703658177e-06, "loss": 8.6344, "step": 388240 }, { "epoch": 0.7842895639491428, "grad_norm": 1031.0223388671875, "learning_rate": 
1.4460026306350378e-06, "loss": 13.0682, "step": 388250 }, { "epoch": 0.7843097645818267, "grad_norm": 596.4764404296875, "learning_rate": 1.4457571082260113e-06, "loss": 28.9894, "step": 388260 }, { "epoch": 0.7843299652145105, "grad_norm": 267.9975280761719, "learning_rate": 1.445511603139932e-06, "loss": 12.5665, "step": 388270 }, { "epoch": 0.7843501658471943, "grad_norm": 271.784423828125, "learning_rate": 1.4452661153779996e-06, "loss": 23.4446, "step": 388280 }, { "epoch": 0.7843703664798781, "grad_norm": 432.1602783203125, "learning_rate": 1.445020644941409e-06, "loss": 21.4996, "step": 388290 }, { "epoch": 0.7843905671125619, "grad_norm": 376.19134521484375, "learning_rate": 1.4447751918313552e-06, "loss": 14.6372, "step": 388300 }, { "epoch": 0.7844107677452458, "grad_norm": 1015.6385498046875, "learning_rate": 1.4445297560490373e-06, "loss": 32.7653, "step": 388310 }, { "epoch": 0.7844309683779296, "grad_norm": 476.18585205078125, "learning_rate": 1.4442843375956506e-06, "loss": 25.0618, "step": 388320 }, { "epoch": 0.7844511690106134, "grad_norm": 355.6081237792969, "learning_rate": 1.4440389364723889e-06, "loss": 20.9087, "step": 388330 }, { "epoch": 0.7844713696432972, "grad_norm": 290.85345458984375, "learning_rate": 1.4437935526804497e-06, "loss": 7.4059, "step": 388340 }, { "epoch": 0.784491570275981, "grad_norm": 289.3037109375, "learning_rate": 1.4435481862210315e-06, "loss": 16.1818, "step": 388350 }, { "epoch": 0.7845117709086649, "grad_norm": 163.52731323242188, "learning_rate": 1.4433028370953279e-06, "loss": 21.5489, "step": 388360 }, { "epoch": 0.7845319715413487, "grad_norm": 447.26934814453125, "learning_rate": 1.4430575053045337e-06, "loss": 17.4456, "step": 388370 }, { "epoch": 0.7845521721740325, "grad_norm": 31.244953155517578, "learning_rate": 1.4428121908498472e-06, "loss": 23.6349, "step": 388380 }, { "epoch": 0.7845723728067163, "grad_norm": 456.913818359375, "learning_rate": 1.4425668937324623e-06, "loss": 14.1966, "step": 388390 }, { "epoch": 0.7845925734394001, "grad_norm": 29.88794708251953, "learning_rate": 1.4423216139535735e-06, "loss": 22.1706, "step": 388400 }, { "epoch": 0.784612774072084, "grad_norm": 295.7020568847656, "learning_rate": 1.4420763515143777e-06, "loss": 26.525, "step": 388410 }, { "epoch": 0.7846329747047678, "grad_norm": 209.07150268554688, "learning_rate": 1.4418311064160735e-06, "loss": 16.2925, "step": 388420 }, { "epoch": 0.7846531753374516, "grad_norm": 4.983144283294678, "learning_rate": 1.4415858786598496e-06, "loss": 14.472, "step": 388430 }, { "epoch": 0.7846733759701354, "grad_norm": 388.2928466796875, "learning_rate": 1.4413406682469044e-06, "loss": 21.0465, "step": 388440 }, { "epoch": 0.7846935766028192, "grad_norm": 700.1123046875, "learning_rate": 1.4410954751784352e-06, "loss": 26.1824, "step": 388450 }, { "epoch": 0.7847137772355031, "grad_norm": 828.24560546875, "learning_rate": 1.440850299455635e-06, "loss": 29.1401, "step": 388460 }, { "epoch": 0.7847339778681869, "grad_norm": 288.4026184082031, "learning_rate": 1.4406051410796968e-06, "loss": 22.7773, "step": 388470 }, { "epoch": 0.7847541785008707, "grad_norm": 391.3822937011719, "learning_rate": 1.4403600000518191e-06, "loss": 13.4979, "step": 388480 }, { "epoch": 0.7847743791335544, "grad_norm": 538.2525634765625, "learning_rate": 1.4401148763731953e-06, "loss": 12.9334, "step": 388490 }, { "epoch": 0.7847945797662382, "grad_norm": 771.3076171875, "learning_rate": 1.4398697700450181e-06, "loss": 22.9988, "step": 388500 }, { "epoch": 0.784814780398922, 
"grad_norm": 209.12530517578125, "learning_rate": 1.4396246810684839e-06, "loss": 16.0794, "step": 388510 }, { "epoch": 0.7848349810316059, "grad_norm": 431.53363037109375, "learning_rate": 1.4393796094447886e-06, "loss": 22.1997, "step": 388520 }, { "epoch": 0.7848551816642897, "grad_norm": 57.958675384521484, "learning_rate": 1.4391345551751251e-06, "loss": 17.6947, "step": 388530 }, { "epoch": 0.7848753822969735, "grad_norm": 350.1054382324219, "learning_rate": 1.4388895182606867e-06, "loss": 10.1518, "step": 388540 }, { "epoch": 0.7848955829296573, "grad_norm": 901.931884765625, "learning_rate": 1.4386444987026705e-06, "loss": 20.1345, "step": 388550 }, { "epoch": 0.7849157835623412, "grad_norm": 329.38287353515625, "learning_rate": 1.4383994965022684e-06, "loss": 20.9889, "step": 388560 }, { "epoch": 0.784935984195025, "grad_norm": 501.769775390625, "learning_rate": 1.4381545116606744e-06, "loss": 22.558, "step": 388570 }, { "epoch": 0.7849561848277088, "grad_norm": 602.0369262695312, "learning_rate": 1.4379095441790847e-06, "loss": 28.9831, "step": 388580 }, { "epoch": 0.7849763854603926, "grad_norm": 155.15145874023438, "learning_rate": 1.4376645940586898e-06, "loss": 21.0178, "step": 388590 }, { "epoch": 0.7849965860930764, "grad_norm": 688.16650390625, "learning_rate": 1.4374196613006874e-06, "loss": 24.7662, "step": 388600 }, { "epoch": 0.7850167867257603, "grad_norm": 244.4649200439453, "learning_rate": 1.4371747459062695e-06, "loss": 11.4804, "step": 388610 }, { "epoch": 0.7850369873584441, "grad_norm": 156.22230529785156, "learning_rate": 1.4369298478766286e-06, "loss": 21.9342, "step": 388620 }, { "epoch": 0.7850571879911279, "grad_norm": 138.66293334960938, "learning_rate": 1.4366849672129607e-06, "loss": 18.6234, "step": 388630 }, { "epoch": 0.7850773886238117, "grad_norm": 18.091754913330078, "learning_rate": 1.4364401039164566e-06, "loss": 14.6982, "step": 388640 }, { "epoch": 0.7850975892564955, "grad_norm": 1073.05517578125, "learning_rate": 1.4361952579883127e-06, "loss": 20.9805, "step": 388650 }, { "epoch": 0.7851177898891794, "grad_norm": 239.39566040039062, "learning_rate": 1.4359504294297195e-06, "loss": 16.8774, "step": 388660 }, { "epoch": 0.7851379905218632, "grad_norm": 326.1767272949219, "learning_rate": 1.4357056182418727e-06, "loss": 26.5385, "step": 388670 }, { "epoch": 0.785158191154547, "grad_norm": 209.94781494140625, "learning_rate": 1.4354608244259649e-06, "loss": 19.8899, "step": 388680 }, { "epoch": 0.7851783917872308, "grad_norm": 230.2615966796875, "learning_rate": 1.4352160479831873e-06, "loss": 10.7179, "step": 388690 }, { "epoch": 0.7851985924199146, "grad_norm": 298.5816650390625, "learning_rate": 1.4349712889147355e-06, "loss": 12.5192, "step": 388700 }, { "epoch": 0.7852187930525985, "grad_norm": 267.53173828125, "learning_rate": 1.4347265472218014e-06, "loss": 19.6131, "step": 388710 }, { "epoch": 0.7852389936852823, "grad_norm": 4.332077503204346, "learning_rate": 1.4344818229055762e-06, "loss": 13.5306, "step": 388720 }, { "epoch": 0.7852591943179661, "grad_norm": 543.1563720703125, "learning_rate": 1.434237115967254e-06, "loss": 23.7846, "step": 388730 }, { "epoch": 0.7852793949506498, "grad_norm": 250.99136352539062, "learning_rate": 1.4339924264080308e-06, "loss": 15.9756, "step": 388740 }, { "epoch": 0.7852995955833336, "grad_norm": 584.9282836914062, "learning_rate": 1.433747754229093e-06, "loss": 24.7947, "step": 388750 }, { "epoch": 0.7853197962160174, "grad_norm": 351.9540100097656, "learning_rate": 1.4335030994316357e-06, "loss": 
15.2832, "step": 388760 }, { "epoch": 0.7853399968487013, "grad_norm": 246.7580108642578, "learning_rate": 1.4332584620168538e-06, "loss": 19.1401, "step": 388770 }, { "epoch": 0.7853601974813851, "grad_norm": 352.8351135253906, "learning_rate": 1.4330138419859375e-06, "loss": 32.0533, "step": 388780 }, { "epoch": 0.7853803981140689, "grad_norm": 56.823421478271484, "learning_rate": 1.4327692393400771e-06, "loss": 12.9277, "step": 388790 }, { "epoch": 0.7854005987467527, "grad_norm": 307.0126037597656, "learning_rate": 1.4325246540804672e-06, "loss": 22.0669, "step": 388800 }, { "epoch": 0.7854207993794365, "grad_norm": 1.0302592515945435, "learning_rate": 1.4322800862083009e-06, "loss": 7.5862, "step": 388810 }, { "epoch": 0.7854410000121204, "grad_norm": 655.0123291015625, "learning_rate": 1.4320355357247689e-06, "loss": 25.2072, "step": 388820 }, { "epoch": 0.7854612006448042, "grad_norm": 152.23594665527344, "learning_rate": 1.4317910026310611e-06, "loss": 20.326, "step": 388830 }, { "epoch": 0.785481401277488, "grad_norm": 1402.765869140625, "learning_rate": 1.431546486928373e-06, "loss": 27.2022, "step": 388840 }, { "epoch": 0.7855016019101718, "grad_norm": 476.7830810546875, "learning_rate": 1.4313019886178942e-06, "loss": 16.5796, "step": 388850 }, { "epoch": 0.7855218025428556, "grad_norm": 425.1496887207031, "learning_rate": 1.4310575077008154e-06, "loss": 18.6398, "step": 388860 }, { "epoch": 0.7855420031755395, "grad_norm": 285.9603576660156, "learning_rate": 1.4308130441783307e-06, "loss": 15.7991, "step": 388870 }, { "epoch": 0.7855622038082233, "grad_norm": 381.857666015625, "learning_rate": 1.4305685980516293e-06, "loss": 18.2687, "step": 388880 }, { "epoch": 0.7855824044409071, "grad_norm": 216.32675170898438, "learning_rate": 1.430324169321905e-06, "loss": 6.9295, "step": 388890 }, { "epoch": 0.7856026050735909, "grad_norm": 1054.424560546875, "learning_rate": 1.4300797579903476e-06, "loss": 33.8434, "step": 388900 }, { "epoch": 0.7856228057062747, "grad_norm": 188.59909057617188, "learning_rate": 1.429835364058147e-06, "loss": 24.688, "step": 388910 }, { "epoch": 0.7856430063389586, "grad_norm": 635.3093872070312, "learning_rate": 1.4295909875264973e-06, "loss": 15.7614, "step": 388920 }, { "epoch": 0.7856632069716424, "grad_norm": 524.1882934570312, "learning_rate": 1.4293466283965878e-06, "loss": 15.9969, "step": 388930 }, { "epoch": 0.7856834076043262, "grad_norm": 350.4474182128906, "learning_rate": 1.4291022866696086e-06, "loss": 30.6318, "step": 388940 }, { "epoch": 0.78570360823701, "grad_norm": 59.84031295776367, "learning_rate": 1.428857962346752e-06, "loss": 30.0063, "step": 388950 }, { "epoch": 0.7857238088696938, "grad_norm": 250.73403930664062, "learning_rate": 1.4286136554292096e-06, "loss": 14.1806, "step": 388960 }, { "epoch": 0.7857440095023777, "grad_norm": 203.14076232910156, "learning_rate": 1.4283693659181713e-06, "loss": 23.3837, "step": 388970 }, { "epoch": 0.7857642101350615, "grad_norm": 313.9321594238281, "learning_rate": 1.4281250938148262e-06, "loss": 13.177, "step": 388980 }, { "epoch": 0.7857844107677453, "grad_norm": 332.5777282714844, "learning_rate": 1.4278808391203674e-06, "loss": 11.7201, "step": 388990 }, { "epoch": 0.785804611400429, "grad_norm": 412.3974914550781, "learning_rate": 1.4276366018359845e-06, "loss": 14.7089, "step": 389000 }, { "epoch": 0.7858248120331128, "grad_norm": 235.46893310546875, "learning_rate": 1.4273923819628654e-06, "loss": 19.7594, "step": 389010 }, { "epoch": 0.7858450126657966, "grad_norm": 
308.99261474609375, "learning_rate": 1.427148179502203e-06, "loss": 26.9167, "step": 389020 }, { "epoch": 0.7858652132984805, "grad_norm": 945.6626586914062, "learning_rate": 1.42690399445519e-06, "loss": 23.02, "step": 389030 }, { "epoch": 0.7858854139311643, "grad_norm": 462.1242370605469, "learning_rate": 1.4266598268230102e-06, "loss": 19.9528, "step": 389040 }, { "epoch": 0.7859056145638481, "grad_norm": 0.0, "learning_rate": 1.4264156766068577e-06, "loss": 21.8942, "step": 389050 }, { "epoch": 0.7859258151965319, "grad_norm": 315.1378173828125, "learning_rate": 1.4261715438079227e-06, "loss": 27.4553, "step": 389060 }, { "epoch": 0.7859460158292157, "grad_norm": 374.4578857421875, "learning_rate": 1.4259274284273943e-06, "loss": 18.1581, "step": 389070 }, { "epoch": 0.7859662164618996, "grad_norm": 423.93682861328125, "learning_rate": 1.4256833304664609e-06, "loss": 20.6491, "step": 389080 }, { "epoch": 0.7859864170945834, "grad_norm": 336.80517578125, "learning_rate": 1.425439249926313e-06, "loss": 17.3852, "step": 389090 }, { "epoch": 0.7860066177272672, "grad_norm": 365.2979431152344, "learning_rate": 1.4251951868081438e-06, "loss": 21.3594, "step": 389100 }, { "epoch": 0.786026818359951, "grad_norm": 153.6697540283203, "learning_rate": 1.4249511411131367e-06, "loss": 19.2336, "step": 389110 }, { "epoch": 0.7860470189926348, "grad_norm": 480.56158447265625, "learning_rate": 1.4247071128424838e-06, "loss": 16.8388, "step": 389120 }, { "epoch": 0.7860672196253187, "grad_norm": 406.1510925292969, "learning_rate": 1.424463101997377e-06, "loss": 13.4169, "step": 389130 }, { "epoch": 0.7860874202580025, "grad_norm": 257.09716796875, "learning_rate": 1.424219108579003e-06, "loss": 15.5408, "step": 389140 }, { "epoch": 0.7861076208906863, "grad_norm": 307.8927001953125, "learning_rate": 1.4239751325885499e-06, "loss": 15.6521, "step": 389150 }, { "epoch": 0.7861278215233701, "grad_norm": 470.115234375, "learning_rate": 1.4237311740272097e-06, "loss": 18.4215, "step": 389160 }, { "epoch": 0.786148022156054, "grad_norm": 0.0, "learning_rate": 1.4234872328961702e-06, "loss": 13.725, "step": 389170 }, { "epoch": 0.7861682227887378, "grad_norm": 764.4478759765625, "learning_rate": 1.4232433091966187e-06, "loss": 30.2444, "step": 389180 }, { "epoch": 0.7861884234214216, "grad_norm": 119.15901947021484, "learning_rate": 1.4229994029297467e-06, "loss": 28.2807, "step": 389190 }, { "epoch": 0.7862086240541054, "grad_norm": 978.8793334960938, "learning_rate": 1.4227555140967402e-06, "loss": 16.5327, "step": 389200 }, { "epoch": 0.7862288246867892, "grad_norm": 932.5587158203125, "learning_rate": 1.4225116426987916e-06, "loss": 25.7453, "step": 389210 }, { "epoch": 0.786249025319473, "grad_norm": 313.8899230957031, "learning_rate": 1.4222677887370868e-06, "loss": 6.8717, "step": 389220 }, { "epoch": 0.7862692259521569, "grad_norm": 162.51959228515625, "learning_rate": 1.4220239522128138e-06, "loss": 22.4034, "step": 389230 }, { "epoch": 0.7862894265848407, "grad_norm": 169.2135467529297, "learning_rate": 1.421780133127163e-06, "loss": 9.9378, "step": 389240 }, { "epoch": 0.7863096272175244, "grad_norm": 394.2033386230469, "learning_rate": 1.4215363314813208e-06, "loss": 17.0908, "step": 389250 }, { "epoch": 0.7863298278502082, "grad_norm": 191.8592987060547, "learning_rate": 1.4212925472764777e-06, "loss": 18.1082, "step": 389260 }, { "epoch": 0.786350028482892, "grad_norm": 8.816691398620605, "learning_rate": 1.4210487805138195e-06, "loss": 29.5246, "step": 389270 }, { "epoch": 0.7863702291155759, 
"grad_norm": 433.8766784667969, "learning_rate": 1.4208050311945365e-06, "loss": 15.919, "step": 389280 }, { "epoch": 0.7863904297482597, "grad_norm": 452.135498046875, "learning_rate": 1.4205612993198165e-06, "loss": 25.2342, "step": 389290 }, { "epoch": 0.7864106303809435, "grad_norm": 154.0487060546875, "learning_rate": 1.420317584890844e-06, "loss": 19.6334, "step": 389300 }, { "epoch": 0.7864308310136273, "grad_norm": 342.992431640625, "learning_rate": 1.4200738879088117e-06, "loss": 17.9108, "step": 389310 }, { "epoch": 0.7864510316463111, "grad_norm": 345.0018310546875, "learning_rate": 1.4198302083749049e-06, "loss": 11.1524, "step": 389320 }, { "epoch": 0.786471232278995, "grad_norm": 84.22262573242188, "learning_rate": 1.4195865462903102e-06, "loss": 16.6948, "step": 389330 }, { "epoch": 0.7864914329116788, "grad_norm": 710.9985961914062, "learning_rate": 1.4193429016562161e-06, "loss": 13.8726, "step": 389340 }, { "epoch": 0.7865116335443626, "grad_norm": 524.85498046875, "learning_rate": 1.4190992744738135e-06, "loss": 15.7337, "step": 389350 }, { "epoch": 0.7865318341770464, "grad_norm": 325.3512268066406, "learning_rate": 1.4188556647442836e-06, "loss": 20.1727, "step": 389360 }, { "epoch": 0.7865520348097302, "grad_norm": 567.08447265625, "learning_rate": 1.4186120724688169e-06, "loss": 18.4844, "step": 389370 }, { "epoch": 0.7865722354424141, "grad_norm": 511.7174072265625, "learning_rate": 1.4183684976486024e-06, "loss": 12.9145, "step": 389380 }, { "epoch": 0.7865924360750979, "grad_norm": 251.32025146484375, "learning_rate": 1.4181249402848246e-06, "loss": 24.8652, "step": 389390 }, { "epoch": 0.7866126367077817, "grad_norm": 790.8467407226562, "learning_rate": 1.4178814003786706e-06, "loss": 17.5066, "step": 389400 }, { "epoch": 0.7866328373404655, "grad_norm": 364.34979248046875, "learning_rate": 1.4176378779313282e-06, "loss": 21.0615, "step": 389410 }, { "epoch": 0.7866530379731493, "grad_norm": 272.425048828125, "learning_rate": 1.417394372943987e-06, "loss": 11.4304, "step": 389420 }, { "epoch": 0.7866732386058332, "grad_norm": 243.82444763183594, "learning_rate": 1.4171508854178284e-06, "loss": 13.9975, "step": 389430 }, { "epoch": 0.786693439238517, "grad_norm": 302.4471740722656, "learning_rate": 1.4169074153540418e-06, "loss": 13.661, "step": 389440 }, { "epoch": 0.7867136398712008, "grad_norm": 303.0475158691406, "learning_rate": 1.4166639627538153e-06, "loss": 12.0609, "step": 389450 }, { "epoch": 0.7867338405038846, "grad_norm": 272.08837890625, "learning_rate": 1.416420527618334e-06, "loss": 21.7047, "step": 389460 }, { "epoch": 0.7867540411365684, "grad_norm": 264.9498596191406, "learning_rate": 1.4161771099487832e-06, "loss": 16.6128, "step": 389470 }, { "epoch": 0.7867742417692523, "grad_norm": 270.763671875, "learning_rate": 1.4159337097463515e-06, "loss": 14.9909, "step": 389480 }, { "epoch": 0.7867944424019361, "grad_norm": 251.77639770507812, "learning_rate": 1.415690327012223e-06, "loss": 22.8092, "step": 389490 }, { "epoch": 0.7868146430346199, "grad_norm": 51.446590423583984, "learning_rate": 1.4154469617475864e-06, "loss": 13.1177, "step": 389500 }, { "epoch": 0.7868348436673036, "grad_norm": 1031.5816650390625, "learning_rate": 1.4152036139536269e-06, "loss": 16.6685, "step": 389510 }, { "epoch": 0.7868550442999874, "grad_norm": 116.60513305664062, "learning_rate": 1.4149602836315285e-06, "loss": 14.5776, "step": 389520 }, { "epoch": 0.7868752449326712, "grad_norm": 349.8798828125, "learning_rate": 1.4147169707824805e-06, "loss": 21.2689, 
"step": 389530 }, { "epoch": 0.7868954455653551, "grad_norm": 327.8604431152344, "learning_rate": 1.414473675407667e-06, "loss": 15.3557, "step": 389540 }, { "epoch": 0.7869156461980389, "grad_norm": 491.3643798828125, "learning_rate": 1.4142303975082723e-06, "loss": 22.4333, "step": 389550 }, { "epoch": 0.7869358468307227, "grad_norm": 186.81837463378906, "learning_rate": 1.413987137085484e-06, "loss": 12.3864, "step": 389560 }, { "epoch": 0.7869560474634065, "grad_norm": 299.52606201171875, "learning_rate": 1.413743894140489e-06, "loss": 26.4502, "step": 389570 }, { "epoch": 0.7869762480960903, "grad_norm": 260.9884338378906, "learning_rate": 1.4135006686744711e-06, "loss": 19.6507, "step": 389580 }, { "epoch": 0.7869964487287742, "grad_norm": 415.31402587890625, "learning_rate": 1.4132574606886146e-06, "loss": 14.6892, "step": 389590 }, { "epoch": 0.787016649361458, "grad_norm": 306.3960876464844, "learning_rate": 1.4130142701841076e-06, "loss": 15.7391, "step": 389600 }, { "epoch": 0.7870368499941418, "grad_norm": 994.6866455078125, "learning_rate": 1.4127710971621339e-06, "loss": 30.5757, "step": 389610 }, { "epoch": 0.7870570506268256, "grad_norm": 623.6600341796875, "learning_rate": 1.4125279416238773e-06, "loss": 17.5492, "step": 389620 }, { "epoch": 0.7870772512595094, "grad_norm": 107.94436645507812, "learning_rate": 1.412284803570525e-06, "loss": 8.8308, "step": 389630 }, { "epoch": 0.7870974518921933, "grad_norm": 372.19940185546875, "learning_rate": 1.4120416830032641e-06, "loss": 23.1538, "step": 389640 }, { "epoch": 0.7871176525248771, "grad_norm": 226.026123046875, "learning_rate": 1.4117985799232735e-06, "loss": 18.6804, "step": 389650 }, { "epoch": 0.7871378531575609, "grad_norm": 281.11907958984375, "learning_rate": 1.4115554943317416e-06, "loss": 21.3654, "step": 389660 }, { "epoch": 0.7871580537902447, "grad_norm": 202.64096069335938, "learning_rate": 1.4113124262298544e-06, "loss": 36.7346, "step": 389670 }, { "epoch": 0.7871782544229285, "grad_norm": 250.60726928710938, "learning_rate": 1.4110693756187954e-06, "loss": 21.4456, "step": 389680 }, { "epoch": 0.7871984550556124, "grad_norm": 263.5244445800781, "learning_rate": 1.4108263424997475e-06, "loss": 22.3812, "step": 389690 }, { "epoch": 0.7872186556882962, "grad_norm": 258.9317626953125, "learning_rate": 1.4105833268738966e-06, "loss": 9.4989, "step": 389700 }, { "epoch": 0.78723885632098, "grad_norm": 674.4338989257812, "learning_rate": 1.4103403287424306e-06, "loss": 19.2276, "step": 389710 }, { "epoch": 0.7872590569536638, "grad_norm": 111.07289123535156, "learning_rate": 1.4100973481065266e-06, "loss": 22.8958, "step": 389720 }, { "epoch": 0.7872792575863476, "grad_norm": 216.6655731201172, "learning_rate": 1.4098543849673734e-06, "loss": 14.623, "step": 389730 }, { "epoch": 0.7872994582190315, "grad_norm": 329.5108947753906, "learning_rate": 1.4096114393261557e-06, "loss": 19.5182, "step": 389740 }, { "epoch": 0.7873196588517153, "grad_norm": 970.3660888671875, "learning_rate": 1.4093685111840567e-06, "loss": 14.4819, "step": 389750 }, { "epoch": 0.787339859484399, "grad_norm": 113.45997619628906, "learning_rate": 1.4091256005422583e-06, "loss": 14.2308, "step": 389760 }, { "epoch": 0.7873600601170828, "grad_norm": 589.2026977539062, "learning_rate": 1.4088827074019479e-06, "loss": 32.8889, "step": 389770 }, { "epoch": 0.7873802607497666, "grad_norm": 574.441162109375, "learning_rate": 1.4086398317643074e-06, "loss": 29.1568, "step": 389780 }, { "epoch": 0.7874004613824505, "grad_norm": 226.6656951904297, 
"learning_rate": 1.4083969736305191e-06, "loss": 16.7552, "step": 389790 }, { "epoch": 0.7874206620151343, "grad_norm": 403.8798828125, "learning_rate": 1.4081541330017706e-06, "loss": 26.2683, "step": 389800 }, { "epoch": 0.7874408626478181, "grad_norm": 461.10943603515625, "learning_rate": 1.4079113098792413e-06, "loss": 27.7547, "step": 389810 }, { "epoch": 0.7874610632805019, "grad_norm": 98.00347900390625, "learning_rate": 1.407668504264118e-06, "loss": 17.0657, "step": 389820 }, { "epoch": 0.7874812639131857, "grad_norm": 492.1479797363281, "learning_rate": 1.4074257161575828e-06, "loss": 23.8312, "step": 389830 }, { "epoch": 0.7875014645458696, "grad_norm": 107.99175262451172, "learning_rate": 1.407182945560817e-06, "loss": 11.9698, "step": 389840 }, { "epoch": 0.7875216651785534, "grad_norm": 247.5465087890625, "learning_rate": 1.4069401924750082e-06, "loss": 9.3671, "step": 389850 }, { "epoch": 0.7875418658112372, "grad_norm": 466.1968078613281, "learning_rate": 1.4066974569013346e-06, "loss": 11.9463, "step": 389860 }, { "epoch": 0.787562066443921, "grad_norm": 193.89556884765625, "learning_rate": 1.4064547388409838e-06, "loss": 27.172, "step": 389870 }, { "epoch": 0.7875822670766048, "grad_norm": 759.7708740234375, "learning_rate": 1.4062120382951355e-06, "loss": 20.6499, "step": 389880 }, { "epoch": 0.7876024677092887, "grad_norm": 443.9864807128906, "learning_rate": 1.405969355264975e-06, "loss": 9.3756, "step": 389890 }, { "epoch": 0.7876226683419725, "grad_norm": 378.2743225097656, "learning_rate": 1.4057266897516842e-06, "loss": 20.7845, "step": 389900 }, { "epoch": 0.7876428689746563, "grad_norm": 456.9198913574219, "learning_rate": 1.4054840417564436e-06, "loss": 8.7446, "step": 389910 }, { "epoch": 0.7876630696073401, "grad_norm": 230.49256896972656, "learning_rate": 1.4052414112804396e-06, "loss": 9.2653, "step": 389920 }, { "epoch": 0.7876832702400239, "grad_norm": 243.60069274902344, "learning_rate": 1.404998798324853e-06, "loss": 11.8547, "step": 389930 }, { "epoch": 0.7877034708727078, "grad_norm": 383.02056884765625, "learning_rate": 1.404756202890865e-06, "loss": 16.3119, "step": 389940 }, { "epoch": 0.7877236715053916, "grad_norm": 726.7916870117188, "learning_rate": 1.4045136249796588e-06, "loss": 18.4984, "step": 389950 }, { "epoch": 0.7877438721380754, "grad_norm": 303.83380126953125, "learning_rate": 1.4042710645924207e-06, "loss": 8.8965, "step": 389960 }, { "epoch": 0.7877640727707592, "grad_norm": 423.7670593261719, "learning_rate": 1.4040285217303256e-06, "loss": 22.0491, "step": 389970 }, { "epoch": 0.787784273403443, "grad_norm": 294.0246887207031, "learning_rate": 1.4037859963945598e-06, "loss": 16.3219, "step": 389980 }, { "epoch": 0.7878044740361269, "grad_norm": 398.1592712402344, "learning_rate": 1.4035434885863064e-06, "loss": 12.661, "step": 389990 }, { "epoch": 0.7878246746688107, "grad_norm": 119.3377456665039, "learning_rate": 1.4033009983067454e-06, "loss": 28.423, "step": 390000 }, { "epoch": 0.7878448753014945, "grad_norm": 807.6712036132812, "learning_rate": 1.4030585255570577e-06, "loss": 25.2311, "step": 390010 }, { "epoch": 0.7878650759341782, "grad_norm": 129.84024047851562, "learning_rate": 1.402816070338427e-06, "loss": 12.5746, "step": 390020 }, { "epoch": 0.787885276566862, "grad_norm": 132.0088348388672, "learning_rate": 1.4025736326520373e-06, "loss": 15.5592, "step": 390030 }, { "epoch": 0.7879054771995458, "grad_norm": 639.2557983398438, "learning_rate": 1.402331212499064e-06, "loss": 13.7056, "step": 390040 }, { "epoch": 
0.7879256778322297, "grad_norm": 332.296875, "learning_rate": 1.4020888098806924e-06, "loss": 20.4145, "step": 390050 }, { "epoch": 0.7879458784649135, "grad_norm": 49.81679916381836, "learning_rate": 1.401846424798105e-06, "loss": 16.4018, "step": 390060 }, { "epoch": 0.7879660790975973, "grad_norm": 509.4182434082031, "learning_rate": 1.4016040572524813e-06, "loss": 14.52, "step": 390070 }, { "epoch": 0.7879862797302811, "grad_norm": 65.95465850830078, "learning_rate": 1.4013617072450019e-06, "loss": 32.8255, "step": 390080 }, { "epoch": 0.788006480362965, "grad_norm": 180.2455291748047, "learning_rate": 1.401119374776851e-06, "loss": 15.8193, "step": 390090 }, { "epoch": 0.7880266809956488, "grad_norm": 174.9076690673828, "learning_rate": 1.4008770598492072e-06, "loss": 8.4116, "step": 390100 }, { "epoch": 0.7880468816283326, "grad_norm": 356.658935546875, "learning_rate": 1.4006347624632505e-06, "loss": 23.3492, "step": 390110 }, { "epoch": 0.7880670822610164, "grad_norm": 373.9306335449219, "learning_rate": 1.4003924826201653e-06, "loss": 19.9767, "step": 390120 }, { "epoch": 0.7880872828937002, "grad_norm": 604.8828735351562, "learning_rate": 1.4001502203211286e-06, "loss": 19.2727, "step": 390130 }, { "epoch": 0.788107483526384, "grad_norm": 292.82464599609375, "learning_rate": 1.399907975567325e-06, "loss": 23.8888, "step": 390140 }, { "epoch": 0.7881276841590679, "grad_norm": 293.5496520996094, "learning_rate": 1.3996657483599318e-06, "loss": 11.5477, "step": 390150 }, { "epoch": 0.7881478847917517, "grad_norm": 142.21266174316406, "learning_rate": 1.3994235387001326e-06, "loss": 11.2106, "step": 390160 }, { "epoch": 0.7881680854244355, "grad_norm": 471.30609130859375, "learning_rate": 1.3991813465891046e-06, "loss": 14.1122, "step": 390170 }, { "epoch": 0.7881882860571193, "grad_norm": 508.54669189453125, "learning_rate": 1.3989391720280316e-06, "loss": 21.9794, "step": 390180 }, { "epoch": 0.7882084866898031, "grad_norm": 180.43858337402344, "learning_rate": 1.3986970150180923e-06, "loss": 19.0057, "step": 390190 }, { "epoch": 0.788228687322487, "grad_norm": 37.364959716796875, "learning_rate": 1.3984548755604655e-06, "loss": 30.1706, "step": 390200 }, { "epoch": 0.7882488879551708, "grad_norm": 385.6730651855469, "learning_rate": 1.3982127536563345e-06, "loss": 20.253, "step": 390210 }, { "epoch": 0.7882690885878546, "grad_norm": 251.6427001953125, "learning_rate": 1.3979706493068772e-06, "loss": 9.2216, "step": 390220 }, { "epoch": 0.7882892892205384, "grad_norm": 333.47467041015625, "learning_rate": 1.397728562513273e-06, "loss": 21.7927, "step": 390230 }, { "epoch": 0.7883094898532222, "grad_norm": 986.24658203125, "learning_rate": 1.397486493276703e-06, "loss": 39.0049, "step": 390240 }, { "epoch": 0.7883296904859061, "grad_norm": 308.32647705078125, "learning_rate": 1.3972444415983495e-06, "loss": 20.7179, "step": 390250 }, { "epoch": 0.7883498911185899, "grad_norm": 242.95274353027344, "learning_rate": 1.397002407479387e-06, "loss": 19.1673, "step": 390260 }, { "epoch": 0.7883700917512737, "grad_norm": 338.8032531738281, "learning_rate": 1.3967603909209976e-06, "loss": 22.7495, "step": 390270 }, { "epoch": 0.7883902923839574, "grad_norm": 318.2375183105469, "learning_rate": 1.3965183919243624e-06, "loss": 16.4054, "step": 390280 }, { "epoch": 0.7884104930166412, "grad_norm": 267.89013671875, "learning_rate": 1.3962764104906596e-06, "loss": 18.8724, "step": 390290 }, { "epoch": 0.788430693649325, "grad_norm": 510.5446472167969, "learning_rate": 1.3960344466210669e-06, 
"loss": 42.5586, "step": 390300 }, { "epoch": 0.7884508942820089, "grad_norm": 249.7393341064453, "learning_rate": 1.3957925003167655e-06, "loss": 21.2731, "step": 390310 }, { "epoch": 0.7884710949146927, "grad_norm": 74.7342529296875, "learning_rate": 1.3955505715789368e-06, "loss": 15.1349, "step": 390320 }, { "epoch": 0.7884912955473765, "grad_norm": 48.525596618652344, "learning_rate": 1.395308660408755e-06, "loss": 31.3871, "step": 390330 }, { "epoch": 0.7885114961800603, "grad_norm": 466.8329772949219, "learning_rate": 1.3950667668074015e-06, "loss": 21.7112, "step": 390340 }, { "epoch": 0.7885316968127442, "grad_norm": 182.07376098632812, "learning_rate": 1.3948248907760565e-06, "loss": 22.0986, "step": 390350 }, { "epoch": 0.788551897445428, "grad_norm": 33.70562744140625, "learning_rate": 1.3945830323158982e-06, "loss": 27.627, "step": 390360 }, { "epoch": 0.7885720980781118, "grad_norm": 224.57284545898438, "learning_rate": 1.394341191428103e-06, "loss": 9.883, "step": 390370 }, { "epoch": 0.7885922987107956, "grad_norm": 111.23213195800781, "learning_rate": 1.3940993681138533e-06, "loss": 21.0308, "step": 390380 }, { "epoch": 0.7886124993434794, "grad_norm": 249.69235229492188, "learning_rate": 1.3938575623743262e-06, "loss": 26.5926, "step": 390390 }, { "epoch": 0.7886326999761633, "grad_norm": 183.27928161621094, "learning_rate": 1.3936157742106977e-06, "loss": 13.7726, "step": 390400 }, { "epoch": 0.7886529006088471, "grad_norm": 166.25120544433594, "learning_rate": 1.3933740036241505e-06, "loss": 21.9439, "step": 390410 }, { "epoch": 0.7886731012415309, "grad_norm": 393.0563659667969, "learning_rate": 1.3931322506158596e-06, "loss": 16.4815, "step": 390420 }, { "epoch": 0.7886933018742147, "grad_norm": 122.62916564941406, "learning_rate": 1.3928905151870059e-06, "loss": 16.9871, "step": 390430 }, { "epoch": 0.7887135025068985, "grad_norm": 326.7325439453125, "learning_rate": 1.3926487973387665e-06, "loss": 8.6477, "step": 390440 }, { "epoch": 0.7887337031395824, "grad_norm": 252.49375915527344, "learning_rate": 1.3924070970723176e-06, "loss": 13.2418, "step": 390450 }, { "epoch": 0.7887539037722662, "grad_norm": 352.0291748046875, "learning_rate": 1.3921654143888403e-06, "loss": 13.5124, "step": 390460 }, { "epoch": 0.78877410440495, "grad_norm": 333.3211364746094, "learning_rate": 1.39192374928951e-06, "loss": 17.1857, "step": 390470 }, { "epoch": 0.7887943050376338, "grad_norm": 34.31233215332031, "learning_rate": 1.3916821017755073e-06, "loss": 16.9706, "step": 390480 }, { "epoch": 0.7888145056703176, "grad_norm": 1086.7008056640625, "learning_rate": 1.3914404718480067e-06, "loss": 29.5429, "step": 390490 }, { "epoch": 0.7888347063030015, "grad_norm": 357.86199951171875, "learning_rate": 1.3911988595081894e-06, "loss": 10.5794, "step": 390500 }, { "epoch": 0.7888549069356853, "grad_norm": 258.2838134765625, "learning_rate": 1.3909572647572312e-06, "loss": 18.9244, "step": 390510 }, { "epoch": 0.7888751075683691, "grad_norm": 258.4314880371094, "learning_rate": 1.3907156875963073e-06, "loss": 19.3583, "step": 390520 }, { "epoch": 0.7888953082010528, "grad_norm": 253.37208557128906, "learning_rate": 1.3904741280265998e-06, "loss": 15.9204, "step": 390530 }, { "epoch": 0.7889155088337366, "grad_norm": 528.6141967773438, "learning_rate": 1.3902325860492832e-06, "loss": 12.6963, "step": 390540 }, { "epoch": 0.7889357094664204, "grad_norm": 1056.8690185546875, "learning_rate": 1.3899910616655338e-06, "loss": 39.604, "step": 390550 }, { "epoch": 0.7889559100991043, "grad_norm": 
804.58544921875, "learning_rate": 1.38974955487653e-06, "loss": 17.4595, "step": 390560 }, { "epoch": 0.7889761107317881, "grad_norm": 432.2843322753906, "learning_rate": 1.389508065683452e-06, "loss": 11.8486, "step": 390570 }, { "epoch": 0.7889963113644719, "grad_norm": 760.0068969726562, "learning_rate": 1.3892665940874705e-06, "loss": 26.9778, "step": 390580 }, { "epoch": 0.7890165119971557, "grad_norm": 304.84832763671875, "learning_rate": 1.3890251400897663e-06, "loss": 30.2592, "step": 390590 }, { "epoch": 0.7890367126298395, "grad_norm": 258.3695983886719, "learning_rate": 1.3887837036915169e-06, "loss": 19.3298, "step": 390600 }, { "epoch": 0.7890569132625234, "grad_norm": 174.30075073242188, "learning_rate": 1.3885422848938974e-06, "loss": 15.9546, "step": 390610 }, { "epoch": 0.7890771138952072, "grad_norm": 350.92291259765625, "learning_rate": 1.3883008836980837e-06, "loss": 12.4152, "step": 390620 }, { "epoch": 0.789097314527891, "grad_norm": 228.29713439941406, "learning_rate": 1.3880595001052533e-06, "loss": 14.4857, "step": 390630 }, { "epoch": 0.7891175151605748, "grad_norm": 411.44049072265625, "learning_rate": 1.3878181341165858e-06, "loss": 16.262, "step": 390640 }, { "epoch": 0.7891377157932586, "grad_norm": 518.0469970703125, "learning_rate": 1.3875767857332512e-06, "loss": 19.8735, "step": 390650 }, { "epoch": 0.7891579164259425, "grad_norm": 364.7655944824219, "learning_rate": 1.38733545495643e-06, "loss": 32.1867, "step": 390660 }, { "epoch": 0.7891781170586263, "grad_norm": 492.7501525878906, "learning_rate": 1.3870941417872985e-06, "loss": 18.8283, "step": 390670 }, { "epoch": 0.7891983176913101, "grad_norm": 16.221332550048828, "learning_rate": 1.3868528462270326e-06, "loss": 17.0699, "step": 390680 }, { "epoch": 0.7892185183239939, "grad_norm": 278.474853515625, "learning_rate": 1.3866115682768055e-06, "loss": 13.8446, "step": 390690 }, { "epoch": 0.7892387189566777, "grad_norm": 201.19393920898438, "learning_rate": 1.3863703079377971e-06, "loss": 20.1051, "step": 390700 }, { "epoch": 0.7892589195893616, "grad_norm": 149.2244873046875, "learning_rate": 1.3861290652111819e-06, "loss": 15.4303, "step": 390710 }, { "epoch": 0.7892791202220454, "grad_norm": 588.6261596679688, "learning_rate": 1.3858878400981335e-06, "loss": 18.6694, "step": 390720 }, { "epoch": 0.7892993208547292, "grad_norm": 505.7598571777344, "learning_rate": 1.3856466325998307e-06, "loss": 16.5566, "step": 390730 }, { "epoch": 0.789319521487413, "grad_norm": 253.9817657470703, "learning_rate": 1.3854054427174468e-06, "loss": 20.0498, "step": 390740 }, { "epoch": 0.7893397221200968, "grad_norm": 816.97802734375, "learning_rate": 1.3851642704521596e-06, "loss": 24.4637, "step": 390750 }, { "epoch": 0.7893599227527807, "grad_norm": 622.67529296875, "learning_rate": 1.3849231158051418e-06, "loss": 13.5268, "step": 390760 }, { "epoch": 0.7893801233854645, "grad_norm": 79.00733947753906, "learning_rate": 1.3846819787775723e-06, "loss": 13.9648, "step": 390770 }, { "epoch": 0.7894003240181483, "grad_norm": 273.0272216796875, "learning_rate": 1.3844408593706238e-06, "loss": 13.587, "step": 390780 }, { "epoch": 0.789420524650832, "grad_norm": 169.9261474609375, "learning_rate": 1.3841997575854703e-06, "loss": 12.3255, "step": 390790 }, { "epoch": 0.7894407252835158, "grad_norm": 376.6979064941406, "learning_rate": 1.3839586734232907e-06, "loss": 11.2065, "step": 390800 }, { "epoch": 0.7894609259161997, "grad_norm": 87.50740814208984, "learning_rate": 1.3837176068852565e-06, "loss": 4.0255, "step": 
390810 }, { "epoch": 0.7894811265488835, "grad_norm": 231.6638946533203, "learning_rate": 1.3834765579725452e-06, "loss": 13.0445, "step": 390820 }, { "epoch": 0.7895013271815673, "grad_norm": 188.67898559570312, "learning_rate": 1.3832355266863307e-06, "loss": 11.0469, "step": 390830 }, { "epoch": 0.7895215278142511, "grad_norm": 247.35105895996094, "learning_rate": 1.3829945130277861e-06, "loss": 21.8402, "step": 390840 }, { "epoch": 0.7895417284469349, "grad_norm": 713.51513671875, "learning_rate": 1.3827535169980888e-06, "loss": 17.8452, "step": 390850 }, { "epoch": 0.7895619290796188, "grad_norm": 850.671875, "learning_rate": 1.3825125385984123e-06, "loss": 28.3461, "step": 390860 }, { "epoch": 0.7895821297123026, "grad_norm": 772.30322265625, "learning_rate": 1.3822715778299295e-06, "loss": 21.265, "step": 390870 }, { "epoch": 0.7896023303449864, "grad_norm": 133.23912048339844, "learning_rate": 1.3820306346938161e-06, "loss": 21.7986, "step": 390880 }, { "epoch": 0.7896225309776702, "grad_norm": 674.4517822265625, "learning_rate": 1.3817897091912485e-06, "loss": 20.0414, "step": 390890 }, { "epoch": 0.789642731610354, "grad_norm": 457.153564453125, "learning_rate": 1.3815488013233986e-06, "loss": 16.7648, "step": 390900 }, { "epoch": 0.7896629322430379, "grad_norm": 712.7386474609375, "learning_rate": 1.3813079110914396e-06, "loss": 30.4711, "step": 390910 }, { "epoch": 0.7896831328757217, "grad_norm": 241.47642517089844, "learning_rate": 1.3810670384965469e-06, "loss": 19.2033, "step": 390920 }, { "epoch": 0.7897033335084055, "grad_norm": 442.91015625, "learning_rate": 1.380826183539898e-06, "loss": 32.3804, "step": 390930 }, { "epoch": 0.7897235341410893, "grad_norm": 386.73651123046875, "learning_rate": 1.38058534622266e-06, "loss": 22.0351, "step": 390940 }, { "epoch": 0.7897437347737731, "grad_norm": 645.64111328125, "learning_rate": 1.3803445265460096e-06, "loss": 7.6352, "step": 390950 }, { "epoch": 0.789763935406457, "grad_norm": 337.479248046875, "learning_rate": 1.3801037245111233e-06, "loss": 30.6434, "step": 390960 }, { "epoch": 0.7897841360391408, "grad_norm": 595.5153198242188, "learning_rate": 1.3798629401191715e-06, "loss": 20.989, "step": 390970 }, { "epoch": 0.7898043366718246, "grad_norm": 278.4434509277344, "learning_rate": 1.3796221733713278e-06, "loss": 15.418, "step": 390980 }, { "epoch": 0.7898245373045084, "grad_norm": 152.62255859375, "learning_rate": 1.3793814242687676e-06, "loss": 17.0787, "step": 390990 }, { "epoch": 0.7898447379371922, "grad_norm": 111.47569274902344, "learning_rate": 1.3791406928126638e-06, "loss": 13.7548, "step": 391000 }, { "epoch": 0.789864938569876, "grad_norm": 407.932861328125, "learning_rate": 1.3788999790041867e-06, "loss": 12.2205, "step": 391010 }, { "epoch": 0.7898851392025599, "grad_norm": 503.17254638671875, "learning_rate": 1.3786592828445144e-06, "loss": 16.619, "step": 391020 }, { "epoch": 0.7899053398352437, "grad_norm": 383.8878173828125, "learning_rate": 1.3784186043348151e-06, "loss": 8.142, "step": 391030 }, { "epoch": 0.7899255404679274, "grad_norm": 448.76788330078125, "learning_rate": 1.3781779434762666e-06, "loss": 39.0891, "step": 391040 }, { "epoch": 0.7899457411006112, "grad_norm": 425.3940734863281, "learning_rate": 1.3779373002700391e-06, "loss": 18.9904, "step": 391050 }, { "epoch": 0.789965941733295, "grad_norm": 663.6753540039062, "learning_rate": 1.377696674717305e-06, "loss": 19.2672, "step": 391060 }, { "epoch": 0.7899861423659789, "grad_norm": 461.9291076660156, "learning_rate": 
1.3774560668192389e-06, "loss": 15.2556, "step": 391070 }, { "epoch": 0.7900063429986627, "grad_norm": 500.15228271484375, "learning_rate": 1.3772154765770106e-06, "loss": 25.29, "step": 391080 }, { "epoch": 0.7900265436313465, "grad_norm": 176.2303466796875, "learning_rate": 1.3769749039917968e-06, "loss": 27.7685, "step": 391090 }, { "epoch": 0.7900467442640303, "grad_norm": 576.8667602539062, "learning_rate": 1.3767343490647668e-06, "loss": 20.2137, "step": 391100 }, { "epoch": 0.7900669448967141, "grad_norm": 556.3174438476562, "learning_rate": 1.376493811797095e-06, "loss": 19.5516, "step": 391110 }, { "epoch": 0.790087145529398, "grad_norm": 488.4520568847656, "learning_rate": 1.3762532921899529e-06, "loss": 21.1224, "step": 391120 }, { "epoch": 0.7901073461620818, "grad_norm": 227.12380981445312, "learning_rate": 1.3760127902445114e-06, "loss": 25.1898, "step": 391130 }, { "epoch": 0.7901275467947656, "grad_norm": 618.3762817382812, "learning_rate": 1.3757723059619455e-06, "loss": 22.9171, "step": 391140 }, { "epoch": 0.7901477474274494, "grad_norm": 223.0682830810547, "learning_rate": 1.3755318393434259e-06, "loss": 32.6294, "step": 391150 }, { "epoch": 0.7901679480601332, "grad_norm": 148.1354522705078, "learning_rate": 1.3752913903901227e-06, "loss": 12.9979, "step": 391160 }, { "epoch": 0.7901881486928171, "grad_norm": 240.58709716796875, "learning_rate": 1.3750509591032102e-06, "loss": 14.8266, "step": 391170 }, { "epoch": 0.7902083493255009, "grad_norm": 216.39732360839844, "learning_rate": 1.3748105454838623e-06, "loss": 21.5026, "step": 391180 }, { "epoch": 0.7902285499581847, "grad_norm": 152.46498107910156, "learning_rate": 1.3745701495332447e-06, "loss": 10.914, "step": 391190 }, { "epoch": 0.7902487505908685, "grad_norm": 348.2789001464844, "learning_rate": 1.3743297712525334e-06, "loss": 18.8635, "step": 391200 }, { "epoch": 0.7902689512235523, "grad_norm": 1.2223727703094482, "learning_rate": 1.3740894106428997e-06, "loss": 12.6695, "step": 391210 }, { "epoch": 0.7902891518562362, "grad_norm": 180.6900634765625, "learning_rate": 1.373849067705515e-06, "loss": 16.0197, "step": 391220 }, { "epoch": 0.79030935248892, "grad_norm": 344.8309631347656, "learning_rate": 1.3736087424415483e-06, "loss": 16.7079, "step": 391230 }, { "epoch": 0.7903295531216038, "grad_norm": 408.68280029296875, "learning_rate": 1.373368434852173e-06, "loss": 11.0919, "step": 391240 }, { "epoch": 0.7903497537542876, "grad_norm": 176.5845184326172, "learning_rate": 1.373128144938563e-06, "loss": 9.4938, "step": 391250 }, { "epoch": 0.7903699543869714, "grad_norm": 595.7118530273438, "learning_rate": 1.372887872701884e-06, "loss": 16.1899, "step": 391260 }, { "epoch": 0.7903901550196553, "grad_norm": 70.69458770751953, "learning_rate": 1.372647618143309e-06, "loss": 20.3304, "step": 391270 }, { "epoch": 0.7904103556523391, "grad_norm": 634.7303466796875, "learning_rate": 1.372407381264011e-06, "loss": 31.9421, "step": 391280 }, { "epoch": 0.7904305562850229, "grad_norm": 644.4024658203125, "learning_rate": 1.37216716206516e-06, "loss": 17.8286, "step": 391290 }, { "epoch": 0.7904507569177066, "grad_norm": 440.9824523925781, "learning_rate": 1.3719269605479241e-06, "loss": 19.8846, "step": 391300 }, { "epoch": 0.7904709575503904, "grad_norm": 396.9098205566406, "learning_rate": 1.3716867767134783e-06, "loss": 25.3281, "step": 391310 }, { "epoch": 0.7904911581830742, "grad_norm": 55.08015823364258, "learning_rate": 1.3714466105629908e-06, "loss": 12.2769, "step": 391320 }, { "epoch": 
0.7905113588157581, "grad_norm": 279.33721923828125, "learning_rate": 1.3712064620976305e-06, "loss": 13.5692, "step": 391330 }, { "epoch": 0.7905315594484419, "grad_norm": 662.1928100585938, "learning_rate": 1.3709663313185723e-06, "loss": 26.2303, "step": 391340 }, { "epoch": 0.7905517600811257, "grad_norm": 307.8935546875, "learning_rate": 1.3707262182269814e-06, "loss": 13.6516, "step": 391350 }, { "epoch": 0.7905719607138095, "grad_norm": 438.3340759277344, "learning_rate": 1.370486122824033e-06, "loss": 16.9934, "step": 391360 }, { "epoch": 0.7905921613464933, "grad_norm": 624.7930908203125, "learning_rate": 1.3702460451108934e-06, "loss": 21.0231, "step": 391370 }, { "epoch": 0.7906123619791772, "grad_norm": 230.44725036621094, "learning_rate": 1.370005985088736e-06, "loss": 18.2515, "step": 391380 }, { "epoch": 0.790632562611861, "grad_norm": 449.1631164550781, "learning_rate": 1.3697659427587284e-06, "loss": 18.5813, "step": 391390 }, { "epoch": 0.7906527632445448, "grad_norm": 390.2234802246094, "learning_rate": 1.3695259181220405e-06, "loss": 26.9494, "step": 391400 }, { "epoch": 0.7906729638772286, "grad_norm": 292.9412841796875, "learning_rate": 1.3692859111798446e-06, "loss": 13.5067, "step": 391410 }, { "epoch": 0.7906931645099124, "grad_norm": 374.9375915527344, "learning_rate": 1.3690459219333068e-06, "loss": 20.0724, "step": 391420 }, { "epoch": 0.7907133651425963, "grad_norm": 415.5243225097656, "learning_rate": 1.3688059503836004e-06, "loss": 17.3517, "step": 391430 }, { "epoch": 0.7907335657752801, "grad_norm": 420.6901550292969, "learning_rate": 1.3685659965318937e-06, "loss": 8.9723, "step": 391440 }, { "epoch": 0.7907537664079639, "grad_norm": 334.8971862792969, "learning_rate": 1.368326060379354e-06, "loss": 10.9845, "step": 391450 }, { "epoch": 0.7907739670406477, "grad_norm": 115.13829040527344, "learning_rate": 1.368086141927154e-06, "loss": 9.9702, "step": 391460 }, { "epoch": 0.7907941676733315, "grad_norm": 394.88336181640625, "learning_rate": 1.367846241176462e-06, "loss": 23.7669, "step": 391470 }, { "epoch": 0.7908143683060154, "grad_norm": 209.13877868652344, "learning_rate": 1.3676063581284454e-06, "loss": 16.8719, "step": 391480 }, { "epoch": 0.7908345689386992, "grad_norm": 459.1510314941406, "learning_rate": 1.367366492784275e-06, "loss": 17.2768, "step": 391490 }, { "epoch": 0.790854769571383, "grad_norm": 418.3271789550781, "learning_rate": 1.3671266451451209e-06, "loss": 28.1231, "step": 391500 }, { "epoch": 0.7908749702040668, "grad_norm": 7.374743938446045, "learning_rate": 1.3668868152121505e-06, "loss": 14.3796, "step": 391510 }, { "epoch": 0.7908951708367506, "grad_norm": 238.83999633789062, "learning_rate": 1.3666470029865325e-06, "loss": 24.2418, "step": 391520 }, { "epoch": 0.7909153714694345, "grad_norm": 266.6180114746094, "learning_rate": 1.3664072084694374e-06, "loss": 16.7649, "step": 391530 }, { "epoch": 0.7909355721021183, "grad_norm": 365.9168395996094, "learning_rate": 1.3661674316620332e-06, "loss": 12.9025, "step": 391540 }, { "epoch": 0.7909557727348021, "grad_norm": 721.8485107421875, "learning_rate": 1.3659276725654863e-06, "loss": 15.6994, "step": 391550 }, { "epoch": 0.7909759733674858, "grad_norm": 696.6354370117188, "learning_rate": 1.3656879311809674e-06, "loss": 20.8847, "step": 391560 }, { "epoch": 0.7909961740001696, "grad_norm": 630.0370483398438, "learning_rate": 1.365448207509646e-06, "loss": 26.8489, "step": 391570 }, { "epoch": 0.7910163746328535, "grad_norm": 446.8143615722656, "learning_rate": 
1.3652085015526895e-06, "loss": 15.1067, "step": 391580 }, { "epoch": 0.7910365752655373, "grad_norm": 447.8006896972656, "learning_rate": 1.3649688133112644e-06, "loss": 27.6257, "step": 391590 }, { "epoch": 0.7910567758982211, "grad_norm": 569.8394165039062, "learning_rate": 1.3647291427865417e-06, "loss": 15.9739, "step": 391600 }, { "epoch": 0.7910769765309049, "grad_norm": 596.05908203125, "learning_rate": 1.364489489979688e-06, "loss": 24.0909, "step": 391610 }, { "epoch": 0.7910971771635887, "grad_norm": 747.9852905273438, "learning_rate": 1.3642498548918704e-06, "loss": 12.6635, "step": 391620 }, { "epoch": 0.7911173777962726, "grad_norm": 701.9400634765625, "learning_rate": 1.3640102375242598e-06, "loss": 22.2039, "step": 391630 }, { "epoch": 0.7911375784289564, "grad_norm": 184.07884216308594, "learning_rate": 1.3637706378780209e-06, "loss": 18.1476, "step": 391640 }, { "epoch": 0.7911577790616402, "grad_norm": 318.0747985839844, "learning_rate": 1.3635310559543235e-06, "loss": 24.6376, "step": 391650 }, { "epoch": 0.791177979694324, "grad_norm": 233.07652282714844, "learning_rate": 1.3632914917543338e-06, "loss": 15.8256, "step": 391660 }, { "epoch": 0.7911981803270078, "grad_norm": 165.37388610839844, "learning_rate": 1.3630519452792219e-06, "loss": 17.8628, "step": 391670 }, { "epoch": 0.7912183809596917, "grad_norm": 176.56077575683594, "learning_rate": 1.3628124165301537e-06, "loss": 12.3118, "step": 391680 }, { "epoch": 0.7912385815923755, "grad_norm": 189.4002685546875, "learning_rate": 1.362572905508295e-06, "loss": 18.7685, "step": 391690 }, { "epoch": 0.7912587822250593, "grad_norm": 375.29034423828125, "learning_rate": 1.3623334122148164e-06, "loss": 21.122, "step": 391700 }, { "epoch": 0.7912789828577431, "grad_norm": 212.97000122070312, "learning_rate": 1.3620939366508818e-06, "loss": 9.0183, "step": 391710 }, { "epoch": 0.7912991834904269, "grad_norm": 386.3677062988281, "learning_rate": 1.361854478817662e-06, "loss": 19.8714, "step": 391720 }, { "epoch": 0.7913193841231108, "grad_norm": 34.30253601074219, "learning_rate": 1.361615038716322e-06, "loss": 19.0238, "step": 391730 }, { "epoch": 0.7913395847557946, "grad_norm": 350.99078369140625, "learning_rate": 1.3613756163480275e-06, "loss": 34.4645, "step": 391740 }, { "epoch": 0.7913597853884784, "grad_norm": 314.75653076171875, "learning_rate": 1.3611362117139481e-06, "loss": 19.9491, "step": 391750 }, { "epoch": 0.7913799860211622, "grad_norm": 260.4652404785156, "learning_rate": 1.3608968248152498e-06, "loss": 12.6497, "step": 391760 }, { "epoch": 0.791400186653846, "grad_norm": 266.1238708496094, "learning_rate": 1.3606574556530976e-06, "loss": 16.1195, "step": 391770 }, { "epoch": 0.7914203872865299, "grad_norm": 79.61407470703125, "learning_rate": 1.3604181042286597e-06, "loss": 13.4222, "step": 391780 }, { "epoch": 0.7914405879192137, "grad_norm": 278.7680969238281, "learning_rate": 1.3601787705431052e-06, "loss": 16.558, "step": 391790 }, { "epoch": 0.7914607885518975, "grad_norm": 515.685302734375, "learning_rate": 1.3599394545975952e-06, "loss": 13.3479, "step": 391800 }, { "epoch": 0.7914809891845812, "grad_norm": 337.426025390625, "learning_rate": 1.3597001563932982e-06, "loss": 25.6135, "step": 391810 }, { "epoch": 0.791501189817265, "grad_norm": 246.74937438964844, "learning_rate": 1.3594608759313832e-06, "loss": 10.5852, "step": 391820 }, { "epoch": 0.7915213904499488, "grad_norm": 334.9891662597656, "learning_rate": 1.3592216132130142e-06, "loss": 18.746, "step": 391830 }, { "epoch": 
0.7915415910826327, "grad_norm": 3060.901123046875, "learning_rate": 1.358982368239356e-06, "loss": 17.8981, "step": 391840 }, { "epoch": 0.7915617917153165, "grad_norm": 65.82020568847656, "learning_rate": 1.3587431410115765e-06, "loss": 10.5679, "step": 391850 }, { "epoch": 0.7915819923480003, "grad_norm": 447.283447265625, "learning_rate": 1.3585039315308436e-06, "loss": 22.4207, "step": 391860 }, { "epoch": 0.7916021929806841, "grad_norm": 1267.4034423828125, "learning_rate": 1.3582647397983185e-06, "loss": 39.3353, "step": 391870 }, { "epoch": 0.791622393613368, "grad_norm": 27.980613708496094, "learning_rate": 1.3580255658151687e-06, "loss": 18.3648, "step": 391880 }, { "epoch": 0.7916425942460518, "grad_norm": 139.32876586914062, "learning_rate": 1.3577864095825627e-06, "loss": 11.3425, "step": 391890 }, { "epoch": 0.7916627948787356, "grad_norm": 185.72015380859375, "learning_rate": 1.3575472711016634e-06, "loss": 18.7808, "step": 391900 }, { "epoch": 0.7916829955114194, "grad_norm": 657.601318359375, "learning_rate": 1.3573081503736362e-06, "loss": 14.2909, "step": 391910 }, { "epoch": 0.7917031961441032, "grad_norm": 134.64427185058594, "learning_rate": 1.3570690473996483e-06, "loss": 19.7827, "step": 391920 }, { "epoch": 0.791723396776787, "grad_norm": 399.5252380371094, "learning_rate": 1.356829962180864e-06, "loss": 17.1316, "step": 391930 }, { "epoch": 0.7917435974094709, "grad_norm": 217.53396606445312, "learning_rate": 1.356590894718447e-06, "loss": 18.9685, "step": 391940 }, { "epoch": 0.7917637980421547, "grad_norm": 192.8686981201172, "learning_rate": 1.356351845013566e-06, "loss": 18.3453, "step": 391950 }, { "epoch": 0.7917839986748385, "grad_norm": 193.24600219726562, "learning_rate": 1.3561128130673823e-06, "loss": 11.519, "step": 391960 }, { "epoch": 0.7918041993075223, "grad_norm": 93.30621337890625, "learning_rate": 1.3558737988810644e-06, "loss": 17.8471, "step": 391970 }, { "epoch": 0.7918243999402061, "grad_norm": 257.2477111816406, "learning_rate": 1.3556348024557743e-06, "loss": 11.8228, "step": 391980 }, { "epoch": 0.79184460057289, "grad_norm": 411.4393615722656, "learning_rate": 1.3553958237926794e-06, "loss": 22.5637, "step": 391990 }, { "epoch": 0.7918648012055738, "grad_norm": 919.718994140625, "learning_rate": 1.3551568628929434e-06, "loss": 34.8736, "step": 392000 }, { "epoch": 0.7918850018382576, "grad_norm": 385.0460510253906, "learning_rate": 1.3549179197577295e-06, "loss": 14.2107, "step": 392010 }, { "epoch": 0.7919052024709414, "grad_norm": 365.8689270019531, "learning_rate": 1.3546789943882045e-06, "loss": 17.5699, "step": 392020 }, { "epoch": 0.7919254031036252, "grad_norm": 262.1510009765625, "learning_rate": 1.3544400867855306e-06, "loss": 24.0885, "step": 392030 }, { "epoch": 0.7919456037363091, "grad_norm": 457.37420654296875, "learning_rate": 1.3542011969508756e-06, "loss": 19.9309, "step": 392040 }, { "epoch": 0.7919658043689929, "grad_norm": 361.2774658203125, "learning_rate": 1.3539623248854012e-06, "loss": 11.8868, "step": 392050 }, { "epoch": 0.7919860050016767, "grad_norm": 27.487533569335938, "learning_rate": 1.3537234705902709e-06, "loss": 9.8462, "step": 392060 }, { "epoch": 0.7920062056343604, "grad_norm": 394.543212890625, "learning_rate": 1.353484634066652e-06, "loss": 8.8337, "step": 392070 }, { "epoch": 0.7920264062670442, "grad_norm": 330.3654479980469, "learning_rate": 1.3532458153157062e-06, "loss": 8.504, "step": 392080 }, { "epoch": 0.7920466068997281, "grad_norm": 178.21826171875, "learning_rate": 
1.3530070143385966e-06, "loss": 13.4085, "step": 392090 }, { "epoch": 0.7920668075324119, "grad_norm": 429.0102233886719, "learning_rate": 1.3527682311364886e-06, "loss": 20.5051, "step": 392100 }, { "epoch": 0.7920870081650957, "grad_norm": 422.8012390136719, "learning_rate": 1.3525294657105476e-06, "loss": 9.6095, "step": 392110 }, { "epoch": 0.7921072087977795, "grad_norm": 0.0, "learning_rate": 1.352290718061935e-06, "loss": 9.1739, "step": 392120 }, { "epoch": 0.7921274094304633, "grad_norm": 463.0601501464844, "learning_rate": 1.3520519881918143e-06, "loss": 25.2802, "step": 392130 }, { "epoch": 0.7921476100631472, "grad_norm": 143.3258819580078, "learning_rate": 1.3518132761013509e-06, "loss": 19.414, "step": 392140 }, { "epoch": 0.792167810695831, "grad_norm": 310.7194519042969, "learning_rate": 1.351574581791707e-06, "loss": 21.0858, "step": 392150 }, { "epoch": 0.7921880113285148, "grad_norm": 509.84344482421875, "learning_rate": 1.3513359052640445e-06, "loss": 19.4459, "step": 392160 }, { "epoch": 0.7922082119611986, "grad_norm": 312.1219482421875, "learning_rate": 1.3510972465195283e-06, "loss": 24.1111, "step": 392170 }, { "epoch": 0.7922284125938824, "grad_norm": 808.8726196289062, "learning_rate": 1.350858605559323e-06, "loss": 34.325, "step": 392180 }, { "epoch": 0.7922486132265663, "grad_norm": 570.1800537109375, "learning_rate": 1.3506199823845905e-06, "loss": 24.7095, "step": 392190 }, { "epoch": 0.7922688138592501, "grad_norm": 271.8632507324219, "learning_rate": 1.3503813769964923e-06, "loss": 18.3585, "step": 392200 }, { "epoch": 0.7922890144919339, "grad_norm": 146.16278076171875, "learning_rate": 1.3501427893961938e-06, "loss": 14.5004, "step": 392210 }, { "epoch": 0.7923092151246177, "grad_norm": 56.90432357788086, "learning_rate": 1.3499042195848571e-06, "loss": 27.4381, "step": 392220 }, { "epoch": 0.7923294157573015, "grad_norm": 128.71359252929688, "learning_rate": 1.3496656675636427e-06, "loss": 31.9426, "step": 392230 }, { "epoch": 0.7923496163899854, "grad_norm": 190.23631286621094, "learning_rate": 1.3494271333337162e-06, "loss": 19.4458, "step": 392240 }, { "epoch": 0.7923698170226692, "grad_norm": 35.6628532409668, "learning_rate": 1.349188616896238e-06, "loss": 30.5413, "step": 392250 }, { "epoch": 0.792390017655353, "grad_norm": 349.12017822265625, "learning_rate": 1.3489501182523735e-06, "loss": 34.5905, "step": 392260 }, { "epoch": 0.7924102182880368, "grad_norm": 432.92059326171875, "learning_rate": 1.3487116374032811e-06, "loss": 14.4094, "step": 392270 }, { "epoch": 0.7924304189207206, "grad_norm": 453.3785705566406, "learning_rate": 1.3484731743501272e-06, "loss": 30.512, "step": 392280 }, { "epoch": 0.7924506195534045, "grad_norm": 403.2294921875, "learning_rate": 1.3482347290940723e-06, "loss": 19.341, "step": 392290 }, { "epoch": 0.7924708201860883, "grad_norm": 273.9934387207031, "learning_rate": 1.3479963016362768e-06, "loss": 21.9321, "step": 392300 }, { "epoch": 0.7924910208187721, "grad_norm": 807.1016845703125, "learning_rate": 1.3477578919779062e-06, "loss": 26.1982, "step": 392310 }, { "epoch": 0.7925112214514558, "grad_norm": 605.7578735351562, "learning_rate": 1.3475195001201186e-06, "loss": 29.8424, "step": 392320 }, { "epoch": 0.7925314220841396, "grad_norm": 32.64149475097656, "learning_rate": 1.34728112606408e-06, "loss": 17.4115, "step": 392330 }, { "epoch": 0.7925516227168234, "grad_norm": 239.3573455810547, "learning_rate": 1.3470427698109496e-06, "loss": 24.8317, "step": 392340 }, { "epoch": 0.7925718233495073, "grad_norm": 
159.31484985351562, "learning_rate": 1.3468044313618883e-06, "loss": 31.5261, "step": 392350 }, { "epoch": 0.7925920239821911, "grad_norm": 235.64517211914062, "learning_rate": 1.346566110718061e-06, "loss": 17.4422, "step": 392360 }, { "epoch": 0.7926122246148749, "grad_norm": 314.08721923828125, "learning_rate": 1.3463278078806274e-06, "loss": 19.0374, "step": 392370 }, { "epoch": 0.7926324252475587, "grad_norm": 356.0980224609375, "learning_rate": 1.346089522850747e-06, "loss": 20.1301, "step": 392380 }, { "epoch": 0.7926526258802425, "grad_norm": 180.72988891601562, "learning_rate": 1.3458512556295833e-06, "loss": 18.3178, "step": 392390 }, { "epoch": 0.7926728265129264, "grad_norm": 219.0718536376953, "learning_rate": 1.3456130062183003e-06, "loss": 16.6303, "step": 392400 }, { "epoch": 0.7926930271456102, "grad_norm": 306.7239990234375, "learning_rate": 1.3453747746180535e-06, "loss": 30.4495, "step": 392410 }, { "epoch": 0.792713227778294, "grad_norm": 174.364501953125, "learning_rate": 1.3451365608300066e-06, "loss": 16.6836, "step": 392420 }, { "epoch": 0.7927334284109778, "grad_norm": 496.9266357421875, "learning_rate": 1.3448983648553227e-06, "loss": 18.4559, "step": 392430 }, { "epoch": 0.7927536290436616, "grad_norm": 360.1391296386719, "learning_rate": 1.3446601866951604e-06, "loss": 21.449, "step": 392440 }, { "epoch": 0.7927738296763455, "grad_norm": 210.6453094482422, "learning_rate": 1.3444220263506797e-06, "loss": 20.3085, "step": 392450 }, { "epoch": 0.7927940303090293, "grad_norm": 261.69805908203125, "learning_rate": 1.3441838838230425e-06, "loss": 16.1359, "step": 392460 }, { "epoch": 0.7928142309417131, "grad_norm": 123.70108032226562, "learning_rate": 1.343945759113413e-06, "loss": 9.9051, "step": 392470 }, { "epoch": 0.7928344315743969, "grad_norm": 181.96739196777344, "learning_rate": 1.3437076522229454e-06, "loss": 10.5061, "step": 392480 }, { "epoch": 0.7928546322070807, "grad_norm": 142.8966522216797, "learning_rate": 1.3434695631528028e-06, "loss": 22.1133, "step": 392490 }, { "epoch": 0.7928748328397646, "grad_norm": 422.648681640625, "learning_rate": 1.3432314919041478e-06, "loss": 16.3421, "step": 392500 }, { "epoch": 0.7928950334724484, "grad_norm": 111.23240661621094, "learning_rate": 1.342993438478139e-06, "loss": 21.4282, "step": 392510 }, { "epoch": 0.7929152341051322, "grad_norm": 219.13015747070312, "learning_rate": 1.3427554028759355e-06, "loss": 11.9455, "step": 392520 }, { "epoch": 0.792935434737816, "grad_norm": 667.111572265625, "learning_rate": 1.3425173850986994e-06, "loss": 13.5365, "step": 392530 }, { "epoch": 0.7929556353704998, "grad_norm": 497.7477111816406, "learning_rate": 1.3422793851475907e-06, "loss": 17.9203, "step": 392540 }, { "epoch": 0.7929758360031837, "grad_norm": 183.91111755371094, "learning_rate": 1.3420414030237667e-06, "loss": 5.6767, "step": 392550 }, { "epoch": 0.7929960366358675, "grad_norm": 481.2868347167969, "learning_rate": 1.3418034387283907e-06, "loss": 21.1909, "step": 392560 }, { "epoch": 0.7930162372685513, "grad_norm": 325.7046203613281, "learning_rate": 1.3415654922626198e-06, "loss": 16.1793, "step": 392570 }, { "epoch": 0.793036437901235, "grad_norm": 651.7623291015625, "learning_rate": 1.3413275636276164e-06, "loss": 23.6038, "step": 392580 }, { "epoch": 0.7930566385339188, "grad_norm": 733.0885620117188, "learning_rate": 1.3410896528245371e-06, "loss": 32.7139, "step": 392590 }, { "epoch": 0.7930768391666027, "grad_norm": 287.5675048828125, "learning_rate": 1.3408517598545446e-06, "loss": 17.5776, 
"step": 392600 }, { "epoch": 0.7930970397992865, "grad_norm": 193.81536865234375, "learning_rate": 1.3406138847187971e-06, "loss": 9.7663, "step": 392610 }, { "epoch": 0.7931172404319703, "grad_norm": 168.9902801513672, "learning_rate": 1.340376027418452e-06, "loss": 31.5444, "step": 392620 }, { "epoch": 0.7931374410646541, "grad_norm": 209.39915466308594, "learning_rate": 1.3401381879546716e-06, "loss": 22.2623, "step": 392630 }, { "epoch": 0.7931576416973379, "grad_norm": 677.6997680664062, "learning_rate": 1.3399003663286125e-06, "loss": 28.2255, "step": 392640 }, { "epoch": 0.7931778423300218, "grad_norm": 264.1876525878906, "learning_rate": 1.3396625625414362e-06, "loss": 26.0871, "step": 392650 }, { "epoch": 0.7931980429627056, "grad_norm": 228.8098602294922, "learning_rate": 1.3394247765943013e-06, "loss": 13.0514, "step": 392660 }, { "epoch": 0.7932182435953894, "grad_norm": 172.20529174804688, "learning_rate": 1.339187008488364e-06, "loss": 19.7662, "step": 392670 }, { "epoch": 0.7932384442280732, "grad_norm": 303.61370849609375, "learning_rate": 1.338949258224787e-06, "loss": 22.0276, "step": 392680 }, { "epoch": 0.793258644860757, "grad_norm": 505.311279296875, "learning_rate": 1.3387115258047272e-06, "loss": 14.4202, "step": 392690 }, { "epoch": 0.7932788454934409, "grad_norm": 266.0522766113281, "learning_rate": 1.3384738112293415e-06, "loss": 18.1336, "step": 392700 }, { "epoch": 0.7932990461261247, "grad_norm": 468.30267333984375, "learning_rate": 1.3382361144997912e-06, "loss": 19.9952, "step": 392710 }, { "epoch": 0.7933192467588085, "grad_norm": 328.1174011230469, "learning_rate": 1.337998435617235e-06, "loss": 14.7651, "step": 392720 }, { "epoch": 0.7933394473914923, "grad_norm": 688.8742065429688, "learning_rate": 1.3377607745828302e-06, "loss": 12.9913, "step": 392730 }, { "epoch": 0.7933596480241761, "grad_norm": 296.8131103515625, "learning_rate": 1.337523131397734e-06, "loss": 14.5713, "step": 392740 }, { "epoch": 0.79337984865686, "grad_norm": 164.2072296142578, "learning_rate": 1.3372855060631067e-06, "loss": 19.8475, "step": 392750 }, { "epoch": 0.7934000492895438, "grad_norm": 210.5634307861328, "learning_rate": 1.3370478985801062e-06, "loss": 22.8115, "step": 392760 }, { "epoch": 0.7934202499222276, "grad_norm": 176.11083984375, "learning_rate": 1.3368103089498886e-06, "loss": 21.7571, "step": 392770 }, { "epoch": 0.7934404505549114, "grad_norm": 314.0799560546875, "learning_rate": 1.3365727371736127e-06, "loss": 16.8113, "step": 392780 }, { "epoch": 0.7934606511875952, "grad_norm": 311.234375, "learning_rate": 1.3363351832524385e-06, "loss": 9.8002, "step": 392790 }, { "epoch": 0.793480851820279, "grad_norm": 612.8961181640625, "learning_rate": 1.3360976471875226e-06, "loss": 16.0739, "step": 392800 }, { "epoch": 0.7935010524529629, "grad_norm": 374.7096252441406, "learning_rate": 1.3358601289800211e-06, "loss": 17.2786, "step": 392810 }, { "epoch": 0.7935212530856467, "grad_norm": 73.8741683959961, "learning_rate": 1.335622628631094e-06, "loss": 8.7438, "step": 392820 }, { "epoch": 0.7935414537183304, "grad_norm": 321.8848571777344, "learning_rate": 1.3353851461418976e-06, "loss": 18.7795, "step": 392830 }, { "epoch": 0.7935616543510142, "grad_norm": 510.4248046875, "learning_rate": 1.3351476815135883e-06, "loss": 19.6084, "step": 392840 }, { "epoch": 0.793581854983698, "grad_norm": 207.49549865722656, "learning_rate": 1.3349102347473264e-06, "loss": 14.2602, "step": 392850 }, { "epoch": 0.7936020556163819, "grad_norm": 4.39988374710083, "learning_rate": 
1.334672805844266e-06, "loss": 10.138, "step": 392860 }, { "epoch": 0.7936222562490657, "grad_norm": 22.672176361083984, "learning_rate": 1.3344353948055672e-06, "loss": 12.7341, "step": 392870 }, { "epoch": 0.7936424568817495, "grad_norm": 358.9695129394531, "learning_rate": 1.3341980016323841e-06, "loss": 20.6453, "step": 392880 }, { "epoch": 0.7936626575144333, "grad_norm": 294.8444519042969, "learning_rate": 1.333960626325877e-06, "loss": 38.9246, "step": 392890 }, { "epoch": 0.7936828581471171, "grad_norm": 344.99530029296875, "learning_rate": 1.333723268887201e-06, "loss": 19.0355, "step": 392900 }, { "epoch": 0.793703058779801, "grad_norm": 212.49868774414062, "learning_rate": 1.3334859293175113e-06, "loss": 15.9298, "step": 392910 }, { "epoch": 0.7937232594124848, "grad_norm": 508.03704833984375, "learning_rate": 1.3332486076179684e-06, "loss": 21.3234, "step": 392920 }, { "epoch": 0.7937434600451686, "grad_norm": 94.76934051513672, "learning_rate": 1.3330113037897257e-06, "loss": 8.6602, "step": 392930 }, { "epoch": 0.7937636606778524, "grad_norm": 338.82281494140625, "learning_rate": 1.3327740178339421e-06, "loss": 7.4053, "step": 392940 }, { "epoch": 0.7937838613105362, "grad_norm": 106.09545135498047, "learning_rate": 1.3325367497517739e-06, "loss": 13.124, "step": 392950 }, { "epoch": 0.7938040619432201, "grad_norm": 221.4761199951172, "learning_rate": 1.3322994995443744e-06, "loss": 28.3337, "step": 392960 }, { "epoch": 0.7938242625759039, "grad_norm": 269.02587890625, "learning_rate": 1.3320622672129046e-06, "loss": 19.4028, "step": 392970 }, { "epoch": 0.7938444632085877, "grad_norm": 295.42327880859375, "learning_rate": 1.331825052758518e-06, "loss": 7.8659, "step": 392980 }, { "epoch": 0.7938646638412715, "grad_norm": 80.26911926269531, "learning_rate": 1.3315878561823697e-06, "loss": 13.2659, "step": 392990 }, { "epoch": 0.7938848644739553, "grad_norm": 492.2503356933594, "learning_rate": 1.3313506774856177e-06, "loss": 18.8608, "step": 393000 }, { "epoch": 0.7939050651066392, "grad_norm": 72.5807876586914, "learning_rate": 1.33111351666942e-06, "loss": 10.6923, "step": 393010 }, { "epoch": 0.793925265739323, "grad_norm": 520.0384521484375, "learning_rate": 1.3308763737349273e-06, "loss": 18.93, "step": 393020 }, { "epoch": 0.7939454663720068, "grad_norm": 439.1123962402344, "learning_rate": 1.3306392486832982e-06, "loss": 23.7647, "step": 393030 }, { "epoch": 0.7939656670046906, "grad_norm": 153.1498260498047, "learning_rate": 1.3304021415156898e-06, "loss": 11.0697, "step": 393040 }, { "epoch": 0.7939858676373744, "grad_norm": 528.4383544921875, "learning_rate": 1.3301650522332566e-06, "loss": 19.9529, "step": 393050 }, { "epoch": 0.7940060682700583, "grad_norm": 257.60479736328125, "learning_rate": 1.3299279808371517e-06, "loss": 12.547, "step": 393060 }, { "epoch": 0.7940262689027421, "grad_norm": 290.8949279785156, "learning_rate": 1.329690927328533e-06, "loss": 9.9437, "step": 393070 }, { "epoch": 0.7940464695354259, "grad_norm": 71.61394500732422, "learning_rate": 1.3294538917085586e-06, "loss": 22.1281, "step": 393080 }, { "epoch": 0.7940666701681096, "grad_norm": 477.6353759765625, "learning_rate": 1.329216873978378e-06, "loss": 14.6355, "step": 393090 }, { "epoch": 0.7940868708007934, "grad_norm": 275.8092956542969, "learning_rate": 1.3289798741391486e-06, "loss": 17.8101, "step": 393100 }, { "epoch": 0.7941070714334773, "grad_norm": 606.7242431640625, "learning_rate": 1.3287428921920275e-06, "loss": 9.4685, "step": 393110 }, { "epoch": 0.7941272720661611, 
"grad_norm": 59.06065368652344, "learning_rate": 1.328505928138169e-06, "loss": 21.1656, "step": 393120 }, { "epoch": 0.7941474726988449, "grad_norm": 411.5826416015625, "learning_rate": 1.3282689819787253e-06, "loss": 9.8361, "step": 393130 }, { "epoch": 0.7941676733315287, "grad_norm": 236.2435302734375, "learning_rate": 1.328032053714855e-06, "loss": 10.2996, "step": 393140 }, { "epoch": 0.7941878739642125, "grad_norm": 38.0343017578125, "learning_rate": 1.327795143347711e-06, "loss": 6.9945, "step": 393150 }, { "epoch": 0.7942080745968964, "grad_norm": 233.21644592285156, "learning_rate": 1.3275582508784462e-06, "loss": 19.6127, "step": 393160 }, { "epoch": 0.7942282752295802, "grad_norm": 312.2197570800781, "learning_rate": 1.3273213763082188e-06, "loss": 17.6795, "step": 393170 }, { "epoch": 0.794248475862264, "grad_norm": 1112.5133056640625, "learning_rate": 1.3270845196381805e-06, "loss": 30.4112, "step": 393180 }, { "epoch": 0.7942686764949478, "grad_norm": 0.7452117800712585, "learning_rate": 1.3268476808694881e-06, "loss": 20.2499, "step": 393190 }, { "epoch": 0.7942888771276316, "grad_norm": 106.0436019897461, "learning_rate": 1.3266108600032928e-06, "loss": 16.6449, "step": 393200 }, { "epoch": 0.7943090777603155, "grad_norm": 344.6287841796875, "learning_rate": 1.3263740570407524e-06, "loss": 10.6484, "step": 393210 }, { "epoch": 0.7943292783929993, "grad_norm": 774.5092163085938, "learning_rate": 1.326137271983019e-06, "loss": 18.0544, "step": 393220 }, { "epoch": 0.7943494790256831, "grad_norm": 52.625030517578125, "learning_rate": 1.3259005048312457e-06, "loss": 13.479, "step": 393230 }, { "epoch": 0.7943696796583669, "grad_norm": 178.127685546875, "learning_rate": 1.3256637555865892e-06, "loss": 8.8126, "step": 393240 }, { "epoch": 0.7943898802910507, "grad_norm": 215.18649291992188, "learning_rate": 1.3254270242502004e-06, "loss": 7.4319, "step": 393250 }, { "epoch": 0.7944100809237346, "grad_norm": 800.3228759765625, "learning_rate": 1.3251903108232362e-06, "loss": 26.9087, "step": 393260 }, { "epoch": 0.7944302815564184, "grad_norm": 165.53640747070312, "learning_rate": 1.3249536153068487e-06, "loss": 18.4781, "step": 393270 }, { "epoch": 0.7944504821891022, "grad_norm": 181.44790649414062, "learning_rate": 1.3247169377021896e-06, "loss": 17.2046, "step": 393280 }, { "epoch": 0.794470682821786, "grad_norm": 31.7740421295166, "learning_rate": 1.3244802780104166e-06, "loss": 26.8276, "step": 393290 }, { "epoch": 0.7944908834544698, "grad_norm": 98.43057250976562, "learning_rate": 1.3242436362326804e-06, "loss": 10.26, "step": 393300 }, { "epoch": 0.7945110840871537, "grad_norm": 282.8909912109375, "learning_rate": 1.3240070123701337e-06, "loss": 8.1901, "step": 393310 }, { "epoch": 0.7945312847198375, "grad_norm": 854.9933471679688, "learning_rate": 1.323770406423931e-06, "loss": 31.1475, "step": 393320 }, { "epoch": 0.7945514853525213, "grad_norm": 523.5370483398438, "learning_rate": 1.3235338183952268e-06, "loss": 35.4954, "step": 393330 }, { "epoch": 0.7945716859852051, "grad_norm": 452.68084716796875, "learning_rate": 1.323297248285173e-06, "loss": 19.9129, "step": 393340 }, { "epoch": 0.7945918866178888, "grad_norm": 5.8730549812316895, "learning_rate": 1.3230606960949204e-06, "loss": 8.4925, "step": 393350 }, { "epoch": 0.7946120872505726, "grad_norm": 288.4775085449219, "learning_rate": 1.322824161825626e-06, "loss": 14.5479, "step": 393360 }, { "epoch": 0.7946322878832565, "grad_norm": 244.63304138183594, "learning_rate": 1.3225876454784409e-06, "loss": 23.3344, 
"step": 393370 }, { "epoch": 0.7946524885159403, "grad_norm": 222.38368225097656, "learning_rate": 1.3223511470545158e-06, "loss": 13.8454, "step": 393380 }, { "epoch": 0.7946726891486241, "grad_norm": 360.0292663574219, "learning_rate": 1.3221146665550055e-06, "loss": 20.8501, "step": 393390 }, { "epoch": 0.7946928897813079, "grad_norm": 310.9561767578125, "learning_rate": 1.3218782039810634e-06, "loss": 18.6549, "step": 393400 }, { "epoch": 0.7947130904139917, "grad_norm": 449.3248596191406, "learning_rate": 1.321641759333841e-06, "loss": 19.3592, "step": 393410 }, { "epoch": 0.7947332910466756, "grad_norm": 129.15274047851562, "learning_rate": 1.3214053326144888e-06, "loss": 19.0694, "step": 393420 }, { "epoch": 0.7947534916793594, "grad_norm": 0.25493475794792175, "learning_rate": 1.321168923824162e-06, "loss": 12.7415, "step": 393430 }, { "epoch": 0.7947736923120432, "grad_norm": 217.77394104003906, "learning_rate": 1.3209325329640126e-06, "loss": 16.7637, "step": 393440 }, { "epoch": 0.794793892944727, "grad_norm": 149.8814239501953, "learning_rate": 1.3206961600351897e-06, "loss": 13.435, "step": 393450 }, { "epoch": 0.7948140935774108, "grad_norm": 295.07928466796875, "learning_rate": 1.320459805038849e-06, "loss": 30.2789, "step": 393460 }, { "epoch": 0.7948342942100947, "grad_norm": 157.8231658935547, "learning_rate": 1.32022346797614e-06, "loss": 19.1555, "step": 393470 }, { "epoch": 0.7948544948427785, "grad_norm": 319.9383850097656, "learning_rate": 1.3199871488482163e-06, "loss": 9.6858, "step": 393480 }, { "epoch": 0.7948746954754623, "grad_norm": 271.5839538574219, "learning_rate": 1.3197508476562277e-06, "loss": 16.0141, "step": 393490 }, { "epoch": 0.7948948961081461, "grad_norm": 424.5205383300781, "learning_rate": 1.3195145644013286e-06, "loss": 13.9018, "step": 393500 }, { "epoch": 0.7949150967408299, "grad_norm": 212.56304931640625, "learning_rate": 1.3192782990846692e-06, "loss": 15.9282, "step": 393510 }, { "epoch": 0.7949352973735138, "grad_norm": 476.3667907714844, "learning_rate": 1.3190420517073993e-06, "loss": 21.4514, "step": 393520 }, { "epoch": 0.7949554980061976, "grad_norm": 473.9554748535156, "learning_rate": 1.3188058222706735e-06, "loss": 16.1802, "step": 393530 }, { "epoch": 0.7949756986388814, "grad_norm": 100.92284393310547, "learning_rate": 1.3185696107756402e-06, "loss": 13.5259, "step": 393540 }, { "epoch": 0.7949958992715652, "grad_norm": 217.48333740234375, "learning_rate": 1.3183334172234536e-06, "loss": 18.6937, "step": 393550 }, { "epoch": 0.795016099904249, "grad_norm": 137.55055236816406, "learning_rate": 1.3180972416152637e-06, "loss": 11.7866, "step": 393560 }, { "epoch": 0.7950363005369329, "grad_norm": 188.703857421875, "learning_rate": 1.3178610839522193e-06, "loss": 8.2441, "step": 393570 }, { "epoch": 0.7950565011696167, "grad_norm": 672.42041015625, "learning_rate": 1.317624944235475e-06, "loss": 13.1175, "step": 393580 }, { "epoch": 0.7950767018023005, "grad_norm": 858.04296875, "learning_rate": 1.3173888224661802e-06, "loss": 12.281, "step": 393590 }, { "epoch": 0.7950969024349842, "grad_norm": 232.22882080078125, "learning_rate": 1.317152718645484e-06, "loss": 18.2014, "step": 393600 }, { "epoch": 0.795117103067668, "grad_norm": 304.68597412109375, "learning_rate": 1.3169166327745392e-06, "loss": 28.0134, "step": 393610 }, { "epoch": 0.7951373037003518, "grad_norm": 28.386598587036133, "learning_rate": 1.316680564854499e-06, "loss": 21.7759, "step": 393620 }, { "epoch": 0.7951575043330357, "grad_norm": 0.0007220363477244973, 
"learning_rate": 1.3164445148865073e-06, "loss": 21.6391, "step": 393630 }, { "epoch": 0.7951777049657195, "grad_norm": 126.11640167236328, "learning_rate": 1.3162084828717187e-06, "loss": 17.208, "step": 393640 }, { "epoch": 0.7951979055984033, "grad_norm": 378.008056640625, "learning_rate": 1.3159724688112846e-06, "loss": 26.1479, "step": 393650 }, { "epoch": 0.7952181062310871, "grad_norm": 313.0746154785156, "learning_rate": 1.3157364727063542e-06, "loss": 13.67, "step": 393660 }, { "epoch": 0.795238306863771, "grad_norm": 280.4048156738281, "learning_rate": 1.3155004945580757e-06, "loss": 13.3941, "step": 393670 }, { "epoch": 0.7952585074964548, "grad_norm": 149.05538940429688, "learning_rate": 1.3152645343676007e-06, "loss": 18.4442, "step": 393680 }, { "epoch": 0.7952787081291386, "grad_norm": 307.603271484375, "learning_rate": 1.3150285921360823e-06, "loss": 18.4287, "step": 393690 }, { "epoch": 0.7952989087618224, "grad_norm": 508.5350036621094, "learning_rate": 1.314792667864665e-06, "loss": 14.0235, "step": 393700 }, { "epoch": 0.7953191093945062, "grad_norm": 434.1549987792969, "learning_rate": 1.3145567615545013e-06, "loss": 20.5497, "step": 393710 }, { "epoch": 0.79533931002719, "grad_norm": 279.7898864746094, "learning_rate": 1.3143208732067426e-06, "loss": 15.663, "step": 393720 }, { "epoch": 0.7953595106598739, "grad_norm": 97.50518035888672, "learning_rate": 1.314085002822536e-06, "loss": 14.4952, "step": 393730 }, { "epoch": 0.7953797112925577, "grad_norm": 11.618754386901855, "learning_rate": 1.3138491504030314e-06, "loss": 9.7702, "step": 393740 }, { "epoch": 0.7953999119252415, "grad_norm": 264.45269775390625, "learning_rate": 1.3136133159493803e-06, "loss": 19.0229, "step": 393750 }, { "epoch": 0.7954201125579253, "grad_norm": 696.5134887695312, "learning_rate": 1.3133774994627307e-06, "loss": 32.1867, "step": 393760 }, { "epoch": 0.7954403131906091, "grad_norm": 122.29059600830078, "learning_rate": 1.313141700944231e-06, "loss": 20.7587, "step": 393770 }, { "epoch": 0.795460513823293, "grad_norm": 419.3998718261719, "learning_rate": 1.3129059203950306e-06, "loss": 21.3142, "step": 393780 }, { "epoch": 0.7954807144559768, "grad_norm": 331.98126220703125, "learning_rate": 1.312670157816282e-06, "loss": 13.6463, "step": 393790 }, { "epoch": 0.7955009150886606, "grad_norm": 246.06785583496094, "learning_rate": 1.312434413209131e-06, "loss": 14.7045, "step": 393800 }, { "epoch": 0.7955211157213444, "grad_norm": 607.720947265625, "learning_rate": 1.3121986865747267e-06, "loss": 14.9481, "step": 393810 }, { "epoch": 0.7955413163540282, "grad_norm": 448.0708923339844, "learning_rate": 1.3119629779142196e-06, "loss": 20.9657, "step": 393820 }, { "epoch": 0.7955615169867121, "grad_norm": 431.65411376953125, "learning_rate": 1.3117272872287578e-06, "loss": 21.2966, "step": 393830 }, { "epoch": 0.7955817176193959, "grad_norm": 282.5703125, "learning_rate": 1.3114916145194884e-06, "loss": 26.2329, "step": 393840 }, { "epoch": 0.7956019182520797, "grad_norm": 401.35076904296875, "learning_rate": 1.3112559597875628e-06, "loss": 19.5809, "step": 393850 }, { "epoch": 0.7956221188847634, "grad_norm": 750.6825561523438, "learning_rate": 1.3110203230341273e-06, "loss": 24.4017, "step": 393860 }, { "epoch": 0.7956423195174472, "grad_norm": 0.21479862928390503, "learning_rate": 1.3107847042603328e-06, "loss": 24.2475, "step": 393870 }, { "epoch": 0.7956625201501311, "grad_norm": 139.98748779296875, "learning_rate": 1.3105491034673256e-06, "loss": 11.7494, "step": 393880 }, { "epoch": 
0.7956827207828149, "grad_norm": 269.61834716796875, "learning_rate": 1.3103135206562535e-06, "loss": 10.683, "step": 393890 }, { "epoch": 0.7957029214154987, "grad_norm": 437.0902404785156, "learning_rate": 1.3100779558282673e-06, "loss": 16.1986, "step": 393900 }, { "epoch": 0.7957231220481825, "grad_norm": 633.8558349609375, "learning_rate": 1.3098424089845136e-06, "loss": 25.6735, "step": 393910 }, { "epoch": 0.7957433226808663, "grad_norm": 578.57763671875, "learning_rate": 1.3096068801261386e-06, "loss": 16.9048, "step": 393920 }, { "epoch": 0.7957635233135502, "grad_norm": 0.0013803989859297872, "learning_rate": 1.3093713692542925e-06, "loss": 21.8858, "step": 393930 }, { "epoch": 0.795783723946234, "grad_norm": 656.0813598632812, "learning_rate": 1.309135876370124e-06, "loss": 13.1338, "step": 393940 }, { "epoch": 0.7958039245789178, "grad_norm": 378.0755615234375, "learning_rate": 1.3089004014747797e-06, "loss": 16.7051, "step": 393950 }, { "epoch": 0.7958241252116016, "grad_norm": 325.6436767578125, "learning_rate": 1.3086649445694056e-06, "loss": 17.3309, "step": 393960 }, { "epoch": 0.7958443258442854, "grad_norm": 126.03115844726562, "learning_rate": 1.308429505655152e-06, "loss": 19.0706, "step": 393970 }, { "epoch": 0.7958645264769693, "grad_norm": 202.6767120361328, "learning_rate": 1.3081940847331658e-06, "loss": 13.6732, "step": 393980 }, { "epoch": 0.7958847271096531, "grad_norm": 575.3368530273438, "learning_rate": 1.3079586818045925e-06, "loss": 26.4399, "step": 393990 }, { "epoch": 0.7959049277423369, "grad_norm": 101.26583862304688, "learning_rate": 1.3077232968705805e-06, "loss": 10.4103, "step": 394000 }, { "epoch": 0.7959251283750207, "grad_norm": 339.0704650878906, "learning_rate": 1.3074879299322802e-06, "loss": 14.2013, "step": 394010 }, { "epoch": 0.7959453290077045, "grad_norm": 467.2294006347656, "learning_rate": 1.3072525809908332e-06, "loss": 14.1033, "step": 394020 }, { "epoch": 0.7959655296403884, "grad_norm": 264.7349853515625, "learning_rate": 1.3070172500473888e-06, "loss": 20.4526, "step": 394030 }, { "epoch": 0.7959857302730722, "grad_norm": 455.54400634765625, "learning_rate": 1.3067819371030966e-06, "loss": 21.736, "step": 394040 }, { "epoch": 0.796005930905756, "grad_norm": 235.12664794921875, "learning_rate": 1.3065466421591006e-06, "loss": 31.3294, "step": 394050 }, { "epoch": 0.7960261315384398, "grad_norm": 422.9604187011719, "learning_rate": 1.306311365216547e-06, "loss": 12.6948, "step": 394060 }, { "epoch": 0.7960463321711236, "grad_norm": 171.83616638183594, "learning_rate": 1.3060761062765853e-06, "loss": 17.2273, "step": 394070 }, { "epoch": 0.7960665328038075, "grad_norm": 396.9876403808594, "learning_rate": 1.3058408653403609e-06, "loss": 25.2987, "step": 394080 }, { "epoch": 0.7960867334364913, "grad_norm": 424.2646484375, "learning_rate": 1.3056056424090186e-06, "loss": 24.977, "step": 394090 }, { "epoch": 0.7961069340691751, "grad_norm": 205.80349731445312, "learning_rate": 1.3053704374837063e-06, "loss": 12.2314, "step": 394100 }, { "epoch": 0.7961271347018588, "grad_norm": 121.14717864990234, "learning_rate": 1.3051352505655713e-06, "loss": 17.3948, "step": 394110 }, { "epoch": 0.7961473353345426, "grad_norm": 323.67108154296875, "learning_rate": 1.3049000816557595e-06, "loss": 22.5437, "step": 394120 }, { "epoch": 0.7961675359672264, "grad_norm": 168.21630859375, "learning_rate": 1.304664930755415e-06, "loss": 18.7534, "step": 394130 }, { "epoch": 0.7961877365999103, "grad_norm": 405.3740234375, "learning_rate": 
1.3044297978656867e-06, "loss": 14.6358, "step": 394140 }, { "epoch": 0.7962079372325941, "grad_norm": 74.92567443847656, "learning_rate": 1.3041946829877178e-06, "loss": 28.3451, "step": 394150 }, { "epoch": 0.7962281378652779, "grad_norm": 492.98333740234375, "learning_rate": 1.3039595861226579e-06, "loss": 20.2084, "step": 394160 }, { "epoch": 0.7962483384979617, "grad_norm": 404.05169677734375, "learning_rate": 1.3037245072716504e-06, "loss": 27.0056, "step": 394170 }, { "epoch": 0.7962685391306455, "grad_norm": 666.1986083984375, "learning_rate": 1.3034894464358395e-06, "loss": 11.6529, "step": 394180 }, { "epoch": 0.7962887397633294, "grad_norm": 307.9724426269531, "learning_rate": 1.3032544036163742e-06, "loss": 8.2639, "step": 394190 }, { "epoch": 0.7963089403960132, "grad_norm": 25.722850799560547, "learning_rate": 1.3030193788143991e-06, "loss": 18.8226, "step": 394200 }, { "epoch": 0.796329141028697, "grad_norm": 617.319580078125, "learning_rate": 1.3027843720310574e-06, "loss": 24.744, "step": 394210 }, { "epoch": 0.7963493416613808, "grad_norm": 281.46490478515625, "learning_rate": 1.3025493832674963e-06, "loss": 34.2395, "step": 394220 }, { "epoch": 0.7963695422940646, "grad_norm": 1.4975876808166504, "learning_rate": 1.302314412524862e-06, "loss": 32.9228, "step": 394230 }, { "epoch": 0.7963897429267485, "grad_norm": 275.6388854980469, "learning_rate": 1.3020794598042996e-06, "loss": 21.2948, "step": 394240 }, { "epoch": 0.7964099435594323, "grad_norm": 291.4058837890625, "learning_rate": 1.301844525106951e-06, "loss": 12.3671, "step": 394250 }, { "epoch": 0.7964301441921161, "grad_norm": 99.08885192871094, "learning_rate": 1.3016096084339658e-06, "loss": 21.0551, "step": 394260 }, { "epoch": 0.7964503448247999, "grad_norm": 341.2049255371094, "learning_rate": 1.301374709786487e-06, "loss": 15.1307, "step": 394270 }, { "epoch": 0.7964705454574837, "grad_norm": 277.8306884765625, "learning_rate": 1.3011398291656575e-06, "loss": 11.555, "step": 394280 }, { "epoch": 0.7964907460901676, "grad_norm": 210.9628143310547, "learning_rate": 1.3009049665726236e-06, "loss": 16.8974, "step": 394290 }, { "epoch": 0.7965109467228514, "grad_norm": 210.85662841796875, "learning_rate": 1.3006701220085338e-06, "loss": 17.8028, "step": 394300 }, { "epoch": 0.7965311473555352, "grad_norm": 130.49485778808594, "learning_rate": 1.3004352954745257e-06, "loss": 22.2752, "step": 394310 }, { "epoch": 0.796551347988219, "grad_norm": 1315.4393310546875, "learning_rate": 1.3002004869717472e-06, "loss": 20.5979, "step": 394320 }, { "epoch": 0.7965715486209028, "grad_norm": 209.0332794189453, "learning_rate": 1.2999656965013447e-06, "loss": 9.4618, "step": 394330 }, { "epoch": 0.7965917492535867, "grad_norm": 526.4258422851562, "learning_rate": 1.2997309240644607e-06, "loss": 12.1449, "step": 394340 }, { "epoch": 0.7966119498862705, "grad_norm": 683.3538818359375, "learning_rate": 1.299496169662237e-06, "loss": 15.6115, "step": 394350 }, { "epoch": 0.7966321505189543, "grad_norm": 208.3459014892578, "learning_rate": 1.2992614332958226e-06, "loss": 10.9918, "step": 394360 }, { "epoch": 0.796652351151638, "grad_norm": 236.00804138183594, "learning_rate": 1.2990267149663588e-06, "loss": 19.7191, "step": 394370 }, { "epoch": 0.7966725517843218, "grad_norm": 1093.6866455078125, "learning_rate": 1.2987920146749883e-06, "loss": 13.9512, "step": 394380 }, { "epoch": 0.7966927524170057, "grad_norm": 572.6358032226562, "learning_rate": 1.2985573324228568e-06, "loss": 19.6329, "step": 394390 }, { "epoch": 
0.7967129530496895, "grad_norm": 202.0952606201172, "learning_rate": 1.2983226682111094e-06, "loss": 25.3241, "step": 394400 }, { "epoch": 0.7967331536823733, "grad_norm": 302.8909606933594, "learning_rate": 1.2980880220408887e-06, "loss": 11.107, "step": 394410 }, { "epoch": 0.7967533543150571, "grad_norm": 372.9821472167969, "learning_rate": 1.2978533939133358e-06, "loss": 15.4005, "step": 394420 }, { "epoch": 0.7967735549477409, "grad_norm": 254.22006225585938, "learning_rate": 1.2976187838295984e-06, "loss": 13.6604, "step": 394430 }, { "epoch": 0.7967937555804248, "grad_norm": 436.09619140625, "learning_rate": 1.2973841917908175e-06, "loss": 15.5366, "step": 394440 }, { "epoch": 0.7968139562131086, "grad_norm": 280.6983947753906, "learning_rate": 1.2971496177981362e-06, "loss": 15.5349, "step": 394450 }, { "epoch": 0.7968341568457924, "grad_norm": 330.1646423339844, "learning_rate": 1.2969150618527e-06, "loss": 32.7581, "step": 394460 }, { "epoch": 0.7968543574784762, "grad_norm": 486.09503173828125, "learning_rate": 1.2966805239556484e-06, "loss": 23.7823, "step": 394470 }, { "epoch": 0.79687455811116, "grad_norm": 6.7952494621276855, "learning_rate": 1.2964460041081288e-06, "loss": 15.3201, "step": 394480 }, { "epoch": 0.7968947587438439, "grad_norm": 142.43624877929688, "learning_rate": 1.296211502311282e-06, "loss": 18.9219, "step": 394490 }, { "epoch": 0.7969149593765277, "grad_norm": 529.5702514648438, "learning_rate": 1.2959770185662502e-06, "loss": 21.5324, "step": 394500 }, { "epoch": 0.7969351600092115, "grad_norm": 240.99537658691406, "learning_rate": 1.295742552874178e-06, "loss": 17.789, "step": 394510 }, { "epoch": 0.7969553606418953, "grad_norm": 984.2542724609375, "learning_rate": 1.2955081052362072e-06, "loss": 29.0571, "step": 394520 }, { "epoch": 0.7969755612745791, "grad_norm": 325.11138916015625, "learning_rate": 1.2952736756534796e-06, "loss": 8.1615, "step": 394530 }, { "epoch": 0.796995761907263, "grad_norm": 291.4028015136719, "learning_rate": 1.2950392641271386e-06, "loss": 21.4612, "step": 394540 }, { "epoch": 0.7970159625399468, "grad_norm": 379.6304626464844, "learning_rate": 1.2948048706583284e-06, "loss": 16.4679, "step": 394550 }, { "epoch": 0.7970361631726306, "grad_norm": 614.7577514648438, "learning_rate": 1.2945704952481896e-06, "loss": 13.7075, "step": 394560 }, { "epoch": 0.7970563638053144, "grad_norm": 528.6258544921875, "learning_rate": 1.2943361378978636e-06, "loss": 19.4213, "step": 394570 }, { "epoch": 0.7970765644379982, "grad_norm": 379.50335693359375, "learning_rate": 1.2941017986084953e-06, "loss": 12.0558, "step": 394580 }, { "epoch": 0.7970967650706821, "grad_norm": 528.6602172851562, "learning_rate": 1.2938674773812255e-06, "loss": 27.86, "step": 394590 }, { "epoch": 0.7971169657033659, "grad_norm": 312.184814453125, "learning_rate": 1.2936331742171943e-06, "loss": 15.0957, "step": 394600 }, { "epoch": 0.7971371663360497, "grad_norm": 321.55584716796875, "learning_rate": 1.2933988891175458e-06, "loss": 27.9651, "step": 394610 }, { "epoch": 0.7971573669687335, "grad_norm": 77.93047332763672, "learning_rate": 1.2931646220834242e-06, "loss": 7.1763, "step": 394620 }, { "epoch": 0.7971775676014172, "grad_norm": 1702.705322265625, "learning_rate": 1.292930373115966e-06, "loss": 13.2088, "step": 394630 }, { "epoch": 0.797197768234101, "grad_norm": 409.6087646484375, "learning_rate": 1.2926961422163154e-06, "loss": 9.8329, "step": 394640 }, { "epoch": 0.7972179688667849, "grad_norm": 258.40966796875, "learning_rate": 1.2924619293856155e-06, 
"loss": 13.6099, "step": 394650 }, { "epoch": 0.7972381694994687, "grad_norm": 245.12200927734375, "learning_rate": 1.2922277346250067e-06, "loss": 14.0059, "step": 394660 }, { "epoch": 0.7972583701321525, "grad_norm": 757.044189453125, "learning_rate": 1.2919935579356285e-06, "loss": 15.0933, "step": 394670 }, { "epoch": 0.7972785707648363, "grad_norm": 137.48452758789062, "learning_rate": 1.2917593993186257e-06, "loss": 15.7164, "step": 394680 }, { "epoch": 0.7972987713975201, "grad_norm": 1023.2304077148438, "learning_rate": 1.2915252587751376e-06, "loss": 14.3711, "step": 394690 }, { "epoch": 0.797318972030204, "grad_norm": 248.52865600585938, "learning_rate": 1.2912911363063048e-06, "loss": 15.0697, "step": 394700 }, { "epoch": 0.7973391726628878, "grad_norm": 464.6304016113281, "learning_rate": 1.291057031913268e-06, "loss": 44.176, "step": 394710 }, { "epoch": 0.7973593732955716, "grad_norm": 434.9127197265625, "learning_rate": 1.2908229455971717e-06, "loss": 29.4214, "step": 394720 }, { "epoch": 0.7973795739282554, "grad_norm": 490.8548278808594, "learning_rate": 1.2905888773591546e-06, "loss": 18.168, "step": 394730 }, { "epoch": 0.7973997745609392, "grad_norm": 325.1541442871094, "learning_rate": 1.2903548272003552e-06, "loss": 13.8529, "step": 394740 }, { "epoch": 0.7974199751936231, "grad_norm": 306.61810302734375, "learning_rate": 1.2901207951219186e-06, "loss": 8.425, "step": 394750 }, { "epoch": 0.7974401758263069, "grad_norm": 392.6197814941406, "learning_rate": 1.2898867811249832e-06, "loss": 30.7993, "step": 394760 }, { "epoch": 0.7974603764589907, "grad_norm": 198.2019805908203, "learning_rate": 1.2896527852106876e-06, "loss": 18.272, "step": 394770 }, { "epoch": 0.7974805770916745, "grad_norm": 498.5760803222656, "learning_rate": 1.2894188073801766e-06, "loss": 22.122, "step": 394780 }, { "epoch": 0.7975007777243583, "grad_norm": 490.75921630859375, "learning_rate": 1.2891848476345864e-06, "loss": 15.0195, "step": 394790 }, { "epoch": 0.7975209783570422, "grad_norm": 56.68571090698242, "learning_rate": 1.2889509059750605e-06, "loss": 10.773, "step": 394800 }, { "epoch": 0.797541178989726, "grad_norm": 457.5586853027344, "learning_rate": 1.288716982402738e-06, "loss": 12.8355, "step": 394810 }, { "epoch": 0.7975613796224098, "grad_norm": 1123.6107177734375, "learning_rate": 1.2884830769187572e-06, "loss": 18.1294, "step": 394820 }, { "epoch": 0.7975815802550936, "grad_norm": 611.557373046875, "learning_rate": 1.2882491895242599e-06, "loss": 15.0549, "step": 394830 }, { "epoch": 0.7976017808877774, "grad_norm": 259.11004638671875, "learning_rate": 1.2880153202203877e-06, "loss": 10.5003, "step": 394840 }, { "epoch": 0.7976219815204613, "grad_norm": 612.1580200195312, "learning_rate": 1.287781469008278e-06, "loss": 32.6783, "step": 394850 }, { "epoch": 0.7976421821531451, "grad_norm": 152.5412139892578, "learning_rate": 1.2875476358890698e-06, "loss": 10.7557, "step": 394860 }, { "epoch": 0.7976623827858289, "grad_norm": 83.74934387207031, "learning_rate": 1.2873138208639057e-06, "loss": 10.8741, "step": 394870 }, { "epoch": 0.7976825834185126, "grad_norm": 187.7102508544922, "learning_rate": 1.2870800239339237e-06, "loss": 9.8461, "step": 394880 }, { "epoch": 0.7977027840511964, "grad_norm": 318.344482421875, "learning_rate": 1.2868462451002623e-06, "loss": 5.4056, "step": 394890 }, { "epoch": 0.7977229846838803, "grad_norm": 446.8281555175781, "learning_rate": 1.2866124843640614e-06, "loss": 18.0713, "step": 394900 }, { "epoch": 0.7977431853165641, "grad_norm": 
336.1660461425781, "learning_rate": 1.2863787417264639e-06, "loss": 28.2284, "step": 394910 }, { "epoch": 0.7977633859492479, "grad_norm": 119.48172760009766, "learning_rate": 1.2861450171886037e-06, "loss": 9.7535, "step": 394920 }, { "epoch": 0.7977835865819317, "grad_norm": 499.10699462890625, "learning_rate": 1.2859113107516212e-06, "loss": 19.604, "step": 394930 }, { "epoch": 0.7978037872146155, "grad_norm": 70.6404800415039, "learning_rate": 1.2856776224166589e-06, "loss": 9.231, "step": 394940 }, { "epoch": 0.7978239878472994, "grad_norm": 175.56529235839844, "learning_rate": 1.2854439521848526e-06, "loss": 12.3219, "step": 394950 }, { "epoch": 0.7978441884799832, "grad_norm": 773.7423706054688, "learning_rate": 1.2852103000573413e-06, "loss": 24.3522, "step": 394960 }, { "epoch": 0.797864389112667, "grad_norm": 69.67157745361328, "learning_rate": 1.2849766660352652e-06, "loss": 7.5418, "step": 394970 }, { "epoch": 0.7978845897453508, "grad_norm": 148.06202697753906, "learning_rate": 1.2847430501197627e-06, "loss": 18.4866, "step": 394980 }, { "epoch": 0.7979047903780346, "grad_norm": 75.63851928710938, "learning_rate": 1.2845094523119706e-06, "loss": 14.312, "step": 394990 }, { "epoch": 0.7979249910107185, "grad_norm": 101.40110778808594, "learning_rate": 1.2842758726130283e-06, "loss": 10.1742, "step": 395000 }, { "epoch": 0.7979451916434023, "grad_norm": 12.643241882324219, "learning_rate": 1.2840423110240762e-06, "loss": 30.2885, "step": 395010 }, { "epoch": 0.7979653922760861, "grad_norm": 40.91701126098633, "learning_rate": 1.2838087675462518e-06, "loss": 21.6835, "step": 395020 }, { "epoch": 0.7979855929087699, "grad_norm": 246.21238708496094, "learning_rate": 1.2835752421806908e-06, "loss": 16.0245, "step": 395030 }, { "epoch": 0.7980057935414537, "grad_norm": 337.4515380859375, "learning_rate": 1.283341734928535e-06, "loss": 20.9604, "step": 395040 }, { "epoch": 0.7980259941741376, "grad_norm": 152.22817993164062, "learning_rate": 1.2831082457909206e-06, "loss": 27.1174, "step": 395050 }, { "epoch": 0.7980461948068214, "grad_norm": 337.61846923828125, "learning_rate": 1.2828747747689846e-06, "loss": 20.1692, "step": 395060 }, { "epoch": 0.7980663954395052, "grad_norm": 192.54147338867188, "learning_rate": 1.2826413218638672e-06, "loss": 20.242, "step": 395070 }, { "epoch": 0.798086596072189, "grad_norm": 581.4447021484375, "learning_rate": 1.2824078870767036e-06, "loss": 19.2308, "step": 395080 }, { "epoch": 0.7981067967048728, "grad_norm": 1.4781781435012817, "learning_rate": 1.2821744704086353e-06, "loss": 24.4192, "step": 395090 }, { "epoch": 0.7981269973375567, "grad_norm": 316.1722412109375, "learning_rate": 1.2819410718607972e-06, "loss": 40.5452, "step": 395100 }, { "epoch": 0.7981471979702405, "grad_norm": 95.39334869384766, "learning_rate": 1.2817076914343257e-06, "loss": 81.6254, "step": 395110 }, { "epoch": 0.7981673986029243, "grad_norm": 330.116455078125, "learning_rate": 1.2814743291303616e-06, "loss": 17.7754, "step": 395120 }, { "epoch": 0.7981875992356081, "grad_norm": 485.01861572265625, "learning_rate": 1.2812409849500408e-06, "loss": 11.6857, "step": 395130 }, { "epoch": 0.7982077998682918, "grad_norm": 302.6898193359375, "learning_rate": 1.2810076588944987e-06, "loss": 16.6038, "step": 395140 }, { "epoch": 0.7982280005009756, "grad_norm": 268.6713562011719, "learning_rate": 1.2807743509648745e-06, "loss": 22.0135, "step": 395150 }, { "epoch": 0.7982482011336595, "grad_norm": 271.824462890625, "learning_rate": 1.280541061162306e-06, "loss": 8.693, 
"step": 395160 }, { "epoch": 0.7982684017663433, "grad_norm": 301.60888671875, "learning_rate": 1.2803077894879296e-06, "loss": 14.459, "step": 395170 }, { "epoch": 0.7982886023990271, "grad_norm": 218.03836059570312, "learning_rate": 1.2800745359428807e-06, "loss": 20.5122, "step": 395180 }, { "epoch": 0.7983088030317109, "grad_norm": 339.1699523925781, "learning_rate": 1.2798413005282984e-06, "loss": 19.4057, "step": 395190 }, { "epoch": 0.7983290036643947, "grad_norm": 312.39691162109375, "learning_rate": 1.2796080832453183e-06, "loss": 10.9805, "step": 395200 }, { "epoch": 0.7983492042970786, "grad_norm": 165.0194549560547, "learning_rate": 1.279374884095076e-06, "loss": 30.4858, "step": 395210 }, { "epoch": 0.7983694049297624, "grad_norm": 172.35121154785156, "learning_rate": 1.279141703078709e-06, "loss": 19.7434, "step": 395220 }, { "epoch": 0.7983896055624462, "grad_norm": 230.8863067626953, "learning_rate": 1.2789085401973572e-06, "loss": 24.9739, "step": 395230 }, { "epoch": 0.79840980619513, "grad_norm": 346.300048828125, "learning_rate": 1.2786753954521508e-06, "loss": 39.0584, "step": 395240 }, { "epoch": 0.7984300068278138, "grad_norm": 454.89654541015625, "learning_rate": 1.2784422688442294e-06, "loss": 13.4399, "step": 395250 }, { "epoch": 0.7984502074604977, "grad_norm": 237.23622131347656, "learning_rate": 1.2782091603747304e-06, "loss": 14.5553, "step": 395260 }, { "epoch": 0.7984704080931815, "grad_norm": 336.5985107421875, "learning_rate": 1.2779760700447885e-06, "loss": 24.699, "step": 395270 }, { "epoch": 0.7984906087258653, "grad_norm": 411.7083740234375, "learning_rate": 1.2777429978555383e-06, "loss": 19.8059, "step": 395280 }, { "epoch": 0.7985108093585491, "grad_norm": 169.06935119628906, "learning_rate": 1.2775099438081173e-06, "loss": 15.3829, "step": 395290 }, { "epoch": 0.7985310099912329, "grad_norm": 573.5012817382812, "learning_rate": 1.2772769079036639e-06, "loss": 22.6528, "step": 395300 }, { "epoch": 0.7985512106239168, "grad_norm": 29.020320892333984, "learning_rate": 1.277043890143309e-06, "loss": 6.569, "step": 395310 }, { "epoch": 0.7985714112566006, "grad_norm": 944.8809204101562, "learning_rate": 1.2768108905281906e-06, "loss": 17.0352, "step": 395320 }, { "epoch": 0.7985916118892844, "grad_norm": 107.38935852050781, "learning_rate": 1.2765779090594454e-06, "loss": 21.9235, "step": 395330 }, { "epoch": 0.7986118125219682, "grad_norm": 1010.5393676757812, "learning_rate": 1.2763449457382083e-06, "loss": 31.6762, "step": 395340 }, { "epoch": 0.798632013154652, "grad_norm": 173.56773376464844, "learning_rate": 1.2761120005656125e-06, "loss": 30.6485, "step": 395350 }, { "epoch": 0.7986522137873359, "grad_norm": 278.5769958496094, "learning_rate": 1.2758790735427966e-06, "loss": 11.0597, "step": 395360 }, { "epoch": 0.7986724144200197, "grad_norm": 160.19850158691406, "learning_rate": 1.275646164670895e-06, "loss": 19.5009, "step": 395370 }, { "epoch": 0.7986926150527035, "grad_norm": 171.7584686279297, "learning_rate": 1.27541327395104e-06, "loss": 23.2055, "step": 395380 }, { "epoch": 0.7987128156853872, "grad_norm": 301.77984619140625, "learning_rate": 1.275180401384371e-06, "loss": 17.2965, "step": 395390 }, { "epoch": 0.798733016318071, "grad_norm": 341.29913330078125, "learning_rate": 1.2749475469720196e-06, "loss": 25.0192, "step": 395400 }, { "epoch": 0.7987532169507549, "grad_norm": 626.1749267578125, "learning_rate": 1.274714710715123e-06, "loss": 18.06, "step": 395410 }, { "epoch": 0.7987734175834387, "grad_norm": 175.00750732421875, 
"learning_rate": 1.2744818926148157e-06, "loss": 10.0525, "step": 395420 }, { "epoch": 0.7987936182161225, "grad_norm": 230.48179626464844, "learning_rate": 1.2742490926722295e-06, "loss": 23.7853, "step": 395430 }, { "epoch": 0.7988138188488063, "grad_norm": 348.3998107910156, "learning_rate": 1.2740163108885033e-06, "loss": 15.9951, "step": 395440 }, { "epoch": 0.7988340194814901, "grad_norm": 389.44061279296875, "learning_rate": 1.2737835472647686e-06, "loss": 34.202, "step": 395450 }, { "epoch": 0.798854220114174, "grad_norm": 458.6150817871094, "learning_rate": 1.273550801802162e-06, "loss": 24.2333, "step": 395460 }, { "epoch": 0.7988744207468578, "grad_norm": 91.32477569580078, "learning_rate": 1.2733180745018154e-06, "loss": 16.5594, "step": 395470 }, { "epoch": 0.7988946213795416, "grad_norm": 387.79022216796875, "learning_rate": 1.2730853653648657e-06, "loss": 18.4532, "step": 395480 }, { "epoch": 0.7989148220122254, "grad_norm": 542.697509765625, "learning_rate": 1.2728526743924462e-06, "loss": 14.1246, "step": 395490 }, { "epoch": 0.7989350226449092, "grad_norm": 182.57275390625, "learning_rate": 1.2726200015856893e-06, "loss": 19.1147, "step": 395500 }, { "epoch": 0.798955223277593, "grad_norm": 179.49337768554688, "learning_rate": 1.2723873469457304e-06, "loss": 16.6853, "step": 395510 }, { "epoch": 0.7989754239102769, "grad_norm": 235.9488525390625, "learning_rate": 1.2721547104737065e-06, "loss": 22.9069, "step": 395520 }, { "epoch": 0.7989956245429607, "grad_norm": 117.95890045166016, "learning_rate": 1.2719220921707453e-06, "loss": 13.7428, "step": 395530 }, { "epoch": 0.7990158251756445, "grad_norm": 0.0, "learning_rate": 1.2716894920379835e-06, "loss": 14.2216, "step": 395540 }, { "epoch": 0.7990360258083283, "grad_norm": 355.38250732421875, "learning_rate": 1.2714569100765567e-06, "loss": 16.9524, "step": 395550 }, { "epoch": 0.7990562264410122, "grad_norm": 432.5514831542969, "learning_rate": 1.2712243462875967e-06, "loss": 16.7309, "step": 395560 }, { "epoch": 0.799076427073696, "grad_norm": 205.42091369628906, "learning_rate": 1.2709918006722355e-06, "loss": 33.1063, "step": 395570 }, { "epoch": 0.7990966277063798, "grad_norm": 1120.8974609375, "learning_rate": 1.2707592732316092e-06, "loss": 23.3487, "step": 395580 }, { "epoch": 0.7991168283390636, "grad_norm": 265.7613525390625, "learning_rate": 1.2705267639668501e-06, "loss": 14.2579, "step": 395590 }, { "epoch": 0.7991370289717474, "grad_norm": 13.7578125, "learning_rate": 1.2702942728790897e-06, "loss": 10.8584, "step": 395600 }, { "epoch": 0.7991572296044313, "grad_norm": 486.94915771484375, "learning_rate": 1.2700617999694626e-06, "loss": 19.4967, "step": 395610 }, { "epoch": 0.7991774302371151, "grad_norm": 169.88043212890625, "learning_rate": 1.2698293452391036e-06, "loss": 20.9465, "step": 395620 }, { "epoch": 0.7991976308697989, "grad_norm": 387.9173889160156, "learning_rate": 1.2695969086891436e-06, "loss": 21.2436, "step": 395630 }, { "epoch": 0.7992178315024827, "grad_norm": 304.60455322265625, "learning_rate": 1.2693644903207146e-06, "loss": 12.9429, "step": 395640 }, { "epoch": 0.7992380321351664, "grad_norm": 519.8191528320312, "learning_rate": 1.2691320901349518e-06, "loss": 14.3901, "step": 395650 }, { "epoch": 0.7992582327678502, "grad_norm": 126.75354766845703, "learning_rate": 1.2688997081329874e-06, "loss": 18.2547, "step": 395660 }, { "epoch": 0.7992784334005341, "grad_norm": 332.45257568359375, "learning_rate": 1.2686673443159515e-06, "loss": 16.2801, "step": 395670 }, { "epoch": 
0.7992986340332179, "grad_norm": 212.67904663085938, "learning_rate": 1.2684349986849791e-06, "loss": 21.5422, "step": 395680 }, { "epoch": 0.7993188346659017, "grad_norm": 366.7095947265625, "learning_rate": 1.2682026712412016e-06, "loss": 25.9652, "step": 395690 }, { "epoch": 0.7993390352985855, "grad_norm": 611.2752685546875, "learning_rate": 1.2679703619857525e-06, "loss": 19.9384, "step": 395700 }, { "epoch": 0.7993592359312693, "grad_norm": 352.1509094238281, "learning_rate": 1.2677380709197634e-06, "loss": 11.0338, "step": 395710 }, { "epoch": 0.7993794365639532, "grad_norm": 146.7178497314453, "learning_rate": 1.2675057980443644e-06, "loss": 8.9294, "step": 395720 }, { "epoch": 0.799399637196637, "grad_norm": 318.03125, "learning_rate": 1.2672735433606914e-06, "loss": 19.5553, "step": 395730 }, { "epoch": 0.7994198378293208, "grad_norm": 471.7959289550781, "learning_rate": 1.2670413068698745e-06, "loss": 23.6517, "step": 395740 }, { "epoch": 0.7994400384620046, "grad_norm": 507.58929443359375, "learning_rate": 1.2668090885730439e-06, "loss": 18.7919, "step": 395750 }, { "epoch": 0.7994602390946884, "grad_norm": 731.7949829101562, "learning_rate": 1.2665768884713326e-06, "loss": 16.968, "step": 395760 }, { "epoch": 0.7994804397273723, "grad_norm": 475.5935363769531, "learning_rate": 1.2663447065658746e-06, "loss": 29.8474, "step": 395770 }, { "epoch": 0.7995006403600561, "grad_norm": 175.68096923828125, "learning_rate": 1.2661125428577998e-06, "loss": 14.3358, "step": 395780 }, { "epoch": 0.7995208409927399, "grad_norm": 0.010071820579469204, "learning_rate": 1.265880397348238e-06, "loss": 21.9857, "step": 395790 }, { "epoch": 0.7995410416254237, "grad_norm": 28.79306411743164, "learning_rate": 1.2656482700383238e-06, "loss": 17.6525, "step": 395800 }, { "epoch": 0.7995612422581075, "grad_norm": 641.2723388671875, "learning_rate": 1.2654161609291864e-06, "loss": 14.5989, "step": 395810 }, { "epoch": 0.7995814428907914, "grad_norm": 263.36883544921875, "learning_rate": 1.265184070021957e-06, "loss": 31.5257, "step": 395820 }, { "epoch": 0.7996016435234752, "grad_norm": 250.98660278320312, "learning_rate": 1.2649519973177672e-06, "loss": 22.3223, "step": 395830 }, { "epoch": 0.799621844156159, "grad_norm": 568.1674194335938, "learning_rate": 1.2647199428177509e-06, "loss": 43.9173, "step": 395840 }, { "epoch": 0.7996420447888428, "grad_norm": 452.78033447265625, "learning_rate": 1.2644879065230343e-06, "loss": 29.0686, "step": 395850 }, { "epoch": 0.7996622454215266, "grad_norm": 174.3638153076172, "learning_rate": 1.26425588843475e-06, "loss": 22.5565, "step": 395860 }, { "epoch": 0.7996824460542105, "grad_norm": 311.9055480957031, "learning_rate": 1.2640238885540313e-06, "loss": 17.1418, "step": 395870 }, { "epoch": 0.7997026466868943, "grad_norm": 186.3204345703125, "learning_rate": 1.263791906882007e-06, "loss": 15.7676, "step": 395880 }, { "epoch": 0.7997228473195781, "grad_norm": 456.97198486328125, "learning_rate": 1.263559943419806e-06, "loss": 27.3653, "step": 395890 }, { "epoch": 0.7997430479522618, "grad_norm": 795.5870361328125, "learning_rate": 1.2633279981685608e-06, "loss": 21.4389, "step": 395900 }, { "epoch": 0.7997632485849456, "grad_norm": 357.4993591308594, "learning_rate": 1.2630960711294049e-06, "loss": 17.7066, "step": 395910 }, { "epoch": 0.7997834492176294, "grad_norm": 413.597412109375, "learning_rate": 1.2628641623034627e-06, "loss": 27.9636, "step": 395920 }, { "epoch": 0.7998036498503133, "grad_norm": 437.23468017578125, "learning_rate": 
1.2626322716918672e-06, "loss": 15.24, "step": 395930 }, { "epoch": 0.7998238504829971, "grad_norm": 6900.7216796875, "learning_rate": 1.2624003992957494e-06, "loss": 55.5842, "step": 395940 }, { "epoch": 0.7998440511156809, "grad_norm": 433.11737060546875, "learning_rate": 1.2621685451162397e-06, "loss": 11.88, "step": 395950 }, { "epoch": 0.7998642517483647, "grad_norm": 690.3412475585938, "learning_rate": 1.2619367091544654e-06, "loss": 22.324, "step": 395960 }, { "epoch": 0.7998844523810485, "grad_norm": 144.7787322998047, "learning_rate": 1.2617048914115593e-06, "loss": 21.3205, "step": 395970 }, { "epoch": 0.7999046530137324, "grad_norm": 545.2615966796875, "learning_rate": 1.2614730918886509e-06, "loss": 26.1908, "step": 395980 }, { "epoch": 0.7999248536464162, "grad_norm": 277.0710144042969, "learning_rate": 1.261241310586867e-06, "loss": 19.5164, "step": 395990 }, { "epoch": 0.7999450542791, "grad_norm": 175.83204650878906, "learning_rate": 1.2610095475073415e-06, "loss": 20.0264, "step": 396000 }, { "epoch": 0.7999652549117838, "grad_norm": 11.009918212890625, "learning_rate": 1.2607778026512002e-06, "loss": 16.6202, "step": 396010 }, { "epoch": 0.7999854555444676, "grad_norm": 525.2348022460938, "learning_rate": 1.2605460760195759e-06, "loss": 19.783, "step": 396020 }, { "epoch": 0.8000056561771515, "grad_norm": 188.04359436035156, "learning_rate": 1.2603143676135965e-06, "loss": 20.8798, "step": 396030 }, { "epoch": 0.8000258568098353, "grad_norm": 24.16062355041504, "learning_rate": 1.26008267743439e-06, "loss": 14.6832, "step": 396040 }, { "epoch": 0.8000460574425191, "grad_norm": 438.58056640625, "learning_rate": 1.2598510054830888e-06, "loss": 15.1189, "step": 396050 }, { "epoch": 0.8000662580752029, "grad_norm": 303.39556884765625, "learning_rate": 1.2596193517608179e-06, "loss": 21.6028, "step": 396060 }, { "epoch": 0.8000864587078867, "grad_norm": 307.9757080078125, "learning_rate": 1.25938771626871e-06, "loss": 26.5777, "step": 396070 }, { "epoch": 0.8001066593405706, "grad_norm": 0.0, "learning_rate": 1.259156099007892e-06, "loss": 16.7807, "step": 396080 }, { "epoch": 0.8001268599732544, "grad_norm": 318.7864685058594, "learning_rate": 1.2589244999794947e-06, "loss": 9.5164, "step": 396090 }, { "epoch": 0.8001470606059382, "grad_norm": 228.4259490966797, "learning_rate": 1.2586929191846453e-06, "loss": 18.3174, "step": 396100 }, { "epoch": 0.800167261238622, "grad_norm": 234.86070251464844, "learning_rate": 1.2584613566244713e-06, "loss": 21.9184, "step": 396110 }, { "epoch": 0.8001874618713058, "grad_norm": 374.68157958984375, "learning_rate": 1.2582298123001046e-06, "loss": 20.8922, "step": 396120 }, { "epoch": 0.8002076625039897, "grad_norm": 54.8770637512207, "learning_rate": 1.2579982862126722e-06, "loss": 9.7939, "step": 396130 }, { "epoch": 0.8002278631366735, "grad_norm": 804.0513916015625, "learning_rate": 1.2577667783633007e-06, "loss": 12.1743, "step": 396140 }, { "epoch": 0.8002480637693573, "grad_norm": 531.5899658203125, "learning_rate": 1.25753528875312e-06, "loss": 16.6296, "step": 396150 }, { "epoch": 0.800268264402041, "grad_norm": 277.38885498046875, "learning_rate": 1.2573038173832597e-06, "loss": 30.3747, "step": 396160 }, { "epoch": 0.8002884650347248, "grad_norm": 455.3837890625, "learning_rate": 1.2570723642548465e-06, "loss": 16.5166, "step": 396170 }, { "epoch": 0.8003086656674087, "grad_norm": 337.4343566894531, "learning_rate": 1.2568409293690077e-06, "loss": 12.9993, "step": 396180 }, { "epoch": 0.8003288663000925, "grad_norm": 
39.89707946777344, "learning_rate": 1.2566095127268734e-06, "loss": 31.3835, "step": 396190 }, { "epoch": 0.8003490669327763, "grad_norm": 470.049072265625, "learning_rate": 1.2563781143295705e-06, "loss": 35.6281, "step": 396200 }, { "epoch": 0.8003692675654601, "grad_norm": 701.8652954101562, "learning_rate": 1.2561467341782247e-06, "loss": 23.626, "step": 396210 }, { "epoch": 0.8003894681981439, "grad_norm": 103.31476593017578, "learning_rate": 1.2559153722739658e-06, "loss": 16.6704, "step": 396220 }, { "epoch": 0.8004096688308278, "grad_norm": 75.45716094970703, "learning_rate": 1.2556840286179234e-06, "loss": 20.8166, "step": 396230 }, { "epoch": 0.8004298694635116, "grad_norm": 125.75484466552734, "learning_rate": 1.2554527032112224e-06, "loss": 5.9354, "step": 396240 }, { "epoch": 0.8004500700961954, "grad_norm": 353.441162109375, "learning_rate": 1.2552213960549891e-06, "loss": 14.4581, "step": 396250 }, { "epoch": 0.8004702707288792, "grad_norm": 245.49456787109375, "learning_rate": 1.254990107150354e-06, "loss": 8.6626, "step": 396260 }, { "epoch": 0.800490471361563, "grad_norm": 816.9166259765625, "learning_rate": 1.2547588364984431e-06, "loss": 23.9752, "step": 396270 }, { "epoch": 0.8005106719942469, "grad_norm": 785.3043823242188, "learning_rate": 1.2545275841003818e-06, "loss": 15.4153, "step": 396280 }, { "epoch": 0.8005308726269307, "grad_norm": 751.38232421875, "learning_rate": 1.2542963499573007e-06, "loss": 20.2525, "step": 396290 }, { "epoch": 0.8005510732596145, "grad_norm": 422.921875, "learning_rate": 1.2540651340703231e-06, "loss": 22.5981, "step": 396300 }, { "epoch": 0.8005712738922983, "grad_norm": 251.9776611328125, "learning_rate": 1.253833936440579e-06, "loss": 22.8785, "step": 396310 }, { "epoch": 0.8005914745249821, "grad_norm": 328.57470703125, "learning_rate": 1.2536027570691938e-06, "loss": 16.0463, "step": 396320 }, { "epoch": 0.800611675157666, "grad_norm": 490.6477966308594, "learning_rate": 1.2533715959572935e-06, "loss": 18.1275, "step": 396330 }, { "epoch": 0.8006318757903498, "grad_norm": 616.2587890625, "learning_rate": 1.253140453106007e-06, "loss": 33.4304, "step": 396340 }, { "epoch": 0.8006520764230336, "grad_norm": 308.67938232421875, "learning_rate": 1.2529093285164579e-06, "loss": 26.025, "step": 396350 }, { "epoch": 0.8006722770557174, "grad_norm": 473.9761047363281, "learning_rate": 1.2526782221897755e-06, "loss": 7.2248, "step": 396360 }, { "epoch": 0.8006924776884012, "grad_norm": 398.3085632324219, "learning_rate": 1.252447134127084e-06, "loss": 28.8853, "step": 396370 }, { "epoch": 0.8007126783210851, "grad_norm": 387.9682312011719, "learning_rate": 1.2522160643295112e-06, "loss": 9.0265, "step": 396380 }, { "epoch": 0.8007328789537689, "grad_norm": 279.651611328125, "learning_rate": 1.2519850127981836e-06, "loss": 12.125, "step": 396390 }, { "epoch": 0.8007530795864527, "grad_norm": 390.2597961425781, "learning_rate": 1.2517539795342248e-06, "loss": 22.5556, "step": 396400 }, { "epoch": 0.8007732802191365, "grad_norm": 231.67532348632812, "learning_rate": 1.2515229645387639e-06, "loss": 11.2884, "step": 396410 }, { "epoch": 0.8007934808518202, "grad_norm": 504.6092529296875, "learning_rate": 1.2512919678129254e-06, "loss": 10.5464, "step": 396420 }, { "epoch": 0.800813681484504, "grad_norm": 539.6251220703125, "learning_rate": 1.2510609893578335e-06, "loss": 32.6886, "step": 396430 }, { "epoch": 0.8008338821171879, "grad_norm": 903.2691650390625, "learning_rate": 1.2508300291746162e-06, "loss": 34.8789, "step": 396440 }, { "epoch": 
0.8008540827498717, "grad_norm": 308.7208557128906, "learning_rate": 1.2505990872644008e-06, "loss": 17.5599, "step": 396450 }, { "epoch": 0.8008742833825555, "grad_norm": 523.7728271484375, "learning_rate": 1.2503681636283082e-06, "loss": 16.3667, "step": 396460 }, { "epoch": 0.8008944840152393, "grad_norm": 218.46133422851562, "learning_rate": 1.2501372582674665e-06, "loss": 12.6188, "step": 396470 }, { "epoch": 0.8009146846479231, "grad_norm": 177.7940216064453, "learning_rate": 1.2499063711830023e-06, "loss": 16.5464, "step": 396480 }, { "epoch": 0.800934885280607, "grad_norm": 101.02649688720703, "learning_rate": 1.2496755023760398e-06, "loss": 24.4657, "step": 396490 }, { "epoch": 0.8009550859132908, "grad_norm": 402.1471862792969, "learning_rate": 1.2494446518477022e-06, "loss": 12.5647, "step": 396500 }, { "epoch": 0.8009752865459746, "grad_norm": 430.8567810058594, "learning_rate": 1.249213819599117e-06, "loss": 13.368, "step": 396510 }, { "epoch": 0.8009954871786584, "grad_norm": 354.9683837890625, "learning_rate": 1.248983005631411e-06, "loss": 15.4742, "step": 396520 }, { "epoch": 0.8010156878113422, "grad_norm": 836.7166748046875, "learning_rate": 1.2487522099457044e-06, "loss": 26.8622, "step": 396530 }, { "epoch": 0.8010358884440261, "grad_norm": 420.2633361816406, "learning_rate": 1.248521432543125e-06, "loss": 29.0428, "step": 396540 }, { "epoch": 0.8010560890767099, "grad_norm": 335.16082763671875, "learning_rate": 1.248290673424798e-06, "loss": 11.3361, "step": 396550 }, { "epoch": 0.8010762897093937, "grad_norm": 80.3372802734375, "learning_rate": 1.2480599325918474e-06, "loss": 10.0713, "step": 396560 }, { "epoch": 0.8010964903420775, "grad_norm": 36.19289016723633, "learning_rate": 1.247829210045396e-06, "loss": 32.7652, "step": 396570 }, { "epoch": 0.8011166909747613, "grad_norm": 646.9605102539062, "learning_rate": 1.2475985057865714e-06, "loss": 17.6624, "step": 396580 }, { "epoch": 0.8011368916074452, "grad_norm": 172.8927459716797, "learning_rate": 1.2473678198164967e-06, "loss": 20.7343, "step": 396590 }, { "epoch": 0.801157092240129, "grad_norm": 106.66670989990234, "learning_rate": 1.2471371521362946e-06, "loss": 15.0763, "step": 396600 }, { "epoch": 0.8011772928728128, "grad_norm": 533.6571044921875, "learning_rate": 1.2469065027470923e-06, "loss": 21.8675, "step": 396610 }, { "epoch": 0.8011974935054966, "grad_norm": 0.0, "learning_rate": 1.246675871650011e-06, "loss": 11.3138, "step": 396620 }, { "epoch": 0.8012176941381804, "grad_norm": 320.3057556152344, "learning_rate": 1.2464452588461778e-06, "loss": 8.236, "step": 396630 }, { "epoch": 0.8012378947708643, "grad_norm": 307.5802307128906, "learning_rate": 1.2462146643367156e-06, "loss": 18.8169, "step": 396640 }, { "epoch": 0.8012580954035481, "grad_norm": 136.44705200195312, "learning_rate": 1.2459840881227458e-06, "loss": 20.7404, "step": 396650 }, { "epoch": 0.8012782960362319, "grad_norm": 513.2805786132812, "learning_rate": 1.2457535302053957e-06, "loss": 12.792, "step": 396660 }, { "epoch": 0.8012984966689156, "grad_norm": 334.1730041503906, "learning_rate": 1.2455229905857863e-06, "loss": 12.3761, "step": 396670 }, { "epoch": 0.8013186973015994, "grad_norm": 482.0895690917969, "learning_rate": 1.2452924692650443e-06, "loss": 20.5428, "step": 396680 }, { "epoch": 0.8013388979342833, "grad_norm": 203.59678649902344, "learning_rate": 1.2450619662442892e-06, "loss": 10.6843, "step": 396690 }, { "epoch": 0.8013590985669671, "grad_norm": 435.9341735839844, "learning_rate": 1.2448314815246487e-06, "loss": 
13.401, "step": 396700 }, { "epoch": 0.8013792991996509, "grad_norm": 315.55419921875, "learning_rate": 1.244601015107244e-06, "loss": 20.2064, "step": 396710 }, { "epoch": 0.8013994998323347, "grad_norm": 574.0961303710938, "learning_rate": 1.2443705669931966e-06, "loss": 11.4189, "step": 396720 }, { "epoch": 0.8014197004650185, "grad_norm": 1980.1788330078125, "learning_rate": 1.2441401371836337e-06, "loss": 55.0449, "step": 396730 }, { "epoch": 0.8014399010977024, "grad_norm": 248.682373046875, "learning_rate": 1.2439097256796756e-06, "loss": 24.5463, "step": 396740 }, { "epoch": 0.8014601017303862, "grad_norm": 281.3458557128906, "learning_rate": 1.2436793324824448e-06, "loss": 26.7099, "step": 396750 }, { "epoch": 0.80148030236307, "grad_norm": 236.09158325195312, "learning_rate": 1.2434489575930652e-06, "loss": 7.0798, "step": 396760 }, { "epoch": 0.8015005029957538, "grad_norm": 374.0956115722656, "learning_rate": 1.2432186010126613e-06, "loss": 13.3354, "step": 396770 }, { "epoch": 0.8015207036284376, "grad_norm": 448.5460510253906, "learning_rate": 1.2429882627423545e-06, "loss": 22.8503, "step": 396780 }, { "epoch": 0.8015409042611215, "grad_norm": 385.36029052734375, "learning_rate": 1.2427579427832654e-06, "loss": 18.5393, "step": 396790 }, { "epoch": 0.8015611048938053, "grad_norm": 280.7149353027344, "learning_rate": 1.24252764113652e-06, "loss": 19.2721, "step": 396800 }, { "epoch": 0.8015813055264891, "grad_norm": 190.941162109375, "learning_rate": 1.2422973578032394e-06, "loss": 15.9896, "step": 396810 }, { "epoch": 0.8016015061591729, "grad_norm": 249.75369262695312, "learning_rate": 1.2420670927845441e-06, "loss": 27.7864, "step": 396820 }, { "epoch": 0.8016217067918567, "grad_norm": 50.38788986206055, "learning_rate": 1.2418368460815578e-06, "loss": 16.6244, "step": 396830 }, { "epoch": 0.8016419074245406, "grad_norm": 477.87506103515625, "learning_rate": 1.2416066176954044e-06, "loss": 15.3904, "step": 396840 }, { "epoch": 0.8016621080572244, "grad_norm": 497.0939025878906, "learning_rate": 1.241376407627205e-06, "loss": 27.108, "step": 396850 }, { "epoch": 0.8016823086899082, "grad_norm": 310.3155212402344, "learning_rate": 1.2411462158780791e-06, "loss": 28.3227, "step": 396860 }, { "epoch": 0.801702509322592, "grad_norm": 265.06317138671875, "learning_rate": 1.2409160424491524e-06, "loss": 13.765, "step": 396870 }, { "epoch": 0.8017227099552758, "grad_norm": 237.19984436035156, "learning_rate": 1.240685887341545e-06, "loss": 16.64, "step": 396880 }, { "epoch": 0.8017429105879597, "grad_norm": 438.68988037109375, "learning_rate": 1.240455750556377e-06, "loss": 22.226, "step": 396890 }, { "epoch": 0.8017631112206435, "grad_norm": 262.5321044921875, "learning_rate": 1.240225632094773e-06, "loss": 21.8872, "step": 396900 }, { "epoch": 0.8017833118533273, "grad_norm": 634.115966796875, "learning_rate": 1.2399955319578521e-06, "loss": 28.4038, "step": 396910 }, { "epoch": 0.8018035124860111, "grad_norm": 63.45467758178711, "learning_rate": 1.2397654501467387e-06, "loss": 18.5422, "step": 396920 }, { "epoch": 0.8018237131186948, "grad_norm": 605.983642578125, "learning_rate": 1.2395353866625521e-06, "loss": 17.3402, "step": 396930 }, { "epoch": 0.8018439137513786, "grad_norm": 199.43296813964844, "learning_rate": 1.2393053415064121e-06, "loss": 11.353, "step": 396940 }, { "epoch": 0.8018641143840625, "grad_norm": 97.341796875, "learning_rate": 1.2390753146794438e-06, "loss": 20.3572, "step": 396950 }, { "epoch": 0.8018843150167463, "grad_norm": 197.81533813476562, 
"learning_rate": 1.2388453061827644e-06, "loss": 14.9244, "step": 396960 }, { "epoch": 0.8019045156494301, "grad_norm": 1986.1629638671875, "learning_rate": 1.2386153160174986e-06, "loss": 25.1023, "step": 396970 }, { "epoch": 0.8019247162821139, "grad_norm": 363.91241455078125, "learning_rate": 1.2383853441847638e-06, "loss": 27.498, "step": 396980 }, { "epoch": 0.8019449169147977, "grad_norm": 460.5152893066406, "learning_rate": 1.2381553906856842e-06, "loss": 19.7326, "step": 396990 }, { "epoch": 0.8019651175474816, "grad_norm": 134.38975524902344, "learning_rate": 1.2379254555213788e-06, "loss": 23.5737, "step": 397000 }, { "epoch": 0.8019853181801654, "grad_norm": 89.52670288085938, "learning_rate": 1.2376955386929673e-06, "loss": 10.0938, "step": 397010 }, { "epoch": 0.8020055188128492, "grad_norm": 277.80548095703125, "learning_rate": 1.2374656402015728e-06, "loss": 21.5251, "step": 397020 }, { "epoch": 0.802025719445533, "grad_norm": 438.4913024902344, "learning_rate": 1.2372357600483142e-06, "loss": 20.0417, "step": 397030 }, { "epoch": 0.8020459200782168, "grad_norm": 557.4075317382812, "learning_rate": 1.2370058982343109e-06, "loss": 25.3092, "step": 397040 }, { "epoch": 0.8020661207109007, "grad_norm": 263.2080383300781, "learning_rate": 1.2367760547606844e-06, "loss": 13.6746, "step": 397050 }, { "epoch": 0.8020863213435845, "grad_norm": 215.56797790527344, "learning_rate": 1.236546229628558e-06, "loss": 12.948, "step": 397060 }, { "epoch": 0.8021065219762683, "grad_norm": 32.382484436035156, "learning_rate": 1.2363164228390456e-06, "loss": 21.1817, "step": 397070 }, { "epoch": 0.8021267226089521, "grad_norm": 313.60711669921875, "learning_rate": 1.236086634393271e-06, "loss": 6.7464, "step": 397080 }, { "epoch": 0.802146923241636, "grad_norm": 342.4676208496094, "learning_rate": 1.2358568642923546e-06, "loss": 12.7703, "step": 397090 }, { "epoch": 0.8021671238743198, "grad_norm": 247.1126708984375, "learning_rate": 1.2356271125374153e-06, "loss": 19.5519, "step": 397100 }, { "epoch": 0.8021873245070036, "grad_norm": 73.73995971679688, "learning_rate": 1.2353973791295715e-06, "loss": 18.9362, "step": 397110 }, { "epoch": 0.8022075251396874, "grad_norm": 462.5740051269531, "learning_rate": 1.2351676640699444e-06, "loss": 30.8039, "step": 397120 }, { "epoch": 0.8022277257723712, "grad_norm": 273.22747802734375, "learning_rate": 1.2349379673596568e-06, "loss": 8.9714, "step": 397130 }, { "epoch": 0.802247926405055, "grad_norm": 566.9596557617188, "learning_rate": 1.2347082889998214e-06, "loss": 15.8455, "step": 397140 }, { "epoch": 0.8022681270377389, "grad_norm": 492.3504638671875, "learning_rate": 1.234478628991561e-06, "loss": 18.6748, "step": 397150 }, { "epoch": 0.8022883276704227, "grad_norm": 333.3385314941406, "learning_rate": 1.234248987335997e-06, "loss": 16.2732, "step": 397160 }, { "epoch": 0.8023085283031065, "grad_norm": 222.15713500976562, "learning_rate": 1.234019364034247e-06, "loss": 15.5507, "step": 397170 }, { "epoch": 0.8023287289357902, "grad_norm": 364.03106689453125, "learning_rate": 1.2337897590874275e-06, "loss": 29.7851, "step": 397180 }, { "epoch": 0.802348929568474, "grad_norm": 205.8481903076172, "learning_rate": 1.2335601724966617e-06, "loss": 12.525, "step": 397190 }, { "epoch": 0.8023691302011579, "grad_norm": 452.48822021484375, "learning_rate": 1.2333306042630672e-06, "loss": 20.0239, "step": 397200 }, { "epoch": 0.8023893308338417, "grad_norm": 397.8550720214844, "learning_rate": 1.2331010543877608e-06, "loss": 29.5752, "step": 397210 }, { 
"epoch": 0.8024095314665255, "grad_norm": 351.6863098144531, "learning_rate": 1.232871522871864e-06, "loss": 23.35, "step": 397220 }, { "epoch": 0.8024297320992093, "grad_norm": 88.65419006347656, "learning_rate": 1.2326420097164938e-06, "loss": 17.8378, "step": 397230 }, { "epoch": 0.8024499327318931, "grad_norm": 241.8977508544922, "learning_rate": 1.2324125149227705e-06, "loss": 22.2992, "step": 397240 }, { "epoch": 0.802470133364577, "grad_norm": 263.2344970703125, "learning_rate": 1.2321830384918116e-06, "loss": 18.148, "step": 397250 }, { "epoch": 0.8024903339972608, "grad_norm": 130.0895538330078, "learning_rate": 1.2319535804247345e-06, "loss": 6.7968, "step": 397260 }, { "epoch": 0.8025105346299446, "grad_norm": 520.0706787109375, "learning_rate": 1.2317241407226598e-06, "loss": 26.9545, "step": 397270 }, { "epoch": 0.8025307352626284, "grad_norm": 229.78622436523438, "learning_rate": 1.2314947193867034e-06, "loss": 9.8173, "step": 397280 }, { "epoch": 0.8025509358953122, "grad_norm": 122.68806457519531, "learning_rate": 1.2312653164179861e-06, "loss": 9.8707, "step": 397290 }, { "epoch": 0.802571136527996, "grad_norm": 321.5652770996094, "learning_rate": 1.2310359318176229e-06, "loss": 17.3135, "step": 397300 }, { "epoch": 0.8025913371606799, "grad_norm": 214.66514587402344, "learning_rate": 1.2308065655867346e-06, "loss": 20.6322, "step": 397310 }, { "epoch": 0.8026115377933637, "grad_norm": 366.59429931640625, "learning_rate": 1.2305772177264385e-06, "loss": 11.4024, "step": 397320 }, { "epoch": 0.8026317384260475, "grad_norm": 274.4979553222656, "learning_rate": 1.2303478882378506e-06, "loss": 16.8746, "step": 397330 }, { "epoch": 0.8026519390587313, "grad_norm": 280.9788513183594, "learning_rate": 1.2301185771220907e-06, "loss": 40.8339, "step": 397340 }, { "epoch": 0.8026721396914152, "grad_norm": 299.7403564453125, "learning_rate": 1.2298892843802756e-06, "loss": 17.0093, "step": 397350 }, { "epoch": 0.802692340324099, "grad_norm": 426.50408935546875, "learning_rate": 1.2296600100135219e-06, "loss": 27.7678, "step": 397360 }, { "epoch": 0.8027125409567828, "grad_norm": 266.2039794921875, "learning_rate": 1.2294307540229478e-06, "loss": 19.4155, "step": 397370 }, { "epoch": 0.8027327415894666, "grad_norm": 487.9114685058594, "learning_rate": 1.2292015164096726e-06, "loss": 15.9295, "step": 397380 }, { "epoch": 0.8027529422221504, "grad_norm": 229.36537170410156, "learning_rate": 1.2289722971748113e-06, "loss": 17.6065, "step": 397390 }, { "epoch": 0.8027731428548343, "grad_norm": 176.56796264648438, "learning_rate": 1.2287430963194807e-06, "loss": 22.1076, "step": 397400 }, { "epoch": 0.8027933434875181, "grad_norm": 325.32183837890625, "learning_rate": 1.2285139138448005e-06, "loss": 11.1035, "step": 397410 }, { "epoch": 0.8028135441202019, "grad_norm": 242.32443237304688, "learning_rate": 1.2282847497518857e-06, "loss": 24.5044, "step": 397420 }, { "epoch": 0.8028337447528857, "grad_norm": 234.795166015625, "learning_rate": 1.2280556040418517e-06, "loss": 18.323, "step": 397430 }, { "epoch": 0.8028539453855694, "grad_norm": 216.63821411132812, "learning_rate": 1.2278264767158176e-06, "loss": 29.2998, "step": 397440 }, { "epoch": 0.8028741460182532, "grad_norm": 355.3334655761719, "learning_rate": 1.2275973677749015e-06, "loss": 8.1372, "step": 397450 }, { "epoch": 0.8028943466509371, "grad_norm": 280.875, "learning_rate": 1.2273682772202183e-06, "loss": 18.087, "step": 397460 }, { "epoch": 0.8029145472836209, "grad_norm": 353.6507873535156, "learning_rate": 
1.2271392050528825e-06, "loss": 24.4111, "step": 397470 }, { "epoch": 0.8029347479163047, "grad_norm": 532.7893676757812, "learning_rate": 1.2269101512740145e-06, "loss": 26.5952, "step": 397480 }, { "epoch": 0.8029549485489885, "grad_norm": 314.47088623046875, "learning_rate": 1.2266811158847285e-06, "loss": 13.219, "step": 397490 }, { "epoch": 0.8029751491816723, "grad_norm": 138.06919860839844, "learning_rate": 1.22645209888614e-06, "loss": 17.6856, "step": 397500 }, { "epoch": 0.8029953498143562, "grad_norm": 666.2764892578125, "learning_rate": 1.226223100279368e-06, "loss": 19.3284, "step": 397510 }, { "epoch": 0.80301555044704, "grad_norm": 509.1011962890625, "learning_rate": 1.2259941200655246e-06, "loss": 8.9179, "step": 397520 }, { "epoch": 0.8030357510797238, "grad_norm": 386.4242248535156, "learning_rate": 1.2257651582457302e-06, "loss": 20.7277, "step": 397530 }, { "epoch": 0.8030559517124076, "grad_norm": 325.4468688964844, "learning_rate": 1.2255362148210987e-06, "loss": 18.3119, "step": 397540 }, { "epoch": 0.8030761523450914, "grad_norm": 291.003662109375, "learning_rate": 1.2253072897927437e-06, "loss": 12.979, "step": 397550 }, { "epoch": 0.8030963529777753, "grad_norm": 302.1463928222656, "learning_rate": 1.2250783831617852e-06, "loss": 9.8737, "step": 397560 }, { "epoch": 0.8031165536104591, "grad_norm": 126.8495101928711, "learning_rate": 1.2248494949293354e-06, "loss": 31.0371, "step": 397570 }, { "epoch": 0.8031367542431429, "grad_norm": 303.8444519042969, "learning_rate": 1.2246206250965127e-06, "loss": 23.4929, "step": 397580 }, { "epoch": 0.8031569548758267, "grad_norm": 367.688720703125, "learning_rate": 1.2243917736644296e-06, "loss": 13.9533, "step": 397590 }, { "epoch": 0.8031771555085105, "grad_norm": 562.4437255859375, "learning_rate": 1.2241629406342048e-06, "loss": 34.8172, "step": 397600 }, { "epoch": 0.8031973561411944, "grad_norm": 373.95379638671875, "learning_rate": 1.2239341260069516e-06, "loss": 20.0785, "step": 397610 }, { "epoch": 0.8032175567738782, "grad_norm": 217.52337646484375, "learning_rate": 1.2237053297837841e-06, "loss": 18.1145, "step": 397620 }, { "epoch": 0.803237757406562, "grad_norm": 330.6252136230469, "learning_rate": 1.2234765519658204e-06, "loss": 17.3215, "step": 397630 }, { "epoch": 0.8032579580392458, "grad_norm": 572.371826171875, "learning_rate": 1.2232477925541736e-06, "loss": 21.833, "step": 397640 }, { "epoch": 0.8032781586719296, "grad_norm": 145.16079711914062, "learning_rate": 1.223019051549958e-06, "loss": 26.6915, "step": 397650 }, { "epoch": 0.8032983593046135, "grad_norm": 128.88552856445312, "learning_rate": 1.2227903289542892e-06, "loss": 11.6647, "step": 397660 }, { "epoch": 0.8033185599372973, "grad_norm": 228.22393798828125, "learning_rate": 1.2225616247682848e-06, "loss": 16.5292, "step": 397670 }, { "epoch": 0.8033387605699811, "grad_norm": 77.66017150878906, "learning_rate": 1.2223329389930544e-06, "loss": 25.5775, "step": 397680 }, { "epoch": 0.8033589612026649, "grad_norm": 143.2098846435547, "learning_rate": 1.2221042716297148e-06, "loss": 9.8505, "step": 397690 }, { "epoch": 0.8033791618353486, "grad_norm": 935.0286254882812, "learning_rate": 1.2218756226793827e-06, "loss": 15.5926, "step": 397700 }, { "epoch": 0.8033993624680325, "grad_norm": 279.19903564453125, "learning_rate": 1.22164699214317e-06, "loss": 17.6214, "step": 397710 }, { "epoch": 0.8034195631007163, "grad_norm": 439.5152282714844, "learning_rate": 1.2214183800221906e-06, "loss": 29.815, "step": 397720 }, { "epoch": 0.8034397637334001, 
"grad_norm": 267.2311096191406, "learning_rate": 1.2211897863175597e-06, "loss": 23.3302, "step": 397730 }, { "epoch": 0.8034599643660839, "grad_norm": 328.5450439453125, "learning_rate": 1.2209612110303941e-06, "loss": 27.6764, "step": 397740 }, { "epoch": 0.8034801649987677, "grad_norm": 0.0, "learning_rate": 1.2207326541618024e-06, "loss": 16.7296, "step": 397750 }, { "epoch": 0.8035003656314516, "grad_norm": 427.4633483886719, "learning_rate": 1.2205041157129017e-06, "loss": 20.971, "step": 397760 }, { "epoch": 0.8035205662641354, "grad_norm": 569.0933837890625, "learning_rate": 1.2202755956848067e-06, "loss": 13.5047, "step": 397770 }, { "epoch": 0.8035407668968192, "grad_norm": 410.8171691894531, "learning_rate": 1.2200470940786302e-06, "loss": 14.1582, "step": 397780 }, { "epoch": 0.803560967529503, "grad_norm": 77.08499908447266, "learning_rate": 1.219818610895484e-06, "loss": 17.0055, "step": 397790 }, { "epoch": 0.8035811681621868, "grad_norm": 594.0213623046875, "learning_rate": 1.2195901461364851e-06, "loss": 12.5723, "step": 397800 }, { "epoch": 0.8036013687948707, "grad_norm": 128.66441345214844, "learning_rate": 1.2193616998027452e-06, "loss": 15.1553, "step": 397810 }, { "epoch": 0.8036215694275545, "grad_norm": 294.89154052734375, "learning_rate": 1.2191332718953763e-06, "loss": 15.3579, "step": 397820 }, { "epoch": 0.8036417700602383, "grad_norm": 577.250732421875, "learning_rate": 1.2189048624154948e-06, "loss": 20.3436, "step": 397830 }, { "epoch": 0.8036619706929221, "grad_norm": 734.5863647460938, "learning_rate": 1.2186764713642108e-06, "loss": 18.3468, "step": 397840 }, { "epoch": 0.8036821713256059, "grad_norm": 290.3452453613281, "learning_rate": 1.218448098742641e-06, "loss": 13.6241, "step": 397850 }, { "epoch": 0.8037023719582898, "grad_norm": 194.62033081054688, "learning_rate": 1.2182197445518946e-06, "loss": 19.5496, "step": 397860 }, { "epoch": 0.8037225725909736, "grad_norm": 214.85061645507812, "learning_rate": 1.2179914087930884e-06, "loss": 15.1666, "step": 397870 }, { "epoch": 0.8037427732236574, "grad_norm": 273.3961181640625, "learning_rate": 1.2177630914673327e-06, "loss": 16.2342, "step": 397880 }, { "epoch": 0.8037629738563412, "grad_norm": 601.1372680664062, "learning_rate": 1.2175347925757397e-06, "loss": 45.6443, "step": 397890 }, { "epoch": 0.803783174489025, "grad_norm": 984.5829467773438, "learning_rate": 1.217306512119425e-06, "loss": 20.6048, "step": 397900 }, { "epoch": 0.8038033751217089, "grad_norm": 319.5831298828125, "learning_rate": 1.2170782500994983e-06, "loss": 27.8044, "step": 397910 }, { "epoch": 0.8038235757543927, "grad_norm": 169.63516235351562, "learning_rate": 1.2168500065170747e-06, "loss": 12.06, "step": 397920 }, { "epoch": 0.8038437763870765, "grad_norm": 168.23828125, "learning_rate": 1.216621781373265e-06, "loss": 26.9941, "step": 397930 }, { "epoch": 0.8038639770197603, "grad_norm": 262.0617370605469, "learning_rate": 1.2163935746691807e-06, "loss": 18.2955, "step": 397940 }, { "epoch": 0.803884177652444, "grad_norm": 265.5741271972656, "learning_rate": 1.216165386405937e-06, "loss": 17.8595, "step": 397950 }, { "epoch": 0.8039043782851278, "grad_norm": 483.8246154785156, "learning_rate": 1.215937216584644e-06, "loss": 20.2787, "step": 397960 }, { "epoch": 0.8039245789178117, "grad_norm": 505.77178955078125, "learning_rate": 1.2157090652064124e-06, "loss": 21.4979, "step": 397970 }, { "epoch": 0.8039447795504955, "grad_norm": 43.7061653137207, "learning_rate": 1.215480932272356e-06, "loss": 9.9025, "step": 397980 }, 
{ "epoch": 0.8039649801831793, "grad_norm": 357.3522033691406, "learning_rate": 1.2152528177835892e-06, "loss": 16.7999, "step": 397990 }, { "epoch": 0.8039851808158631, "grad_norm": 564.4266967773438, "learning_rate": 1.2150247217412186e-06, "loss": 12.643, "step": 398000 }, { "epoch": 0.8040053814485469, "grad_norm": 115.50165557861328, "learning_rate": 1.2147966441463583e-06, "loss": 19.1571, "step": 398010 }, { "epoch": 0.8040255820812308, "grad_norm": 363.2052917480469, "learning_rate": 1.2145685850001216e-06, "loss": 15.7835, "step": 398020 }, { "epoch": 0.8040457827139146, "grad_norm": 247.6719970703125, "learning_rate": 1.2143405443036182e-06, "loss": 14.8488, "step": 398030 }, { "epoch": 0.8040659833465984, "grad_norm": 190.08120727539062, "learning_rate": 1.2141125220579585e-06, "loss": 9.337, "step": 398040 }, { "epoch": 0.8040861839792822, "grad_norm": 444.0274963378906, "learning_rate": 1.2138845182642555e-06, "loss": 21.3661, "step": 398050 }, { "epoch": 0.804106384611966, "grad_norm": 364.2178649902344, "learning_rate": 1.2136565329236217e-06, "loss": 13.6055, "step": 398060 }, { "epoch": 0.8041265852446499, "grad_norm": 142.36663818359375, "learning_rate": 1.2134285660371665e-06, "loss": 6.8863, "step": 398070 }, { "epoch": 0.8041467858773337, "grad_norm": 478.2822265625, "learning_rate": 1.2132006176059997e-06, "loss": 19.6823, "step": 398080 }, { "epoch": 0.8041669865100175, "grad_norm": 169.50241088867188, "learning_rate": 1.2129726876312348e-06, "loss": 24.8239, "step": 398090 }, { "epoch": 0.8041871871427013, "grad_norm": 357.72119140625, "learning_rate": 1.2127447761139821e-06, "loss": 15.6783, "step": 398100 }, { "epoch": 0.8042073877753851, "grad_norm": 366.50384521484375, "learning_rate": 1.2125168830553508e-06, "loss": 27.5083, "step": 398110 }, { "epoch": 0.804227588408069, "grad_norm": 204.8233642578125, "learning_rate": 1.2122890084564542e-06, "loss": 12.7033, "step": 398120 }, { "epoch": 0.8042477890407528, "grad_norm": 649.1748046875, "learning_rate": 1.2120611523184e-06, "loss": 18.9861, "step": 398130 }, { "epoch": 0.8042679896734366, "grad_norm": 341.6905212402344, "learning_rate": 1.2118333146423016e-06, "loss": 9.5192, "step": 398140 }, { "epoch": 0.8042881903061204, "grad_norm": 489.960205078125, "learning_rate": 1.2116054954292688e-06, "loss": 29.6769, "step": 398150 }, { "epoch": 0.8043083909388042, "grad_norm": 139.4357147216797, "learning_rate": 1.2113776946804096e-06, "loss": 36.3575, "step": 398160 }, { "epoch": 0.8043285915714881, "grad_norm": 448.1143493652344, "learning_rate": 1.2111499123968374e-06, "loss": 15.5382, "step": 398170 }, { "epoch": 0.8043487922041719, "grad_norm": 176.39828491210938, "learning_rate": 1.2109221485796592e-06, "loss": 25.3181, "step": 398180 }, { "epoch": 0.8043689928368557, "grad_norm": 456.3003234863281, "learning_rate": 1.210694403229989e-06, "loss": 14.8564, "step": 398190 }, { "epoch": 0.8043891934695395, "grad_norm": 422.00701904296875, "learning_rate": 1.2104666763489326e-06, "loss": 13.6546, "step": 398200 }, { "epoch": 0.8044093941022232, "grad_norm": 217.01031494140625, "learning_rate": 1.2102389679376037e-06, "loss": 19.9483, "step": 398210 }, { "epoch": 0.804429594734907, "grad_norm": 247.6773223876953, "learning_rate": 1.2100112779971107e-06, "loss": 16.3041, "step": 398220 }, { "epoch": 0.8044497953675909, "grad_norm": 275.5755310058594, "learning_rate": 1.2097836065285611e-06, "loss": 13.8357, "step": 398230 }, { "epoch": 0.8044699960002747, "grad_norm": 30.836162567138672, "learning_rate": 
1.2095559535330681e-06, "loss": 5.0338, "step": 398240 }, { "epoch": 0.8044901966329585, "grad_norm": 5.109502792358398, "learning_rate": 1.20932831901174e-06, "loss": 13.3135, "step": 398250 }, { "epoch": 0.8045103972656423, "grad_norm": 451.49810791015625, "learning_rate": 1.2091007029656843e-06, "loss": 13.8472, "step": 398260 }, { "epoch": 0.8045305978983261, "grad_norm": 332.7293395996094, "learning_rate": 1.2088731053960118e-06, "loss": 14.8156, "step": 398270 }, { "epoch": 0.80455079853101, "grad_norm": 275.114013671875, "learning_rate": 1.2086455263038349e-06, "loss": 11.0916, "step": 398280 }, { "epoch": 0.8045709991636938, "grad_norm": 233.9296875, "learning_rate": 1.2084179656902573e-06, "loss": 20.5777, "step": 398290 }, { "epoch": 0.8045911997963776, "grad_norm": 264.5608215332031, "learning_rate": 1.2081904235563908e-06, "loss": 18.5516, "step": 398300 }, { "epoch": 0.8046114004290614, "grad_norm": 252.69302368164062, "learning_rate": 1.2079628999033449e-06, "loss": 24.9003, "step": 398310 }, { "epoch": 0.8046316010617452, "grad_norm": 644.6895141601562, "learning_rate": 1.2077353947322284e-06, "loss": 19.7171, "step": 398320 }, { "epoch": 0.8046518016944291, "grad_norm": 223.94583129882812, "learning_rate": 1.2075079080441482e-06, "loss": 13.4161, "step": 398330 }, { "epoch": 0.8046720023271129, "grad_norm": 150.11373901367188, "learning_rate": 1.207280439840215e-06, "loss": 9.5801, "step": 398340 }, { "epoch": 0.8046922029597967, "grad_norm": 111.24942779541016, "learning_rate": 1.2070529901215388e-06, "loss": 19.0224, "step": 398350 }, { "epoch": 0.8047124035924805, "grad_norm": 307.4959716796875, "learning_rate": 1.206825558889224e-06, "loss": 34.8143, "step": 398360 }, { "epoch": 0.8047326042251643, "grad_norm": 467.3067932128906, "learning_rate": 1.2065981461443815e-06, "loss": 13.0586, "step": 398370 }, { "epoch": 0.8047528048578482, "grad_norm": 515.9276733398438, "learning_rate": 1.2063707518881207e-06, "loss": 13.4302, "step": 398380 }, { "epoch": 0.804773005490532, "grad_norm": 370.0353088378906, "learning_rate": 1.206143376121549e-06, "loss": 27.9527, "step": 398390 }, { "epoch": 0.8047932061232158, "grad_norm": 265.7731628417969, "learning_rate": 1.2059160188457724e-06, "loss": 13.6677, "step": 398400 }, { "epoch": 0.8048134067558996, "grad_norm": 647.421142578125, "learning_rate": 1.2056886800619028e-06, "loss": 23.1429, "step": 398410 }, { "epoch": 0.8048336073885834, "grad_norm": 576.6586303710938, "learning_rate": 1.2054613597710463e-06, "loss": 13.7681, "step": 398420 }, { "epoch": 0.8048538080212673, "grad_norm": 287.4187316894531, "learning_rate": 1.2052340579743093e-06, "loss": 9.2779, "step": 398430 }, { "epoch": 0.8048740086539511, "grad_norm": 80.31151580810547, "learning_rate": 1.2050067746728033e-06, "loss": 15.723, "step": 398440 }, { "epoch": 0.8048942092866349, "grad_norm": 610.2584228515625, "learning_rate": 1.2047795098676317e-06, "loss": 17.7203, "step": 398450 }, { "epoch": 0.8049144099193186, "grad_norm": 503.1271057128906, "learning_rate": 1.2045522635599066e-06, "loss": 19.5396, "step": 398460 }, { "epoch": 0.8049346105520024, "grad_norm": 115.04306030273438, "learning_rate": 1.204325035750732e-06, "loss": 16.2918, "step": 398470 }, { "epoch": 0.8049548111846863, "grad_norm": 187.89588928222656, "learning_rate": 1.204097826441218e-06, "loss": 19.2752, "step": 398480 }, { "epoch": 0.8049750118173701, "grad_norm": 167.96168518066406, "learning_rate": 1.2038706356324703e-06, "loss": 19.6608, "step": 398490 }, { "epoch": 0.8049952124500539, 
"grad_norm": 274.8702087402344, "learning_rate": 1.203643463325596e-06, "loss": 11.9754, "step": 398500 }, { "epoch": 0.8050154130827377, "grad_norm": 669.0701293945312, "learning_rate": 1.2034163095217045e-06, "loss": 15.9731, "step": 398510 }, { "epoch": 0.8050356137154215, "grad_norm": 333.1326599121094, "learning_rate": 1.2031891742218992e-06, "loss": 20.093, "step": 398520 }, { "epoch": 0.8050558143481054, "grad_norm": 161.7942352294922, "learning_rate": 1.2029620574272916e-06, "loss": 16.4859, "step": 398530 }, { "epoch": 0.8050760149807892, "grad_norm": 116.2698974609375, "learning_rate": 1.2027349591389858e-06, "loss": 10.9212, "step": 398540 }, { "epoch": 0.805096215613473, "grad_norm": 209.771728515625, "learning_rate": 1.2025078793580885e-06, "loss": 16.7754, "step": 398550 }, { "epoch": 0.8051164162461568, "grad_norm": 156.68161010742188, "learning_rate": 1.202280818085708e-06, "loss": 12.0347, "step": 398560 }, { "epoch": 0.8051366168788406, "grad_norm": 148.68417358398438, "learning_rate": 1.2020537753229506e-06, "loss": 27.6422, "step": 398570 }, { "epoch": 0.8051568175115245, "grad_norm": 169.93348693847656, "learning_rate": 1.2018267510709208e-06, "loss": 13.968, "step": 398580 }, { "epoch": 0.8051770181442083, "grad_norm": 184.7910614013672, "learning_rate": 1.201599745330727e-06, "loss": 15.3194, "step": 398590 }, { "epoch": 0.8051972187768921, "grad_norm": 248.0599822998047, "learning_rate": 1.2013727581034783e-06, "loss": 14.647, "step": 398600 }, { "epoch": 0.8052174194095759, "grad_norm": 319.2702941894531, "learning_rate": 1.201145789390275e-06, "loss": 17.0601, "step": 398610 }, { "epoch": 0.8052376200422597, "grad_norm": 216.8593292236328, "learning_rate": 1.2009188391922261e-06, "loss": 15.7776, "step": 398620 }, { "epoch": 0.8052578206749436, "grad_norm": 408.43060302734375, "learning_rate": 1.2006919075104396e-06, "loss": 23.2166, "step": 398630 }, { "epoch": 0.8052780213076274, "grad_norm": 27.785552978515625, "learning_rate": 1.20046499434602e-06, "loss": 26.1633, "step": 398640 }, { "epoch": 0.8052982219403112, "grad_norm": 575.4810791015625, "learning_rate": 1.2002380997000717e-06, "loss": 24.9866, "step": 398650 }, { "epoch": 0.805318422572995, "grad_norm": 637.1857299804688, "learning_rate": 1.200011223573702e-06, "loss": 17.8723, "step": 398660 }, { "epoch": 0.8053386232056788, "grad_norm": 750.9044189453125, "learning_rate": 1.1997843659680202e-06, "loss": 21.2408, "step": 398670 }, { "epoch": 0.8053588238383627, "grad_norm": 129.53472900390625, "learning_rate": 1.1995575268841254e-06, "loss": 23.3928, "step": 398680 }, { "epoch": 0.8053790244710465, "grad_norm": 357.01812744140625, "learning_rate": 1.1993307063231258e-06, "loss": 19.5596, "step": 398690 }, { "epoch": 0.8053992251037303, "grad_norm": 7249.14892578125, "learning_rate": 1.199103904286129e-06, "loss": 32.026, "step": 398700 }, { "epoch": 0.8054194257364141, "grad_norm": 414.0624084472656, "learning_rate": 1.1988771207742388e-06, "loss": 20.9807, "step": 398710 }, { "epoch": 0.8054396263690978, "grad_norm": 689.0746459960938, "learning_rate": 1.1986503557885587e-06, "loss": 20.1095, "step": 398720 }, { "epoch": 0.8054598270017816, "grad_norm": 152.21286010742188, "learning_rate": 1.1984236093301976e-06, "loss": 19.3516, "step": 398730 }, { "epoch": 0.8054800276344655, "grad_norm": 255.29714965820312, "learning_rate": 1.1981968814002576e-06, "loss": 14.5699, "step": 398740 }, { "epoch": 0.8055002282671493, "grad_norm": 528.74560546875, "learning_rate": 1.1979701719998454e-06, "loss": 
20.1821, "step": 398750 }, { "epoch": 0.8055204288998331, "grad_norm": 278.7982482910156, "learning_rate": 1.1977434811300664e-06, "loss": 16.4397, "step": 398760 }, { "epoch": 0.8055406295325169, "grad_norm": 609.253662109375, "learning_rate": 1.1975168087920226e-06, "loss": 13.1578, "step": 398770 }, { "epoch": 0.8055608301652007, "grad_norm": 339.8735656738281, "learning_rate": 1.1972901549868222e-06, "loss": 15.2729, "step": 398780 }, { "epoch": 0.8055810307978846, "grad_norm": 65.78746795654297, "learning_rate": 1.1970635197155671e-06, "loss": 24.322, "step": 398790 }, { "epoch": 0.8056012314305684, "grad_norm": 178.88465881347656, "learning_rate": 1.1968369029793642e-06, "loss": 22.919, "step": 398800 }, { "epoch": 0.8056214320632522, "grad_norm": 844.7586669921875, "learning_rate": 1.1966103047793158e-06, "loss": 24.4495, "step": 398810 }, { "epoch": 0.805641632695936, "grad_norm": 152.448486328125, "learning_rate": 1.196383725116529e-06, "loss": 24.2517, "step": 398820 }, { "epoch": 0.8056618333286198, "grad_norm": 318.3756408691406, "learning_rate": 1.1961571639921066e-06, "loss": 17.6758, "step": 398830 }, { "epoch": 0.8056820339613037, "grad_norm": 178.51841735839844, "learning_rate": 1.1959306214071508e-06, "loss": 42.8888, "step": 398840 }, { "epoch": 0.8057022345939875, "grad_norm": 289.67938232421875, "learning_rate": 1.1957040973627698e-06, "loss": 23.0294, "step": 398850 }, { "epoch": 0.8057224352266713, "grad_norm": 182.93289184570312, "learning_rate": 1.1954775918600658e-06, "loss": 15.1336, "step": 398860 }, { "epoch": 0.8057426358593551, "grad_norm": 261.6667785644531, "learning_rate": 1.1952511049001407e-06, "loss": 27.8058, "step": 398870 }, { "epoch": 0.805762836492039, "grad_norm": 906.7447509765625, "learning_rate": 1.1950246364841005e-06, "loss": 20.0649, "step": 398880 }, { "epoch": 0.8057830371247228, "grad_norm": 383.3402404785156, "learning_rate": 1.1947981866130515e-06, "loss": 9.2169, "step": 398890 }, { "epoch": 0.8058032377574066, "grad_norm": 560.3885498046875, "learning_rate": 1.1945717552880919e-06, "loss": 14.4276, "step": 398900 }, { "epoch": 0.8058234383900904, "grad_norm": 416.9820861816406, "learning_rate": 1.194345342510328e-06, "loss": 19.4729, "step": 398910 }, { "epoch": 0.8058436390227742, "grad_norm": 204.42552185058594, "learning_rate": 1.1941189482808645e-06, "loss": 13.8222, "step": 398920 }, { "epoch": 0.805863839655458, "grad_norm": 523.3088989257812, "learning_rate": 1.193892572600804e-06, "loss": 21.5251, "step": 398930 }, { "epoch": 0.8058840402881419, "grad_norm": 207.76956176757812, "learning_rate": 1.1936662154712475e-06, "loss": 9.1551, "step": 398940 }, { "epoch": 0.8059042409208257, "grad_norm": 422.6136169433594, "learning_rate": 1.193439876893301e-06, "loss": 12.6284, "step": 398950 }, { "epoch": 0.8059244415535095, "grad_norm": 859.187255859375, "learning_rate": 1.1932135568680691e-06, "loss": 30.7848, "step": 398960 }, { "epoch": 0.8059446421861932, "grad_norm": 398.545166015625, "learning_rate": 1.1929872553966497e-06, "loss": 13.7408, "step": 398970 }, { "epoch": 0.805964842818877, "grad_norm": 436.9729919433594, "learning_rate": 1.1927609724801492e-06, "loss": 24.225, "step": 398980 }, { "epoch": 0.8059850434515609, "grad_norm": 479.7727966308594, "learning_rate": 1.1925347081196709e-06, "loss": 14.9331, "step": 398990 }, { "epoch": 0.8060052440842447, "grad_norm": 510.92584228515625, "learning_rate": 1.1923084623163172e-06, "loss": 15.4965, "step": 399000 }, { "epoch": 0.8060254447169285, "grad_norm": 
109.71276092529297, "learning_rate": 1.192082235071188e-06, "loss": 13.8335, "step": 399010 }, { "epoch": 0.8060456453496123, "grad_norm": 846.4888305664062, "learning_rate": 1.1918560263853902e-06, "loss": 21.4266, "step": 399020 }, { "epoch": 0.8060658459822961, "grad_norm": 16.963756561279297, "learning_rate": 1.1916298362600243e-06, "loss": 13.8133, "step": 399030 }, { "epoch": 0.80608604661498, "grad_norm": 550.7333984375, "learning_rate": 1.1914036646961907e-06, "loss": 17.8056, "step": 399040 }, { "epoch": 0.8061062472476638, "grad_norm": 31.84421730041504, "learning_rate": 1.1911775116949958e-06, "loss": 9.0356, "step": 399050 }, { "epoch": 0.8061264478803476, "grad_norm": 362.2513427734375, "learning_rate": 1.1909513772575383e-06, "loss": 13.9265, "step": 399060 }, { "epoch": 0.8061466485130314, "grad_norm": 150.13502502441406, "learning_rate": 1.1907252613849224e-06, "loss": 16.7787, "step": 399070 }, { "epoch": 0.8061668491457152, "grad_norm": 151.5859832763672, "learning_rate": 1.1904991640782487e-06, "loss": 20.1903, "step": 399080 }, { "epoch": 0.8061870497783991, "grad_norm": 353.0852966308594, "learning_rate": 1.190273085338622e-06, "loss": 16.6556, "step": 399090 }, { "epoch": 0.8062072504110829, "grad_norm": 341.1142578125, "learning_rate": 1.1900470251671415e-06, "loss": 19.0918, "step": 399100 }, { "epoch": 0.8062274510437667, "grad_norm": 312.563232421875, "learning_rate": 1.1898209835649083e-06, "loss": 13.2726, "step": 399110 }, { "epoch": 0.8062476516764505, "grad_norm": 8.31265926361084, "learning_rate": 1.189594960533027e-06, "loss": 11.9841, "step": 399120 }, { "epoch": 0.8062678523091343, "grad_norm": 277.1001892089844, "learning_rate": 1.1893689560725963e-06, "loss": 10.1152, "step": 399130 }, { "epoch": 0.8062880529418182, "grad_norm": 129.29551696777344, "learning_rate": 1.1891429701847207e-06, "loss": 19.1193, "step": 399140 }, { "epoch": 0.806308253574502, "grad_norm": 309.3577575683594, "learning_rate": 1.1889170028705e-06, "loss": 26.0598, "step": 399150 }, { "epoch": 0.8063284542071858, "grad_norm": 761.1845092773438, "learning_rate": 1.1886910541310342e-06, "loss": 20.8598, "step": 399160 }, { "epoch": 0.8063486548398696, "grad_norm": 447.2908935546875, "learning_rate": 1.1884651239674272e-06, "loss": 24.8396, "step": 399170 }, { "epoch": 0.8063688554725534, "grad_norm": 429.6351623535156, "learning_rate": 1.188239212380779e-06, "loss": 28.037, "step": 399180 }, { "epoch": 0.8063890561052373, "grad_norm": 589.7367553710938, "learning_rate": 1.1880133193721893e-06, "loss": 8.272, "step": 399190 }, { "epoch": 0.8064092567379211, "grad_norm": 292.37335205078125, "learning_rate": 1.18778744494276e-06, "loss": 13.0333, "step": 399200 }, { "epoch": 0.8064294573706049, "grad_norm": 313.3642272949219, "learning_rate": 1.1875615890935954e-06, "loss": 18.613, "step": 399210 }, { "epoch": 0.8064496580032887, "grad_norm": 112.42382049560547, "learning_rate": 1.1873357518257905e-06, "loss": 4.7573, "step": 399220 }, { "epoch": 0.8064698586359724, "grad_norm": 40.10010528564453, "learning_rate": 1.187109933140449e-06, "loss": 5.4787, "step": 399230 }, { "epoch": 0.8064900592686562, "grad_norm": 134.88690185546875, "learning_rate": 1.186884133038672e-06, "loss": 13.5613, "step": 399240 }, { "epoch": 0.8065102599013401, "grad_norm": 177.24685668945312, "learning_rate": 1.1866583515215597e-06, "loss": 14.959, "step": 399250 }, { "epoch": 0.8065304605340239, "grad_norm": 634.709716796875, "learning_rate": 1.18643258859021e-06, "loss": 18.0084, "step": 399260 }, { 
"epoch": 0.8065506611667077, "grad_norm": 275.6827697753906, "learning_rate": 1.1862068442457264e-06, "loss": 10.1867, "step": 399270 }, { "epoch": 0.8065708617993915, "grad_norm": 105.64037322998047, "learning_rate": 1.18598111848921e-06, "loss": 10.9114, "step": 399280 }, { "epoch": 0.8065910624320753, "grad_norm": 290.1683044433594, "learning_rate": 1.1857554113217568e-06, "loss": 23.3434, "step": 399290 }, { "epoch": 0.8066112630647592, "grad_norm": 15.911661148071289, "learning_rate": 1.185529722744469e-06, "loss": 14.5655, "step": 399300 }, { "epoch": 0.806631463697443, "grad_norm": 416.8236389160156, "learning_rate": 1.1853040527584475e-06, "loss": 16.1546, "step": 399310 }, { "epoch": 0.8066516643301268, "grad_norm": 84.52220153808594, "learning_rate": 1.185078401364792e-06, "loss": 19.1157, "step": 399320 }, { "epoch": 0.8066718649628106, "grad_norm": 414.135986328125, "learning_rate": 1.1848527685646e-06, "loss": 13.6018, "step": 399330 }, { "epoch": 0.8066920655954944, "grad_norm": 163.0150604248047, "learning_rate": 1.1846271543589743e-06, "loss": 17.9266, "step": 399340 }, { "epoch": 0.8067122662281783, "grad_norm": 361.9199523925781, "learning_rate": 1.1844015587490138e-06, "loss": 20.8676, "step": 399350 }, { "epoch": 0.8067324668608621, "grad_norm": 497.72503662109375, "learning_rate": 1.184175981735815e-06, "loss": 17.6227, "step": 399360 }, { "epoch": 0.8067526674935459, "grad_norm": 197.9222869873047, "learning_rate": 1.18395042332048e-06, "loss": 29.214, "step": 399370 }, { "epoch": 0.8067728681262297, "grad_norm": 170.6032257080078, "learning_rate": 1.1837248835041093e-06, "loss": 14.2644, "step": 399380 }, { "epoch": 0.8067930687589135, "grad_norm": 610.2376098632812, "learning_rate": 1.1834993622878004e-06, "loss": 21.1365, "step": 399390 }, { "epoch": 0.8068132693915974, "grad_norm": 298.6968994140625, "learning_rate": 1.1832738596726518e-06, "loss": 19.939, "step": 399400 }, { "epoch": 0.8068334700242812, "grad_norm": 109.22898864746094, "learning_rate": 1.1830483756597643e-06, "loss": 26.7395, "step": 399410 }, { "epoch": 0.806853670656965, "grad_norm": 201.33030700683594, "learning_rate": 1.1828229102502364e-06, "loss": 11.6022, "step": 399420 }, { "epoch": 0.8068738712896488, "grad_norm": 23.997217178344727, "learning_rate": 1.1825974634451653e-06, "loss": 22.8611, "step": 399430 }, { "epoch": 0.8068940719223326, "grad_norm": 391.8190002441406, "learning_rate": 1.1823720352456525e-06, "loss": 13.7551, "step": 399440 }, { "epoch": 0.8069142725550165, "grad_norm": 442.6551818847656, "learning_rate": 1.1821466256527942e-06, "loss": 10.4822, "step": 399450 }, { "epoch": 0.8069344731877003, "grad_norm": 331.6322937011719, "learning_rate": 1.181921234667691e-06, "loss": 7.8176, "step": 399460 }, { "epoch": 0.8069546738203841, "grad_norm": 204.58738708496094, "learning_rate": 1.181695862291441e-06, "loss": 22.6816, "step": 399470 }, { "epoch": 0.8069748744530679, "grad_norm": 294.2364196777344, "learning_rate": 1.181470508525141e-06, "loss": 23.8682, "step": 399480 }, { "epoch": 0.8069950750857516, "grad_norm": 19.0340576171875, "learning_rate": 1.1812451733698905e-06, "loss": 17.3152, "step": 399490 }, { "epoch": 0.8070152757184355, "grad_norm": 401.44287109375, "learning_rate": 1.1810198568267906e-06, "loss": 27.1681, "step": 399500 }, { "epoch": 0.8070354763511193, "grad_norm": 706.4053955078125, "learning_rate": 1.180794558896934e-06, "loss": 17.0426, "step": 399510 }, { "epoch": 0.8070556769838031, "grad_norm": 1950.119873046875, "learning_rate": 
1.180569279581421e-06, "loss": 28.3513, "step": 399520 }, { "epoch": 0.8070758776164869, "grad_norm": 367.019287109375, "learning_rate": 1.1803440188813526e-06, "loss": 24.6241, "step": 399530 }, { "epoch": 0.8070960782491707, "grad_norm": 414.6794128417969, "learning_rate": 1.1801187767978234e-06, "loss": 27.6976, "step": 399540 }, { "epoch": 0.8071162788818546, "grad_norm": 26.97930145263672, "learning_rate": 1.1798935533319305e-06, "loss": 10.439, "step": 399550 }, { "epoch": 0.8071364795145384, "grad_norm": 623.3947143554688, "learning_rate": 1.1796683484847731e-06, "loss": 21.4734, "step": 399560 }, { "epoch": 0.8071566801472222, "grad_norm": 130.07791137695312, "learning_rate": 1.179443162257452e-06, "loss": 18.3431, "step": 399570 }, { "epoch": 0.807176880779906, "grad_norm": 334.9212341308594, "learning_rate": 1.179217994651059e-06, "loss": 11.5006, "step": 399580 }, { "epoch": 0.8071970814125898, "grad_norm": 210.55154418945312, "learning_rate": 1.1789928456666933e-06, "loss": 23.3158, "step": 399590 }, { "epoch": 0.8072172820452737, "grad_norm": 448.5078125, "learning_rate": 1.178767715305455e-06, "loss": 21.8411, "step": 399600 }, { "epoch": 0.8072374826779575, "grad_norm": 56.96674346923828, "learning_rate": 1.1785426035684395e-06, "loss": 30.0421, "step": 399610 }, { "epoch": 0.8072576833106413, "grad_norm": 487.0782470703125, "learning_rate": 1.1783175104567418e-06, "loss": 23.2399, "step": 399620 }, { "epoch": 0.8072778839433251, "grad_norm": 235.6630096435547, "learning_rate": 1.178092435971463e-06, "loss": 14.6871, "step": 399630 }, { "epoch": 0.8072980845760089, "grad_norm": 367.70233154296875, "learning_rate": 1.177867380113698e-06, "loss": 20.5984, "step": 399640 }, { "epoch": 0.8073182852086928, "grad_norm": 541.5439453125, "learning_rate": 1.1776423428845423e-06, "loss": 29.2618, "step": 399650 }, { "epoch": 0.8073384858413766, "grad_norm": 537.5186767578125, "learning_rate": 1.1774173242850955e-06, "loss": 17.9111, "step": 399660 }, { "epoch": 0.8073586864740604, "grad_norm": 301.9331359863281, "learning_rate": 1.1771923243164518e-06, "loss": 14.7783, "step": 399670 }, { "epoch": 0.8073788871067442, "grad_norm": 402.8861083984375, "learning_rate": 1.1769673429797107e-06, "loss": 21.9451, "step": 399680 }, { "epoch": 0.807399087739428, "grad_norm": 199.38743591308594, "learning_rate": 1.1767423802759653e-06, "loss": 11.2573, "step": 399690 }, { "epoch": 0.8074192883721119, "grad_norm": 439.7703857421875, "learning_rate": 1.1765174362063152e-06, "loss": 11.169, "step": 399700 }, { "epoch": 0.8074394890047957, "grad_norm": 24.274436950683594, "learning_rate": 1.1762925107718558e-06, "loss": 20.235, "step": 399710 }, { "epoch": 0.8074596896374795, "grad_norm": 223.40164184570312, "learning_rate": 1.1760676039736813e-06, "loss": 17.247, "step": 399720 }, { "epoch": 0.8074798902701633, "grad_norm": 638.76513671875, "learning_rate": 1.175842715812891e-06, "loss": 24.7132, "step": 399730 }, { "epoch": 0.807500090902847, "grad_norm": 561.298828125, "learning_rate": 1.1756178462905782e-06, "loss": 20.7884, "step": 399740 }, { "epoch": 0.8075202915355308, "grad_norm": 882.0572509765625, "learning_rate": 1.1753929954078414e-06, "loss": 18.2897, "step": 399750 }, { "epoch": 0.8075404921682147, "grad_norm": 403.7325439453125, "learning_rate": 1.1751681631657752e-06, "loss": 21.9839, "step": 399760 }, { "epoch": 0.8075606928008985, "grad_norm": 214.2028045654297, "learning_rate": 1.1749433495654743e-06, "loss": 19.8607, "step": 399770 }, { "epoch": 0.8075808934335823, "grad_norm": 
526.0719604492188, "learning_rate": 1.174718554608037e-06, "loss": 18.6298, "step": 399780 }, { "epoch": 0.8076010940662661, "grad_norm": 233.352783203125, "learning_rate": 1.174493778294557e-06, "loss": 11.2677, "step": 399790 }, { "epoch": 0.8076212946989499, "grad_norm": 390.91180419921875, "learning_rate": 1.1742690206261293e-06, "loss": 19.352, "step": 399800 }, { "epoch": 0.8076414953316338, "grad_norm": 664.2025146484375, "learning_rate": 1.1740442816038505e-06, "loss": 25.8564, "step": 399810 }, { "epoch": 0.8076616959643176, "grad_norm": 518.6954345703125, "learning_rate": 1.173819561228819e-06, "loss": 16.0014, "step": 399820 }, { "epoch": 0.8076818965970014, "grad_norm": 183.4699249267578, "learning_rate": 1.1735948595021234e-06, "loss": 19.0936, "step": 399830 }, { "epoch": 0.8077020972296852, "grad_norm": 551.70263671875, "learning_rate": 1.1733701764248623e-06, "loss": 24.6808, "step": 399840 }, { "epoch": 0.807722297862369, "grad_norm": 214.64559936523438, "learning_rate": 1.1731455119981327e-06, "loss": 14.4356, "step": 399850 }, { "epoch": 0.8077424984950529, "grad_norm": 281.9747314453125, "learning_rate": 1.1729208662230273e-06, "loss": 19.6334, "step": 399860 }, { "epoch": 0.8077626991277367, "grad_norm": 78.07125854492188, "learning_rate": 1.1726962391006409e-06, "loss": 21.6389, "step": 399870 }, { "epoch": 0.8077828997604205, "grad_norm": 382.7174072265625, "learning_rate": 1.1724716306320676e-06, "loss": 19.6051, "step": 399880 }, { "epoch": 0.8078031003931043, "grad_norm": 53.93277359008789, "learning_rate": 1.1722470408184072e-06, "loss": 13.8435, "step": 399890 }, { "epoch": 0.8078233010257881, "grad_norm": 235.94053649902344, "learning_rate": 1.1720224696607474e-06, "loss": 11.6338, "step": 399900 }, { "epoch": 0.807843501658472, "grad_norm": 1165.390380859375, "learning_rate": 1.1717979171601857e-06, "loss": 19.0239, "step": 399910 }, { "epoch": 0.8078637022911558, "grad_norm": 440.1308288574219, "learning_rate": 1.1715733833178178e-06, "loss": 21.0809, "step": 399920 }, { "epoch": 0.8078839029238396, "grad_norm": 807.2807006835938, "learning_rate": 1.1713488681347375e-06, "loss": 22.7856, "step": 399930 }, { "epoch": 0.8079041035565234, "grad_norm": 48.46335220336914, "learning_rate": 1.1711243716120363e-06, "loss": 40.0194, "step": 399940 }, { "epoch": 0.8079243041892072, "grad_norm": 275.75421142578125, "learning_rate": 1.1708998937508126e-06, "loss": 8.6214, "step": 399950 }, { "epoch": 0.8079445048218911, "grad_norm": 530.797607421875, "learning_rate": 1.1706754345521582e-06, "loss": 19.6889, "step": 399960 }, { "epoch": 0.8079647054545749, "grad_norm": 582.1223754882812, "learning_rate": 1.1704509940171655e-06, "loss": 21.8478, "step": 399970 }, { "epoch": 0.8079849060872587, "grad_norm": 341.1997985839844, "learning_rate": 1.1702265721469302e-06, "loss": 8.5072, "step": 399980 }, { "epoch": 0.8080051067199425, "grad_norm": 475.842041015625, "learning_rate": 1.1700021689425478e-06, "loss": 23.2782, "step": 399990 }, { "epoch": 0.8080253073526262, "grad_norm": 137.1321258544922, "learning_rate": 1.1697777844051105e-06, "loss": 19.8697, "step": 400000 }, { "epoch": 0.80804550798531, "grad_norm": 691.2139282226562, "learning_rate": 1.16955341853571e-06, "loss": 24.2845, "step": 400010 }, { "epoch": 0.8080657086179939, "grad_norm": 349.56134033203125, "learning_rate": 1.1693290713354433e-06, "loss": 14.356, "step": 400020 }, { "epoch": 0.8080859092506777, "grad_norm": 176.1207733154297, "learning_rate": 1.169104742805402e-06, "loss": 8.2134, "step": 400030 }, 
{ "epoch": 0.8081061098833615, "grad_norm": 275.6594543457031, "learning_rate": 1.168880432946678e-06, "loss": 9.6885, "step": 400040 }, { "epoch": 0.8081263105160453, "grad_norm": 452.2191162109375, "learning_rate": 1.1686561417603677e-06, "loss": 19.1874, "step": 400050 }, { "epoch": 0.8081465111487292, "grad_norm": 269.15997314453125, "learning_rate": 1.168431869247561e-06, "loss": 18.2211, "step": 400060 }, { "epoch": 0.808166711781413, "grad_norm": 253.88807678222656, "learning_rate": 1.1682076154093542e-06, "loss": 36.2333, "step": 400070 }, { "epoch": 0.8081869124140968, "grad_norm": 351.63763427734375, "learning_rate": 1.1679833802468387e-06, "loss": 12.8735, "step": 400080 }, { "epoch": 0.8082071130467806, "grad_norm": 316.8739013671875, "learning_rate": 1.1677591637611057e-06, "loss": 14.9047, "step": 400090 }, { "epoch": 0.8082273136794644, "grad_norm": 185.6357879638672, "learning_rate": 1.1675349659532514e-06, "loss": 26.8589, "step": 400100 }, { "epoch": 0.8082475143121483, "grad_norm": 557.3394165039062, "learning_rate": 1.1673107868243672e-06, "loss": 16.9194, "step": 400110 }, { "epoch": 0.8082677149448321, "grad_norm": 424.20867919921875, "learning_rate": 1.1670866263755437e-06, "loss": 26.7866, "step": 400120 }, { "epoch": 0.8082879155775159, "grad_norm": 773.7348022460938, "learning_rate": 1.1668624846078752e-06, "loss": 25.3745, "step": 400130 }, { "epoch": 0.8083081162101997, "grad_norm": 149.3612060546875, "learning_rate": 1.1666383615224553e-06, "loss": 15.7016, "step": 400140 }, { "epoch": 0.8083283168428835, "grad_norm": 305.80084228515625, "learning_rate": 1.1664142571203751e-06, "loss": 16.0033, "step": 400150 }, { "epoch": 0.8083485174755674, "grad_norm": 216.41941833496094, "learning_rate": 1.1661901714027258e-06, "loss": 11.1485, "step": 400160 }, { "epoch": 0.8083687181082512, "grad_norm": 624.4144287109375, "learning_rate": 1.1659661043706e-06, "loss": 21.9399, "step": 400170 }, { "epoch": 0.808388918740935, "grad_norm": 200.66453552246094, "learning_rate": 1.1657420560250938e-06, "loss": 18.7956, "step": 400180 }, { "epoch": 0.8084091193736188, "grad_norm": 195.721923828125, "learning_rate": 1.1655180263672928e-06, "loss": 16.7465, "step": 400190 }, { "epoch": 0.8084293200063026, "grad_norm": 199.95681762695312, "learning_rate": 1.1652940153982917e-06, "loss": 8.8618, "step": 400200 }, { "epoch": 0.8084495206389865, "grad_norm": 287.88494873046875, "learning_rate": 1.1650700231191842e-06, "loss": 10.2409, "step": 400210 }, { "epoch": 0.8084697212716703, "grad_norm": 242.36024475097656, "learning_rate": 1.16484604953106e-06, "loss": 16.0757, "step": 400220 }, { "epoch": 0.8084899219043541, "grad_norm": 392.25262451171875, "learning_rate": 1.1646220946350095e-06, "loss": 15.1735, "step": 400230 }, { "epoch": 0.8085101225370379, "grad_norm": 371.1950378417969, "learning_rate": 1.1643981584321273e-06, "loss": 16.5909, "step": 400240 }, { "epoch": 0.8085303231697216, "grad_norm": 115.65122985839844, "learning_rate": 1.164174240923503e-06, "loss": 28.1685, "step": 400250 }, { "epoch": 0.8085505238024054, "grad_norm": 267.21051025390625, "learning_rate": 1.1639503421102272e-06, "loss": 21.4339, "step": 400260 }, { "epoch": 0.8085707244350893, "grad_norm": 256.7766418457031, "learning_rate": 1.1637264619933936e-06, "loss": 16.3704, "step": 400270 }, { "epoch": 0.8085909250677731, "grad_norm": 255.3925018310547, "learning_rate": 1.1635026005740902e-06, "loss": 7.5286, "step": 400280 }, { "epoch": 0.8086111257004569, "grad_norm": 560.7896728515625, "learning_rate": 
1.1632787578534116e-06, "loss": 20.2387, "step": 400290 }, { "epoch": 0.8086313263331407, "grad_norm": 715.2000122070312, "learning_rate": 1.1630549338324454e-06, "loss": 14.4149, "step": 400300 }, { "epoch": 0.8086515269658245, "grad_norm": 468.9555358886719, "learning_rate": 1.1628311285122857e-06, "loss": 16.9689, "step": 400310 }, { "epoch": 0.8086717275985084, "grad_norm": 799.9161376953125, "learning_rate": 1.1626073418940214e-06, "loss": 22.3565, "step": 400320 }, { "epoch": 0.8086919282311922, "grad_norm": 234.2010498046875, "learning_rate": 1.162383573978742e-06, "loss": 25.0959, "step": 400330 }, { "epoch": 0.808712128863876, "grad_norm": 352.39031982421875, "learning_rate": 1.1621598247675415e-06, "loss": 22.739, "step": 400340 }, { "epoch": 0.8087323294965598, "grad_norm": 591.6005859375, "learning_rate": 1.1619360942615065e-06, "loss": 22.8182, "step": 400350 }, { "epoch": 0.8087525301292436, "grad_norm": 708.0045166015625, "learning_rate": 1.1617123824617315e-06, "loss": 26.2195, "step": 400360 }, { "epoch": 0.8087727307619275, "grad_norm": 620.4705810546875, "learning_rate": 1.1614886893693044e-06, "loss": 24.1007, "step": 400370 }, { "epoch": 0.8087929313946113, "grad_norm": 416.3150939941406, "learning_rate": 1.1612650149853144e-06, "loss": 28.3288, "step": 400380 }, { "epoch": 0.8088131320272951, "grad_norm": 327.38043212890625, "learning_rate": 1.161041359310855e-06, "loss": 10.2856, "step": 400390 }, { "epoch": 0.8088333326599789, "grad_norm": 266.3319091796875, "learning_rate": 1.160817722347014e-06, "loss": 15.2799, "step": 400400 }, { "epoch": 0.8088535332926627, "grad_norm": 352.69488525390625, "learning_rate": 1.1605941040948803e-06, "loss": 13.1876, "step": 400410 }, { "epoch": 0.8088737339253466, "grad_norm": 291.1202087402344, "learning_rate": 1.1603705045555457e-06, "loss": 13.842, "step": 400420 }, { "epoch": 0.8088939345580304, "grad_norm": 369.0484924316406, "learning_rate": 1.160146923730101e-06, "loss": 15.03, "step": 400430 }, { "epoch": 0.8089141351907142, "grad_norm": 0.0, "learning_rate": 1.1599233616196343e-06, "loss": 13.4674, "step": 400440 }, { "epoch": 0.808934335823398, "grad_norm": 153.3062744140625, "learning_rate": 1.159699818225234e-06, "loss": 9.7743, "step": 400450 }, { "epoch": 0.8089545364560818, "grad_norm": 529.4329223632812, "learning_rate": 1.159476293547992e-06, "loss": 18.5159, "step": 400460 }, { "epoch": 0.8089747370887657, "grad_norm": 374.7325744628906, "learning_rate": 1.1592527875889969e-06, "loss": 15.755, "step": 400470 }, { "epoch": 0.8089949377214495, "grad_norm": 301.7434387207031, "learning_rate": 1.159029300349337e-06, "loss": 26.1856, "step": 400480 }, { "epoch": 0.8090151383541333, "grad_norm": 547.7313232421875, "learning_rate": 1.1588058318301021e-06, "loss": 14.8381, "step": 400490 }, { "epoch": 0.8090353389868171, "grad_norm": 93.29722595214844, "learning_rate": 1.1585823820323845e-06, "loss": 9.381, "step": 400500 }, { "epoch": 0.8090555396195008, "grad_norm": 110.5420913696289, "learning_rate": 1.1583589509572679e-06, "loss": 11.4279, "step": 400510 }, { "epoch": 0.8090757402521846, "grad_norm": 319.33612060546875, "learning_rate": 1.1581355386058434e-06, "loss": 9.2072, "step": 400520 }, { "epoch": 0.8090959408848685, "grad_norm": 369.46038818359375, "learning_rate": 1.1579121449792018e-06, "loss": 27.1392, "step": 400530 }, { "epoch": 0.8091161415175523, "grad_norm": 140.03639221191406, "learning_rate": 1.1576887700784307e-06, "loss": 17.1667, "step": 400540 }, { "epoch": 0.8091363421502361, "grad_norm": 
458.397705078125, "learning_rate": 1.1574654139046171e-06, "loss": 27.9674, "step": 400550 }, { "epoch": 0.8091565427829199, "grad_norm": 206.59007263183594, "learning_rate": 1.1572420764588522e-06, "loss": 9.2913, "step": 400560 }, { "epoch": 0.8091767434156037, "grad_norm": 164.22911071777344, "learning_rate": 1.1570187577422237e-06, "loss": 11.4083, "step": 400570 }, { "epoch": 0.8091969440482876, "grad_norm": 468.7096862792969, "learning_rate": 1.1567954577558177e-06, "loss": 11.0282, "step": 400580 }, { "epoch": 0.8092171446809714, "grad_norm": 776.8186645507812, "learning_rate": 1.1565721765007247e-06, "loss": 32.2028, "step": 400590 }, { "epoch": 0.8092373453136552, "grad_norm": 215.9740753173828, "learning_rate": 1.1563489139780344e-06, "loss": 17.1194, "step": 400600 }, { "epoch": 0.809257545946339, "grad_norm": 386.4876708984375, "learning_rate": 1.1561256701888335e-06, "loss": 17.8536, "step": 400610 }, { "epoch": 0.8092777465790228, "grad_norm": 869.1632690429688, "learning_rate": 1.1559024451342082e-06, "loss": 26.9815, "step": 400620 }, { "epoch": 0.8092979472117067, "grad_norm": 1911.39697265625, "learning_rate": 1.1556792388152494e-06, "loss": 32.4074, "step": 400630 }, { "epoch": 0.8093181478443905, "grad_norm": 314.6783752441406, "learning_rate": 1.1554560512330437e-06, "loss": 16.0918, "step": 400640 }, { "epoch": 0.8093383484770743, "grad_norm": 484.771484375, "learning_rate": 1.1552328823886776e-06, "loss": 18.9869, "step": 400650 }, { "epoch": 0.8093585491097581, "grad_norm": 294.3255920410156, "learning_rate": 1.155009732283242e-06, "loss": 16.7085, "step": 400660 }, { "epoch": 0.809378749742442, "grad_norm": 336.3222961425781, "learning_rate": 1.1547866009178204e-06, "loss": 23.7809, "step": 400670 }, { "epoch": 0.8093989503751258, "grad_norm": 173.28057861328125, "learning_rate": 1.1545634882935048e-06, "loss": 29.9988, "step": 400680 }, { "epoch": 0.8094191510078096, "grad_norm": 137.8450469970703, "learning_rate": 1.1543403944113797e-06, "loss": 16.778, "step": 400690 }, { "epoch": 0.8094393516404934, "grad_norm": 433.0418701171875, "learning_rate": 1.154117319272532e-06, "loss": 11.905, "step": 400700 }, { "epoch": 0.8094595522731772, "grad_norm": 145.5989990234375, "learning_rate": 1.1538942628780513e-06, "loss": 16.4736, "step": 400710 }, { "epoch": 0.809479752905861, "grad_norm": 370.5570068359375, "learning_rate": 1.153671225229024e-06, "loss": 16.5558, "step": 400720 }, { "epoch": 0.8094999535385449, "grad_norm": 151.88180541992188, "learning_rate": 1.1534482063265346e-06, "loss": 9.2769, "step": 400730 }, { "epoch": 0.8095201541712287, "grad_norm": 690.061279296875, "learning_rate": 1.153225206171672e-06, "loss": 23.9408, "step": 400740 }, { "epoch": 0.8095403548039125, "grad_norm": 94.86724090576172, "learning_rate": 1.1530022247655253e-06, "loss": 11.7036, "step": 400750 }, { "epoch": 0.8095605554365963, "grad_norm": 327.24957275390625, "learning_rate": 1.1527792621091787e-06, "loss": 21.9644, "step": 400760 }, { "epoch": 0.80958075606928, "grad_norm": 589.7625122070312, "learning_rate": 1.1525563182037181e-06, "loss": 38.0113, "step": 400770 }, { "epoch": 0.8096009567019639, "grad_norm": 518.3905029296875, "learning_rate": 1.1523333930502323e-06, "loss": 13.1313, "step": 400780 }, { "epoch": 0.8096211573346477, "grad_norm": 514.0545654296875, "learning_rate": 1.1521104866498073e-06, "loss": 27.4999, "step": 400790 }, { "epoch": 0.8096413579673315, "grad_norm": 324.7932434082031, "learning_rate": 1.1518875990035278e-06, "loss": 16.8554, "step": 400800 }, 
{ "epoch": 0.8096615586000153, "grad_norm": 340.92047119140625, "learning_rate": 1.1516647301124812e-06, "loss": 15.0591, "step": 400810 }, { "epoch": 0.8096817592326991, "grad_norm": 830.7328491210938, "learning_rate": 1.1514418799777554e-06, "loss": 24.784, "step": 400820 }, { "epoch": 0.809701959865383, "grad_norm": 300.0881042480469, "learning_rate": 1.1512190486004353e-06, "loss": 22.6477, "step": 400830 }, { "epoch": 0.8097221604980668, "grad_norm": 184.5310821533203, "learning_rate": 1.1509962359816052e-06, "loss": 18.6409, "step": 400840 }, { "epoch": 0.8097423611307506, "grad_norm": 214.54428100585938, "learning_rate": 1.1507734421223544e-06, "loss": 11.8894, "step": 400850 }, { "epoch": 0.8097625617634344, "grad_norm": 119.18444061279297, "learning_rate": 1.1505506670237664e-06, "loss": 22.754, "step": 400860 }, { "epoch": 0.8097827623961182, "grad_norm": 371.9199523925781, "learning_rate": 1.1503279106869264e-06, "loss": 17.2868, "step": 400870 }, { "epoch": 0.8098029630288021, "grad_norm": 314.87274169921875, "learning_rate": 1.1501051731129227e-06, "loss": 8.8082, "step": 400880 }, { "epoch": 0.8098231636614859, "grad_norm": 404.53485107421875, "learning_rate": 1.149882454302838e-06, "loss": 12.3033, "step": 400890 }, { "epoch": 0.8098433642941697, "grad_norm": 143.2717742919922, "learning_rate": 1.1496597542577603e-06, "loss": 7.4328, "step": 400900 }, { "epoch": 0.8098635649268535, "grad_norm": 333.5477294921875, "learning_rate": 1.149437072978773e-06, "loss": 26.1383, "step": 400910 }, { "epoch": 0.8098837655595373, "grad_norm": 424.95855712890625, "learning_rate": 1.1492144104669639e-06, "loss": 10.5219, "step": 400920 }, { "epoch": 0.8099039661922212, "grad_norm": 356.6832580566406, "learning_rate": 1.1489917667234162e-06, "loss": 17.2317, "step": 400930 }, { "epoch": 0.809924166824905, "grad_norm": 295.1436462402344, "learning_rate": 1.1487691417492147e-06, "loss": 19.8804, "step": 400940 }, { "epoch": 0.8099443674575888, "grad_norm": 407.83599853515625, "learning_rate": 1.1485465355454467e-06, "loss": 15.6823, "step": 400950 }, { "epoch": 0.8099645680902726, "grad_norm": 286.6570129394531, "learning_rate": 1.1483239481131942e-06, "loss": 15.7468, "step": 400960 }, { "epoch": 0.8099847687229564, "grad_norm": 223.4400177001953, "learning_rate": 1.148101379453545e-06, "loss": 10.8784, "step": 400970 }, { "epoch": 0.8100049693556403, "grad_norm": 311.2491455078125, "learning_rate": 1.1478788295675824e-06, "loss": 14.4636, "step": 400980 }, { "epoch": 0.8100251699883241, "grad_norm": 7.400152683258057, "learning_rate": 1.14765629845639e-06, "loss": 22.4408, "step": 400990 }, { "epoch": 0.8100453706210079, "grad_norm": 79.97480010986328, "learning_rate": 1.1474337861210543e-06, "loss": 16.5655, "step": 401000 }, { "epoch": 0.8100655712536917, "grad_norm": 113.31886291503906, "learning_rate": 1.14721129256266e-06, "loss": 11.061, "step": 401010 }, { "epoch": 0.8100857718863754, "grad_norm": 447.0079345703125, "learning_rate": 1.1469888177822885e-06, "loss": 13.1508, "step": 401020 }, { "epoch": 0.8101059725190592, "grad_norm": 125.6728744506836, "learning_rate": 1.1467663617810264e-06, "loss": 17.4938, "step": 401030 }, { "epoch": 0.8101261731517431, "grad_norm": 152.93800354003906, "learning_rate": 1.1465439245599591e-06, "loss": 25.3538, "step": 401040 }, { "epoch": 0.8101463737844269, "grad_norm": 463.0459899902344, "learning_rate": 1.1463215061201694e-06, "loss": 24.4372, "step": 401050 }, { "epoch": 0.8101665744171107, "grad_norm": 233.2440643310547, "learning_rate": 
1.1460991064627403e-06, "loss": 11.7019, "step": 401060 }, { "epoch": 0.8101867750497945, "grad_norm": 5.622211456298828, "learning_rate": 1.1458767255887576e-06, "loss": 19.5494, "step": 401070 }, { "epoch": 0.8102069756824783, "grad_norm": 448.8087158203125, "learning_rate": 1.1456543634993046e-06, "loss": 22.6072, "step": 401080 }, { "epoch": 0.8102271763151622, "grad_norm": 505.4259033203125, "learning_rate": 1.1454320201954628e-06, "loss": 17.0476, "step": 401090 }, { "epoch": 0.810247376947846, "grad_norm": 204.0558319091797, "learning_rate": 1.1452096956783181e-06, "loss": 9.2969, "step": 401100 }, { "epoch": 0.8102675775805298, "grad_norm": 655.0369262695312, "learning_rate": 1.1449873899489566e-06, "loss": 13.7258, "step": 401110 }, { "epoch": 0.8102877782132136, "grad_norm": 169.9647979736328, "learning_rate": 1.1447651030084567e-06, "loss": 17.9725, "step": 401120 }, { "epoch": 0.8103079788458974, "grad_norm": 395.2190246582031, "learning_rate": 1.1445428348579035e-06, "loss": 13.6773, "step": 401130 }, { "epoch": 0.8103281794785813, "grad_norm": 215.7799835205078, "learning_rate": 1.1443205854983824e-06, "loss": 25.8711, "step": 401140 }, { "epoch": 0.8103483801112651, "grad_norm": 424.9611511230469, "learning_rate": 1.1440983549309753e-06, "loss": 57.7286, "step": 401150 }, { "epoch": 0.8103685807439489, "grad_norm": 405.81195068359375, "learning_rate": 1.1438761431567641e-06, "loss": 20.7329, "step": 401160 }, { "epoch": 0.8103887813766327, "grad_norm": 194.59307861328125, "learning_rate": 1.1436539501768334e-06, "loss": 7.5697, "step": 401170 }, { "epoch": 0.8104089820093165, "grad_norm": 96.64669036865234, "learning_rate": 1.1434317759922664e-06, "loss": 15.5658, "step": 401180 }, { "epoch": 0.8104291826420004, "grad_norm": 246.2642822265625, "learning_rate": 1.1432096206041438e-06, "loss": 14.9546, "step": 401190 }, { "epoch": 0.8104493832746842, "grad_norm": 88.6189956665039, "learning_rate": 1.1429874840135492e-06, "loss": 19.2168, "step": 401200 }, { "epoch": 0.810469583907368, "grad_norm": 399.97540283203125, "learning_rate": 1.1427653662215675e-06, "loss": 14.9003, "step": 401210 }, { "epoch": 0.8104897845400518, "grad_norm": 215.20079040527344, "learning_rate": 1.1425432672292798e-06, "loss": 28.3903, "step": 401220 }, { "epoch": 0.8105099851727356, "grad_norm": 365.6468505859375, "learning_rate": 1.1423211870377666e-06, "loss": 27.7487, "step": 401230 }, { "epoch": 0.8105301858054195, "grad_norm": 6.0168232917785645, "learning_rate": 1.1420991256481133e-06, "loss": 14.9814, "step": 401240 }, { "epoch": 0.8105503864381033, "grad_norm": 379.3415832519531, "learning_rate": 1.1418770830614012e-06, "loss": 23.0263, "step": 401250 }, { "epoch": 0.8105705870707871, "grad_norm": 1574.305908203125, "learning_rate": 1.1416550592787106e-06, "loss": 41.68, "step": 401260 }, { "epoch": 0.8105907877034709, "grad_norm": 564.3516235351562, "learning_rate": 1.141433054301127e-06, "loss": 9.5871, "step": 401270 }, { "epoch": 0.8106109883361546, "grad_norm": 430.0144348144531, "learning_rate": 1.1412110681297296e-06, "loss": 11.8704, "step": 401280 }, { "epoch": 0.8106311889688385, "grad_norm": 207.62380981445312, "learning_rate": 1.1409891007656022e-06, "loss": 20.9196, "step": 401290 }, { "epoch": 0.8106513896015223, "grad_norm": 391.6383361816406, "learning_rate": 1.1407671522098262e-06, "loss": 16.6244, "step": 401300 }, { "epoch": 0.8106715902342061, "grad_norm": 261.6241760253906, "learning_rate": 1.1405452224634817e-06, "loss": 13.7126, "step": 401310 }, { "epoch": 
0.8106917908668899, "grad_norm": 500.1307373046875, "learning_rate": 1.140323311527653e-06, "loss": 56.0972, "step": 401320 }, { "epoch": 0.8107119914995737, "grad_norm": 310.52386474609375, "learning_rate": 1.1401014194034205e-06, "loss": 22.8916, "step": 401330 }, { "epoch": 0.8107321921322576, "grad_norm": 284.5379333496094, "learning_rate": 1.1398795460918637e-06, "loss": 11.5335, "step": 401340 }, { "epoch": 0.8107523927649414, "grad_norm": 373.453369140625, "learning_rate": 1.139657691594066e-06, "loss": 24.3312, "step": 401350 }, { "epoch": 0.8107725933976252, "grad_norm": 437.35235595703125, "learning_rate": 1.1394358559111101e-06, "loss": 16.2901, "step": 401360 }, { "epoch": 0.810792794030309, "grad_norm": 190.43516540527344, "learning_rate": 1.1392140390440754e-06, "loss": 11.5918, "step": 401370 }, { "epoch": 0.8108129946629928, "grad_norm": 412.5727233886719, "learning_rate": 1.1389922409940423e-06, "loss": 16.8753, "step": 401380 }, { "epoch": 0.8108331952956767, "grad_norm": 447.2630310058594, "learning_rate": 1.1387704617620937e-06, "loss": 17.5915, "step": 401390 }, { "epoch": 0.8108533959283605, "grad_norm": 28.676143646240234, "learning_rate": 1.1385487013493095e-06, "loss": 14.9792, "step": 401400 }, { "epoch": 0.8108735965610443, "grad_norm": 321.4709167480469, "learning_rate": 1.1383269597567691e-06, "loss": 23.3169, "step": 401410 }, { "epoch": 0.8108937971937281, "grad_norm": 696.7423095703125, "learning_rate": 1.138105236985555e-06, "loss": 15.216, "step": 401420 }, { "epoch": 0.8109139978264119, "grad_norm": 45.329654693603516, "learning_rate": 1.1378835330367494e-06, "loss": 25.8943, "step": 401430 }, { "epoch": 0.8109341984590958, "grad_norm": 373.6303405761719, "learning_rate": 1.1376618479114304e-06, "loss": 17.9609, "step": 401440 }, { "epoch": 0.8109543990917796, "grad_norm": 500.3052978515625, "learning_rate": 1.1374401816106778e-06, "loss": 19.6485, "step": 401450 }, { "epoch": 0.8109745997244634, "grad_norm": 51.37641143798828, "learning_rate": 1.1372185341355746e-06, "loss": 12.2529, "step": 401460 }, { "epoch": 0.8109948003571472, "grad_norm": 101.87545776367188, "learning_rate": 1.1369969054871998e-06, "loss": 22.0237, "step": 401470 }, { "epoch": 0.811015000989831, "grad_norm": 98.46989440917969, "learning_rate": 1.1367752956666322e-06, "loss": 16.4904, "step": 401480 }, { "epoch": 0.8110352016225149, "grad_norm": 100.5164566040039, "learning_rate": 1.1365537046749536e-06, "loss": 26.3304, "step": 401490 }, { "epoch": 0.8110554022551987, "grad_norm": 467.39208984375, "learning_rate": 1.136332132513245e-06, "loss": 9.3387, "step": 401500 }, { "epoch": 0.8110756028878825, "grad_norm": 9.399454116821289, "learning_rate": 1.1361105791825845e-06, "loss": 17.5661, "step": 401510 }, { "epoch": 0.8110958035205663, "grad_norm": 293.3193054199219, "learning_rate": 1.1358890446840514e-06, "loss": 21.871, "step": 401520 }, { "epoch": 0.81111600415325, "grad_norm": 435.3813781738281, "learning_rate": 1.135667529018728e-06, "loss": 13.8553, "step": 401530 }, { "epoch": 0.8111362047859338, "grad_norm": 682.3662719726562, "learning_rate": 1.135446032187692e-06, "loss": 31.9891, "step": 401540 }, { "epoch": 0.8111564054186177, "grad_norm": 749.76171875, "learning_rate": 1.135224554192022e-06, "loss": 26.1557, "step": 401550 }, { "epoch": 0.8111766060513015, "grad_norm": 210.04885864257812, "learning_rate": 1.1350030950328001e-06, "loss": 13.8013, "step": 401560 }, { "epoch": 0.8111968066839853, "grad_norm": 48.78976821899414, "learning_rate": 1.1347816547111029e-06, 
"loss": 12.8956, "step": 401570 }, { "epoch": 0.8112170073166691, "grad_norm": 362.14215087890625, "learning_rate": 1.1345602332280125e-06, "loss": 24.5604, "step": 401580 }, { "epoch": 0.811237207949353, "grad_norm": 635.3172607421875, "learning_rate": 1.134338830584607e-06, "loss": 16.3891, "step": 401590 }, { "epoch": 0.8112574085820368, "grad_norm": 936.0580444335938, "learning_rate": 1.1341174467819637e-06, "loss": 29.421, "step": 401600 }, { "epoch": 0.8112776092147206, "grad_norm": 772.8532104492188, "learning_rate": 1.1338960818211647e-06, "loss": 21.7755, "step": 401610 }, { "epoch": 0.8112978098474044, "grad_norm": 82.2054672241211, "learning_rate": 1.1336747357032868e-06, "loss": 10.8459, "step": 401620 }, { "epoch": 0.8113180104800882, "grad_norm": 171.50927734375, "learning_rate": 1.1334534084294084e-06, "loss": 19.3667, "step": 401630 }, { "epoch": 0.811338211112772, "grad_norm": 164.10752868652344, "learning_rate": 1.1332321000006086e-06, "loss": 25.8789, "step": 401640 }, { "epoch": 0.8113584117454559, "grad_norm": 496.20977783203125, "learning_rate": 1.1330108104179682e-06, "loss": 15.3479, "step": 401650 }, { "epoch": 0.8113786123781397, "grad_norm": 156.0892333984375, "learning_rate": 1.1327895396825644e-06, "loss": 16.3611, "step": 401660 }, { "epoch": 0.8113988130108235, "grad_norm": 220.75396728515625, "learning_rate": 1.1325682877954736e-06, "loss": 17.7, "step": 401670 }, { "epoch": 0.8114190136435073, "grad_norm": 446.0356750488281, "learning_rate": 1.1323470547577774e-06, "loss": 13.671, "step": 401680 }, { "epoch": 0.8114392142761911, "grad_norm": 303.70318603515625, "learning_rate": 1.1321258405705526e-06, "loss": 18.6709, "step": 401690 }, { "epoch": 0.811459414908875, "grad_norm": 7.471003532409668, "learning_rate": 1.1319046452348758e-06, "loss": 20.7449, "step": 401700 }, { "epoch": 0.8114796155415588, "grad_norm": 344.658447265625, "learning_rate": 1.131683468751827e-06, "loss": 22.884, "step": 401710 }, { "epoch": 0.8114998161742426, "grad_norm": 317.5362243652344, "learning_rate": 1.1314623111224865e-06, "loss": 17.6677, "step": 401720 }, { "epoch": 0.8115200168069264, "grad_norm": 265.2457275390625, "learning_rate": 1.1312411723479261e-06, "loss": 15.9968, "step": 401730 }, { "epoch": 0.8115402174396102, "grad_norm": 480.3129577636719, "learning_rate": 1.1310200524292275e-06, "loss": 12.3708, "step": 401740 }, { "epoch": 0.8115604180722941, "grad_norm": 82.8792495727539, "learning_rate": 1.1307989513674695e-06, "loss": 9.7043, "step": 401750 }, { "epoch": 0.8115806187049779, "grad_norm": 109.63460540771484, "learning_rate": 1.1305778691637276e-06, "loss": 7.1868, "step": 401760 }, { "epoch": 0.8116008193376617, "grad_norm": 279.5165710449219, "learning_rate": 1.1303568058190789e-06, "loss": 29.7321, "step": 401770 }, { "epoch": 0.8116210199703455, "grad_norm": 64.94554138183594, "learning_rate": 1.1301357613346032e-06, "loss": 7.8405, "step": 401780 }, { "epoch": 0.8116412206030292, "grad_norm": 741.6187744140625, "learning_rate": 1.129914735711376e-06, "loss": 18.0711, "step": 401790 }, { "epoch": 0.811661421235713, "grad_norm": 361.603759765625, "learning_rate": 1.129693728950474e-06, "loss": 21.3723, "step": 401800 }, { "epoch": 0.8116816218683969, "grad_norm": 37.61235427856445, "learning_rate": 1.1294727410529754e-06, "loss": 23.2052, "step": 401810 }, { "epoch": 0.8117018225010807, "grad_norm": 303.11474609375, "learning_rate": 1.1292517720199581e-06, "loss": 18.4122, "step": 401820 }, { "epoch": 0.8117220231337645, "grad_norm": 147.80593872070312, 
"learning_rate": 1.1290308218524986e-06, "loss": 17.6941, "step": 401830 }, { "epoch": 0.8117422237664483, "grad_norm": 179.4315185546875, "learning_rate": 1.128809890551672e-06, "loss": 19.127, "step": 401840 }, { "epoch": 0.8117624243991322, "grad_norm": 498.0691833496094, "learning_rate": 1.1285889781185576e-06, "loss": 15.6153, "step": 401850 }, { "epoch": 0.811782625031816, "grad_norm": 442.79736328125, "learning_rate": 1.1283680845542316e-06, "loss": 15.8981, "step": 401860 }, { "epoch": 0.8118028256644998, "grad_norm": 361.2605895996094, "learning_rate": 1.128147209859768e-06, "loss": 20.6368, "step": 401870 }, { "epoch": 0.8118230262971836, "grad_norm": 14.113608360290527, "learning_rate": 1.1279263540362468e-06, "loss": 24.9256, "step": 401880 }, { "epoch": 0.8118432269298674, "grad_norm": 386.19049072265625, "learning_rate": 1.1277055170847416e-06, "loss": 19.3577, "step": 401890 }, { "epoch": 0.8118634275625513, "grad_norm": 362.01763916015625, "learning_rate": 1.1274846990063314e-06, "loss": 15.0212, "step": 401900 }, { "epoch": 0.8118836281952351, "grad_norm": 625.5485229492188, "learning_rate": 1.1272638998020913e-06, "loss": 18.1125, "step": 401910 }, { "epoch": 0.8119038288279189, "grad_norm": 103.92401885986328, "learning_rate": 1.1270431194730959e-06, "loss": 15.5105, "step": 401920 }, { "epoch": 0.8119240294606027, "grad_norm": 26.25572395324707, "learning_rate": 1.1268223580204234e-06, "loss": 7.4456, "step": 401930 }, { "epoch": 0.8119442300932865, "grad_norm": 218.33631896972656, "learning_rate": 1.1266016154451492e-06, "loss": 14.0734, "step": 401940 }, { "epoch": 0.8119644307259704, "grad_norm": 111.6611099243164, "learning_rate": 1.1263808917483476e-06, "loss": 16.248, "step": 401950 }, { "epoch": 0.8119846313586542, "grad_norm": 414.2197570800781, "learning_rate": 1.1261601869310962e-06, "loss": 14.3198, "step": 401960 }, { "epoch": 0.812004831991338, "grad_norm": 59.31288146972656, "learning_rate": 1.1259395009944713e-06, "loss": 18.0006, "step": 401970 }, { "epoch": 0.8120250326240218, "grad_norm": 493.8407897949219, "learning_rate": 1.125718833939547e-06, "loss": 22.5595, "step": 401980 }, { "epoch": 0.8120452332567056, "grad_norm": 589.6556396484375, "learning_rate": 1.125498185767398e-06, "loss": 15.9924, "step": 401990 }, { "epoch": 0.8120654338893895, "grad_norm": 272.1883850097656, "learning_rate": 1.1252775564791023e-06, "loss": 16.4307, "step": 402000 }, { "epoch": 0.8120856345220733, "grad_norm": 549.4430541992188, "learning_rate": 1.1250569460757344e-06, "loss": 26.6369, "step": 402010 }, { "epoch": 0.8121058351547571, "grad_norm": 273.0378112792969, "learning_rate": 1.1248363545583675e-06, "loss": 21.2479, "step": 402020 }, { "epoch": 0.8121260357874409, "grad_norm": 208.7742462158203, "learning_rate": 1.1246157819280772e-06, "loss": 19.5443, "step": 402030 }, { "epoch": 0.8121462364201246, "grad_norm": 233.62985229492188, "learning_rate": 1.1243952281859422e-06, "loss": 26.6932, "step": 402040 }, { "epoch": 0.8121664370528084, "grad_norm": 145.3607940673828, "learning_rate": 1.1241746933330338e-06, "loss": 13.2304, "step": 402050 }, { "epoch": 0.8121866376854923, "grad_norm": 211.1546630859375, "learning_rate": 1.123954177370427e-06, "loss": 16.9454, "step": 402060 }, { "epoch": 0.8122068383181761, "grad_norm": 799.8381958007812, "learning_rate": 1.1237336802991989e-06, "loss": 23.1495, "step": 402070 }, { "epoch": 0.8122270389508599, "grad_norm": 293.4764404296875, "learning_rate": 1.1235132021204226e-06, "loss": 20.9247, "step": 402080 }, { 
"epoch": 0.8122472395835437, "grad_norm": 585.5978393554688, "learning_rate": 1.1232927428351714e-06, "loss": 32.2018, "step": 402090 }, { "epoch": 0.8122674402162275, "grad_norm": 543.6815185546875, "learning_rate": 1.1230723024445212e-06, "loss": 22.7182, "step": 402100 }, { "epoch": 0.8122876408489114, "grad_norm": 592.7957153320312, "learning_rate": 1.1228518809495475e-06, "loss": 30.7611, "step": 402110 }, { "epoch": 0.8123078414815952, "grad_norm": 706.2651977539062, "learning_rate": 1.1226314783513238e-06, "loss": 12.2706, "step": 402120 }, { "epoch": 0.812328042114279, "grad_norm": 51.723060607910156, "learning_rate": 1.122411094650922e-06, "loss": 12.408, "step": 402130 }, { "epoch": 0.8123482427469628, "grad_norm": 154.70314025878906, "learning_rate": 1.1221907298494205e-06, "loss": 13.539, "step": 402140 }, { "epoch": 0.8123684433796466, "grad_norm": 353.5382995605469, "learning_rate": 1.1219703839478907e-06, "loss": 31.1909, "step": 402150 }, { "epoch": 0.8123886440123305, "grad_norm": 184.29806518554688, "learning_rate": 1.1217500569474054e-06, "loss": 13.1566, "step": 402160 }, { "epoch": 0.8124088446450143, "grad_norm": 745.1736450195312, "learning_rate": 1.1215297488490412e-06, "loss": 26.4486, "step": 402170 }, { "epoch": 0.8124290452776981, "grad_norm": 188.0874481201172, "learning_rate": 1.12130945965387e-06, "loss": 16.0302, "step": 402180 }, { "epoch": 0.8124492459103819, "grad_norm": 224.18624877929688, "learning_rate": 1.1210891893629665e-06, "loss": 13.0421, "step": 402190 }, { "epoch": 0.8124694465430657, "grad_norm": 297.636474609375, "learning_rate": 1.120868937977404e-06, "loss": 12.9067, "step": 402200 }, { "epoch": 0.8124896471757496, "grad_norm": 20.107284545898438, "learning_rate": 1.1206487054982551e-06, "loss": 111.9853, "step": 402210 }, { "epoch": 0.8125098478084334, "grad_norm": 322.66339111328125, "learning_rate": 1.1204284919265946e-06, "loss": 22.0205, "step": 402220 }, { "epoch": 0.8125300484411172, "grad_norm": 590.3783569335938, "learning_rate": 1.1202082972634952e-06, "loss": 13.0414, "step": 402230 }, { "epoch": 0.812550249073801, "grad_norm": 427.2761535644531, "learning_rate": 1.119988121510029e-06, "loss": 27.9798, "step": 402240 }, { "epoch": 0.8125704497064848, "grad_norm": 238.13414001464844, "learning_rate": 1.1197679646672698e-06, "loss": 15.6964, "step": 402250 }, { "epoch": 0.8125906503391687, "grad_norm": 638.0228271484375, "learning_rate": 1.1195478267362924e-06, "loss": 18.874, "step": 402260 }, { "epoch": 0.8126108509718525, "grad_norm": 323.3406066894531, "learning_rate": 1.119327707718168e-06, "loss": 13.2257, "step": 402270 }, { "epoch": 0.8126310516045363, "grad_norm": 1900.7822265625, "learning_rate": 1.1191076076139684e-06, "loss": 13.8603, "step": 402280 }, { "epoch": 0.8126512522372201, "grad_norm": 554.580810546875, "learning_rate": 1.1188875264247695e-06, "loss": 32.6755, "step": 402290 }, { "epoch": 0.8126714528699038, "grad_norm": 212.0137176513672, "learning_rate": 1.1186674641516415e-06, "loss": 10.6111, "step": 402300 }, { "epoch": 0.8126916535025877, "grad_norm": 89.68228912353516, "learning_rate": 1.1184474207956564e-06, "loss": 18.3645, "step": 402310 }, { "epoch": 0.8127118541352715, "grad_norm": 263.7377014160156, "learning_rate": 1.1182273963578877e-06, "loss": 40.6328, "step": 402320 }, { "epoch": 0.8127320547679553, "grad_norm": 245.92799377441406, "learning_rate": 1.1180073908394108e-06, "loss": 16.7199, "step": 402330 }, { "epoch": 0.8127522554006391, "grad_norm": 360.03826904296875, "learning_rate": 
1.1177874042412923e-06, "loss": 19.2404, "step": 402340 }, { "epoch": 0.8127724560333229, "grad_norm": 243.1678466796875, "learning_rate": 1.1175674365646067e-06, "loss": 12.9017, "step": 402350 }, { "epoch": 0.8127926566660068, "grad_norm": 269.5718688964844, "learning_rate": 1.1173474878104285e-06, "loss": 25.318, "step": 402360 }, { "epoch": 0.8128128572986906, "grad_norm": 154.96092224121094, "learning_rate": 1.1171275579798274e-06, "loss": 14.6612, "step": 402370 }, { "epoch": 0.8128330579313744, "grad_norm": 350.7008972167969, "learning_rate": 1.1169076470738744e-06, "loss": 34.7705, "step": 402380 }, { "epoch": 0.8128532585640582, "grad_norm": 145.260986328125, "learning_rate": 1.1166877550936433e-06, "loss": 12.0329, "step": 402390 }, { "epoch": 0.812873459196742, "grad_norm": 246.95323181152344, "learning_rate": 1.1164678820402059e-06, "loss": 24.2772, "step": 402400 }, { "epoch": 0.8128936598294259, "grad_norm": 189.96546936035156, "learning_rate": 1.1162480279146315e-06, "loss": 19.6054, "step": 402410 }, { "epoch": 0.8129138604621097, "grad_norm": 525.8419799804688, "learning_rate": 1.1160281927179927e-06, "loss": 11.8074, "step": 402420 }, { "epoch": 0.8129340610947935, "grad_norm": 326.4888610839844, "learning_rate": 1.1158083764513634e-06, "loss": 16.4423, "step": 402430 }, { "epoch": 0.8129542617274773, "grad_norm": 236.17709350585938, "learning_rate": 1.1155885791158128e-06, "loss": 14.372, "step": 402440 }, { "epoch": 0.8129744623601611, "grad_norm": 590.6957397460938, "learning_rate": 1.1153688007124109e-06, "loss": 18.3441, "step": 402450 }, { "epoch": 0.812994662992845, "grad_norm": 403.65301513671875, "learning_rate": 1.1151490412422316e-06, "loss": 19.8515, "step": 402460 }, { "epoch": 0.8130148636255288, "grad_norm": 221.52354431152344, "learning_rate": 1.114929300706345e-06, "loss": 14.8902, "step": 402470 }, { "epoch": 0.8130350642582126, "grad_norm": 232.07037353515625, "learning_rate": 1.1147095791058198e-06, "loss": 26.7014, "step": 402480 }, { "epoch": 0.8130552648908964, "grad_norm": 580.7557983398438, "learning_rate": 1.114489876441731e-06, "loss": 12.2789, "step": 402490 }, { "epoch": 0.8130754655235802, "grad_norm": 486.24896240234375, "learning_rate": 1.1142701927151456e-06, "loss": 22.9559, "step": 402500 }, { "epoch": 0.813095666156264, "grad_norm": 1497.037353515625, "learning_rate": 1.1140505279271373e-06, "loss": 31.7122, "step": 402510 }, { "epoch": 0.8131158667889479, "grad_norm": 273.04931640625, "learning_rate": 1.1138308820787752e-06, "loss": 15.8489, "step": 402520 }, { "epoch": 0.8131360674216317, "grad_norm": 215.6087188720703, "learning_rate": 1.1136112551711293e-06, "loss": 16.5628, "step": 402530 }, { "epoch": 0.8131562680543155, "grad_norm": 201.78170776367188, "learning_rate": 1.113391647205272e-06, "loss": 11.3917, "step": 402540 }, { "epoch": 0.8131764686869993, "grad_norm": 510.38427734375, "learning_rate": 1.1131720581822703e-06, "loss": 21.7584, "step": 402550 }, { "epoch": 0.813196669319683, "grad_norm": 644.1688842773438, "learning_rate": 1.1129524881031989e-06, "loss": 10.8479, "step": 402560 }, { "epoch": 0.8132168699523669, "grad_norm": 22.48255157470703, "learning_rate": 1.1127329369691236e-06, "loss": 14.5291, "step": 402570 }, { "epoch": 0.8132370705850507, "grad_norm": 124.2413330078125, "learning_rate": 1.1125134047811182e-06, "loss": 9.475, "step": 402580 }, { "epoch": 0.8132572712177345, "grad_norm": 159.1404266357422, "learning_rate": 1.1122938915402508e-06, "loss": 6.645, "step": 402590 }, { "epoch": 
0.8132774718504183, "grad_norm": 336.38653564453125, "learning_rate": 1.11207439724759e-06, "loss": 16.921, "step": 402600 }, { "epoch": 0.8132976724831021, "grad_norm": 310.8020324707031, "learning_rate": 1.1118549219042085e-06, "loss": 17.5297, "step": 402610 }, { "epoch": 0.813317873115786, "grad_norm": 317.6047058105469, "learning_rate": 1.111635465511175e-06, "loss": 16.1258, "step": 402620 }, { "epoch": 0.8133380737484698, "grad_norm": 540.7821655273438, "learning_rate": 1.1114160280695568e-06, "loss": 15.7187, "step": 402630 }, { "epoch": 0.8133582743811536, "grad_norm": 718.9857177734375, "learning_rate": 1.1111966095804254e-06, "loss": 16.3498, "step": 402640 }, { "epoch": 0.8133784750138374, "grad_norm": 26.156211853027344, "learning_rate": 1.1109772100448512e-06, "loss": 19.6282, "step": 402650 }, { "epoch": 0.8133986756465212, "grad_norm": 383.561767578125, "learning_rate": 1.1107578294639026e-06, "loss": 22.3377, "step": 402660 }, { "epoch": 0.8134188762792051, "grad_norm": 652.242919921875, "learning_rate": 1.1105384678386472e-06, "loss": 16.9119, "step": 402670 }, { "epoch": 0.8134390769118889, "grad_norm": 687.17236328125, "learning_rate": 1.1103191251701573e-06, "loss": 12.5076, "step": 402680 }, { "epoch": 0.8134592775445727, "grad_norm": 480.04132080078125, "learning_rate": 1.1100998014594993e-06, "loss": 15.089, "step": 402690 }, { "epoch": 0.8134794781772565, "grad_norm": 418.9132995605469, "learning_rate": 1.1098804967077425e-06, "loss": 17.6941, "step": 402700 }, { "epoch": 0.8134996788099403, "grad_norm": 492.0087585449219, "learning_rate": 1.109661210915956e-06, "loss": 14.0928, "step": 402710 }, { "epoch": 0.8135198794426242, "grad_norm": 122.6615219116211, "learning_rate": 1.1094419440852105e-06, "loss": 19.9941, "step": 402720 }, { "epoch": 0.813540080075308, "grad_norm": 428.9898681640625, "learning_rate": 1.109222696216573e-06, "loss": 16.79, "step": 402730 }, { "epoch": 0.8135602807079918, "grad_norm": 1433.088623046875, "learning_rate": 1.109003467311111e-06, "loss": 37.8135, "step": 402740 }, { "epoch": 0.8135804813406756, "grad_norm": 308.20782470703125, "learning_rate": 1.1087842573698953e-06, "loss": 24.2398, "step": 402750 }, { "epoch": 0.8136006819733594, "grad_norm": 184.24591064453125, "learning_rate": 1.1085650663939933e-06, "loss": 26.7286, "step": 402760 }, { "epoch": 0.8136208826060433, "grad_norm": 337.38543701171875, "learning_rate": 1.1083458943844721e-06, "loss": 21.6237, "step": 402770 }, { "epoch": 0.8136410832387271, "grad_norm": 227.1483154296875, "learning_rate": 1.1081267413424018e-06, "loss": 19.5517, "step": 402780 }, { "epoch": 0.8136612838714109, "grad_norm": 240.45953369140625, "learning_rate": 1.1079076072688489e-06, "loss": 13.4554, "step": 402790 }, { "epoch": 0.8136814845040947, "grad_norm": 266.16412353515625, "learning_rate": 1.1076884921648834e-06, "loss": 21.7566, "step": 402800 }, { "epoch": 0.8137016851367784, "grad_norm": 378.9514465332031, "learning_rate": 1.1074693960315718e-06, "loss": 15.3425, "step": 402810 }, { "epoch": 0.8137218857694622, "grad_norm": 381.3688659667969, "learning_rate": 1.1072503188699811e-06, "loss": 17.5778, "step": 402820 }, { "epoch": 0.8137420864021461, "grad_norm": 759.8544921875, "learning_rate": 1.1070312606811816e-06, "loss": 19.0493, "step": 402830 }, { "epoch": 0.8137622870348299, "grad_norm": 390.9328308105469, "learning_rate": 1.1068122214662397e-06, "loss": 23.379, "step": 402840 }, { "epoch": 0.8137824876675137, "grad_norm": 239.4142608642578, "learning_rate": 
1.1065932012262215e-06, "loss": 18.6682, "step": 402850 }, { "epoch": 0.8138026883001975, "grad_norm": 348.1329345703125, "learning_rate": 1.1063741999621958e-06, "loss": 14.7364, "step": 402860 }, { "epoch": 0.8138228889328813, "grad_norm": 427.66949462890625, "learning_rate": 1.1061552176752311e-06, "loss": 20.0499, "step": 402870 }, { "epoch": 0.8138430895655652, "grad_norm": 323.6460266113281, "learning_rate": 1.1059362543663944e-06, "loss": 17.2907, "step": 402880 }, { "epoch": 0.813863290198249, "grad_norm": 0.04072647541761398, "learning_rate": 1.1057173100367495e-06, "loss": 10.3853, "step": 402890 }, { "epoch": 0.8138834908309328, "grad_norm": 347.5143127441406, "learning_rate": 1.1054983846873684e-06, "loss": 19.3528, "step": 402900 }, { "epoch": 0.8139036914636166, "grad_norm": 447.7604064941406, "learning_rate": 1.1052794783193155e-06, "loss": 20.1185, "step": 402910 }, { "epoch": 0.8139238920963004, "grad_norm": 642.8087768554688, "learning_rate": 1.105060590933657e-06, "loss": 21.1317, "step": 402920 }, { "epoch": 0.8139440927289843, "grad_norm": 67.33672332763672, "learning_rate": 1.10484172253146e-06, "loss": 26.4036, "step": 402930 }, { "epoch": 0.8139642933616681, "grad_norm": 315.9908447265625, "learning_rate": 1.1046228731137953e-06, "loss": 13.0807, "step": 402940 }, { "epoch": 0.8139844939943519, "grad_norm": 608.756103515625, "learning_rate": 1.1044040426817237e-06, "loss": 17.9246, "step": 402950 }, { "epoch": 0.8140046946270357, "grad_norm": 44.12290573120117, "learning_rate": 1.1041852312363144e-06, "loss": 14.1365, "step": 402960 }, { "epoch": 0.8140248952597195, "grad_norm": 533.66015625, "learning_rate": 1.1039664387786348e-06, "loss": 33.035, "step": 402970 }, { "epoch": 0.8140450958924034, "grad_norm": 430.2449951171875, "learning_rate": 1.1037476653097501e-06, "loss": 23.2743, "step": 402980 }, { "epoch": 0.8140652965250872, "grad_norm": 216.03126525878906, "learning_rate": 1.1035289108307256e-06, "loss": 15.3637, "step": 402990 }, { "epoch": 0.814085497157771, "grad_norm": 23.619455337524414, "learning_rate": 1.1033101753426285e-06, "loss": 10.1855, "step": 403000 }, { "epoch": 0.8141056977904548, "grad_norm": 436.244873046875, "learning_rate": 1.1030914588465281e-06, "loss": 16.8647, "step": 403010 }, { "epoch": 0.8141258984231386, "grad_norm": 150.64334106445312, "learning_rate": 1.1028727613434842e-06, "loss": 30.5275, "step": 403020 }, { "epoch": 0.8141460990558225, "grad_norm": 453.5550537109375, "learning_rate": 1.1026540828345656e-06, "loss": 19.0017, "step": 403030 }, { "epoch": 0.8141662996885063, "grad_norm": 475.4017333984375, "learning_rate": 1.10243542332084e-06, "loss": 15.6427, "step": 403040 }, { "epoch": 0.8141865003211901, "grad_norm": 134.8949432373047, "learning_rate": 1.1022167828033715e-06, "loss": 11.7787, "step": 403050 }, { "epoch": 0.8142067009538739, "grad_norm": 276.0162048339844, "learning_rate": 1.1019981612832243e-06, "loss": 36.4231, "step": 403060 }, { "epoch": 0.8142269015865576, "grad_norm": 258.1709899902344, "learning_rate": 1.101779558761466e-06, "loss": 9.948, "step": 403070 }, { "epoch": 0.8142471022192415, "grad_norm": 640.7821044921875, "learning_rate": 1.101560975239162e-06, "loss": 16.1311, "step": 403080 }, { "epoch": 0.8142673028519253, "grad_norm": 520.6428833007812, "learning_rate": 1.1013424107173753e-06, "loss": 17.7264, "step": 403090 }, { "epoch": 0.8142875034846091, "grad_norm": 508.30267333984375, "learning_rate": 1.1011238651971744e-06, "loss": 19.0138, "step": 403100 }, { "epoch": 0.8143077041172929, 
"grad_norm": 341.8509521484375, "learning_rate": 1.1009053386796215e-06, "loss": 17.658, "step": 403110 }, { "epoch": 0.8143279047499767, "grad_norm": 457.42999267578125, "learning_rate": 1.1006868311657848e-06, "loss": 14.7793, "step": 403120 }, { "epoch": 0.8143481053826606, "grad_norm": 904.3662719726562, "learning_rate": 1.100468342656727e-06, "loss": 23.9291, "step": 403130 }, { "epoch": 0.8143683060153444, "grad_norm": 259.1905212402344, "learning_rate": 1.1002498731535123e-06, "loss": 8.0702, "step": 403140 }, { "epoch": 0.8143885066480282, "grad_norm": 458.9711608886719, "learning_rate": 1.1000314226572083e-06, "loss": 25.5438, "step": 403150 }, { "epoch": 0.814408707280712, "grad_norm": 650.0687866210938, "learning_rate": 1.0998129911688766e-06, "loss": 22.2907, "step": 403160 }, { "epoch": 0.8144289079133958, "grad_norm": 118.45144653320312, "learning_rate": 1.0995945786895846e-06, "loss": 18.8227, "step": 403170 }, { "epoch": 0.8144491085460797, "grad_norm": 321.75457763671875, "learning_rate": 1.0993761852203943e-06, "loss": 12.9093, "step": 403180 }, { "epoch": 0.8144693091787635, "grad_norm": 291.5635681152344, "learning_rate": 1.099157810762373e-06, "loss": 25.4308, "step": 403190 }, { "epoch": 0.8144895098114473, "grad_norm": 570.944091796875, "learning_rate": 1.0989394553165833e-06, "loss": 32.1767, "step": 403200 }, { "epoch": 0.8145097104441311, "grad_norm": 231.1138916015625, "learning_rate": 1.098721118884088e-06, "loss": 8.4851, "step": 403210 }, { "epoch": 0.8145299110768149, "grad_norm": 8.792352676391602, "learning_rate": 1.0985028014659544e-06, "loss": 9.7354, "step": 403220 }, { "epoch": 0.8145501117094988, "grad_norm": 379.2449645996094, "learning_rate": 1.0982845030632445e-06, "loss": 12.45, "step": 403230 }, { "epoch": 0.8145703123421826, "grad_norm": 261.1451416015625, "learning_rate": 1.0980662236770217e-06, "loss": 10.7204, "step": 403240 }, { "epoch": 0.8145905129748664, "grad_norm": 271.76934814453125, "learning_rate": 1.097847963308351e-06, "loss": 29.2455, "step": 403250 }, { "epoch": 0.8146107136075502, "grad_norm": 296.5911865234375, "learning_rate": 1.0976297219582988e-06, "loss": 26.4947, "step": 403260 }, { "epoch": 0.814630914240234, "grad_norm": 501.515869140625, "learning_rate": 1.0974114996279229e-06, "loss": 15.3263, "step": 403270 }, { "epoch": 0.8146511148729179, "grad_norm": 437.312744140625, "learning_rate": 1.09719329631829e-06, "loss": 18.0124, "step": 403280 }, { "epoch": 0.8146713155056017, "grad_norm": 2777.39013671875, "learning_rate": 1.0969751120304656e-06, "loss": 33.5099, "step": 403290 }, { "epoch": 0.8146915161382855, "grad_norm": 155.07955932617188, "learning_rate": 1.0967569467655104e-06, "loss": 16.7757, "step": 403300 }, { "epoch": 0.8147117167709693, "grad_norm": 64.98365783691406, "learning_rate": 1.0965388005244876e-06, "loss": 20.1442, "step": 403310 }, { "epoch": 0.814731917403653, "grad_norm": 844.584228515625, "learning_rate": 1.0963206733084607e-06, "loss": 28.434, "step": 403320 }, { "epoch": 0.8147521180363368, "grad_norm": 282.7172546386719, "learning_rate": 1.096102565118497e-06, "loss": 8.1605, "step": 403330 }, { "epoch": 0.8147723186690207, "grad_norm": 216.3434600830078, "learning_rate": 1.0958844759556525e-06, "loss": 13.9023, "step": 403340 }, { "epoch": 0.8147925193017045, "grad_norm": 35.27849578857422, "learning_rate": 1.0956664058209936e-06, "loss": 28.5152, "step": 403350 }, { "epoch": 0.8148127199343883, "grad_norm": 423.6056823730469, "learning_rate": 1.0954483547155846e-06, "loss": 17.6159, "step": 
403360 }, { "epoch": 0.8148329205670721, "grad_norm": 84.75102233886719, "learning_rate": 1.095230322640487e-06, "loss": 17.3009, "step": 403370 }, { "epoch": 0.814853121199756, "grad_norm": 377.6807556152344, "learning_rate": 1.0950123095967614e-06, "loss": 7.4353, "step": 403380 }, { "epoch": 0.8148733218324398, "grad_norm": 326.2807312011719, "learning_rate": 1.094794315585474e-06, "loss": 15.2747, "step": 403390 }, { "epoch": 0.8148935224651236, "grad_norm": 167.96466064453125, "learning_rate": 1.0945763406076837e-06, "loss": 15.7253, "step": 403400 }, { "epoch": 0.8149137230978074, "grad_norm": 331.5107116699219, "learning_rate": 1.0943583846644561e-06, "loss": 19.3244, "step": 403410 }, { "epoch": 0.8149339237304912, "grad_norm": 530.7330932617188, "learning_rate": 1.0941404477568524e-06, "loss": 22.7064, "step": 403420 }, { "epoch": 0.814954124363175, "grad_norm": 522.302490234375, "learning_rate": 1.0939225298859324e-06, "loss": 26.8466, "step": 403430 }, { "epoch": 0.8149743249958589, "grad_norm": 593.5051879882812, "learning_rate": 1.093704631052762e-06, "loss": 14.2516, "step": 403440 }, { "epoch": 0.8149945256285427, "grad_norm": 1005.314453125, "learning_rate": 1.0934867512584013e-06, "loss": 28.3563, "step": 403450 }, { "epoch": 0.8150147262612265, "grad_norm": 551.16748046875, "learning_rate": 1.0932688905039113e-06, "loss": 24.4639, "step": 403460 }, { "epoch": 0.8150349268939103, "grad_norm": 150.2791748046875, "learning_rate": 1.0930510487903544e-06, "loss": 24.9412, "step": 403470 }, { "epoch": 0.8150551275265941, "grad_norm": 604.1342163085938, "learning_rate": 1.0928332261187947e-06, "loss": 19.7963, "step": 403480 }, { "epoch": 0.815075328159278, "grad_norm": 290.0917053222656, "learning_rate": 1.0926154224902919e-06, "loss": 25.0408, "step": 403490 }, { "epoch": 0.8150955287919618, "grad_norm": 314.20062255859375, "learning_rate": 1.0923976379059059e-06, "loss": 23.2143, "step": 403500 }, { "epoch": 0.8151157294246456, "grad_norm": 665.75244140625, "learning_rate": 1.0921798723667015e-06, "loss": 24.1577, "step": 403510 }, { "epoch": 0.8151359300573294, "grad_norm": 496.9930419921875, "learning_rate": 1.0919621258737384e-06, "loss": 19.7391, "step": 403520 }, { "epoch": 0.8151561306900132, "grad_norm": 489.5891418457031, "learning_rate": 1.0917443984280769e-06, "loss": 14.7234, "step": 403530 }, { "epoch": 0.8151763313226971, "grad_norm": 342.9373474121094, "learning_rate": 1.0915266900307785e-06, "loss": 22.7523, "step": 403540 }, { "epoch": 0.8151965319553809, "grad_norm": 365.2608337402344, "learning_rate": 1.0913090006829085e-06, "loss": 31.3453, "step": 403550 }, { "epoch": 0.8152167325880647, "grad_norm": 8.63314437866211, "learning_rate": 1.0910913303855208e-06, "loss": 12.228, "step": 403560 }, { "epoch": 0.8152369332207485, "grad_norm": 114.32124328613281, "learning_rate": 1.0908736791396807e-06, "loss": 10.0802, "step": 403570 }, { "epoch": 0.8152571338534322, "grad_norm": 497.6786193847656, "learning_rate": 1.0906560469464488e-06, "loss": 16.794, "step": 403580 }, { "epoch": 0.8152773344861161, "grad_norm": 617.8209838867188, "learning_rate": 1.0904384338068856e-06, "loss": 17.6674, "step": 403590 }, { "epoch": 0.8152975351187999, "grad_norm": 416.3782043457031, "learning_rate": 1.09022083972205e-06, "loss": 14.1802, "step": 403600 }, { "epoch": 0.8153177357514837, "grad_norm": 403.2692565917969, "learning_rate": 1.090003264693003e-06, "loss": 16.3625, "step": 403610 }, { "epoch": 0.8153379363841675, "grad_norm": 113.26661682128906, "learning_rate": 
1.0897857087208098e-06, "loss": 16.398, "step": 403620 }, { "epoch": 0.8153581370168513, "grad_norm": 328.08807373046875, "learning_rate": 1.0895681718065231e-06, "loss": 21.6225, "step": 403630 }, { "epoch": 0.8153783376495352, "grad_norm": 1026.9281005859375, "learning_rate": 1.0893506539512071e-06, "loss": 27.294, "step": 403640 }, { "epoch": 0.815398538282219, "grad_norm": 102.67794799804688, "learning_rate": 1.0891331551559237e-06, "loss": 15.5267, "step": 403650 }, { "epoch": 0.8154187389149028, "grad_norm": 361.410400390625, "learning_rate": 1.0889156754217306e-06, "loss": 17.3861, "step": 403660 }, { "epoch": 0.8154389395475866, "grad_norm": 267.0630798339844, "learning_rate": 1.0886982147496866e-06, "loss": 22.795, "step": 403670 }, { "epoch": 0.8154591401802704, "grad_norm": 220.8252716064453, "learning_rate": 1.0884807731408542e-06, "loss": 18.6385, "step": 403680 }, { "epoch": 0.8154793408129543, "grad_norm": 350.0182800292969, "learning_rate": 1.0882633505962924e-06, "loss": 24.2768, "step": 403690 }, { "epoch": 0.8154995414456381, "grad_norm": 535.690673828125, "learning_rate": 1.0880459471170597e-06, "loss": 25.3048, "step": 403700 }, { "epoch": 0.8155197420783219, "grad_norm": 388.8749694824219, "learning_rate": 1.0878285627042173e-06, "loss": 17.7655, "step": 403710 }, { "epoch": 0.8155399427110057, "grad_norm": 404.4278869628906, "learning_rate": 1.0876111973588233e-06, "loss": 15.9903, "step": 403720 }, { "epoch": 0.8155601433436895, "grad_norm": 672.9110717773438, "learning_rate": 1.0873938510819381e-06, "loss": 23.4759, "step": 403730 }, { "epoch": 0.8155803439763734, "grad_norm": 529.5740356445312, "learning_rate": 1.0871765238746219e-06, "loss": 24.3309, "step": 403740 }, { "epoch": 0.8156005446090572, "grad_norm": 523.105224609375, "learning_rate": 1.0869592157379305e-06, "loss": 17.2011, "step": 403750 }, { "epoch": 0.815620745241741, "grad_norm": 307.237060546875, "learning_rate": 1.0867419266729274e-06, "loss": 17.2972, "step": 403760 }, { "epoch": 0.8156409458744248, "grad_norm": 136.25494384765625, "learning_rate": 1.0865246566806676e-06, "loss": 22.0616, "step": 403770 }, { "epoch": 0.8156611465071086, "grad_norm": 69.41106414794922, "learning_rate": 1.0863074057622136e-06, "loss": 22.4569, "step": 403780 }, { "epoch": 0.8156813471397925, "grad_norm": 529.0281982421875, "learning_rate": 1.0860901739186209e-06, "loss": 27.5257, "step": 403790 }, { "epoch": 0.8157015477724763, "grad_norm": 33.62493896484375, "learning_rate": 1.0858729611509516e-06, "loss": 13.3982, "step": 403800 }, { "epoch": 0.8157217484051601, "grad_norm": 362.707763671875, "learning_rate": 1.085655767460263e-06, "loss": 15.2563, "step": 403810 }, { "epoch": 0.8157419490378439, "grad_norm": 314.35736083984375, "learning_rate": 1.085438592847612e-06, "loss": 19.942, "step": 403820 }, { "epoch": 0.8157621496705277, "grad_norm": 461.19097900390625, "learning_rate": 1.0852214373140596e-06, "loss": 17.6933, "step": 403830 }, { "epoch": 0.8157823503032114, "grad_norm": 484.7992248535156, "learning_rate": 1.085004300860663e-06, "loss": 22.1922, "step": 403840 }, { "epoch": 0.8158025509358953, "grad_norm": 338.7452087402344, "learning_rate": 1.0847871834884798e-06, "loss": 20.6989, "step": 403850 }, { "epoch": 0.8158227515685791, "grad_norm": 1.6299223899841309, "learning_rate": 1.0845700851985686e-06, "loss": 43.439, "step": 403860 }, { "epoch": 0.8158429522012629, "grad_norm": 495.8385009765625, "learning_rate": 1.084353005991991e-06, "loss": 16.6965, "step": 403870 }, { "epoch": 
0.8158631528339467, "grad_norm": 245.8248291015625, "learning_rate": 1.0841359458697986e-06, "loss": 10.4521, "step": 403880 }, { "epoch": 0.8158833534666305, "grad_norm": 452.2076416015625, "learning_rate": 1.0839189048330534e-06, "loss": 23.4567, "step": 403890 }, { "epoch": 0.8159035540993144, "grad_norm": 383.6417541503906, "learning_rate": 1.0837018828828133e-06, "loss": 11.7336, "step": 403900 }, { "epoch": 0.8159237547319982, "grad_norm": 503.1542053222656, "learning_rate": 1.0834848800201358e-06, "loss": 17.9203, "step": 403910 }, { "epoch": 0.815943955364682, "grad_norm": 663.6284790039062, "learning_rate": 1.0832678962460759e-06, "loss": 13.4979, "step": 403920 }, { "epoch": 0.8159641559973658, "grad_norm": 263.2528991699219, "learning_rate": 1.0830509315616938e-06, "loss": 8.2308, "step": 403930 }, { "epoch": 0.8159843566300496, "grad_norm": 56.36683654785156, "learning_rate": 1.0828339859680487e-06, "loss": 10.5519, "step": 403940 }, { "epoch": 0.8160045572627335, "grad_norm": 387.2181701660156, "learning_rate": 1.0826170594661933e-06, "loss": 19.104, "step": 403950 }, { "epoch": 0.8160247578954173, "grad_norm": 483.9422302246094, "learning_rate": 1.082400152057187e-06, "loss": 22.9121, "step": 403960 }, { "epoch": 0.8160449585281011, "grad_norm": 285.2896423339844, "learning_rate": 1.0821832637420887e-06, "loss": 12.4481, "step": 403970 }, { "epoch": 0.8160651591607849, "grad_norm": 646.0913696289062, "learning_rate": 1.0819663945219538e-06, "loss": 12.5675, "step": 403980 }, { "epoch": 0.8160853597934687, "grad_norm": 20.109582901000977, "learning_rate": 1.0817495443978381e-06, "loss": 11.5095, "step": 403990 }, { "epoch": 0.8161055604261526, "grad_norm": 417.9738464355469, "learning_rate": 1.0815327133708015e-06, "loss": 28.713, "step": 404000 }, { "epoch": 0.8161257610588364, "grad_norm": 544.0221557617188, "learning_rate": 1.0813159014418995e-06, "loss": 13.9935, "step": 404010 }, { "epoch": 0.8161459616915202, "grad_norm": 352.82421875, "learning_rate": 1.0810991086121863e-06, "loss": 19.4792, "step": 404020 }, { "epoch": 0.816166162324204, "grad_norm": 199.01919555664062, "learning_rate": 1.080882334882723e-06, "loss": 14.4945, "step": 404030 }, { "epoch": 0.8161863629568878, "grad_norm": 382.7222900390625, "learning_rate": 1.0806655802545617e-06, "loss": 25.3046, "step": 404040 }, { "epoch": 0.8162065635895717, "grad_norm": 108.6363296508789, "learning_rate": 1.080448844728763e-06, "loss": 15.9086, "step": 404050 }, { "epoch": 0.8162267642222555, "grad_norm": 161.2914581298828, "learning_rate": 1.0802321283063794e-06, "loss": 11.3683, "step": 404060 }, { "epoch": 0.8162469648549393, "grad_norm": 1235.232421875, "learning_rate": 1.0800154309884708e-06, "loss": 18.9475, "step": 404070 }, { "epoch": 0.8162671654876231, "grad_norm": 730.6261596679688, "learning_rate": 1.07979875277609e-06, "loss": 18.1601, "step": 404080 }, { "epoch": 0.8162873661203068, "grad_norm": 498.7947998046875, "learning_rate": 1.079582093670296e-06, "loss": 30.4366, "step": 404090 }, { "epoch": 0.8163075667529907, "grad_norm": 422.489013671875, "learning_rate": 1.0793654536721432e-06, "loss": 24.6204, "step": 404100 }, { "epoch": 0.8163277673856745, "grad_norm": 469.9093322753906, "learning_rate": 1.0791488327826865e-06, "loss": 13.0858, "step": 404110 }, { "epoch": 0.8163479680183583, "grad_norm": 302.9147033691406, "learning_rate": 1.0789322310029842e-06, "loss": 18.3818, "step": 404120 }, { "epoch": 0.8163681686510421, "grad_norm": 20.856220245361328, "learning_rate": 1.0787156483340905e-06, 
"loss": 35.7565, "step": 404130 }, { "epoch": 0.8163883692837259, "grad_norm": 424.981689453125, "learning_rate": 1.07849908477706e-06, "loss": 14.2802, "step": 404140 }, { "epoch": 0.8164085699164098, "grad_norm": 258.5683898925781, "learning_rate": 1.0782825403329488e-06, "loss": 10.4218, "step": 404150 }, { "epoch": 0.8164287705490936, "grad_norm": 80.01229858398438, "learning_rate": 1.0780660150028161e-06, "loss": 16.5788, "step": 404160 }, { "epoch": 0.8164489711817774, "grad_norm": 496.80877685546875, "learning_rate": 1.077849508787711e-06, "loss": 17.968, "step": 404170 }, { "epoch": 0.8164691718144612, "grad_norm": 243.45626831054688, "learning_rate": 1.077633021688692e-06, "loss": 12.9528, "step": 404180 }, { "epoch": 0.816489372447145, "grad_norm": 1015.302001953125, "learning_rate": 1.0774165537068154e-06, "loss": 23.2021, "step": 404190 }, { "epoch": 0.8165095730798289, "grad_norm": 326.9949951171875, "learning_rate": 1.077200104843134e-06, "loss": 21.9942, "step": 404200 }, { "epoch": 0.8165297737125127, "grad_norm": 580.8416137695312, "learning_rate": 1.0769836750987028e-06, "loss": 22.5684, "step": 404210 }, { "epoch": 0.8165499743451965, "grad_norm": 247.5938720703125, "learning_rate": 1.0767672644745774e-06, "loss": 22.6101, "step": 404220 }, { "epoch": 0.8165701749778803, "grad_norm": 343.797119140625, "learning_rate": 1.076550872971815e-06, "loss": 18.3597, "step": 404230 }, { "epoch": 0.8165903756105641, "grad_norm": 482.3746337890625, "learning_rate": 1.0763345005914649e-06, "loss": 22.0443, "step": 404240 }, { "epoch": 0.816610576243248, "grad_norm": 80.26362609863281, "learning_rate": 1.076118147334585e-06, "loss": 25.7038, "step": 404250 }, { "epoch": 0.8166307768759318, "grad_norm": 228.49267578125, "learning_rate": 1.0759018132022302e-06, "loss": 16.9956, "step": 404260 }, { "epoch": 0.8166509775086156, "grad_norm": 614.4641723632812, "learning_rate": 1.0756854981954546e-06, "loss": 17.7355, "step": 404270 }, { "epoch": 0.8166711781412994, "grad_norm": 381.8021240234375, "learning_rate": 1.0754692023153101e-06, "loss": 12.6121, "step": 404280 }, { "epoch": 0.8166913787739832, "grad_norm": 364.3545837402344, "learning_rate": 1.0752529255628542e-06, "loss": 12.8285, "step": 404290 }, { "epoch": 0.816711579406667, "grad_norm": 631.591064453125, "learning_rate": 1.0750366679391393e-06, "loss": 33.1447, "step": 404300 }, { "epoch": 0.8167317800393509, "grad_norm": 119.57291412353516, "learning_rate": 1.0748204294452187e-06, "loss": 14.9711, "step": 404310 }, { "epoch": 0.8167519806720347, "grad_norm": 201.0452423095703, "learning_rate": 1.0746042100821485e-06, "loss": 19.1192, "step": 404320 }, { "epoch": 0.8167721813047185, "grad_norm": 16.87002944946289, "learning_rate": 1.0743880098509802e-06, "loss": 10.1328, "step": 404330 }, { "epoch": 0.8167923819374023, "grad_norm": 395.2453918457031, "learning_rate": 1.074171828752769e-06, "loss": 17.627, "step": 404340 }, { "epoch": 0.816812582570086, "grad_norm": 385.77197265625, "learning_rate": 1.0739556667885692e-06, "loss": 25.4675, "step": 404350 }, { "epoch": 0.8168327832027699, "grad_norm": 399.2333984375, "learning_rate": 1.0737395239594318e-06, "loss": 14.6198, "step": 404360 }, { "epoch": 0.8168529838354537, "grad_norm": 221.7762908935547, "learning_rate": 1.0735234002664123e-06, "loss": 15.0746, "step": 404370 }, { "epoch": 0.8168731844681375, "grad_norm": 556.7322387695312, "learning_rate": 1.0733072957105633e-06, "loss": 16.0611, "step": 404380 }, { "epoch": 0.8168933851008213, "grad_norm": 318.266845703125, 
"learning_rate": 1.0730912102929392e-06, "loss": 14.5884, "step": 404390 }, { "epoch": 0.8169135857335051, "grad_norm": 185.9705352783203, "learning_rate": 1.0728751440145907e-06, "loss": 6.693, "step": 404400 }, { "epoch": 0.816933786366189, "grad_norm": 246.94725036621094, "learning_rate": 1.0726590968765738e-06, "loss": 14.6006, "step": 404410 }, { "epoch": 0.8169539869988728, "grad_norm": 938.7547607421875, "learning_rate": 1.0724430688799402e-06, "loss": 15.6411, "step": 404420 }, { "epoch": 0.8169741876315566, "grad_norm": 113.59290313720703, "learning_rate": 1.0722270600257411e-06, "loss": 25.6076, "step": 404430 }, { "epoch": 0.8169943882642404, "grad_norm": 1010.087646484375, "learning_rate": 1.0720110703150327e-06, "loss": 28.7306, "step": 404440 }, { "epoch": 0.8170145888969242, "grad_norm": 516.1767578125, "learning_rate": 1.0717950997488662e-06, "loss": 12.0093, "step": 404450 }, { "epoch": 0.8170347895296081, "grad_norm": 119.9769287109375, "learning_rate": 1.0715791483282922e-06, "loss": 26.2367, "step": 404460 }, { "epoch": 0.8170549901622919, "grad_norm": 882.0177612304688, "learning_rate": 1.0713632160543647e-06, "loss": 20.4992, "step": 404470 }, { "epoch": 0.8170751907949757, "grad_norm": 547.028076171875, "learning_rate": 1.0711473029281394e-06, "loss": 13.6764, "step": 404480 }, { "epoch": 0.8170953914276595, "grad_norm": 143.71363830566406, "learning_rate": 1.0709314089506634e-06, "loss": 17.0387, "step": 404490 }, { "epoch": 0.8171155920603433, "grad_norm": 488.9263000488281, "learning_rate": 1.0707155341229902e-06, "loss": 16.6229, "step": 404500 }, { "epoch": 0.8171357926930272, "grad_norm": 382.60723876953125, "learning_rate": 1.0704996784461753e-06, "loss": 30.174, "step": 404510 }, { "epoch": 0.817155993325711, "grad_norm": 285.2334289550781, "learning_rate": 1.0702838419212674e-06, "loss": 19.4932, "step": 404520 }, { "epoch": 0.8171761939583948, "grad_norm": 410.0068664550781, "learning_rate": 1.0700680245493188e-06, "loss": 19.0309, "step": 404530 }, { "epoch": 0.8171963945910786, "grad_norm": 619.8792724609375, "learning_rate": 1.0698522263313816e-06, "loss": 11.0317, "step": 404540 }, { "epoch": 0.8172165952237624, "grad_norm": 278.36474609375, "learning_rate": 1.0696364472685112e-06, "loss": 9.48, "step": 404550 }, { "epoch": 0.8172367958564463, "grad_norm": 166.591796875, "learning_rate": 1.069420687361753e-06, "loss": 9.1543, "step": 404560 }, { "epoch": 0.8172569964891301, "grad_norm": 264.59930419921875, "learning_rate": 1.0692049466121613e-06, "loss": 11.8805, "step": 404570 }, { "epoch": 0.8172771971218139, "grad_norm": 520.3172607421875, "learning_rate": 1.0689892250207894e-06, "loss": 16.2525, "step": 404580 }, { "epoch": 0.8172973977544977, "grad_norm": 450.2430114746094, "learning_rate": 1.0687735225886874e-06, "loss": 24.1585, "step": 404590 }, { "epoch": 0.8173175983871814, "grad_norm": 769.260009765625, "learning_rate": 1.0685578393169054e-06, "loss": 23.6105, "step": 404600 }, { "epoch": 0.8173377990198653, "grad_norm": 858.8841552734375, "learning_rate": 1.068342175206497e-06, "loss": 15.8296, "step": 404610 }, { "epoch": 0.8173579996525491, "grad_norm": 1134.909912109375, "learning_rate": 1.0681265302585114e-06, "loss": 28.3626, "step": 404620 }, { "epoch": 0.8173782002852329, "grad_norm": 487.4468688964844, "learning_rate": 1.0679109044739994e-06, "loss": 12.7139, "step": 404630 }, { "epoch": 0.8173984009179167, "grad_norm": 429.1744079589844, "learning_rate": 1.067695297854014e-06, "loss": 29.3317, "step": 404640 }, { "epoch": 
0.8174186015506005, "grad_norm": 230.45068359375, "learning_rate": 1.0674797103996033e-06, "loss": 11.5221, "step": 404650 }, { "epoch": 0.8174388021832844, "grad_norm": 285.8327941894531, "learning_rate": 1.0672641421118214e-06, "loss": 15.1606, "step": 404660 }, { "epoch": 0.8174590028159682, "grad_norm": 595.5794677734375, "learning_rate": 1.067048592991715e-06, "loss": 16.7449, "step": 404670 }, { "epoch": 0.817479203448652, "grad_norm": 312.82275390625, "learning_rate": 1.0668330630403383e-06, "loss": 21.7909, "step": 404680 }, { "epoch": 0.8174994040813358, "grad_norm": 646.6598510742188, "learning_rate": 1.0666175522587402e-06, "loss": 14.1596, "step": 404690 }, { "epoch": 0.8175196047140196, "grad_norm": 343.3512268066406, "learning_rate": 1.0664020606479702e-06, "loss": 18.2318, "step": 404700 }, { "epoch": 0.8175398053467035, "grad_norm": 286.7987060546875, "learning_rate": 1.0661865882090805e-06, "loss": 14.2659, "step": 404710 }, { "epoch": 0.8175600059793873, "grad_norm": 522.711181640625, "learning_rate": 1.0659711349431184e-06, "loss": 16.0903, "step": 404720 }, { "epoch": 0.8175802066120711, "grad_norm": 211.34999084472656, "learning_rate": 1.0657557008511377e-06, "loss": 15.7913, "step": 404730 }, { "epoch": 0.8176004072447549, "grad_norm": 293.5294494628906, "learning_rate": 1.0655402859341868e-06, "loss": 21.6211, "step": 404740 }, { "epoch": 0.8176206078774387, "grad_norm": 131.2279815673828, "learning_rate": 1.065324890193314e-06, "loss": 14.5233, "step": 404750 }, { "epoch": 0.8176408085101226, "grad_norm": 132.0124969482422, "learning_rate": 1.0651095136295713e-06, "loss": 26.1818, "step": 404760 }, { "epoch": 0.8176610091428064, "grad_norm": 258.7015686035156, "learning_rate": 1.064894156244008e-06, "loss": 60.9527, "step": 404770 }, { "epoch": 0.8176812097754902, "grad_norm": 0.49908149242401123, "learning_rate": 1.0646788180376716e-06, "loss": 18.8364, "step": 404780 }, { "epoch": 0.817701410408174, "grad_norm": 522.22265625, "learning_rate": 1.0644634990116132e-06, "loss": 17.0823, "step": 404790 }, { "epoch": 0.8177216110408578, "grad_norm": 166.57650756835938, "learning_rate": 1.064248199166884e-06, "loss": 20.4954, "step": 404800 }, { "epoch": 0.8177418116735417, "grad_norm": 406.01080322265625, "learning_rate": 1.0640329185045323e-06, "loss": 24.5166, "step": 404810 }, { "epoch": 0.8177620123062255, "grad_norm": 643.4286499023438, "learning_rate": 1.0638176570256048e-06, "loss": 16.3062, "step": 404820 }, { "epoch": 0.8177822129389093, "grad_norm": 501.24493408203125, "learning_rate": 1.0636024147311524e-06, "loss": 34.6792, "step": 404830 }, { "epoch": 0.8178024135715931, "grad_norm": 317.7840270996094, "learning_rate": 1.0633871916222277e-06, "loss": 17.0821, "step": 404840 }, { "epoch": 0.8178226142042769, "grad_norm": 754.8977661132812, "learning_rate": 1.0631719876998736e-06, "loss": 13.5358, "step": 404850 }, { "epoch": 0.8178428148369606, "grad_norm": 312.731201171875, "learning_rate": 1.0629568029651416e-06, "loss": 8.0272, "step": 404860 }, { "epoch": 0.8178630154696445, "grad_norm": 443.60174560546875, "learning_rate": 1.0627416374190818e-06, "loss": 18.4988, "step": 404870 }, { "epoch": 0.8178832161023283, "grad_norm": 319.082275390625, "learning_rate": 1.062526491062742e-06, "loss": 14.2685, "step": 404880 }, { "epoch": 0.8179034167350121, "grad_norm": 471.2208251953125, "learning_rate": 1.0623113638971688e-06, "loss": 19.6446, "step": 404890 }, { "epoch": 0.8179236173676959, "grad_norm": 170.76010131835938, "learning_rate": 
1.0620962559234144e-06, "loss": 6.4314, "step": 404900 }, { "epoch": 0.8179438180003797, "grad_norm": 688.2703857421875, "learning_rate": 1.0618811671425244e-06, "loss": 20.1725, "step": 404910 }, { "epoch": 0.8179640186330636, "grad_norm": 306.8644714355469, "learning_rate": 1.0616660975555476e-06, "loss": 11.3003, "step": 404920 }, { "epoch": 0.8179842192657474, "grad_norm": 526.6558227539062, "learning_rate": 1.0614510471635332e-06, "loss": 21.5544, "step": 404930 }, { "epoch": 0.8180044198984312, "grad_norm": 33.974609375, "learning_rate": 1.0612360159675278e-06, "loss": 17.053, "step": 404940 }, { "epoch": 0.818024620531115, "grad_norm": 545.7177124023438, "learning_rate": 1.0610210039685815e-06, "loss": 42.8789, "step": 404950 }, { "epoch": 0.8180448211637988, "grad_norm": 299.2607727050781, "learning_rate": 1.0608060111677409e-06, "loss": 24.8136, "step": 404960 }, { "epoch": 0.8180650217964827, "grad_norm": 329.0627136230469, "learning_rate": 1.0605910375660527e-06, "loss": 23.0713, "step": 404970 }, { "epoch": 0.8180852224291665, "grad_norm": 301.731201171875, "learning_rate": 1.0603760831645677e-06, "loss": 20.3733, "step": 404980 }, { "epoch": 0.8181054230618503, "grad_norm": 68.4891586303711, "learning_rate": 1.0601611479643303e-06, "loss": 24.1063, "step": 404990 }, { "epoch": 0.8181256236945341, "grad_norm": 294.00238037109375, "learning_rate": 1.0599462319663906e-06, "loss": 12.5412, "step": 405000 }, { "epoch": 0.8181458243272179, "grad_norm": 380.47210693359375, "learning_rate": 1.0597313351717942e-06, "loss": 24.804, "step": 405010 }, { "epoch": 0.8181660249599018, "grad_norm": 228.90867614746094, "learning_rate": 1.0595164575815909e-06, "loss": 24.4986, "step": 405020 }, { "epoch": 0.8181862255925856, "grad_norm": 764.163818359375, "learning_rate": 1.0593015991968258e-06, "loss": 17.5265, "step": 405030 }, { "epoch": 0.8182064262252694, "grad_norm": 176.84999084472656, "learning_rate": 1.0590867600185462e-06, "loss": 16.077, "step": 405040 }, { "epoch": 0.8182266268579532, "grad_norm": 328.25531005859375, "learning_rate": 1.0588719400478004e-06, "loss": 24.8989, "step": 405050 }, { "epoch": 0.818246827490637, "grad_norm": 22.21516227722168, "learning_rate": 1.0586571392856354e-06, "loss": 13.0133, "step": 405060 }, { "epoch": 0.8182670281233209, "grad_norm": 229.92356872558594, "learning_rate": 1.0584423577330955e-06, "loss": 16.3715, "step": 405070 }, { "epoch": 0.8182872287560047, "grad_norm": 277.6341857910156, "learning_rate": 1.0582275953912296e-06, "loss": 19.9553, "step": 405080 }, { "epoch": 0.8183074293886885, "grad_norm": 303.4682922363281, "learning_rate": 1.0580128522610872e-06, "loss": 10.4468, "step": 405090 }, { "epoch": 0.8183276300213723, "grad_norm": 276.90863037109375, "learning_rate": 1.0577981283437095e-06, "loss": 27.1738, "step": 405100 }, { "epoch": 0.818347830654056, "grad_norm": 449.3738098144531, "learning_rate": 1.0575834236401455e-06, "loss": 16.6618, "step": 405110 }, { "epoch": 0.8183680312867398, "grad_norm": 418.2468566894531, "learning_rate": 1.057368738151443e-06, "loss": 18.0085, "step": 405120 }, { "epoch": 0.8183882319194237, "grad_norm": 276.5181579589844, "learning_rate": 1.0571540718786471e-06, "loss": 21.9944, "step": 405130 }, { "epoch": 0.8184084325521075, "grad_norm": 215.91197204589844, "learning_rate": 1.0569394248228026e-06, "loss": 22.492, "step": 405140 }, { "epoch": 0.8184286331847913, "grad_norm": 284.6844787597656, "learning_rate": 1.0567247969849576e-06, "loss": 19.5159, "step": 405150 }, { "epoch": 
0.8184488338174751, "grad_norm": 509.63507080078125, "learning_rate": 1.05651018836616e-06, "loss": 13.3617, "step": 405160 }, { "epoch": 0.818469034450159, "grad_norm": 59.847103118896484, "learning_rate": 1.0562955989674506e-06, "loss": 16.318, "step": 405170 }, { "epoch": 0.8184892350828428, "grad_norm": 612.8087768554688, "learning_rate": 1.0560810287898783e-06, "loss": 17.2715, "step": 405180 }, { "epoch": 0.8185094357155266, "grad_norm": 515.5489501953125, "learning_rate": 1.05586647783449e-06, "loss": 10.0786, "step": 405190 }, { "epoch": 0.8185296363482104, "grad_norm": 211.81011962890625, "learning_rate": 1.0556519461023301e-06, "loss": 10.0412, "step": 405200 }, { "epoch": 0.8185498369808942, "grad_norm": 277.3237609863281, "learning_rate": 1.0554374335944429e-06, "loss": 21.2124, "step": 405210 }, { "epoch": 0.818570037613578, "grad_norm": 16.84105682373047, "learning_rate": 1.055222940311877e-06, "loss": 13.1539, "step": 405220 }, { "epoch": 0.8185902382462619, "grad_norm": 501.70941162109375, "learning_rate": 1.0550084662556753e-06, "loss": 18.1266, "step": 405230 }, { "epoch": 0.8186104388789457, "grad_norm": 722.0267944335938, "learning_rate": 1.0547940114268828e-06, "loss": 27.4088, "step": 405240 }, { "epoch": 0.8186306395116295, "grad_norm": 510.4221496582031, "learning_rate": 1.0545795758265476e-06, "loss": 18.9063, "step": 405250 }, { "epoch": 0.8186508401443133, "grad_norm": 469.6507873535156, "learning_rate": 1.0543651594557113e-06, "loss": 23.375, "step": 405260 }, { "epoch": 0.8186710407769971, "grad_norm": 726.9376831054688, "learning_rate": 1.0541507623154218e-06, "loss": 18.6523, "step": 405270 }, { "epoch": 0.818691241409681, "grad_norm": 549.9287719726562, "learning_rate": 1.0539363844067218e-06, "loss": 12.9811, "step": 405280 }, { "epoch": 0.8187114420423648, "grad_norm": 445.047119140625, "learning_rate": 1.053722025730659e-06, "loss": 22.0171, "step": 405290 }, { "epoch": 0.8187316426750486, "grad_norm": 396.04705810546875, "learning_rate": 1.053507686288276e-06, "loss": 13.1831, "step": 405300 }, { "epoch": 0.8187518433077324, "grad_norm": 203.05210876464844, "learning_rate": 1.0532933660806166e-06, "loss": 15.9318, "step": 405310 }, { "epoch": 0.8187720439404162, "grad_norm": 362.0765686035156, "learning_rate": 1.053079065108728e-06, "loss": 18.4131, "step": 405320 }, { "epoch": 0.8187922445731001, "grad_norm": 342.76129150390625, "learning_rate": 1.0528647833736516e-06, "loss": 7.8498, "step": 405330 }, { "epoch": 0.8188124452057839, "grad_norm": 172.86709594726562, "learning_rate": 1.0526505208764353e-06, "loss": 14.6202, "step": 405340 }, { "epoch": 0.8188326458384677, "grad_norm": 442.9986572265625, "learning_rate": 1.052436277618122e-06, "loss": 19.5566, "step": 405350 }, { "epoch": 0.8188528464711515, "grad_norm": 801.3681030273438, "learning_rate": 1.0522220535997534e-06, "loss": 12.3308, "step": 405360 }, { "epoch": 0.8188730471038352, "grad_norm": 31.05193519592285, "learning_rate": 1.0520078488223772e-06, "loss": 19.1345, "step": 405370 }, { "epoch": 0.8188932477365191, "grad_norm": 49.77863311767578, "learning_rate": 1.0517936632870362e-06, "loss": 15.6791, "step": 405380 }, { "epoch": 0.8189134483692029, "grad_norm": 21.847225189208984, "learning_rate": 1.0515794969947724e-06, "loss": 11.8785, "step": 405390 }, { "epoch": 0.8189336490018867, "grad_norm": 493.5738525390625, "learning_rate": 1.0513653499466315e-06, "loss": 13.4028, "step": 405400 }, { "epoch": 0.8189538496345705, "grad_norm": 441.8828125, "learning_rate": 1.0511512221436581e-06, 
"loss": 14.2989, "step": 405410 }, { "epoch": 0.8189740502672543, "grad_norm": 462.55963134765625, "learning_rate": 1.0509371135868945e-06, "loss": 14.1361, "step": 405420 }, { "epoch": 0.8189942508999382, "grad_norm": 739.9083862304688, "learning_rate": 1.0507230242773836e-06, "loss": 29.2888, "step": 405430 }, { "epoch": 0.819014451532622, "grad_norm": 546.8694458007812, "learning_rate": 1.0505089542161707e-06, "loss": 16.9196, "step": 405440 }, { "epoch": 0.8190346521653058, "grad_norm": 196.59988403320312, "learning_rate": 1.0502949034042985e-06, "loss": 20.0, "step": 405450 }, { "epoch": 0.8190548527979896, "grad_norm": 31.419471740722656, "learning_rate": 1.050080871842808e-06, "loss": 11.916, "step": 405460 }, { "epoch": 0.8190750534306734, "grad_norm": 297.56781005859375, "learning_rate": 1.0498668595327448e-06, "loss": 10.3404, "step": 405470 }, { "epoch": 0.8190952540633573, "grad_norm": 281.243896484375, "learning_rate": 1.0496528664751527e-06, "loss": 11.0291, "step": 405480 }, { "epoch": 0.8191154546960411, "grad_norm": 215.0363311767578, "learning_rate": 1.049438892671073e-06, "loss": 8.4872, "step": 405490 }, { "epoch": 0.8191356553287249, "grad_norm": 351.2985534667969, "learning_rate": 1.049224938121548e-06, "loss": 11.865, "step": 405500 }, { "epoch": 0.8191558559614087, "grad_norm": 323.0808410644531, "learning_rate": 1.049011002827623e-06, "loss": 28.4416, "step": 405510 }, { "epoch": 0.8191760565940925, "grad_norm": 647.8573608398438, "learning_rate": 1.0487970867903385e-06, "loss": 14.6466, "step": 405520 }, { "epoch": 0.8191962572267764, "grad_norm": 557.99072265625, "learning_rate": 1.0485831900107368e-06, "loss": 21.3212, "step": 405530 }, { "epoch": 0.8192164578594602, "grad_norm": 197.75636291503906, "learning_rate": 1.0483693124898631e-06, "loss": 21.2272, "step": 405540 }, { "epoch": 0.819236658492144, "grad_norm": 474.7149353027344, "learning_rate": 1.0481554542287565e-06, "loss": 18.4791, "step": 405550 }, { "epoch": 0.8192568591248278, "grad_norm": 1080.687255859375, "learning_rate": 1.0479416152284622e-06, "loss": 20.9776, "step": 405560 }, { "epoch": 0.8192770597575116, "grad_norm": 157.8067626953125, "learning_rate": 1.0477277954900194e-06, "loss": 20.7023, "step": 405570 }, { "epoch": 0.8192972603901955, "grad_norm": 242.31314086914062, "learning_rate": 1.047513995014474e-06, "loss": 14.757, "step": 405580 }, { "epoch": 0.8193174610228793, "grad_norm": 126.24282836914062, "learning_rate": 1.0473002138028654e-06, "loss": 25.1427, "step": 405590 }, { "epoch": 0.8193376616555631, "grad_norm": 13.731419563293457, "learning_rate": 1.047086451856235e-06, "loss": 6.8258, "step": 405600 }, { "epoch": 0.8193578622882469, "grad_norm": 394.7142333984375, "learning_rate": 1.0468727091756275e-06, "loss": 19.3574, "step": 405610 }, { "epoch": 0.8193780629209307, "grad_norm": 895.7384643554688, "learning_rate": 1.0466589857620813e-06, "loss": 19.1313, "step": 405620 }, { "epoch": 0.8193982635536144, "grad_norm": 374.3149719238281, "learning_rate": 1.0464452816166416e-06, "loss": 17.0161, "step": 405630 }, { "epoch": 0.8194184641862983, "grad_norm": 1000.359130859375, "learning_rate": 1.0462315967403475e-06, "loss": 22.6076, "step": 405640 }, { "epoch": 0.8194386648189821, "grad_norm": 472.3176574707031, "learning_rate": 1.0460179311342394e-06, "loss": 18.4308, "step": 405650 }, { "epoch": 0.8194588654516659, "grad_norm": 121.66295623779297, "learning_rate": 1.0458042847993627e-06, "loss": 27.6165, "step": 405660 }, { "epoch": 0.8194790660843497, "grad_norm": 
315.0002746582031, "learning_rate": 1.0455906577367553e-06, "loss": 17.5618, "step": 405670 }, { "epoch": 0.8194992667170335, "grad_norm": 223.4075927734375, "learning_rate": 1.0453770499474585e-06, "loss": 21.8781, "step": 405680 }, { "epoch": 0.8195194673497174, "grad_norm": 386.3526611328125, "learning_rate": 1.0451634614325146e-06, "loss": 15.8846, "step": 405690 }, { "epoch": 0.8195396679824012, "grad_norm": 223.708984375, "learning_rate": 1.0449498921929669e-06, "loss": 13.9674, "step": 405700 }, { "epoch": 0.819559868615085, "grad_norm": 194.04403686523438, "learning_rate": 1.0447363422298507e-06, "loss": 19.5599, "step": 405710 }, { "epoch": 0.8195800692477688, "grad_norm": 119.22209930419922, "learning_rate": 1.0445228115442102e-06, "loss": 11.4453, "step": 405720 }, { "epoch": 0.8196002698804526, "grad_norm": 268.7986145019531, "learning_rate": 1.044309300137087e-06, "loss": 12.7573, "step": 405730 }, { "epoch": 0.8196204705131365, "grad_norm": 89.77639770507812, "learning_rate": 1.0440958080095204e-06, "loss": 10.0482, "step": 405740 }, { "epoch": 0.8196406711458203, "grad_norm": 539.5626220703125, "learning_rate": 1.04388233516255e-06, "loss": 19.2888, "step": 405750 }, { "epoch": 0.8196608717785041, "grad_norm": 179.42356872558594, "learning_rate": 1.0436688815972168e-06, "loss": 17.396, "step": 405760 }, { "epoch": 0.8196810724111879, "grad_norm": 503.5247802734375, "learning_rate": 1.0434554473145646e-06, "loss": 16.8302, "step": 405770 }, { "epoch": 0.8197012730438717, "grad_norm": 389.95904541015625, "learning_rate": 1.0432420323156284e-06, "loss": 18.0237, "step": 405780 }, { "epoch": 0.8197214736765556, "grad_norm": 98.8677978515625, "learning_rate": 1.0430286366014496e-06, "loss": 17.0766, "step": 405790 }, { "epoch": 0.8197416743092394, "grad_norm": 269.2354431152344, "learning_rate": 1.0428152601730718e-06, "loss": 25.5945, "step": 405800 }, { "epoch": 0.8197618749419232, "grad_norm": 229.5015106201172, "learning_rate": 1.0426019030315314e-06, "loss": 15.0495, "step": 405810 }, { "epoch": 0.819782075574607, "grad_norm": 477.2650146484375, "learning_rate": 1.0423885651778688e-06, "loss": 8.8372, "step": 405820 }, { "epoch": 0.8198022762072908, "grad_norm": 599.321533203125, "learning_rate": 1.0421752466131258e-06, "loss": 19.5767, "step": 405830 }, { "epoch": 0.8198224768399747, "grad_norm": 481.2432556152344, "learning_rate": 1.0419619473383402e-06, "loss": 23.8262, "step": 405840 }, { "epoch": 0.8198426774726585, "grad_norm": 330.3309631347656, "learning_rate": 1.0417486673545508e-06, "loss": 16.1757, "step": 405850 }, { "epoch": 0.8198628781053423, "grad_norm": 614.2745971679688, "learning_rate": 1.0415354066627993e-06, "loss": 10.8526, "step": 405860 }, { "epoch": 0.8198830787380261, "grad_norm": 240.1322479248047, "learning_rate": 1.041322165264123e-06, "loss": 23.1328, "step": 405870 }, { "epoch": 0.8199032793707098, "grad_norm": 213.60418701171875, "learning_rate": 1.0411089431595639e-06, "loss": 26.3818, "step": 405880 }, { "epoch": 0.8199234800033937, "grad_norm": 461.4310607910156, "learning_rate": 1.040895740350158e-06, "loss": 20.5856, "step": 405890 }, { "epoch": 0.8199436806360775, "grad_norm": 249.21217346191406, "learning_rate": 1.0406825568369478e-06, "loss": 12.6341, "step": 405900 }, { "epoch": 0.8199638812687613, "grad_norm": 519.0851440429688, "learning_rate": 1.0404693926209702e-06, "loss": 17.4954, "step": 405910 }, { "epoch": 0.8199840819014451, "grad_norm": 582.5599975585938, "learning_rate": 1.0402562477032635e-06, "loss": 25.6814, "step": 
405920 }, { "epoch": 0.8200042825341289, "grad_norm": 310.9371337890625, "learning_rate": 1.0400431220848688e-06, "loss": 25.2741, "step": 405930 }, { "epoch": 0.8200244831668128, "grad_norm": 643.0136108398438, "learning_rate": 1.0398300157668222e-06, "loss": 19.6303, "step": 405940 }, { "epoch": 0.8200446837994966, "grad_norm": 202.9320526123047, "learning_rate": 1.0396169287501652e-06, "loss": 17.305, "step": 405950 }, { "epoch": 0.8200648844321804, "grad_norm": 605.00634765625, "learning_rate": 1.0394038610359352e-06, "loss": 19.8948, "step": 405960 }, { "epoch": 0.8200850850648642, "grad_norm": 91.46730041503906, "learning_rate": 1.0391908126251688e-06, "loss": 12.6257, "step": 405970 }, { "epoch": 0.820105285697548, "grad_norm": 1556.552734375, "learning_rate": 1.0389777835189075e-06, "loss": 16.1427, "step": 405980 }, { "epoch": 0.8201254863302319, "grad_norm": 220.1592254638672, "learning_rate": 1.0387647737181877e-06, "loss": 15.6865, "step": 405990 }, { "epoch": 0.8201456869629157, "grad_norm": 589.5980834960938, "learning_rate": 1.0385517832240472e-06, "loss": 19.6536, "step": 406000 }, { "epoch": 0.8201658875955995, "grad_norm": 489.4096374511719, "learning_rate": 1.0383388120375242e-06, "loss": 19.5343, "step": 406010 }, { "epoch": 0.8201860882282833, "grad_norm": 311.42254638671875, "learning_rate": 1.0381258601596594e-06, "loss": 9.4658, "step": 406020 }, { "epoch": 0.8202062888609671, "grad_norm": 356.68560791015625, "learning_rate": 1.0379129275914878e-06, "loss": 27.3836, "step": 406030 }, { "epoch": 0.820226489493651, "grad_norm": 298.08416748046875, "learning_rate": 1.037700014334047e-06, "loss": 21.9449, "step": 406040 }, { "epoch": 0.8202466901263348, "grad_norm": 156.88995361328125, "learning_rate": 1.0374871203883774e-06, "loss": 13.8985, "step": 406050 }, { "epoch": 0.8202668907590186, "grad_norm": 187.8990478515625, "learning_rate": 1.0372742457555151e-06, "loss": 10.9844, "step": 406060 }, { "epoch": 0.8202870913917024, "grad_norm": 527.1951904296875, "learning_rate": 1.0370613904364957e-06, "loss": 13.1193, "step": 406070 }, { "epoch": 0.8203072920243862, "grad_norm": 364.440185546875, "learning_rate": 1.0368485544323586e-06, "loss": 17.7838, "step": 406080 }, { "epoch": 0.8203274926570701, "grad_norm": 85.72505187988281, "learning_rate": 1.0366357377441427e-06, "loss": 14.7729, "step": 406090 }, { "epoch": 0.8203476932897539, "grad_norm": 212.0575408935547, "learning_rate": 1.036422940372883e-06, "loss": 18.9541, "step": 406100 }, { "epoch": 0.8203678939224377, "grad_norm": 139.4064178466797, "learning_rate": 1.0362101623196158e-06, "loss": 8.1081, "step": 406110 }, { "epoch": 0.8203880945551215, "grad_norm": 284.49493408203125, "learning_rate": 1.0359974035853814e-06, "loss": 15.8686, "step": 406120 }, { "epoch": 0.8204082951878053, "grad_norm": 494.59967041015625, "learning_rate": 1.0357846641712143e-06, "loss": 7.0495, "step": 406130 }, { "epoch": 0.820428495820489, "grad_norm": 355.716064453125, "learning_rate": 1.0355719440781508e-06, "loss": 12.4157, "step": 406140 }, { "epoch": 0.8204486964531729, "grad_norm": 434.7909240722656, "learning_rate": 1.0353592433072302e-06, "loss": 25.2197, "step": 406150 }, { "epoch": 0.8204688970858567, "grad_norm": 312.6859130859375, "learning_rate": 1.035146561859487e-06, "loss": 21.4402, "step": 406160 }, { "epoch": 0.8204890977185405, "grad_norm": 82.07292938232422, "learning_rate": 1.0349338997359593e-06, "loss": 15.2223, "step": 406170 }, { "epoch": 0.8205092983512243, "grad_norm": 236.8374786376953, "learning_rate": 
1.0347212569376814e-06, "loss": 15.4553, "step": 406180 }, { "epoch": 0.8205294989839081, "grad_norm": 140.76976013183594, "learning_rate": 1.0345086334656929e-06, "loss": 16.7068, "step": 406190 }, { "epoch": 0.820549699616592, "grad_norm": 12.717177391052246, "learning_rate": 1.0342960293210281e-06, "loss": 19.1874, "step": 406200 }, { "epoch": 0.8205699002492758, "grad_norm": 265.7485046386719, "learning_rate": 1.034083444504722e-06, "loss": 12.1559, "step": 406210 }, { "epoch": 0.8205901008819596, "grad_norm": 134.8882598876953, "learning_rate": 1.0338708790178136e-06, "loss": 25.6306, "step": 406220 }, { "epoch": 0.8206103015146434, "grad_norm": 183.5823211669922, "learning_rate": 1.0336583328613364e-06, "loss": 29.2117, "step": 406230 }, { "epoch": 0.8206305021473272, "grad_norm": 452.5019226074219, "learning_rate": 1.0334458060363289e-06, "loss": 18.6306, "step": 406240 }, { "epoch": 0.8206507027800111, "grad_norm": 338.053955078125, "learning_rate": 1.0332332985438248e-06, "loss": 11.713, "step": 406250 }, { "epoch": 0.8206709034126949, "grad_norm": 416.09368896484375, "learning_rate": 1.03302081038486e-06, "loss": 14.9481, "step": 406260 }, { "epoch": 0.8206911040453787, "grad_norm": 260.2134094238281, "learning_rate": 1.032808341560471e-06, "loss": 29.0405, "step": 406270 }, { "epoch": 0.8207113046780625, "grad_norm": 779.87646484375, "learning_rate": 1.032595892071694e-06, "loss": 35.7845, "step": 406280 }, { "epoch": 0.8207315053107463, "grad_norm": 0.04122190177440643, "learning_rate": 1.0323834619195617e-06, "loss": 9.071, "step": 406290 }, { "epoch": 0.8207517059434302, "grad_norm": 116.37691497802734, "learning_rate": 1.0321710511051108e-06, "loss": 14.0588, "step": 406300 }, { "epoch": 0.820771906576114, "grad_norm": 404.05853271484375, "learning_rate": 1.03195865962938e-06, "loss": 9.1866, "step": 406310 }, { "epoch": 0.8207921072087978, "grad_norm": 256.5055847167969, "learning_rate": 1.0317462874933987e-06, "loss": 26.4847, "step": 406320 }, { "epoch": 0.8208123078414816, "grad_norm": 599.2551879882812, "learning_rate": 1.0315339346982044e-06, "loss": 30.4853, "step": 406330 }, { "epoch": 0.8208325084741654, "grad_norm": 187.8599395751953, "learning_rate": 1.0313216012448341e-06, "loss": 25.9453, "step": 406340 }, { "epoch": 0.8208527091068493, "grad_norm": 612.1749267578125, "learning_rate": 1.0311092871343209e-06, "loss": 24.1508, "step": 406350 }, { "epoch": 0.8208729097395331, "grad_norm": 468.5676574707031, "learning_rate": 1.0308969923676987e-06, "loss": 21.0865, "step": 406360 }, { "epoch": 0.8208931103722169, "grad_norm": 348.3234558105469, "learning_rate": 1.0306847169460028e-06, "loss": 21.1996, "step": 406370 }, { "epoch": 0.8209133110049007, "grad_norm": 1181.8826904296875, "learning_rate": 1.0304724608702704e-06, "loss": 14.7764, "step": 406380 }, { "epoch": 0.8209335116375844, "grad_norm": 238.29066467285156, "learning_rate": 1.0302602241415316e-06, "loss": 12.5135, "step": 406390 }, { "epoch": 0.8209537122702683, "grad_norm": 268.7695617675781, "learning_rate": 1.0300480067608232e-06, "loss": 11.5064, "step": 406400 }, { "epoch": 0.8209739129029521, "grad_norm": 123.91963958740234, "learning_rate": 1.0298358087291803e-06, "loss": 20.9362, "step": 406410 }, { "epoch": 0.8209941135356359, "grad_norm": 407.3598327636719, "learning_rate": 1.0296236300476359e-06, "loss": 12.349, "step": 406420 }, { "epoch": 0.8210143141683197, "grad_norm": 548.6702270507812, "learning_rate": 1.0294114707172236e-06, "loss": 15.9261, "step": 406430 }, { "epoch": 
0.8210345148010035, "grad_norm": 5.207859992980957, "learning_rate": 1.0291993307389792e-06, "loss": 18.7373, "step": 406440 }, { "epoch": 0.8210547154336874, "grad_norm": 723.1630249023438, "learning_rate": 1.0289872101139359e-06, "loss": 16.8577, "step": 406450 }, { "epoch": 0.8210749160663712, "grad_norm": 338.55328369140625, "learning_rate": 1.0287751088431257e-06, "loss": 12.0091, "step": 406460 }, { "epoch": 0.821095116699055, "grad_norm": 458.1700744628906, "learning_rate": 1.028563026927586e-06, "loss": 15.2878, "step": 406470 }, { "epoch": 0.8211153173317388, "grad_norm": 559.7627563476562, "learning_rate": 1.0283509643683464e-06, "loss": 14.1064, "step": 406480 }, { "epoch": 0.8211355179644226, "grad_norm": 144.1278533935547, "learning_rate": 1.0281389211664439e-06, "loss": 16.2972, "step": 406490 }, { "epoch": 0.8211557185971065, "grad_norm": 167.664306640625, "learning_rate": 1.0279268973229089e-06, "loss": 16.3451, "step": 406500 }, { "epoch": 0.8211759192297903, "grad_norm": 10.996707916259766, "learning_rate": 1.0277148928387788e-06, "loss": 13.8368, "step": 406510 }, { "epoch": 0.8211961198624741, "grad_norm": 299.57427978515625, "learning_rate": 1.0275029077150838e-06, "loss": 24.8445, "step": 406520 }, { "epoch": 0.8212163204951579, "grad_norm": 190.02394104003906, "learning_rate": 1.0272909419528565e-06, "loss": 20.3937, "step": 406530 }, { "epoch": 0.8212365211278417, "grad_norm": 256.25341796875, "learning_rate": 1.0270789955531329e-06, "loss": 12.2424, "step": 406540 }, { "epoch": 0.8212567217605256, "grad_norm": 608.8441162109375, "learning_rate": 1.026867068516943e-06, "loss": 20.8768, "step": 406550 }, { "epoch": 0.8212769223932094, "grad_norm": 897.0269775390625, "learning_rate": 1.0266551608453224e-06, "loss": 21.9482, "step": 406560 }, { "epoch": 0.8212971230258932, "grad_norm": 308.4391784667969, "learning_rate": 1.0264432725393026e-06, "loss": 25.2127, "step": 406570 }, { "epoch": 0.821317323658577, "grad_norm": 269.1169738769531, "learning_rate": 1.026231403599915e-06, "loss": 36.8312, "step": 406580 }, { "epoch": 0.8213375242912608, "grad_norm": 422.3494873046875, "learning_rate": 1.0260195540281948e-06, "loss": 28.029, "step": 406590 }, { "epoch": 0.8213577249239447, "grad_norm": 170.7576904296875, "learning_rate": 1.0258077238251735e-06, "loss": 9.9392, "step": 406600 }, { "epoch": 0.8213779255566285, "grad_norm": 902.087158203125, "learning_rate": 1.0255959129918825e-06, "loss": 26.662, "step": 406610 }, { "epoch": 0.8213981261893123, "grad_norm": 314.4224548339844, "learning_rate": 1.0253841215293541e-06, "loss": 25.7107, "step": 406620 }, { "epoch": 0.8214183268219961, "grad_norm": 343.95343017578125, "learning_rate": 1.0251723494386234e-06, "loss": 22.3633, "step": 406630 }, { "epoch": 0.8214385274546799, "grad_norm": 878.520263671875, "learning_rate": 1.0249605967207204e-06, "loss": 16.6829, "step": 406640 }, { "epoch": 0.8214587280873636, "grad_norm": 477.63262939453125, "learning_rate": 1.0247488633766756e-06, "loss": 20.8903, "step": 406650 }, { "epoch": 0.8214789287200475, "grad_norm": 558.3587646484375, "learning_rate": 1.0245371494075246e-06, "loss": 13.331, "step": 406660 }, { "epoch": 0.8214991293527313, "grad_norm": 383.0649108886719, "learning_rate": 1.0243254548142973e-06, "loss": 26.3895, "step": 406670 }, { "epoch": 0.8215193299854151, "grad_norm": 453.21136474609375, "learning_rate": 1.0241137795980239e-06, "loss": 29.1606, "step": 406680 }, { "epoch": 0.8215395306180989, "grad_norm": 252.2771453857422, "learning_rate": 
1.023902123759738e-06, "loss": 24.245, "step": 406690 }, { "epoch": 0.8215597312507827, "grad_norm": 376.6198425292969, "learning_rate": 1.0236904873004722e-06, "loss": 22.271, "step": 406700 }, { "epoch": 0.8215799318834666, "grad_norm": 324.5942687988281, "learning_rate": 1.023478870221256e-06, "loss": 15.6521, "step": 406710 }, { "epoch": 0.8216001325161504, "grad_norm": 809.4425048828125, "learning_rate": 1.0232672725231213e-06, "loss": 22.5213, "step": 406720 }, { "epoch": 0.8216203331488342, "grad_norm": 568.722412109375, "learning_rate": 1.0230556942071002e-06, "loss": 23.1406, "step": 406730 }, { "epoch": 0.821640533781518, "grad_norm": 170.7123565673828, "learning_rate": 1.0228441352742236e-06, "loss": 9.9331, "step": 406740 }, { "epoch": 0.8216607344142018, "grad_norm": 430.9437255859375, "learning_rate": 1.0226325957255207e-06, "loss": 19.6494, "step": 406750 }, { "epoch": 0.8216809350468857, "grad_norm": 935.859619140625, "learning_rate": 1.0224210755620257e-06, "loss": 28.973, "step": 406760 }, { "epoch": 0.8217011356795695, "grad_norm": 383.5516052246094, "learning_rate": 1.0222095747847666e-06, "loss": 30.4782, "step": 406770 }, { "epoch": 0.8217213363122533, "grad_norm": 83.07652282714844, "learning_rate": 1.0219980933947772e-06, "loss": 23.0471, "step": 406780 }, { "epoch": 0.8217415369449371, "grad_norm": 255.9378662109375, "learning_rate": 1.0217866313930847e-06, "loss": 18.1824, "step": 406790 }, { "epoch": 0.8217617375776209, "grad_norm": 358.04937744140625, "learning_rate": 1.0215751887807228e-06, "loss": 26.3627, "step": 406800 }, { "epoch": 0.8217819382103048, "grad_norm": 1221.672119140625, "learning_rate": 1.0213637655587216e-06, "loss": 37.2455, "step": 406810 }, { "epoch": 0.8218021388429886, "grad_norm": 171.9769287109375, "learning_rate": 1.0211523617281095e-06, "loss": 12.8814, "step": 406820 }, { "epoch": 0.8218223394756724, "grad_norm": 124.1884536743164, "learning_rate": 1.0209409772899192e-06, "loss": 16.5848, "step": 406830 }, { "epoch": 0.8218425401083562, "grad_norm": 113.08357238769531, "learning_rate": 1.0207296122451789e-06, "loss": 11.1415, "step": 406840 }, { "epoch": 0.82186274074104, "grad_norm": 601.0794677734375, "learning_rate": 1.020518266594921e-06, "loss": 18.2521, "step": 406850 }, { "epoch": 0.8218829413737239, "grad_norm": 130.7305908203125, "learning_rate": 1.0203069403401743e-06, "loss": 17.6013, "step": 406860 }, { "epoch": 0.8219031420064077, "grad_norm": 208.6722869873047, "learning_rate": 1.0200956334819678e-06, "loss": 21.3193, "step": 406870 }, { "epoch": 0.8219233426390915, "grad_norm": 19.291900634765625, "learning_rate": 1.0198843460213337e-06, "loss": 8.2939, "step": 406880 }, { "epoch": 0.8219435432717753, "grad_norm": 912.6710205078125, "learning_rate": 1.0196730779593006e-06, "loss": 20.773, "step": 406890 }, { "epoch": 0.8219637439044591, "grad_norm": 477.4142150878906, "learning_rate": 1.0194618292968972e-06, "loss": 26.0923, "step": 406900 }, { "epoch": 0.8219839445371429, "grad_norm": 184.98167419433594, "learning_rate": 1.0192506000351532e-06, "loss": 17.0479, "step": 406910 }, { "epoch": 0.8220041451698267, "grad_norm": 34.30686569213867, "learning_rate": 1.0190393901751023e-06, "loss": 17.5799, "step": 406920 }, { "epoch": 0.8220243458025105, "grad_norm": 249.46302795410156, "learning_rate": 1.0188281997177679e-06, "loss": 19.1335, "step": 406930 }, { "epoch": 0.8220445464351943, "grad_norm": 379.62921142578125, "learning_rate": 1.0186170286641816e-06, "loss": 16.6195, "step": 406940 }, { "epoch": 
0.8220647470678781, "grad_norm": 304.0983581542969, "learning_rate": 1.0184058770153748e-06, "loss": 13.9507, "step": 406950 }, { "epoch": 0.822084947700562, "grad_norm": 255.19227600097656, "learning_rate": 1.0181947447723744e-06, "loss": 20.8534, "step": 406960 }, { "epoch": 0.8221051483332458, "grad_norm": 155.5648651123047, "learning_rate": 1.017983631936209e-06, "loss": 15.6805, "step": 406970 }, { "epoch": 0.8221253489659296, "grad_norm": 0.8212962746620178, "learning_rate": 1.0177725385079084e-06, "loss": 8.0075, "step": 406980 }, { "epoch": 0.8221455495986134, "grad_norm": 299.80841064453125, "learning_rate": 1.017561464488504e-06, "loss": 14.4082, "step": 406990 }, { "epoch": 0.8221657502312972, "grad_norm": 185.31739807128906, "learning_rate": 1.0173504098790188e-06, "loss": 10.34, "step": 407000 }, { "epoch": 0.822185950863981, "grad_norm": 307.5030517578125, "learning_rate": 1.0171393746804854e-06, "loss": 17.1441, "step": 407010 }, { "epoch": 0.8222061514966649, "grad_norm": 243.54632568359375, "learning_rate": 1.0169283588939326e-06, "loss": 17.0271, "step": 407020 }, { "epoch": 0.8222263521293487, "grad_norm": 498.6639099121094, "learning_rate": 1.016717362520388e-06, "loss": 25.0322, "step": 407030 }, { "epoch": 0.8222465527620325, "grad_norm": 133.57798767089844, "learning_rate": 1.0165063855608786e-06, "loss": 6.8695, "step": 407040 }, { "epoch": 0.8222667533947163, "grad_norm": 505.4360656738281, "learning_rate": 1.016295428016435e-06, "loss": 22.3942, "step": 407050 }, { "epoch": 0.8222869540274002, "grad_norm": 1129.1591796875, "learning_rate": 1.0160844898880845e-06, "loss": 26.4678, "step": 407060 }, { "epoch": 0.822307154660084, "grad_norm": 519.4834594726562, "learning_rate": 1.0158735711768542e-06, "loss": 22.2843, "step": 407070 }, { "epoch": 0.8223273552927678, "grad_norm": 173.5504913330078, "learning_rate": 1.0156626718837737e-06, "loss": 28.651, "step": 407080 }, { "epoch": 0.8223475559254516, "grad_norm": 436.7134094238281, "learning_rate": 1.0154517920098682e-06, "loss": 11.499, "step": 407090 }, { "epoch": 0.8223677565581354, "grad_norm": 116.7005615234375, "learning_rate": 1.0152409315561696e-06, "loss": 9.7145, "step": 407100 }, { "epoch": 0.8223879571908193, "grad_norm": 642.4334106445312, "learning_rate": 1.0150300905237015e-06, "loss": 31.7975, "step": 407110 }, { "epoch": 0.8224081578235031, "grad_norm": 207.56675720214844, "learning_rate": 1.014819268913495e-06, "loss": 16.446, "step": 407120 }, { "epoch": 0.8224283584561869, "grad_norm": 400.88409423828125, "learning_rate": 1.0146084667265766e-06, "loss": 10.7764, "step": 407130 }, { "epoch": 0.8224485590888707, "grad_norm": 490.64605712890625, "learning_rate": 1.0143976839639713e-06, "loss": 17.8865, "step": 407140 }, { "epoch": 0.8224687597215545, "grad_norm": 617.2076416015625, "learning_rate": 1.0141869206267097e-06, "loss": 20.4385, "step": 407150 }, { "epoch": 0.8224889603542382, "grad_norm": 373.3833923339844, "learning_rate": 1.0139761767158158e-06, "loss": 17.1855, "step": 407160 }, { "epoch": 0.8225091609869221, "grad_norm": 1924.2100830078125, "learning_rate": 1.0137654522323204e-06, "loss": 25.3028, "step": 407170 }, { "epoch": 0.8225293616196059, "grad_norm": 202.76271057128906, "learning_rate": 1.0135547471772488e-06, "loss": 12.4462, "step": 407180 }, { "epoch": 0.8225495622522897, "grad_norm": 27.07765007019043, "learning_rate": 1.013344061551626e-06, "loss": 16.7884, "step": 407190 }, { "epoch": 0.8225697628849735, "grad_norm": 275.3519287109375, "learning_rate": 
1.0131333953564825e-06, "loss": 10.6686, "step": 407200 }, { "epoch": 0.8225899635176573, "grad_norm": 603.3817749023438, "learning_rate": 1.0129227485928432e-06, "loss": 18.0398, "step": 407210 }, { "epoch": 0.8226101641503412, "grad_norm": 275.7763366699219, "learning_rate": 1.0127121212617335e-06, "loss": 10.4766, "step": 407220 }, { "epoch": 0.822630364783025, "grad_norm": 477.1412658691406, "learning_rate": 1.0125015133641813e-06, "loss": 21.3107, "step": 407230 }, { "epoch": 0.8226505654157088, "grad_norm": 323.2248840332031, "learning_rate": 1.0122909249012148e-06, "loss": 17.129, "step": 407240 }, { "epoch": 0.8226707660483926, "grad_norm": 165.0032196044922, "learning_rate": 1.0120803558738585e-06, "loss": 18.286, "step": 407250 }, { "epoch": 0.8226909666810764, "grad_norm": 537.7188110351562, "learning_rate": 1.0118698062831372e-06, "loss": 24.8882, "step": 407260 }, { "epoch": 0.8227111673137603, "grad_norm": 226.24362182617188, "learning_rate": 1.0116592761300804e-06, "loss": 9.4518, "step": 407270 }, { "epoch": 0.8227313679464441, "grad_norm": 288.0339660644531, "learning_rate": 1.0114487654157123e-06, "loss": 21.0934, "step": 407280 }, { "epoch": 0.8227515685791279, "grad_norm": 465.50848388671875, "learning_rate": 1.0112382741410582e-06, "loss": 21.5515, "step": 407290 }, { "epoch": 0.8227717692118117, "grad_norm": 187.2500457763672, "learning_rate": 1.0110278023071445e-06, "loss": 11.8522, "step": 407300 }, { "epoch": 0.8227919698444955, "grad_norm": 210.75131225585938, "learning_rate": 1.0108173499149991e-06, "loss": 16.1581, "step": 407310 }, { "epoch": 0.8228121704771794, "grad_norm": 0.0, "learning_rate": 1.0106069169656464e-06, "loss": 10.5133, "step": 407320 }, { "epoch": 0.8228323711098632, "grad_norm": 303.7751159667969, "learning_rate": 1.0103965034601098e-06, "loss": 19.7185, "step": 407330 }, { "epoch": 0.822852571742547, "grad_norm": 655.2921142578125, "learning_rate": 1.0101861093994182e-06, "loss": 22.9425, "step": 407340 }, { "epoch": 0.8228727723752308, "grad_norm": 270.5400390625, "learning_rate": 1.0099757347845957e-06, "loss": 20.7602, "step": 407350 }, { "epoch": 0.8228929730079146, "grad_norm": 644.845458984375, "learning_rate": 1.0097653796166662e-06, "loss": 21.5414, "step": 407360 }, { "epoch": 0.8229131736405985, "grad_norm": 206.19781494140625, "learning_rate": 1.0095550438966578e-06, "loss": 15.1997, "step": 407370 }, { "epoch": 0.8229333742732823, "grad_norm": 733.670654296875, "learning_rate": 1.009344727625593e-06, "loss": 21.8256, "step": 407380 }, { "epoch": 0.8229535749059661, "grad_norm": 148.4443817138672, "learning_rate": 1.0091344308044987e-06, "loss": 9.7204, "step": 407390 }, { "epoch": 0.8229737755386499, "grad_norm": 319.5944519042969, "learning_rate": 1.0089241534343986e-06, "loss": 14.368, "step": 407400 }, { "epoch": 0.8229939761713337, "grad_norm": 412.720458984375, "learning_rate": 1.008713895516319e-06, "loss": 19.7029, "step": 407410 }, { "epoch": 0.8230141768040174, "grad_norm": 248.20687866210938, "learning_rate": 1.0085036570512836e-06, "loss": 14.2003, "step": 407420 }, { "epoch": 0.8230343774367013, "grad_norm": 623.3434448242188, "learning_rate": 1.0082934380403159e-06, "loss": 29.236, "step": 407430 }, { "epoch": 0.8230545780693851, "grad_norm": 305.50140380859375, "learning_rate": 1.0080832384844437e-06, "loss": 27.633, "step": 407440 }, { "epoch": 0.8230747787020689, "grad_norm": 803.7622680664062, "learning_rate": 1.0078730583846879e-06, "loss": 18.9865, "step": 407450 }, { "epoch": 0.8230949793347527, "grad_norm": 
426.3224182128906, "learning_rate": 1.0076628977420761e-06, "loss": 21.8781, "step": 407460 }, { "epoch": 0.8231151799674365, "grad_norm": 144.5908966064453, "learning_rate": 1.0074527565576308e-06, "loss": 8.5792, "step": 407470 }, { "epoch": 0.8231353806001204, "grad_norm": 185.98281860351562, "learning_rate": 1.0072426348323754e-06, "loss": 14.8563, "step": 407480 }, { "epoch": 0.8231555812328042, "grad_norm": 183.10203552246094, "learning_rate": 1.0070325325673364e-06, "loss": 31.4714, "step": 407490 }, { "epoch": 0.823175781865488, "grad_norm": 391.8500671386719, "learning_rate": 1.006822449763537e-06, "loss": 23.1074, "step": 407500 }, { "epoch": 0.8231959824981718, "grad_norm": 448.3106384277344, "learning_rate": 1.006612386421999e-06, "loss": 9.8672, "step": 407510 }, { "epoch": 0.8232161831308556, "grad_norm": 247.23480224609375, "learning_rate": 1.006402342543748e-06, "loss": 11.6359, "step": 407520 }, { "epoch": 0.8232363837635395, "grad_norm": 370.77362060546875, "learning_rate": 1.00619231812981e-06, "loss": 12.3722, "step": 407530 }, { "epoch": 0.8232565843962233, "grad_norm": 337.4403076171875, "learning_rate": 1.0059823131812035e-06, "loss": 10.3751, "step": 407540 }, { "epoch": 0.8232767850289071, "grad_norm": 332.5984802246094, "learning_rate": 1.0057723276989551e-06, "loss": 17.4446, "step": 407550 }, { "epoch": 0.8232969856615909, "grad_norm": 148.41029357910156, "learning_rate": 1.0055623616840893e-06, "loss": 12.2314, "step": 407560 }, { "epoch": 0.8233171862942747, "grad_norm": 298.4598083496094, "learning_rate": 1.0053524151376283e-06, "loss": 8.6461, "step": 407570 }, { "epoch": 0.8233373869269586, "grad_norm": 1214.2337646484375, "learning_rate": 1.005142488060593e-06, "loss": 31.857, "step": 407580 }, { "epoch": 0.8233575875596424, "grad_norm": 397.0372619628906, "learning_rate": 1.0049325804540094e-06, "loss": 21.9878, "step": 407590 }, { "epoch": 0.8233777881923262, "grad_norm": 315.916015625, "learning_rate": 1.0047226923189024e-06, "loss": 12.3157, "step": 407600 }, { "epoch": 0.82339798882501, "grad_norm": 73.71172332763672, "learning_rate": 1.0045128236562895e-06, "loss": 27.1555, "step": 407610 }, { "epoch": 0.8234181894576938, "grad_norm": 789.0155029296875, "learning_rate": 1.0043029744671967e-06, "loss": 14.7152, "step": 407620 }, { "epoch": 0.8234383900903777, "grad_norm": 311.51495361328125, "learning_rate": 1.004093144752648e-06, "loss": 13.1633, "step": 407630 }, { "epoch": 0.8234585907230615, "grad_norm": 305.6420593261719, "learning_rate": 1.0038833345136644e-06, "loss": 10.7144, "step": 407640 }, { "epoch": 0.8234787913557453, "grad_norm": 812.8828125, "learning_rate": 1.003673543751268e-06, "loss": 12.7567, "step": 407650 }, { "epoch": 0.8234989919884291, "grad_norm": 320.21844482421875, "learning_rate": 1.0034637724664832e-06, "loss": 13.4099, "step": 407660 }, { "epoch": 0.8235191926211128, "grad_norm": 289.0506591796875, "learning_rate": 1.0032540206603309e-06, "loss": 19.9731, "step": 407670 }, { "epoch": 0.8235393932537967, "grad_norm": 462.7651672363281, "learning_rate": 1.0030442883338325e-06, "loss": 21.0934, "step": 407680 }, { "epoch": 0.8235595938864805, "grad_norm": 349.7567443847656, "learning_rate": 1.0028345754880114e-06, "loss": 29.654, "step": 407690 }, { "epoch": 0.8235797945191643, "grad_norm": 237.719482421875, "learning_rate": 1.0026248821238915e-06, "loss": 20.4587, "step": 407700 }, { "epoch": 0.8235999951518481, "grad_norm": 463.9545593261719, "learning_rate": 1.0024152082424926e-06, "loss": 9.4284, "step": 407710 }, { 
"epoch": 0.8236201957845319, "grad_norm": 326.577392578125, "learning_rate": 1.002205553844836e-06, "loss": 26.6947, "step": 407720 }, { "epoch": 0.8236403964172158, "grad_norm": 291.4574279785156, "learning_rate": 1.0019959189319462e-06, "loss": 16.8537, "step": 407730 }, { "epoch": 0.8236605970498996, "grad_norm": 475.0122985839844, "learning_rate": 1.0017863035048431e-06, "loss": 18.0452, "step": 407740 }, { "epoch": 0.8236807976825834, "grad_norm": 192.9997100830078, "learning_rate": 1.0015767075645472e-06, "loss": 22.6792, "step": 407750 }, { "epoch": 0.8237009983152672, "grad_norm": 68.14810180664062, "learning_rate": 1.0013671311120832e-06, "loss": 9.7926, "step": 407760 }, { "epoch": 0.823721198947951, "grad_norm": 359.591552734375, "learning_rate": 1.001157574148469e-06, "loss": 23.7008, "step": 407770 }, { "epoch": 0.8237413995806349, "grad_norm": 270.8547058105469, "learning_rate": 1.00094803667473e-06, "loss": 20.2577, "step": 407780 }, { "epoch": 0.8237616002133187, "grad_norm": 217.625244140625, "learning_rate": 1.0007385186918844e-06, "loss": 8.6643, "step": 407790 }, { "epoch": 0.8237818008460025, "grad_norm": 104.18912506103516, "learning_rate": 1.0005290202009533e-06, "loss": 10.6152, "step": 407800 }, { "epoch": 0.8238020014786863, "grad_norm": 300.1459045410156, "learning_rate": 1.00031954120296e-06, "loss": 16.6248, "step": 407810 }, { "epoch": 0.8238222021113701, "grad_norm": 548.8994140625, "learning_rate": 1.000110081698924e-06, "loss": 31.0033, "step": 407820 }, { "epoch": 0.823842402744054, "grad_norm": 732.3353881835938, "learning_rate": 9.99900641689865e-07, "loss": 16.5561, "step": 407830 }, { "epoch": 0.8238626033767378, "grad_norm": 621.445556640625, "learning_rate": 9.99691221176805e-07, "loss": 13.8444, "step": 407840 }, { "epoch": 0.8238828040094216, "grad_norm": 7.0599141120910645, "learning_rate": 9.994818201607665e-07, "loss": 28.752, "step": 407850 }, { "epoch": 0.8239030046421054, "grad_norm": 457.2989501953125, "learning_rate": 9.992724386427676e-07, "loss": 22.4032, "step": 407860 }, { "epoch": 0.8239232052747892, "grad_norm": 299.7660827636719, "learning_rate": 9.990630766238292e-07, "loss": 11.6483, "step": 407870 }, { "epoch": 0.8239434059074731, "grad_norm": 306.6111755371094, "learning_rate": 9.988537341049732e-07, "loss": 11.5791, "step": 407880 }, { "epoch": 0.8239636065401569, "grad_norm": 542.0485229492188, "learning_rate": 9.986444110872185e-07, "loss": 13.4904, "step": 407890 }, { "epoch": 0.8239838071728407, "grad_norm": 178.08766174316406, "learning_rate": 9.984351075715848e-07, "loss": 10.2934, "step": 407900 }, { "epoch": 0.8240040078055245, "grad_norm": 593.4957275390625, "learning_rate": 9.982258235590926e-07, "loss": 24.5771, "step": 407910 }, { "epoch": 0.8240242084382083, "grad_norm": 244.8531494140625, "learning_rate": 9.98016559050765e-07, "loss": 27.4848, "step": 407920 }, { "epoch": 0.824044409070892, "grad_norm": 223.80311584472656, "learning_rate": 9.978073140476169e-07, "loss": 13.6994, "step": 407930 }, { "epoch": 0.8240646097035759, "grad_norm": 381.5936584472656, "learning_rate": 9.975980885506708e-07, "loss": 8.4069, "step": 407940 }, { "epoch": 0.8240848103362597, "grad_norm": 174.5120086669922, "learning_rate": 9.973888825609474e-07, "loss": 22.6345, "step": 407950 }, { "epoch": 0.8241050109689435, "grad_norm": 499.43511962890625, "learning_rate": 9.971796960794644e-07, "loss": 22.6273, "step": 407960 }, { "epoch": 0.8241252116016273, "grad_norm": 228.4709930419922, "learning_rate": 9.969705291072419e-07, "loss": 
30.4253, "step": 407970 }, { "epoch": 0.8241454122343111, "grad_norm": 965.5435791015625, "learning_rate": 9.967613816452997e-07, "loss": 26.2806, "step": 407980 }, { "epoch": 0.824165612866995, "grad_norm": 330.20074462890625, "learning_rate": 9.965522536946564e-07, "loss": 17.9362, "step": 407990 }, { "epoch": 0.8241858134996788, "grad_norm": 527.2703857421875, "learning_rate": 9.963431452563331e-07, "loss": 21.5365, "step": 408000 }, { "epoch": 0.8242060141323626, "grad_norm": 339.7175598144531, "learning_rate": 9.96134056331346e-07, "loss": 14.217, "step": 408010 }, { "epoch": 0.8242262147650464, "grad_norm": 269.4036560058594, "learning_rate": 9.959249869207177e-07, "loss": 18.8896, "step": 408020 }, { "epoch": 0.8242464153977302, "grad_norm": 357.4214782714844, "learning_rate": 9.957159370254654e-07, "loss": 19.9985, "step": 408030 }, { "epoch": 0.8242666160304141, "grad_norm": 519.70068359375, "learning_rate": 9.95506906646606e-07, "loss": 15.1204, "step": 408040 }, { "epoch": 0.8242868166630979, "grad_norm": 413.96832275390625, "learning_rate": 9.952978957851622e-07, "loss": 27.0462, "step": 408050 }, { "epoch": 0.8243070172957817, "grad_norm": 632.7415771484375, "learning_rate": 9.95088904442149e-07, "loss": 11.7946, "step": 408060 }, { "epoch": 0.8243272179284655, "grad_norm": 288.6258239746094, "learning_rate": 9.948799326185886e-07, "loss": 27.4577, "step": 408070 }, { "epoch": 0.8243474185611493, "grad_norm": 181.74131774902344, "learning_rate": 9.946709803154975e-07, "loss": 15.3863, "step": 408080 }, { "epoch": 0.8243676191938332, "grad_norm": 142.41555786132812, "learning_rate": 9.94462047533893e-07, "loss": 17.9423, "step": 408090 }, { "epoch": 0.824387819826517, "grad_norm": 499.7061767578125, "learning_rate": 9.942531342747953e-07, "loss": 15.1426, "step": 408100 }, { "epoch": 0.8244080204592008, "grad_norm": 324.0824890136719, "learning_rate": 9.940442405392226e-07, "loss": 12.2927, "step": 408110 }, { "epoch": 0.8244282210918846, "grad_norm": 154.07171630859375, "learning_rate": 9.938353663281908e-07, "loss": 15.4776, "step": 408120 }, { "epoch": 0.8244484217245684, "grad_norm": 251.8144073486328, "learning_rate": 9.936265116427195e-07, "loss": 19.2839, "step": 408130 }, { "epoch": 0.8244686223572523, "grad_norm": 131.8602752685547, "learning_rate": 9.9341767648383e-07, "loss": 20.3942, "step": 408140 }, { "epoch": 0.8244888229899361, "grad_norm": 423.2018127441406, "learning_rate": 9.932088608525336e-07, "loss": 24.359, "step": 408150 }, { "epoch": 0.8245090236226199, "grad_norm": 493.63970947265625, "learning_rate": 9.93000064749851e-07, "loss": 22.8545, "step": 408160 }, { "epoch": 0.8245292242553037, "grad_norm": 35.90886306762695, "learning_rate": 9.92791288176802e-07, "loss": 12.6635, "step": 408170 }, { "epoch": 0.8245494248879874, "grad_norm": 402.9112548828125, "learning_rate": 9.925825311344018e-07, "loss": 14.5346, "step": 408180 }, { "epoch": 0.8245696255206713, "grad_norm": 844.3985595703125, "learning_rate": 9.923737936236671e-07, "loss": 29.3343, "step": 408190 }, { "epoch": 0.8245898261533551, "grad_norm": 941.7723999023438, "learning_rate": 9.921650756456164e-07, "loss": 28.1865, "step": 408200 }, { "epoch": 0.8246100267860389, "grad_norm": 349.99188232421875, "learning_rate": 9.919563772012697e-07, "loss": 17.0764, "step": 408210 }, { "epoch": 0.8246302274187227, "grad_norm": 561.3889770507812, "learning_rate": 9.917476982916391e-07, "loss": 18.3785, "step": 408220 }, { "epoch": 0.8246504280514065, "grad_norm": 30.30302619934082, "learning_rate": 
9.915390389177438e-07, "loss": 25.7135, "step": 408230 }, { "epoch": 0.8246706286840904, "grad_norm": 1548.602294921875, "learning_rate": 9.913303990806028e-07, "loss": 19.1297, "step": 408240 }, { "epoch": 0.8246908293167742, "grad_norm": 369.7165222167969, "learning_rate": 9.911217787812305e-07, "loss": 14.5312, "step": 408250 }, { "epoch": 0.824711029949458, "grad_norm": 297.04144287109375, "learning_rate": 9.909131780206437e-07, "loss": 13.3768, "step": 408260 }, { "epoch": 0.8247312305821418, "grad_norm": 348.3402404785156, "learning_rate": 9.907045967998613e-07, "loss": 9.3411, "step": 408270 }, { "epoch": 0.8247514312148256, "grad_norm": 89.20021057128906, "learning_rate": 9.904960351198977e-07, "loss": 7.4829, "step": 408280 }, { "epoch": 0.8247716318475095, "grad_norm": 986.4409790039062, "learning_rate": 9.902874929817696e-07, "loss": 17.8379, "step": 408290 }, { "epoch": 0.8247918324801933, "grad_norm": 203.3545379638672, "learning_rate": 9.900789703864933e-07, "loss": 26.6569, "step": 408300 }, { "epoch": 0.8248120331128771, "grad_norm": 214.7468719482422, "learning_rate": 9.898704673350872e-07, "loss": 15.4125, "step": 408310 }, { "epoch": 0.8248322337455609, "grad_norm": 387.4945068359375, "learning_rate": 9.896619838285664e-07, "loss": 16.3487, "step": 408320 }, { "epoch": 0.8248524343782447, "grad_norm": 273.38397216796875, "learning_rate": 9.89453519867945e-07, "loss": 21.7352, "step": 408330 }, { "epoch": 0.8248726350109286, "grad_norm": 281.3521423339844, "learning_rate": 9.892450754542427e-07, "loss": 25.8193, "step": 408340 }, { "epoch": 0.8248928356436124, "grad_norm": 515.4878540039062, "learning_rate": 9.890366505884725e-07, "loss": 16.4527, "step": 408350 }, { "epoch": 0.8249130362762962, "grad_norm": 205.6038055419922, "learning_rate": 9.888282452716507e-07, "loss": 26.7113, "step": 408360 }, { "epoch": 0.82493323690898, "grad_norm": 380.3154602050781, "learning_rate": 9.88619859504794e-07, "loss": 28.7859, "step": 408370 }, { "epoch": 0.8249534375416638, "grad_norm": 358.2257080078125, "learning_rate": 9.884114932889172e-07, "loss": 16.7804, "step": 408380 }, { "epoch": 0.8249736381743477, "grad_norm": 144.62191772460938, "learning_rate": 9.88203146625037e-07, "loss": 21.5211, "step": 408390 }, { "epoch": 0.8249938388070315, "grad_norm": 251.671630859375, "learning_rate": 9.879948195141681e-07, "loss": 14.84, "step": 408400 }, { "epoch": 0.8250140394397153, "grad_norm": 574.9043579101562, "learning_rate": 9.877865119573249e-07, "loss": 26.5908, "step": 408410 }, { "epoch": 0.8250342400723991, "grad_norm": 955.4150390625, "learning_rate": 9.87578223955525e-07, "loss": 24.9013, "step": 408420 }, { "epoch": 0.8250544407050829, "grad_norm": 375.3989562988281, "learning_rate": 9.873699555097815e-07, "loss": 18.8558, "step": 408430 }, { "epoch": 0.8250746413377666, "grad_norm": 162.96932983398438, "learning_rate": 9.871617066211092e-07, "loss": 15.9927, "step": 408440 }, { "epoch": 0.8250948419704505, "grad_norm": 380.4167175292969, "learning_rate": 9.869534772905242e-07, "loss": 8.1155, "step": 408450 }, { "epoch": 0.8251150426031343, "grad_norm": 469.0326232910156, "learning_rate": 9.867452675190425e-07, "loss": 11.9704, "step": 408460 }, { "epoch": 0.8251352432358181, "grad_norm": 679.4263305664062, "learning_rate": 9.86537077307677e-07, "loss": 21.8916, "step": 408470 }, { "epoch": 0.8251554438685019, "grad_norm": 702.8475952148438, "learning_rate": 9.863289066574426e-07, "loss": 19.0194, "step": 408480 }, { "epoch": 0.8251756445011857, "grad_norm": 
143.41806030273438, "learning_rate": 9.861207555693552e-07, "loss": 11.9352, "step": 408490 }, { "epoch": 0.8251958451338696, "grad_norm": 417.0471496582031, "learning_rate": 9.859126240444284e-07, "loss": 17.789, "step": 408500 }, { "epoch": 0.8252160457665534, "grad_norm": 118.43316650390625, "learning_rate": 9.857045120836756e-07, "loss": 19.5522, "step": 408510 }, { "epoch": 0.8252362463992372, "grad_norm": 298.9678649902344, "learning_rate": 9.854964196881117e-07, "loss": 22.8157, "step": 408520 }, { "epoch": 0.825256447031921, "grad_norm": 688.9229125976562, "learning_rate": 9.852883468587544e-07, "loss": 27.4831, "step": 408530 }, { "epoch": 0.8252766476646048, "grad_norm": 346.8231201171875, "learning_rate": 9.85080293596612e-07, "loss": 14.2504, "step": 408540 }, { "epoch": 0.8252968482972887, "grad_norm": 27.613510131835938, "learning_rate": 9.848722599027012e-07, "loss": 13.9152, "step": 408550 }, { "epoch": 0.8253170489299725, "grad_norm": 590.6968383789062, "learning_rate": 9.84664245778037e-07, "loss": 25.6394, "step": 408560 }, { "epoch": 0.8253372495626563, "grad_norm": 203.42098999023438, "learning_rate": 9.844562512236327e-07, "loss": 9.9276, "step": 408570 }, { "epoch": 0.8253574501953401, "grad_norm": 381.7028503417969, "learning_rate": 9.842482762405004e-07, "loss": 25.0408, "step": 408580 }, { "epoch": 0.825377650828024, "grad_norm": 407.7652587890625, "learning_rate": 9.840403208296556e-07, "loss": 21.7263, "step": 408590 }, { "epoch": 0.8253978514607078, "grad_norm": 495.6151123046875, "learning_rate": 9.838323849921123e-07, "loss": 15.1366, "step": 408600 }, { "epoch": 0.8254180520933916, "grad_norm": 481.92291259765625, "learning_rate": 9.836244687288803e-07, "loss": 14.103, "step": 408610 }, { "epoch": 0.8254382527260754, "grad_norm": 376.790771484375, "learning_rate": 9.834165720409767e-07, "loss": 12.3299, "step": 408620 }, { "epoch": 0.8254584533587592, "grad_norm": 731.8862915039062, "learning_rate": 9.83208694929414e-07, "loss": 16.4488, "step": 408630 }, { "epoch": 0.825478653991443, "grad_norm": 870.9635009765625, "learning_rate": 9.830008373952054e-07, "loss": 29.8897, "step": 408640 }, { "epoch": 0.8254988546241269, "grad_norm": 171.99343872070312, "learning_rate": 9.82792999439362e-07, "loss": 20.0473, "step": 408650 }, { "epoch": 0.8255190552568107, "grad_norm": 441.4473876953125, "learning_rate": 9.825851810628995e-07, "loss": 20.2918, "step": 408660 }, { "epoch": 0.8255392558894945, "grad_norm": 63.19327926635742, "learning_rate": 9.823773822668298e-07, "loss": 37.2769, "step": 408670 }, { "epoch": 0.8255594565221783, "grad_norm": 215.95689392089844, "learning_rate": 9.821696030521644e-07, "loss": 15.3831, "step": 408680 }, { "epoch": 0.8255796571548621, "grad_norm": 313.6595764160156, "learning_rate": 9.81961843419918e-07, "loss": 24.235, "step": 408690 }, { "epoch": 0.8255998577875459, "grad_norm": 208.46145629882812, "learning_rate": 9.81754103371101e-07, "loss": 16.2879, "step": 408700 }, { "epoch": 0.8256200584202297, "grad_norm": 224.81307983398438, "learning_rate": 9.815463829067284e-07, "loss": 10.0573, "step": 408710 }, { "epoch": 0.8256402590529135, "grad_norm": 156.67495727539062, "learning_rate": 9.813386820278114e-07, "loss": 21.7002, "step": 408720 }, { "epoch": 0.8256604596855973, "grad_norm": 57.414039611816406, "learning_rate": 9.811310007353608e-07, "loss": 17.3652, "step": 408730 }, { "epoch": 0.8256806603182811, "grad_norm": 553.4667358398438, "learning_rate": 9.809233390303901e-07, "loss": 26.2718, "step": 408740 }, { "epoch": 
0.825700860950965, "grad_norm": 508.50726318359375, "learning_rate": 9.807156969139136e-07, "loss": 10.7769, "step": 408750 }, { "epoch": 0.8257210615836488, "grad_norm": 35.79631423950195, "learning_rate": 9.805080743869406e-07, "loss": 13.6019, "step": 408760 }, { "epoch": 0.8257412622163326, "grad_norm": 267.25439453125, "learning_rate": 9.803004714504827e-07, "loss": 36.682, "step": 408770 }, { "epoch": 0.8257614628490164, "grad_norm": 1.7341084480285645, "learning_rate": 9.800928881055543e-07, "loss": 31.0383, "step": 408780 }, { "epoch": 0.8257816634817002, "grad_norm": 106.06595611572266, "learning_rate": 9.798853243531654e-07, "loss": 13.1472, "step": 408790 }, { "epoch": 0.825801864114384, "grad_norm": 551.80224609375, "learning_rate": 9.79677780194327e-07, "loss": 29.5755, "step": 408800 }, { "epoch": 0.8258220647470679, "grad_norm": 395.3323059082031, "learning_rate": 9.794702556300505e-07, "loss": 31.686, "step": 408810 }, { "epoch": 0.8258422653797517, "grad_norm": 457.2997131347656, "learning_rate": 9.792627506613517e-07, "loss": 16.0439, "step": 408820 }, { "epoch": 0.8258624660124355, "grad_norm": 62.50281524658203, "learning_rate": 9.79055265289236e-07, "loss": 8.3, "step": 408830 }, { "epoch": 0.8258826666451193, "grad_norm": 554.3505859375, "learning_rate": 9.788477995147173e-07, "loss": 31.2342, "step": 408840 }, { "epoch": 0.8259028672778032, "grad_norm": 340.3030700683594, "learning_rate": 9.786403533388072e-07, "loss": 15.5099, "step": 408850 }, { "epoch": 0.825923067910487, "grad_norm": 438.2608642578125, "learning_rate": 9.78432926762517e-07, "loss": 22.3036, "step": 408860 }, { "epoch": 0.8259432685431708, "grad_norm": 340.9075012207031, "learning_rate": 9.782255197868556e-07, "loss": 13.3878, "step": 408870 }, { "epoch": 0.8259634691758546, "grad_norm": 128.02398681640625, "learning_rate": 9.780181324128368e-07, "loss": 7.8571, "step": 408880 }, { "epoch": 0.8259836698085384, "grad_norm": 335.00469970703125, "learning_rate": 9.778107646414691e-07, "loss": 8.0132, "step": 408890 }, { "epoch": 0.8260038704412223, "grad_norm": 497.5658874511719, "learning_rate": 9.77603416473763e-07, "loss": 13.8475, "step": 408900 }, { "epoch": 0.8260240710739061, "grad_norm": 681.4411010742188, "learning_rate": 9.773960879107303e-07, "loss": 32.2834, "step": 408910 }, { "epoch": 0.8260442717065899, "grad_norm": 569.7233276367188, "learning_rate": 9.771887789533818e-07, "loss": 21.9859, "step": 408920 }, { "epoch": 0.8260644723392737, "grad_norm": 396.6499938964844, "learning_rate": 9.76981489602728e-07, "loss": 22.3758, "step": 408930 }, { "epoch": 0.8260846729719575, "grad_norm": 422.7529602050781, "learning_rate": 9.767742198597769e-07, "loss": 20.051, "step": 408940 }, { "epoch": 0.8261048736046412, "grad_norm": 278.30377197265625, "learning_rate": 9.765669697255413e-07, "loss": 21.6211, "step": 408950 }, { "epoch": 0.8261250742373251, "grad_norm": 398.0464782714844, "learning_rate": 9.763597392010304e-07, "loss": 19.0091, "step": 408960 }, { "epoch": 0.8261452748700089, "grad_norm": 233.4273223876953, "learning_rate": 9.76152528287253e-07, "loss": 8.3059, "step": 408970 }, { "epoch": 0.8261654755026927, "grad_norm": 132.3906707763672, "learning_rate": 9.759453369852213e-07, "loss": 14.8877, "step": 408980 }, { "epoch": 0.8261856761353765, "grad_norm": 229.14915466308594, "learning_rate": 9.75738165295943e-07, "loss": 17.5051, "step": 408990 }, { "epoch": 0.8262058767680603, "grad_norm": 1248.712646484375, "learning_rate": 9.7553101322043e-07, "loss": 33.4552, "step": 409000 }, 
{ "epoch": 0.8262260774007442, "grad_norm": 239.10772705078125, "learning_rate": 9.753238807596903e-07, "loss": 16.8984, "step": 409010 }, { "epoch": 0.826246278033428, "grad_norm": 1.1654975414276123, "learning_rate": 9.751167679147328e-07, "loss": 14.2042, "step": 409020 }, { "epoch": 0.8262664786661118, "grad_norm": 348.69879150390625, "learning_rate": 9.749096746865695e-07, "loss": 20.265, "step": 409030 }, { "epoch": 0.8262866792987956, "grad_norm": 96.15825653076172, "learning_rate": 9.747026010762084e-07, "loss": 9.8876, "step": 409040 }, { "epoch": 0.8263068799314794, "grad_norm": 471.69818115234375, "learning_rate": 9.744955470846567e-07, "loss": 20.7782, "step": 409050 }, { "epoch": 0.8263270805641633, "grad_norm": 855.0213623046875, "learning_rate": 9.74288512712926e-07, "loss": 17.223, "step": 409060 }, { "epoch": 0.8263472811968471, "grad_norm": 515.3373413085938, "learning_rate": 9.740814979620262e-07, "loss": 25.8495, "step": 409070 }, { "epoch": 0.8263674818295309, "grad_norm": 464.31170654296875, "learning_rate": 9.738745028329643e-07, "loss": 13.3487, "step": 409080 }, { "epoch": 0.8263876824622147, "grad_norm": 697.5110473632812, "learning_rate": 9.736675273267488e-07, "loss": 17.6352, "step": 409090 }, { "epoch": 0.8264078830948985, "grad_norm": 331.9514465332031, "learning_rate": 9.734605714443906e-07, "loss": 16.6086, "step": 409100 }, { "epoch": 0.8264280837275824, "grad_norm": 419.98114013671875, "learning_rate": 9.732536351868977e-07, "loss": 14.8694, "step": 409110 }, { "epoch": 0.8264482843602662, "grad_norm": 104.95368194580078, "learning_rate": 9.730467185552762e-07, "loss": 11.4161, "step": 409120 }, { "epoch": 0.82646848499295, "grad_norm": 51.8902702331543, "learning_rate": 9.728398215505369e-07, "loss": 30.52, "step": 409130 }, { "epoch": 0.8264886856256338, "grad_norm": 601.8125, "learning_rate": 9.72632944173691e-07, "loss": 40.3721, "step": 409140 }, { "epoch": 0.8265088862583176, "grad_norm": 461.0328063964844, "learning_rate": 9.724260864257401e-07, "loss": 19.0334, "step": 409150 }, { "epoch": 0.8265290868910015, "grad_norm": 313.46246337890625, "learning_rate": 9.722192483076965e-07, "loss": 17.4693, "step": 409160 }, { "epoch": 0.8265492875236853, "grad_norm": 615.93212890625, "learning_rate": 9.720124298205692e-07, "loss": 20.7167, "step": 409170 }, { "epoch": 0.8265694881563691, "grad_norm": 209.0207977294922, "learning_rate": 9.718056309653646e-07, "loss": 31.1384, "step": 409180 }, { "epoch": 0.8265896887890529, "grad_norm": 490.0838623046875, "learning_rate": 9.715988517430896e-07, "loss": 19.0117, "step": 409190 }, { "epoch": 0.8266098894217367, "grad_norm": 313.73626708984375, "learning_rate": 9.713920921547532e-07, "loss": 13.3818, "step": 409200 }, { "epoch": 0.8266300900544205, "grad_norm": 177.05203247070312, "learning_rate": 9.711853522013653e-07, "loss": 26.8708, "step": 409210 }, { "epoch": 0.8266502906871043, "grad_norm": 211.54241943359375, "learning_rate": 9.709786318839293e-07, "loss": 21.1137, "step": 409220 }, { "epoch": 0.8266704913197881, "grad_norm": 538.9469604492188, "learning_rate": 9.707719312034548e-07, "loss": 10.6276, "step": 409230 }, { "epoch": 0.8266906919524719, "grad_norm": 26.654558181762695, "learning_rate": 9.705652501609503e-07, "loss": 10.1592, "step": 409240 }, { "epoch": 0.8267108925851557, "grad_norm": 358.67205810546875, "learning_rate": 9.70358588757422e-07, "loss": 12.8663, "step": 409250 }, { "epoch": 0.8267310932178396, "grad_norm": 203.9763946533203, "learning_rate": 9.701519469938759e-07, "loss": 
17.6682, "step": 409260 }, { "epoch": 0.8267512938505234, "grad_norm": 100.33971405029297, "learning_rate": 9.699453248713215e-07, "loss": 13.6117, "step": 409270 }, { "epoch": 0.8267714944832072, "grad_norm": 35.611656188964844, "learning_rate": 9.69738722390765e-07, "loss": 19.3551, "step": 409280 }, { "epoch": 0.826791695115891, "grad_norm": 36.33995819091797, "learning_rate": 9.69532139553212e-07, "loss": 9.602, "step": 409290 }, { "epoch": 0.8268118957485748, "grad_norm": 1031.1058349609375, "learning_rate": 9.69325576359672e-07, "loss": 19.8677, "step": 409300 }, { "epoch": 0.8268320963812587, "grad_norm": 456.8401184082031, "learning_rate": 9.691190328111488e-07, "loss": 16.6949, "step": 409310 }, { "epoch": 0.8268522970139425, "grad_norm": 153.07640075683594, "learning_rate": 9.689125089086514e-07, "loss": 14.1037, "step": 409320 }, { "epoch": 0.8268724976466263, "grad_norm": 115.52783203125, "learning_rate": 9.687060046531866e-07, "loss": 20.026, "step": 409330 }, { "epoch": 0.8268926982793101, "grad_norm": 486.40216064453125, "learning_rate": 9.684995200457574e-07, "loss": 18.2196, "step": 409340 }, { "epoch": 0.8269128989119939, "grad_norm": 444.5055236816406, "learning_rate": 9.682930550873742e-07, "loss": 12.0368, "step": 409350 }, { "epoch": 0.8269330995446778, "grad_norm": 383.5010986328125, "learning_rate": 9.680866097790409e-07, "loss": 9.2006, "step": 409360 }, { "epoch": 0.8269533001773616, "grad_norm": 85.42082977294922, "learning_rate": 9.67880184121765e-07, "loss": 11.2452, "step": 409370 }, { "epoch": 0.8269735008100454, "grad_norm": 515.4810180664062, "learning_rate": 9.676737781165513e-07, "loss": 16.0794, "step": 409380 }, { "epoch": 0.8269937014427292, "grad_norm": 191.40391540527344, "learning_rate": 9.674673917644072e-07, "loss": 23.007, "step": 409390 }, { "epoch": 0.827013902075413, "grad_norm": 231.99940490722656, "learning_rate": 9.67261025066339e-07, "loss": 21.2716, "step": 409400 }, { "epoch": 0.8270341027080969, "grad_norm": 417.308837890625, "learning_rate": 9.670546780233493e-07, "loss": 16.8201, "step": 409410 }, { "epoch": 0.8270543033407807, "grad_norm": 106.47950744628906, "learning_rate": 9.668483506364462e-07, "loss": 10.7087, "step": 409420 }, { "epoch": 0.8270745039734645, "grad_norm": 244.87522888183594, "learning_rate": 9.666420429066376e-07, "loss": 14.4273, "step": 409430 }, { "epoch": 0.8270947046061483, "grad_norm": 456.020263671875, "learning_rate": 9.66435754834924e-07, "loss": 18.7284, "step": 409440 }, { "epoch": 0.8271149052388321, "grad_norm": 318.734130859375, "learning_rate": 9.662294864223132e-07, "loss": 23.9312, "step": 409450 }, { "epoch": 0.8271351058715158, "grad_norm": 193.71820068359375, "learning_rate": 9.66023237669812e-07, "loss": 13.9561, "step": 409460 }, { "epoch": 0.8271553065041997, "grad_norm": 279.223388671875, "learning_rate": 9.658170085784242e-07, "loss": 18.7789, "step": 409470 }, { "epoch": 0.8271755071368835, "grad_norm": 574.6185302734375, "learning_rate": 9.656107991491536e-07, "loss": 19.9033, "step": 409480 }, { "epoch": 0.8271957077695673, "grad_norm": 125.4744644165039, "learning_rate": 9.654046093830084e-07, "loss": 18.6923, "step": 409490 }, { "epoch": 0.8272159084022511, "grad_norm": 112.13639831542969, "learning_rate": 9.651984392809916e-07, "loss": 10.577, "step": 409500 }, { "epoch": 0.8272361090349349, "grad_norm": 221.3907012939453, "learning_rate": 9.649922888441065e-07, "loss": 18.3657, "step": 409510 }, { "epoch": 0.8272563096676188, "grad_norm": 281.2359313964844, "learning_rate": 
9.647861580733603e-07, "loss": 22.1341, "step": 409520 }, { "epoch": 0.8272765103003026, "grad_norm": 614.3504028320312, "learning_rate": 9.645800469697575e-07, "loss": 24.8991, "step": 409530 }, { "epoch": 0.8272967109329864, "grad_norm": 302.5175476074219, "learning_rate": 9.64373955534303e-07, "loss": 20.377, "step": 409540 }, { "epoch": 0.8273169115656702, "grad_norm": 362.9315185546875, "learning_rate": 9.641678837679985e-07, "loss": 26.6605, "step": 409550 }, { "epoch": 0.827337112198354, "grad_norm": 357.0892639160156, "learning_rate": 9.639618316718519e-07, "loss": 17.2189, "step": 409560 }, { "epoch": 0.8273573128310379, "grad_norm": 231.19859313964844, "learning_rate": 9.637557992468655e-07, "loss": 13.5761, "step": 409570 }, { "epoch": 0.8273775134637217, "grad_norm": 276.0561828613281, "learning_rate": 9.635497864940425e-07, "loss": 15.793, "step": 409580 }, { "epoch": 0.8273977140964055, "grad_norm": 300.688720703125, "learning_rate": 9.633437934143896e-07, "loss": 13.512, "step": 409590 }, { "epoch": 0.8274179147290893, "grad_norm": 255.2481231689453, "learning_rate": 9.631378200089082e-07, "loss": 33.7828, "step": 409600 }, { "epoch": 0.8274381153617731, "grad_norm": 938.2775268554688, "learning_rate": 9.629318662786047e-07, "loss": 24.1358, "step": 409610 }, { "epoch": 0.827458315994457, "grad_norm": 257.51910400390625, "learning_rate": 9.62725932224482e-07, "loss": 14.944, "step": 409620 }, { "epoch": 0.8274785166271408, "grad_norm": 453.6352233886719, "learning_rate": 9.62520017847542e-07, "loss": 10.9707, "step": 409630 }, { "epoch": 0.8274987172598246, "grad_norm": 374.6878356933594, "learning_rate": 9.623141231487904e-07, "loss": 20.3693, "step": 409640 }, { "epoch": 0.8275189178925084, "grad_norm": 401.16461181640625, "learning_rate": 9.621082481292309e-07, "loss": 12.5384, "step": 409650 }, { "epoch": 0.8275391185251922, "grad_norm": 433.9198303222656, "learning_rate": 9.61902392789864e-07, "loss": 25.6992, "step": 409660 }, { "epoch": 0.8275593191578761, "grad_norm": 450.6440734863281, "learning_rate": 9.616965571316956e-07, "loss": 25.1214, "step": 409670 }, { "epoch": 0.8275795197905599, "grad_norm": 409.1781311035156, "learning_rate": 9.6149074115573e-07, "loss": 18.633, "step": 409680 }, { "epoch": 0.8275997204232437, "grad_norm": 516.9926147460938, "learning_rate": 9.61284944862968e-07, "loss": 21.039, "step": 409690 }, { "epoch": 0.8276199210559275, "grad_norm": 377.6710205078125, "learning_rate": 9.610791682544123e-07, "loss": 5.7048, "step": 409700 }, { "epoch": 0.8276401216886113, "grad_norm": 592.4374389648438, "learning_rate": 9.608734113310685e-07, "loss": 32.1657, "step": 409710 }, { "epoch": 0.827660322321295, "grad_norm": 182.7586212158203, "learning_rate": 9.606676740939375e-07, "loss": 17.4946, "step": 409720 }, { "epoch": 0.8276805229539789, "grad_norm": 0.0, "learning_rate": 9.60461956544021e-07, "loss": 10.775, "step": 409730 }, { "epoch": 0.8277007235866627, "grad_norm": 240.7188262939453, "learning_rate": 9.602562586823232e-07, "loss": 55.8408, "step": 409740 }, { "epoch": 0.8277209242193465, "grad_norm": 155.72105407714844, "learning_rate": 9.600505805098486e-07, "loss": 4.9667, "step": 409750 }, { "epoch": 0.8277411248520303, "grad_norm": 138.11083984375, "learning_rate": 9.59844922027595e-07, "loss": 17.2876, "step": 409760 }, { "epoch": 0.8277613254847141, "grad_norm": 406.36859130859375, "learning_rate": 9.596392832365676e-07, "loss": 17.6788, "step": 409770 }, { "epoch": 0.827781526117398, "grad_norm": 396.15576171875, "learning_rate": 
9.594336641377695e-07, "loss": 13.3846, "step": 409780 }, { "epoch": 0.8278017267500818, "grad_norm": 513.1149291992188, "learning_rate": 9.592280647322015e-07, "loss": 26.491, "step": 409790 }, { "epoch": 0.8278219273827656, "grad_norm": 1416.871826171875, "learning_rate": 9.590224850208645e-07, "loss": 35.7224, "step": 409800 }, { "epoch": 0.8278421280154494, "grad_norm": 564.0831909179688, "learning_rate": 9.588169250047624e-07, "loss": 13.2908, "step": 409810 }, { "epoch": 0.8278623286481332, "grad_norm": 514.4425659179688, "learning_rate": 9.586113846848982e-07, "loss": 17.1973, "step": 409820 }, { "epoch": 0.8278825292808171, "grad_norm": 400.3626708984375, "learning_rate": 9.584058640622702e-07, "loss": 14.8681, "step": 409830 }, { "epoch": 0.8279027299135009, "grad_norm": 304.7544250488281, "learning_rate": 9.58200363137881e-07, "loss": 18.7906, "step": 409840 }, { "epoch": 0.8279229305461847, "grad_norm": 199.91624450683594, "learning_rate": 9.57994881912735e-07, "loss": 19.397, "step": 409850 }, { "epoch": 0.8279431311788685, "grad_norm": 708.1905517578125, "learning_rate": 9.577894203878313e-07, "loss": 30.0399, "step": 409860 }, { "epoch": 0.8279633318115523, "grad_norm": 168.64984130859375, "learning_rate": 9.575839785641706e-07, "loss": 36.4297, "step": 409870 }, { "epoch": 0.8279835324442362, "grad_norm": 377.0751953125, "learning_rate": 9.573785564427563e-07, "loss": 9.8389, "step": 409880 }, { "epoch": 0.82800373307692, "grad_norm": 375.81939697265625, "learning_rate": 9.571731540245887e-07, "loss": 16.9625, "step": 409890 }, { "epoch": 0.8280239337096038, "grad_norm": 346.2193603515625, "learning_rate": 9.569677713106673e-07, "loss": 19.8607, "step": 409900 }, { "epoch": 0.8280441343422876, "grad_norm": 308.16851806640625, "learning_rate": 9.567624083019966e-07, "loss": 22.3001, "step": 409910 }, { "epoch": 0.8280643349749714, "grad_norm": 540.9521484375, "learning_rate": 9.565570649995736e-07, "loss": 23.195, "step": 409920 }, { "epoch": 0.8280845356076553, "grad_norm": 7.390625476837158, "learning_rate": 9.563517414044028e-07, "loss": 11.8909, "step": 409930 }, { "epoch": 0.8281047362403391, "grad_norm": 313.5946960449219, "learning_rate": 9.561464375174827e-07, "loss": 10.91, "step": 409940 }, { "epoch": 0.8281249368730229, "grad_norm": 405.2649230957031, "learning_rate": 9.559411533398139e-07, "loss": 11.8811, "step": 409950 }, { "epoch": 0.8281451375057067, "grad_norm": 400.70880126953125, "learning_rate": 9.557358888723977e-07, "loss": 11.1705, "step": 409960 }, { "epoch": 0.8281653381383905, "grad_norm": 217.29420471191406, "learning_rate": 9.555306441162337e-07, "loss": 10.797, "step": 409970 }, { "epoch": 0.8281855387710743, "grad_norm": 433.2265625, "learning_rate": 9.553254190723239e-07, "loss": 29.504, "step": 409980 }, { "epoch": 0.8282057394037581, "grad_norm": 104.81578063964844, "learning_rate": 9.55120213741666e-07, "loss": 11.4437, "step": 409990 }, { "epoch": 0.8282259400364419, "grad_norm": 110.06478881835938, "learning_rate": 9.549150281252633e-07, "loss": 14.5852, "step": 410000 }, { "epoch": 0.8282461406691257, "grad_norm": 260.6749572753906, "learning_rate": 9.54709862224114e-07, "loss": 18.3852, "step": 410010 }, { "epoch": 0.8282663413018095, "grad_norm": 134.43032836914062, "learning_rate": 9.545047160392169e-07, "loss": 13.3911, "step": 410020 }, { "epoch": 0.8282865419344934, "grad_norm": 419.7550354003906, "learning_rate": 9.54299589571574e-07, "loss": 20.2757, "step": 410030 }, { "epoch": 0.8283067425671772, "grad_norm": 461.2312316894531, 
"learning_rate": 9.540944828221848e-07, "loss": 16.329, "step": 410040 }, { "epoch": 0.828326943199861, "grad_norm": 4.0432024002075195, "learning_rate": 9.538893957920464e-07, "loss": 6.0722, "step": 410050 }, { "epoch": 0.8283471438325448, "grad_norm": 3.8514244556427, "learning_rate": 9.536843284821612e-07, "loss": 7.3329, "step": 410060 }, { "epoch": 0.8283673444652286, "grad_norm": 149.021484375, "learning_rate": 9.534792808935284e-07, "loss": 10.045, "step": 410070 }, { "epoch": 0.8283875450979125, "grad_norm": 102.64237213134766, "learning_rate": 9.532742530271471e-07, "loss": 13.7285, "step": 410080 }, { "epoch": 0.8284077457305963, "grad_norm": 328.27374267578125, "learning_rate": 9.530692448840151e-07, "loss": 20.2738, "step": 410090 }, { "epoch": 0.8284279463632801, "grad_norm": 646.1759643554688, "learning_rate": 9.528642564651341e-07, "loss": 32.3501, "step": 410100 }, { "epoch": 0.8284481469959639, "grad_norm": 377.765380859375, "learning_rate": 9.526592877715019e-07, "loss": 15.0377, "step": 410110 }, { "epoch": 0.8284683476286477, "grad_norm": 209.18014526367188, "learning_rate": 9.524543388041157e-07, "loss": 56.0616, "step": 410120 }, { "epoch": 0.8284885482613316, "grad_norm": 128.7597198486328, "learning_rate": 9.522494095639762e-07, "loss": 13.2762, "step": 410130 }, { "epoch": 0.8285087488940154, "grad_norm": 7.26552677154541, "learning_rate": 9.52044500052084e-07, "loss": 7.3488, "step": 410140 }, { "epoch": 0.8285289495266992, "grad_norm": 275.5406188964844, "learning_rate": 9.518396102694355e-07, "loss": 33.0551, "step": 410150 }, { "epoch": 0.828549150159383, "grad_norm": 394.4302978515625, "learning_rate": 9.516347402170284e-07, "loss": 19.0952, "step": 410160 }, { "epoch": 0.8285693507920668, "grad_norm": 260.6647033691406, "learning_rate": 9.514298898958641e-07, "loss": 27.6525, "step": 410170 }, { "epoch": 0.8285895514247507, "grad_norm": 422.53643798828125, "learning_rate": 9.512250593069394e-07, "loss": 11.8781, "step": 410180 }, { "epoch": 0.8286097520574345, "grad_norm": 531.523193359375, "learning_rate": 9.510202484512516e-07, "loss": 12.3363, "step": 410190 }, { "epoch": 0.8286299526901183, "grad_norm": 101.8055419921875, "learning_rate": 9.508154573298012e-07, "loss": 18.8377, "step": 410200 }, { "epoch": 0.8286501533228021, "grad_norm": 139.24330139160156, "learning_rate": 9.506106859435838e-07, "loss": 15.0086, "step": 410210 }, { "epoch": 0.8286703539554859, "grad_norm": 263.025146484375, "learning_rate": 9.504059342936001e-07, "loss": 19.079, "step": 410220 }, { "epoch": 0.8286905545881696, "grad_norm": 224.90972900390625, "learning_rate": 9.502012023808466e-07, "loss": 22.5412, "step": 410230 }, { "epoch": 0.8287107552208535, "grad_norm": 428.4664611816406, "learning_rate": 9.499964902063203e-07, "loss": 15.266, "step": 410240 }, { "epoch": 0.8287309558535373, "grad_norm": 381.9537658691406, "learning_rate": 9.497917977710208e-07, "loss": 22.7627, "step": 410250 }, { "epoch": 0.8287511564862211, "grad_norm": 201.21485900878906, "learning_rate": 9.495871250759437e-07, "loss": 17.1629, "step": 410260 }, { "epoch": 0.8287713571189049, "grad_norm": 417.35638427734375, "learning_rate": 9.49382472122089e-07, "loss": 20.4261, "step": 410270 }, { "epoch": 0.8287915577515887, "grad_norm": 74.46168518066406, "learning_rate": 9.491778389104511e-07, "loss": 81.7224, "step": 410280 }, { "epoch": 0.8288117583842726, "grad_norm": 80.65632629394531, "learning_rate": 9.489732254420315e-07, "loss": 12.7389, "step": 410290 }, { "epoch": 0.8288319590169564, 
"grad_norm": 187.4225311279297, "learning_rate": 9.487686317178241e-07, "loss": 10.1137, "step": 410300 }, { "epoch": 0.8288521596496402, "grad_norm": 145.52528381347656, "learning_rate": 9.48564057738826e-07, "loss": 16.1393, "step": 410310 }, { "epoch": 0.828872360282324, "grad_norm": 100.24214172363281, "learning_rate": 9.483595035060367e-07, "loss": 10.1025, "step": 410320 }, { "epoch": 0.8288925609150078, "grad_norm": 383.0196533203125, "learning_rate": 9.481549690204517e-07, "loss": 14.8997, "step": 410330 }, { "epoch": 0.8289127615476917, "grad_norm": 258.4500427246094, "learning_rate": 9.47950454283067e-07, "loss": 24.6888, "step": 410340 }, { "epoch": 0.8289329621803755, "grad_norm": 315.6438293457031, "learning_rate": 9.477459592948796e-07, "loss": 29.4249, "step": 410350 }, { "epoch": 0.8289531628130593, "grad_norm": 337.36968994140625, "learning_rate": 9.475414840568903e-07, "loss": 13.5003, "step": 410360 }, { "epoch": 0.8289733634457431, "grad_norm": 419.51239013671875, "learning_rate": 9.473370285700889e-07, "loss": 19.1721, "step": 410370 }, { "epoch": 0.828993564078427, "grad_norm": 99.75167083740234, "learning_rate": 9.471325928354758e-07, "loss": 19.118, "step": 410380 }, { "epoch": 0.8290137647111108, "grad_norm": 387.23175048828125, "learning_rate": 9.469281768540484e-07, "loss": 22.0007, "step": 410390 }, { "epoch": 0.8290339653437946, "grad_norm": 287.00994873046875, "learning_rate": 9.467237806268009e-07, "loss": 13.1687, "step": 410400 }, { "epoch": 0.8290541659764784, "grad_norm": 621.3529052734375, "learning_rate": 9.465194041547294e-07, "loss": 19.163, "step": 410410 }, { "epoch": 0.8290743666091622, "grad_norm": 392.2379455566406, "learning_rate": 9.463150474388305e-07, "loss": 23.21, "step": 410420 }, { "epoch": 0.829094567241846, "grad_norm": 42.52976989746094, "learning_rate": 9.461107104801026e-07, "loss": 14.2209, "step": 410430 }, { "epoch": 0.8291147678745299, "grad_norm": 2.3006131649017334, "learning_rate": 9.45906393279537e-07, "loss": 14.5232, "step": 410440 }, { "epoch": 0.8291349685072137, "grad_norm": 663.354248046875, "learning_rate": 9.457020958381324e-07, "loss": 22.7271, "step": 410450 }, { "epoch": 0.8291551691398975, "grad_norm": 302.34716796875, "learning_rate": 9.454978181568847e-07, "loss": 10.4355, "step": 410460 }, { "epoch": 0.8291753697725813, "grad_norm": 488.5825500488281, "learning_rate": 9.452935602367897e-07, "loss": 15.7274, "step": 410470 }, { "epoch": 0.8291955704052651, "grad_norm": 405.4479064941406, "learning_rate": 9.450893220788399e-07, "loss": 18.9441, "step": 410480 }, { "epoch": 0.8292157710379489, "grad_norm": 227.84194946289062, "learning_rate": 9.448851036840345e-07, "loss": 25.4943, "step": 410490 }, { "epoch": 0.8292359716706327, "grad_norm": 303.5299072265625, "learning_rate": 9.446809050533679e-07, "loss": 10.6827, "step": 410500 }, { "epoch": 0.8292561723033165, "grad_norm": 159.5272979736328, "learning_rate": 9.444767261878329e-07, "loss": 22.9927, "step": 410510 }, { "epoch": 0.8292763729360003, "grad_norm": 342.5478515625, "learning_rate": 9.442725670884278e-07, "loss": 13.5177, "step": 410520 }, { "epoch": 0.8292965735686841, "grad_norm": 397.901611328125, "learning_rate": 9.440684277561452e-07, "loss": 17.6795, "step": 410530 }, { "epoch": 0.829316774201368, "grad_norm": 678.2149047851562, "learning_rate": 9.438643081919818e-07, "loss": 36.0053, "step": 410540 }, { "epoch": 0.8293369748340518, "grad_norm": 976.1620483398438, "learning_rate": 9.436602083969326e-07, "loss": 37.0097, "step": 410550 }, { 
"epoch": 0.8293571754667356, "grad_norm": 924.8463745117188, "learning_rate": 9.434561283719901e-07, "loss": 21.1344, "step": 410560 }, { "epoch": 0.8293773760994194, "grad_norm": 47.7503662109375, "learning_rate": 9.432520681181512e-07, "loss": 13.8507, "step": 410570 }, { "epoch": 0.8293975767321032, "grad_norm": 175.39149475097656, "learning_rate": 9.430480276364091e-07, "loss": 18.1541, "step": 410580 }, { "epoch": 0.8294177773647871, "grad_norm": 304.0611572265625, "learning_rate": 9.428440069277595e-07, "loss": 14.9955, "step": 410590 }, { "epoch": 0.8294379779974709, "grad_norm": 15.601140022277832, "learning_rate": 9.426400059931956e-07, "loss": 17.8279, "step": 410600 }, { "epoch": 0.8294581786301547, "grad_norm": 668.298095703125, "learning_rate": 9.424360248337128e-07, "loss": 29.1598, "step": 410610 }, { "epoch": 0.8294783792628385, "grad_norm": 408.1661376953125, "learning_rate": 9.422320634503052e-07, "loss": 15.3479, "step": 410620 }, { "epoch": 0.8294985798955223, "grad_norm": 19.777023315429688, "learning_rate": 9.420281218439648e-07, "loss": 16.2468, "step": 410630 }, { "epoch": 0.8295187805282062, "grad_norm": 189.5535430908203, "learning_rate": 9.418242000156886e-07, "loss": 21.9398, "step": 410640 }, { "epoch": 0.82953898116089, "grad_norm": 283.3282470703125, "learning_rate": 9.41620297966469e-07, "loss": 9.5893, "step": 410650 }, { "epoch": 0.8295591817935738, "grad_norm": 670.09765625, "learning_rate": 9.414164156972982e-07, "loss": 24.5431, "step": 410660 }, { "epoch": 0.8295793824262576, "grad_norm": 159.86378479003906, "learning_rate": 9.41212553209172e-07, "loss": 18.8697, "step": 410670 }, { "epoch": 0.8295995830589414, "grad_norm": 290.81488037109375, "learning_rate": 9.410087105030846e-07, "loss": 21.4635, "step": 410680 }, { "epoch": 0.8296197836916253, "grad_norm": 313.0645751953125, "learning_rate": 9.408048875800286e-07, "loss": 10.5297, "step": 410690 }, { "epoch": 0.8296399843243091, "grad_norm": 242.60292053222656, "learning_rate": 9.406010844409957e-07, "loss": 12.4543, "step": 410700 }, { "epoch": 0.8296601849569929, "grad_norm": 7.086318492889404, "learning_rate": 9.403973010869826e-07, "loss": 10.9747, "step": 410710 }, { "epoch": 0.8296803855896767, "grad_norm": 358.39837646484375, "learning_rate": 9.401935375189802e-07, "loss": 13.273, "step": 410720 }, { "epoch": 0.8297005862223605, "grad_norm": 1072.34130859375, "learning_rate": 9.39989793737981e-07, "loss": 26.2195, "step": 410730 }, { "epoch": 0.8297207868550442, "grad_norm": 271.1131896972656, "learning_rate": 9.39786069744979e-07, "loss": 10.2916, "step": 410740 }, { "epoch": 0.8297409874877281, "grad_norm": 144.45306396484375, "learning_rate": 9.395823655409686e-07, "loss": 13.8853, "step": 410750 }, { "epoch": 0.8297611881204119, "grad_norm": 406.6188049316406, "learning_rate": 9.393786811269418e-07, "loss": 28.0983, "step": 410760 }, { "epoch": 0.8297813887530957, "grad_norm": 303.7313537597656, "learning_rate": 9.391750165038887e-07, "loss": 31.1664, "step": 410770 }, { "epoch": 0.8298015893857795, "grad_norm": 591.87841796875, "learning_rate": 9.38971371672806e-07, "loss": 11.3182, "step": 410780 }, { "epoch": 0.8298217900184633, "grad_norm": 314.4297790527344, "learning_rate": 9.387677466346839e-07, "loss": 13.0558, "step": 410790 }, { "epoch": 0.8298419906511472, "grad_norm": 261.5039978027344, "learning_rate": 9.385641413905139e-07, "loss": 10.7185, "step": 410800 }, { "epoch": 0.829862191283831, "grad_norm": 632.2877807617188, "learning_rate": 9.383605559412911e-07, "loss": 
23.6123, "step": 410810 }, { "epoch": 0.8298823919165148, "grad_norm": 315.0278015136719, "learning_rate": 9.38156990288005e-07, "loss": 17.0746, "step": 410820 }, { "epoch": 0.8299025925491986, "grad_norm": 507.8117980957031, "learning_rate": 9.379534444316507e-07, "loss": 25.6956, "step": 410830 }, { "epoch": 0.8299227931818824, "grad_norm": 389.24224853515625, "learning_rate": 9.37749918373218e-07, "loss": 8.4088, "step": 410840 }, { "epoch": 0.8299429938145663, "grad_norm": 223.9938507080078, "learning_rate": 9.375464121136984e-07, "loss": 11.5526, "step": 410850 }, { "epoch": 0.8299631944472501, "grad_norm": 231.66468811035156, "learning_rate": 9.373429256540866e-07, "loss": 21.1557, "step": 410860 }, { "epoch": 0.8299833950799339, "grad_norm": 133.00701904296875, "learning_rate": 9.371394589953714e-07, "loss": 12.3585, "step": 410870 }, { "epoch": 0.8300035957126177, "grad_norm": 711.957275390625, "learning_rate": 9.369360121385463e-07, "loss": 20.4118, "step": 410880 }, { "epoch": 0.8300237963453015, "grad_norm": 536.5179443359375, "learning_rate": 9.367325850846015e-07, "loss": 12.788, "step": 410890 }, { "epoch": 0.8300439969779854, "grad_norm": 569.9052124023438, "learning_rate": 9.365291778345303e-07, "loss": 16.8838, "step": 410900 }, { "epoch": 0.8300641976106692, "grad_norm": 399.3359375, "learning_rate": 9.363257903893235e-07, "loss": 16.0271, "step": 410910 }, { "epoch": 0.830084398243353, "grad_norm": 554.6058959960938, "learning_rate": 9.361224227499704e-07, "loss": 17.2537, "step": 410920 }, { "epoch": 0.8301045988760368, "grad_norm": 352.2273864746094, "learning_rate": 9.359190749174645e-07, "loss": 35.0574, "step": 410930 }, { "epoch": 0.8301247995087206, "grad_norm": 416.0232849121094, "learning_rate": 9.357157468927969e-07, "loss": 25.0878, "step": 410940 }, { "epoch": 0.8301450001414045, "grad_norm": 665.92626953125, "learning_rate": 9.355124386769559e-07, "loss": 20.1041, "step": 410950 }, { "epoch": 0.8301652007740883, "grad_norm": 138.0343475341797, "learning_rate": 9.353091502709349e-07, "loss": 21.9213, "step": 410960 }, { "epoch": 0.8301854014067721, "grad_norm": 165.73696899414062, "learning_rate": 9.351058816757264e-07, "loss": 12.9137, "step": 410970 }, { "epoch": 0.8302056020394559, "grad_norm": 386.0030212402344, "learning_rate": 9.349026328923161e-07, "loss": 16.0946, "step": 410980 }, { "epoch": 0.8302258026721397, "grad_norm": 408.11669921875, "learning_rate": 9.346994039216972e-07, "loss": 11.2878, "step": 410990 }, { "epoch": 0.8302460033048235, "grad_norm": 404.0555419921875, "learning_rate": 9.344961947648624e-07, "loss": 6.5853, "step": 411000 }, { "epoch": 0.8302662039375073, "grad_norm": 180.45965576171875, "learning_rate": 9.342930054227994e-07, "loss": 17.7546, "step": 411010 }, { "epoch": 0.8302864045701911, "grad_norm": 379.35809326171875, "learning_rate": 9.340898358964978e-07, "loss": 11.1587, "step": 411020 }, { "epoch": 0.8303066052028749, "grad_norm": 179.00009155273438, "learning_rate": 9.338866861869494e-07, "loss": 23.9938, "step": 411030 }, { "epoch": 0.8303268058355587, "grad_norm": 211.85879516601562, "learning_rate": 9.336835562951468e-07, "loss": 8.7912, "step": 411040 }, { "epoch": 0.8303470064682426, "grad_norm": 167.1537628173828, "learning_rate": 9.334804462220748e-07, "loss": 14.0006, "step": 411050 }, { "epoch": 0.8303672071009264, "grad_norm": 565.4570922851562, "learning_rate": 9.332773559687258e-07, "loss": 22.9532, "step": 411060 }, { "epoch": 0.8303874077336102, "grad_norm": 299.7813720703125, "learning_rate": 
9.330742855360914e-07, "loss": 24.9923, "step": 411070 }, { "epoch": 0.830407608366294, "grad_norm": 625.1273193359375, "learning_rate": 9.32871234925159e-07, "loss": 21.7001, "step": 411080 }, { "epoch": 0.8304278089989778, "grad_norm": 334.21612548828125, "learning_rate": 9.326682041369178e-07, "loss": 19.3692, "step": 411090 }, { "epoch": 0.8304480096316617, "grad_norm": 416.40594482421875, "learning_rate": 9.3246519317236e-07, "loss": 12.3695, "step": 411100 }, { "epoch": 0.8304682102643455, "grad_norm": 254.921630859375, "learning_rate": 9.322622020324734e-07, "loss": 17.0086, "step": 411110 }, { "epoch": 0.8304884108970293, "grad_norm": 12.822563171386719, "learning_rate": 9.320592307182463e-07, "loss": 27.2341, "step": 411120 }, { "epoch": 0.8305086115297131, "grad_norm": 325.56964111328125, "learning_rate": 9.318562792306707e-07, "loss": 16.0775, "step": 411130 }, { "epoch": 0.8305288121623969, "grad_norm": 686.0072631835938, "learning_rate": 9.316533475707324e-07, "loss": 14.179, "step": 411140 }, { "epoch": 0.8305490127950808, "grad_norm": 207.0223388671875, "learning_rate": 9.314504357394233e-07, "loss": 18.4516, "step": 411150 }, { "epoch": 0.8305692134277646, "grad_norm": 388.1821594238281, "learning_rate": 9.312475437377322e-07, "loss": 21.1226, "step": 411160 }, { "epoch": 0.8305894140604484, "grad_norm": 361.5314636230469, "learning_rate": 9.310446715666449e-07, "loss": 15.0574, "step": 411170 }, { "epoch": 0.8306096146931322, "grad_norm": 407.1485290527344, "learning_rate": 9.30841819227154e-07, "loss": 15.617, "step": 411180 }, { "epoch": 0.830629815325816, "grad_norm": 270.5536804199219, "learning_rate": 9.306389867202454e-07, "loss": 18.7332, "step": 411190 }, { "epoch": 0.8306500159584999, "grad_norm": 639.2105712890625, "learning_rate": 9.304361740469103e-07, "loss": 15.6452, "step": 411200 }, { "epoch": 0.8306702165911837, "grad_norm": 461.1962585449219, "learning_rate": 9.302333812081338e-07, "loss": 22.9837, "step": 411210 }, { "epoch": 0.8306904172238675, "grad_norm": 152.81753540039062, "learning_rate": 9.300306082049082e-07, "loss": 16.5258, "step": 411220 }, { "epoch": 0.8307106178565513, "grad_norm": 199.67291259765625, "learning_rate": 9.298278550382189e-07, "loss": 6.3796, "step": 411230 }, { "epoch": 0.8307308184892351, "grad_norm": 280.73388671875, "learning_rate": 9.296251217090546e-07, "loss": 18.1557, "step": 411240 }, { "epoch": 0.8307510191219188, "grad_norm": 253.43238830566406, "learning_rate": 9.294224082184045e-07, "loss": 22.3909, "step": 411250 }, { "epoch": 0.8307712197546027, "grad_norm": 312.5126647949219, "learning_rate": 9.29219714567256e-07, "loss": 7.3432, "step": 411260 }, { "epoch": 0.8307914203872865, "grad_norm": 192.78823852539062, "learning_rate": 9.290170407565957e-07, "loss": 15.9798, "step": 411270 }, { "epoch": 0.8308116210199703, "grad_norm": 375.00958251953125, "learning_rate": 9.288143867874127e-07, "loss": 23.6673, "step": 411280 }, { "epoch": 0.8308318216526541, "grad_norm": 290.5165100097656, "learning_rate": 9.286117526606958e-07, "loss": 15.3995, "step": 411290 }, { "epoch": 0.8308520222853379, "grad_norm": 379.6672668457031, "learning_rate": 9.284091383774313e-07, "loss": 16.1637, "step": 411300 }, { "epoch": 0.8308722229180218, "grad_norm": 318.8584899902344, "learning_rate": 9.282065439386057e-07, "loss": 17.6491, "step": 411310 }, { "epoch": 0.8308924235507056, "grad_norm": 13.372910499572754, "learning_rate": 9.280039693452086e-07, "loss": 9.7132, "step": 411320 }, { "epoch": 0.8309126241833894, "grad_norm": 
274.3443603515625, "learning_rate": 9.278014145982261e-07, "loss": 11.666, "step": 411330 }, { "epoch": 0.8309328248160732, "grad_norm": 213.70191955566406, "learning_rate": 9.275988796986451e-07, "loss": 16.9673, "step": 411340 }, { "epoch": 0.830953025448757, "grad_norm": 491.6261291503906, "learning_rate": 9.273963646474527e-07, "loss": 16.951, "step": 411350 }, { "epoch": 0.8309732260814409, "grad_norm": 42.702850341796875, "learning_rate": 9.271938694456378e-07, "loss": 15.4695, "step": 411360 }, { "epoch": 0.8309934267141247, "grad_norm": 469.57257080078125, "learning_rate": 9.26991394094186e-07, "loss": 22.5239, "step": 411370 }, { "epoch": 0.8310136273468085, "grad_norm": 788.18017578125, "learning_rate": 9.267889385940826e-07, "loss": 33.9399, "step": 411380 }, { "epoch": 0.8310338279794923, "grad_norm": 29.37600326538086, "learning_rate": 9.265865029463178e-07, "loss": 24.9759, "step": 411390 }, { "epoch": 0.8310540286121761, "grad_norm": 793.1854858398438, "learning_rate": 9.263840871518759e-07, "loss": 28.1995, "step": 411400 }, { "epoch": 0.83107422924486, "grad_norm": 246.13809204101562, "learning_rate": 9.261816912117428e-07, "loss": 25.9248, "step": 411410 }, { "epoch": 0.8310944298775438, "grad_norm": 212.0892791748047, "learning_rate": 9.259793151269075e-07, "loss": 23.8625, "step": 411420 }, { "epoch": 0.8311146305102276, "grad_norm": 0.5332641005516052, "learning_rate": 9.257769588983533e-07, "loss": 21.5472, "step": 411430 }, { "epoch": 0.8311348311429114, "grad_norm": 309.0165100097656, "learning_rate": 9.255746225270689e-07, "loss": 15.9197, "step": 411440 }, { "epoch": 0.8311550317755952, "grad_norm": 60.359275817871094, "learning_rate": 9.253723060140407e-07, "loss": 11.6712, "step": 411450 }, { "epoch": 0.8311752324082791, "grad_norm": 333.2156066894531, "learning_rate": 9.251700093602517e-07, "loss": 29.5264, "step": 411460 }, { "epoch": 0.8311954330409629, "grad_norm": 198.3731689453125, "learning_rate": 9.249677325666912e-07, "loss": 16.8443, "step": 411470 }, { "epoch": 0.8312156336736467, "grad_norm": 412.7635803222656, "learning_rate": 9.247654756343427e-07, "loss": 25.6259, "step": 411480 }, { "epoch": 0.8312358343063305, "grad_norm": 172.53404235839844, "learning_rate": 9.24563238564194e-07, "loss": 14.6385, "step": 411490 }, { "epoch": 0.8312560349390143, "grad_norm": 399.63623046875, "learning_rate": 9.243610213572285e-07, "loss": 25.3241, "step": 411500 }, { "epoch": 0.831276235571698, "grad_norm": 438.2187805175781, "learning_rate": 9.241588240144345e-07, "loss": 24.2764, "step": 411510 }, { "epoch": 0.8312964362043819, "grad_norm": 141.4309844970703, "learning_rate": 9.23956646536796e-07, "loss": 25.2009, "step": 411520 }, { "epoch": 0.8313166368370657, "grad_norm": 895.9459228515625, "learning_rate": 9.237544889252969e-07, "loss": 19.0765, "step": 411530 }, { "epoch": 0.8313368374697495, "grad_norm": 254.4736328125, "learning_rate": 9.235523511809258e-07, "loss": 15.787, "step": 411540 }, { "epoch": 0.8313570381024333, "grad_norm": 673.9073486328125, "learning_rate": 9.233502333046662e-07, "loss": 18.7821, "step": 411550 }, { "epoch": 0.8313772387351172, "grad_norm": 572.1876220703125, "learning_rate": 9.231481352975014e-07, "loss": 16.1685, "step": 411560 }, { "epoch": 0.831397439367801, "grad_norm": 255.88951110839844, "learning_rate": 9.229460571604182e-07, "loss": 32.6987, "step": 411570 }, { "epoch": 0.8314176400004848, "grad_norm": 721.99560546875, "learning_rate": 9.227439988944042e-07, "loss": 19.6822, "step": 411580 }, { "epoch": 
0.8314378406331686, "grad_norm": 574.9505615234375, "learning_rate": 9.225419605004387e-07, "loss": 32.5775, "step": 411590 }, { "epoch": 0.8314580412658524, "grad_norm": 426.6134338378906, "learning_rate": 9.223399419795093e-07, "loss": 35.3379, "step": 411600 }, { "epoch": 0.8314782418985363, "grad_norm": 1008.5599365234375, "learning_rate": 9.221379433326017e-07, "loss": 29.3601, "step": 411610 }, { "epoch": 0.8314984425312201, "grad_norm": 346.8797912597656, "learning_rate": 9.21935964560699e-07, "loss": 17.456, "step": 411620 }, { "epoch": 0.8315186431639039, "grad_norm": 47.59032440185547, "learning_rate": 9.217340056647844e-07, "loss": 10.1423, "step": 411630 }, { "epoch": 0.8315388437965877, "grad_norm": 1047.11083984375, "learning_rate": 9.215320666458438e-07, "loss": 23.1623, "step": 411640 }, { "epoch": 0.8315590444292715, "grad_norm": 1145.93212890625, "learning_rate": 9.213301475048642e-07, "loss": 25.3491, "step": 411650 }, { "epoch": 0.8315792450619554, "grad_norm": 226.3491973876953, "learning_rate": 9.211282482428241e-07, "loss": 10.8633, "step": 411660 }, { "epoch": 0.8315994456946392, "grad_norm": 306.0322570800781, "learning_rate": 9.209263688607095e-07, "loss": 20.6406, "step": 411670 }, { "epoch": 0.831619646327323, "grad_norm": 467.8283386230469, "learning_rate": 9.207245093595068e-07, "loss": 11.8403, "step": 411680 }, { "epoch": 0.8316398469600068, "grad_norm": 48.53239440917969, "learning_rate": 9.205226697401981e-07, "loss": 19.3827, "step": 411690 }, { "epoch": 0.8316600475926906, "grad_norm": 256.9255676269531, "learning_rate": 9.203208500037664e-07, "loss": 12.6553, "step": 411700 }, { "epoch": 0.8316802482253745, "grad_norm": 23.489212036132812, "learning_rate": 9.201190501511964e-07, "loss": 11.7247, "step": 411710 }, { "epoch": 0.8317004488580583, "grad_norm": 510.6328430175781, "learning_rate": 9.199172701834718e-07, "loss": 15.9846, "step": 411720 }, { "epoch": 0.8317206494907421, "grad_norm": 319.6410217285156, "learning_rate": 9.197155101015742e-07, "loss": 12.1113, "step": 411730 }, { "epoch": 0.8317408501234259, "grad_norm": 601.1150512695312, "learning_rate": 9.195137699064899e-07, "loss": 16.7689, "step": 411740 }, { "epoch": 0.8317610507561097, "grad_norm": 377.2607116699219, "learning_rate": 9.193120495991986e-07, "loss": 24.6712, "step": 411750 }, { "epoch": 0.8317812513887936, "grad_norm": 795.7567749023438, "learning_rate": 9.191103491806875e-07, "loss": 30.6676, "step": 411760 }, { "epoch": 0.8318014520214773, "grad_norm": 157.62258911132812, "learning_rate": 9.189086686519361e-07, "loss": 10.8706, "step": 411770 }, { "epoch": 0.8318216526541611, "grad_norm": 531.1155395507812, "learning_rate": 9.187070080139299e-07, "loss": 16.4465, "step": 411780 }, { "epoch": 0.8318418532868449, "grad_norm": 143.78717041015625, "learning_rate": 9.185053672676508e-07, "loss": 17.1793, "step": 411790 }, { "epoch": 0.8318620539195287, "grad_norm": 249.1612548828125, "learning_rate": 9.183037464140804e-07, "loss": 44.7882, "step": 411800 }, { "epoch": 0.8318822545522125, "grad_norm": 329.79327392578125, "learning_rate": 9.181021454542033e-07, "loss": 14.9824, "step": 411810 }, { "epoch": 0.8319024551848964, "grad_norm": 186.31150817871094, "learning_rate": 9.179005643890005e-07, "loss": 16.0255, "step": 411820 }, { "epoch": 0.8319226558175802, "grad_norm": 196.14999389648438, "learning_rate": 9.176990032194566e-07, "loss": 17.4042, "step": 411830 }, { "epoch": 0.831942856450264, "grad_norm": 456.0150451660156, "learning_rate": 9.174974619465521e-07, "loss": 
17.0844, "step": 411840 }, { "epoch": 0.8319630570829478, "grad_norm": 132.55392456054688, "learning_rate": 9.17295940571269e-07, "loss": 19.5425, "step": 411850 }, { "epoch": 0.8319832577156316, "grad_norm": 446.3443908691406, "learning_rate": 9.170944390945918e-07, "loss": 19.4433, "step": 411860 }, { "epoch": 0.8320034583483155, "grad_norm": 6.196475505828857, "learning_rate": 9.168929575175006e-07, "loss": 20.4936, "step": 411870 }, { "epoch": 0.8320236589809993, "grad_norm": 210.97549438476562, "learning_rate": 9.166914958409767e-07, "loss": 21.8871, "step": 411880 }, { "epoch": 0.8320438596136831, "grad_norm": 47.52590560913086, "learning_rate": 9.164900540660032e-07, "loss": 13.5703, "step": 411890 }, { "epoch": 0.8320640602463669, "grad_norm": 397.4734802246094, "learning_rate": 9.162886321935632e-07, "loss": 12.2626, "step": 411900 }, { "epoch": 0.8320842608790507, "grad_norm": 859.37890625, "learning_rate": 9.160872302246376e-07, "loss": 20.3554, "step": 411910 }, { "epoch": 0.8321044615117346, "grad_norm": 255.53822326660156, "learning_rate": 9.158858481602057e-07, "loss": 9.5881, "step": 411920 }, { "epoch": 0.8321246621444184, "grad_norm": 338.6055908203125, "learning_rate": 9.15684486001252e-07, "loss": 24.1861, "step": 411930 }, { "epoch": 0.8321448627771022, "grad_norm": 633.619384765625, "learning_rate": 9.154831437487571e-07, "loss": 34.5775, "step": 411940 }, { "epoch": 0.832165063409786, "grad_norm": 275.36859130859375, "learning_rate": 9.152818214037007e-07, "loss": 35.7982, "step": 411950 }, { "epoch": 0.8321852640424698, "grad_norm": 360.8341064453125, "learning_rate": 9.150805189670653e-07, "loss": 17.0425, "step": 411960 }, { "epoch": 0.8322054646751537, "grad_norm": 310.9940185546875, "learning_rate": 9.148792364398328e-07, "loss": 12.6153, "step": 411970 }, { "epoch": 0.8322256653078375, "grad_norm": 545.13232421875, "learning_rate": 9.146779738229838e-07, "loss": 33.1984, "step": 411980 }, { "epoch": 0.8322458659405213, "grad_norm": 190.50120544433594, "learning_rate": 9.144767311174979e-07, "loss": 43.5754, "step": 411990 }, { "epoch": 0.8322660665732051, "grad_norm": 486.85797119140625, "learning_rate": 9.142755083243577e-07, "loss": 26.0594, "step": 412000 }, { "epoch": 0.8322862672058889, "grad_norm": 248.93751525878906, "learning_rate": 9.140743054445434e-07, "loss": 26.355, "step": 412010 }, { "epoch": 0.8323064678385727, "grad_norm": 360.4058837890625, "learning_rate": 9.138731224790337e-07, "loss": 18.8557, "step": 412020 }, { "epoch": 0.8323266684712565, "grad_norm": 476.5465087890625, "learning_rate": 9.136719594288124e-07, "loss": 23.4403, "step": 412030 }, { "epoch": 0.8323468691039403, "grad_norm": 254.30259704589844, "learning_rate": 9.134708162948575e-07, "loss": 19.2192, "step": 412040 }, { "epoch": 0.8323670697366241, "grad_norm": 320.5162048339844, "learning_rate": 9.132696930781509e-07, "loss": 19.1303, "step": 412050 }, { "epoch": 0.8323872703693079, "grad_norm": 368.6312561035156, "learning_rate": 9.130685897796721e-07, "loss": 20.132, "step": 412060 }, { "epoch": 0.8324074710019918, "grad_norm": 410.7149963378906, "learning_rate": 9.128675064004006e-07, "loss": 15.2506, "step": 412070 }, { "epoch": 0.8324276716346756, "grad_norm": 75.53740692138672, "learning_rate": 9.126664429413179e-07, "loss": 16.6689, "step": 412080 }, { "epoch": 0.8324478722673594, "grad_norm": 356.1733093261719, "learning_rate": 9.124653994034022e-07, "loss": 17.0235, "step": 412090 }, { "epoch": 0.8324680729000432, "grad_norm": 258.3155822753906, "learning_rate": 
9.122643757876354e-07, "loss": 7.5383, "step": 412100 }, { "epoch": 0.832488273532727, "grad_norm": 229.26480102539062, "learning_rate": 9.120633720949951e-07, "loss": 22.1528, "step": 412110 }, { "epoch": 0.8325084741654108, "grad_norm": 417.92156982421875, "learning_rate": 9.118623883264633e-07, "loss": 16.4424, "step": 412120 }, { "epoch": 0.8325286747980947, "grad_norm": 139.851806640625, "learning_rate": 9.116614244830186e-07, "loss": 12.0887, "step": 412130 }, { "epoch": 0.8325488754307785, "grad_norm": 849.2883911132812, "learning_rate": 9.11460480565639e-07, "loss": 22.8037, "step": 412140 }, { "epoch": 0.8325690760634623, "grad_norm": 616.9066772460938, "learning_rate": 9.112595565753063e-07, "loss": 26.1492, "step": 412150 }, { "epoch": 0.8325892766961461, "grad_norm": 414.7738952636719, "learning_rate": 9.110586525129988e-07, "loss": 10.7356, "step": 412160 }, { "epoch": 0.83260947732883, "grad_norm": 107.51226043701172, "learning_rate": 9.108577683796938e-07, "loss": 11.3799, "step": 412170 }, { "epoch": 0.8326296779615138, "grad_norm": 339.81304931640625, "learning_rate": 9.106569041763725e-07, "loss": 18.7923, "step": 412180 }, { "epoch": 0.8326498785941976, "grad_norm": 277.03924560546875, "learning_rate": 9.104560599040158e-07, "loss": 11.1877, "step": 412190 }, { "epoch": 0.8326700792268814, "grad_norm": 409.3796081542969, "learning_rate": 9.10255235563598e-07, "loss": 9.2172, "step": 412200 }, { "epoch": 0.8326902798595652, "grad_norm": 397.9833068847656, "learning_rate": 9.100544311561e-07, "loss": 18.0096, "step": 412210 }, { "epoch": 0.832710480492249, "grad_norm": 225.15573120117188, "learning_rate": 9.098536466825014e-07, "loss": 21.4823, "step": 412220 }, { "epoch": 0.8327306811249329, "grad_norm": 524.3678588867188, "learning_rate": 9.096528821437806e-07, "loss": 21.0009, "step": 412230 }, { "epoch": 0.8327508817576167, "grad_norm": 525.455810546875, "learning_rate": 9.094521375409143e-07, "loss": 23.6072, "step": 412240 }, { "epoch": 0.8327710823903005, "grad_norm": 73.05036163330078, "learning_rate": 9.09251412874882e-07, "loss": 24.2346, "step": 412250 }, { "epoch": 0.8327912830229843, "grad_norm": 256.35650634765625, "learning_rate": 9.090507081466648e-07, "loss": 18.9737, "step": 412260 }, { "epoch": 0.8328114836556681, "grad_norm": 86.88150024414062, "learning_rate": 9.088500233572356e-07, "loss": 8.7211, "step": 412270 }, { "epoch": 0.8328316842883519, "grad_norm": 199.1029052734375, "learning_rate": 9.086493585075757e-07, "loss": 24.8217, "step": 412280 }, { "epoch": 0.8328518849210357, "grad_norm": 113.49649810791016, "learning_rate": 9.08448713598663e-07, "loss": 23.8517, "step": 412290 }, { "epoch": 0.8328720855537195, "grad_norm": 601.8291625976562, "learning_rate": 9.08248088631476e-07, "loss": 17.3809, "step": 412300 }, { "epoch": 0.8328922861864033, "grad_norm": 443.21893310546875, "learning_rate": 9.080474836069896e-07, "loss": 12.3209, "step": 412310 }, { "epoch": 0.8329124868190871, "grad_norm": 333.4514465332031, "learning_rate": 9.078468985261851e-07, "loss": 10.3934, "step": 412320 }, { "epoch": 0.832932687451771, "grad_norm": 150.7950439453125, "learning_rate": 9.076463333900382e-07, "loss": 15.1107, "step": 412330 }, { "epoch": 0.8329528880844548, "grad_norm": 74.56997680664062, "learning_rate": 9.074457881995252e-07, "loss": 26.3782, "step": 412340 }, { "epoch": 0.8329730887171386, "grad_norm": 474.6545104980469, "learning_rate": 9.072452629556272e-07, "loss": 17.4449, "step": 412350 }, { "epoch": 0.8329932893498224, "grad_norm": 
289.1212463378906, "learning_rate": 9.070447576593172e-07, "loss": 29.2215, "step": 412360 }, { "epoch": 0.8330134899825062, "grad_norm": 527.9814453125, "learning_rate": 9.068442723115766e-07, "loss": 27.2809, "step": 412370 }, { "epoch": 0.8330336906151901, "grad_norm": 858.263916015625, "learning_rate": 9.066438069133787e-07, "loss": 26.6856, "step": 412380 }, { "epoch": 0.8330538912478739, "grad_norm": 259.76495361328125, "learning_rate": 9.064433614657042e-07, "loss": 14.1867, "step": 412390 }, { "epoch": 0.8330740918805577, "grad_norm": 548.4869995117188, "learning_rate": 9.06242935969528e-07, "loss": 17.8981, "step": 412400 }, { "epoch": 0.8330942925132415, "grad_norm": 194.95555114746094, "learning_rate": 9.060425304258263e-07, "loss": 17.1656, "step": 412410 }, { "epoch": 0.8331144931459253, "grad_norm": 574.6517333984375, "learning_rate": 9.058421448355775e-07, "loss": 14.5448, "step": 412420 }, { "epoch": 0.8331346937786092, "grad_norm": 333.46209716796875, "learning_rate": 9.056417791997568e-07, "loss": 11.9902, "step": 412430 }, { "epoch": 0.833154894411293, "grad_norm": 179.5861053466797, "learning_rate": 9.054414335193424e-07, "loss": 17.2803, "step": 412440 }, { "epoch": 0.8331750950439768, "grad_norm": 481.0077819824219, "learning_rate": 9.052411077953099e-07, "loss": 19.6829, "step": 412450 }, { "epoch": 0.8331952956766606, "grad_norm": 673.9221801757812, "learning_rate": 9.050408020286344e-07, "loss": 18.1825, "step": 412460 }, { "epoch": 0.8332154963093444, "grad_norm": 432.15472412109375, "learning_rate": 9.048405162202944e-07, "loss": 20.572, "step": 412470 }, { "epoch": 0.8332356969420283, "grad_norm": 394.2590026855469, "learning_rate": 9.046402503712653e-07, "loss": 20.2231, "step": 412480 }, { "epoch": 0.8332558975747121, "grad_norm": 0.0, "learning_rate": 9.044400044825219e-07, "loss": 21.5337, "step": 412490 }, { "epoch": 0.8332760982073959, "grad_norm": 275.9071044921875, "learning_rate": 9.042397785550405e-07, "loss": 18.5616, "step": 412500 }, { "epoch": 0.8332962988400797, "grad_norm": 193.82522583007812, "learning_rate": 9.04039572589801e-07, "loss": 28.0304, "step": 412510 }, { "epoch": 0.8333164994727635, "grad_norm": 559.5623168945312, "learning_rate": 9.038393865877725e-07, "loss": 26.5004, "step": 412520 }, { "epoch": 0.8333367001054472, "grad_norm": 394.2783203125, "learning_rate": 9.036392205499344e-07, "loss": 12.8299, "step": 412530 }, { "epoch": 0.8333569007381311, "grad_norm": 119.73848724365234, "learning_rate": 9.034390744772637e-07, "loss": 22.5734, "step": 412540 }, { "epoch": 0.8333771013708149, "grad_norm": 119.30097961425781, "learning_rate": 9.032389483707332e-07, "loss": 22.4433, "step": 412550 }, { "epoch": 0.8333973020034987, "grad_norm": 694.3966064453125, "learning_rate": 9.030388422313185e-07, "loss": 26.6818, "step": 412560 }, { "epoch": 0.8334175026361825, "grad_norm": 45.48277282714844, "learning_rate": 9.028387560599955e-07, "loss": 17.5723, "step": 412570 }, { "epoch": 0.8334377032688663, "grad_norm": 206.2269744873047, "learning_rate": 9.026386898577417e-07, "loss": 17.653, "step": 412580 }, { "epoch": 0.8334579039015502, "grad_norm": 218.09690856933594, "learning_rate": 9.024386436255278e-07, "loss": 20.3943, "step": 412590 }, { "epoch": 0.833478104534234, "grad_norm": 193.77537536621094, "learning_rate": 9.022386173643305e-07, "loss": 10.1374, "step": 412600 }, { "epoch": 0.8334983051669178, "grad_norm": 94.75252532958984, "learning_rate": 9.020386110751267e-07, "loss": 11.1618, "step": 412610 }, { "epoch": 
0.8335185057996016, "grad_norm": 260.6609191894531, "learning_rate": 9.018386247588901e-07, "loss": 9.4794, "step": 412620 }, { "epoch": 0.8335387064322854, "grad_norm": 758.8001098632812, "learning_rate": 9.016386584165932e-07, "loss": 38.9882, "step": 412630 }, { "epoch": 0.8335589070649693, "grad_norm": 483.4865417480469, "learning_rate": 9.014387120492141e-07, "loss": 16.5911, "step": 412640 }, { "epoch": 0.8335791076976531, "grad_norm": 469.7196350097656, "learning_rate": 9.012387856577238e-07, "loss": 22.2123, "step": 412650 }, { "epoch": 0.8335993083303369, "grad_norm": 333.5057678222656, "learning_rate": 9.010388792431002e-07, "loss": 17.5803, "step": 412660 }, { "epoch": 0.8336195089630207, "grad_norm": 61.50606918334961, "learning_rate": 9.008389928063161e-07, "loss": 20.5005, "step": 412670 }, { "epoch": 0.8336397095957045, "grad_norm": 86.1005859375, "learning_rate": 9.006391263483438e-07, "loss": 6.5381, "step": 412680 }, { "epoch": 0.8336599102283884, "grad_norm": 73.05723571777344, "learning_rate": 9.004392798701605e-07, "loss": 7.1045, "step": 412690 }, { "epoch": 0.8336801108610722, "grad_norm": 180.08389282226562, "learning_rate": 9.002394533727382e-07, "loss": 19.4751, "step": 412700 }, { "epoch": 0.833700311493756, "grad_norm": 220.0135498046875, "learning_rate": 9.000396468570527e-07, "loss": 20.2486, "step": 412710 }, { "epoch": 0.8337205121264398, "grad_norm": 930.2962036132812, "learning_rate": 8.998398603240755e-07, "loss": 33.2094, "step": 412720 }, { "epoch": 0.8337407127591236, "grad_norm": 440.52740478515625, "learning_rate": 8.996400937747823e-07, "loss": 17.4437, "step": 412730 }, { "epoch": 0.8337609133918075, "grad_norm": 323.77581787109375, "learning_rate": 8.994403472101465e-07, "loss": 11.9247, "step": 412740 }, { "epoch": 0.8337811140244913, "grad_norm": 193.76463317871094, "learning_rate": 8.992406206311394e-07, "loss": 11.6993, "step": 412750 }, { "epoch": 0.8338013146571751, "grad_norm": 407.347412109375, "learning_rate": 8.990409140387374e-07, "loss": 19.1552, "step": 412760 }, { "epoch": 0.8338215152898589, "grad_norm": 499.37908935546875, "learning_rate": 8.988412274339131e-07, "loss": 25.1889, "step": 412770 }, { "epoch": 0.8338417159225427, "grad_norm": 443.1248779296875, "learning_rate": 8.986415608176375e-07, "loss": 15.8374, "step": 412780 }, { "epoch": 0.8338619165552265, "grad_norm": 390.159423828125, "learning_rate": 8.984419141908857e-07, "loss": 21.9738, "step": 412790 }, { "epoch": 0.8338821171879103, "grad_norm": 1117.803466796875, "learning_rate": 8.982422875546332e-07, "loss": 19.0937, "step": 412800 }, { "epoch": 0.8339023178205941, "grad_norm": 166.6840362548828, "learning_rate": 8.980426809098475e-07, "loss": 17.6338, "step": 412810 }, { "epoch": 0.8339225184532779, "grad_norm": 23.828283309936523, "learning_rate": 8.978430942575045e-07, "loss": 15.1676, "step": 412820 }, { "epoch": 0.8339427190859617, "grad_norm": 1324.7325439453125, "learning_rate": 8.976435275985779e-07, "loss": 23.5533, "step": 412830 }, { "epoch": 0.8339629197186456, "grad_norm": 59.1019401550293, "learning_rate": 8.974439809340391e-07, "loss": 13.1922, "step": 412840 }, { "epoch": 0.8339831203513294, "grad_norm": 763.927978515625, "learning_rate": 8.972444542648595e-07, "loss": 15.6377, "step": 412850 }, { "epoch": 0.8340033209840132, "grad_norm": 167.46139526367188, "learning_rate": 8.970449475920129e-07, "loss": 19.5433, "step": 412860 }, { "epoch": 0.834023521616697, "grad_norm": 569.6694946289062, "learning_rate": 8.968454609164745e-07, "loss": 38.9997, 
"step": 412870 }, { "epoch": 0.8340437222493808, "grad_norm": 459.7696533203125, "learning_rate": 8.966459942392108e-07, "loss": 14.4037, "step": 412880 }, { "epoch": 0.8340639228820647, "grad_norm": 281.04302978515625, "learning_rate": 8.964465475611967e-07, "loss": 12.7747, "step": 412890 }, { "epoch": 0.8340841235147485, "grad_norm": 172.90126037597656, "learning_rate": 8.962471208834056e-07, "loss": 24.5858, "step": 412900 }, { "epoch": 0.8341043241474323, "grad_norm": 370.8978576660156, "learning_rate": 8.960477142068085e-07, "loss": 12.1178, "step": 412910 }, { "epoch": 0.8341245247801161, "grad_norm": 418.8710632324219, "learning_rate": 8.958483275323759e-07, "loss": 35.5518, "step": 412920 }, { "epoch": 0.8341447254127999, "grad_norm": 87.06157684326172, "learning_rate": 8.956489608610825e-07, "loss": 11.6532, "step": 412930 }, { "epoch": 0.8341649260454838, "grad_norm": 750.3912963867188, "learning_rate": 8.954496141938973e-07, "loss": 29.8639, "step": 412940 }, { "epoch": 0.8341851266781676, "grad_norm": 410.21051025390625, "learning_rate": 8.95250287531792e-07, "loss": 14.5647, "step": 412950 }, { "epoch": 0.8342053273108514, "grad_norm": 557.1311645507812, "learning_rate": 8.950509808757408e-07, "loss": 33.6352, "step": 412960 }, { "epoch": 0.8342255279435352, "grad_norm": 266.1794738769531, "learning_rate": 8.94851694226711e-07, "loss": 14.0841, "step": 412970 }, { "epoch": 0.834245728576219, "grad_norm": 312.001953125, "learning_rate": 8.946524275856783e-07, "loss": 19.4741, "step": 412980 }, { "epoch": 0.8342659292089029, "grad_norm": 256.38763427734375, "learning_rate": 8.9445318095361e-07, "loss": 11.4826, "step": 412990 }, { "epoch": 0.8342861298415867, "grad_norm": 211.61795043945312, "learning_rate": 8.942539543314799e-07, "loss": 22.2258, "step": 413000 }, { "epoch": 0.8343063304742705, "grad_norm": 215.25364685058594, "learning_rate": 8.940547477202588e-07, "loss": 14.7484, "step": 413010 }, { "epoch": 0.8343265311069543, "grad_norm": 427.4831848144531, "learning_rate": 8.938555611209149e-07, "loss": 23.3023, "step": 413020 }, { "epoch": 0.8343467317396381, "grad_norm": 405.29644775390625, "learning_rate": 8.936563945344229e-07, "loss": 9.2948, "step": 413030 }, { "epoch": 0.8343669323723218, "grad_norm": 405.0749816894531, "learning_rate": 8.934572479617498e-07, "loss": 10.7187, "step": 413040 }, { "epoch": 0.8343871330050057, "grad_norm": 478.822265625, "learning_rate": 8.932581214038693e-07, "loss": 20.9375, "step": 413050 }, { "epoch": 0.8344073336376895, "grad_norm": 371.0656433105469, "learning_rate": 8.930590148617513e-07, "loss": 18.6845, "step": 413060 }, { "epoch": 0.8344275342703733, "grad_norm": 468.7947998046875, "learning_rate": 8.928599283363637e-07, "loss": 13.453, "step": 413070 }, { "epoch": 0.8344477349030571, "grad_norm": 360.0147705078125, "learning_rate": 8.926608618286797e-07, "loss": 13.7299, "step": 413080 }, { "epoch": 0.834467935535741, "grad_norm": 101.20233154296875, "learning_rate": 8.924618153396691e-07, "loss": 13.2995, "step": 413090 }, { "epoch": 0.8344881361684248, "grad_norm": 223.70965576171875, "learning_rate": 8.922627888703e-07, "loss": 12.3701, "step": 413100 }, { "epoch": 0.8345083368011086, "grad_norm": 874.8382568359375, "learning_rate": 8.920637824215433e-07, "loss": 18.3292, "step": 413110 }, { "epoch": 0.8345285374337924, "grad_norm": 546.51416015625, "learning_rate": 8.918647959943727e-07, "loss": 25.3475, "step": 413120 }, { "epoch": 0.8345487380664762, "grad_norm": 34.34029769897461, "learning_rate": 
8.916658295897523e-07, "loss": 22.9749, "step": 413130 }, { "epoch": 0.83456893869916, "grad_norm": 301.0979919433594, "learning_rate": 8.914668832086543e-07, "loss": 11.5164, "step": 413140 }, { "epoch": 0.8345891393318439, "grad_norm": 563.997802734375, "learning_rate": 8.912679568520494e-07, "loss": 18.5705, "step": 413150 }, { "epoch": 0.8346093399645277, "grad_norm": 593.2365112304688, "learning_rate": 8.910690505209063e-07, "loss": 17.4941, "step": 413160 }, { "epoch": 0.8346295405972115, "grad_norm": 525.0170288085938, "learning_rate": 8.908701642161927e-07, "loss": 14.5259, "step": 413170 }, { "epoch": 0.8346497412298953, "grad_norm": 374.2406005859375, "learning_rate": 8.906712979388799e-07, "loss": 22.2857, "step": 413180 }, { "epoch": 0.8346699418625791, "grad_norm": 499.7447814941406, "learning_rate": 8.904724516899394e-07, "loss": 17.2774, "step": 413190 }, { "epoch": 0.834690142495263, "grad_norm": 350.549072265625, "learning_rate": 8.902736254703347e-07, "loss": 19.1012, "step": 413200 }, { "epoch": 0.8347103431279468, "grad_norm": 172.6784210205078, "learning_rate": 8.900748192810387e-07, "loss": 17.8349, "step": 413210 }, { "epoch": 0.8347305437606306, "grad_norm": 233.56097412109375, "learning_rate": 8.898760331230206e-07, "loss": 18.4664, "step": 413220 }, { "epoch": 0.8347507443933144, "grad_norm": 255.29087829589844, "learning_rate": 8.896772669972475e-07, "loss": 15.4086, "step": 413230 }, { "epoch": 0.8347709450259982, "grad_norm": 453.50433349609375, "learning_rate": 8.894785209046886e-07, "loss": 28.1366, "step": 413240 }, { "epoch": 0.8347911456586821, "grad_norm": 408.1929931640625, "learning_rate": 8.892797948463134e-07, "loss": 19.2199, "step": 413250 }, { "epoch": 0.8348113462913659, "grad_norm": 413.4342041015625, "learning_rate": 8.8908108882309e-07, "loss": 30.1597, "step": 413260 }, { "epoch": 0.8348315469240497, "grad_norm": 305.1564025878906, "learning_rate": 8.888824028359855e-07, "loss": 13.2087, "step": 413270 }, { "epoch": 0.8348517475567335, "grad_norm": 78.74534606933594, "learning_rate": 8.886837368859713e-07, "loss": 11.1992, "step": 413280 }, { "epoch": 0.8348719481894173, "grad_norm": 181.76473999023438, "learning_rate": 8.884850909740123e-07, "loss": 15.7444, "step": 413290 }, { "epoch": 0.834892148822101, "grad_norm": 258.8009948730469, "learning_rate": 8.882864651010798e-07, "loss": 29.8117, "step": 413300 }, { "epoch": 0.8349123494547849, "grad_norm": 207.19329833984375, "learning_rate": 8.880878592681386e-07, "loss": 16.7603, "step": 413310 }, { "epoch": 0.8349325500874687, "grad_norm": 192.0643768310547, "learning_rate": 8.878892734761602e-07, "loss": 16.0246, "step": 413320 }, { "epoch": 0.8349527507201525, "grad_norm": 370.33599853515625, "learning_rate": 8.876907077261093e-07, "loss": 8.2631, "step": 413330 }, { "epoch": 0.8349729513528363, "grad_norm": 17.950401306152344, "learning_rate": 8.874921620189564e-07, "loss": 18.9132, "step": 413340 }, { "epoch": 0.8349931519855202, "grad_norm": 5.475889682769775, "learning_rate": 8.872936363556678e-07, "loss": 14.4931, "step": 413350 }, { "epoch": 0.835013352618204, "grad_norm": 235.54627990722656, "learning_rate": 8.8709513073721e-07, "loss": 10.696, "step": 413360 }, { "epoch": 0.8350335532508878, "grad_norm": 354.1400146484375, "learning_rate": 8.868966451645533e-07, "loss": 17.4819, "step": 413370 }, { "epoch": 0.8350537538835716, "grad_norm": 612.1651000976562, "learning_rate": 8.866981796386631e-07, "loss": 29.6195, "step": 413380 }, { "epoch": 0.8350739545162554, "grad_norm": 
201.45852661132812, "learning_rate": 8.864997341605059e-07, "loss": 15.0578, "step": 413390 }, { "epoch": 0.8350941551489393, "grad_norm": 8.226776123046875, "learning_rate": 8.863013087310502e-07, "loss": 18.1037, "step": 413400 }, { "epoch": 0.8351143557816231, "grad_norm": 90.51039123535156, "learning_rate": 8.861029033512652e-07, "loss": 18.2073, "step": 413410 }, { "epoch": 0.8351345564143069, "grad_norm": 283.7022399902344, "learning_rate": 8.859045180221137e-07, "loss": 16.5587, "step": 413420 }, { "epoch": 0.8351547570469907, "grad_norm": 350.72943115234375, "learning_rate": 8.857061527445643e-07, "loss": 14.1672, "step": 413430 }, { "epoch": 0.8351749576796745, "grad_norm": 636.0487670898438, "learning_rate": 8.85507807519585e-07, "loss": 28.9502, "step": 413440 }, { "epoch": 0.8351951583123584, "grad_norm": 167.53924560546875, "learning_rate": 8.853094823481423e-07, "loss": 18.9471, "step": 413450 }, { "epoch": 0.8352153589450422, "grad_norm": 599.519775390625, "learning_rate": 8.851111772312004e-07, "loss": 29.2826, "step": 413460 }, { "epoch": 0.835235559577726, "grad_norm": 618.7284545898438, "learning_rate": 8.849128921697276e-07, "loss": 19.7537, "step": 413470 }, { "epoch": 0.8352557602104098, "grad_norm": 221.83441162109375, "learning_rate": 8.847146271646928e-07, "loss": 21.1147, "step": 413480 }, { "epoch": 0.8352759608430936, "grad_norm": 634.7052001953125, "learning_rate": 8.845163822170577e-07, "loss": 15.7934, "step": 413490 }, { "epoch": 0.8352961614757775, "grad_norm": 320.6510925292969, "learning_rate": 8.843181573277904e-07, "loss": 13.7606, "step": 413500 }, { "epoch": 0.8353163621084613, "grad_norm": 104.05023193359375, "learning_rate": 8.841199524978583e-07, "loss": 13.1243, "step": 413510 }, { "epoch": 0.8353365627411451, "grad_norm": 225.48289489746094, "learning_rate": 8.839217677282264e-07, "loss": 17.1197, "step": 413520 }, { "epoch": 0.8353567633738289, "grad_norm": 289.8655090332031, "learning_rate": 8.837236030198593e-07, "loss": 23.3992, "step": 413530 }, { "epoch": 0.8353769640065127, "grad_norm": 438.6549987792969, "learning_rate": 8.835254583737251e-07, "loss": 32.0711, "step": 413540 }, { "epoch": 0.8353971646391966, "grad_norm": 241.2873992919922, "learning_rate": 8.833273337907888e-07, "loss": 16.7628, "step": 413550 }, { "epoch": 0.8354173652718803, "grad_norm": 504.1148376464844, "learning_rate": 8.831292292720151e-07, "loss": 10.8753, "step": 413560 }, { "epoch": 0.8354375659045641, "grad_norm": 681.5499267578125, "learning_rate": 8.829311448183708e-07, "loss": 28.6235, "step": 413570 }, { "epoch": 0.8354577665372479, "grad_norm": 392.9539794921875, "learning_rate": 8.827330804308199e-07, "loss": 11.4326, "step": 413580 }, { "epoch": 0.8354779671699317, "grad_norm": 153.1405487060547, "learning_rate": 8.825350361103291e-07, "loss": 14.1093, "step": 413590 }, { "epoch": 0.8354981678026155, "grad_norm": 123.26155090332031, "learning_rate": 8.823370118578628e-07, "loss": 19.5911, "step": 413600 }, { "epoch": 0.8355183684352994, "grad_norm": 159.26515197753906, "learning_rate": 8.821390076743874e-07, "loss": 12.1363, "step": 413610 }, { "epoch": 0.8355385690679832, "grad_norm": 175.86227416992188, "learning_rate": 8.819410235608666e-07, "loss": 18.3747, "step": 413620 }, { "epoch": 0.835558769700667, "grad_norm": 307.37799072265625, "learning_rate": 8.817430595182652e-07, "loss": 10.5693, "step": 413630 }, { "epoch": 0.8355789703333508, "grad_norm": 262.6300964355469, "learning_rate": 8.815451155475496e-07, "loss": 11.735, "step": 413640 }, { 
"epoch": 0.8355991709660346, "grad_norm": 489.4883728027344, "learning_rate": 8.813471916496824e-07, "loss": 12.7488, "step": 413650 }, { "epoch": 0.8356193715987185, "grad_norm": 410.9041748046875, "learning_rate": 8.811492878256306e-07, "loss": 23.8351, "step": 413660 }, { "epoch": 0.8356395722314023, "grad_norm": 559.9931640625, "learning_rate": 8.809514040763578e-07, "loss": 18.7952, "step": 413670 }, { "epoch": 0.8356597728640861, "grad_norm": 250.8330535888672, "learning_rate": 8.807535404028267e-07, "loss": 66.0124, "step": 413680 }, { "epoch": 0.8356799734967699, "grad_norm": 328.7674865722656, "learning_rate": 8.805556968060047e-07, "loss": 11.4522, "step": 413690 }, { "epoch": 0.8357001741294537, "grad_norm": 482.66925048828125, "learning_rate": 8.803578732868545e-07, "loss": 15.3524, "step": 413700 }, { "epoch": 0.8357203747621376, "grad_norm": 276.3381652832031, "learning_rate": 8.801600698463397e-07, "loss": 12.5239, "step": 413710 }, { "epoch": 0.8357405753948214, "grad_norm": 268.27520751953125, "learning_rate": 8.799622864854246e-07, "loss": 18.2343, "step": 413720 }, { "epoch": 0.8357607760275052, "grad_norm": 155.68911743164062, "learning_rate": 8.797645232050761e-07, "loss": 14.467, "step": 413730 }, { "epoch": 0.835780976660189, "grad_norm": 501.7003173828125, "learning_rate": 8.795667800062529e-07, "loss": 19.7305, "step": 413740 }, { "epoch": 0.8358011772928728, "grad_norm": 186.0370635986328, "learning_rate": 8.793690568899216e-07, "loss": 9.8526, "step": 413750 }, { "epoch": 0.8358213779255567, "grad_norm": 79.15919494628906, "learning_rate": 8.791713538570474e-07, "loss": 12.5743, "step": 413760 }, { "epoch": 0.8358415785582405, "grad_norm": 12.420004844665527, "learning_rate": 8.789736709085917e-07, "loss": 18.4904, "step": 413770 }, { "epoch": 0.8358617791909243, "grad_norm": 295.5406188964844, "learning_rate": 8.787760080455171e-07, "loss": 14.9115, "step": 413780 }, { "epoch": 0.8358819798236081, "grad_norm": 565.0404052734375, "learning_rate": 8.78578365268789e-07, "loss": 24.2188, "step": 413790 }, { "epoch": 0.8359021804562919, "grad_norm": 546.0817260742188, "learning_rate": 8.783807425793722e-07, "loss": 15.092, "step": 413800 }, { "epoch": 0.8359223810889757, "grad_norm": 164.84117126464844, "learning_rate": 8.781831399782254e-07, "loss": 13.3016, "step": 413810 }, { "epoch": 0.8359425817216595, "grad_norm": 347.80291748046875, "learning_rate": 8.779855574663138e-07, "loss": 20.6909, "step": 413820 }, { "epoch": 0.8359627823543433, "grad_norm": 471.7724914550781, "learning_rate": 8.777879950446022e-07, "loss": 20.3324, "step": 413830 }, { "epoch": 0.8359829829870271, "grad_norm": 575.730712890625, "learning_rate": 8.775904527140522e-07, "loss": 24.1305, "step": 413840 }, { "epoch": 0.8360031836197109, "grad_norm": 375.9475402832031, "learning_rate": 8.773929304756246e-07, "loss": 28.2862, "step": 413850 }, { "epoch": 0.8360233842523948, "grad_norm": 444.1407165527344, "learning_rate": 8.771954283302852e-07, "loss": 16.1395, "step": 413860 }, { "epoch": 0.8360435848850786, "grad_norm": 8.394010543823242, "learning_rate": 8.769979462789957e-07, "loss": 8.5753, "step": 413870 }, { "epoch": 0.8360637855177624, "grad_norm": 7.048148155212402, "learning_rate": 8.768004843227162e-07, "loss": 9.9166, "step": 413880 }, { "epoch": 0.8360839861504462, "grad_norm": 636.8203735351562, "learning_rate": 8.766030424624117e-07, "loss": 20.2075, "step": 413890 }, { "epoch": 0.83610418678313, "grad_norm": 98.16224670410156, "learning_rate": 8.764056206990446e-07, "loss": 
17.7166, "step": 413900 }, { "epoch": 0.8361243874158139, "grad_norm": 323.1768798828125, "learning_rate": 8.762082190335763e-07, "loss": 20.1645, "step": 413910 }, { "epoch": 0.8361445880484977, "grad_norm": 378.2060546875, "learning_rate": 8.760108374669679e-07, "loss": 18.7097, "step": 413920 }, { "epoch": 0.8361647886811815, "grad_norm": 379.65814208984375, "learning_rate": 8.75813476000184e-07, "loss": 10.7528, "step": 413930 }, { "epoch": 0.8361849893138653, "grad_norm": 200.08131408691406, "learning_rate": 8.756161346341851e-07, "loss": 15.3422, "step": 413940 }, { "epoch": 0.8362051899465491, "grad_norm": 359.909912109375, "learning_rate": 8.754188133699316e-07, "loss": 22.8936, "step": 413950 }, { "epoch": 0.836225390579233, "grad_norm": 406.63525390625, "learning_rate": 8.752215122083874e-07, "loss": 19.9985, "step": 413960 }, { "epoch": 0.8362455912119168, "grad_norm": 225.33741760253906, "learning_rate": 8.750242311505125e-07, "loss": 15.3774, "step": 413970 }, { "epoch": 0.8362657918446006, "grad_norm": 235.2423858642578, "learning_rate": 8.7482697019727e-07, "loss": 36.1446, "step": 413980 }, { "epoch": 0.8362859924772844, "grad_norm": 364.4989318847656, "learning_rate": 8.746297293496209e-07, "loss": 16.6937, "step": 413990 }, { "epoch": 0.8363061931099682, "grad_norm": 370.710693359375, "learning_rate": 8.744325086085248e-07, "loss": 14.9425, "step": 414000 }, { "epoch": 0.836326393742652, "grad_norm": 384.3143615722656, "learning_rate": 8.74235307974945e-07, "loss": 24.9753, "step": 414010 }, { "epoch": 0.8363465943753359, "grad_norm": 116.01148986816406, "learning_rate": 8.740381274498427e-07, "loss": 16.6167, "step": 414020 }, { "epoch": 0.8363667950080197, "grad_norm": 131.11326599121094, "learning_rate": 8.738409670341764e-07, "loss": 19.161, "step": 414030 }, { "epoch": 0.8363869956407035, "grad_norm": 251.4010772705078, "learning_rate": 8.736438267289088e-07, "loss": 21.4255, "step": 414040 }, { "epoch": 0.8364071962733873, "grad_norm": 141.95697021484375, "learning_rate": 8.734467065350022e-07, "loss": 26.4255, "step": 414050 }, { "epoch": 0.8364273969060712, "grad_norm": 747.612548828125, "learning_rate": 8.732496064534163e-07, "loss": 30.1295, "step": 414060 }, { "epoch": 0.8364475975387549, "grad_norm": 96.67201232910156, "learning_rate": 8.730525264851092e-07, "loss": 12.5954, "step": 414070 }, { "epoch": 0.8364677981714387, "grad_norm": 289.9397277832031, "learning_rate": 8.728554666310441e-07, "loss": 15.3013, "step": 414080 }, { "epoch": 0.8364879988041225, "grad_norm": 540.086669921875, "learning_rate": 8.726584268921829e-07, "loss": 13.3588, "step": 414090 }, { "epoch": 0.8365081994368063, "grad_norm": 229.41238403320312, "learning_rate": 8.72461407269482e-07, "loss": 13.3224, "step": 414100 }, { "epoch": 0.8365284000694901, "grad_norm": 403.1856689453125, "learning_rate": 8.722644077639031e-07, "loss": 29.2782, "step": 414110 }, { "epoch": 0.836548600702174, "grad_norm": 540.8037719726562, "learning_rate": 8.720674283764086e-07, "loss": 18.8913, "step": 414120 }, { "epoch": 0.8365688013348578, "grad_norm": 23.485515594482422, "learning_rate": 8.718704691079566e-07, "loss": 14.9548, "step": 414130 }, { "epoch": 0.8365890019675416, "grad_norm": 145.4017791748047, "learning_rate": 8.716735299595059e-07, "loss": 21.5552, "step": 414140 }, { "epoch": 0.8366092026002254, "grad_norm": 73.29997253417969, "learning_rate": 8.714766109320188e-07, "loss": 16.1105, "step": 414150 }, { "epoch": 0.8366294032329092, "grad_norm": 320.2896423339844, "learning_rate": 
8.712797120264543e-07, "loss": 18.929, "step": 414160 }, { "epoch": 0.8366496038655931, "grad_norm": 272.5067138671875, "learning_rate": 8.710828332437704e-07, "loss": 17.7957, "step": 414170 }, { "epoch": 0.8366698044982769, "grad_norm": 113.86924743652344, "learning_rate": 8.70885974584929e-07, "loss": 18.2122, "step": 414180 }, { "epoch": 0.8366900051309607, "grad_norm": 122.1517333984375, "learning_rate": 8.706891360508874e-07, "loss": 16.3829, "step": 414190 }, { "epoch": 0.8367102057636445, "grad_norm": 279.2169189453125, "learning_rate": 8.704923176426072e-07, "loss": 14.7806, "step": 414200 }, { "epoch": 0.8367304063963283, "grad_norm": 163.48916625976562, "learning_rate": 8.702955193610457e-07, "loss": 8.444, "step": 414210 }, { "epoch": 0.8367506070290122, "grad_norm": 406.5298156738281, "learning_rate": 8.700987412071643e-07, "loss": 16.508, "step": 414220 }, { "epoch": 0.836770807661696, "grad_norm": 198.86090087890625, "learning_rate": 8.699019831819206e-07, "loss": 11.3153, "step": 414230 }, { "epoch": 0.8367910082943798, "grad_norm": 460.2008361816406, "learning_rate": 8.697052452862726e-07, "loss": 12.9046, "step": 414240 }, { "epoch": 0.8368112089270636, "grad_norm": 115.87354278564453, "learning_rate": 8.695085275211812e-07, "loss": 27.3795, "step": 414250 }, { "epoch": 0.8368314095597474, "grad_norm": 499.9412536621094, "learning_rate": 8.69311829887603e-07, "loss": 16.2713, "step": 414260 }, { "epoch": 0.8368516101924313, "grad_norm": 221.27325439453125, "learning_rate": 8.691151523864993e-07, "loss": 22.8541, "step": 414270 }, { "epoch": 0.8368718108251151, "grad_norm": 495.54376220703125, "learning_rate": 8.689184950188279e-07, "loss": 22.65, "step": 414280 }, { "epoch": 0.8368920114577989, "grad_norm": 119.42444610595703, "learning_rate": 8.687218577855444e-07, "loss": 20.8679, "step": 414290 }, { "epoch": 0.8369122120904827, "grad_norm": 538.4122314453125, "learning_rate": 8.685252406876116e-07, "loss": 21.7959, "step": 414300 }, { "epoch": 0.8369324127231665, "grad_norm": 268.2040710449219, "learning_rate": 8.683286437259852e-07, "loss": 18.0892, "step": 414310 }, { "epoch": 0.8369526133558503, "grad_norm": 390.52825927734375, "learning_rate": 8.68132066901623e-07, "loss": 23.0368, "step": 414320 }, { "epoch": 0.8369728139885341, "grad_norm": 639.8551025390625, "learning_rate": 8.679355102154841e-07, "loss": 16.519, "step": 414330 }, { "epoch": 0.8369930146212179, "grad_norm": 275.55413818359375, "learning_rate": 8.677389736685271e-07, "loss": 13.5942, "step": 414340 }, { "epoch": 0.8370132152539017, "grad_norm": 265.37274169921875, "learning_rate": 8.675424572617092e-07, "loss": 7.7294, "step": 414350 }, { "epoch": 0.8370334158865855, "grad_norm": 790.9881591796875, "learning_rate": 8.673459609959872e-07, "loss": 18.9518, "step": 414360 }, { "epoch": 0.8370536165192694, "grad_norm": 182.69476318359375, "learning_rate": 8.671494848723211e-07, "loss": 16.3131, "step": 414370 }, { "epoch": 0.8370738171519532, "grad_norm": 337.262939453125, "learning_rate": 8.669530288916667e-07, "loss": 17.6301, "step": 414380 }, { "epoch": 0.837094017784637, "grad_norm": 155.77891540527344, "learning_rate": 8.667565930549809e-07, "loss": 22.1242, "step": 414390 }, { "epoch": 0.8371142184173208, "grad_norm": 434.0264892578125, "learning_rate": 8.665601773632226e-07, "loss": 12.7268, "step": 414400 }, { "epoch": 0.8371344190500046, "grad_norm": 769.7467041015625, "learning_rate": 8.663637818173504e-07, "loss": 17.8749, "step": 414410 }, { "epoch": 0.8371546196826885, "grad_norm": 
300.7909851074219, "learning_rate": 8.661674064183179e-07, "loss": 17.9705, "step": 414420 }, { "epoch": 0.8371748203153723, "grad_norm": 815.4336547851562, "learning_rate": 8.659710511670838e-07, "loss": 17.56, "step": 414430 }, { "epoch": 0.8371950209480561, "grad_norm": 607.55810546875, "learning_rate": 8.657747160646068e-07, "loss": 19.1566, "step": 414440 }, { "epoch": 0.8372152215807399, "grad_norm": 537.06298828125, "learning_rate": 8.655784011118424e-07, "loss": 22.8558, "step": 414450 }, { "epoch": 0.8372354222134237, "grad_norm": 710.0247802734375, "learning_rate": 8.653821063097462e-07, "loss": 34.4708, "step": 414460 }, { "epoch": 0.8372556228461076, "grad_norm": 312.3938903808594, "learning_rate": 8.65185831659277e-07, "loss": 11.5337, "step": 414470 }, { "epoch": 0.8372758234787914, "grad_norm": 235.47027587890625, "learning_rate": 8.649895771613909e-07, "loss": 23.7216, "step": 414480 }, { "epoch": 0.8372960241114752, "grad_norm": 494.0722351074219, "learning_rate": 8.64793342817043e-07, "loss": 24.8195, "step": 414490 }, { "epoch": 0.837316224744159, "grad_norm": 194.82943725585938, "learning_rate": 8.645971286271903e-07, "loss": 15.1807, "step": 414500 }, { "epoch": 0.8373364253768428, "grad_norm": 548.2518310546875, "learning_rate": 8.644009345927912e-07, "loss": 22.8696, "step": 414510 }, { "epoch": 0.8373566260095267, "grad_norm": 832.0997314453125, "learning_rate": 8.642047607148008e-07, "loss": 18.8447, "step": 414520 }, { "epoch": 0.8373768266422105, "grad_norm": 519.0682983398438, "learning_rate": 8.640086069941727e-07, "loss": 18.8811, "step": 414530 }, { "epoch": 0.8373970272748943, "grad_norm": 783.4784545898438, "learning_rate": 8.638124734318664e-07, "loss": 30.9056, "step": 414540 }, { "epoch": 0.8374172279075781, "grad_norm": 266.5477600097656, "learning_rate": 8.636163600288372e-07, "loss": 15.8093, "step": 414550 }, { "epoch": 0.8374374285402619, "grad_norm": 440.7388916015625, "learning_rate": 8.634202667860381e-07, "loss": 25.2162, "step": 414560 }, { "epoch": 0.8374576291729458, "grad_norm": 615.1576538085938, "learning_rate": 8.632241937044283e-07, "loss": 16.1164, "step": 414570 }, { "epoch": 0.8374778298056295, "grad_norm": 29.328777313232422, "learning_rate": 8.630281407849612e-07, "loss": 16.8257, "step": 414580 }, { "epoch": 0.8374980304383133, "grad_norm": 357.13653564453125, "learning_rate": 8.628321080285945e-07, "loss": 18.8659, "step": 414590 }, { "epoch": 0.8375182310709971, "grad_norm": 660.4081420898438, "learning_rate": 8.626360954362817e-07, "loss": 12.5095, "step": 414600 }, { "epoch": 0.8375384317036809, "grad_norm": 317.3007507324219, "learning_rate": 8.62440103008978e-07, "loss": 24.9659, "step": 414610 }, { "epoch": 0.8375586323363647, "grad_norm": 625.5695190429688, "learning_rate": 8.622441307476404e-07, "loss": 27.2896, "step": 414620 }, { "epoch": 0.8375788329690486, "grad_norm": 483.0949401855469, "learning_rate": 8.62048178653223e-07, "loss": 12.7133, "step": 414630 }, { "epoch": 0.8375990336017324, "grad_norm": 152.7284393310547, "learning_rate": 8.618522467266799e-07, "loss": 12.2879, "step": 414640 }, { "epoch": 0.8376192342344162, "grad_norm": 44.15868377685547, "learning_rate": 8.616563349689672e-07, "loss": 16.6405, "step": 414650 }, { "epoch": 0.8376394348671, "grad_norm": 2.977247953414917, "learning_rate": 8.614604433810408e-07, "loss": 12.0111, "step": 414660 }, { "epoch": 0.8376596354997838, "grad_norm": 424.8921813964844, "learning_rate": 8.612645719638541e-07, "loss": 10.2665, "step": 414670 }, { "epoch": 
0.8376798361324677, "grad_norm": 511.0644226074219, "learning_rate": 8.610687207183604e-07, "loss": 28.0615, "step": 414680 }, { "epoch": 0.8377000367651515, "grad_norm": 341.1947937011719, "learning_rate": 8.608728896455177e-07, "loss": 9.8692, "step": 414690 }, { "epoch": 0.8377202373978353, "grad_norm": 583.61865234375, "learning_rate": 8.606770787462776e-07, "loss": 23.7715, "step": 414700 }, { "epoch": 0.8377404380305191, "grad_norm": 575.1815185546875, "learning_rate": 8.604812880215946e-07, "loss": 25.0434, "step": 414710 }, { "epoch": 0.8377606386632029, "grad_norm": 721.6419677734375, "learning_rate": 8.60285517472424e-07, "loss": 19.6313, "step": 414720 }, { "epoch": 0.8377808392958868, "grad_norm": 19.51460838317871, "learning_rate": 8.600897670997205e-07, "loss": 12.7547, "step": 414730 }, { "epoch": 0.8378010399285706, "grad_norm": 238.0825958251953, "learning_rate": 8.598940369044378e-07, "loss": 25.7607, "step": 414740 }, { "epoch": 0.8378212405612544, "grad_norm": 397.0168762207031, "learning_rate": 8.596983268875281e-07, "loss": 9.5079, "step": 414750 }, { "epoch": 0.8378414411939382, "grad_norm": 1884.20556640625, "learning_rate": 8.595026370499477e-07, "loss": 22.7407, "step": 414760 }, { "epoch": 0.837861641826622, "grad_norm": 219.27264404296875, "learning_rate": 8.59306967392649e-07, "loss": 13.0254, "step": 414770 }, { "epoch": 0.8378818424593059, "grad_norm": 59.17811584472656, "learning_rate": 8.59111317916585e-07, "loss": 18.7953, "step": 414780 }, { "epoch": 0.8379020430919897, "grad_norm": 513.6927490234375, "learning_rate": 8.589156886227112e-07, "loss": 15.032, "step": 414790 }, { "epoch": 0.8379222437246735, "grad_norm": 222.7626953125, "learning_rate": 8.587200795119793e-07, "loss": 6.7889, "step": 414800 }, { "epoch": 0.8379424443573573, "grad_norm": 223.1555938720703, "learning_rate": 8.585244905853446e-07, "loss": 45.8468, "step": 414810 }, { "epoch": 0.8379626449900411, "grad_norm": 179.75576782226562, "learning_rate": 8.583289218437574e-07, "loss": 40.9749, "step": 414820 }, { "epoch": 0.837982845622725, "grad_norm": 193.07220458984375, "learning_rate": 8.581333732881747e-07, "loss": 8.5778, "step": 414830 }, { "epoch": 0.8380030462554087, "grad_norm": 465.3057556152344, "learning_rate": 8.579378449195469e-07, "loss": 16.5517, "step": 414840 }, { "epoch": 0.8380232468880925, "grad_norm": 628.0454711914062, "learning_rate": 8.577423367388271e-07, "loss": 25.3636, "step": 414850 }, { "epoch": 0.8380434475207763, "grad_norm": 114.03556823730469, "learning_rate": 8.575468487469696e-07, "loss": 22.118, "step": 414860 }, { "epoch": 0.8380636481534601, "grad_norm": 24.78908920288086, "learning_rate": 8.573513809449252e-07, "loss": 12.9919, "step": 414870 }, { "epoch": 0.838083848786144, "grad_norm": 833.2183837890625, "learning_rate": 8.571559333336488e-07, "loss": 21.047, "step": 414880 }, { "epoch": 0.8381040494188278, "grad_norm": 236.3020782470703, "learning_rate": 8.569605059140923e-07, "loss": 33.2618, "step": 414890 }, { "epoch": 0.8381242500515116, "grad_norm": 305.19927978515625, "learning_rate": 8.567650986872061e-07, "loss": 18.8827, "step": 414900 }, { "epoch": 0.8381444506841954, "grad_norm": 1474.306640625, "learning_rate": 8.565697116539462e-07, "loss": 27.5161, "step": 414910 }, { "epoch": 0.8381646513168792, "grad_norm": 357.6324462890625, "learning_rate": 8.563743448152623e-07, "loss": 20.0884, "step": 414920 }, { "epoch": 0.838184851949563, "grad_norm": 288.3275451660156, "learning_rate": 8.561789981721064e-07, "loss": 10.7723, "step": 
414930 }, { "epoch": 0.8382050525822469, "grad_norm": 393.3634948730469, "learning_rate": 8.559836717254316e-07, "loss": 29.6967, "step": 414940 }, { "epoch": 0.8382252532149307, "grad_norm": 443.06097412109375, "learning_rate": 8.557883654761906e-07, "loss": 35.6726, "step": 414950 }, { "epoch": 0.8382454538476145, "grad_norm": 396.83074951171875, "learning_rate": 8.555930794253347e-07, "loss": 9.0194, "step": 414960 }, { "epoch": 0.8382656544802983, "grad_norm": 5.810521125793457, "learning_rate": 8.553978135738139e-07, "loss": 11.9886, "step": 414970 }, { "epoch": 0.8382858551129821, "grad_norm": 207.85836791992188, "learning_rate": 8.552025679225834e-07, "loss": 11.3733, "step": 414980 }, { "epoch": 0.838306055745666, "grad_norm": 350.19366455078125, "learning_rate": 8.550073424725924e-07, "loss": 17.0902, "step": 414990 }, { "epoch": 0.8383262563783498, "grad_norm": 159.68637084960938, "learning_rate": 8.54812137224792e-07, "loss": 15.9937, "step": 415000 }, { "epoch": 0.8383464570110336, "grad_norm": 285.9203796386719, "learning_rate": 8.54616952180134e-07, "loss": 25.6369, "step": 415010 }, { "epoch": 0.8383666576437174, "grad_norm": 127.26937866210938, "learning_rate": 8.544217873395727e-07, "loss": 24.5282, "step": 415020 }, { "epoch": 0.8383868582764012, "grad_norm": 0.43699193000793457, "learning_rate": 8.542266427040546e-07, "loss": 9.2404, "step": 415030 }, { "epoch": 0.8384070589090851, "grad_norm": 324.5606384277344, "learning_rate": 8.540315182745329e-07, "loss": 17.5618, "step": 415040 }, { "epoch": 0.8384272595417689, "grad_norm": 216.83395385742188, "learning_rate": 8.5383641405196e-07, "loss": 25.6635, "step": 415050 }, { "epoch": 0.8384474601744527, "grad_norm": 401.2691650390625, "learning_rate": 8.536413300372859e-07, "loss": 10.4735, "step": 415060 }, { "epoch": 0.8384676608071365, "grad_norm": 264.07440185546875, "learning_rate": 8.534462662314597e-07, "loss": 17.2263, "step": 415070 }, { "epoch": 0.8384878614398203, "grad_norm": 380.81732177734375, "learning_rate": 8.532512226354345e-07, "loss": 14.928, "step": 415080 }, { "epoch": 0.8385080620725041, "grad_norm": 292.3482971191406, "learning_rate": 8.530561992501596e-07, "loss": 23.0051, "step": 415090 }, { "epoch": 0.8385282627051879, "grad_norm": 237.59320068359375, "learning_rate": 8.528611960765853e-07, "loss": 21.9377, "step": 415100 }, { "epoch": 0.8385484633378717, "grad_norm": 419.3064880371094, "learning_rate": 8.526662131156621e-07, "loss": 12.194, "step": 415110 }, { "epoch": 0.8385686639705555, "grad_norm": 264.32537841796875, "learning_rate": 8.524712503683419e-07, "loss": 21.6371, "step": 415120 }, { "epoch": 0.8385888646032393, "grad_norm": 455.37042236328125, "learning_rate": 8.522763078355739e-07, "loss": 11.2926, "step": 415130 }, { "epoch": 0.8386090652359232, "grad_norm": 499.8186340332031, "learning_rate": 8.520813855183069e-07, "loss": 9.7159, "step": 415140 }, { "epoch": 0.838629265868607, "grad_norm": 329.1452331542969, "learning_rate": 8.518864834174939e-07, "loss": 12.3609, "step": 415150 }, { "epoch": 0.8386494665012908, "grad_norm": 677.871826171875, "learning_rate": 8.516916015340826e-07, "loss": 24.037, "step": 415160 }, { "epoch": 0.8386696671339746, "grad_norm": 302.4140930175781, "learning_rate": 8.514967398690215e-07, "loss": 15.02, "step": 415170 }, { "epoch": 0.8386898677666584, "grad_norm": 165.50599670410156, "learning_rate": 8.513018984232641e-07, "loss": 18.3824, "step": 415180 }, { "epoch": 0.8387100683993423, "grad_norm": 470.12298583984375, "learning_rate": 
8.511070771977569e-07, "loss": 18.0278, "step": 415190 }, { "epoch": 0.8387302690320261, "grad_norm": 761.8771362304688, "learning_rate": 8.509122761934519e-07, "loss": 16.4101, "step": 415200 }, { "epoch": 0.8387504696647099, "grad_norm": 521.2567138671875, "learning_rate": 8.507174954112968e-07, "loss": 16.6844, "step": 415210 }, { "epoch": 0.8387706702973937, "grad_norm": 358.11822509765625, "learning_rate": 8.505227348522404e-07, "loss": 33.388, "step": 415220 }, { "epoch": 0.8387908709300775, "grad_norm": 506.910400390625, "learning_rate": 8.503279945172338e-07, "loss": 20.2392, "step": 415230 }, { "epoch": 0.8388110715627614, "grad_norm": 515.4656372070312, "learning_rate": 8.501332744072255e-07, "loss": 14.4212, "step": 415240 }, { "epoch": 0.8388312721954452, "grad_norm": 222.73764038085938, "learning_rate": 8.499385745231631e-07, "loss": 13.0752, "step": 415250 }, { "epoch": 0.838851472828129, "grad_norm": 453.6046142578125, "learning_rate": 8.497438948659969e-07, "loss": 22.6873, "step": 415260 }, { "epoch": 0.8388716734608128, "grad_norm": 172.0892791748047, "learning_rate": 8.495492354366764e-07, "loss": 7.291, "step": 415270 }, { "epoch": 0.8388918740934966, "grad_norm": 372.55084228515625, "learning_rate": 8.493545962361499e-07, "loss": 22.3061, "step": 415280 }, { "epoch": 0.8389120747261805, "grad_norm": 1095.3577880859375, "learning_rate": 8.491599772653647e-07, "loss": 28.2512, "step": 415290 }, { "epoch": 0.8389322753588643, "grad_norm": 397.1434631347656, "learning_rate": 8.489653785252711e-07, "loss": 22.9261, "step": 415300 }, { "epoch": 0.8389524759915481, "grad_norm": 285.25030517578125, "learning_rate": 8.487708000168166e-07, "loss": 20.4244, "step": 415310 }, { "epoch": 0.8389726766242319, "grad_norm": 429.2090148925781, "learning_rate": 8.485762417409488e-07, "loss": 24.278, "step": 415320 }, { "epoch": 0.8389928772569157, "grad_norm": 68.62606048583984, "learning_rate": 8.483817036986169e-07, "loss": 12.0669, "step": 415330 }, { "epoch": 0.8390130778895996, "grad_norm": 190.48098754882812, "learning_rate": 8.481871858907703e-07, "loss": 16.2902, "step": 415340 }, { "epoch": 0.8390332785222833, "grad_norm": 252.4988555908203, "learning_rate": 8.479926883183559e-07, "loss": 18.1994, "step": 415350 }, { "epoch": 0.8390534791549671, "grad_norm": 453.9949951171875, "learning_rate": 8.477982109823202e-07, "loss": 20.5837, "step": 415360 }, { "epoch": 0.8390736797876509, "grad_norm": 4.822742462158203, "learning_rate": 8.476037538836134e-07, "loss": 13.4309, "step": 415370 }, { "epoch": 0.8390938804203347, "grad_norm": 355.41363525390625, "learning_rate": 8.474093170231828e-07, "loss": 13.9954, "step": 415380 }, { "epoch": 0.8391140810530185, "grad_norm": 598.54150390625, "learning_rate": 8.472149004019742e-07, "loss": 13.5178, "step": 415390 }, { "epoch": 0.8391342816857024, "grad_norm": 425.3116760253906, "learning_rate": 8.470205040209362e-07, "loss": 18.0247, "step": 415400 }, { "epoch": 0.8391544823183862, "grad_norm": 289.0229187011719, "learning_rate": 8.46826127881018e-07, "loss": 14.5099, "step": 415410 }, { "epoch": 0.83917468295107, "grad_norm": 426.1451110839844, "learning_rate": 8.466317719831657e-07, "loss": 20.0752, "step": 415420 }, { "epoch": 0.8391948835837538, "grad_norm": 130.5015411376953, "learning_rate": 8.464374363283245e-07, "loss": 19.8057, "step": 415430 }, { "epoch": 0.8392150842164376, "grad_norm": 216.7658233642578, "learning_rate": 8.462431209174454e-07, "loss": 13.6538, "step": 415440 }, { "epoch": 0.8392352848491215, "grad_norm": 
760.1143188476562, "learning_rate": 8.460488257514731e-07, "loss": 34.9151, "step": 415450 }, { "epoch": 0.8392554854818053, "grad_norm": 397.96795654296875, "learning_rate": 8.458545508313543e-07, "loss": 9.9936, "step": 415460 }, { "epoch": 0.8392756861144891, "grad_norm": 272.09503173828125, "learning_rate": 8.456602961580374e-07, "loss": 16.4152, "step": 415470 }, { "epoch": 0.8392958867471729, "grad_norm": 701.9149169921875, "learning_rate": 8.454660617324672e-07, "loss": 25.2512, "step": 415480 }, { "epoch": 0.8393160873798567, "grad_norm": 280.76959228515625, "learning_rate": 8.452718475555927e-07, "loss": 13.5794, "step": 415490 }, { "epoch": 0.8393362880125406, "grad_norm": 237.9507293701172, "learning_rate": 8.450776536283594e-07, "loss": 13.2203, "step": 415500 }, { "epoch": 0.8393564886452244, "grad_norm": 284.7758483886719, "learning_rate": 8.448834799517125e-07, "loss": 19.6669, "step": 415510 }, { "epoch": 0.8393766892779082, "grad_norm": 102.53799438476562, "learning_rate": 8.446893265266005e-07, "loss": 13.1576, "step": 415520 }, { "epoch": 0.839396889910592, "grad_norm": 178.54124450683594, "learning_rate": 8.444951933539691e-07, "loss": 16.2389, "step": 415530 }, { "epoch": 0.8394170905432758, "grad_norm": 411.52203369140625, "learning_rate": 8.443010804347629e-07, "loss": 16.3723, "step": 415540 }, { "epoch": 0.8394372911759597, "grad_norm": 486.77764892578125, "learning_rate": 8.441069877699287e-07, "loss": 27.4579, "step": 415550 }, { "epoch": 0.8394574918086435, "grad_norm": 159.8814697265625, "learning_rate": 8.439129153604148e-07, "loss": 18.4531, "step": 415560 }, { "epoch": 0.8394776924413273, "grad_norm": 840.0407104492188, "learning_rate": 8.437188632071652e-07, "loss": 24.8126, "step": 415570 }, { "epoch": 0.8394978930740111, "grad_norm": 307.8695373535156, "learning_rate": 8.435248313111244e-07, "loss": 16.5966, "step": 415580 }, { "epoch": 0.839518093706695, "grad_norm": 167.97000122070312, "learning_rate": 8.433308196732403e-07, "loss": 10.4787, "step": 415590 }, { "epoch": 0.8395382943393787, "grad_norm": 611.5538330078125, "learning_rate": 8.431368282944585e-07, "loss": 25.2954, "step": 415600 }, { "epoch": 0.8395584949720625, "grad_norm": 533.6309814453125, "learning_rate": 8.42942857175722e-07, "loss": 25.0081, "step": 415610 }, { "epoch": 0.8395786956047463, "grad_norm": 599.7293701171875, "learning_rate": 8.427489063179778e-07, "loss": 19.8581, "step": 415620 }, { "epoch": 0.8395988962374301, "grad_norm": 384.72125244140625, "learning_rate": 8.425549757221734e-07, "loss": 21.0972, "step": 415630 }, { "epoch": 0.8396190968701139, "grad_norm": 245.0561065673828, "learning_rate": 8.423610653892494e-07, "loss": 21.7128, "step": 415640 }, { "epoch": 0.8396392975027978, "grad_norm": 131.35284423828125, "learning_rate": 8.421671753201538e-07, "loss": 17.4895, "step": 415650 }, { "epoch": 0.8396594981354816, "grad_norm": 97.52739715576172, "learning_rate": 8.419733055158319e-07, "loss": 11.632, "step": 415660 }, { "epoch": 0.8396796987681654, "grad_norm": 512.2359008789062, "learning_rate": 8.41779455977228e-07, "loss": 16.4915, "step": 415670 }, { "epoch": 0.8396998994008492, "grad_norm": 383.025146484375, "learning_rate": 8.415856267052852e-07, "loss": 11.6689, "step": 415680 }, { "epoch": 0.839720100033533, "grad_norm": 433.9366760253906, "learning_rate": 8.413918177009512e-07, "loss": 14.7481, "step": 415690 }, { "epoch": 0.8397403006662169, "grad_norm": 268.4071350097656, "learning_rate": 8.411980289651689e-07, "loss": 17.461, "step": 415700 }, { 
"epoch": 0.8397605012989007, "grad_norm": 488.49359130859375, "learning_rate": 8.410042604988822e-07, "loss": 19.7896, "step": 415710 }, { "epoch": 0.8397807019315845, "grad_norm": 401.6903381347656, "learning_rate": 8.408105123030358e-07, "loss": 15.051, "step": 415720 }, { "epoch": 0.8398009025642683, "grad_norm": 1138.7222900390625, "learning_rate": 8.406167843785762e-07, "loss": 20.6059, "step": 415730 }, { "epoch": 0.8398211031969521, "grad_norm": 875.8900756835938, "learning_rate": 8.404230767264454e-07, "loss": 16.6636, "step": 415740 }, { "epoch": 0.839841303829636, "grad_norm": 299.91119384765625, "learning_rate": 8.402293893475872e-07, "loss": 30.0651, "step": 415750 }, { "epoch": 0.8398615044623198, "grad_norm": 530.86669921875, "learning_rate": 8.400357222429473e-07, "loss": 10.671, "step": 415760 }, { "epoch": 0.8398817050950036, "grad_norm": 395.6985168457031, "learning_rate": 8.39842075413469e-07, "loss": 21.6314, "step": 415770 }, { "epoch": 0.8399019057276874, "grad_norm": 835.7567138671875, "learning_rate": 8.396484488600948e-07, "loss": 28.1015, "step": 415780 }, { "epoch": 0.8399221063603712, "grad_norm": 20.669448852539062, "learning_rate": 8.394548425837706e-07, "loss": 11.6772, "step": 415790 }, { "epoch": 0.839942306993055, "grad_norm": 303.6646728515625, "learning_rate": 8.392612565854374e-07, "loss": 15.3869, "step": 415800 }, { "epoch": 0.8399625076257389, "grad_norm": 386.441650390625, "learning_rate": 8.390676908660417e-07, "loss": 20.5151, "step": 415810 }, { "epoch": 0.8399827082584227, "grad_norm": 208.04490661621094, "learning_rate": 8.388741454265254e-07, "loss": 32.2923, "step": 415820 }, { "epoch": 0.8400029088911065, "grad_norm": 1051.257080078125, "learning_rate": 8.386806202678305e-07, "loss": 34.7388, "step": 415830 }, { "epoch": 0.8400231095237903, "grad_norm": 452.179931640625, "learning_rate": 8.384871153909025e-07, "loss": 28.9045, "step": 415840 }, { "epoch": 0.8400433101564742, "grad_norm": 737.98974609375, "learning_rate": 8.382936307966838e-07, "loss": 16.3271, "step": 415850 }, { "epoch": 0.8400635107891579, "grad_norm": 648.748291015625, "learning_rate": 8.381001664861161e-07, "loss": 18.4748, "step": 415860 }, { "epoch": 0.8400837114218417, "grad_norm": 391.1263427734375, "learning_rate": 8.379067224601433e-07, "loss": 23.7422, "step": 415870 }, { "epoch": 0.8401039120545255, "grad_norm": 632.5194702148438, "learning_rate": 8.3771329871971e-07, "loss": 25.7779, "step": 415880 }, { "epoch": 0.8401241126872093, "grad_norm": 0.5611941814422607, "learning_rate": 8.375198952657565e-07, "loss": 15.2841, "step": 415890 }, { "epoch": 0.8401443133198931, "grad_norm": 168.1457977294922, "learning_rate": 8.373265120992252e-07, "loss": 15.9845, "step": 415900 }, { "epoch": 0.840164513952577, "grad_norm": 237.24925231933594, "learning_rate": 8.371331492210611e-07, "loss": 25.797, "step": 415910 }, { "epoch": 0.8401847145852608, "grad_norm": 454.6050720214844, "learning_rate": 8.369398066322049e-07, "loss": 20.6318, "step": 415920 }, { "epoch": 0.8402049152179446, "grad_norm": 564.5012817382812, "learning_rate": 8.367464843335981e-07, "loss": 17.6519, "step": 415930 }, { "epoch": 0.8402251158506284, "grad_norm": 426.44891357421875, "learning_rate": 8.365531823261841e-07, "loss": 18.174, "step": 415940 }, { "epoch": 0.8402453164833122, "grad_norm": 869.7213134765625, "learning_rate": 8.363599006109057e-07, "loss": 27.4623, "step": 415950 }, { "epoch": 0.8402655171159961, "grad_norm": 242.62086486816406, "learning_rate": 8.361666391887047e-07, "loss": 
17.5728, "step": 415960 }, { "epoch": 0.8402857177486799, "grad_norm": 656.5515747070312, "learning_rate": 8.359733980605211e-07, "loss": 23.2706, "step": 415970 }, { "epoch": 0.8403059183813637, "grad_norm": 274.0316162109375, "learning_rate": 8.357801772272988e-07, "loss": 20.1416, "step": 415980 }, { "epoch": 0.8403261190140475, "grad_norm": 312.2007751464844, "learning_rate": 8.355869766899793e-07, "loss": 24.8726, "step": 415990 }, { "epoch": 0.8403463196467313, "grad_norm": 668.3038940429688, "learning_rate": 8.353937964495029e-07, "loss": 17.8256, "step": 416000 }, { "epoch": 0.8403665202794152, "grad_norm": 373.26690673828125, "learning_rate": 8.352006365068116e-07, "loss": 19.7764, "step": 416010 }, { "epoch": 0.840386720912099, "grad_norm": 471.3692932128906, "learning_rate": 8.350074968628486e-07, "loss": 12.6507, "step": 416020 }, { "epoch": 0.8404069215447828, "grad_norm": 1139.7874755859375, "learning_rate": 8.348143775185536e-07, "loss": 34.0043, "step": 416030 }, { "epoch": 0.8404271221774666, "grad_norm": 210.00518798828125, "learning_rate": 8.346212784748676e-07, "loss": 11.1326, "step": 416040 }, { "epoch": 0.8404473228101504, "grad_norm": 234.03453063964844, "learning_rate": 8.344281997327331e-07, "loss": 16.5989, "step": 416050 }, { "epoch": 0.8404675234428343, "grad_norm": 252.06436157226562, "learning_rate": 8.342351412930899e-07, "loss": 16.515, "step": 416060 }, { "epoch": 0.8404877240755181, "grad_norm": 14.403572082519531, "learning_rate": 8.340421031568791e-07, "loss": 20.2605, "step": 416070 }, { "epoch": 0.8405079247082019, "grad_norm": 516.9014282226562, "learning_rate": 8.338490853250425e-07, "loss": 17.6014, "step": 416080 }, { "epoch": 0.8405281253408857, "grad_norm": 166.3125, "learning_rate": 8.336560877985189e-07, "loss": 19.7227, "step": 416090 }, { "epoch": 0.8405483259735695, "grad_norm": 65.38711547851562, "learning_rate": 8.334631105782515e-07, "loss": 19.9046, "step": 416100 }, { "epoch": 0.8405685266062533, "grad_norm": 191.06500244140625, "learning_rate": 8.332701536651794e-07, "loss": 12.5053, "step": 416110 }, { "epoch": 0.8405887272389371, "grad_norm": 225.70530700683594, "learning_rate": 8.330772170602424e-07, "loss": 21.7426, "step": 416120 }, { "epoch": 0.8406089278716209, "grad_norm": 202.8973388671875, "learning_rate": 8.328843007643828e-07, "loss": 8.0427, "step": 416130 }, { "epoch": 0.8406291285043047, "grad_norm": 1216.110595703125, "learning_rate": 8.326914047785395e-07, "loss": 18.0305, "step": 416140 }, { "epoch": 0.8406493291369885, "grad_norm": 569.0231323242188, "learning_rate": 8.324985291036513e-07, "loss": 24.0127, "step": 416150 }, { "epoch": 0.8406695297696724, "grad_norm": 474.5058288574219, "learning_rate": 8.323056737406604e-07, "loss": 27.7385, "step": 416160 }, { "epoch": 0.8406897304023562, "grad_norm": 415.0338439941406, "learning_rate": 8.32112838690507e-07, "loss": 38.8405, "step": 416170 }, { "epoch": 0.84070993103504, "grad_norm": 262.8816833496094, "learning_rate": 8.319200239541303e-07, "loss": 14.3544, "step": 416180 }, { "epoch": 0.8407301316677238, "grad_norm": 423.84130859375, "learning_rate": 8.317272295324691e-07, "loss": 27.0065, "step": 416190 }, { "epoch": 0.8407503323004076, "grad_norm": 88.60386657714844, "learning_rate": 8.315344554264643e-07, "loss": 9.2607, "step": 416200 }, { "epoch": 0.8407705329330915, "grad_norm": 319.1097412109375, "learning_rate": 8.313417016370557e-07, "loss": 12.6866, "step": 416210 }, { "epoch": 0.8407907335657753, "grad_norm": 797.659912109375, "learning_rate": 
8.311489681651803e-07, "loss": 20.9408, "step": 416220 }, { "epoch": 0.8408109341984591, "grad_norm": 328.889892578125, "learning_rate": 8.309562550117789e-07, "loss": 16.7754, "step": 416230 }, { "epoch": 0.8408311348311429, "grad_norm": 94.6286849975586, "learning_rate": 8.307635621777943e-07, "loss": 17.0616, "step": 416240 }, { "epoch": 0.8408513354638267, "grad_norm": 705.1524658203125, "learning_rate": 8.305708896641596e-07, "loss": 27.5486, "step": 416250 }, { "epoch": 0.8408715360965106, "grad_norm": 618.1636962890625, "learning_rate": 8.303782374718167e-07, "loss": 18.6452, "step": 416260 }, { "epoch": 0.8408917367291944, "grad_norm": 419.329345703125, "learning_rate": 8.30185605601706e-07, "loss": 18.2721, "step": 416270 }, { "epoch": 0.8409119373618782, "grad_norm": 543.0031127929688, "learning_rate": 8.299929940547646e-07, "loss": 22.3321, "step": 416280 }, { "epoch": 0.840932137994562, "grad_norm": 214.42633056640625, "learning_rate": 8.298004028319306e-07, "loss": 18.5606, "step": 416290 }, { "epoch": 0.8409523386272458, "grad_norm": 243.7238311767578, "learning_rate": 8.296078319341444e-07, "loss": 23.4548, "step": 416300 }, { "epoch": 0.8409725392599297, "grad_norm": 349.8535461425781, "learning_rate": 8.294152813623446e-07, "loss": 21.2033, "step": 416310 }, { "epoch": 0.8409927398926135, "grad_norm": 1284.961181640625, "learning_rate": 8.292227511174671e-07, "loss": 27.151, "step": 416320 }, { "epoch": 0.8410129405252973, "grad_norm": 60.14564514160156, "learning_rate": 8.29030241200452e-07, "loss": 20.0398, "step": 416330 }, { "epoch": 0.8410331411579811, "grad_norm": 280.21649169921875, "learning_rate": 8.288377516122393e-07, "loss": 24.6728, "step": 416340 }, { "epoch": 0.8410533417906649, "grad_norm": 468.47125244140625, "learning_rate": 8.286452823537649e-07, "loss": 13.5939, "step": 416350 }, { "epoch": 0.8410735424233488, "grad_norm": 17.582555770874023, "learning_rate": 8.284528334259667e-07, "loss": 12.8961, "step": 416360 }, { "epoch": 0.8410937430560325, "grad_norm": 144.01602172851562, "learning_rate": 8.282604048297848e-07, "loss": 12.5265, "step": 416370 }, { "epoch": 0.8411139436887163, "grad_norm": 522.6692504882812, "learning_rate": 8.280679965661554e-07, "loss": 15.1801, "step": 416380 }, { "epoch": 0.8411341443214001, "grad_norm": 663.3296508789062, "learning_rate": 8.278756086360157e-07, "loss": 14.2881, "step": 416390 }, { "epoch": 0.8411543449540839, "grad_norm": 272.4671936035156, "learning_rate": 8.276832410403051e-07, "loss": 24.4269, "step": 416400 }, { "epoch": 0.8411745455867677, "grad_norm": 730.2803955078125, "learning_rate": 8.274908937799592e-07, "loss": 18.7027, "step": 416410 }, { "epoch": 0.8411947462194516, "grad_norm": 814.9945678710938, "learning_rate": 8.27298566855918e-07, "loss": 20.1449, "step": 416420 }, { "epoch": 0.8412149468521354, "grad_norm": 343.75592041015625, "learning_rate": 8.271062602691171e-07, "loss": 41.2443, "step": 416430 }, { "epoch": 0.8412351474848192, "grad_norm": 410.6811828613281, "learning_rate": 8.269139740204935e-07, "loss": 19.676, "step": 416440 }, { "epoch": 0.841255348117503, "grad_norm": 266.7729797363281, "learning_rate": 8.267217081109863e-07, "loss": 24.5343, "step": 416450 }, { "epoch": 0.8412755487501868, "grad_norm": 360.75347900390625, "learning_rate": 8.265294625415299e-07, "loss": 11.8981, "step": 416460 }, { "epoch": 0.8412957493828707, "grad_norm": 268.4195861816406, "learning_rate": 8.263372373130635e-07, "loss": 22.2304, "step": 416470 }, { "epoch": 0.8413159500155545, "grad_norm": 
24.121749877929688, "learning_rate": 8.261450324265225e-07, "loss": 14.1595, "step": 416480 }, { "epoch": 0.8413361506482383, "grad_norm": 124.1473617553711, "learning_rate": 8.259528478828455e-07, "loss": 25.6739, "step": 416490 }, { "epoch": 0.8413563512809221, "grad_norm": 485.75091552734375, "learning_rate": 8.25760683682968e-07, "loss": 28.142, "step": 416500 }, { "epoch": 0.8413765519136059, "grad_norm": 702.4967041015625, "learning_rate": 8.255685398278257e-07, "loss": 26.1443, "step": 416510 }, { "epoch": 0.8413967525462898, "grad_norm": 742.955810546875, "learning_rate": 8.25376416318357e-07, "loss": 16.2622, "step": 416520 }, { "epoch": 0.8414169531789736, "grad_norm": 2220.86767578125, "learning_rate": 8.25184313155497e-07, "loss": 24.0307, "step": 416530 }, { "epoch": 0.8414371538116574, "grad_norm": 689.5069580078125, "learning_rate": 8.249922303401814e-07, "loss": 12.3775, "step": 416540 }, { "epoch": 0.8414573544443412, "grad_norm": 553.2268676757812, "learning_rate": 8.248001678733475e-07, "loss": 23.2607, "step": 416550 }, { "epoch": 0.841477555077025, "grad_norm": 432.29266357421875, "learning_rate": 8.246081257559324e-07, "loss": 30.3421, "step": 416560 }, { "epoch": 0.8414977557097089, "grad_norm": 283.49530029296875, "learning_rate": 8.244161039888709e-07, "loss": 10.0194, "step": 416570 }, { "epoch": 0.8415179563423927, "grad_norm": 222.84986877441406, "learning_rate": 8.242241025730974e-07, "loss": 19.9328, "step": 416580 }, { "epoch": 0.8415381569750765, "grad_norm": 332.9171142578125, "learning_rate": 8.240321215095504e-07, "loss": 25.312, "step": 416590 }, { "epoch": 0.8415583576077603, "grad_norm": 182.77035522460938, "learning_rate": 8.238401607991647e-07, "loss": 21.3855, "step": 416600 }, { "epoch": 0.8415785582404441, "grad_norm": 751.35888671875, "learning_rate": 8.236482204428737e-07, "loss": 14.0024, "step": 416610 }, { "epoch": 0.841598758873128, "grad_norm": 531.2789916992188, "learning_rate": 8.234563004416151e-07, "loss": 8.2383, "step": 416620 }, { "epoch": 0.8416189595058117, "grad_norm": 621.9959716796875, "learning_rate": 8.232644007963253e-07, "loss": 39.9086, "step": 416630 }, { "epoch": 0.8416391601384955, "grad_norm": 384.6670227050781, "learning_rate": 8.230725215079383e-07, "loss": 19.9908, "step": 416640 }, { "epoch": 0.8416593607711793, "grad_norm": 391.2987060546875, "learning_rate": 8.228806625773878e-07, "loss": 11.5974, "step": 416650 }, { "epoch": 0.8416795614038631, "grad_norm": 168.9244842529297, "learning_rate": 8.226888240056114e-07, "loss": 12.1507, "step": 416660 }, { "epoch": 0.841699762036547, "grad_norm": 642.2132568359375, "learning_rate": 8.224970057935433e-07, "loss": 15.2827, "step": 416670 }, { "epoch": 0.8417199626692308, "grad_norm": 602.5213623046875, "learning_rate": 8.223052079421167e-07, "loss": 19.2457, "step": 416680 }, { "epoch": 0.8417401633019146, "grad_norm": 894.4662475585938, "learning_rate": 8.221134304522694e-07, "loss": 20.9106, "step": 416690 }, { "epoch": 0.8417603639345984, "grad_norm": 332.2417297363281, "learning_rate": 8.21921673324933e-07, "loss": 12.6066, "step": 416700 }, { "epoch": 0.8417805645672822, "grad_norm": 448.7167053222656, "learning_rate": 8.217299365610448e-07, "loss": 16.0514, "step": 416710 }, { "epoch": 0.841800765199966, "grad_norm": 254.240234375, "learning_rate": 8.215382201615379e-07, "loss": 16.29, "step": 416720 }, { "epoch": 0.8418209658326499, "grad_norm": 257.3818664550781, "learning_rate": 8.213465241273461e-07, "loss": 13.0963, "step": 416730 }, { "epoch": 
0.8418411664653337, "grad_norm": 152.66146850585938, "learning_rate": 8.211548484594057e-07, "loss": 20.5531, "step": 416740 }, { "epoch": 0.8418613670980175, "grad_norm": 290.1943359375, "learning_rate": 8.209631931586499e-07, "loss": 14.6181, "step": 416750 }, { "epoch": 0.8418815677307013, "grad_norm": 276.91009521484375, "learning_rate": 8.207715582260112e-07, "loss": 28.6372, "step": 416760 }, { "epoch": 0.8419017683633852, "grad_norm": 232.94760131835938, "learning_rate": 8.205799436624251e-07, "loss": 10.0377, "step": 416770 }, { "epoch": 0.841921968996069, "grad_norm": 595.9349365234375, "learning_rate": 8.203883494688264e-07, "loss": 28.9553, "step": 416780 }, { "epoch": 0.8419421696287528, "grad_norm": 459.4786682128906, "learning_rate": 8.201967756461482e-07, "loss": 14.0377, "step": 416790 }, { "epoch": 0.8419623702614366, "grad_norm": 546.0062255859375, "learning_rate": 8.200052221953231e-07, "loss": 20.928, "step": 416800 }, { "epoch": 0.8419825708941204, "grad_norm": 146.12875366210938, "learning_rate": 8.198136891172864e-07, "loss": 19.1887, "step": 416810 }, { "epoch": 0.8420027715268043, "grad_norm": 302.6186218261719, "learning_rate": 8.196221764129708e-07, "loss": 19.0241, "step": 416820 }, { "epoch": 0.8420229721594881, "grad_norm": 549.2744140625, "learning_rate": 8.194306840833083e-07, "loss": 30.9922, "step": 416830 }, { "epoch": 0.8420431727921719, "grad_norm": 546.9652099609375, "learning_rate": 8.192392121292336e-07, "loss": 28.0652, "step": 416840 }, { "epoch": 0.8420633734248557, "grad_norm": 156.42550659179688, "learning_rate": 8.190477605516828e-07, "loss": 19.8071, "step": 416850 }, { "epoch": 0.8420835740575395, "grad_norm": 337.0184326171875, "learning_rate": 8.188563293515834e-07, "loss": 35.2146, "step": 416860 }, { "epoch": 0.8421037746902234, "grad_norm": 279.08404541015625, "learning_rate": 8.186649185298712e-07, "loss": 13.8124, "step": 416870 }, { "epoch": 0.8421239753229071, "grad_norm": 671.6292114257812, "learning_rate": 8.184735280874801e-07, "loss": 18.5853, "step": 416880 }, { "epoch": 0.8421441759555909, "grad_norm": 474.8827819824219, "learning_rate": 8.182821580253425e-07, "loss": 28.7646, "step": 416890 }, { "epoch": 0.8421643765882747, "grad_norm": 183.2584991455078, "learning_rate": 8.180908083443884e-07, "loss": 15.0636, "step": 416900 }, { "epoch": 0.8421845772209585, "grad_norm": 540.3548583984375, "learning_rate": 8.178994790455541e-07, "loss": 28.3647, "step": 416910 }, { "epoch": 0.8422047778536423, "grad_norm": 61.81666946411133, "learning_rate": 8.177081701297706e-07, "loss": 15.9166, "step": 416920 }, { "epoch": 0.8422249784863262, "grad_norm": 419.9620056152344, "learning_rate": 8.175168815979689e-07, "loss": 15.7197, "step": 416930 }, { "epoch": 0.84224517911901, "grad_norm": 360.3706359863281, "learning_rate": 8.173256134510827e-07, "loss": 14.0688, "step": 416940 }, { "epoch": 0.8422653797516938, "grad_norm": 632.1427612304688, "learning_rate": 8.171343656900455e-07, "loss": 19.6811, "step": 416950 }, { "epoch": 0.8422855803843776, "grad_norm": 314.94830322265625, "learning_rate": 8.169431383157877e-07, "loss": 14.6761, "step": 416960 }, { "epoch": 0.8423057810170614, "grad_norm": 198.35939025878906, "learning_rate": 8.16751931329241e-07, "loss": 18.2067, "step": 416970 }, { "epoch": 0.8423259816497453, "grad_norm": 257.9476318359375, "learning_rate": 8.16560744731339e-07, "loss": 10.4845, "step": 416980 }, { "epoch": 0.8423461822824291, "grad_norm": 317.8804931640625, "learning_rate": 8.163695785230125e-07, "loss": 24.2213, 
"step": 416990 }, { "epoch": 0.8423663829151129, "grad_norm": 255.24928283691406, "learning_rate": 8.161784327051919e-07, "loss": 13.5445, "step": 417000 }, { "epoch": 0.8423865835477967, "grad_norm": 29.380022048950195, "learning_rate": 8.159873072788116e-07, "loss": 14.2142, "step": 417010 }, { "epoch": 0.8424067841804805, "grad_norm": 535.5570678710938, "learning_rate": 8.157962022448001e-07, "loss": 13.1268, "step": 417020 }, { "epoch": 0.8424269848131644, "grad_norm": 971.4058837890625, "learning_rate": 8.156051176040919e-07, "loss": 20.5933, "step": 417030 }, { "epoch": 0.8424471854458482, "grad_norm": 389.6575012207031, "learning_rate": 8.154140533576171e-07, "loss": 17.0994, "step": 417040 }, { "epoch": 0.842467386078532, "grad_norm": 251.94143676757812, "learning_rate": 8.152230095063051e-07, "loss": 24.1352, "step": 417050 }, { "epoch": 0.8424875867112158, "grad_norm": 322.27734375, "learning_rate": 8.150319860510903e-07, "loss": 12.5676, "step": 417060 }, { "epoch": 0.8425077873438996, "grad_norm": 396.6203918457031, "learning_rate": 8.148409829929005e-07, "loss": 11.8115, "step": 417070 }, { "epoch": 0.8425279879765835, "grad_norm": 240.94351196289062, "learning_rate": 8.14650000332669e-07, "loss": 9.2375, "step": 417080 }, { "epoch": 0.8425481886092673, "grad_norm": 819.8513793945312, "learning_rate": 8.144590380713252e-07, "loss": 12.0466, "step": 417090 }, { "epoch": 0.8425683892419511, "grad_norm": 297.76092529296875, "learning_rate": 8.142680962098016e-07, "loss": 14.8963, "step": 417100 }, { "epoch": 0.8425885898746349, "grad_norm": 93.6225814819336, "learning_rate": 8.140771747490273e-07, "loss": 11.1213, "step": 417110 }, { "epoch": 0.8426087905073187, "grad_norm": 474.6208801269531, "learning_rate": 8.138862736899317e-07, "loss": 16.4858, "step": 417120 }, { "epoch": 0.8426289911400026, "grad_norm": 514.2171630859375, "learning_rate": 8.136953930334484e-07, "loss": 11.1815, "step": 417130 }, { "epoch": 0.8426491917726863, "grad_norm": 234.20852661132812, "learning_rate": 8.135045327805058e-07, "loss": 18.8199, "step": 417140 }, { "epoch": 0.8426693924053701, "grad_norm": 78.7138671875, "learning_rate": 8.133136929320329e-07, "loss": 22.1076, "step": 417150 }, { "epoch": 0.8426895930380539, "grad_norm": 86.7906494140625, "learning_rate": 8.131228734889618e-07, "loss": 9.5779, "step": 417160 }, { "epoch": 0.8427097936707377, "grad_norm": 358.8228759765625, "learning_rate": 8.12932074452224e-07, "loss": 15.5598, "step": 417170 }, { "epoch": 0.8427299943034215, "grad_norm": 306.94622802734375, "learning_rate": 8.127412958227454e-07, "loss": 16.4282, "step": 417180 }, { "epoch": 0.8427501949361054, "grad_norm": 5.045441627502441, "learning_rate": 8.125505376014576e-07, "loss": 18.441, "step": 417190 }, { "epoch": 0.8427703955687892, "grad_norm": 221.3944091796875, "learning_rate": 8.123597997892918e-07, "loss": 20.5832, "step": 417200 }, { "epoch": 0.842790596201473, "grad_norm": 186.3422393798828, "learning_rate": 8.121690823871764e-07, "loss": 20.6377, "step": 417210 }, { "epoch": 0.8428107968341568, "grad_norm": 532.1461791992188, "learning_rate": 8.119783853960401e-07, "loss": 23.3955, "step": 417220 }, { "epoch": 0.8428309974668406, "grad_norm": 127.2115478515625, "learning_rate": 8.11787708816813e-07, "loss": 18.1984, "step": 417230 }, { "epoch": 0.8428511980995245, "grad_norm": 477.167236328125, "learning_rate": 8.115970526504258e-07, "loss": 18.1049, "step": 417240 }, { "epoch": 0.8428713987322083, "grad_norm": 163.93153381347656, "learning_rate": 
8.114064168978064e-07, "loss": 11.5624, "step": 417250 }, { "epoch": 0.8428915993648921, "grad_norm": 777.0939331054688, "learning_rate": 8.112158015598832e-07, "loss": 20.3062, "step": 417260 }, { "epoch": 0.8429117999975759, "grad_norm": 54.6539306640625, "learning_rate": 8.110252066375873e-07, "loss": 11.629, "step": 417270 }, { "epoch": 0.8429320006302597, "grad_norm": 82.93055725097656, "learning_rate": 8.108346321318467e-07, "loss": 10.9843, "step": 417280 }, { "epoch": 0.8429522012629436, "grad_norm": 660.4531860351562, "learning_rate": 8.106440780435881e-07, "loss": 13.2302, "step": 417290 }, { "epoch": 0.8429724018956274, "grad_norm": 184.3472900390625, "learning_rate": 8.104535443737438e-07, "loss": 17.7074, "step": 417300 }, { "epoch": 0.8429926025283112, "grad_norm": 586.7990112304688, "learning_rate": 8.102630311232395e-07, "loss": 19.8601, "step": 417310 }, { "epoch": 0.843012803160995, "grad_norm": 348.7237854003906, "learning_rate": 8.100725382930064e-07, "loss": 20.8465, "step": 417320 }, { "epoch": 0.8430330037936788, "grad_norm": 678.0906372070312, "learning_rate": 8.098820658839718e-07, "loss": 19.6183, "step": 417330 }, { "epoch": 0.8430532044263627, "grad_norm": 380.850341796875, "learning_rate": 8.096916138970623e-07, "loss": 18.166, "step": 417340 }, { "epoch": 0.8430734050590465, "grad_norm": 565.8128662109375, "learning_rate": 8.095011823332089e-07, "loss": 19.0084, "step": 417350 }, { "epoch": 0.8430936056917303, "grad_norm": 253.3446502685547, "learning_rate": 8.093107711933385e-07, "loss": 6.7644, "step": 417360 }, { "epoch": 0.8431138063244141, "grad_norm": 400.7567138671875, "learning_rate": 8.091203804783776e-07, "loss": 16.0196, "step": 417370 }, { "epoch": 0.843134006957098, "grad_norm": 231.70989990234375, "learning_rate": 8.089300101892561e-07, "loss": 15.8512, "step": 417380 }, { "epoch": 0.8431542075897817, "grad_norm": 230.20811462402344, "learning_rate": 8.087396603269027e-07, "loss": 12.4343, "step": 417390 }, { "epoch": 0.8431744082224655, "grad_norm": 86.41107940673828, "learning_rate": 8.085493308922432e-07, "loss": 15.9593, "step": 417400 }, { "epoch": 0.8431946088551493, "grad_norm": 317.251220703125, "learning_rate": 8.083590218862053e-07, "loss": 22.2669, "step": 417410 }, { "epoch": 0.8432148094878331, "grad_norm": 499.9970703125, "learning_rate": 8.081687333097183e-07, "loss": 23.5597, "step": 417420 }, { "epoch": 0.8432350101205169, "grad_norm": 271.93731689453125, "learning_rate": 8.079784651637084e-07, "loss": 15.5502, "step": 417430 }, { "epoch": 0.8432552107532008, "grad_norm": 416.82958984375, "learning_rate": 8.077882174491014e-07, "loss": 15.8095, "step": 417440 }, { "epoch": 0.8432754113858846, "grad_norm": 464.6175537109375, "learning_rate": 8.075979901668269e-07, "loss": 23.8034, "step": 417450 }, { "epoch": 0.8432956120185684, "grad_norm": 284.9903564453125, "learning_rate": 8.074077833178135e-07, "loss": 12.4611, "step": 417460 }, { "epoch": 0.8433158126512522, "grad_norm": 143.05435180664062, "learning_rate": 8.072175969029832e-07, "loss": 11.4091, "step": 417470 }, { "epoch": 0.843336013283936, "grad_norm": 227.09988403320312, "learning_rate": 8.070274309232662e-07, "loss": 16.5846, "step": 417480 }, { "epoch": 0.8433562139166199, "grad_norm": 54.895660400390625, "learning_rate": 8.068372853795903e-07, "loss": 12.8017, "step": 417490 }, { "epoch": 0.8433764145493037, "grad_norm": 195.83372497558594, "learning_rate": 8.066471602728804e-07, "loss": 13.6173, "step": 417500 }, { "epoch": 0.8433966151819875, "grad_norm": 
468.7023620605469, "learning_rate": 8.064570556040629e-07, "loss": 7.9044, "step": 417510 }, { "epoch": 0.8434168158146713, "grad_norm": 811.3773193359375, "learning_rate": 8.06266971374065e-07, "loss": 71.1642, "step": 417520 }, { "epoch": 0.8434370164473551, "grad_norm": 380.46160888671875, "learning_rate": 8.060769075838154e-07, "loss": 22.0033, "step": 417530 }, { "epoch": 0.843457217080039, "grad_norm": 353.1867370605469, "learning_rate": 8.058868642342366e-07, "loss": 23.6593, "step": 417540 }, { "epoch": 0.8434774177127228, "grad_norm": 255.4258575439453, "learning_rate": 8.056968413262555e-07, "loss": 20.9205, "step": 417550 }, { "epoch": 0.8434976183454066, "grad_norm": 628.8818969726562, "learning_rate": 8.055068388608011e-07, "loss": 19.278, "step": 417560 }, { "epoch": 0.8435178189780904, "grad_norm": 0.0, "learning_rate": 8.053168568387976e-07, "loss": 19.0558, "step": 417570 }, { "epoch": 0.8435380196107742, "grad_norm": 553.6051025390625, "learning_rate": 8.051268952611696e-07, "loss": 20.5668, "step": 417580 }, { "epoch": 0.8435582202434581, "grad_norm": 200.57460021972656, "learning_rate": 8.04936954128846e-07, "loss": 23.3032, "step": 417590 }, { "epoch": 0.8435784208761419, "grad_norm": 540.6591186523438, "learning_rate": 8.047470334427504e-07, "loss": 25.1376, "step": 417600 }, { "epoch": 0.8435986215088257, "grad_norm": 305.846435546875, "learning_rate": 8.045571332038082e-07, "loss": 16.2725, "step": 417610 }, { "epoch": 0.8436188221415095, "grad_norm": 129.34548950195312, "learning_rate": 8.043672534129465e-07, "loss": 22.8199, "step": 417620 }, { "epoch": 0.8436390227741933, "grad_norm": 503.1650695800781, "learning_rate": 8.041773940710884e-07, "loss": 22.0007, "step": 417630 }, { "epoch": 0.8436592234068772, "grad_norm": 598.3443603515625, "learning_rate": 8.039875551791626e-07, "loss": 19.9376, "step": 417640 }, { "epoch": 0.8436794240395609, "grad_norm": 36.877967834472656, "learning_rate": 8.037977367380922e-07, "loss": 16.0755, "step": 417650 }, { "epoch": 0.8436996246722447, "grad_norm": 80.91256713867188, "learning_rate": 8.036079387488016e-07, "loss": 9.9852, "step": 417660 }, { "epoch": 0.8437198253049285, "grad_norm": 272.49676513671875, "learning_rate": 8.034181612122183e-07, "loss": 19.1621, "step": 417670 }, { "epoch": 0.8437400259376123, "grad_norm": 422.8641052246094, "learning_rate": 8.032284041292649e-07, "loss": 29.3479, "step": 417680 }, { "epoch": 0.8437602265702961, "grad_norm": 508.0213928222656, "learning_rate": 8.030386675008678e-07, "loss": 22.7619, "step": 417690 }, { "epoch": 0.84378042720298, "grad_norm": 504.5783996582031, "learning_rate": 8.028489513279503e-07, "loss": 22.4872, "step": 417700 }, { "epoch": 0.8438006278356638, "grad_norm": 847.2953491210938, "learning_rate": 8.026592556114393e-07, "loss": 31.6101, "step": 417710 }, { "epoch": 0.8438208284683476, "grad_norm": 361.8731689453125, "learning_rate": 8.02469580352258e-07, "loss": 21.0525, "step": 417720 }, { "epoch": 0.8438410291010314, "grad_norm": 255.57284545898438, "learning_rate": 8.022799255513297e-07, "loss": 15.0666, "step": 417730 }, { "epoch": 0.8438612297337152, "grad_norm": 145.09686279296875, "learning_rate": 8.020902912095807e-07, "loss": 10.2129, "step": 417740 }, { "epoch": 0.8438814303663991, "grad_norm": 383.4001770019531, "learning_rate": 8.019006773279348e-07, "loss": 23.5098, "step": 417750 }, { "epoch": 0.8439016309990829, "grad_norm": 146.52294921875, "learning_rate": 8.01711083907315e-07, "loss": 10.6484, "step": 417760 }, { "epoch": 0.8439218316317667, 
"grad_norm": 692.0272216796875, "learning_rate": 8.015215109486457e-07, "loss": 14.2041, "step": 417770 }, { "epoch": 0.8439420322644505, "grad_norm": 436.42169189453125, "learning_rate": 8.013319584528539e-07, "loss": 23.2882, "step": 417780 }, { "epoch": 0.8439622328971343, "grad_norm": 351.6768798828125, "learning_rate": 8.011424264208584e-07, "loss": 14.4282, "step": 417790 }, { "epoch": 0.8439824335298182, "grad_norm": 14.351901054382324, "learning_rate": 8.009529148535855e-07, "loss": 25.9734, "step": 417800 }, { "epoch": 0.844002634162502, "grad_norm": 185.14767456054688, "learning_rate": 8.007634237519595e-07, "loss": 20.9192, "step": 417810 }, { "epoch": 0.8440228347951858, "grad_norm": 589.0468139648438, "learning_rate": 8.005739531169044e-07, "loss": 15.9933, "step": 417820 }, { "epoch": 0.8440430354278696, "grad_norm": 383.037841796875, "learning_rate": 8.003845029493407e-07, "loss": 18.9315, "step": 417830 }, { "epoch": 0.8440632360605534, "grad_norm": 417.3287048339844, "learning_rate": 8.001950732501934e-07, "loss": 12.2846, "step": 417840 }, { "epoch": 0.8440834366932373, "grad_norm": 0.0, "learning_rate": 8.000056640203885e-07, "loss": 15.2792, "step": 417850 }, { "epoch": 0.8441036373259211, "grad_norm": 475.166015625, "learning_rate": 7.99816275260844e-07, "loss": 19.9353, "step": 417860 }, { "epoch": 0.8441238379586049, "grad_norm": 279.0462951660156, "learning_rate": 7.996269069724861e-07, "loss": 20.397, "step": 417870 }, { "epoch": 0.8441440385912887, "grad_norm": 289.1851501464844, "learning_rate": 7.994375591562376e-07, "loss": 10.9729, "step": 417880 }, { "epoch": 0.8441642392239725, "grad_norm": 120.9384994506836, "learning_rate": 7.992482318130218e-07, "loss": 14.9548, "step": 417890 }, { "epoch": 0.8441844398566564, "grad_norm": 785.109375, "learning_rate": 7.990589249437591e-07, "loss": 14.2628, "step": 417900 }, { "epoch": 0.8442046404893401, "grad_norm": 314.58782958984375, "learning_rate": 7.988696385493744e-07, "loss": 17.008, "step": 417910 }, { "epoch": 0.8442248411220239, "grad_norm": 226.8375701904297, "learning_rate": 7.986803726307901e-07, "loss": 15.2947, "step": 417920 }, { "epoch": 0.8442450417547077, "grad_norm": 551.301025390625, "learning_rate": 7.984911271889267e-07, "loss": 19.3609, "step": 417930 }, { "epoch": 0.8442652423873915, "grad_norm": 293.1257019042969, "learning_rate": 7.983019022247096e-07, "loss": 11.7711, "step": 417940 }, { "epoch": 0.8442854430200754, "grad_norm": 529.7147216796875, "learning_rate": 7.98112697739058e-07, "loss": 20.5691, "step": 417950 }, { "epoch": 0.8443056436527592, "grad_norm": 159.33741760253906, "learning_rate": 7.979235137328961e-07, "loss": 7.6765, "step": 417960 }, { "epoch": 0.844325844285443, "grad_norm": 36.75069046020508, "learning_rate": 7.97734350207145e-07, "loss": 20.2914, "step": 417970 }, { "epoch": 0.8443460449181268, "grad_norm": 212.27427673339844, "learning_rate": 7.975452071627277e-07, "loss": 17.0566, "step": 417980 }, { "epoch": 0.8443662455508106, "grad_norm": 7.3990983963012695, "learning_rate": 7.973560846005646e-07, "loss": 38.3213, "step": 417990 }, { "epoch": 0.8443864461834945, "grad_norm": 307.8514404296875, "learning_rate": 7.971669825215789e-07, "loss": 12.0889, "step": 418000 }, { "epoch": 0.8444066468161783, "grad_norm": 210.28102111816406, "learning_rate": 7.969779009266915e-07, "loss": 11.6439, "step": 418010 }, { "epoch": 0.8444268474488621, "grad_norm": 271.6314697265625, "learning_rate": 7.967888398168233e-07, "loss": 17.1989, "step": 418020 }, { "epoch": 
0.8444470480815459, "grad_norm": 108.26612091064453, "learning_rate": 7.965997991928975e-07, "loss": 22.6884, "step": 418030 }, { "epoch": 0.8444672487142297, "grad_norm": 34.8552131652832, "learning_rate": 7.964107790558345e-07, "loss": 11.7574, "step": 418040 }, { "epoch": 0.8444874493469136, "grad_norm": 261.3864440917969, "learning_rate": 7.962217794065547e-07, "loss": 15.4715, "step": 418050 }, { "epoch": 0.8445076499795974, "grad_norm": 63.58285140991211, "learning_rate": 7.960328002459794e-07, "loss": 27.665, "step": 418060 }, { "epoch": 0.8445278506122812, "grad_norm": 1134.8045654296875, "learning_rate": 7.958438415750331e-07, "loss": 19.2892, "step": 418070 }, { "epoch": 0.844548051244965, "grad_norm": 412.56097412109375, "learning_rate": 7.956549033946314e-07, "loss": 11.5439, "step": 418080 }, { "epoch": 0.8445682518776488, "grad_norm": 50.35539245605469, "learning_rate": 7.954659857056984e-07, "loss": 25.4583, "step": 418090 }, { "epoch": 0.8445884525103327, "grad_norm": 220.02413940429688, "learning_rate": 7.952770885091548e-07, "loss": 10.1853, "step": 418100 }, { "epoch": 0.8446086531430165, "grad_norm": 501.8572998046875, "learning_rate": 7.950882118059211e-07, "loss": 17.0491, "step": 418110 }, { "epoch": 0.8446288537757003, "grad_norm": 3.201639175415039, "learning_rate": 7.948993555969159e-07, "loss": 18.2402, "step": 418120 }, { "epoch": 0.8446490544083841, "grad_norm": 322.1613464355469, "learning_rate": 7.947105198830612e-07, "loss": 14.7168, "step": 418130 }, { "epoch": 0.8446692550410679, "grad_norm": 709.388427734375, "learning_rate": 7.945217046652804e-07, "loss": 20.6433, "step": 418140 }, { "epoch": 0.8446894556737518, "grad_norm": 165.0631561279297, "learning_rate": 7.94332909944488e-07, "loss": 31.5094, "step": 418150 }, { "epoch": 0.8447096563064355, "grad_norm": 380.36480712890625, "learning_rate": 7.941441357216068e-07, "loss": 13.631, "step": 418160 }, { "epoch": 0.8447298569391193, "grad_norm": 1242.5738525390625, "learning_rate": 7.939553819975582e-07, "loss": 34.1927, "step": 418170 }, { "epoch": 0.8447500575718031, "grad_norm": 477.37554931640625, "learning_rate": 7.937666487732609e-07, "loss": 20.4424, "step": 418180 }, { "epoch": 0.8447702582044869, "grad_norm": 306.4768981933594, "learning_rate": 7.935779360496337e-07, "loss": 17.0729, "step": 418190 }, { "epoch": 0.8447904588371707, "grad_norm": 303.6390686035156, "learning_rate": 7.933892438275987e-07, "loss": 23.9531, "step": 418200 }, { "epoch": 0.8448106594698546, "grad_norm": 258.2592468261719, "learning_rate": 7.932005721080738e-07, "loss": 15.5802, "step": 418210 }, { "epoch": 0.8448308601025384, "grad_norm": 406.1451416015625, "learning_rate": 7.930119208919784e-07, "loss": 7.2556, "step": 418220 }, { "epoch": 0.8448510607352222, "grad_norm": 448.5814208984375, "learning_rate": 7.92823290180234e-07, "loss": 15.0219, "step": 418230 }, { "epoch": 0.844871261367906, "grad_norm": 554.1819458007812, "learning_rate": 7.926346799737572e-07, "loss": 21.8187, "step": 418240 }, { "epoch": 0.8448914620005898, "grad_norm": 417.68475341796875, "learning_rate": 7.924460902734698e-07, "loss": 37.0044, "step": 418250 }, { "epoch": 0.8449116626332737, "grad_norm": 555.5881958007812, "learning_rate": 7.922575210802896e-07, "loss": 17.1528, "step": 418260 }, { "epoch": 0.8449318632659575, "grad_norm": 317.4004821777344, "learning_rate": 7.920689723951353e-07, "loss": 16.6019, "step": 418270 }, { "epoch": 0.8449520638986413, "grad_norm": 28.36787223815918, "learning_rate": 7.918804442189271e-07, "loss": 
23.7992, "step": 418280 }, { "epoch": 0.8449722645313251, "grad_norm": 399.80255126953125, "learning_rate": 7.916919365525827e-07, "loss": 9.0299, "step": 418290 }, { "epoch": 0.8449924651640089, "grad_norm": 201.8307647705078, "learning_rate": 7.91503449397022e-07, "loss": 21.9661, "step": 418300 }, { "epoch": 0.8450126657966928, "grad_norm": 473.5652770996094, "learning_rate": 7.913149827531619e-07, "loss": 18.6611, "step": 418310 }, { "epoch": 0.8450328664293766, "grad_norm": 422.7249755859375, "learning_rate": 7.911265366219234e-07, "loss": 18.0575, "step": 418320 }, { "epoch": 0.8450530670620604, "grad_norm": 108.836669921875, "learning_rate": 7.909381110042241e-07, "loss": 14.3215, "step": 418330 }, { "epoch": 0.8450732676947442, "grad_norm": 638.8184814453125, "learning_rate": 7.907497059009806e-07, "loss": 22.2167, "step": 418340 }, { "epoch": 0.845093468327428, "grad_norm": 88.75518798828125, "learning_rate": 7.90561321313113e-07, "loss": 10.6438, "step": 418350 }, { "epoch": 0.8451136689601119, "grad_norm": 278.2799072265625, "learning_rate": 7.903729572415397e-07, "loss": 15.4431, "step": 418360 }, { "epoch": 0.8451338695927957, "grad_norm": 168.87420654296875, "learning_rate": 7.901846136871766e-07, "loss": 11.0043, "step": 418370 }, { "epoch": 0.8451540702254795, "grad_norm": 257.9417419433594, "learning_rate": 7.899962906509434e-07, "loss": 10.7762, "step": 418380 }, { "epoch": 0.8451742708581633, "grad_norm": 434.875732421875, "learning_rate": 7.898079881337594e-07, "loss": 28.863, "step": 418390 }, { "epoch": 0.8451944714908471, "grad_norm": 27.86065673828125, "learning_rate": 7.89619706136539e-07, "loss": 16.9371, "step": 418400 }, { "epoch": 0.845214672123531, "grad_norm": 224.47476196289062, "learning_rate": 7.894314446602013e-07, "loss": 28.2371, "step": 418410 }, { "epoch": 0.8452348727562147, "grad_norm": 769.4656982421875, "learning_rate": 7.892432037056652e-07, "loss": 16.6101, "step": 418420 }, { "epoch": 0.8452550733888985, "grad_norm": 262.0355529785156, "learning_rate": 7.890549832738465e-07, "loss": 22.3285, "step": 418430 }, { "epoch": 0.8452752740215823, "grad_norm": 214.3258819580078, "learning_rate": 7.888667833656627e-07, "loss": 16.3148, "step": 418440 }, { "epoch": 0.8452954746542661, "grad_norm": 118.48668670654297, "learning_rate": 7.88678603982031e-07, "loss": 20.364, "step": 418450 }, { "epoch": 0.84531567528695, "grad_norm": 140.25999450683594, "learning_rate": 7.884904451238712e-07, "loss": 8.9497, "step": 418460 }, { "epoch": 0.8453358759196338, "grad_norm": 440.9783935546875, "learning_rate": 7.883023067920964e-07, "loss": 11.734, "step": 418470 }, { "epoch": 0.8453560765523176, "grad_norm": 297.5727844238281, "learning_rate": 7.881141889876248e-07, "loss": 13.3235, "step": 418480 }, { "epoch": 0.8453762771850014, "grad_norm": 0.0, "learning_rate": 7.879260917113751e-07, "loss": 13.3954, "step": 418490 }, { "epoch": 0.8453964778176852, "grad_norm": 308.35198974609375, "learning_rate": 7.877380149642628e-07, "loss": 10.9062, "step": 418500 }, { "epoch": 0.845416678450369, "grad_norm": 332.7234191894531, "learning_rate": 7.875499587472035e-07, "loss": 17.3936, "step": 418510 }, { "epoch": 0.8454368790830529, "grad_norm": 69.22209930419922, "learning_rate": 7.873619230611157e-07, "loss": 21.6091, "step": 418520 }, { "epoch": 0.8454570797157367, "grad_norm": 530.6533203125, "learning_rate": 7.871739079069152e-07, "loss": 16.756, "step": 418530 }, { "epoch": 0.8454772803484205, "grad_norm": 450.29388427734375, "learning_rate": 7.869859132855168e-07, 
"loss": 21.5757, "step": 418540 }, { "epoch": 0.8454974809811043, "grad_norm": 36.44185256958008, "learning_rate": 7.867979391978398e-07, "loss": 21.2164, "step": 418550 }, { "epoch": 0.8455176816137882, "grad_norm": 379.1933898925781, "learning_rate": 7.866099856447968e-07, "loss": 13.118, "step": 418560 }, { "epoch": 0.845537882246472, "grad_norm": 519.5206298828125, "learning_rate": 7.864220526273069e-07, "loss": 21.3711, "step": 418570 }, { "epoch": 0.8455580828791558, "grad_norm": 258.16107177734375, "learning_rate": 7.862341401462842e-07, "loss": 19.3362, "step": 418580 }, { "epoch": 0.8455782835118396, "grad_norm": 466.6935119628906, "learning_rate": 7.86046248202646e-07, "loss": 21.6412, "step": 418590 }, { "epoch": 0.8455984841445234, "grad_norm": 386.0509948730469, "learning_rate": 7.858583767973071e-07, "loss": 9.0111, "step": 418600 }, { "epoch": 0.8456186847772073, "grad_norm": 476.1527099609375, "learning_rate": 7.856705259311826e-07, "loss": 16.6665, "step": 418610 }, { "epoch": 0.8456388854098911, "grad_norm": 712.1090087890625, "learning_rate": 7.854826956051897e-07, "loss": 21.9178, "step": 418620 }, { "epoch": 0.8456590860425749, "grad_norm": 798.2106323242188, "learning_rate": 7.852948858202419e-07, "loss": 36.1859, "step": 418630 }, { "epoch": 0.8456792866752587, "grad_norm": 459.5579833984375, "learning_rate": 7.851070965772572e-07, "loss": 23.7613, "step": 418640 }, { "epoch": 0.8456994873079425, "grad_norm": 570.1857299804688, "learning_rate": 7.849193278771489e-07, "loss": 18.8711, "step": 418650 }, { "epoch": 0.8457196879406264, "grad_norm": 141.66217041015625, "learning_rate": 7.847315797208316e-07, "loss": 18.4804, "step": 418660 }, { "epoch": 0.8457398885733101, "grad_norm": 412.1319580078125, "learning_rate": 7.845438521092213e-07, "loss": 12.6574, "step": 418670 }, { "epoch": 0.8457600892059939, "grad_norm": 131.34910583496094, "learning_rate": 7.843561450432352e-07, "loss": 10.978, "step": 418680 }, { "epoch": 0.8457802898386777, "grad_norm": 167.0421142578125, "learning_rate": 7.841684585237836e-07, "loss": 17.1971, "step": 418690 }, { "epoch": 0.8458004904713615, "grad_norm": 510.4516296386719, "learning_rate": 7.839807925517834e-07, "loss": 17.4608, "step": 418700 }, { "epoch": 0.8458206911040453, "grad_norm": 278.193115234375, "learning_rate": 7.837931471281513e-07, "loss": 11.2845, "step": 418710 }, { "epoch": 0.8458408917367292, "grad_norm": 289.3529968261719, "learning_rate": 7.836055222537997e-07, "loss": 8.1362, "step": 418720 }, { "epoch": 0.845861092369413, "grad_norm": 482.45391845703125, "learning_rate": 7.834179179296419e-07, "loss": 13.6992, "step": 418730 }, { "epoch": 0.8458812930020968, "grad_norm": 90.5341567993164, "learning_rate": 7.832303341565938e-07, "loss": 15.0954, "step": 418740 }, { "epoch": 0.8459014936347806, "grad_norm": 542.28466796875, "learning_rate": 7.830427709355726e-07, "loss": 10.9046, "step": 418750 }, { "epoch": 0.8459216942674644, "grad_norm": 462.2651672363281, "learning_rate": 7.828552282674867e-07, "loss": 34.2711, "step": 418760 }, { "epoch": 0.8459418949001483, "grad_norm": 730.208984375, "learning_rate": 7.826677061532528e-07, "loss": 19.8283, "step": 418770 }, { "epoch": 0.8459620955328321, "grad_norm": 314.582275390625, "learning_rate": 7.824802045937863e-07, "loss": 27.1289, "step": 418780 }, { "epoch": 0.8459822961655159, "grad_norm": 522.7733154296875, "learning_rate": 7.822927235900001e-07, "loss": 12.4487, "step": 418790 }, { "epoch": 0.8460024967981997, "grad_norm": 107.94712829589844, "learning_rate": 
7.821052631428061e-07, "loss": 12.2357, "step": 418800 }, { "epoch": 0.8460226974308835, "grad_norm": 296.1025085449219, "learning_rate": 7.819178232531205e-07, "loss": 11.7953, "step": 418810 }, { "epoch": 0.8460428980635674, "grad_norm": 190.7719268798828, "learning_rate": 7.81730403921856e-07, "loss": 20.8951, "step": 418820 }, { "epoch": 0.8460630986962512, "grad_norm": 181.572509765625, "learning_rate": 7.815430051499251e-07, "loss": 17.1508, "step": 418830 }, { "epoch": 0.846083299328935, "grad_norm": 123.58523559570312, "learning_rate": 7.813556269382427e-07, "loss": 17.396, "step": 418840 }, { "epoch": 0.8461034999616188, "grad_norm": 284.181884765625, "learning_rate": 7.811682692877204e-07, "loss": 16.8144, "step": 418850 }, { "epoch": 0.8461237005943026, "grad_norm": 437.01385498046875, "learning_rate": 7.809809321992729e-07, "loss": 18.8257, "step": 418860 }, { "epoch": 0.8461439012269865, "grad_norm": 137.48239135742188, "learning_rate": 7.807936156738133e-07, "loss": 10.2114, "step": 418870 }, { "epoch": 0.8461641018596703, "grad_norm": 38.34272384643555, "learning_rate": 7.80606319712252e-07, "loss": 13.6911, "step": 418880 }, { "epoch": 0.8461843024923541, "grad_norm": 320.4749755859375, "learning_rate": 7.804190443155057e-07, "loss": 9.4036, "step": 418890 }, { "epoch": 0.8462045031250379, "grad_norm": 527.709228515625, "learning_rate": 7.802317894844835e-07, "loss": 15.5926, "step": 418900 }, { "epoch": 0.8462247037577217, "grad_norm": 262.3277282714844, "learning_rate": 7.800445552201014e-07, "loss": 22.2958, "step": 418910 }, { "epoch": 0.8462449043904056, "grad_norm": 332.1363220214844, "learning_rate": 7.798573415232686e-07, "loss": 13.8192, "step": 418920 }, { "epoch": 0.8462651050230893, "grad_norm": 266.5022277832031, "learning_rate": 7.79670148394901e-07, "loss": 20.8731, "step": 418930 }, { "epoch": 0.8462853056557731, "grad_norm": 81.87886810302734, "learning_rate": 7.794829758359085e-07, "loss": 18.3454, "step": 418940 }, { "epoch": 0.8463055062884569, "grad_norm": 615.016845703125, "learning_rate": 7.792958238472037e-07, "loss": 21.2683, "step": 418950 }, { "epoch": 0.8463257069211407, "grad_norm": 299.83892822265625, "learning_rate": 7.791086924296998e-07, "loss": 20.1067, "step": 418960 }, { "epoch": 0.8463459075538246, "grad_norm": 742.3004150390625, "learning_rate": 7.789215815843082e-07, "loss": 28.9742, "step": 418970 }, { "epoch": 0.8463661081865084, "grad_norm": 306.6733703613281, "learning_rate": 7.787344913119399e-07, "loss": 9.6765, "step": 418980 }, { "epoch": 0.8463863088191922, "grad_norm": 59.7943229675293, "learning_rate": 7.785474216135081e-07, "loss": 6.1867, "step": 418990 }, { "epoch": 0.846406509451876, "grad_norm": 306.6423645019531, "learning_rate": 7.783603724899258e-07, "loss": 8.696, "step": 419000 }, { "epoch": 0.8464267100845598, "grad_norm": 192.6134796142578, "learning_rate": 7.781733439421013e-07, "loss": 17.1364, "step": 419010 }, { "epoch": 0.8464469107172437, "grad_norm": 316.57684326171875, "learning_rate": 7.779863359709472e-07, "loss": 13.7423, "step": 419020 }, { "epoch": 0.8464671113499275, "grad_norm": 244.40464782714844, "learning_rate": 7.777993485773771e-07, "loss": 7.499, "step": 419030 }, { "epoch": 0.8464873119826113, "grad_norm": 271.5377502441406, "learning_rate": 7.776123817623011e-07, "loss": 17.381, "step": 419040 }, { "epoch": 0.8465075126152951, "grad_norm": 281.7366027832031, "learning_rate": 7.774254355266287e-07, "loss": 6.5732, "step": 419050 }, { "epoch": 0.8465277132479789, "grad_norm": 
173.3346405029297, "learning_rate": 7.772385098712731e-07, "loss": 30.2304, "step": 419060 }, { "epoch": 0.8465479138806628, "grad_norm": 68.60765075683594, "learning_rate": 7.770516047971466e-07, "loss": 10.1655, "step": 419070 }, { "epoch": 0.8465681145133466, "grad_norm": 230.587646484375, "learning_rate": 7.768647203051566e-07, "loss": 17.9789, "step": 419080 }, { "epoch": 0.8465883151460304, "grad_norm": 264.0446472167969, "learning_rate": 7.766778563962152e-07, "loss": 13.6752, "step": 419090 }, { "epoch": 0.8466085157787142, "grad_norm": 503.3843994140625, "learning_rate": 7.76491013071235e-07, "loss": 29.5807, "step": 419100 }, { "epoch": 0.846628716411398, "grad_norm": 331.93603515625, "learning_rate": 7.763041903311258e-07, "loss": 27.7832, "step": 419110 }, { "epoch": 0.8466489170440819, "grad_norm": 235.54718017578125, "learning_rate": 7.761173881767958e-07, "loss": 18.7475, "step": 419120 }, { "epoch": 0.8466691176767657, "grad_norm": 248.29202270507812, "learning_rate": 7.759306066091593e-07, "loss": 14.1553, "step": 419130 }, { "epoch": 0.8466893183094495, "grad_norm": 10.38530445098877, "learning_rate": 7.757438456291245e-07, "loss": 15.3336, "step": 419140 }, { "epoch": 0.8467095189421333, "grad_norm": 779.1312255859375, "learning_rate": 7.755571052376004e-07, "loss": 26.1951, "step": 419150 }, { "epoch": 0.8467297195748171, "grad_norm": 179.73486328125, "learning_rate": 7.753703854354999e-07, "loss": 12.9748, "step": 419160 }, { "epoch": 0.846749920207501, "grad_norm": 385.7361755371094, "learning_rate": 7.751836862237305e-07, "loss": 27.418, "step": 419170 }, { "epoch": 0.8467701208401847, "grad_norm": 370.1129150390625, "learning_rate": 7.749970076032048e-07, "loss": 12.369, "step": 419180 }, { "epoch": 0.8467903214728685, "grad_norm": 276.7247314453125, "learning_rate": 7.748103495748299e-07, "loss": 14.2185, "step": 419190 }, { "epoch": 0.8468105221055523, "grad_norm": 461.1651611328125, "learning_rate": 7.746237121395184e-07, "loss": 17.3819, "step": 419200 }, { "epoch": 0.8468307227382361, "grad_norm": 336.64678955078125, "learning_rate": 7.744370952981778e-07, "loss": 15.2645, "step": 419210 }, { "epoch": 0.8468509233709199, "grad_norm": 239.9922637939453, "learning_rate": 7.742504990517174e-07, "loss": 20.9572, "step": 419220 }, { "epoch": 0.8468711240036038, "grad_norm": 354.9892578125, "learning_rate": 7.740639234010488e-07, "loss": 17.1885, "step": 419230 }, { "epoch": 0.8468913246362876, "grad_norm": 346.224853515625, "learning_rate": 7.73877368347079e-07, "loss": 21.1393, "step": 419240 }, { "epoch": 0.8469115252689714, "grad_norm": 2.0196142196655273, "learning_rate": 7.736908338907195e-07, "loss": 28.4272, "step": 419250 }, { "epoch": 0.8469317259016552, "grad_norm": 301.7315979003906, "learning_rate": 7.735043200328784e-07, "loss": 11.9376, "step": 419260 }, { "epoch": 0.846951926534339, "grad_norm": 120.46678924560547, "learning_rate": 7.733178267744634e-07, "loss": 23.0687, "step": 419270 }, { "epoch": 0.8469721271670229, "grad_norm": 213.86441040039062, "learning_rate": 7.73131354116386e-07, "loss": 20.7339, "step": 419280 }, { "epoch": 0.8469923277997067, "grad_norm": 305.5952453613281, "learning_rate": 7.729449020595531e-07, "loss": 20.777, "step": 419290 }, { "epoch": 0.8470125284323905, "grad_norm": 324.5541076660156, "learning_rate": 7.727584706048735e-07, "loss": 11.8582, "step": 419300 }, { "epoch": 0.8470327290650743, "grad_norm": 314.37835693359375, "learning_rate": 7.72572059753256e-07, "loss": 24.1484, "step": 419310 }, { "epoch": 
0.8470529296977581, "grad_norm": 577.531982421875, "learning_rate": 7.723856695056109e-07, "loss": 12.3574, "step": 419320 }, { "epoch": 0.847073130330442, "grad_norm": 294.72174072265625, "learning_rate": 7.721992998628452e-07, "loss": 14.6806, "step": 419330 }, { "epoch": 0.8470933309631258, "grad_norm": 275.53570556640625, "learning_rate": 7.720129508258667e-07, "loss": 22.1799, "step": 419340 }, { "epoch": 0.8471135315958096, "grad_norm": 132.30506896972656, "learning_rate": 7.71826622395585e-07, "loss": 12.9379, "step": 419350 }, { "epoch": 0.8471337322284934, "grad_norm": 49.43588638305664, "learning_rate": 7.716403145729073e-07, "loss": 26.2082, "step": 419360 }, { "epoch": 0.8471539328611772, "grad_norm": 632.3158569335938, "learning_rate": 7.714540273587412e-07, "loss": 13.9699, "step": 419370 }, { "epoch": 0.8471741334938611, "grad_norm": 576.3694458007812, "learning_rate": 7.712677607539948e-07, "loss": 18.9505, "step": 419380 }, { "epoch": 0.8471943341265449, "grad_norm": 270.6705322265625, "learning_rate": 7.710815147595779e-07, "loss": 14.4009, "step": 419390 }, { "epoch": 0.8472145347592287, "grad_norm": 484.50408935546875, "learning_rate": 7.708952893763972e-07, "loss": 21.9983, "step": 419400 }, { "epoch": 0.8472347353919125, "grad_norm": 135.74899291992188, "learning_rate": 7.707090846053577e-07, "loss": 38.6355, "step": 419410 }, { "epoch": 0.8472549360245963, "grad_norm": 633.8313598632812, "learning_rate": 7.705229004473713e-07, "loss": 14.0703, "step": 419420 }, { "epoch": 0.8472751366572802, "grad_norm": 421.99761962890625, "learning_rate": 7.703367369033432e-07, "loss": 19.2649, "step": 419430 }, { "epoch": 0.8472953372899639, "grad_norm": 155.74380493164062, "learning_rate": 7.701505939741793e-07, "loss": 17.2813, "step": 419440 }, { "epoch": 0.8473155379226477, "grad_norm": 489.8328552246094, "learning_rate": 7.699644716607896e-07, "loss": 16.5044, "step": 419450 }, { "epoch": 0.8473357385553315, "grad_norm": 95.47686004638672, "learning_rate": 7.697783699640793e-07, "loss": 14.6129, "step": 419460 }, { "epoch": 0.8473559391880153, "grad_norm": 133.22848510742188, "learning_rate": 7.695922888849566e-07, "loss": 44.6609, "step": 419470 }, { "epoch": 0.8473761398206991, "grad_norm": 166.31944274902344, "learning_rate": 7.694062284243287e-07, "loss": 16.1958, "step": 419480 }, { "epoch": 0.847396340453383, "grad_norm": 258.57080078125, "learning_rate": 7.692201885831002e-07, "loss": 36.7881, "step": 419490 }, { "epoch": 0.8474165410860668, "grad_norm": 318.3566589355469, "learning_rate": 7.690341693621805e-07, "loss": 18.1619, "step": 419500 }, { "epoch": 0.8474367417187506, "grad_norm": 139.5306854248047, "learning_rate": 7.68848170762474e-07, "loss": 18.9246, "step": 419510 }, { "epoch": 0.8474569423514344, "grad_norm": 554.505615234375, "learning_rate": 7.686621927848898e-07, "loss": 19.0492, "step": 419520 }, { "epoch": 0.8474771429841182, "grad_norm": 285.1543884277344, "learning_rate": 7.684762354303316e-07, "loss": 32.9261, "step": 419530 }, { "epoch": 0.8474973436168021, "grad_norm": 286.7790832519531, "learning_rate": 7.682902986997076e-07, "loss": 6.3039, "step": 419540 }, { "epoch": 0.8475175442494859, "grad_norm": 158.7056121826172, "learning_rate": 7.681043825939238e-07, "loss": 11.8037, "step": 419550 }, { "epoch": 0.8475377448821697, "grad_norm": 227.37754821777344, "learning_rate": 7.679184871138851e-07, "loss": 10.6346, "step": 419560 }, { "epoch": 0.8475579455148535, "grad_norm": 442.99200439453125, "learning_rate": 7.677326122604995e-07, "loss": 
40.652, "step": 419570 }, { "epoch": 0.8475781461475373, "grad_norm": 409.53076171875, "learning_rate": 7.675467580346719e-07, "loss": 19.2032, "step": 419580 }, { "epoch": 0.8475983467802212, "grad_norm": 614.8516845703125, "learning_rate": 7.673609244373065e-07, "loss": 19.0753, "step": 419590 }, { "epoch": 0.847618547412905, "grad_norm": 293.042724609375, "learning_rate": 7.671751114693104e-07, "loss": 21.46, "step": 419600 }, { "epoch": 0.8476387480455888, "grad_norm": 289.71075439453125, "learning_rate": 7.669893191315924e-07, "loss": 18.8878, "step": 419610 }, { "epoch": 0.8476589486782726, "grad_norm": 325.4839782714844, "learning_rate": 7.668035474250523e-07, "loss": 30.0041, "step": 419620 }, { "epoch": 0.8476791493109564, "grad_norm": 173.85630798339844, "learning_rate": 7.666177963505989e-07, "loss": 8.7573, "step": 419630 }, { "epoch": 0.8476993499436403, "grad_norm": 526.9801025390625, "learning_rate": 7.664320659091373e-07, "loss": 27.2767, "step": 419640 }, { "epoch": 0.8477195505763241, "grad_norm": 909.6722412109375, "learning_rate": 7.662463561015726e-07, "loss": 16.1532, "step": 419650 }, { "epoch": 0.8477397512090079, "grad_norm": 204.0998077392578, "learning_rate": 7.66060666928809e-07, "loss": 14.3266, "step": 419660 }, { "epoch": 0.8477599518416917, "grad_norm": 357.2112121582031, "learning_rate": 7.658749983917512e-07, "loss": 23.5506, "step": 419670 }, { "epoch": 0.8477801524743755, "grad_norm": 116.1423568725586, "learning_rate": 7.656893504913082e-07, "loss": 17.3412, "step": 419680 }, { "epoch": 0.8478003531070594, "grad_norm": 1107.73388671875, "learning_rate": 7.655037232283791e-07, "loss": 29.388, "step": 419690 }, { "epoch": 0.8478205537397431, "grad_norm": 449.1150817871094, "learning_rate": 7.653181166038715e-07, "loss": 25.4893, "step": 419700 }, { "epoch": 0.8478407543724269, "grad_norm": 279.9544982910156, "learning_rate": 7.651325306186908e-07, "loss": 23.8791, "step": 419710 }, { "epoch": 0.8478609550051107, "grad_norm": 207.33346557617188, "learning_rate": 7.649469652737407e-07, "loss": 22.5027, "step": 419720 }, { "epoch": 0.8478811556377945, "grad_norm": 521.3672485351562, "learning_rate": 7.647614205699244e-07, "loss": 22.6143, "step": 419730 }, { "epoch": 0.8479013562704784, "grad_norm": 22.869937896728516, "learning_rate": 7.645758965081478e-07, "loss": 22.4272, "step": 419740 }, { "epoch": 0.8479215569031622, "grad_norm": 543.2610473632812, "learning_rate": 7.643903930893154e-07, "loss": 16.8551, "step": 419750 }, { "epoch": 0.847941757535846, "grad_norm": 526.971923828125, "learning_rate": 7.64204910314329e-07, "loss": 23.0066, "step": 419760 }, { "epoch": 0.8479619581685298, "grad_norm": 616.48876953125, "learning_rate": 7.640194481840951e-07, "loss": 21.9162, "step": 419770 }, { "epoch": 0.8479821588012136, "grad_norm": 275.0811462402344, "learning_rate": 7.638340066995154e-07, "loss": 13.5628, "step": 419780 }, { "epoch": 0.8480023594338975, "grad_norm": 305.490966796875, "learning_rate": 7.636485858614962e-07, "loss": 21.4191, "step": 419790 }, { "epoch": 0.8480225600665813, "grad_norm": 31.853561401367188, "learning_rate": 7.63463185670939e-07, "loss": 38.427, "step": 419800 }, { "epoch": 0.8480427606992651, "grad_norm": 205.2315216064453, "learning_rate": 7.632778061287494e-07, "loss": 11.8003, "step": 419810 }, { "epoch": 0.8480629613319489, "grad_norm": 0.2011181265115738, "learning_rate": 7.630924472358304e-07, "loss": 15.5303, "step": 419820 }, { "epoch": 0.8480831619646327, "grad_norm": 191.06072998046875, "learning_rate": 
7.629071089930834e-07, "loss": 18.2567, "step": 419830 }, { "epoch": 0.8481033625973166, "grad_norm": 233.3933868408203, "learning_rate": 7.62721791401414e-07, "loss": 22.8896, "step": 419840 }, { "epoch": 0.8481235632300004, "grad_norm": 440.032958984375, "learning_rate": 7.625364944617242e-07, "loss": 25.5893, "step": 419850 }, { "epoch": 0.8481437638626842, "grad_norm": 341.0321960449219, "learning_rate": 7.623512181749182e-07, "loss": 14.6416, "step": 419860 }, { "epoch": 0.848163964495368, "grad_norm": 494.1391906738281, "learning_rate": 7.621659625418987e-07, "loss": 25.9168, "step": 419870 }, { "epoch": 0.8481841651280518, "grad_norm": 491.30035400390625, "learning_rate": 7.619807275635672e-07, "loss": 17.8396, "step": 419880 }, { "epoch": 0.8482043657607357, "grad_norm": 390.82745361328125, "learning_rate": 7.617955132408289e-07, "loss": 11.7248, "step": 419890 }, { "epoch": 0.8482245663934195, "grad_norm": 188.36827087402344, "learning_rate": 7.61610319574585e-07, "loss": 34.1716, "step": 419900 }, { "epoch": 0.8482447670261033, "grad_norm": 960.0751953125, "learning_rate": 7.614251465657374e-07, "loss": 22.1919, "step": 419910 }, { "epoch": 0.8482649676587871, "grad_norm": 12.805871963500977, "learning_rate": 7.612399942151894e-07, "loss": 12.9999, "step": 419920 }, { "epoch": 0.8482851682914709, "grad_norm": 360.5621643066406, "learning_rate": 7.610548625238445e-07, "loss": 15.7093, "step": 419930 }, { "epoch": 0.8483053689241548, "grad_norm": 436.2169494628906, "learning_rate": 7.608697514926045e-07, "loss": 13.3797, "step": 419940 }, { "epoch": 0.8483255695568385, "grad_norm": 324.625732421875, "learning_rate": 7.606846611223695e-07, "loss": 11.3572, "step": 419950 }, { "epoch": 0.8483457701895223, "grad_norm": 507.09783935546875, "learning_rate": 7.60499591414045e-07, "loss": 34.2821, "step": 419960 }, { "epoch": 0.8483659708222061, "grad_norm": 24.52530860900879, "learning_rate": 7.60314542368531e-07, "loss": 16.4815, "step": 419970 }, { "epoch": 0.8483861714548899, "grad_norm": 225.76918029785156, "learning_rate": 7.601295139867287e-07, "loss": 18.7387, "step": 419980 }, { "epoch": 0.8484063720875737, "grad_norm": 346.3161926269531, "learning_rate": 7.599445062695404e-07, "loss": 20.7518, "step": 419990 }, { "epoch": 0.8484265727202576, "grad_norm": 215.38546752929688, "learning_rate": 7.597595192178702e-07, "loss": 7.1108, "step": 420000 }, { "epoch": 0.8484467733529414, "grad_norm": 172.4610137939453, "learning_rate": 7.595745528326176e-07, "loss": 30.5344, "step": 420010 }, { "epoch": 0.8484669739856252, "grad_norm": 339.5697021484375, "learning_rate": 7.593896071146828e-07, "loss": 11.9902, "step": 420020 }, { "epoch": 0.848487174618309, "grad_norm": 445.3851013183594, "learning_rate": 7.592046820649706e-07, "loss": 13.3798, "step": 420030 }, { "epoch": 0.8485073752509928, "grad_norm": 216.46893310546875, "learning_rate": 7.5901977768438e-07, "loss": 17.0299, "step": 420040 }, { "epoch": 0.8485275758836767, "grad_norm": 429.12762451171875, "learning_rate": 7.588348939738116e-07, "loss": 13.6675, "step": 420050 }, { "epoch": 0.8485477765163605, "grad_norm": 9.526630401611328, "learning_rate": 7.586500309341682e-07, "loss": 14.2783, "step": 420060 }, { "epoch": 0.8485679771490443, "grad_norm": 292.997314453125, "learning_rate": 7.584651885663497e-07, "loss": 9.1951, "step": 420070 }, { "epoch": 0.8485881777817281, "grad_norm": 168.2415313720703, "learning_rate": 7.582803668712579e-07, "loss": 11.4824, "step": 420080 }, { "epoch": 0.848608378414412, "grad_norm": 
236.3516387939453, "learning_rate": 7.580955658497924e-07, "loss": 21.5602, "step": 420090 }, { "epoch": 0.8486285790470958, "grad_norm": 239.54022216796875, "learning_rate": 7.579107855028562e-07, "loss": 21.8654, "step": 420100 }, { "epoch": 0.8486487796797796, "grad_norm": 361.20947265625, "learning_rate": 7.577260258313474e-07, "loss": 25.1178, "step": 420110 }, { "epoch": 0.8486689803124634, "grad_norm": 521.0519409179688, "learning_rate": 7.57541286836167e-07, "loss": 15.5121, "step": 420120 }, { "epoch": 0.8486891809451472, "grad_norm": 309.9486389160156, "learning_rate": 7.573565685182166e-07, "loss": 16.8341, "step": 420130 }, { "epoch": 0.848709381577831, "grad_norm": 99.5003433227539, "learning_rate": 7.571718708783948e-07, "loss": 11.3829, "step": 420140 }, { "epoch": 0.8487295822105149, "grad_norm": 501.2671203613281, "learning_rate": 7.569871939176037e-07, "loss": 22.9475, "step": 420150 }, { "epoch": 0.8487497828431987, "grad_norm": 145.5573272705078, "learning_rate": 7.568025376367422e-07, "loss": 12.3609, "step": 420160 }, { "epoch": 0.8487699834758825, "grad_norm": 389.3744201660156, "learning_rate": 7.566179020367098e-07, "loss": 18.8499, "step": 420170 }, { "epoch": 0.8487901841085663, "grad_norm": 376.95501708984375, "learning_rate": 7.564332871184077e-07, "loss": 23.0914, "step": 420180 }, { "epoch": 0.8488103847412501, "grad_norm": 844.72705078125, "learning_rate": 7.562486928827356e-07, "loss": 25.2918, "step": 420190 }, { "epoch": 0.848830585373934, "grad_norm": 556.1143188476562, "learning_rate": 7.560641193305912e-07, "loss": 30.2421, "step": 420200 }, { "epoch": 0.8488507860066177, "grad_norm": 452.9190979003906, "learning_rate": 7.55879566462876e-07, "loss": 20.9866, "step": 420210 }, { "epoch": 0.8488709866393015, "grad_norm": 344.0130920410156, "learning_rate": 7.556950342804908e-07, "loss": 12.773, "step": 420220 }, { "epoch": 0.8488911872719853, "grad_norm": 559.789794921875, "learning_rate": 7.555105227843312e-07, "loss": 22.4848, "step": 420230 }, { "epoch": 0.8489113879046691, "grad_norm": 236.39991760253906, "learning_rate": 7.553260319752986e-07, "loss": 11.7751, "step": 420240 }, { "epoch": 0.848931588537353, "grad_norm": 178.82774353027344, "learning_rate": 7.551415618542928e-07, "loss": 10.9281, "step": 420250 }, { "epoch": 0.8489517891700368, "grad_norm": 416.56451416015625, "learning_rate": 7.549571124222127e-07, "loss": 29.0814, "step": 420260 }, { "epoch": 0.8489719898027206, "grad_norm": 125.81304931640625, "learning_rate": 7.547726836799551e-07, "loss": 13.1673, "step": 420270 }, { "epoch": 0.8489921904354044, "grad_norm": 317.0643615722656, "learning_rate": 7.545882756284212e-07, "loss": 7.6171, "step": 420280 }, { "epoch": 0.8490123910680882, "grad_norm": 523.3759765625, "learning_rate": 7.544038882685112e-07, "loss": 25.3222, "step": 420290 }, { "epoch": 0.849032591700772, "grad_norm": 430.9664306640625, "learning_rate": 7.542195216011188e-07, "loss": 18.5014, "step": 420300 }, { "epoch": 0.8490527923334559, "grad_norm": 462.48175048828125, "learning_rate": 7.540351756271464e-07, "loss": 18.2764, "step": 420310 }, { "epoch": 0.8490729929661397, "grad_norm": 312.3878173828125, "learning_rate": 7.538508503474923e-07, "loss": 19.0678, "step": 420320 }, { "epoch": 0.8490931935988235, "grad_norm": 158.47137451171875, "learning_rate": 7.536665457630544e-07, "loss": 21.0344, "step": 420330 }, { "epoch": 0.8491133942315073, "grad_norm": 358.35675048828125, "learning_rate": 7.534822618747289e-07, "loss": 33.1354, "step": 420340 }, { "epoch": 
0.8491335948641912, "grad_norm": 539.3812255859375, "learning_rate": 7.532979986834177e-07, "loss": 29.5604, "step": 420350 }, { "epoch": 0.849153795496875, "grad_norm": 234.95236206054688, "learning_rate": 7.53113756190017e-07, "loss": 10.4691, "step": 420360 }, { "epoch": 0.8491739961295588, "grad_norm": 205.41302490234375, "learning_rate": 7.529295343954229e-07, "loss": 9.7857, "step": 420370 }, { "epoch": 0.8491941967622426, "grad_norm": 466.1488037109375, "learning_rate": 7.527453333005368e-07, "loss": 16.7474, "step": 420380 }, { "epoch": 0.8492143973949264, "grad_norm": 1036.1993408203125, "learning_rate": 7.525611529062538e-07, "loss": 24.6821, "step": 420390 }, { "epoch": 0.8492345980276103, "grad_norm": 448.77362060546875, "learning_rate": 7.523769932134739e-07, "loss": 17.0026, "step": 420400 }, { "epoch": 0.8492547986602941, "grad_norm": 88.42417907714844, "learning_rate": 7.521928542230916e-07, "loss": 21.489, "step": 420410 }, { "epoch": 0.8492749992929779, "grad_norm": 304.2235412597656, "learning_rate": 7.520087359360073e-07, "loss": 6.9169, "step": 420420 }, { "epoch": 0.8492951999256617, "grad_norm": 195.1634063720703, "learning_rate": 7.51824638353118e-07, "loss": 17.1502, "step": 420430 }, { "epoch": 0.8493154005583455, "grad_norm": 17.28790855407715, "learning_rate": 7.51640561475318e-07, "loss": 16.0534, "step": 420440 }, { "epoch": 0.8493356011910294, "grad_norm": 185.24879455566406, "learning_rate": 7.514565053035083e-07, "loss": 10.7844, "step": 420450 }, { "epoch": 0.8493558018237131, "grad_norm": 467.2895202636719, "learning_rate": 7.512724698385831e-07, "loss": 12.9716, "step": 420460 }, { "epoch": 0.8493760024563969, "grad_norm": 195.4284210205078, "learning_rate": 7.510884550814418e-07, "loss": 13.6663, "step": 420470 }, { "epoch": 0.8493962030890807, "grad_norm": 298.0000915527344, "learning_rate": 7.509044610329803e-07, "loss": 29.8925, "step": 420480 }, { "epoch": 0.8494164037217645, "grad_norm": 297.3729248046875, "learning_rate": 7.507204876940938e-07, "loss": 13.3852, "step": 420490 }, { "epoch": 0.8494366043544483, "grad_norm": 187.8828887939453, "learning_rate": 7.505365350656813e-07, "loss": 11.998, "step": 420500 }, { "epoch": 0.8494568049871322, "grad_norm": 617.66552734375, "learning_rate": 7.50352603148638e-07, "loss": 25.929, "step": 420510 }, { "epoch": 0.849477005619816, "grad_norm": 349.9392395019531, "learning_rate": 7.5016869194386e-07, "loss": 14.9932, "step": 420520 }, { "epoch": 0.8494972062524998, "grad_norm": 259.2439270019531, "learning_rate": 7.499848014522443e-07, "loss": 19.1879, "step": 420530 }, { "epoch": 0.8495174068851836, "grad_norm": 257.3936767578125, "learning_rate": 7.498009316746879e-07, "loss": 30.295, "step": 420540 }, { "epoch": 0.8495376075178674, "grad_norm": 373.472900390625, "learning_rate": 7.496170826120869e-07, "loss": 19.4068, "step": 420550 }, { "epoch": 0.8495578081505513, "grad_norm": 835.5751953125, "learning_rate": 7.494332542653349e-07, "loss": 18.3841, "step": 420560 }, { "epoch": 0.8495780087832351, "grad_norm": 344.0382080078125, "learning_rate": 7.492494466353317e-07, "loss": 25.2305, "step": 420570 }, { "epoch": 0.8495982094159189, "grad_norm": 439.5791320800781, "learning_rate": 7.490656597229707e-07, "loss": 34.9259, "step": 420580 }, { "epoch": 0.8496184100486027, "grad_norm": 294.4809875488281, "learning_rate": 7.488818935291465e-07, "loss": 26.3275, "step": 420590 }, { "epoch": 0.8496386106812865, "grad_norm": 312.11370849609375, "learning_rate": 7.486981480547567e-07, "loss": 30.0317, "step": 
420600 }, { "epoch": 0.8496588113139704, "grad_norm": 177.1890106201172, "learning_rate": 7.48514423300698e-07, "loss": 21.0186, "step": 420610 }, { "epoch": 0.8496790119466542, "grad_norm": 83.07833862304688, "learning_rate": 7.48330719267864e-07, "loss": 9.8696, "step": 420620 }, { "epoch": 0.849699212579338, "grad_norm": 548.93310546875, "learning_rate": 7.481470359571497e-07, "loss": 14.6292, "step": 420630 }, { "epoch": 0.8497194132120218, "grad_norm": 274.3866882324219, "learning_rate": 7.479633733694519e-07, "loss": 14.8856, "step": 420640 }, { "epoch": 0.8497396138447056, "grad_norm": 734.2679443359375, "learning_rate": 7.477797315056645e-07, "loss": 22.1115, "step": 420650 }, { "epoch": 0.8497598144773895, "grad_norm": 447.2192687988281, "learning_rate": 7.475961103666824e-07, "loss": 17.7805, "step": 420660 }, { "epoch": 0.8497800151100733, "grad_norm": 216.69761657714844, "learning_rate": 7.474125099534019e-07, "loss": 18.5491, "step": 420670 }, { "epoch": 0.8498002157427571, "grad_norm": 214.6820831298828, "learning_rate": 7.472289302667163e-07, "loss": 13.082, "step": 420680 }, { "epoch": 0.8498204163754409, "grad_norm": 361.2976379394531, "learning_rate": 7.470453713075215e-07, "loss": 24.109, "step": 420690 }, { "epoch": 0.8498406170081247, "grad_norm": 281.5863037109375, "learning_rate": 7.468618330767114e-07, "loss": 11.7758, "step": 420700 }, { "epoch": 0.8498608176408086, "grad_norm": 71.94729614257812, "learning_rate": 7.466783155751816e-07, "loss": 14.2478, "step": 420710 }, { "epoch": 0.8498810182734923, "grad_norm": 362.0234375, "learning_rate": 7.464948188038262e-07, "loss": 13.8452, "step": 420720 }, { "epoch": 0.8499012189061761, "grad_norm": 562.4525146484375, "learning_rate": 7.463113427635376e-07, "loss": 23.803, "step": 420730 }, { "epoch": 0.8499214195388599, "grad_norm": 171.87557983398438, "learning_rate": 7.461278874552131e-07, "loss": 7.2911, "step": 420740 }, { "epoch": 0.8499416201715437, "grad_norm": 215.71307373046875, "learning_rate": 7.459444528797438e-07, "loss": 9.4946, "step": 420750 }, { "epoch": 0.8499618208042276, "grad_norm": 240.4515838623047, "learning_rate": 7.457610390380265e-07, "loss": 15.6298, "step": 420760 }, { "epoch": 0.8499820214369114, "grad_norm": 410.1695251464844, "learning_rate": 7.455776459309538e-07, "loss": 15.6956, "step": 420770 }, { "epoch": 0.8500022220695952, "grad_norm": 235.95631408691406, "learning_rate": 7.453942735594189e-07, "loss": 31.0048, "step": 420780 }, { "epoch": 0.850022422702279, "grad_norm": 133.65708923339844, "learning_rate": 7.452109219243175e-07, "loss": 14.8682, "step": 420790 }, { "epoch": 0.8500426233349628, "grad_norm": 217.16836547851562, "learning_rate": 7.450275910265415e-07, "loss": 29.9921, "step": 420800 }, { "epoch": 0.8500628239676467, "grad_norm": 527.6741943359375, "learning_rate": 7.448442808669842e-07, "loss": 15.8221, "step": 420810 }, { "epoch": 0.8500830246003305, "grad_norm": 312.0057373046875, "learning_rate": 7.446609914465397e-07, "loss": 18.7422, "step": 420820 }, { "epoch": 0.8501032252330143, "grad_norm": 212.4395751953125, "learning_rate": 7.444777227661037e-07, "loss": 19.5951, "step": 420830 }, { "epoch": 0.8501234258656981, "grad_norm": 257.22882080078125, "learning_rate": 7.442944748265651e-07, "loss": 16.0408, "step": 420840 }, { "epoch": 0.8501436264983819, "grad_norm": 224.76571655273438, "learning_rate": 7.441112476288187e-07, "loss": 22.7621, "step": 420850 }, { "epoch": 0.8501638271310658, "grad_norm": 111.08911895751953, "learning_rate": 7.439280411737592e-07, 
"loss": 11.3516, "step": 420860 }, { "epoch": 0.8501840277637496, "grad_norm": 237.11033630371094, "learning_rate": 7.437448554622783e-07, "loss": 6.7287, "step": 420870 }, { "epoch": 0.8502042283964334, "grad_norm": 395.3372497558594, "learning_rate": 7.435616904952675e-07, "loss": 19.3152, "step": 420880 }, { "epoch": 0.8502244290291172, "grad_norm": 480.9623107910156, "learning_rate": 7.433785462736209e-07, "loss": 20.8863, "step": 420890 }, { "epoch": 0.850244629661801, "grad_norm": 382.73583984375, "learning_rate": 7.43195422798233e-07, "loss": 19.1049, "step": 420900 }, { "epoch": 0.8502648302944849, "grad_norm": 1869.8851318359375, "learning_rate": 7.430123200699924e-07, "loss": 32.079, "step": 420910 }, { "epoch": 0.8502850309271687, "grad_norm": 712.2645874023438, "learning_rate": 7.428292380897933e-07, "loss": 18.4824, "step": 420920 }, { "epoch": 0.8503052315598525, "grad_norm": 398.8208312988281, "learning_rate": 7.426461768585291e-07, "loss": 23.8987, "step": 420930 }, { "epoch": 0.8503254321925363, "grad_norm": 108.1100082397461, "learning_rate": 7.424631363770912e-07, "loss": 15.4419, "step": 420940 }, { "epoch": 0.8503456328252201, "grad_norm": 335.90740966796875, "learning_rate": 7.422801166463706e-07, "loss": 20.4621, "step": 420950 }, { "epoch": 0.850365833457904, "grad_norm": 288.9495544433594, "learning_rate": 7.420971176672614e-07, "loss": 17.5241, "step": 420960 }, { "epoch": 0.8503860340905878, "grad_norm": 348.71038818359375, "learning_rate": 7.419141394406543e-07, "loss": 18.832, "step": 420970 }, { "epoch": 0.8504062347232715, "grad_norm": 540.66650390625, "learning_rate": 7.4173118196744e-07, "loss": 33.4038, "step": 420980 }, { "epoch": 0.8504264353559553, "grad_norm": 284.1331787109375, "learning_rate": 7.415482452485129e-07, "loss": 21.0569, "step": 420990 }, { "epoch": 0.8504466359886391, "grad_norm": 199.09298706054688, "learning_rate": 7.413653292847617e-07, "loss": 22.0451, "step": 421000 }, { "epoch": 0.8504668366213229, "grad_norm": 437.1142883300781, "learning_rate": 7.411824340770813e-07, "loss": 21.0575, "step": 421010 }, { "epoch": 0.8504870372540068, "grad_norm": 7.600314617156982, "learning_rate": 7.409995596263591e-07, "loss": 10.5348, "step": 421020 }, { "epoch": 0.8505072378866906, "grad_norm": 411.22314453125, "learning_rate": 7.408167059334897e-07, "loss": 35.8578, "step": 421030 }, { "epoch": 0.8505274385193744, "grad_norm": 206.26797485351562, "learning_rate": 7.40633872999364e-07, "loss": 13.219, "step": 421040 }, { "epoch": 0.8505476391520582, "grad_norm": 343.618896484375, "learning_rate": 7.4045106082487e-07, "loss": 27.9574, "step": 421050 }, { "epoch": 0.850567839784742, "grad_norm": 161.28146362304688, "learning_rate": 7.402682694109026e-07, "loss": 18.7444, "step": 421060 }, { "epoch": 0.8505880404174259, "grad_norm": 341.72161865234375, "learning_rate": 7.4008549875835e-07, "loss": 16.2181, "step": 421070 }, { "epoch": 0.8506082410501097, "grad_norm": 453.3415222167969, "learning_rate": 7.399027488681049e-07, "loss": 28.2804, "step": 421080 }, { "epoch": 0.8506284416827935, "grad_norm": 495.8848876953125, "learning_rate": 7.39720019741057e-07, "loss": 30.7922, "step": 421090 }, { "epoch": 0.8506486423154773, "grad_norm": 277.91143798828125, "learning_rate": 7.395373113780962e-07, "loss": 18.172, "step": 421100 }, { "epoch": 0.8506688429481611, "grad_norm": 302.0050964355469, "learning_rate": 7.393546237801147e-07, "loss": 20.2798, "step": 421110 }, { "epoch": 0.850689043580845, "grad_norm": 3.2582924365997314, "learning_rate": 
7.391719569480021e-07, "loss": 13.3446, "step": 421120 }, { "epoch": 0.8507092442135288, "grad_norm": 554.4869384765625, "learning_rate": 7.389893108826473e-07, "loss": 26.9601, "step": 421130 }, { "epoch": 0.8507294448462126, "grad_norm": 320.6636047363281, "learning_rate": 7.388066855849418e-07, "loss": 14.6543, "step": 421140 }, { "epoch": 0.8507496454788964, "grad_norm": 301.8233642578125, "learning_rate": 7.386240810557771e-07, "loss": 25.684, "step": 421150 }, { "epoch": 0.8507698461115802, "grad_norm": 1380.050048828125, "learning_rate": 7.384414972960419e-07, "loss": 27.1782, "step": 421160 }, { "epoch": 0.8507900467442641, "grad_norm": 360.6093444824219, "learning_rate": 7.382589343066243e-07, "loss": 14.7563, "step": 421170 }, { "epoch": 0.8508102473769479, "grad_norm": 667.0159912109375, "learning_rate": 7.380763920884171e-07, "loss": 20.4568, "step": 421180 }, { "epoch": 0.8508304480096317, "grad_norm": 194.67372131347656, "learning_rate": 7.378938706423089e-07, "loss": 13.4816, "step": 421190 }, { "epoch": 0.8508506486423155, "grad_norm": 118.02854919433594, "learning_rate": 7.377113699691879e-07, "loss": 15.1407, "step": 421200 }, { "epoch": 0.8508708492749993, "grad_norm": 446.3256530761719, "learning_rate": 7.375288900699445e-07, "loss": 10.0944, "step": 421210 }, { "epoch": 0.8508910499076832, "grad_norm": 54.94810485839844, "learning_rate": 7.373464309454698e-07, "loss": 22.6813, "step": 421220 }, { "epoch": 0.8509112505403669, "grad_norm": 132.21023559570312, "learning_rate": 7.371639925966512e-07, "loss": 11.2265, "step": 421230 }, { "epoch": 0.8509314511730507, "grad_norm": 949.7929077148438, "learning_rate": 7.369815750243769e-07, "loss": 17.2051, "step": 421240 }, { "epoch": 0.8509516518057345, "grad_norm": 444.0530090332031, "learning_rate": 7.367991782295392e-07, "loss": 23.2181, "step": 421250 }, { "epoch": 0.8509718524384183, "grad_norm": 639.97900390625, "learning_rate": 7.366168022130249e-07, "loss": 24.6395, "step": 421260 }, { "epoch": 0.8509920530711022, "grad_norm": 575.516845703125, "learning_rate": 7.364344469757223e-07, "loss": 21.0052, "step": 421270 }, { "epoch": 0.851012253703786, "grad_norm": 327.63720703125, "learning_rate": 7.362521125185218e-07, "loss": 17.7251, "step": 421280 }, { "epoch": 0.8510324543364698, "grad_norm": 325.7966003417969, "learning_rate": 7.360697988423105e-07, "loss": 13.0203, "step": 421290 }, { "epoch": 0.8510526549691536, "grad_norm": 282.32525634765625, "learning_rate": 7.358875059479792e-07, "loss": 14.0752, "step": 421300 }, { "epoch": 0.8510728556018374, "grad_norm": 528.5604858398438, "learning_rate": 7.357052338364134e-07, "loss": 14.7047, "step": 421310 }, { "epoch": 0.8510930562345213, "grad_norm": 2.1461079120635986, "learning_rate": 7.355229825085047e-07, "loss": 9.7225, "step": 421320 }, { "epoch": 0.8511132568672051, "grad_norm": 432.22021484375, "learning_rate": 7.353407519651395e-07, "loss": 27.3364, "step": 421330 }, { "epoch": 0.8511334574998889, "grad_norm": 551.1565551757812, "learning_rate": 7.351585422072049e-07, "loss": 15.5955, "step": 421340 }, { "epoch": 0.8511536581325727, "grad_norm": 288.9549560546875, "learning_rate": 7.349763532355919e-07, "loss": 14.2593, "step": 421350 }, { "epoch": 0.8511738587652565, "grad_norm": 47.11952209472656, "learning_rate": 7.347941850511853e-07, "loss": 20.5868, "step": 421360 }, { "epoch": 0.8511940593979404, "grad_norm": 447.0443420410156, "learning_rate": 7.34612037654876e-07, "loss": 11.4741, "step": 421370 }, { "epoch": 0.8512142600306242, "grad_norm": 
382.4913024902344, "learning_rate": 7.344299110475506e-07, "loss": 21.1025, "step": 421380 }, { "epoch": 0.851234460663308, "grad_norm": 304.9476318359375, "learning_rate": 7.342478052300945e-07, "loss": 21.573, "step": 421390 }, { "epoch": 0.8512546612959918, "grad_norm": 327.41204833984375, "learning_rate": 7.34065720203399e-07, "loss": 16.9658, "step": 421400 }, { "epoch": 0.8512748619286756, "grad_norm": 293.70294189453125, "learning_rate": 7.338836559683493e-07, "loss": 15.7225, "step": 421410 }, { "epoch": 0.8512950625613595, "grad_norm": 349.07061767578125, "learning_rate": 7.337016125258323e-07, "loss": 17.4128, "step": 421420 }, { "epoch": 0.8513152631940433, "grad_norm": 261.398193359375, "learning_rate": 7.335195898767367e-07, "loss": 25.8577, "step": 421430 }, { "epoch": 0.8513354638267271, "grad_norm": 351.88543701171875, "learning_rate": 7.333375880219507e-07, "loss": 52.9905, "step": 421440 }, { "epoch": 0.8513556644594109, "grad_norm": 0.34273359179496765, "learning_rate": 7.33155606962358e-07, "loss": 26.1271, "step": 421450 }, { "epoch": 0.8513758650920947, "grad_norm": 578.9764404296875, "learning_rate": 7.329736466988469e-07, "loss": 11.8796, "step": 421460 }, { "epoch": 0.8513960657247786, "grad_norm": 174.4795684814453, "learning_rate": 7.327917072323065e-07, "loss": 31.6347, "step": 421470 }, { "epoch": 0.8514162663574624, "grad_norm": 111.70514678955078, "learning_rate": 7.326097885636214e-07, "loss": 11.7568, "step": 421480 }, { "epoch": 0.8514364669901461, "grad_norm": 278.8871154785156, "learning_rate": 7.324278906936771e-07, "loss": 24.3871, "step": 421490 }, { "epoch": 0.8514566676228299, "grad_norm": 619.1923828125, "learning_rate": 7.322460136233622e-07, "loss": 29.3718, "step": 421500 }, { "epoch": 0.8514768682555137, "grad_norm": 228.79319763183594, "learning_rate": 7.320641573535647e-07, "loss": 19.2691, "step": 421510 }, { "epoch": 0.8514970688881975, "grad_norm": 413.57049560546875, "learning_rate": 7.318823218851668e-07, "loss": 32.7849, "step": 421520 }, { "epoch": 0.8515172695208814, "grad_norm": 396.7378845214844, "learning_rate": 7.31700507219057e-07, "loss": 20.0575, "step": 421530 }, { "epoch": 0.8515374701535652, "grad_norm": 375.3517761230469, "learning_rate": 7.315187133561219e-07, "loss": 19.0317, "step": 421540 }, { "epoch": 0.851557670786249, "grad_norm": 37.742340087890625, "learning_rate": 7.31336940297247e-07, "loss": 20.6924, "step": 421550 }, { "epoch": 0.8515778714189328, "grad_norm": 427.2479248046875, "learning_rate": 7.311551880433171e-07, "loss": 13.0093, "step": 421560 }, { "epoch": 0.8515980720516166, "grad_norm": 529.8531494140625, "learning_rate": 7.309734565952198e-07, "loss": 16.0609, "step": 421570 }, { "epoch": 0.8516182726843005, "grad_norm": 294.2206726074219, "learning_rate": 7.307917459538405e-07, "loss": 22.9379, "step": 421580 }, { "epoch": 0.8516384733169843, "grad_norm": 397.2265625, "learning_rate": 7.30610056120063e-07, "loss": 24.8546, "step": 421590 }, { "epoch": 0.8516586739496681, "grad_norm": 20.17185401916504, "learning_rate": 7.304283870947748e-07, "loss": 17.733, "step": 421600 }, { "epoch": 0.8516788745823519, "grad_norm": 473.6755065917969, "learning_rate": 7.302467388788614e-07, "loss": 23.9961, "step": 421610 }, { "epoch": 0.8516990752150357, "grad_norm": 362.61431884765625, "learning_rate": 7.300651114732077e-07, "loss": 20.499, "step": 421620 }, { "epoch": 0.8517192758477196, "grad_norm": 176.23777770996094, "learning_rate": 7.298835048786979e-07, "loss": 15.5241, "step": 421630 }, { "epoch": 
0.8517394764804034, "grad_norm": 443.8497009277344, "learning_rate": 7.29701919096219e-07, "loss": 17.5838, "step": 421640 }, { "epoch": 0.8517596771130872, "grad_norm": 549.1595458984375, "learning_rate": 7.295203541266549e-07, "loss": 21.9836, "step": 421650 }, { "epoch": 0.851779877745771, "grad_norm": 238.44998168945312, "learning_rate": 7.293388099708892e-07, "loss": 17.0428, "step": 421660 }, { "epoch": 0.8518000783784548, "grad_norm": 224.4907684326172, "learning_rate": 7.291572866298102e-07, "loss": 20.522, "step": 421670 }, { "epoch": 0.8518202790111387, "grad_norm": 39.98009490966797, "learning_rate": 7.289757841042988e-07, "loss": 22.7961, "step": 421680 }, { "epoch": 0.8518404796438225, "grad_norm": 395.55438232421875, "learning_rate": 7.287943023952426e-07, "loss": 14.4075, "step": 421690 }, { "epoch": 0.8518606802765063, "grad_norm": 462.35174560546875, "learning_rate": 7.286128415035249e-07, "loss": 20.4092, "step": 421700 }, { "epoch": 0.8518808809091901, "grad_norm": 342.838623046875, "learning_rate": 7.284314014300292e-07, "loss": 14.6635, "step": 421710 }, { "epoch": 0.8519010815418739, "grad_norm": 402.89068603515625, "learning_rate": 7.282499821756417e-07, "loss": 14.4933, "step": 421720 }, { "epoch": 0.8519212821745578, "grad_norm": 195.26206970214844, "learning_rate": 7.28068583741246e-07, "loss": 25.5276, "step": 421730 }, { "epoch": 0.8519414828072415, "grad_norm": 632.72265625, "learning_rate": 7.278872061277248e-07, "loss": 21.9685, "step": 421740 }, { "epoch": 0.8519616834399253, "grad_norm": 480.2913818359375, "learning_rate": 7.277058493359629e-07, "loss": 26.8477, "step": 421750 }, { "epoch": 0.8519818840726091, "grad_norm": 457.3885192871094, "learning_rate": 7.275245133668457e-07, "loss": 21.4772, "step": 421760 }, { "epoch": 0.8520020847052929, "grad_norm": 304.02252197265625, "learning_rate": 7.273431982212559e-07, "loss": 14.3607, "step": 421770 }, { "epoch": 0.8520222853379767, "grad_norm": 395.3974304199219, "learning_rate": 7.27161903900076e-07, "loss": 14.1178, "step": 421780 }, { "epoch": 0.8520424859706606, "grad_norm": 679.8463134765625, "learning_rate": 7.269806304041915e-07, "loss": 18.9653, "step": 421790 }, { "epoch": 0.8520626866033444, "grad_norm": 689.3289184570312, "learning_rate": 7.267993777344856e-07, "loss": 24.3499, "step": 421800 }, { "epoch": 0.8520828872360282, "grad_norm": 340.630126953125, "learning_rate": 7.266181458918403e-07, "loss": 33.4605, "step": 421810 }, { "epoch": 0.852103087868712, "grad_norm": 313.085205078125, "learning_rate": 7.264369348771394e-07, "loss": 17.1183, "step": 421820 }, { "epoch": 0.8521232885013958, "grad_norm": 342.37451171875, "learning_rate": 7.262557446912693e-07, "loss": 15.2027, "step": 421830 }, { "epoch": 0.8521434891340797, "grad_norm": 316.3703918457031, "learning_rate": 7.260745753351078e-07, "loss": 14.5585, "step": 421840 }, { "epoch": 0.8521636897667635, "grad_norm": 439.4654235839844, "learning_rate": 7.258934268095402e-07, "loss": 14.8989, "step": 421850 }, { "epoch": 0.8521838903994473, "grad_norm": 593.458740234375, "learning_rate": 7.257122991154514e-07, "loss": 26.9237, "step": 421860 }, { "epoch": 0.8522040910321311, "grad_norm": 509.9592590332031, "learning_rate": 7.255311922537217e-07, "loss": 21.6094, "step": 421870 }, { "epoch": 0.852224291664815, "grad_norm": 44.265228271484375, "learning_rate": 7.253501062252338e-07, "loss": 17.5555, "step": 421880 }, { "epoch": 0.8522444922974988, "grad_norm": 115.82080078125, "learning_rate": 7.251690410308726e-07, "loss": 21.4566, "step": 
421890 }, { "epoch": 0.8522646929301826, "grad_norm": 234.79441833496094, "learning_rate": 7.249879966715174e-07, "loss": 13.3536, "step": 421900 }, { "epoch": 0.8522848935628664, "grad_norm": 245.02662658691406, "learning_rate": 7.248069731480533e-07, "loss": 11.3074, "step": 421910 }, { "epoch": 0.8523050941955502, "grad_norm": 447.1869812011719, "learning_rate": 7.246259704613606e-07, "loss": 19.349, "step": 421920 }, { "epoch": 0.852325294828234, "grad_norm": 258.03607177734375, "learning_rate": 7.244449886123233e-07, "loss": 20.7277, "step": 421930 }, { "epoch": 0.8523454954609179, "grad_norm": 416.8277282714844, "learning_rate": 7.242640276018226e-07, "loss": 15.8615, "step": 421940 }, { "epoch": 0.8523656960936017, "grad_norm": 9.882548332214355, "learning_rate": 7.240830874307392e-07, "loss": 21.3556, "step": 421950 }, { "epoch": 0.8523858967262855, "grad_norm": 509.8556213378906, "learning_rate": 7.239021680999575e-07, "loss": 18.4977, "step": 421960 }, { "epoch": 0.8524060973589693, "grad_norm": 307.5932922363281, "learning_rate": 7.237212696103568e-07, "loss": 19.4454, "step": 421970 }, { "epoch": 0.8524262979916531, "grad_norm": 57.08131408691406, "learning_rate": 7.235403919628214e-07, "loss": 9.8599, "step": 421980 }, { "epoch": 0.852446498624337, "grad_norm": 234.61434936523438, "learning_rate": 7.233595351582313e-07, "loss": 37.4964, "step": 421990 }, { "epoch": 0.8524666992570207, "grad_norm": 192.13845825195312, "learning_rate": 7.23178699197467e-07, "loss": 19.1351, "step": 422000 }, { "epoch": 0.8524868998897045, "grad_norm": 12.566235542297363, "learning_rate": 7.229978840814122e-07, "loss": 23.8078, "step": 422010 }, { "epoch": 0.8525071005223883, "grad_norm": 335.6422119140625, "learning_rate": 7.228170898109465e-07, "loss": 11.2894, "step": 422020 }, { "epoch": 0.8525273011550721, "grad_norm": 59.3913688659668, "learning_rate": 7.22636316386951e-07, "loss": 17.7019, "step": 422030 }, { "epoch": 0.852547501787756, "grad_norm": 292.2152099609375, "learning_rate": 7.22455563810307e-07, "loss": 18.1457, "step": 422040 }, { "epoch": 0.8525677024204398, "grad_norm": 203.8167266845703, "learning_rate": 7.222748320818984e-07, "loss": 13.9486, "step": 422050 }, { "epoch": 0.8525879030531236, "grad_norm": 192.4440155029297, "learning_rate": 7.220941212026005e-07, "loss": 12.7467, "step": 422060 }, { "epoch": 0.8526081036858074, "grad_norm": 151.43333435058594, "learning_rate": 7.219134311732978e-07, "loss": 17.172, "step": 422070 }, { "epoch": 0.8526283043184912, "grad_norm": 274.3697509765625, "learning_rate": 7.217327619948705e-07, "loss": 10.654, "step": 422080 }, { "epoch": 0.8526485049511751, "grad_norm": 333.4923400878906, "learning_rate": 7.215521136681997e-07, "loss": 16.1211, "step": 422090 }, { "epoch": 0.8526687055838589, "grad_norm": 509.5408020019531, "learning_rate": 7.213714861941628e-07, "loss": 21.663, "step": 422100 }, { "epoch": 0.8526889062165427, "grad_norm": 252.9277801513672, "learning_rate": 7.211908795736433e-07, "loss": 15.0233, "step": 422110 }, { "epoch": 0.8527091068492265, "grad_norm": 328.42047119140625, "learning_rate": 7.210102938075225e-07, "loss": 14.1027, "step": 422120 }, { "epoch": 0.8527293074819103, "grad_norm": 867.2739868164062, "learning_rate": 7.20829728896676e-07, "loss": 30.1601, "step": 422130 }, { "epoch": 0.8527495081145942, "grad_norm": 230.66310119628906, "learning_rate": 7.206491848419867e-07, "loss": 14.3198, "step": 422140 }, { "epoch": 0.852769708747278, "grad_norm": 273.2495422363281, "learning_rate": 
7.204686616443352e-07, "loss": 26.9474, "step": 422150 }, { "epoch": 0.8527899093799618, "grad_norm": 440.524169921875, "learning_rate": 7.202881593046002e-07, "loss": 19.9583, "step": 422160 }, { "epoch": 0.8528101100126456, "grad_norm": 270.63421630859375, "learning_rate": 7.20107677823661e-07, "loss": 26.173, "step": 422170 }, { "epoch": 0.8528303106453294, "grad_norm": 816.7549438476562, "learning_rate": 7.199272172023986e-07, "loss": 30.6517, "step": 422180 }, { "epoch": 0.8528505112780133, "grad_norm": 11.268199920654297, "learning_rate": 7.197467774416921e-07, "loss": 17.9889, "step": 422190 }, { "epoch": 0.8528707119106971, "grad_norm": 447.5383605957031, "learning_rate": 7.195663585424195e-07, "loss": 13.2262, "step": 422200 }, { "epoch": 0.8528909125433809, "grad_norm": 179.54664611816406, "learning_rate": 7.193859605054615e-07, "loss": 16.2462, "step": 422210 }, { "epoch": 0.8529111131760647, "grad_norm": 292.2140197753906, "learning_rate": 7.19205583331698e-07, "loss": 10.8472, "step": 422220 }, { "epoch": 0.8529313138087485, "grad_norm": 346.0629577636719, "learning_rate": 7.190252270220071e-07, "loss": 10.9012, "step": 422230 }, { "epoch": 0.8529515144414324, "grad_norm": 255.02244567871094, "learning_rate": 7.188448915772673e-07, "loss": 11.8099, "step": 422240 }, { "epoch": 0.8529717150741161, "grad_norm": 21.119182586669922, "learning_rate": 7.186645769983591e-07, "loss": 15.2602, "step": 422250 }, { "epoch": 0.8529919157067999, "grad_norm": 140.7800750732422, "learning_rate": 7.18484283286161e-07, "loss": 34.9589, "step": 422260 }, { "epoch": 0.8530121163394837, "grad_norm": 472.4410400390625, "learning_rate": 7.183040104415495e-07, "loss": 13.2531, "step": 422270 }, { "epoch": 0.8530323169721675, "grad_norm": 12.594407081604004, "learning_rate": 7.181237584654066e-07, "loss": 10.1258, "step": 422280 }, { "epoch": 0.8530525176048513, "grad_norm": 415.6765441894531, "learning_rate": 7.179435273586078e-07, "loss": 15.773, "step": 422290 }, { "epoch": 0.8530727182375352, "grad_norm": 65.1563720703125, "learning_rate": 7.177633171220339e-07, "loss": 18.0508, "step": 422300 }, { "epoch": 0.853092918870219, "grad_norm": 0.0, "learning_rate": 7.17583127756562e-07, "loss": 36.8546, "step": 422310 }, { "epoch": 0.8531131195029028, "grad_norm": 316.9919128417969, "learning_rate": 7.1740295926307e-07, "loss": 17.3249, "step": 422320 }, { "epoch": 0.8531333201355866, "grad_norm": 129.97496032714844, "learning_rate": 7.172228116424374e-07, "loss": 25.0414, "step": 422330 }, { "epoch": 0.8531535207682704, "grad_norm": 284.4596252441406, "learning_rate": 7.170426848955408e-07, "loss": 16.1025, "step": 422340 }, { "epoch": 0.8531737214009543, "grad_norm": 640.4921264648438, "learning_rate": 7.168625790232586e-07, "loss": 18.8267, "step": 422350 }, { "epoch": 0.8531939220336381, "grad_norm": 1549.4013671875, "learning_rate": 7.166824940264683e-07, "loss": 28.6623, "step": 422360 }, { "epoch": 0.8532141226663219, "grad_norm": 363.51800537109375, "learning_rate": 7.165024299060486e-07, "loss": 11.5337, "step": 422370 }, { "epoch": 0.8532343232990057, "grad_norm": 534.2193603515625, "learning_rate": 7.163223866628771e-07, "loss": 14.7847, "step": 422380 }, { "epoch": 0.8532545239316895, "grad_norm": 626.86865234375, "learning_rate": 7.161423642978299e-07, "loss": 22.6856, "step": 422390 }, { "epoch": 0.8532747245643734, "grad_norm": 187.61221313476562, "learning_rate": 7.159623628117856e-07, "loss": 13.3871, "step": 422400 }, { "epoch": 0.8532949251970572, "grad_norm": 282.8906555175781, 
"learning_rate": 7.157823822056214e-07, "loss": 12.2203, "step": 422410 }, { "epoch": 0.853315125829741, "grad_norm": 233.69619750976562, "learning_rate": 7.156024224802139e-07, "loss": 14.9793, "step": 422420 }, { "epoch": 0.8533353264624248, "grad_norm": 729.30517578125, "learning_rate": 7.154224836364398e-07, "loss": 23.6431, "step": 422430 }, { "epoch": 0.8533555270951086, "grad_norm": 443.14154052734375, "learning_rate": 7.152425656751794e-07, "loss": 13.5563, "step": 422440 }, { "epoch": 0.8533757277277925, "grad_norm": 488.0747985839844, "learning_rate": 7.150626685973045e-07, "loss": 16.0828, "step": 422450 }, { "epoch": 0.8533959283604763, "grad_norm": 164.69842529296875, "learning_rate": 7.148827924036944e-07, "loss": 18.4377, "step": 422460 }, { "epoch": 0.8534161289931601, "grad_norm": 345.8693542480469, "learning_rate": 7.147029370952274e-07, "loss": 24.7451, "step": 422470 }, { "epoch": 0.8534363296258439, "grad_norm": 172.44485473632812, "learning_rate": 7.145231026727783e-07, "loss": 19.3036, "step": 422480 }, { "epoch": 0.8534565302585277, "grad_norm": 492.9544372558594, "learning_rate": 7.143432891372226e-07, "loss": 16.763, "step": 422490 }, { "epoch": 0.8534767308912116, "grad_norm": 627.2689208984375, "learning_rate": 7.141634964894389e-07, "loss": 18.8558, "step": 422500 }, { "epoch": 0.8534969315238953, "grad_norm": 461.1217041015625, "learning_rate": 7.139837247303027e-07, "loss": 14.6763, "step": 422510 }, { "epoch": 0.8535171321565791, "grad_norm": 225.9117889404297, "learning_rate": 7.138039738606894e-07, "loss": 33.9146, "step": 422520 }, { "epoch": 0.8535373327892629, "grad_norm": 202.75625610351562, "learning_rate": 7.13624243881475e-07, "loss": 22.2583, "step": 422530 }, { "epoch": 0.8535575334219467, "grad_norm": 566.3533935546875, "learning_rate": 7.134445347935376e-07, "loss": 16.8513, "step": 422540 }, { "epoch": 0.8535777340546306, "grad_norm": 628.0380249023438, "learning_rate": 7.132648465977515e-07, "loss": 13.4277, "step": 422550 }, { "epoch": 0.8535979346873144, "grad_norm": 36.08042526245117, "learning_rate": 7.130851792949916e-07, "loss": 12.6178, "step": 422560 }, { "epoch": 0.8536181353199982, "grad_norm": 211.43484497070312, "learning_rate": 7.129055328861356e-07, "loss": 16.2987, "step": 422570 }, { "epoch": 0.853638335952682, "grad_norm": 52.93839645385742, "learning_rate": 7.127259073720571e-07, "loss": 16.9325, "step": 422580 }, { "epoch": 0.8536585365853658, "grad_norm": 228.2338409423828, "learning_rate": 7.125463027536334e-07, "loss": 24.4949, "step": 422590 }, { "epoch": 0.8536787372180497, "grad_norm": 248.85826110839844, "learning_rate": 7.123667190317396e-07, "loss": 19.4014, "step": 422600 }, { "epoch": 0.8536989378507335, "grad_norm": 218.89491271972656, "learning_rate": 7.121871562072486e-07, "loss": 28.9889, "step": 422610 }, { "epoch": 0.8537191384834173, "grad_norm": 258.9183654785156, "learning_rate": 7.12007614281039e-07, "loss": 23.8125, "step": 422620 }, { "epoch": 0.8537393391161011, "grad_norm": 481.2682800292969, "learning_rate": 7.11828093253984e-07, "loss": 16.0911, "step": 422630 }, { "epoch": 0.8537595397487849, "grad_norm": 599.0542602539062, "learning_rate": 7.116485931269573e-07, "loss": 16.99, "step": 422640 }, { "epoch": 0.8537797403814688, "grad_norm": 411.8247375488281, "learning_rate": 7.114691139008356e-07, "loss": 13.0052, "step": 422650 }, { "epoch": 0.8537999410141526, "grad_norm": 409.2203674316406, "learning_rate": 7.112896555764943e-07, "loss": 13.5062, "step": 422660 }, { "epoch": 0.8538201416468364, 
"grad_norm": 386.6644592285156, "learning_rate": 7.111102181548074e-07, "loss": 22.6078, "step": 422670 }, { "epoch": 0.8538403422795202, "grad_norm": 340.1001281738281, "learning_rate": 7.109308016366473e-07, "loss": 10.6364, "step": 422680 }, { "epoch": 0.853860542912204, "grad_norm": 299.4497375488281, "learning_rate": 7.107514060228921e-07, "loss": 18.9434, "step": 422690 }, { "epoch": 0.8538807435448879, "grad_norm": 461.1625671386719, "learning_rate": 7.105720313144143e-07, "loss": 9.3554, "step": 422700 }, { "epoch": 0.8539009441775717, "grad_norm": 300.0633544921875, "learning_rate": 7.103926775120867e-07, "loss": 33.4887, "step": 422710 }, { "epoch": 0.8539211448102555, "grad_norm": 314.12481689453125, "learning_rate": 7.102133446167847e-07, "loss": 18.0671, "step": 422720 }, { "epoch": 0.8539413454429393, "grad_norm": 172.38677978515625, "learning_rate": 7.100340326293853e-07, "loss": 9.6837, "step": 422730 }, { "epoch": 0.8539615460756231, "grad_norm": 89.95143127441406, "learning_rate": 7.098547415507572e-07, "loss": 19.3794, "step": 422740 }, { "epoch": 0.853981746708307, "grad_norm": 269.31787109375, "learning_rate": 7.096754713817771e-07, "loss": 21.4956, "step": 422750 }, { "epoch": 0.8540019473409908, "grad_norm": 4.7827067375183105, "learning_rate": 7.094962221233192e-07, "loss": 12.2086, "step": 422760 }, { "epoch": 0.8540221479736745, "grad_norm": 681.1311645507812, "learning_rate": 7.093169937762562e-07, "loss": 18.2227, "step": 422770 }, { "epoch": 0.8540423486063583, "grad_norm": 109.95421600341797, "learning_rate": 7.091377863414611e-07, "loss": 14.1634, "step": 422780 }, { "epoch": 0.8540625492390421, "grad_norm": 179.98135375976562, "learning_rate": 7.08958599819809e-07, "loss": 15.3122, "step": 422790 }, { "epoch": 0.8540827498717259, "grad_norm": 210.8217315673828, "learning_rate": 7.087794342121724e-07, "loss": 10.1048, "step": 422800 }, { "epoch": 0.8541029505044098, "grad_norm": 285.1670837402344, "learning_rate": 7.086002895194227e-07, "loss": 15.6775, "step": 422810 }, { "epoch": 0.8541231511370936, "grad_norm": 824.2320556640625, "learning_rate": 7.08421165742435e-07, "loss": 26.6279, "step": 422820 }, { "epoch": 0.8541433517697774, "grad_norm": 56.3100471496582, "learning_rate": 7.08242062882083e-07, "loss": 21.4873, "step": 422830 }, { "epoch": 0.8541635524024612, "grad_norm": 541.4666137695312, "learning_rate": 7.080629809392392e-07, "loss": 15.3471, "step": 422840 }, { "epoch": 0.854183753035145, "grad_norm": 333.91851806640625, "learning_rate": 7.078839199147741e-07, "loss": 22.5128, "step": 422850 }, { "epoch": 0.8542039536678289, "grad_norm": 545.7657470703125, "learning_rate": 7.077048798095637e-07, "loss": 28.4918, "step": 422860 }, { "epoch": 0.8542241543005127, "grad_norm": 0.6666960716247559, "learning_rate": 7.07525860624479e-07, "loss": 15.1818, "step": 422870 }, { "epoch": 0.8542443549331965, "grad_norm": 512.3268432617188, "learning_rate": 7.073468623603918e-07, "loss": 18.1457, "step": 422880 }, { "epoch": 0.8542645555658803, "grad_norm": 398.10882568359375, "learning_rate": 7.071678850181762e-07, "loss": 23.5213, "step": 422890 }, { "epoch": 0.8542847561985641, "grad_norm": 26.361709594726562, "learning_rate": 7.069889285987025e-07, "loss": 24.1029, "step": 422900 }, { "epoch": 0.854304956831248, "grad_norm": 99.04013061523438, "learning_rate": 7.068099931028449e-07, "loss": 14.5572, "step": 422910 }, { "epoch": 0.8543251574639318, "grad_norm": 487.67059326171875, "learning_rate": 7.066310785314756e-07, "loss": 14.2184, "step": 422920 }, 
{ "epoch": 0.8543453580966156, "grad_norm": 779.5738525390625, "learning_rate": 7.064521848854639e-07, "loss": 41.2217, "step": 422930 }, { "epoch": 0.8543655587292994, "grad_norm": 250.2709503173828, "learning_rate": 7.062733121656845e-07, "loss": 17.1406, "step": 422940 }, { "epoch": 0.8543857593619832, "grad_norm": 452.2339782714844, "learning_rate": 7.060944603730086e-07, "loss": 11.8047, "step": 422950 }, { "epoch": 0.8544059599946671, "grad_norm": 194.9429168701172, "learning_rate": 7.059156295083064e-07, "loss": 16.9779, "step": 422960 }, { "epoch": 0.8544261606273509, "grad_norm": 0.0, "learning_rate": 7.057368195724506e-07, "loss": 25.6859, "step": 422970 }, { "epoch": 0.8544463612600347, "grad_norm": 120.65647888183594, "learning_rate": 7.055580305663135e-07, "loss": 12.0228, "step": 422980 }, { "epoch": 0.8544665618927185, "grad_norm": 79.81140899658203, "learning_rate": 7.053792624907662e-07, "loss": 14.6664, "step": 422990 }, { "epoch": 0.8544867625254023, "grad_norm": 401.9481201171875, "learning_rate": 7.052005153466779e-07, "loss": 20.8853, "step": 423000 }, { "epoch": 0.8545069631580862, "grad_norm": 73.91416931152344, "learning_rate": 7.050217891349226e-07, "loss": 15.3818, "step": 423010 }, { "epoch": 0.8545271637907699, "grad_norm": 1922.633056640625, "learning_rate": 7.048430838563708e-07, "loss": 31.3682, "step": 423020 }, { "epoch": 0.8545473644234537, "grad_norm": 451.913818359375, "learning_rate": 7.046643995118913e-07, "loss": 16.2877, "step": 423030 }, { "epoch": 0.8545675650561375, "grad_norm": 74.22517395019531, "learning_rate": 7.04485736102356e-07, "loss": 27.027, "step": 423040 }, { "epoch": 0.8545877656888213, "grad_norm": 294.8464050292969, "learning_rate": 7.043070936286395e-07, "loss": 11.8094, "step": 423050 }, { "epoch": 0.8546079663215052, "grad_norm": 267.31085205078125, "learning_rate": 7.041284720916064e-07, "loss": 14.3946, "step": 423060 }, { "epoch": 0.854628166954189, "grad_norm": 237.23765563964844, "learning_rate": 7.0394987149213e-07, "loss": 15.9205, "step": 423070 }, { "epoch": 0.8546483675868728, "grad_norm": 169.7945098876953, "learning_rate": 7.037712918310818e-07, "loss": 21.465, "step": 423080 }, { "epoch": 0.8546685682195566, "grad_norm": 1223.9046630859375, "learning_rate": 7.035927331093318e-07, "loss": 20.4954, "step": 423090 }, { "epoch": 0.8546887688522404, "grad_norm": 288.6837463378906, "learning_rate": 7.034141953277484e-07, "loss": 14.3236, "step": 423100 }, { "epoch": 0.8547089694849243, "grad_norm": 198.91371154785156, "learning_rate": 7.032356784872035e-07, "loss": 22.5588, "step": 423110 }, { "epoch": 0.8547291701176081, "grad_norm": 362.96160888671875, "learning_rate": 7.030571825885685e-07, "loss": 12.3301, "step": 423120 }, { "epoch": 0.8547493707502919, "grad_norm": 245.9905242919922, "learning_rate": 7.028787076327093e-07, "loss": 21.1567, "step": 423130 }, { "epoch": 0.8547695713829757, "grad_norm": 548.1031494140625, "learning_rate": 7.027002536204986e-07, "loss": 17.0715, "step": 423140 }, { "epoch": 0.8547897720156595, "grad_norm": 129.6185760498047, "learning_rate": 7.025218205528061e-07, "loss": 13.8482, "step": 423150 }, { "epoch": 0.8548099726483434, "grad_norm": 187.77706909179688, "learning_rate": 7.02343408430502e-07, "loss": 12.9665, "step": 423160 }, { "epoch": 0.8548301732810272, "grad_norm": 544.2252807617188, "learning_rate": 7.021650172544531e-07, "loss": 12.6351, "step": 423170 }, { "epoch": 0.854850373913711, "grad_norm": 582.0858154296875, "learning_rate": 7.019866470255315e-07, "loss": 26.0964, 
"step": 423180 }, { "epoch": 0.8548705745463948, "grad_norm": 290.2862548828125, "learning_rate": 7.018082977446061e-07, "loss": 12.2002, "step": 423190 }, { "epoch": 0.8548907751790786, "grad_norm": 792.829833984375, "learning_rate": 7.01629969412545e-07, "loss": 17.0466, "step": 423200 }, { "epoch": 0.8549109758117625, "grad_norm": 155.3640594482422, "learning_rate": 7.014516620302186e-07, "loss": 8.8565, "step": 423210 }, { "epoch": 0.8549311764444463, "grad_norm": 230.0448760986328, "learning_rate": 7.012733755984946e-07, "loss": 10.1439, "step": 423220 }, { "epoch": 0.8549513770771301, "grad_norm": 291.56732177734375, "learning_rate": 7.010951101182439e-07, "loss": 17.6992, "step": 423230 }, { "epoch": 0.8549715777098139, "grad_norm": 23.559616088867188, "learning_rate": 7.009168655903342e-07, "loss": 16.6611, "step": 423240 }, { "epoch": 0.8549917783424977, "grad_norm": 102.11821746826172, "learning_rate": 7.007386420156332e-07, "loss": 16.0564, "step": 423250 }, { "epoch": 0.8550119789751816, "grad_norm": 426.19720458984375, "learning_rate": 7.005604393950116e-07, "loss": 8.914, "step": 423260 }, { "epoch": 0.8550321796078654, "grad_norm": 197.2255859375, "learning_rate": 7.003822577293362e-07, "loss": 13.2246, "step": 423270 }, { "epoch": 0.8550523802405491, "grad_norm": 291.4448547363281, "learning_rate": 7.002040970194768e-07, "loss": 13.1481, "step": 423280 }, { "epoch": 0.8550725808732329, "grad_norm": 600.7762451171875, "learning_rate": 7.000259572663004e-07, "loss": 43.9559, "step": 423290 }, { "epoch": 0.8550927815059167, "grad_norm": 267.8164367675781, "learning_rate": 6.99847838470677e-07, "loss": 9.8765, "step": 423300 }, { "epoch": 0.8551129821386005, "grad_norm": 551.2529296875, "learning_rate": 6.996697406334735e-07, "loss": 8.6453, "step": 423310 }, { "epoch": 0.8551331827712844, "grad_norm": 306.5521240234375, "learning_rate": 6.994916637555571e-07, "loss": 15.5206, "step": 423320 }, { "epoch": 0.8551533834039682, "grad_norm": 409.07958984375, "learning_rate": 6.993136078377965e-07, "loss": 11.0522, "step": 423330 }, { "epoch": 0.855173584036652, "grad_norm": 609.0219116210938, "learning_rate": 6.991355728810623e-07, "loss": 29.754, "step": 423340 }, { "epoch": 0.8551937846693358, "grad_norm": 440.0611877441406, "learning_rate": 6.989575588862174e-07, "loss": 19.6599, "step": 423350 }, { "epoch": 0.8552139853020196, "grad_norm": 289.8558044433594, "learning_rate": 6.987795658541319e-07, "loss": 13.4273, "step": 423360 }, { "epoch": 0.8552341859347035, "grad_norm": 370.3302917480469, "learning_rate": 6.986015937856743e-07, "loss": 26.0949, "step": 423370 }, { "epoch": 0.8552543865673873, "grad_norm": 423.3115234375, "learning_rate": 6.984236426817104e-07, "loss": 29.1275, "step": 423380 }, { "epoch": 0.8552745872000711, "grad_norm": 849.1698608398438, "learning_rate": 6.982457125431069e-07, "loss": 33.0457, "step": 423390 }, { "epoch": 0.8552947878327549, "grad_norm": 520.9661254882812, "learning_rate": 6.980678033707333e-07, "loss": 19.85, "step": 423400 }, { "epoch": 0.8553149884654387, "grad_norm": 812.6036376953125, "learning_rate": 6.978899151654556e-07, "loss": 22.3343, "step": 423410 }, { "epoch": 0.8553351890981226, "grad_norm": 95.53800201416016, "learning_rate": 6.977120479281396e-07, "loss": 20.1647, "step": 423420 }, { "epoch": 0.8553553897308064, "grad_norm": 571.2792358398438, "learning_rate": 6.975342016596531e-07, "loss": 24.6136, "step": 423430 }, { "epoch": 0.8553755903634902, "grad_norm": 265.1282653808594, "learning_rate": 6.973563763608643e-07, 
"loss": 17.2472, "step": 423440 }, { "epoch": 0.855395790996174, "grad_norm": 319.90191650390625, "learning_rate": 6.971785720326385e-07, "loss": 13.9207, "step": 423450 }, { "epoch": 0.8554159916288578, "grad_norm": 482.8645324707031, "learning_rate": 6.970007886758412e-07, "loss": 20.3257, "step": 423460 }, { "epoch": 0.8554361922615417, "grad_norm": 471.61529541015625, "learning_rate": 6.968230262913417e-07, "loss": 21.8656, "step": 423470 }, { "epoch": 0.8554563928942255, "grad_norm": 443.1946716308594, "learning_rate": 6.966452848800043e-07, "loss": 15.9943, "step": 423480 }, { "epoch": 0.8554765935269093, "grad_norm": 578.9497680664062, "learning_rate": 6.964675644426955e-07, "loss": 18.8936, "step": 423490 }, { "epoch": 0.8554967941595931, "grad_norm": 6.638950347900391, "learning_rate": 6.962898649802824e-07, "loss": 17.4078, "step": 423500 }, { "epoch": 0.8555169947922769, "grad_norm": 1091.97998046875, "learning_rate": 6.961121864936294e-07, "loss": 22.4504, "step": 423510 }, { "epoch": 0.8555371954249608, "grad_norm": 532.3468627929688, "learning_rate": 6.95934528983605e-07, "loss": 10.7549, "step": 423520 }, { "epoch": 0.8555573960576445, "grad_norm": 306.2650146484375, "learning_rate": 6.957568924510733e-07, "loss": 11.9596, "step": 423530 }, { "epoch": 0.8555775966903283, "grad_norm": 550.3804931640625, "learning_rate": 6.955792768969e-07, "loss": 18.5798, "step": 423540 }, { "epoch": 0.8555977973230121, "grad_norm": 468.0177307128906, "learning_rate": 6.954016823219517e-07, "loss": 15.8299, "step": 423550 }, { "epoch": 0.8556179979556959, "grad_norm": 265.7972717285156, "learning_rate": 6.952241087270938e-07, "loss": 12.0223, "step": 423560 }, { "epoch": 0.8556381985883798, "grad_norm": 417.89630126953125, "learning_rate": 6.950465561131903e-07, "loss": 18.3307, "step": 423570 }, { "epoch": 0.8556583992210636, "grad_norm": 390.70086669921875, "learning_rate": 6.948690244811079e-07, "loss": 22.9239, "step": 423580 }, { "epoch": 0.8556785998537474, "grad_norm": 357.60009765625, "learning_rate": 6.946915138317129e-07, "loss": 13.8321, "step": 423590 }, { "epoch": 0.8556988004864312, "grad_norm": 292.06292724609375, "learning_rate": 6.945140241658688e-07, "loss": 16.6138, "step": 423600 }, { "epoch": 0.855719001119115, "grad_norm": 231.03797912597656, "learning_rate": 6.943365554844406e-07, "loss": 16.0941, "step": 423610 }, { "epoch": 0.8557392017517989, "grad_norm": 178.90017700195312, "learning_rate": 6.941591077882948e-07, "loss": 26.9284, "step": 423620 }, { "epoch": 0.8557594023844827, "grad_norm": 267.6416015625, "learning_rate": 6.939816810782952e-07, "loss": 22.531, "step": 423630 }, { "epoch": 0.8557796030171665, "grad_norm": 483.8642883300781, "learning_rate": 6.938042753553054e-07, "loss": 33.9005, "step": 423640 }, { "epoch": 0.8557998036498503, "grad_norm": 532.0093383789062, "learning_rate": 6.936268906201915e-07, "loss": 14.0307, "step": 423650 }, { "epoch": 0.8558200042825341, "grad_norm": 672.6396484375, "learning_rate": 6.934495268738195e-07, "loss": 18.4205, "step": 423660 }, { "epoch": 0.855840204915218, "grad_norm": 494.2569274902344, "learning_rate": 6.932721841170503e-07, "loss": 12.0525, "step": 423670 }, { "epoch": 0.8558604055479018, "grad_norm": 488.8106689453125, "learning_rate": 6.930948623507505e-07, "loss": 16.517, "step": 423680 }, { "epoch": 0.8558806061805856, "grad_norm": 716.0003662109375, "learning_rate": 6.92917561575785e-07, "loss": 24.3459, "step": 423690 }, { "epoch": 0.8559008068132694, "grad_norm": 463.7527160644531, "learning_rate": 
6.927402817930168e-07, "loss": 17.1598, "step": 423700 }, { "epoch": 0.8559210074459532, "grad_norm": 303.1055908203125, "learning_rate": 6.925630230033087e-07, "loss": 24.3444, "step": 423710 }, { "epoch": 0.855941208078637, "grad_norm": 637.7202758789062, "learning_rate": 6.923857852075261e-07, "loss": 17.3609, "step": 423720 }, { "epoch": 0.8559614087113209, "grad_norm": 456.35491943359375, "learning_rate": 6.922085684065349e-07, "loss": 19.7045, "step": 423730 }, { "epoch": 0.8559816093440047, "grad_norm": 126.94768524169922, "learning_rate": 6.920313726011945e-07, "loss": 6.9527, "step": 423740 }, { "epoch": 0.8560018099766885, "grad_norm": 357.84619140625, "learning_rate": 6.918541977923709e-07, "loss": 23.9845, "step": 423750 }, { "epoch": 0.8560220106093723, "grad_norm": 377.5257873535156, "learning_rate": 6.916770439809283e-07, "loss": 21.3561, "step": 423760 }, { "epoch": 0.8560422112420562, "grad_norm": 135.76699829101562, "learning_rate": 6.914999111677295e-07, "loss": 16.4085, "step": 423770 }, { "epoch": 0.85606241187474, "grad_norm": 181.76840209960938, "learning_rate": 6.913227993536364e-07, "loss": 23.1228, "step": 423780 }, { "epoch": 0.8560826125074237, "grad_norm": 422.5714416503906, "learning_rate": 6.911457085395146e-07, "loss": 13.2301, "step": 423790 }, { "epoch": 0.8561028131401075, "grad_norm": 458.74517822265625, "learning_rate": 6.909686387262255e-07, "loss": 13.2711, "step": 423800 }, { "epoch": 0.8561230137727913, "grad_norm": 173.49359130859375, "learning_rate": 6.907915899146322e-07, "loss": 14.9355, "step": 423810 }, { "epoch": 0.8561432144054751, "grad_norm": 420.94586181640625, "learning_rate": 6.906145621055987e-07, "loss": 15.7464, "step": 423820 }, { "epoch": 0.856163415038159, "grad_norm": 325.2900695800781, "learning_rate": 6.904375552999859e-07, "loss": 14.5921, "step": 423830 }, { "epoch": 0.8561836156708428, "grad_norm": 377.8846130371094, "learning_rate": 6.902605694986592e-07, "loss": 29.0642, "step": 423840 }, { "epoch": 0.8562038163035266, "grad_norm": 424.69622802734375, "learning_rate": 6.9008360470248e-07, "loss": 10.3148, "step": 423850 }, { "epoch": 0.8562240169362104, "grad_norm": 16.294780731201172, "learning_rate": 6.89906660912309e-07, "loss": 20.1662, "step": 423860 }, { "epoch": 0.8562442175688942, "grad_norm": 467.6504821777344, "learning_rate": 6.897297381290113e-07, "loss": 12.0162, "step": 423870 }, { "epoch": 0.8562644182015781, "grad_norm": 90.69451141357422, "learning_rate": 6.895528363534476e-07, "loss": 21.7523, "step": 423880 }, { "epoch": 0.8562846188342619, "grad_norm": 572.353515625, "learning_rate": 6.89375955586481e-07, "loss": 9.6403, "step": 423890 }, { "epoch": 0.8563048194669457, "grad_norm": 426.759765625, "learning_rate": 6.891990958289724e-07, "loss": 16.1095, "step": 423900 }, { "epoch": 0.8563250200996295, "grad_norm": 261.89208984375, "learning_rate": 6.890222570817856e-07, "loss": 19.6324, "step": 423910 }, { "epoch": 0.8563452207323133, "grad_norm": 768.3299560546875, "learning_rate": 6.888454393457817e-07, "loss": 23.239, "step": 423920 }, { "epoch": 0.8563654213649972, "grad_norm": 167.47921752929688, "learning_rate": 6.886686426218209e-07, "loss": 16.6358, "step": 423930 }, { "epoch": 0.856385621997681, "grad_norm": 671.823486328125, "learning_rate": 6.884918669107671e-07, "loss": 14.0608, "step": 423940 }, { "epoch": 0.8564058226303648, "grad_norm": 208.45716857910156, "learning_rate": 6.883151122134812e-07, "loss": 13.198, "step": 423950 }, { "epoch": 0.8564260232630486, "grad_norm": 
64.32759857177734, "learning_rate": 6.881383785308232e-07, "loss": 15.9101, "step": 423960 }, { "epoch": 0.8564462238957324, "grad_norm": 501.4256591796875, "learning_rate": 6.879616658636562e-07, "loss": 18.0617, "step": 423970 }, { "epoch": 0.8564664245284163, "grad_norm": 332.1427917480469, "learning_rate": 6.877849742128423e-07, "loss": 15.8026, "step": 423980 }, { "epoch": 0.8564866251611001, "grad_norm": 429.5103759765625, "learning_rate": 6.876083035792408e-07, "loss": 13.0902, "step": 423990 }, { "epoch": 0.8565068257937839, "grad_norm": 616.64208984375, "learning_rate": 6.874316539637127e-07, "loss": 26.6816, "step": 424000 }, { "epoch": 0.8565270264264677, "grad_norm": 420.34564208984375, "learning_rate": 6.872550253671207e-07, "loss": 23.2197, "step": 424010 }, { "epoch": 0.8565472270591515, "grad_norm": 1443.5985107421875, "learning_rate": 6.870784177903244e-07, "loss": 31.7041, "step": 424020 }, { "epoch": 0.8565674276918354, "grad_norm": 152.979248046875, "learning_rate": 6.869018312341841e-07, "loss": 12.8966, "step": 424030 }, { "epoch": 0.8565876283245192, "grad_norm": 279.4204406738281, "learning_rate": 6.86725265699561e-07, "loss": 17.8656, "step": 424040 }, { "epoch": 0.8566078289572029, "grad_norm": 646.5421142578125, "learning_rate": 6.865487211873167e-07, "loss": 19.8595, "step": 424050 }, { "epoch": 0.8566280295898867, "grad_norm": 416.0545654296875, "learning_rate": 6.863721976983112e-07, "loss": 26.8425, "step": 424060 }, { "epoch": 0.8566482302225705, "grad_norm": 922.7435302734375, "learning_rate": 6.861956952334031e-07, "loss": 17.239, "step": 424070 }, { "epoch": 0.8566684308552543, "grad_norm": 240.82968139648438, "learning_rate": 6.860192137934552e-07, "loss": 19.2269, "step": 424080 }, { "epoch": 0.8566886314879382, "grad_norm": 439.35784912109375, "learning_rate": 6.858427533793261e-07, "loss": 13.4926, "step": 424090 }, { "epoch": 0.856708832120622, "grad_norm": 588.150146484375, "learning_rate": 6.856663139918751e-07, "loss": 10.181, "step": 424100 }, { "epoch": 0.8567290327533058, "grad_norm": 288.3674011230469, "learning_rate": 6.854898956319644e-07, "loss": 25.9362, "step": 424110 }, { "epoch": 0.8567492333859896, "grad_norm": 256.4996643066406, "learning_rate": 6.853134983004517e-07, "loss": 7.8227, "step": 424120 }, { "epoch": 0.8567694340186734, "grad_norm": 452.1529846191406, "learning_rate": 6.851371219981989e-07, "loss": 16.0354, "step": 424130 }, { "epoch": 0.8567896346513573, "grad_norm": 287.13458251953125, "learning_rate": 6.849607667260643e-07, "loss": 25.9335, "step": 424140 }, { "epoch": 0.8568098352840411, "grad_norm": 425.8190002441406, "learning_rate": 6.847844324849062e-07, "loss": 40.5554, "step": 424150 }, { "epoch": 0.8568300359167249, "grad_norm": 180.3892822265625, "learning_rate": 6.846081192755871e-07, "loss": 12.232, "step": 424160 }, { "epoch": 0.8568502365494087, "grad_norm": 622.3618774414062, "learning_rate": 6.844318270989631e-07, "loss": 16.899, "step": 424170 }, { "epoch": 0.8568704371820925, "grad_norm": 257.9329528808594, "learning_rate": 6.842555559558961e-07, "loss": 13.4867, "step": 424180 }, { "epoch": 0.8568906378147764, "grad_norm": 330.57879638671875, "learning_rate": 6.840793058472434e-07, "loss": 11.9845, "step": 424190 }, { "epoch": 0.8569108384474602, "grad_norm": 922.0054321289062, "learning_rate": 6.839030767738653e-07, "loss": 19.7079, "step": 424200 }, { "epoch": 0.856931039080144, "grad_norm": 719.37109375, "learning_rate": 6.837268687366199e-07, "loss": 25.35, "step": 424210 }, { "epoch": 
0.8569512397128278, "grad_norm": 421.06842041015625, "learning_rate": 6.835506817363657e-07, "loss": 15.7544, "step": 424220 }, { "epoch": 0.8569714403455116, "grad_norm": 64.48709869384766, "learning_rate": 6.83374515773963e-07, "loss": 25.4828, "step": 424230 }, { "epoch": 0.8569916409781955, "grad_norm": 391.7761535644531, "learning_rate": 6.831983708502693e-07, "loss": 13.6241, "step": 424240 }, { "epoch": 0.8570118416108793, "grad_norm": 389.82110595703125, "learning_rate": 6.830222469661419e-07, "loss": 10.5855, "step": 424250 }, { "epoch": 0.8570320422435631, "grad_norm": 359.3149108886719, "learning_rate": 6.828461441224405e-07, "loss": 16.1136, "step": 424260 }, { "epoch": 0.8570522428762469, "grad_norm": 162.98236083984375, "learning_rate": 6.826700623200255e-07, "loss": 12.2284, "step": 424270 }, { "epoch": 0.8570724435089307, "grad_norm": 303.9280090332031, "learning_rate": 6.824940015597514e-07, "loss": 15.9212, "step": 424280 }, { "epoch": 0.8570926441416146, "grad_norm": 93.1552963256836, "learning_rate": 6.823179618424774e-07, "loss": 6.9389, "step": 424290 }, { "epoch": 0.8571128447742983, "grad_norm": 265.16131591796875, "learning_rate": 6.821419431690629e-07, "loss": 12.7053, "step": 424300 }, { "epoch": 0.8571330454069821, "grad_norm": 131.3051300048828, "learning_rate": 6.819659455403654e-07, "loss": 12.4221, "step": 424310 }, { "epoch": 0.8571532460396659, "grad_norm": 738.7040405273438, "learning_rate": 6.817899689572405e-07, "loss": 18.3596, "step": 424320 }, { "epoch": 0.8571734466723497, "grad_norm": 505.9089660644531, "learning_rate": 6.816140134205479e-07, "loss": 17.632, "step": 424330 }, { "epoch": 0.8571936473050336, "grad_norm": 440.5530700683594, "learning_rate": 6.81438078931147e-07, "loss": 14.6219, "step": 424340 }, { "epoch": 0.8572138479377174, "grad_norm": 101.6639404296875, "learning_rate": 6.81262165489891e-07, "loss": 13.628, "step": 424350 }, { "epoch": 0.8572340485704012, "grad_norm": 87.83068084716797, "learning_rate": 6.810862730976392e-07, "loss": 12.6355, "step": 424360 }, { "epoch": 0.857254249203085, "grad_norm": 340.1312561035156, "learning_rate": 6.809104017552503e-07, "loss": 10.5935, "step": 424370 }, { "epoch": 0.8572744498357688, "grad_norm": 155.0587158203125, "learning_rate": 6.807345514635805e-07, "loss": 17.6908, "step": 424380 }, { "epoch": 0.8572946504684527, "grad_norm": 348.400146484375, "learning_rate": 6.80558722223485e-07, "loss": 13.1934, "step": 424390 }, { "epoch": 0.8573148511011365, "grad_norm": 300.3902893066406, "learning_rate": 6.803829140358237e-07, "loss": 20.5829, "step": 424400 }, { "epoch": 0.8573350517338203, "grad_norm": 574.2684326171875, "learning_rate": 6.802071269014527e-07, "loss": 20.713, "step": 424410 }, { "epoch": 0.8573552523665041, "grad_norm": 401.4273376464844, "learning_rate": 6.800313608212261e-07, "loss": 17.8327, "step": 424420 }, { "epoch": 0.8573754529991879, "grad_norm": 308.0999450683594, "learning_rate": 6.798556157960046e-07, "loss": 13.0263, "step": 424430 }, { "epoch": 0.8573956536318718, "grad_norm": 250.98672485351562, "learning_rate": 6.796798918266417e-07, "loss": 21.3625, "step": 424440 }, { "epoch": 0.8574158542645556, "grad_norm": 9.2577543258667, "learning_rate": 6.795041889139958e-07, "loss": 17.9683, "step": 424450 }, { "epoch": 0.8574360548972394, "grad_norm": 317.8941345214844, "learning_rate": 6.793285070589229e-07, "loss": 18.9148, "step": 424460 }, { "epoch": 0.8574562555299232, "grad_norm": 299.7467346191406, "learning_rate": 6.79152846262277e-07, "loss": 29.3304, 
"step": 424470 }, { "epoch": 0.857476456162607, "grad_norm": 391.6691589355469, "learning_rate": 6.789772065249178e-07, "loss": 12.1778, "step": 424480 }, { "epoch": 0.8574966567952909, "grad_norm": 327.8114929199219, "learning_rate": 6.788015878476983e-07, "loss": 12.3476, "step": 424490 }, { "epoch": 0.8575168574279747, "grad_norm": 172.25738525390625, "learning_rate": 6.786259902314768e-07, "loss": 12.297, "step": 424500 }, { "epoch": 0.8575370580606585, "grad_norm": 308.32952880859375, "learning_rate": 6.784504136771075e-07, "loss": 25.0714, "step": 424510 }, { "epoch": 0.8575572586933423, "grad_norm": 362.46392822265625, "learning_rate": 6.782748581854471e-07, "loss": 14.4293, "step": 424520 }, { "epoch": 0.8575774593260261, "grad_norm": 24.034151077270508, "learning_rate": 6.780993237573513e-07, "loss": 10.6712, "step": 424530 }, { "epoch": 0.85759765995871, "grad_norm": 383.0001220703125, "learning_rate": 6.779238103936742e-07, "loss": 17.6843, "step": 424540 }, { "epoch": 0.8576178605913938, "grad_norm": 189.0929412841797, "learning_rate": 6.777483180952732e-07, "loss": 13.5381, "step": 424550 }, { "epoch": 0.8576380612240775, "grad_norm": 70.52850341796875, "learning_rate": 6.775728468630027e-07, "loss": 20.7291, "step": 424560 }, { "epoch": 0.8576582618567613, "grad_norm": 336.09002685546875, "learning_rate": 6.773973966977165e-07, "loss": 26.8984, "step": 424570 }, { "epoch": 0.8576784624894451, "grad_norm": 489.1164245605469, "learning_rate": 6.772219676002717e-07, "loss": 18.0368, "step": 424580 }, { "epoch": 0.857698663122129, "grad_norm": 230.7914581298828, "learning_rate": 6.770465595715231e-07, "loss": 16.2012, "step": 424590 }, { "epoch": 0.8577188637548128, "grad_norm": 262.0990905761719, "learning_rate": 6.768711726123261e-07, "loss": 12.7744, "step": 424600 }, { "epoch": 0.8577390643874966, "grad_norm": 402.1889343261719, "learning_rate": 6.76695806723533e-07, "loss": 15.6399, "step": 424610 }, { "epoch": 0.8577592650201804, "grad_norm": 267.36810302734375, "learning_rate": 6.765204619060012e-07, "loss": 33.8587, "step": 424620 }, { "epoch": 0.8577794656528642, "grad_norm": 326.3402404785156, "learning_rate": 6.763451381605846e-07, "loss": 15.6711, "step": 424630 }, { "epoch": 0.857799666285548, "grad_norm": 11.367655754089355, "learning_rate": 6.761698354881363e-07, "loss": 43.7316, "step": 424640 }, { "epoch": 0.8578198669182319, "grad_norm": 635.5440673828125, "learning_rate": 6.759945538895119e-07, "loss": 23.3102, "step": 424650 }, { "epoch": 0.8578400675509157, "grad_norm": 849.0969848632812, "learning_rate": 6.758192933655667e-07, "loss": 24.0309, "step": 424660 }, { "epoch": 0.8578602681835995, "grad_norm": 350.1836242675781, "learning_rate": 6.756440539171533e-07, "loss": 17.4814, "step": 424670 }, { "epoch": 0.8578804688162833, "grad_norm": 686.380615234375, "learning_rate": 6.754688355451256e-07, "loss": 16.0547, "step": 424680 }, { "epoch": 0.8579006694489671, "grad_norm": 292.45770263671875, "learning_rate": 6.752936382503394e-07, "loss": 19.9808, "step": 424690 }, { "epoch": 0.857920870081651, "grad_norm": 641.3974609375, "learning_rate": 6.751184620336471e-07, "loss": 21.2562, "step": 424700 }, { "epoch": 0.8579410707143348, "grad_norm": 272.4770202636719, "learning_rate": 6.749433068959022e-07, "loss": 8.5196, "step": 424710 }, { "epoch": 0.8579612713470186, "grad_norm": 308.81781005859375, "learning_rate": 6.747681728379601e-07, "loss": 16.7534, "step": 424720 }, { "epoch": 0.8579814719797024, "grad_norm": 365.3183898925781, "learning_rate": 
6.745930598606721e-07, "loss": 27.0763, "step": 424730 }, { "epoch": 0.8580016726123862, "grad_norm": 418.2502746582031, "learning_rate": 6.744179679648943e-07, "loss": 14.3722, "step": 424740 }, { "epoch": 0.8580218732450701, "grad_norm": 421.59576416015625, "learning_rate": 6.742428971514786e-07, "loss": 15.017, "step": 424750 }, { "epoch": 0.8580420738777539, "grad_norm": 524.6824340820312, "learning_rate": 6.74067847421277e-07, "loss": 30.5549, "step": 424760 }, { "epoch": 0.8580622745104377, "grad_norm": 423.94622802734375, "learning_rate": 6.738928187751454e-07, "loss": 17.7713, "step": 424770 }, { "epoch": 0.8580824751431215, "grad_norm": 554.2315063476562, "learning_rate": 6.737178112139342e-07, "loss": 14.8536, "step": 424780 }, { "epoch": 0.8581026757758053, "grad_norm": 244.727294921875, "learning_rate": 6.735428247384989e-07, "loss": 26.6349, "step": 424790 }, { "epoch": 0.8581228764084892, "grad_norm": 278.85321044921875, "learning_rate": 6.733678593496901e-07, "loss": 13.8719, "step": 424800 }, { "epoch": 0.8581430770411729, "grad_norm": 419.3354797363281, "learning_rate": 6.731929150483624e-07, "loss": 13.6345, "step": 424810 }, { "epoch": 0.8581632776738567, "grad_norm": 365.9506530761719, "learning_rate": 6.73017991835368e-07, "loss": 15.2896, "step": 424820 }, { "epoch": 0.8581834783065405, "grad_norm": 290.0882873535156, "learning_rate": 6.728430897115578e-07, "loss": 25.9369, "step": 424830 }, { "epoch": 0.8582036789392243, "grad_norm": 205.87440490722656, "learning_rate": 6.726682086777869e-07, "loss": 14.2771, "step": 424840 }, { "epoch": 0.8582238795719082, "grad_norm": 305.48828125, "learning_rate": 6.724933487349061e-07, "loss": 17.0883, "step": 424850 }, { "epoch": 0.858244080204592, "grad_norm": 160.75222778320312, "learning_rate": 6.723185098837665e-07, "loss": 37.2009, "step": 424860 }, { "epoch": 0.8582642808372758, "grad_norm": 272.7733154296875, "learning_rate": 6.721436921252223e-07, "loss": 31.2173, "step": 424870 }, { "epoch": 0.8582844814699596, "grad_norm": 170.22232055664062, "learning_rate": 6.719688954601266e-07, "loss": 27.0385, "step": 424880 }, { "epoch": 0.8583046821026434, "grad_norm": 419.1109924316406, "learning_rate": 6.717941198893274e-07, "loss": 14.9501, "step": 424890 }, { "epoch": 0.8583248827353273, "grad_norm": 344.0302734375, "learning_rate": 6.716193654136788e-07, "loss": 25.7425, "step": 424900 }, { "epoch": 0.8583450833680111, "grad_norm": 112.04228210449219, "learning_rate": 6.714446320340334e-07, "loss": 11.3657, "step": 424910 }, { "epoch": 0.8583652840006949, "grad_norm": 580.894775390625, "learning_rate": 6.712699197512418e-07, "loss": 11.5984, "step": 424920 }, { "epoch": 0.8583854846333787, "grad_norm": 150.63763427734375, "learning_rate": 6.710952285661549e-07, "loss": 32.7941, "step": 424930 }, { "epoch": 0.8584056852660625, "grad_norm": 569.3262939453125, "learning_rate": 6.709205584796241e-07, "loss": 15.6715, "step": 424940 }, { "epoch": 0.8584258858987464, "grad_norm": 216.56661987304688, "learning_rate": 6.707459094925045e-07, "loss": 10.321, "step": 424950 }, { "epoch": 0.8584460865314302, "grad_norm": 393.1278381347656, "learning_rate": 6.705712816056415e-07, "loss": 16.0812, "step": 424960 }, { "epoch": 0.858466287164114, "grad_norm": 350.8961181640625, "learning_rate": 6.703966748198892e-07, "loss": 12.8694, "step": 424970 }, { "epoch": 0.8584864877967978, "grad_norm": 113.81829833984375, "learning_rate": 6.702220891360994e-07, "loss": 11.2908, "step": 424980 }, { "epoch": 0.8585066884294816, "grad_norm": 
11.053807258605957, "learning_rate": 6.700475245551218e-07, "loss": 9.902, "step": 424990 }, { "epoch": 0.8585268890621655, "grad_norm": 425.9871520996094, "learning_rate": 6.698729810778065e-07, "loss": 21.1804, "step": 425000 }, { "epoch": 0.8585470896948493, "grad_norm": 581.9754638671875, "learning_rate": 6.696984587050065e-07, "loss": 12.3986, "step": 425010 }, { "epoch": 0.8585672903275331, "grad_norm": 212.0026092529297, "learning_rate": 6.695239574375706e-07, "loss": 8.5076, "step": 425020 }, { "epoch": 0.8585874909602169, "grad_norm": 72.91810607910156, "learning_rate": 6.693494772763487e-07, "loss": 11.4933, "step": 425030 }, { "epoch": 0.8586076915929007, "grad_norm": 143.1288299560547, "learning_rate": 6.691750182221935e-07, "loss": 20.9185, "step": 425040 }, { "epoch": 0.8586278922255846, "grad_norm": 425.71942138671875, "learning_rate": 6.69000580275953e-07, "loss": 13.3819, "step": 425050 }, { "epoch": 0.8586480928582684, "grad_norm": 21.489431381225586, "learning_rate": 6.688261634384791e-07, "loss": 17.3936, "step": 425060 }, { "epoch": 0.8586682934909521, "grad_norm": 41.76399230957031, "learning_rate": 6.686517677106214e-07, "loss": 13.0918, "step": 425070 }, { "epoch": 0.8586884941236359, "grad_norm": 32.80811309814453, "learning_rate": 6.684773930932281e-07, "loss": 15.1117, "step": 425080 }, { "epoch": 0.8587086947563197, "grad_norm": 69.80337524414062, "learning_rate": 6.683030395871526e-07, "loss": 30.6622, "step": 425090 }, { "epoch": 0.8587288953890035, "grad_norm": 185.21798706054688, "learning_rate": 6.681287071932408e-07, "loss": 22.5686, "step": 425100 }, { "epoch": 0.8587490960216874, "grad_norm": 478.83819580078125, "learning_rate": 6.679543959123458e-07, "loss": 20.441, "step": 425110 }, { "epoch": 0.8587692966543712, "grad_norm": 399.27178955078125, "learning_rate": 6.677801057453143e-07, "loss": 16.9462, "step": 425120 }, { "epoch": 0.858789497287055, "grad_norm": 411.01025390625, "learning_rate": 6.676058366929988e-07, "loss": 11.9585, "step": 425130 }, { "epoch": 0.8588096979197388, "grad_norm": 322.119384765625, "learning_rate": 6.674315887562466e-07, "loss": 16.2444, "step": 425140 }, { "epoch": 0.8588298985524226, "grad_norm": 285.550048828125, "learning_rate": 6.672573619359063e-07, "loss": 21.3185, "step": 425150 }, { "epoch": 0.8588500991851065, "grad_norm": 260.2703857421875, "learning_rate": 6.67083156232829e-07, "loss": 28.233, "step": 425160 }, { "epoch": 0.8588702998177903, "grad_norm": 317.91375732421875, "learning_rate": 6.669089716478627e-07, "loss": 24.1974, "step": 425170 }, { "epoch": 0.8588905004504741, "grad_norm": 188.8263702392578, "learning_rate": 6.667348081818559e-07, "loss": 9.7247, "step": 425180 }, { "epoch": 0.8589107010831579, "grad_norm": 616.5370483398438, "learning_rate": 6.665606658356583e-07, "loss": 16.824, "step": 425190 }, { "epoch": 0.8589309017158417, "grad_norm": 105.1166000366211, "learning_rate": 6.663865446101192e-07, "loss": 18.8522, "step": 425200 }, { "epoch": 0.8589511023485256, "grad_norm": 127.76112365722656, "learning_rate": 6.662124445060863e-07, "loss": 16.4562, "step": 425210 }, { "epoch": 0.8589713029812094, "grad_norm": 267.9668273925781, "learning_rate": 6.660383655244074e-07, "loss": 12.7942, "step": 425220 }, { "epoch": 0.8589915036138932, "grad_norm": 100.9757308959961, "learning_rate": 6.658643076659327e-07, "loss": 19.0906, "step": 425230 }, { "epoch": 0.859011704246577, "grad_norm": 521.11767578125, "learning_rate": 6.6569027093151e-07, "loss": 22.8369, "step": 425240 }, { "epoch": 
0.8590319048792608, "grad_norm": 638.5565795898438, "learning_rate": 6.655162553219862e-07, "loss": 32.4391, "step": 425250 }, { "epoch": 0.8590521055119447, "grad_norm": 581.9984741210938, "learning_rate": 6.653422608382105e-07, "loss": 28.779, "step": 425260 }, { "epoch": 0.8590723061446285, "grad_norm": 107.42449951171875, "learning_rate": 6.651682874810317e-07, "loss": 10.0435, "step": 425270 }, { "epoch": 0.8590925067773123, "grad_norm": 673.4070434570312, "learning_rate": 6.649943352512972e-07, "loss": 20.3799, "step": 425280 }, { "epoch": 0.8591127074099961, "grad_norm": 507.18798828125, "learning_rate": 6.648204041498534e-07, "loss": 20.1533, "step": 425290 }, { "epoch": 0.8591329080426799, "grad_norm": 315.055908203125, "learning_rate": 6.646464941775499e-07, "loss": 10.991, "step": 425300 }, { "epoch": 0.8591531086753638, "grad_norm": 265.2852783203125, "learning_rate": 6.64472605335234e-07, "loss": 13.6276, "step": 425310 }, { "epoch": 0.8591733093080475, "grad_norm": 124.01360321044922, "learning_rate": 6.642987376237514e-07, "loss": 29.7952, "step": 425320 }, { "epoch": 0.8591935099407313, "grad_norm": 366.8377685546875, "learning_rate": 6.641248910439518e-07, "loss": 12.5253, "step": 425330 }, { "epoch": 0.8592137105734151, "grad_norm": 379.7523193359375, "learning_rate": 6.639510655966813e-07, "loss": 15.6356, "step": 425340 }, { "epoch": 0.8592339112060989, "grad_norm": 288.5459289550781, "learning_rate": 6.637772612827881e-07, "loss": 13.3753, "step": 425350 }, { "epoch": 0.8592541118387828, "grad_norm": 86.94245147705078, "learning_rate": 6.636034781031181e-07, "loss": 13.9261, "step": 425360 }, { "epoch": 0.8592743124714666, "grad_norm": 391.8909912109375, "learning_rate": 6.634297160585184e-07, "loss": 22.205, "step": 425370 }, { "epoch": 0.8592945131041504, "grad_norm": 401.6841125488281, "learning_rate": 6.632559751498369e-07, "loss": 16.0234, "step": 425380 }, { "epoch": 0.8593147137368342, "grad_norm": 576.306640625, "learning_rate": 6.630822553779193e-07, "loss": 30.8965, "step": 425390 }, { "epoch": 0.859334914369518, "grad_norm": 179.42271423339844, "learning_rate": 6.629085567436133e-07, "loss": 27.6414, "step": 425400 }, { "epoch": 0.8593551150022019, "grad_norm": 441.8272705078125, "learning_rate": 6.627348792477639e-07, "loss": 20.9805, "step": 425410 }, { "epoch": 0.8593753156348857, "grad_norm": 26.4918155670166, "learning_rate": 6.625612228912199e-07, "loss": 26.1555, "step": 425420 }, { "epoch": 0.8593955162675695, "grad_norm": 261.5885009765625, "learning_rate": 6.623875876748265e-07, "loss": 23.7872, "step": 425430 }, { "epoch": 0.8594157169002533, "grad_norm": 585.4600219726562, "learning_rate": 6.622139735994288e-07, "loss": 18.8284, "step": 425440 }, { "epoch": 0.8594359175329371, "grad_norm": 181.7188262939453, "learning_rate": 6.620403806658754e-07, "loss": 8.9154, "step": 425450 }, { "epoch": 0.859456118165621, "grad_norm": 448.3474426269531, "learning_rate": 6.618668088750107e-07, "loss": 23.9844, "step": 425460 }, { "epoch": 0.8594763187983048, "grad_norm": 234.59646606445312, "learning_rate": 6.616932582276798e-07, "loss": 17.3415, "step": 425470 }, { "epoch": 0.8594965194309886, "grad_norm": 122.73247528076172, "learning_rate": 6.615197287247299e-07, "loss": 15.1149, "step": 425480 }, { "epoch": 0.8595167200636724, "grad_norm": 1209.4432373046875, "learning_rate": 6.61346220367009e-07, "loss": 10.0193, "step": 425490 }, { "epoch": 0.8595369206963562, "grad_norm": 872.7515258789062, "learning_rate": 6.611727331553585e-07, "loss": 22.8392, 
"step": 425500 }, { "epoch": 0.85955712132904, "grad_norm": 473.6070556640625, "learning_rate": 6.609992670906251e-07, "loss": 14.2966, "step": 425510 }, { "epoch": 0.8595773219617239, "grad_norm": 349.6256103515625, "learning_rate": 6.608258221736568e-07, "loss": 18.4509, "step": 425520 }, { "epoch": 0.8595975225944077, "grad_norm": 724.1307983398438, "learning_rate": 6.60652398405297e-07, "loss": 27.9618, "step": 425530 }, { "epoch": 0.8596177232270915, "grad_norm": 521.7205810546875, "learning_rate": 6.604789957863899e-07, "loss": 18.731, "step": 425540 }, { "epoch": 0.8596379238597753, "grad_norm": 652.4680786132812, "learning_rate": 6.603056143177817e-07, "loss": 16.1501, "step": 425550 }, { "epoch": 0.8596581244924592, "grad_norm": 0.0, "learning_rate": 6.601322540003202e-07, "loss": 27.6205, "step": 425560 }, { "epoch": 0.859678325125143, "grad_norm": 201.71774291992188, "learning_rate": 6.599589148348451e-07, "loss": 17.4737, "step": 425570 }, { "epoch": 0.8596985257578267, "grad_norm": 604.082763671875, "learning_rate": 6.597855968222038e-07, "loss": 13.4022, "step": 425580 }, { "epoch": 0.8597187263905105, "grad_norm": 367.8940734863281, "learning_rate": 6.596122999632426e-07, "loss": 21.1846, "step": 425590 }, { "epoch": 0.8597389270231943, "grad_norm": 194.28854370117188, "learning_rate": 6.594390242588044e-07, "loss": 13.4595, "step": 425600 }, { "epoch": 0.8597591276558781, "grad_norm": 5.756259441375732, "learning_rate": 6.592657697097333e-07, "loss": 18.238, "step": 425610 }, { "epoch": 0.859779328288562, "grad_norm": 582.0173950195312, "learning_rate": 6.590925363168749e-07, "loss": 25.401, "step": 425620 }, { "epoch": 0.8597995289212458, "grad_norm": 677.1604614257812, "learning_rate": 6.589193240810732e-07, "loss": 29.9261, "step": 425630 }, { "epoch": 0.8598197295539296, "grad_norm": 0.6434399485588074, "learning_rate": 6.587461330031714e-07, "loss": 14.7036, "step": 425640 }, { "epoch": 0.8598399301866134, "grad_norm": 413.88775634765625, "learning_rate": 6.585729630840149e-07, "loss": 17.6794, "step": 425650 }, { "epoch": 0.8598601308192972, "grad_norm": 194.02024841308594, "learning_rate": 6.583998143244463e-07, "loss": 14.493, "step": 425660 }, { "epoch": 0.8598803314519811, "grad_norm": 146.6978759765625, "learning_rate": 6.582266867253118e-07, "loss": 7.9778, "step": 425670 }, { "epoch": 0.8599005320846649, "grad_norm": 225.89418029785156, "learning_rate": 6.580535802874538e-07, "loss": 15.6481, "step": 425680 }, { "epoch": 0.8599207327173487, "grad_norm": 421.75567626953125, "learning_rate": 6.578804950117146e-07, "loss": 14.5297, "step": 425690 }, { "epoch": 0.8599409333500325, "grad_norm": 206.58993530273438, "learning_rate": 6.577074308989406e-07, "loss": 22.0901, "step": 425700 }, { "epoch": 0.8599611339827163, "grad_norm": 30.304372787475586, "learning_rate": 6.575343879499729e-07, "loss": 17.5016, "step": 425710 }, { "epoch": 0.8599813346154002, "grad_norm": 396.6864318847656, "learning_rate": 6.57361366165657e-07, "loss": 18.5493, "step": 425720 }, { "epoch": 0.860001535248084, "grad_norm": 469.9893493652344, "learning_rate": 6.571883655468336e-07, "loss": 22.4117, "step": 425730 }, { "epoch": 0.8600217358807678, "grad_norm": 226.0044708251953, "learning_rate": 6.57015386094349e-07, "loss": 20.228, "step": 425740 }, { "epoch": 0.8600419365134516, "grad_norm": 166.0838623046875, "learning_rate": 6.568424278090446e-07, "loss": 16.2481, "step": 425750 }, { "epoch": 0.8600621371461354, "grad_norm": 412.2013854980469, "learning_rate": 6.56669490691762e-07, 
"loss": 17.67, "step": 425760 }, { "epoch": 0.8600823377788193, "grad_norm": 566.22900390625, "learning_rate": 6.564965747433472e-07, "loss": 31.0132, "step": 425770 }, { "epoch": 0.8601025384115031, "grad_norm": 529.25732421875, "learning_rate": 6.563236799646405e-07, "loss": 15.0801, "step": 425780 }, { "epoch": 0.8601227390441869, "grad_norm": 350.4403381347656, "learning_rate": 6.561508063564847e-07, "loss": 13.9268, "step": 425790 }, { "epoch": 0.8601429396768707, "grad_norm": 297.8817138671875, "learning_rate": 6.559779539197231e-07, "loss": 30.1499, "step": 425800 }, { "epoch": 0.8601631403095545, "grad_norm": 724.6544189453125, "learning_rate": 6.558051226551992e-07, "loss": 19.1858, "step": 425810 }, { "epoch": 0.8601833409422384, "grad_norm": 423.3988952636719, "learning_rate": 6.556323125637542e-07, "loss": 12.4289, "step": 425820 }, { "epoch": 0.8602035415749222, "grad_norm": 3.7432050704956055, "learning_rate": 6.554595236462291e-07, "loss": 21.2461, "step": 425830 }, { "epoch": 0.8602237422076059, "grad_norm": 308.4240417480469, "learning_rate": 6.552867559034687e-07, "loss": 39.6005, "step": 425840 }, { "epoch": 0.8602439428402897, "grad_norm": 232.96771240234375, "learning_rate": 6.551140093363135e-07, "loss": 15.7731, "step": 425850 }, { "epoch": 0.8602641434729735, "grad_norm": 339.595458984375, "learning_rate": 6.549412839456048e-07, "loss": 13.9972, "step": 425860 }, { "epoch": 0.8602843441056574, "grad_norm": 749.7958984375, "learning_rate": 6.547685797321851e-07, "loss": 24.7508, "step": 425870 }, { "epoch": 0.8603045447383412, "grad_norm": 102.43689727783203, "learning_rate": 6.545958966968974e-07, "loss": 8.3586, "step": 425880 }, { "epoch": 0.860324745371025, "grad_norm": 429.5927429199219, "learning_rate": 6.544232348405821e-07, "loss": 18.1928, "step": 425890 }, { "epoch": 0.8603449460037088, "grad_norm": 176.30628967285156, "learning_rate": 6.542505941640803e-07, "loss": 25.1116, "step": 425900 }, { "epoch": 0.8603651466363926, "grad_norm": 614.5752563476562, "learning_rate": 6.540779746682346e-07, "loss": 30.6537, "step": 425910 }, { "epoch": 0.8603853472690765, "grad_norm": 484.5924377441406, "learning_rate": 6.53905376353886e-07, "loss": 19.1837, "step": 425920 }, { "epoch": 0.8604055479017603, "grad_norm": 508.8131408691406, "learning_rate": 6.537327992218745e-07, "loss": 10.3871, "step": 425930 }, { "epoch": 0.8604257485344441, "grad_norm": 277.9728088378906, "learning_rate": 6.535602432730432e-07, "loss": 13.4423, "step": 425940 }, { "epoch": 0.8604459491671279, "grad_norm": 355.0103759765625, "learning_rate": 6.533877085082307e-07, "loss": 12.4517, "step": 425950 }, { "epoch": 0.8604661497998117, "grad_norm": 258.963623046875, "learning_rate": 6.532151949282811e-07, "loss": 17.3157, "step": 425960 }, { "epoch": 0.8604863504324956, "grad_norm": 131.1427764892578, "learning_rate": 6.53042702534033e-07, "loss": 11.3277, "step": 425970 }, { "epoch": 0.8605065510651794, "grad_norm": 262.5980529785156, "learning_rate": 6.528702313263264e-07, "loss": 15.6577, "step": 425980 }, { "epoch": 0.8605267516978632, "grad_norm": 415.2807922363281, "learning_rate": 6.526977813060042e-07, "loss": 14.8311, "step": 425990 }, { "epoch": 0.860546952330547, "grad_norm": 277.50701904296875, "learning_rate": 6.52525352473905e-07, "loss": 13.9653, "step": 426000 }, { "epoch": 0.8605671529632308, "grad_norm": 771.4279174804688, "learning_rate": 6.523529448308708e-07, "loss": 18.6443, "step": 426010 }, { "epoch": 0.8605873535959147, "grad_norm": 647.3812255859375, "learning_rate": 
6.521805583777396e-07, "loss": 17.5931, "step": 426020 }, { "epoch": 0.8606075542285985, "grad_norm": 317.23895263671875, "learning_rate": 6.520081931153544e-07, "loss": 17.4922, "step": 426030 }, { "epoch": 0.8606277548612823, "grad_norm": 698.1004028320312, "learning_rate": 6.518358490445542e-07, "loss": 19.6825, "step": 426040 }, { "epoch": 0.8606479554939661, "grad_norm": 270.4190979003906, "learning_rate": 6.516635261661775e-07, "loss": 14.7697, "step": 426050 }, { "epoch": 0.8606681561266499, "grad_norm": 208.9061737060547, "learning_rate": 6.514912244810662e-07, "loss": 10.7142, "step": 426060 }, { "epoch": 0.8606883567593338, "grad_norm": 288.39532470703125, "learning_rate": 6.513189439900591e-07, "loss": 16.5989, "step": 426070 }, { "epoch": 0.8607085573920176, "grad_norm": 423.8705139160156, "learning_rate": 6.511466846939956e-07, "loss": 19.8654, "step": 426080 }, { "epoch": 0.8607287580247013, "grad_norm": 144.19081115722656, "learning_rate": 6.509744465937151e-07, "loss": 17.9407, "step": 426090 }, { "epoch": 0.8607489586573851, "grad_norm": 311.5626525878906, "learning_rate": 6.508022296900601e-07, "loss": 22.3135, "step": 426100 }, { "epoch": 0.8607691592900689, "grad_norm": 19.088600158691406, "learning_rate": 6.506300339838656e-07, "loss": 6.1991, "step": 426110 }, { "epoch": 0.8607893599227527, "grad_norm": 200.4326629638672, "learning_rate": 6.504578594759725e-07, "loss": 13.0629, "step": 426120 }, { "epoch": 0.8608095605554366, "grad_norm": 175.73031616210938, "learning_rate": 6.502857061672213e-07, "loss": 24.9918, "step": 426130 }, { "epoch": 0.8608297611881204, "grad_norm": 140.0410919189453, "learning_rate": 6.501135740584502e-07, "loss": 19.302, "step": 426140 }, { "epoch": 0.8608499618208042, "grad_norm": 374.5769348144531, "learning_rate": 6.499414631504969e-07, "loss": 24.6695, "step": 426150 }, { "epoch": 0.860870162453488, "grad_norm": 289.5231018066406, "learning_rate": 6.497693734442007e-07, "loss": 16.6754, "step": 426160 }, { "epoch": 0.8608903630861718, "grad_norm": 633.6494750976562, "learning_rate": 6.495973049404037e-07, "loss": 17.9832, "step": 426170 }, { "epoch": 0.8609105637188557, "grad_norm": 203.72691345214844, "learning_rate": 6.494252576399395e-07, "loss": 16.9029, "step": 426180 }, { "epoch": 0.8609307643515395, "grad_norm": 121.83488464355469, "learning_rate": 6.49253231543649e-07, "loss": 20.0728, "step": 426190 }, { "epoch": 0.8609509649842233, "grad_norm": 61.352237701416016, "learning_rate": 6.490812266523716e-07, "loss": 16.1185, "step": 426200 }, { "epoch": 0.8609711656169071, "grad_norm": 234.11602783203125, "learning_rate": 6.489092429669447e-07, "loss": 14.0938, "step": 426210 }, { "epoch": 0.8609913662495909, "grad_norm": 657.1200561523438, "learning_rate": 6.487372804882053e-07, "loss": 17.6869, "step": 426220 }, { "epoch": 0.8610115668822748, "grad_norm": 588.2548828125, "learning_rate": 6.485653392169938e-07, "loss": 22.2285, "step": 426230 }, { "epoch": 0.8610317675149586, "grad_norm": 148.45741271972656, "learning_rate": 6.483934191541469e-07, "loss": 15.8468, "step": 426240 }, { "epoch": 0.8610519681476424, "grad_norm": 243.9552459716797, "learning_rate": 6.482215203005016e-07, "loss": 18.0027, "step": 426250 }, { "epoch": 0.8610721687803262, "grad_norm": 399.02685546875, "learning_rate": 6.480496426568983e-07, "loss": 33.1014, "step": 426260 }, { "epoch": 0.86109236941301, "grad_norm": 126.6474609375, "learning_rate": 6.478777862241714e-07, "loss": 12.1914, "step": 426270 }, { "epoch": 0.8611125700456939, "grad_norm": 
29.05548095703125, "learning_rate": 6.477059510031619e-07, "loss": 18.6576, "step": 426280 }, { "epoch": 0.8611327706783777, "grad_norm": 391.8978271484375, "learning_rate": 6.475341369947047e-07, "loss": 18.8539, "step": 426290 }, { "epoch": 0.8611529713110615, "grad_norm": 301.7601318359375, "learning_rate": 6.47362344199639e-07, "loss": 11.3853, "step": 426300 }, { "epoch": 0.8611731719437453, "grad_norm": 172.89324951171875, "learning_rate": 6.471905726188015e-07, "loss": 15.71, "step": 426310 }, { "epoch": 0.8611933725764291, "grad_norm": 511.9766540527344, "learning_rate": 6.470188222530282e-07, "loss": 17.3988, "step": 426320 }, { "epoch": 0.861213573209113, "grad_norm": 495.0720520019531, "learning_rate": 6.468470931031584e-07, "loss": 26.0315, "step": 426330 }, { "epoch": 0.8612337738417968, "grad_norm": 554.4364624023438, "learning_rate": 6.466753851700264e-07, "loss": 14.4683, "step": 426340 }, { "epoch": 0.8612539744744805, "grad_norm": 499.3392639160156, "learning_rate": 6.465036984544721e-07, "loss": 24.2076, "step": 426350 }, { "epoch": 0.8612741751071643, "grad_norm": 135.09153747558594, "learning_rate": 6.463320329573303e-07, "loss": 9.2683, "step": 426360 }, { "epoch": 0.8612943757398481, "grad_norm": 318.33416748046875, "learning_rate": 6.46160388679437e-07, "loss": 11.1335, "step": 426370 }, { "epoch": 0.861314576372532, "grad_norm": 261.01849365234375, "learning_rate": 6.459887656216318e-07, "loss": 19.9245, "step": 426380 }, { "epoch": 0.8613347770052158, "grad_norm": 608.0852661132812, "learning_rate": 6.458171637847488e-07, "loss": 12.4543, "step": 426390 }, { "epoch": 0.8613549776378996, "grad_norm": 101.21218872070312, "learning_rate": 6.456455831696234e-07, "loss": 18.111, "step": 426400 }, { "epoch": 0.8613751782705834, "grad_norm": 22.630373001098633, "learning_rate": 6.454740237770934e-07, "loss": 19.3144, "step": 426410 }, { "epoch": 0.8613953789032672, "grad_norm": 196.15382385253906, "learning_rate": 6.453024856079976e-07, "loss": 19.9625, "step": 426420 }, { "epoch": 0.861415579535951, "grad_norm": 61.11758804321289, "learning_rate": 6.451309686631668e-07, "loss": 14.8085, "step": 426430 }, { "epoch": 0.8614357801686349, "grad_norm": 243.17691040039062, "learning_rate": 6.449594729434394e-07, "loss": 25.2256, "step": 426440 }, { "epoch": 0.8614559808013187, "grad_norm": 221.3637237548828, "learning_rate": 6.447879984496525e-07, "loss": 24.9462, "step": 426450 }, { "epoch": 0.8614761814340025, "grad_norm": 64.93826293945312, "learning_rate": 6.446165451826409e-07, "loss": 17.2373, "step": 426460 }, { "epoch": 0.8614963820666863, "grad_norm": 665.0004272460938, "learning_rate": 6.444451131432383e-07, "loss": 28.1167, "step": 426470 }, { "epoch": 0.8615165826993701, "grad_norm": 742.8054809570312, "learning_rate": 6.442737023322826e-07, "loss": 17.7511, "step": 426480 }, { "epoch": 0.861536783332054, "grad_norm": 982.496337890625, "learning_rate": 6.441023127506096e-07, "loss": 25.0334, "step": 426490 }, { "epoch": 0.8615569839647378, "grad_norm": 602.4315185546875, "learning_rate": 6.439309443990532e-07, "loss": 20.5195, "step": 426500 }, { "epoch": 0.8615771845974216, "grad_norm": 155.8933868408203, "learning_rate": 6.437595972784483e-07, "loss": 19.0511, "step": 426510 }, { "epoch": 0.8615973852301054, "grad_norm": 141.42478942871094, "learning_rate": 6.435882713896319e-07, "loss": 21.8023, "step": 426520 }, { "epoch": 0.8616175858627892, "grad_norm": 461.2997131347656, "learning_rate": 6.434169667334378e-07, "loss": 16.3485, "step": 426530 }, { "epoch": 
0.8616377864954731, "grad_norm": 254.6463623046875, "learning_rate": 6.432456833106998e-07, "loss": 31.786, "step": 426540 }, { "epoch": 0.8616579871281569, "grad_norm": 549.6669921875, "learning_rate": 6.43074421122255e-07, "loss": 15.4421, "step": 426550 }, { "epoch": 0.8616781877608407, "grad_norm": 163.59596252441406, "learning_rate": 6.429031801689362e-07, "loss": 22.5572, "step": 426560 }, { "epoch": 0.8616983883935245, "grad_norm": 173.46153259277344, "learning_rate": 6.427319604515797e-07, "loss": 11.6241, "step": 426570 }, { "epoch": 0.8617185890262083, "grad_norm": 86.23184204101562, "learning_rate": 6.425607619710195e-07, "loss": 14.788, "step": 426580 }, { "epoch": 0.8617387896588922, "grad_norm": 461.0710144042969, "learning_rate": 6.423895847280881e-07, "loss": 14.6491, "step": 426590 }, { "epoch": 0.8617589902915759, "grad_norm": 449.5500793457031, "learning_rate": 6.422184287236227e-07, "loss": 15.9077, "step": 426600 }, { "epoch": 0.8617791909242597, "grad_norm": 248.44351196289062, "learning_rate": 6.420472939584549e-07, "loss": 16.0009, "step": 426610 }, { "epoch": 0.8617993915569435, "grad_norm": 529.0774536132812, "learning_rate": 6.418761804334212e-07, "loss": 23.6131, "step": 426620 }, { "epoch": 0.8618195921896273, "grad_norm": 219.2325897216797, "learning_rate": 6.417050881493536e-07, "loss": 23.5891, "step": 426630 }, { "epoch": 0.8618397928223112, "grad_norm": 363.240234375, "learning_rate": 6.415340171070877e-07, "loss": 13.0453, "step": 426640 }, { "epoch": 0.861859993454995, "grad_norm": 181.5204315185547, "learning_rate": 6.413629673074562e-07, "loss": 10.0992, "step": 426650 }, { "epoch": 0.8618801940876788, "grad_norm": 576.6511840820312, "learning_rate": 6.411919387512922e-07, "loss": 18.3, "step": 426660 }, { "epoch": 0.8619003947203626, "grad_norm": 520.0042724609375, "learning_rate": 6.410209314394305e-07, "loss": 38.815, "step": 426670 }, { "epoch": 0.8619205953530464, "grad_norm": 368.30169677734375, "learning_rate": 6.408499453727046e-07, "loss": 34.8511, "step": 426680 }, { "epoch": 0.8619407959857303, "grad_norm": 88.484619140625, "learning_rate": 6.406789805519464e-07, "loss": 12.4439, "step": 426690 }, { "epoch": 0.8619609966184141, "grad_norm": 312.1562805175781, "learning_rate": 6.405080369779898e-07, "loss": 17.1228, "step": 426700 }, { "epoch": 0.8619811972510979, "grad_norm": 394.21160888671875, "learning_rate": 6.403371146516707e-07, "loss": 21.2892, "step": 426710 }, { "epoch": 0.8620013978837817, "grad_norm": 180.46238708496094, "learning_rate": 6.401662135738174e-07, "loss": 14.238, "step": 426720 }, { "epoch": 0.8620215985164655, "grad_norm": 1012.4619140625, "learning_rate": 6.399953337452652e-07, "loss": 21.8464, "step": 426730 }, { "epoch": 0.8620417991491494, "grad_norm": 408.7126159667969, "learning_rate": 6.398244751668481e-07, "loss": 12.9432, "step": 426740 }, { "epoch": 0.8620619997818332, "grad_norm": 258.2486877441406, "learning_rate": 6.396536378393975e-07, "loss": 30.4977, "step": 426750 }, { "epoch": 0.862082200414517, "grad_norm": 318.1420593261719, "learning_rate": 6.394828217637455e-07, "loss": 13.0637, "step": 426760 }, { "epoch": 0.8621024010472008, "grad_norm": 563.1136474609375, "learning_rate": 6.393120269407249e-07, "loss": 30.0762, "step": 426770 }, { "epoch": 0.8621226016798846, "grad_norm": 844.4476928710938, "learning_rate": 6.391412533711711e-07, "loss": 25.8394, "step": 426780 }, { "epoch": 0.8621428023125685, "grad_norm": 198.59259033203125, "learning_rate": 6.389705010559117e-07, "loss": 15.6251, "step": 
426790 }, { "epoch": 0.8621630029452523, "grad_norm": 59.635990142822266, "learning_rate": 6.387997699957815e-07, "loss": 14.6566, "step": 426800 }, { "epoch": 0.8621832035779361, "grad_norm": 131.53367614746094, "learning_rate": 6.386290601916129e-07, "loss": 12.1609, "step": 426810 }, { "epoch": 0.8622034042106199, "grad_norm": 647.2199096679688, "learning_rate": 6.384583716442371e-07, "loss": 28.2555, "step": 426820 }, { "epoch": 0.8622236048433037, "grad_norm": 488.17181396484375, "learning_rate": 6.382877043544855e-07, "loss": 8.1615, "step": 426830 }, { "epoch": 0.8622438054759876, "grad_norm": 1174.1416015625, "learning_rate": 6.381170583231916e-07, "loss": 23.6752, "step": 426840 }, { "epoch": 0.8622640061086714, "grad_norm": 1267.3780517578125, "learning_rate": 6.379464335511859e-07, "loss": 37.9311, "step": 426850 }, { "epoch": 0.8622842067413551, "grad_norm": 1164.790771484375, "learning_rate": 6.377758300392994e-07, "loss": 21.5773, "step": 426860 }, { "epoch": 0.8623044073740389, "grad_norm": 257.47064208984375, "learning_rate": 6.376052477883655e-07, "loss": 18.7969, "step": 426870 }, { "epoch": 0.8623246080067227, "grad_norm": 348.642333984375, "learning_rate": 6.374346867992138e-07, "loss": 10.7251, "step": 426880 }, { "epoch": 0.8623448086394065, "grad_norm": 360.3748779296875, "learning_rate": 6.372641470726765e-07, "loss": 20.3781, "step": 426890 }, { "epoch": 0.8623650092720904, "grad_norm": 258.9471130371094, "learning_rate": 6.370936286095842e-07, "loss": 12.6942, "step": 426900 }, { "epoch": 0.8623852099047742, "grad_norm": 261.1137390136719, "learning_rate": 6.369231314107693e-07, "loss": 19.6193, "step": 426910 }, { "epoch": 0.862405410537458, "grad_norm": 265.93951416015625, "learning_rate": 6.36752655477062e-07, "loss": 24.8202, "step": 426920 }, { "epoch": 0.8624256111701418, "grad_norm": 3038.77490234375, "learning_rate": 6.36582200809292e-07, "loss": 29.0701, "step": 426930 }, { "epoch": 0.8624458118028256, "grad_norm": 328.9877624511719, "learning_rate": 6.36411767408292e-07, "loss": 17.7837, "step": 426940 }, { "epoch": 0.8624660124355095, "grad_norm": 327.4744873046875, "learning_rate": 6.362413552748908e-07, "loss": 33.9574, "step": 426950 }, { "epoch": 0.8624862130681933, "grad_norm": 380.6587219238281, "learning_rate": 6.360709644099211e-07, "loss": 20.3324, "step": 426960 }, { "epoch": 0.8625064137008771, "grad_norm": 335.7801208496094, "learning_rate": 6.359005948142122e-07, "loss": 19.1392, "step": 426970 }, { "epoch": 0.8625266143335609, "grad_norm": 442.7544860839844, "learning_rate": 6.357302464885934e-07, "loss": 15.0711, "step": 426980 }, { "epoch": 0.8625468149662447, "grad_norm": 639.4885864257812, "learning_rate": 6.355599194338974e-07, "loss": 28.2678, "step": 426990 }, { "epoch": 0.8625670155989286, "grad_norm": 170.79458618164062, "learning_rate": 6.353896136509524e-07, "loss": 11.3887, "step": 427000 }, { "epoch": 0.8625872162316124, "grad_norm": 80.47013854980469, "learning_rate": 6.352193291405884e-07, "loss": 9.2106, "step": 427010 }, { "epoch": 0.8626074168642962, "grad_norm": 607.91259765625, "learning_rate": 6.350490659036362e-07, "loss": 23.6392, "step": 427020 }, { "epoch": 0.86262761749698, "grad_norm": 184.9613494873047, "learning_rate": 6.348788239409271e-07, "loss": 11.4782, "step": 427030 }, { "epoch": 0.8626478181296638, "grad_norm": 74.06925201416016, "learning_rate": 6.347086032532873e-07, "loss": 13.4751, "step": 427040 }, { "epoch": 0.8626680187623477, "grad_norm": 476.8874816894531, "learning_rate": 
6.345384038415486e-07, "loss": 14.9326, "step": 427050 }, { "epoch": 0.8626882193950315, "grad_norm": 196.14111328125, "learning_rate": 6.343682257065408e-07, "loss": 15.7272, "step": 427060 }, { "epoch": 0.8627084200277153, "grad_norm": 679.0256958007812, "learning_rate": 6.341980688490934e-07, "loss": 38.8679, "step": 427070 }, { "epoch": 0.8627286206603991, "grad_norm": 1150.7294921875, "learning_rate": 6.340279332700333e-07, "loss": 22.0961, "step": 427080 }, { "epoch": 0.862748821293083, "grad_norm": 202.36204528808594, "learning_rate": 6.338578189701921e-07, "loss": 12.8232, "step": 427090 }, { "epoch": 0.8627690219257668, "grad_norm": 753.1251831054688, "learning_rate": 6.336877259504004e-07, "loss": 15.3143, "step": 427100 }, { "epoch": 0.8627892225584506, "grad_norm": 265.86480712890625, "learning_rate": 6.335176542114829e-07, "loss": 21.6381, "step": 427110 }, { "epoch": 0.8628094231911343, "grad_norm": 627.78076171875, "learning_rate": 6.333476037542707e-07, "loss": 20.5898, "step": 427120 }, { "epoch": 0.8628296238238181, "grad_norm": 273.6964416503906, "learning_rate": 6.331775745795937e-07, "loss": 28.021, "step": 427130 }, { "epoch": 0.8628498244565019, "grad_norm": 639.4112548828125, "learning_rate": 6.330075666882795e-07, "loss": 22.1546, "step": 427140 }, { "epoch": 0.8628700250891858, "grad_norm": 623.170166015625, "learning_rate": 6.328375800811559e-07, "loss": 19.3433, "step": 427150 }, { "epoch": 0.8628902257218696, "grad_norm": 471.6022033691406, "learning_rate": 6.326676147590533e-07, "loss": 19.5129, "step": 427160 }, { "epoch": 0.8629104263545534, "grad_norm": 424.69146728515625, "learning_rate": 6.324976707227993e-07, "loss": 6.4254, "step": 427170 }, { "epoch": 0.8629306269872372, "grad_norm": 0.0, "learning_rate": 6.323277479732203e-07, "loss": 13.1277, "step": 427180 }, { "epoch": 0.862950827619921, "grad_norm": 962.615234375, "learning_rate": 6.321578465111478e-07, "loss": 26.358, "step": 427190 }, { "epoch": 0.8629710282526049, "grad_norm": 536.439453125, "learning_rate": 6.319879663374068e-07, "loss": 22.4644, "step": 427200 }, { "epoch": 0.8629912288852887, "grad_norm": 307.97930908203125, "learning_rate": 6.318181074528279e-07, "loss": 26.8442, "step": 427210 }, { "epoch": 0.8630114295179725, "grad_norm": 415.0699768066406, "learning_rate": 6.316482698582365e-07, "loss": 9.4954, "step": 427220 }, { "epoch": 0.8630316301506563, "grad_norm": 51.516761779785156, "learning_rate": 6.314784535544627e-07, "loss": 21.2377, "step": 427230 }, { "epoch": 0.8630518307833401, "grad_norm": 110.73899841308594, "learning_rate": 6.313086585423316e-07, "loss": 14.6737, "step": 427240 }, { "epoch": 0.863072031416024, "grad_norm": 384.1722106933594, "learning_rate": 6.311388848226741e-07, "loss": 21.8881, "step": 427250 }, { "epoch": 0.8630922320487078, "grad_norm": 262.3652648925781, "learning_rate": 6.309691323963152e-07, "loss": 23.4864, "step": 427260 }, { "epoch": 0.8631124326813916, "grad_norm": 3200.091552734375, "learning_rate": 6.307994012640822e-07, "loss": 31.6897, "step": 427270 }, { "epoch": 0.8631326333140754, "grad_norm": 670.214599609375, "learning_rate": 6.30629691426804e-07, "loss": 18.9, "step": 427280 }, { "epoch": 0.8631528339467592, "grad_norm": 600.1300659179688, "learning_rate": 6.304600028853065e-07, "loss": 17.5757, "step": 427290 }, { "epoch": 0.863173034579443, "grad_norm": 689.7635498046875, "learning_rate": 6.302903356404161e-07, "loss": 19.7232, "step": 427300 }, { "epoch": 0.8631932352121269, "grad_norm": 251.3146514892578, "learning_rate": 
6.301206896929607e-07, "loss": 18.1964, "step": 427310 }, { "epoch": 0.8632134358448107, "grad_norm": 415.42327880859375, "learning_rate": 6.29951065043769e-07, "loss": 17.6401, "step": 427320 }, { "epoch": 0.8632336364774945, "grad_norm": 302.36083984375, "learning_rate": 6.297814616936637e-07, "loss": 23.2883, "step": 427330 }, { "epoch": 0.8632538371101783, "grad_norm": 387.613037109375, "learning_rate": 6.296118796434735e-07, "loss": 16.3554, "step": 427340 }, { "epoch": 0.8632740377428622, "grad_norm": 283.42108154296875, "learning_rate": 6.294423188940263e-07, "loss": 10.3294, "step": 427350 }, { "epoch": 0.863294238375546, "grad_norm": 270.95489501953125, "learning_rate": 6.292727794461468e-07, "loss": 21.8911, "step": 427360 }, { "epoch": 0.8633144390082297, "grad_norm": 473.91461181640625, "learning_rate": 6.291032613006604e-07, "loss": 23.1282, "step": 427370 }, { "epoch": 0.8633346396409135, "grad_norm": 404.8705749511719, "learning_rate": 6.289337644583949e-07, "loss": 11.5368, "step": 427380 }, { "epoch": 0.8633548402735973, "grad_norm": 357.8258361816406, "learning_rate": 6.287642889201783e-07, "loss": 16.3562, "step": 427390 }, { "epoch": 0.8633750409062811, "grad_norm": 366.5943298339844, "learning_rate": 6.28594834686832e-07, "loss": 16.7686, "step": 427400 }, { "epoch": 0.863395241538965, "grad_norm": 202.032470703125, "learning_rate": 6.284254017591845e-07, "loss": 14.6638, "step": 427410 }, { "epoch": 0.8634154421716488, "grad_norm": 290.3363342285156, "learning_rate": 6.282559901380625e-07, "loss": 15.5777, "step": 427420 }, { "epoch": 0.8634356428043326, "grad_norm": 314.4226989746094, "learning_rate": 6.280865998242908e-07, "loss": 17.3926, "step": 427430 }, { "epoch": 0.8634558434370164, "grad_norm": 367.91033935546875, "learning_rate": 6.279172308186931e-07, "loss": 18.545, "step": 427440 }, { "epoch": 0.8634760440697002, "grad_norm": 329.7330322265625, "learning_rate": 6.277478831220979e-07, "loss": 29.4346, "step": 427450 }, { "epoch": 0.8634962447023841, "grad_norm": 199.14669799804688, "learning_rate": 6.275785567353293e-07, "loss": 13.117, "step": 427460 }, { "epoch": 0.8635164453350679, "grad_norm": 242.6150360107422, "learning_rate": 6.274092516592111e-07, "loss": 8.73, "step": 427470 }, { "epoch": 0.8635366459677517, "grad_norm": 316.0828857421875, "learning_rate": 6.272399678945712e-07, "loss": 10.5379, "step": 427480 }, { "epoch": 0.8635568466004355, "grad_norm": 629.6071166992188, "learning_rate": 6.27070705442232e-07, "loss": 18.8083, "step": 427490 }, { "epoch": 0.8635770472331193, "grad_norm": 554.4690551757812, "learning_rate": 6.269014643030214e-07, "loss": 20.6939, "step": 427500 }, { "epoch": 0.8635972478658032, "grad_norm": 251.9290313720703, "learning_rate": 6.267322444777612e-07, "loss": 13.3097, "step": 427510 }, { "epoch": 0.863617448498487, "grad_norm": 288.9092712402344, "learning_rate": 6.265630459672789e-07, "loss": 9.0264, "step": 427520 }, { "epoch": 0.8636376491311708, "grad_norm": 124.1415786743164, "learning_rate": 6.263938687723981e-07, "loss": 21.6229, "step": 427530 }, { "epoch": 0.8636578497638546, "grad_norm": 357.16436767578125, "learning_rate": 6.262247128939414e-07, "loss": 10.0365, "step": 427540 }, { "epoch": 0.8636780503965384, "grad_norm": 393.6729431152344, "learning_rate": 6.260555783327366e-07, "loss": 19.359, "step": 427550 }, { "epoch": 0.8636982510292223, "grad_norm": 0.5614365339279175, "learning_rate": 6.258864650896051e-07, "loss": 10.1452, "step": 427560 }, { "epoch": 0.8637184516619061, "grad_norm": 
841.2181396484375, "learning_rate": 6.257173731653738e-07, "loss": 24.4382, "step": 427570 }, { "epoch": 0.8637386522945899, "grad_norm": 343.8075256347656, "learning_rate": 6.25548302560865e-07, "loss": 24.539, "step": 427580 }, { "epoch": 0.8637588529272737, "grad_norm": 555.2030029296875, "learning_rate": 6.253792532769026e-07, "loss": 12.5351, "step": 427590 }, { "epoch": 0.8637790535599575, "grad_norm": 549.779052734375, "learning_rate": 6.252102253143122e-07, "loss": 26.9366, "step": 427600 }, { "epoch": 0.8637992541926414, "grad_norm": 707.0358276367188, "learning_rate": 6.250412186739163e-07, "loss": 17.0704, "step": 427610 }, { "epoch": 0.8638194548253252, "grad_norm": 679.9314575195312, "learning_rate": 6.248722333565377e-07, "loss": 19.7051, "step": 427620 }, { "epoch": 0.8638396554580089, "grad_norm": 125.01164245605469, "learning_rate": 6.247032693630012e-07, "loss": 24.9688, "step": 427630 }, { "epoch": 0.8638598560906927, "grad_norm": 502.79290771484375, "learning_rate": 6.245343266941328e-07, "loss": 18.026, "step": 427640 }, { "epoch": 0.8638800567233765, "grad_norm": 7.125823020935059, "learning_rate": 6.243654053507515e-07, "loss": 10.2686, "step": 427650 }, { "epoch": 0.8639002573560604, "grad_norm": 999.5502319335938, "learning_rate": 6.241965053336818e-07, "loss": 19.1553, "step": 427660 }, { "epoch": 0.8639204579887442, "grad_norm": 560.1099853515625, "learning_rate": 6.24027626643749e-07, "loss": 19.8785, "step": 427670 }, { "epoch": 0.863940658621428, "grad_norm": 584.9874877929688, "learning_rate": 6.238587692817749e-07, "loss": 12.1768, "step": 427680 }, { "epoch": 0.8639608592541118, "grad_norm": 467.38824462890625, "learning_rate": 6.236899332485813e-07, "loss": 15.8237, "step": 427690 }, { "epoch": 0.8639810598867956, "grad_norm": 1058.6409912109375, "learning_rate": 6.235211185449919e-07, "loss": 14.8201, "step": 427700 }, { "epoch": 0.8640012605194795, "grad_norm": 332.8944091796875, "learning_rate": 6.233523251718321e-07, "loss": 15.9929, "step": 427710 }, { "epoch": 0.8640214611521633, "grad_norm": 237.4763946533203, "learning_rate": 6.231835531299202e-07, "loss": 24.3467, "step": 427720 }, { "epoch": 0.8640416617848471, "grad_norm": 355.6075744628906, "learning_rate": 6.23014802420081e-07, "loss": 19.6161, "step": 427730 }, { "epoch": 0.8640618624175309, "grad_norm": 207.7703399658203, "learning_rate": 6.228460730431374e-07, "loss": 12.1483, "step": 427740 }, { "epoch": 0.8640820630502147, "grad_norm": 235.3800811767578, "learning_rate": 6.226773649999113e-07, "loss": 11.0554, "step": 427750 }, { "epoch": 0.8641022636828986, "grad_norm": 405.9524230957031, "learning_rate": 6.225086782912237e-07, "loss": 21.9891, "step": 427760 }, { "epoch": 0.8641224643155824, "grad_norm": 145.1119842529297, "learning_rate": 6.223400129178992e-07, "loss": 9.3089, "step": 427770 }, { "epoch": 0.8641426649482662, "grad_norm": 275.88665771484375, "learning_rate": 6.221713688807585e-07, "loss": 32.3242, "step": 427780 }, { "epoch": 0.86416286558095, "grad_norm": 425.7492980957031, "learning_rate": 6.220027461806222e-07, "loss": 10.1099, "step": 427790 }, { "epoch": 0.8641830662136338, "grad_norm": 280.409423828125, "learning_rate": 6.218341448183141e-07, "loss": 23.433, "step": 427800 }, { "epoch": 0.8642032668463177, "grad_norm": 453.26947021484375, "learning_rate": 6.216655647946556e-07, "loss": 14.3934, "step": 427810 }, { "epoch": 0.8642234674790015, "grad_norm": 234.53407287597656, "learning_rate": 6.214970061104686e-07, "loss": 29.2459, "step": 427820 }, { "epoch": 
0.8642436681116853, "grad_norm": 383.7027587890625, "learning_rate": 6.213284687665733e-07, "loss": 25.2639, "step": 427830 }, { "epoch": 0.8642638687443691, "grad_norm": 490.578125, "learning_rate": 6.21159952763793e-07, "loss": 13.2446, "step": 427840 }, { "epoch": 0.8642840693770529, "grad_norm": 291.77685546875, "learning_rate": 6.209914581029474e-07, "loss": 32.4509, "step": 427850 }, { "epoch": 0.8643042700097368, "grad_norm": 213.53958129882812, "learning_rate": 6.20822984784858e-07, "loss": 17.6725, "step": 427860 }, { "epoch": 0.8643244706424206, "grad_norm": 241.1728057861328, "learning_rate": 6.20654532810347e-07, "loss": 19.7326, "step": 427870 }, { "epoch": 0.8643446712751043, "grad_norm": 178.4251251220703, "learning_rate": 6.204861021802333e-07, "loss": 9.0884, "step": 427880 }, { "epoch": 0.8643648719077881, "grad_norm": 417.46343994140625, "learning_rate": 6.203176928953403e-07, "loss": 17.9481, "step": 427890 }, { "epoch": 0.8643850725404719, "grad_norm": 1527.8167724609375, "learning_rate": 6.201493049564883e-07, "loss": 14.5531, "step": 427900 }, { "epoch": 0.8644052731731557, "grad_norm": 728.8560180664062, "learning_rate": 6.199809383644956e-07, "loss": 12.0885, "step": 427910 }, { "epoch": 0.8644254738058396, "grad_norm": 236.08883666992188, "learning_rate": 6.198125931201848e-07, "loss": 21.4588, "step": 427920 }, { "epoch": 0.8644456744385234, "grad_norm": 260.3927917480469, "learning_rate": 6.196442692243787e-07, "loss": 20.7814, "step": 427930 }, { "epoch": 0.8644658750712072, "grad_norm": 662.6884155273438, "learning_rate": 6.194759666778927e-07, "loss": 19.546, "step": 427940 }, { "epoch": 0.864486075703891, "grad_norm": 499.678466796875, "learning_rate": 6.193076854815494e-07, "loss": 19.0645, "step": 427950 }, { "epoch": 0.8645062763365748, "grad_norm": 537.9235229492188, "learning_rate": 6.191394256361699e-07, "loss": 24.221, "step": 427960 }, { "epoch": 0.8645264769692587, "grad_norm": 395.36358642578125, "learning_rate": 6.189711871425741e-07, "loss": 14.9715, "step": 427970 }, { "epoch": 0.8645466776019425, "grad_norm": 348.86285400390625, "learning_rate": 6.188029700015802e-07, "loss": 13.2189, "step": 427980 }, { "epoch": 0.8645668782346263, "grad_norm": 371.9176330566406, "learning_rate": 6.186347742140092e-07, "loss": 29.0738, "step": 427990 }, { "epoch": 0.8645870788673101, "grad_norm": 878.5059814453125, "learning_rate": 6.184665997806832e-07, "loss": 22.1013, "step": 428000 }, { "epoch": 0.8646072794999939, "grad_norm": 426.17926025390625, "learning_rate": 6.182984467024173e-07, "loss": 19.0282, "step": 428010 }, { "epoch": 0.8646274801326778, "grad_norm": 299.5845031738281, "learning_rate": 6.181303149800333e-07, "loss": 18.7745, "step": 428020 }, { "epoch": 0.8646476807653616, "grad_norm": 98.38887023925781, "learning_rate": 6.179622046143513e-07, "loss": 19.949, "step": 428030 }, { "epoch": 0.8646678813980454, "grad_norm": 340.8511657714844, "learning_rate": 6.177941156061906e-07, "loss": 8.3363, "step": 428040 }, { "epoch": 0.8646880820307292, "grad_norm": 50.793827056884766, "learning_rate": 6.17626047956369e-07, "loss": 15.9434, "step": 428050 }, { "epoch": 0.864708282663413, "grad_norm": 231.07606506347656, "learning_rate": 6.174580016657073e-07, "loss": 13.5634, "step": 428060 }, { "epoch": 0.8647284832960969, "grad_norm": 1061.10302734375, "learning_rate": 6.172899767350238e-07, "loss": 16.6211, "step": 428070 }, { "epoch": 0.8647486839287807, "grad_norm": 386.4588623046875, "learning_rate": 6.171219731651362e-07, "loss": 24.283, "step": 
428080 }, { "epoch": 0.8647688845614645, "grad_norm": 548.4069213867188, "learning_rate": 6.169539909568656e-07, "loss": 17.2704, "step": 428090 }, { "epoch": 0.8647890851941483, "grad_norm": 411.5999450683594, "learning_rate": 6.167860301110284e-07, "loss": 20.6446, "step": 428100 }, { "epoch": 0.8648092858268321, "grad_norm": 124.37580871582031, "learning_rate": 6.166180906284458e-07, "loss": 21.915, "step": 428110 }, { "epoch": 0.864829486459516, "grad_norm": 51.3354377746582, "learning_rate": 6.164501725099342e-07, "loss": 20.3813, "step": 428120 }, { "epoch": 0.8648496870921998, "grad_norm": 313.509521484375, "learning_rate": 6.162822757563136e-07, "loss": 16.0153, "step": 428130 }, { "epoch": 0.8648698877248835, "grad_norm": 571.181884765625, "learning_rate": 6.161144003684017e-07, "loss": 21.6709, "step": 428140 }, { "epoch": 0.8648900883575673, "grad_norm": 57.8526496887207, "learning_rate": 6.159465463470149e-07, "loss": 13.1453, "step": 428150 }, { "epoch": 0.8649102889902511, "grad_norm": 282.853515625, "learning_rate": 6.157787136929743e-07, "loss": 13.9466, "step": 428160 }, { "epoch": 0.864930489622935, "grad_norm": 47.31289291381836, "learning_rate": 6.156109024070955e-07, "loss": 19.1847, "step": 428170 }, { "epoch": 0.8649506902556188, "grad_norm": 368.8774719238281, "learning_rate": 6.154431124901983e-07, "loss": 13.2376, "step": 428180 }, { "epoch": 0.8649708908883026, "grad_norm": 938.6478271484375, "learning_rate": 6.152753439430997e-07, "loss": 18.7021, "step": 428190 }, { "epoch": 0.8649910915209864, "grad_norm": 505.9247131347656, "learning_rate": 6.151075967666165e-07, "loss": 21.0578, "step": 428200 }, { "epoch": 0.8650112921536702, "grad_norm": 360.1973571777344, "learning_rate": 6.149398709615678e-07, "loss": 14.0969, "step": 428210 }, { "epoch": 0.865031492786354, "grad_norm": 419.23187255859375, "learning_rate": 6.147721665287703e-07, "loss": 9.2271, "step": 428220 }, { "epoch": 0.8650516934190379, "grad_norm": 2.7760090827941895, "learning_rate": 6.146044834690401e-07, "loss": 10.1077, "step": 428230 }, { "epoch": 0.8650718940517217, "grad_norm": 281.9516906738281, "learning_rate": 6.144368217831965e-07, "loss": 8.2609, "step": 428240 }, { "epoch": 0.8650920946844055, "grad_norm": 337.73345947265625, "learning_rate": 6.142691814720575e-07, "loss": 20.3037, "step": 428250 }, { "epoch": 0.8651122953170893, "grad_norm": 430.0217590332031, "learning_rate": 6.141015625364366e-07, "loss": 11.1121, "step": 428260 }, { "epoch": 0.8651324959497732, "grad_norm": 234.089599609375, "learning_rate": 6.139339649771525e-07, "loss": 16.6557, "step": 428270 }, { "epoch": 0.865152696582457, "grad_norm": 211.0261993408203, "learning_rate": 6.137663887950235e-07, "loss": 13.0847, "step": 428280 }, { "epoch": 0.8651728972151408, "grad_norm": 1035.0299072265625, "learning_rate": 6.135988339908655e-07, "loss": 20.2821, "step": 428290 }, { "epoch": 0.8651930978478246, "grad_norm": 463.2383728027344, "learning_rate": 6.134313005654929e-07, "loss": 17.1917, "step": 428300 }, { "epoch": 0.8652132984805084, "grad_norm": 309.8400573730469, "learning_rate": 6.132637885197251e-07, "loss": 20.0468, "step": 428310 }, { "epoch": 0.8652334991131923, "grad_norm": 226.93862915039062, "learning_rate": 6.130962978543792e-07, "loss": 12.4476, "step": 428320 }, { "epoch": 0.8652536997458761, "grad_norm": 284.1596374511719, "learning_rate": 6.129288285702672e-07, "loss": 13.2608, "step": 428330 }, { "epoch": 0.8652739003785599, "grad_norm": 140.4319610595703, "learning_rate": 6.127613806682087e-07, 
"loss": 22.1714, "step": 428340 }, { "epoch": 0.8652941010112437, "grad_norm": 279.0960693359375, "learning_rate": 6.1259395414902e-07, "loss": 10.1577, "step": 428350 }, { "epoch": 0.8653143016439275, "grad_norm": 213.5045166015625, "learning_rate": 6.124265490135161e-07, "loss": 18.4615, "step": 428360 }, { "epoch": 0.8653345022766114, "grad_norm": 517.26416015625, "learning_rate": 6.122591652625126e-07, "loss": 14.2572, "step": 428370 }, { "epoch": 0.8653547029092952, "grad_norm": 994.1997680664062, "learning_rate": 6.120918028968265e-07, "loss": 15.3615, "step": 428380 }, { "epoch": 0.8653749035419789, "grad_norm": 991.473876953125, "learning_rate": 6.119244619172727e-07, "loss": 25.2808, "step": 428390 }, { "epoch": 0.8653951041746627, "grad_norm": 304.0759582519531, "learning_rate": 6.117571423246655e-07, "loss": 18.7912, "step": 428400 }, { "epoch": 0.8654153048073465, "grad_norm": 199.3155059814453, "learning_rate": 6.11589844119822e-07, "loss": 8.4047, "step": 428410 }, { "epoch": 0.8654355054400303, "grad_norm": 1094.4544677734375, "learning_rate": 6.114225673035584e-07, "loss": 21.4444, "step": 428420 }, { "epoch": 0.8654557060727142, "grad_norm": 207.91004943847656, "learning_rate": 6.112553118766889e-07, "loss": 25.2106, "step": 428430 }, { "epoch": 0.865475906705398, "grad_norm": 201.0609588623047, "learning_rate": 6.110880778400275e-07, "loss": 17.329, "step": 428440 }, { "epoch": 0.8654961073380818, "grad_norm": 327.4130859375, "learning_rate": 6.109208651943921e-07, "loss": 18.1264, "step": 428450 }, { "epoch": 0.8655163079707656, "grad_norm": 22.62371253967285, "learning_rate": 6.107536739405956e-07, "loss": 17.9029, "step": 428460 }, { "epoch": 0.8655365086034494, "grad_norm": 56.47309875488281, "learning_rate": 6.105865040794523e-07, "loss": 22.641, "step": 428470 }, { "epoch": 0.8655567092361333, "grad_norm": 569.1929321289062, "learning_rate": 6.104193556117793e-07, "loss": 22.5186, "step": 428480 }, { "epoch": 0.8655769098688171, "grad_norm": 389.8046875, "learning_rate": 6.102522285383888e-07, "loss": 10.6147, "step": 428490 }, { "epoch": 0.8655971105015009, "grad_norm": 299.5357360839844, "learning_rate": 6.100851228600974e-07, "loss": 28.5346, "step": 428500 }, { "epoch": 0.8656173111341847, "grad_norm": 633.14501953125, "learning_rate": 6.099180385777192e-07, "loss": 20.1365, "step": 428510 }, { "epoch": 0.8656375117668685, "grad_norm": 677.9397583007812, "learning_rate": 6.097509756920667e-07, "loss": 25.9088, "step": 428520 }, { "epoch": 0.8656577123995524, "grad_norm": 182.68701171875, "learning_rate": 6.095839342039561e-07, "loss": 13.7292, "step": 428530 }, { "epoch": 0.8656779130322362, "grad_norm": 308.7190246582031, "learning_rate": 6.094169141142014e-07, "loss": 17.2095, "step": 428540 }, { "epoch": 0.86569811366492, "grad_norm": 33.39704132080078, "learning_rate": 6.092499154236148e-07, "loss": 14.2919, "step": 428550 }, { "epoch": 0.8657183142976038, "grad_norm": 427.1455383300781, "learning_rate": 6.090829381330116e-07, "loss": 21.8508, "step": 428560 }, { "epoch": 0.8657385149302876, "grad_norm": 329.7239990234375, "learning_rate": 6.089159822432073e-07, "loss": 15.2621, "step": 428570 }, { "epoch": 0.8657587155629715, "grad_norm": 314.5326843261719, "learning_rate": 6.087490477550129e-07, "loss": 20.3163, "step": 428580 }, { "epoch": 0.8657789161956553, "grad_norm": 572.4465942382812, "learning_rate": 6.085821346692427e-07, "loss": 16.2445, "step": 428590 }, { "epoch": 0.8657991168283391, "grad_norm": 400.1139831542969, "learning_rate": 
6.084152429867113e-07, "loss": 13.0832, "step": 428600 }, { "epoch": 0.8658193174610229, "grad_norm": 211.2682342529297, "learning_rate": 6.082483727082317e-07, "loss": 19.7007, "step": 428610 }, { "epoch": 0.8658395180937067, "grad_norm": 22.129629135131836, "learning_rate": 6.080815238346155e-07, "loss": 12.4625, "step": 428620 }, { "epoch": 0.8658597187263906, "grad_norm": 177.38450622558594, "learning_rate": 6.079146963666777e-07, "loss": 19.3025, "step": 428630 }, { "epoch": 0.8658799193590744, "grad_norm": 248.06504821777344, "learning_rate": 6.077478903052314e-07, "loss": 10.8978, "step": 428640 }, { "epoch": 0.8659001199917581, "grad_norm": 420.5555419921875, "learning_rate": 6.075811056510894e-07, "loss": 22.3603, "step": 428650 }, { "epoch": 0.8659203206244419, "grad_norm": 462.0986633300781, "learning_rate": 6.074143424050638e-07, "loss": 12.2093, "step": 428660 }, { "epoch": 0.8659405212571257, "grad_norm": 136.93148803710938, "learning_rate": 6.072476005679684e-07, "loss": 17.8271, "step": 428670 }, { "epoch": 0.8659607218898095, "grad_norm": 218.40277099609375, "learning_rate": 6.070808801406158e-07, "loss": 14.5372, "step": 428680 }, { "epoch": 0.8659809225224934, "grad_norm": 508.3802185058594, "learning_rate": 6.069141811238166e-07, "loss": 28.8907, "step": 428690 }, { "epoch": 0.8660011231551772, "grad_norm": 515.6898803710938, "learning_rate": 6.067475035183862e-07, "loss": 19.3834, "step": 428700 }, { "epoch": 0.866021323787861, "grad_norm": 187.1757354736328, "learning_rate": 6.06580847325135e-07, "loss": 23.0241, "step": 428710 }, { "epoch": 0.8660415244205448, "grad_norm": 411.40753173828125, "learning_rate": 6.064142125448763e-07, "loss": 15.513, "step": 428720 }, { "epoch": 0.8660617250532286, "grad_norm": 325.0508728027344, "learning_rate": 6.062475991784211e-07, "loss": 9.4605, "step": 428730 }, { "epoch": 0.8660819256859125, "grad_norm": 573.1134643554688, "learning_rate": 6.060810072265833e-07, "loss": 16.3213, "step": 428740 }, { "epoch": 0.8661021263185963, "grad_norm": 238.4241943359375, "learning_rate": 6.059144366901737e-07, "loss": 24.3547, "step": 428750 }, { "epoch": 0.8661223269512801, "grad_norm": 197.23275756835938, "learning_rate": 6.057478875700035e-07, "loss": 17.9109, "step": 428760 }, { "epoch": 0.8661425275839639, "grad_norm": 341.0602722167969, "learning_rate": 6.055813598668853e-07, "loss": 27.3504, "step": 428770 }, { "epoch": 0.8661627282166477, "grad_norm": 847.5743408203125, "learning_rate": 6.054148535816301e-07, "loss": 22.3776, "step": 428780 }, { "epoch": 0.8661829288493316, "grad_norm": 183.5742645263672, "learning_rate": 6.052483687150512e-07, "loss": 30.0743, "step": 428790 }, { "epoch": 0.8662031294820154, "grad_norm": 780.5835571289062, "learning_rate": 6.050819052679585e-07, "loss": 19.2017, "step": 428800 }, { "epoch": 0.8662233301146992, "grad_norm": 535.20263671875, "learning_rate": 6.049154632411625e-07, "loss": 16.9253, "step": 428810 }, { "epoch": 0.866243530747383, "grad_norm": 549.9434814453125, "learning_rate": 6.047490426354763e-07, "loss": 39.4413, "step": 428820 }, { "epoch": 0.8662637313800668, "grad_norm": 660.7290649414062, "learning_rate": 6.045826434517104e-07, "loss": 15.035, "step": 428830 }, { "epoch": 0.8662839320127507, "grad_norm": 398.4607238769531, "learning_rate": 6.044162656906744e-07, "loss": 20.089, "step": 428840 }, { "epoch": 0.8663041326454345, "grad_norm": 207.11167907714844, "learning_rate": 6.042499093531806e-07, "loss": 14.1729, "step": 428850 }, { "epoch": 0.8663243332781183, "grad_norm": 
146.52442932128906, "learning_rate": 6.040835744400403e-07, "loss": 14.1465, "step": 428860 }, { "epoch": 0.8663445339108021, "grad_norm": 394.9657287597656, "learning_rate": 6.039172609520639e-07, "loss": 19.8002, "step": 428870 }, { "epoch": 0.866364734543486, "grad_norm": 185.9178466796875, "learning_rate": 6.037509688900606e-07, "loss": 17.8917, "step": 428880 }, { "epoch": 0.8663849351761698, "grad_norm": 221.9687042236328, "learning_rate": 6.035846982548427e-07, "loss": 12.455, "step": 428890 }, { "epoch": 0.8664051358088536, "grad_norm": 630.0784912109375, "learning_rate": 6.034184490472195e-07, "loss": 20.9911, "step": 428900 }, { "epoch": 0.8664253364415373, "grad_norm": 412.6866149902344, "learning_rate": 6.032522212680009e-07, "loss": 23.1872, "step": 428910 }, { "epoch": 0.8664455370742211, "grad_norm": 362.35296630859375, "learning_rate": 6.030860149179973e-07, "loss": 31.9858, "step": 428920 }, { "epoch": 0.8664657377069049, "grad_norm": 606.3483276367188, "learning_rate": 6.029198299980216e-07, "loss": 19.8697, "step": 428930 }, { "epoch": 0.8664859383395888, "grad_norm": 331.26861572265625, "learning_rate": 6.027536665088795e-07, "loss": 16.6162, "step": 428940 }, { "epoch": 0.8665061389722726, "grad_norm": 681.3436889648438, "learning_rate": 6.025875244513824e-07, "loss": 22.1219, "step": 428950 }, { "epoch": 0.8665263396049564, "grad_norm": 241.8258514404297, "learning_rate": 6.024214038263415e-07, "loss": 17.739, "step": 428960 }, { "epoch": 0.8665465402376402, "grad_norm": 483.76239013671875, "learning_rate": 6.022553046345647e-07, "loss": 14.6526, "step": 428970 }, { "epoch": 0.866566740870324, "grad_norm": 263.0108947753906, "learning_rate": 6.020892268768619e-07, "loss": 15.652, "step": 428980 }, { "epoch": 0.8665869415030079, "grad_norm": 369.8650817871094, "learning_rate": 6.019231705540435e-07, "loss": 28.1917, "step": 428990 }, { "epoch": 0.8666071421356917, "grad_norm": 325.9433288574219, "learning_rate": 6.017571356669183e-07, "loss": 18.0282, "step": 429000 }, { "epoch": 0.8666273427683755, "grad_norm": 964.6558227539062, "learning_rate": 6.015911222162946e-07, "loss": 30.9763, "step": 429010 }, { "epoch": 0.8666475434010593, "grad_norm": 114.1938705444336, "learning_rate": 6.014251302029817e-07, "loss": 14.6377, "step": 429020 }, { "epoch": 0.8666677440337431, "grad_norm": 836.9354248046875, "learning_rate": 6.012591596277906e-07, "loss": 24.4932, "step": 429030 }, { "epoch": 0.866687944666427, "grad_norm": 426.4087219238281, "learning_rate": 6.01093210491529e-07, "loss": 12.1576, "step": 429040 }, { "epoch": 0.8667081452991108, "grad_norm": 317.5310974121094, "learning_rate": 6.009272827950042e-07, "loss": 24.4424, "step": 429050 }, { "epoch": 0.8667283459317946, "grad_norm": 448.1875305175781, "learning_rate": 6.007613765390274e-07, "loss": 19.8337, "step": 429060 }, { "epoch": 0.8667485465644784, "grad_norm": 399.0731201171875, "learning_rate": 6.005954917244062e-07, "loss": 19.61, "step": 429070 }, { "epoch": 0.8667687471971622, "grad_norm": 395.09912109375, "learning_rate": 6.004296283519478e-07, "loss": 12.0017, "step": 429080 }, { "epoch": 0.8667889478298461, "grad_norm": 44.819297790527344, "learning_rate": 6.002637864224631e-07, "loss": 23.6252, "step": 429090 }, { "epoch": 0.8668091484625299, "grad_norm": 1196.097412109375, "learning_rate": 6.000979659367579e-07, "loss": 29.9458, "step": 429100 }, { "epoch": 0.8668293490952137, "grad_norm": 607.6129150390625, "learning_rate": 5.999321668956425e-07, "loss": 33.875, "step": 429110 }, { "epoch": 
0.8668495497278975, "grad_norm": 7.330821990966797, "learning_rate": 5.997663892999239e-07, "loss": 15.4908, "step": 429120 }, { "epoch": 0.8668697503605813, "grad_norm": 38.274208068847656, "learning_rate": 5.996006331504095e-07, "loss": 20.6134, "step": 429130 }, { "epoch": 0.8668899509932652, "grad_norm": 188.052734375, "learning_rate": 5.994348984479092e-07, "loss": 9.7485, "step": 429140 }, { "epoch": 0.866910151625949, "grad_norm": 266.77996826171875, "learning_rate": 5.992691851932292e-07, "loss": 15.0421, "step": 429150 }, { "epoch": 0.8669303522586327, "grad_norm": 796.673095703125, "learning_rate": 5.991034933871764e-07, "loss": 26.3724, "step": 429160 }, { "epoch": 0.8669505528913165, "grad_norm": 565.3130493164062, "learning_rate": 5.989378230305592e-07, "loss": 16.5069, "step": 429170 }, { "epoch": 0.8669707535240003, "grad_norm": 660.585205078125, "learning_rate": 5.987721741241864e-07, "loss": 23.7941, "step": 429180 }, { "epoch": 0.8669909541566841, "grad_norm": 213.3594512939453, "learning_rate": 5.986065466688645e-07, "loss": 8.1154, "step": 429190 }, { "epoch": 0.867011154789368, "grad_norm": 346.75775146484375, "learning_rate": 5.98440940665399e-07, "loss": 6.3781, "step": 429200 }, { "epoch": 0.8670313554220518, "grad_norm": 371.4981994628906, "learning_rate": 5.982753561145999e-07, "loss": 12.4109, "step": 429210 }, { "epoch": 0.8670515560547356, "grad_norm": 529.7020263671875, "learning_rate": 5.981097930172725e-07, "loss": 12.5239, "step": 429220 }, { "epoch": 0.8670717566874194, "grad_norm": 96.09205627441406, "learning_rate": 5.979442513742234e-07, "loss": 16.9727, "step": 429230 }, { "epoch": 0.8670919573201032, "grad_norm": 101.05036926269531, "learning_rate": 5.977787311862598e-07, "loss": 29.9755, "step": 429240 }, { "epoch": 0.8671121579527871, "grad_norm": 340.5410461425781, "learning_rate": 5.9761323245419e-07, "loss": 13.352, "step": 429250 }, { "epoch": 0.8671323585854709, "grad_norm": 692.7017211914062, "learning_rate": 5.974477551788194e-07, "loss": 17.2747, "step": 429260 }, { "epoch": 0.8671525592181547, "grad_norm": 6.047051429748535, "learning_rate": 5.972822993609534e-07, "loss": 20.4808, "step": 429270 }, { "epoch": 0.8671727598508385, "grad_norm": 296.6640319824219, "learning_rate": 5.971168650014008e-07, "loss": 16.523, "step": 429280 }, { "epoch": 0.8671929604835223, "grad_norm": 690.8693237304688, "learning_rate": 5.969514521009662e-07, "loss": 17.6326, "step": 429290 }, { "epoch": 0.8672131611162062, "grad_norm": 161.1558837890625, "learning_rate": 5.967860606604553e-07, "loss": 9.8164, "step": 429300 }, { "epoch": 0.86723336174889, "grad_norm": 797.1126708984375, "learning_rate": 5.966206906806748e-07, "loss": 21.5301, "step": 429310 }, { "epoch": 0.8672535623815738, "grad_norm": 326.7194519042969, "learning_rate": 5.964553421624325e-07, "loss": 11.2839, "step": 429320 }, { "epoch": 0.8672737630142576, "grad_norm": 360.45458984375, "learning_rate": 5.962900151065326e-07, "loss": 13.3711, "step": 429330 }, { "epoch": 0.8672939636469414, "grad_norm": 225.3975372314453, "learning_rate": 5.961247095137795e-07, "loss": 11.2614, "step": 429340 }, { "epoch": 0.8673141642796253, "grad_norm": 301.3730163574219, "learning_rate": 5.959594253849821e-07, "loss": 21.4585, "step": 429350 }, { "epoch": 0.8673343649123091, "grad_norm": 598.3665771484375, "learning_rate": 5.95794162720944e-07, "loss": 24.2816, "step": 429360 }, { "epoch": 0.8673545655449929, "grad_norm": 180.07510375976562, "learning_rate": 5.956289215224703e-07, "loss": 7.2624, "step": 
429370 }, { "epoch": 0.8673747661776767, "grad_norm": 148.77638244628906, "learning_rate": 5.95463701790368e-07, "loss": 10.5682, "step": 429380 }, { "epoch": 0.8673949668103605, "grad_norm": 296.96978759765625, "learning_rate": 5.9529850352544e-07, "loss": 5.4315, "step": 429390 }, { "epoch": 0.8674151674430444, "grad_norm": 186.8556365966797, "learning_rate": 5.951333267284942e-07, "loss": 17.682, "step": 429400 }, { "epoch": 0.8674353680757282, "grad_norm": 472.2164306640625, "learning_rate": 5.949681714003347e-07, "loss": 16.0475, "step": 429410 }, { "epoch": 0.8674555687084119, "grad_norm": 502.9358215332031, "learning_rate": 5.948030375417646e-07, "loss": 18.0679, "step": 429420 }, { "epoch": 0.8674757693410957, "grad_norm": 209.30079650878906, "learning_rate": 5.946379251535911e-07, "loss": 15.7121, "step": 429430 }, { "epoch": 0.8674959699737795, "grad_norm": 0.22631804645061493, "learning_rate": 5.944728342366179e-07, "loss": 18.109, "step": 429440 }, { "epoch": 0.8675161706064634, "grad_norm": 458.28662109375, "learning_rate": 5.943077647916496e-07, "loss": 13.4672, "step": 429450 }, { "epoch": 0.8675363712391472, "grad_norm": 349.9730529785156, "learning_rate": 5.941427168194902e-07, "loss": 21.1921, "step": 429460 }, { "epoch": 0.867556571871831, "grad_norm": 614.0407104492188, "learning_rate": 5.93977690320946e-07, "loss": 22.7234, "step": 429470 }, { "epoch": 0.8675767725045148, "grad_norm": 400.67938232421875, "learning_rate": 5.938126852968201e-07, "loss": 11.0473, "step": 429480 }, { "epoch": 0.8675969731371986, "grad_norm": 653.5919799804688, "learning_rate": 5.936477017479158e-07, "loss": 12.628, "step": 429490 }, { "epoch": 0.8676171737698825, "grad_norm": 406.5254211425781, "learning_rate": 5.934827396750392e-07, "loss": 29.1455, "step": 429500 }, { "epoch": 0.8676373744025663, "grad_norm": 265.0135498046875, "learning_rate": 5.933177990789934e-07, "loss": 12.9955, "step": 429510 }, { "epoch": 0.8676575750352501, "grad_norm": 358.8462829589844, "learning_rate": 5.931528799605813e-07, "loss": 20.3956, "step": 429520 }, { "epoch": 0.8676777756679339, "grad_norm": 660.5332641601562, "learning_rate": 5.92987982320607e-07, "loss": 28.7643, "step": 429530 }, { "epoch": 0.8676979763006177, "grad_norm": 831.7608642578125, "learning_rate": 5.928231061598772e-07, "loss": 22.3911, "step": 429540 }, { "epoch": 0.8677181769333016, "grad_norm": 287.30059814453125, "learning_rate": 5.926582514791912e-07, "loss": 15.9544, "step": 429550 }, { "epoch": 0.8677383775659854, "grad_norm": 534.867431640625, "learning_rate": 5.92493418279354e-07, "loss": 17.9074, "step": 429560 }, { "epoch": 0.8677585781986692, "grad_norm": 2.071526288986206, "learning_rate": 5.923286065611705e-07, "loss": 10.6438, "step": 429570 }, { "epoch": 0.867778778831353, "grad_norm": 794.7091674804688, "learning_rate": 5.921638163254423e-07, "loss": 26.6991, "step": 429580 }, { "epoch": 0.8677989794640368, "grad_norm": 532.3543701171875, "learning_rate": 5.919990475729725e-07, "loss": 20.9693, "step": 429590 }, { "epoch": 0.8678191800967207, "grad_norm": 124.79682922363281, "learning_rate": 5.918343003045656e-07, "loss": 7.699, "step": 429600 }, { "epoch": 0.8678393807294045, "grad_norm": 1302.444580078125, "learning_rate": 5.916695745210238e-07, "loss": 25.4062, "step": 429610 }, { "epoch": 0.8678595813620883, "grad_norm": 697.5823364257812, "learning_rate": 5.915048702231491e-07, "loss": 18.373, "step": 429620 }, { "epoch": 0.8678797819947721, "grad_norm": 262.98980712890625, "learning_rate": 5.913401874117447e-07, 
"loss": 13.0781, "step": 429630 }, { "epoch": 0.8678999826274559, "grad_norm": 508.6786193847656, "learning_rate": 5.911755260876145e-07, "loss": 10.9307, "step": 429640 }, { "epoch": 0.8679201832601398, "grad_norm": 2.2486164569854736, "learning_rate": 5.910108862515596e-07, "loss": 16.3299, "step": 429650 }, { "epoch": 0.8679403838928236, "grad_norm": 539.4609375, "learning_rate": 5.908462679043825e-07, "loss": 18.2002, "step": 429660 }, { "epoch": 0.8679605845255073, "grad_norm": 812.7120971679688, "learning_rate": 5.906816710468866e-07, "loss": 17.6814, "step": 429670 }, { "epoch": 0.8679807851581911, "grad_norm": 134.23193359375, "learning_rate": 5.905170956798739e-07, "loss": 18.2646, "step": 429680 }, { "epoch": 0.8680009857908749, "grad_norm": 302.33538818359375, "learning_rate": 5.903525418041445e-07, "loss": 19.6878, "step": 429690 }, { "epoch": 0.8680211864235587, "grad_norm": 690.2728881835938, "learning_rate": 5.901880094205037e-07, "loss": 27.2643, "step": 429700 }, { "epoch": 0.8680413870562426, "grad_norm": 243.7892303466797, "learning_rate": 5.900234985297498e-07, "loss": 16.3223, "step": 429710 }, { "epoch": 0.8680615876889264, "grad_norm": 343.6305236816406, "learning_rate": 5.898590091326884e-07, "loss": 12.9504, "step": 429720 }, { "epoch": 0.8680817883216102, "grad_norm": 401.50335693359375, "learning_rate": 5.896945412301186e-07, "loss": 18.5425, "step": 429730 }, { "epoch": 0.868101988954294, "grad_norm": 463.09027099609375, "learning_rate": 5.895300948228421e-07, "loss": 18.9193, "step": 429740 }, { "epoch": 0.8681221895869778, "grad_norm": 31.94213104248047, "learning_rate": 5.893656699116618e-07, "loss": 12.961, "step": 429750 }, { "epoch": 0.8681423902196617, "grad_norm": 168.0640106201172, "learning_rate": 5.892012664973784e-07, "loss": 15.7568, "step": 429760 }, { "epoch": 0.8681625908523455, "grad_norm": 18.695362091064453, "learning_rate": 5.89036884580792e-07, "loss": 10.4548, "step": 429770 }, { "epoch": 0.8681827914850293, "grad_norm": 192.1884307861328, "learning_rate": 5.888725241627047e-07, "loss": 26.1267, "step": 429780 }, { "epoch": 0.8682029921177131, "grad_norm": 971.8569946289062, "learning_rate": 5.887081852439186e-07, "loss": 15.6991, "step": 429790 }, { "epoch": 0.8682231927503969, "grad_norm": 236.9096221923828, "learning_rate": 5.885438678252342e-07, "loss": 8.6657, "step": 429800 }, { "epoch": 0.8682433933830808, "grad_norm": 144.9537811279297, "learning_rate": 5.883795719074509e-07, "loss": 14.095, "step": 429810 }, { "epoch": 0.8682635940157646, "grad_norm": 165.6309814453125, "learning_rate": 5.882152974913713e-07, "loss": 11.5047, "step": 429820 }, { "epoch": 0.8682837946484484, "grad_norm": 436.7717590332031, "learning_rate": 5.880510445777954e-07, "loss": 26.6042, "step": 429830 }, { "epoch": 0.8683039952811322, "grad_norm": 427.2652893066406, "learning_rate": 5.878868131675225e-07, "loss": 11.8873, "step": 429840 }, { "epoch": 0.868324195913816, "grad_norm": 63.67523956298828, "learning_rate": 5.877226032613542e-07, "loss": 13.2029, "step": 429850 }, { "epoch": 0.8683443965464999, "grad_norm": 112.3509292602539, "learning_rate": 5.875584148600916e-07, "loss": 12.0538, "step": 429860 }, { "epoch": 0.8683645971791837, "grad_norm": 80.30730438232422, "learning_rate": 5.873942479645345e-07, "loss": 17.1, "step": 429870 }, { "epoch": 0.8683847978118675, "grad_norm": 356.8930358886719, "learning_rate": 5.872301025754812e-07, "loss": 11.842, "step": 429880 }, { "epoch": 0.8684049984445513, "grad_norm": 548.4095458984375, "learning_rate": 
5.870659786937344e-07, "loss": 18.5002, "step": 429890 }, { "epoch": 0.8684251990772351, "grad_norm": 45.01439666748047, "learning_rate": 5.869018763200929e-07, "loss": 22.6678, "step": 429900 }, { "epoch": 0.868445399709919, "grad_norm": 42.51400375366211, "learning_rate": 5.867377954553555e-07, "loss": 31.7126, "step": 429910 }, { "epoch": 0.8684656003426028, "grad_norm": 472.7746276855469, "learning_rate": 5.865737361003226e-07, "loss": 17.5104, "step": 429920 }, { "epoch": 0.8684858009752865, "grad_norm": 214.0835723876953, "learning_rate": 5.864096982557949e-07, "loss": 18.531, "step": 429930 }, { "epoch": 0.8685060016079703, "grad_norm": 1023.9053344726562, "learning_rate": 5.862456819225715e-07, "loss": 21.1408, "step": 429940 }, { "epoch": 0.8685262022406541, "grad_norm": 826.1692504882812, "learning_rate": 5.860816871014496e-07, "loss": 20.2905, "step": 429950 }, { "epoch": 0.868546402873338, "grad_norm": 303.5501403808594, "learning_rate": 5.859177137932315e-07, "loss": 20.0418, "step": 429960 }, { "epoch": 0.8685666035060218, "grad_norm": 949.9374389648438, "learning_rate": 5.857537619987152e-07, "loss": 16.031, "step": 429970 }, { "epoch": 0.8685868041387056, "grad_norm": 184.94468688964844, "learning_rate": 5.855898317186992e-07, "loss": 13.5504, "step": 429980 }, { "epoch": 0.8686070047713894, "grad_norm": 224.17349243164062, "learning_rate": 5.854259229539833e-07, "loss": 20.099, "step": 429990 }, { "epoch": 0.8686272054040732, "grad_norm": 571.5230102539062, "learning_rate": 5.852620357053651e-07, "loss": 20.4905, "step": 430000 }, { "epoch": 0.868647406036757, "grad_norm": 362.5823669433594, "learning_rate": 5.850981699736453e-07, "loss": 31.167, "step": 430010 }, { "epoch": 0.8686676066694409, "grad_norm": 429.9981384277344, "learning_rate": 5.849343257596218e-07, "loss": 16.7991, "step": 430020 }, { "epoch": 0.8686878073021247, "grad_norm": 447.98236083984375, "learning_rate": 5.847705030640915e-07, "loss": 23.4799, "step": 430030 }, { "epoch": 0.8687080079348085, "grad_norm": 339.2086486816406, "learning_rate": 5.84606701887856e-07, "loss": 14.5216, "step": 430040 }, { "epoch": 0.8687282085674923, "grad_norm": 9.948432922363281, "learning_rate": 5.844429222317111e-07, "loss": 4.6542, "step": 430050 }, { "epoch": 0.8687484092001762, "grad_norm": 334.28228759765625, "learning_rate": 5.842791640964551e-07, "loss": 22.8723, "step": 430060 }, { "epoch": 0.86876860983286, "grad_norm": 188.88925170898438, "learning_rate": 5.841154274828869e-07, "loss": 14.7107, "step": 430070 }, { "epoch": 0.8687888104655438, "grad_norm": 496.1181335449219, "learning_rate": 5.839517123918059e-07, "loss": 18.6788, "step": 430080 }, { "epoch": 0.8688090110982276, "grad_norm": 227.1697235107422, "learning_rate": 5.83788018824008e-07, "loss": 10.2072, "step": 430090 }, { "epoch": 0.8688292117309114, "grad_norm": 239.81246948242188, "learning_rate": 5.836243467802915e-07, "loss": 15.5513, "step": 430100 }, { "epoch": 0.8688494123635953, "grad_norm": 183.52682495117188, "learning_rate": 5.834606962614548e-07, "loss": 6.584, "step": 430110 }, { "epoch": 0.8688696129962791, "grad_norm": 421.7367858886719, "learning_rate": 5.832970672682948e-07, "loss": 30.8429, "step": 430120 }, { "epoch": 0.8688898136289629, "grad_norm": 890.8756103515625, "learning_rate": 5.831334598016086e-07, "loss": 21.5495, "step": 430130 }, { "epoch": 0.8689100142616467, "grad_norm": 315.5503234863281, "learning_rate": 5.829698738621941e-07, "loss": 21.7944, "step": 430140 }, { "epoch": 0.8689302148943305, "grad_norm": 
303.3395080566406, "learning_rate": 5.828063094508507e-07, "loss": 22.6338, "step": 430150 }, { "epoch": 0.8689504155270144, "grad_norm": 297.9224853515625, "learning_rate": 5.826427665683715e-07, "loss": 8.8286, "step": 430160 }, { "epoch": 0.8689706161596982, "grad_norm": 394.8922119140625, "learning_rate": 5.824792452155558e-07, "loss": 15.724, "step": 430170 }, { "epoch": 0.868990816792382, "grad_norm": 441.8057556152344, "learning_rate": 5.823157453932015e-07, "loss": 26.8445, "step": 430180 }, { "epoch": 0.8690110174250657, "grad_norm": 26.113948822021484, "learning_rate": 5.821522671021041e-07, "loss": 20.7014, "step": 430190 }, { "epoch": 0.8690312180577495, "grad_norm": 501.9127197265625, "learning_rate": 5.819888103430598e-07, "loss": 19.1681, "step": 430200 }, { "epoch": 0.8690514186904333, "grad_norm": 878.7197875976562, "learning_rate": 5.818253751168679e-07, "loss": 17.2247, "step": 430210 }, { "epoch": 0.8690716193231172, "grad_norm": 168.92588806152344, "learning_rate": 5.816619614243224e-07, "loss": 22.1551, "step": 430220 }, { "epoch": 0.869091819955801, "grad_norm": 309.08953857421875, "learning_rate": 5.814985692662201e-07, "loss": 31.1003, "step": 430230 }, { "epoch": 0.8691120205884848, "grad_norm": 260.6549072265625, "learning_rate": 5.81335198643358e-07, "loss": 12.7556, "step": 430240 }, { "epoch": 0.8691322212211686, "grad_norm": 285.0781555175781, "learning_rate": 5.811718495565327e-07, "loss": 14.1524, "step": 430250 }, { "epoch": 0.8691524218538524, "grad_norm": 851.3721923828125, "learning_rate": 5.810085220065404e-07, "loss": 42.9282, "step": 430260 }, { "epoch": 0.8691726224865363, "grad_norm": 672.4266967773438, "learning_rate": 5.808452159941752e-07, "loss": 14.6863, "step": 430270 }, { "epoch": 0.8691928231192201, "grad_norm": 127.9519271850586, "learning_rate": 5.806819315202361e-07, "loss": 15.2976, "step": 430280 }, { "epoch": 0.8692130237519039, "grad_norm": 315.2915954589844, "learning_rate": 5.805186685855163e-07, "loss": 8.6507, "step": 430290 }, { "epoch": 0.8692332243845877, "grad_norm": 301.9071960449219, "learning_rate": 5.803554271908124e-07, "loss": 15.8969, "step": 430300 }, { "epoch": 0.8692534250172715, "grad_norm": 345.7572326660156, "learning_rate": 5.801922073369203e-07, "loss": 14.5178, "step": 430310 }, { "epoch": 0.8692736256499554, "grad_norm": 429.4742431640625, "learning_rate": 5.800290090246346e-07, "loss": 12.8204, "step": 430320 }, { "epoch": 0.8692938262826392, "grad_norm": 374.86956787109375, "learning_rate": 5.798658322547529e-07, "loss": 16.9373, "step": 430330 }, { "epoch": 0.869314026915323, "grad_norm": 175.72686767578125, "learning_rate": 5.797026770280683e-07, "loss": 7.0061, "step": 430340 }, { "epoch": 0.8693342275480068, "grad_norm": 4.458624362945557, "learning_rate": 5.795395433453765e-07, "loss": 14.7271, "step": 430350 }, { "epoch": 0.8693544281806906, "grad_norm": 185.01858520507812, "learning_rate": 5.793764312074735e-07, "loss": 17.3475, "step": 430360 }, { "epoch": 0.8693746288133745, "grad_norm": 254.61776733398438, "learning_rate": 5.792133406151523e-07, "loss": 12.7682, "step": 430370 }, { "epoch": 0.8693948294460583, "grad_norm": 371.08416748046875, "learning_rate": 5.790502715692104e-07, "loss": 23.1745, "step": 430380 }, { "epoch": 0.8694150300787421, "grad_norm": 311.84423828125, "learning_rate": 5.788872240704402e-07, "loss": 13.9985, "step": 430390 }, { "epoch": 0.8694352307114259, "grad_norm": 510.8533935546875, "learning_rate": 5.787241981196384e-07, "loss": 25.0979, "step": 430400 }, { "epoch": 
0.8694554313441097, "grad_norm": 385.23028564453125, "learning_rate": 5.785611937175989e-07, "loss": 11.1401, "step": 430410 }, { "epoch": 0.8694756319767936, "grad_norm": 78.8631591796875, "learning_rate": 5.783982108651148e-07, "loss": 16.0758, "step": 430420 }, { "epoch": 0.8694958326094774, "grad_norm": 550.5646362304688, "learning_rate": 5.782352495629822e-07, "loss": 24.8675, "step": 430430 }, { "epoch": 0.8695160332421611, "grad_norm": 36.11176681518555, "learning_rate": 5.780723098119951e-07, "loss": 24.2342, "step": 430440 }, { "epoch": 0.8695362338748449, "grad_norm": 163.89772033691406, "learning_rate": 5.779093916129464e-07, "loss": 19.7238, "step": 430450 }, { "epoch": 0.8695564345075287, "grad_norm": 196.0900421142578, "learning_rate": 5.777464949666306e-07, "loss": 36.1687, "step": 430460 }, { "epoch": 0.8695766351402126, "grad_norm": 170.99618530273438, "learning_rate": 5.775836198738427e-07, "loss": 48.7488, "step": 430470 }, { "epoch": 0.8695968357728964, "grad_norm": 68.98126220703125, "learning_rate": 5.774207663353765e-07, "loss": 17.3575, "step": 430480 }, { "epoch": 0.8696170364055802, "grad_norm": 196.4060516357422, "learning_rate": 5.772579343520241e-07, "loss": 24.434, "step": 430490 }, { "epoch": 0.869637237038264, "grad_norm": 221.60061645507812, "learning_rate": 5.770951239245803e-07, "loss": 21.6284, "step": 430500 }, { "epoch": 0.8696574376709478, "grad_norm": 758.8854370117188, "learning_rate": 5.769323350538391e-07, "loss": 28.3884, "step": 430510 }, { "epoch": 0.8696776383036317, "grad_norm": 156.29849243164062, "learning_rate": 5.767695677405921e-07, "loss": 21.5101, "step": 430520 }, { "epoch": 0.8696978389363155, "grad_norm": 207.89292907714844, "learning_rate": 5.766068219856341e-07, "loss": 27.7899, "step": 430530 }, { "epoch": 0.8697180395689993, "grad_norm": 300.2975158691406, "learning_rate": 5.764440977897584e-07, "loss": 21.9585, "step": 430540 }, { "epoch": 0.8697382402016831, "grad_norm": 296.90606689453125, "learning_rate": 5.762813951537582e-07, "loss": 12.1937, "step": 430550 }, { "epoch": 0.8697584408343669, "grad_norm": 179.23008728027344, "learning_rate": 5.761187140784247e-07, "loss": 13.7425, "step": 430560 }, { "epoch": 0.8697786414670508, "grad_norm": 180.9647216796875, "learning_rate": 5.759560545645527e-07, "loss": 17.7472, "step": 430570 }, { "epoch": 0.8697988420997346, "grad_norm": 295.97515869140625, "learning_rate": 5.757934166129347e-07, "loss": 11.9401, "step": 430580 }, { "epoch": 0.8698190427324184, "grad_norm": 453.92840576171875, "learning_rate": 5.756308002243622e-07, "loss": 25.4231, "step": 430590 }, { "epoch": 0.8698392433651022, "grad_norm": 279.5281066894531, "learning_rate": 5.754682053996291e-07, "loss": 13.0344, "step": 430600 }, { "epoch": 0.869859443997786, "grad_norm": 364.5643005371094, "learning_rate": 5.753056321395267e-07, "loss": 13.5878, "step": 430610 }, { "epoch": 0.8698796446304699, "grad_norm": 478.2242126464844, "learning_rate": 5.751430804448488e-07, "loss": 17.6104, "step": 430620 }, { "epoch": 0.8698998452631537, "grad_norm": 193.85586547851562, "learning_rate": 5.749805503163869e-07, "loss": 10.5943, "step": 430630 }, { "epoch": 0.8699200458958375, "grad_norm": 67.99600982666016, "learning_rate": 5.748180417549321e-07, "loss": 10.1801, "step": 430640 }, { "epoch": 0.8699402465285213, "grad_norm": 151.18943786621094, "learning_rate": 5.746555547612781e-07, "loss": 24.1284, "step": 430650 }, { "epoch": 0.8699604471612051, "grad_norm": 712.3665161132812, "learning_rate": 5.744930893362166e-07, 
"loss": 24.4889, "step": 430660 }, { "epoch": 0.869980647793889, "grad_norm": 426.7364807128906, "learning_rate": 5.74330645480538e-07, "loss": 16.0659, "step": 430670 }, { "epoch": 0.8700008484265728, "grad_norm": 322.21551513671875, "learning_rate": 5.741682231950351e-07, "loss": 21.5867, "step": 430680 }, { "epoch": 0.8700210490592566, "grad_norm": 193.04405212402344, "learning_rate": 5.740058224805001e-07, "loss": 29.5392, "step": 430690 }, { "epoch": 0.8700412496919403, "grad_norm": 57.63292694091797, "learning_rate": 5.738434433377244e-07, "loss": 18.641, "step": 430700 }, { "epoch": 0.8700614503246241, "grad_norm": 256.9406433105469, "learning_rate": 5.736810857674979e-07, "loss": 30.6381, "step": 430710 }, { "epoch": 0.8700816509573079, "grad_norm": 812.01513671875, "learning_rate": 5.735187497706135e-07, "loss": 25.1458, "step": 430720 }, { "epoch": 0.8701018515899918, "grad_norm": 382.5735168457031, "learning_rate": 5.733564353478622e-07, "loss": 13.3199, "step": 430730 }, { "epoch": 0.8701220522226756, "grad_norm": 0.0, "learning_rate": 5.731941425000337e-07, "loss": 25.8635, "step": 430740 }, { "epoch": 0.8701422528553594, "grad_norm": 259.4105529785156, "learning_rate": 5.730318712279203e-07, "loss": 7.6119, "step": 430750 }, { "epoch": 0.8701624534880432, "grad_norm": 455.6169738769531, "learning_rate": 5.728696215323143e-07, "loss": 13.8466, "step": 430760 }, { "epoch": 0.870182654120727, "grad_norm": 10.431163787841797, "learning_rate": 5.727073934140026e-07, "loss": 17.4464, "step": 430770 }, { "epoch": 0.8702028547534109, "grad_norm": 20.1141414642334, "learning_rate": 5.725451868737786e-07, "loss": 10.5243, "step": 430780 }, { "epoch": 0.8702230553860947, "grad_norm": 79.7918930053711, "learning_rate": 5.723830019124332e-07, "loss": 17.1926, "step": 430790 }, { "epoch": 0.8702432560187785, "grad_norm": 243.2442169189453, "learning_rate": 5.722208385307559e-07, "loss": 11.0174, "step": 430800 }, { "epoch": 0.8702634566514623, "grad_norm": 311.8316650390625, "learning_rate": 5.720586967295366e-07, "loss": 10.5594, "step": 430810 }, { "epoch": 0.8702836572841461, "grad_norm": 1566.53173828125, "learning_rate": 5.718965765095669e-07, "loss": 18.4191, "step": 430820 }, { "epoch": 0.87030385791683, "grad_norm": 16.48854637145996, "learning_rate": 5.717344778716361e-07, "loss": 10.0563, "step": 430830 }, { "epoch": 0.8703240585495138, "grad_norm": 369.173095703125, "learning_rate": 5.715724008165335e-07, "loss": 18.6129, "step": 430840 }, { "epoch": 0.8703442591821976, "grad_norm": 287.1148376464844, "learning_rate": 5.714103453450498e-07, "loss": 17.074, "step": 430850 }, { "epoch": 0.8703644598148814, "grad_norm": 375.2651062011719, "learning_rate": 5.712483114579758e-07, "loss": 22.1905, "step": 430860 }, { "epoch": 0.8703846604475652, "grad_norm": 144.95962524414062, "learning_rate": 5.710862991561006e-07, "loss": 13.5704, "step": 430870 }, { "epoch": 0.8704048610802491, "grad_norm": 589.4137573242188, "learning_rate": 5.709243084402128e-07, "loss": 14.086, "step": 430880 }, { "epoch": 0.8704250617129329, "grad_norm": 4.6374192237854, "learning_rate": 5.70762339311104e-07, "loss": 17.3641, "step": 430890 }, { "epoch": 0.8704452623456167, "grad_norm": 481.83892822265625, "learning_rate": 5.706003917695619e-07, "loss": 26.1069, "step": 430900 }, { "epoch": 0.8704654629783005, "grad_norm": 828.807861328125, "learning_rate": 5.704384658163748e-07, "loss": 20.8426, "step": 430910 }, { "epoch": 0.8704856636109843, "grad_norm": 578.0156860351562, "learning_rate": 
5.702765614523354e-07, "loss": 27.7354, "step": 430920 }, { "epoch": 0.8705058642436682, "grad_norm": 473.4757995605469, "learning_rate": 5.701146786782291e-07, "loss": 22.8459, "step": 430930 }, { "epoch": 0.870526064876352, "grad_norm": 713.3866577148438, "learning_rate": 5.699528174948477e-07, "loss": 103.8826, "step": 430940 }, { "epoch": 0.8705462655090357, "grad_norm": 761.142578125, "learning_rate": 5.697909779029786e-07, "loss": 14.9914, "step": 430950 }, { "epoch": 0.8705664661417195, "grad_norm": 440.9145202636719, "learning_rate": 5.696291599034104e-07, "loss": 37.4724, "step": 430960 }, { "epoch": 0.8705866667744033, "grad_norm": 239.36610412597656, "learning_rate": 5.694673634969334e-07, "loss": 9.299, "step": 430970 }, { "epoch": 0.8706068674070871, "grad_norm": 616.1082763671875, "learning_rate": 5.693055886843341e-07, "loss": 17.9193, "step": 430980 }, { "epoch": 0.870627068039771, "grad_norm": 220.93699645996094, "learning_rate": 5.691438354664031e-07, "loss": 25.5439, "step": 430990 }, { "epoch": 0.8706472686724548, "grad_norm": 50.125919342041016, "learning_rate": 5.689821038439264e-07, "loss": 22.6964, "step": 431000 }, { "epoch": 0.8706674693051386, "grad_norm": 86.68278503417969, "learning_rate": 5.688203938176945e-07, "loss": 18.2718, "step": 431010 }, { "epoch": 0.8706876699378224, "grad_norm": 283.8215026855469, "learning_rate": 5.686587053884946e-07, "loss": 10.0713, "step": 431020 }, { "epoch": 0.8707078705705062, "grad_norm": 289.13702392578125, "learning_rate": 5.684970385571137e-07, "loss": 12.529, "step": 431030 }, { "epoch": 0.8707280712031901, "grad_norm": 564.9439697265625, "learning_rate": 5.683353933243418e-07, "loss": 27.3489, "step": 431040 }, { "epoch": 0.8707482718358739, "grad_norm": 163.3947296142578, "learning_rate": 5.681737696909656e-07, "loss": 16.6905, "step": 431050 }, { "epoch": 0.8707684724685577, "grad_norm": 573.1134643554688, "learning_rate": 5.680121676577721e-07, "loss": 13.4849, "step": 431060 }, { "epoch": 0.8707886731012415, "grad_norm": 630.4998168945312, "learning_rate": 5.678505872255496e-07, "loss": 36.728, "step": 431070 }, { "epoch": 0.8708088737339253, "grad_norm": 344.05316162109375, "learning_rate": 5.676890283950881e-07, "loss": 10.6528, "step": 431080 }, { "epoch": 0.8708290743666092, "grad_norm": 374.6134948730469, "learning_rate": 5.675274911671702e-07, "loss": 14.498, "step": 431090 }, { "epoch": 0.870849274999293, "grad_norm": 26.75878143310547, "learning_rate": 5.673659755425859e-07, "loss": 31.9518, "step": 431100 }, { "epoch": 0.8708694756319768, "grad_norm": 45.3067512512207, "learning_rate": 5.672044815221234e-07, "loss": 17.2036, "step": 431110 }, { "epoch": 0.8708896762646606, "grad_norm": 154.9571075439453, "learning_rate": 5.670430091065682e-07, "loss": 10.5864, "step": 431120 }, { "epoch": 0.8709098768973444, "grad_norm": 662.7469482421875, "learning_rate": 5.668815582967074e-07, "loss": 17.816, "step": 431130 }, { "epoch": 0.8709300775300283, "grad_norm": 295.2125244140625, "learning_rate": 5.667201290933278e-07, "loss": 9.9797, "step": 431140 }, { "epoch": 0.8709502781627121, "grad_norm": 101.50379180908203, "learning_rate": 5.665587214972173e-07, "loss": 16.7628, "step": 431150 }, { "epoch": 0.8709704787953959, "grad_norm": 238.20875549316406, "learning_rate": 5.663973355091624e-07, "loss": 22.1564, "step": 431160 }, { "epoch": 0.8709906794280797, "grad_norm": 10.923376083374023, "learning_rate": 5.662359711299481e-07, "loss": 20.7295, "step": 431170 }, { "epoch": 0.8710108800607635, "grad_norm": 
430.8266906738281, "learning_rate": 5.66074628360363e-07, "loss": 13.5221, "step": 431180 }, { "epoch": 0.8710310806934474, "grad_norm": 339.6048278808594, "learning_rate": 5.659133072011919e-07, "loss": 9.1787, "step": 431190 }, { "epoch": 0.8710512813261312, "grad_norm": 364.2235412597656, "learning_rate": 5.657520076532208e-07, "loss": 23.0557, "step": 431200 }, { "epoch": 0.8710714819588149, "grad_norm": 451.9517822265625, "learning_rate": 5.655907297172375e-07, "loss": 18.2305, "step": 431210 }, { "epoch": 0.8710916825914987, "grad_norm": 389.908935546875, "learning_rate": 5.654294733940263e-07, "loss": 24.1222, "step": 431220 }, { "epoch": 0.8711118832241825, "grad_norm": 146.44761657714844, "learning_rate": 5.65268238684375e-07, "loss": 11.2361, "step": 431230 }, { "epoch": 0.8711320838568664, "grad_norm": 268.7914733886719, "learning_rate": 5.651070255890689e-07, "loss": 13.8313, "step": 431240 }, { "epoch": 0.8711522844895502, "grad_norm": 70.17601013183594, "learning_rate": 5.649458341088915e-07, "loss": 12.3232, "step": 431250 }, { "epoch": 0.871172485122234, "grad_norm": 253.52432250976562, "learning_rate": 5.647846642446314e-07, "loss": 23.8891, "step": 431260 }, { "epoch": 0.8711926857549178, "grad_norm": 4.544342517852783, "learning_rate": 5.646235159970731e-07, "loss": 18.7502, "step": 431270 }, { "epoch": 0.8712128863876016, "grad_norm": 505.71832275390625, "learning_rate": 5.64462389367001e-07, "loss": 11.3864, "step": 431280 }, { "epoch": 0.8712330870202855, "grad_norm": 626.1432495117188, "learning_rate": 5.64301284355201e-07, "loss": 29.4871, "step": 431290 }, { "epoch": 0.8712532876529693, "grad_norm": 345.0766296386719, "learning_rate": 5.641402009624591e-07, "loss": 15.6212, "step": 431300 }, { "epoch": 0.8712734882856531, "grad_norm": 224.95736694335938, "learning_rate": 5.639791391895605e-07, "loss": 31.2785, "step": 431310 }, { "epoch": 0.8712936889183369, "grad_norm": 168.00369262695312, "learning_rate": 5.638180990372882e-07, "loss": 14.0955, "step": 431320 }, { "epoch": 0.8713138895510207, "grad_norm": 420.13336181640625, "learning_rate": 5.6365708050643e-07, "loss": 13.7055, "step": 431330 }, { "epoch": 0.8713340901837046, "grad_norm": 414.8622131347656, "learning_rate": 5.634960835977688e-07, "loss": 13.434, "step": 431340 }, { "epoch": 0.8713542908163884, "grad_norm": 77.1890869140625, "learning_rate": 5.633351083120886e-07, "loss": 15.1375, "step": 431350 }, { "epoch": 0.8713744914490722, "grad_norm": 103.03490447998047, "learning_rate": 5.631741546501746e-07, "loss": 14.2117, "step": 431360 }, { "epoch": 0.871394692081756, "grad_norm": 775.10791015625, "learning_rate": 5.630132226128143e-07, "loss": 14.5798, "step": 431370 }, { "epoch": 0.8714148927144398, "grad_norm": 281.8716125488281, "learning_rate": 5.628523122007867e-07, "loss": 26.4327, "step": 431380 }, { "epoch": 0.8714350933471237, "grad_norm": 1007.6710205078125, "learning_rate": 5.626914234148794e-07, "loss": 14.8941, "step": 431390 }, { "epoch": 0.8714552939798075, "grad_norm": 223.94935607910156, "learning_rate": 5.625305562558764e-07, "loss": 19.0216, "step": 431400 }, { "epoch": 0.8714754946124913, "grad_norm": 555.6204223632812, "learning_rate": 5.623697107245619e-07, "loss": 21.2055, "step": 431410 }, { "epoch": 0.8714956952451751, "grad_norm": 259.8000183105469, "learning_rate": 5.622088868217179e-07, "loss": 10.3563, "step": 431420 }, { "epoch": 0.8715158958778589, "grad_norm": 346.1632385253906, "learning_rate": 5.620480845481291e-07, "loss": 16.3341, "step": 431430 }, { "epoch": 
0.8715360965105428, "grad_norm": 343.7537536621094, "learning_rate": 5.618873039045825e-07, "loss": 17.5446, "step": 431440 }, { "epoch": 0.8715562971432266, "grad_norm": 157.0694580078125, "learning_rate": 5.617265448918563e-07, "loss": 17.3536, "step": 431450 }, { "epoch": 0.8715764977759103, "grad_norm": 279.52642822265625, "learning_rate": 5.615658075107366e-07, "loss": 23.2342, "step": 431460 }, { "epoch": 0.8715966984085941, "grad_norm": 355.4528503417969, "learning_rate": 5.614050917620084e-07, "loss": 12.9763, "step": 431470 }, { "epoch": 0.8716168990412779, "grad_norm": 355.9991760253906, "learning_rate": 5.612443976464527e-07, "loss": 16.9133, "step": 431480 }, { "epoch": 0.8716370996739617, "grad_norm": 264.4765319824219, "learning_rate": 5.610837251648532e-07, "loss": 16.8319, "step": 431490 }, { "epoch": 0.8716573003066456, "grad_norm": 707.2461547851562, "learning_rate": 5.609230743179939e-07, "loss": 14.6348, "step": 431500 }, { "epoch": 0.8716775009393294, "grad_norm": 394.54473876953125, "learning_rate": 5.607624451066568e-07, "loss": 14.8244, "step": 431510 }, { "epoch": 0.8716977015720132, "grad_norm": 259.0372314453125, "learning_rate": 5.606018375316246e-07, "loss": 12.0606, "step": 431520 }, { "epoch": 0.871717902204697, "grad_norm": 168.06674194335938, "learning_rate": 5.604412515936814e-07, "loss": 19.1726, "step": 431530 }, { "epoch": 0.8717381028373808, "grad_norm": 333.35650634765625, "learning_rate": 5.602806872936079e-07, "loss": 19.7599, "step": 431540 }, { "epoch": 0.8717583034700647, "grad_norm": 696.573974609375, "learning_rate": 5.601201446321891e-07, "loss": 12.1974, "step": 431550 }, { "epoch": 0.8717785041027485, "grad_norm": 419.63787841796875, "learning_rate": 5.599596236102068e-07, "loss": 19.8364, "step": 431560 }, { "epoch": 0.8717987047354323, "grad_norm": 172.09666442871094, "learning_rate": 5.597991242284407e-07, "loss": 21.4207, "step": 431570 }, { "epoch": 0.8718189053681161, "grad_norm": 339.2966613769531, "learning_rate": 5.596386464876769e-07, "loss": 22.9744, "step": 431580 }, { "epoch": 0.8718391060008, "grad_norm": 554.4182739257812, "learning_rate": 5.594781903886942e-07, "loss": 17.6794, "step": 431590 }, { "epoch": 0.8718593066334838, "grad_norm": 452.2076416015625, "learning_rate": 5.593177559322776e-07, "loss": 42.9131, "step": 431600 }, { "epoch": 0.8718795072661676, "grad_norm": 175.66160583496094, "learning_rate": 5.591573431192066e-07, "loss": 16.0131, "step": 431610 }, { "epoch": 0.8718997078988514, "grad_norm": 335.3212585449219, "learning_rate": 5.589969519502652e-07, "loss": 19.758, "step": 431620 }, { "epoch": 0.8719199085315352, "grad_norm": 366.0373840332031, "learning_rate": 5.588365824262343e-07, "loss": 13.9156, "step": 431630 }, { "epoch": 0.871940109164219, "grad_norm": 330.110107421875, "learning_rate": 5.586762345478935e-07, "loss": 12.1645, "step": 431640 }, { "epoch": 0.8719603097969029, "grad_norm": 303.6749572753906, "learning_rate": 5.585159083160274e-07, "loss": 20.0114, "step": 431650 }, { "epoch": 0.8719805104295867, "grad_norm": 408.7389221191406, "learning_rate": 5.583556037314164e-07, "loss": 9.4455, "step": 431660 }, { "epoch": 0.8720007110622705, "grad_norm": 513.5108032226562, "learning_rate": 5.581953207948404e-07, "loss": 12.3123, "step": 431670 }, { "epoch": 0.8720209116949543, "grad_norm": 308.0274658203125, "learning_rate": 5.58035059507081e-07, "loss": 11.2197, "step": 431680 }, { "epoch": 0.8720411123276381, "grad_norm": 446.0270690917969, "learning_rate": 5.578748198689226e-07, "loss": 
15.9516, "step": 431690 }, { "epoch": 0.872061312960322, "grad_norm": 491.5663757324219, "learning_rate": 5.577146018811419e-07, "loss": 25.5518, "step": 431700 }, { "epoch": 0.8720815135930058, "grad_norm": 377.6907958984375, "learning_rate": 5.575544055445209e-07, "loss": 18.1277, "step": 431710 }, { "epoch": 0.8721017142256895, "grad_norm": 401.6126708984375, "learning_rate": 5.573942308598418e-07, "loss": 15.5451, "step": 431720 }, { "epoch": 0.8721219148583733, "grad_norm": 740.29931640625, "learning_rate": 5.572340778278845e-07, "loss": 27.8595, "step": 431730 }, { "epoch": 0.8721421154910571, "grad_norm": 167.87570190429688, "learning_rate": 5.570739464494284e-07, "loss": 24.6506, "step": 431740 }, { "epoch": 0.872162316123741, "grad_norm": 236.99952697753906, "learning_rate": 5.569138367252553e-07, "loss": 7.629, "step": 431750 }, { "epoch": 0.8721825167564248, "grad_norm": 101.96878051757812, "learning_rate": 5.567537486561476e-07, "loss": 10.9722, "step": 431760 }, { "epoch": 0.8722027173891086, "grad_norm": 275.85247802734375, "learning_rate": 5.565936822428808e-07, "loss": 13.2331, "step": 431770 }, { "epoch": 0.8722229180217924, "grad_norm": 294.6750183105469, "learning_rate": 5.564336374862373e-07, "loss": 13.1657, "step": 431780 }, { "epoch": 0.8722431186544762, "grad_norm": 430.006103515625, "learning_rate": 5.562736143869984e-07, "loss": 15.3141, "step": 431790 }, { "epoch": 0.87226331928716, "grad_norm": 288.45123291015625, "learning_rate": 5.561136129459432e-07, "loss": 11.9062, "step": 431800 }, { "epoch": 0.8722835199198439, "grad_norm": 201.5088653564453, "learning_rate": 5.559536331638498e-07, "loss": 17.3725, "step": 431810 }, { "epoch": 0.8723037205525277, "grad_norm": 431.68145751953125, "learning_rate": 5.557936750415011e-07, "loss": 11.8276, "step": 431820 }, { "epoch": 0.8723239211852115, "grad_norm": 385.95745849609375, "learning_rate": 5.556337385796734e-07, "loss": 15.5291, "step": 431830 }, { "epoch": 0.8723441218178953, "grad_norm": 78.91565704345703, "learning_rate": 5.55473823779149e-07, "loss": 17.6818, "step": 431840 }, { "epoch": 0.8723643224505792, "grad_norm": 32.79534149169922, "learning_rate": 5.553139306407062e-07, "loss": 20.4146, "step": 431850 }, { "epoch": 0.872384523083263, "grad_norm": 160.63734436035156, "learning_rate": 5.551540591651234e-07, "loss": 18.6493, "step": 431860 }, { "epoch": 0.8724047237159468, "grad_norm": 673.2002563476562, "learning_rate": 5.549942093531812e-07, "loss": 27.3512, "step": 431870 }, { "epoch": 0.8724249243486306, "grad_norm": 1010.4269409179688, "learning_rate": 5.548343812056584e-07, "loss": 24.5515, "step": 431880 }, { "epoch": 0.8724451249813144, "grad_norm": 185.97637939453125, "learning_rate": 5.546745747233323e-07, "loss": 8.7487, "step": 431890 }, { "epoch": 0.8724653256139983, "grad_norm": 88.39303588867188, "learning_rate": 5.545147899069836e-07, "loss": 12.3746, "step": 431900 }, { "epoch": 0.8724855262466821, "grad_norm": 796.1704711914062, "learning_rate": 5.543550267573916e-07, "loss": 21.8673, "step": 431910 }, { "epoch": 0.8725057268793659, "grad_norm": 188.41055297851562, "learning_rate": 5.541952852753341e-07, "loss": 11.4995, "step": 431920 }, { "epoch": 0.8725259275120497, "grad_norm": 184.47239685058594, "learning_rate": 5.540355654615881e-07, "loss": 13.8518, "step": 431930 }, { "epoch": 0.8725461281447335, "grad_norm": 144.6045379638672, "learning_rate": 5.538758673169348e-07, "loss": 25.1702, "step": 431940 }, { "epoch": 0.8725663287774174, "grad_norm": 425.9952392578125, 
"learning_rate": 5.537161908421512e-07, "loss": 17.863, "step": 431950 }, { "epoch": 0.8725865294101012, "grad_norm": 225.549072265625, "learning_rate": 5.535565360380146e-07, "loss": 36.3956, "step": 431960 }, { "epoch": 0.872606730042785, "grad_norm": 264.4824523925781, "learning_rate": 5.533969029053043e-07, "loss": 9.2637, "step": 431970 }, { "epoch": 0.8726269306754687, "grad_norm": 424.3130798339844, "learning_rate": 5.532372914448003e-07, "loss": 17.5098, "step": 431980 }, { "epoch": 0.8726471313081525, "grad_norm": 24.801950454711914, "learning_rate": 5.530777016572763e-07, "loss": 18.5786, "step": 431990 }, { "epoch": 0.8726673319408363, "grad_norm": 175.92909240722656, "learning_rate": 5.529181335435124e-07, "loss": 25.0721, "step": 432000 }, { "epoch": 0.8726875325735202, "grad_norm": 133.3310546875, "learning_rate": 5.527585871042867e-07, "loss": 35.488, "step": 432010 }, { "epoch": 0.872707733206204, "grad_norm": 51.423831939697266, "learning_rate": 5.525990623403765e-07, "loss": 21.8321, "step": 432020 }, { "epoch": 0.8727279338388878, "grad_norm": 757.439453125, "learning_rate": 5.524395592525584e-07, "loss": 14.381, "step": 432030 }, { "epoch": 0.8727481344715716, "grad_norm": 194.53475952148438, "learning_rate": 5.522800778416099e-07, "loss": 38.3131, "step": 432040 }, { "epoch": 0.8727683351042554, "grad_norm": 474.09283447265625, "learning_rate": 5.521206181083111e-07, "loss": 22.2005, "step": 432050 }, { "epoch": 0.8727885357369393, "grad_norm": 117.4988021850586, "learning_rate": 5.519611800534347e-07, "loss": 12.3979, "step": 432060 }, { "epoch": 0.8728087363696231, "grad_norm": 265.8170166015625, "learning_rate": 5.518017636777606e-07, "loss": 14.9616, "step": 432070 }, { "epoch": 0.8728289370023069, "grad_norm": 243.57675170898438, "learning_rate": 5.516423689820655e-07, "loss": 22.1666, "step": 432080 }, { "epoch": 0.8728491376349907, "grad_norm": 331.3977355957031, "learning_rate": 5.514829959671264e-07, "loss": 15.4174, "step": 432090 }, { "epoch": 0.8728693382676745, "grad_norm": 238.21192932128906, "learning_rate": 5.51323644633719e-07, "loss": 6.8709, "step": 432100 }, { "epoch": 0.8728895389003584, "grad_norm": 210.3321990966797, "learning_rate": 5.511643149826206e-07, "loss": 24.2467, "step": 432110 }, { "epoch": 0.8729097395330422, "grad_norm": 312.65826416015625, "learning_rate": 5.510050070146083e-07, "loss": 14.3068, "step": 432120 }, { "epoch": 0.872929940165726, "grad_norm": 549.9442138671875, "learning_rate": 5.508457207304574e-07, "loss": 24.7624, "step": 432130 }, { "epoch": 0.8729501407984098, "grad_norm": 259.71240234375, "learning_rate": 5.506864561309455e-07, "loss": 13.4766, "step": 432140 }, { "epoch": 0.8729703414310936, "grad_norm": 175.85206604003906, "learning_rate": 5.505272132168471e-07, "loss": 12.3887, "step": 432150 }, { "epoch": 0.8729905420637775, "grad_norm": 409.7877197265625, "learning_rate": 5.503679919889404e-07, "loss": 18.4446, "step": 432160 }, { "epoch": 0.8730107426964613, "grad_norm": 258.2601623535156, "learning_rate": 5.502087924480005e-07, "loss": 26.1197, "step": 432170 }, { "epoch": 0.8730309433291451, "grad_norm": 183.85809326171875, "learning_rate": 5.50049614594802e-07, "loss": 14.9734, "step": 432180 }, { "epoch": 0.8730511439618289, "grad_norm": 342.5546569824219, "learning_rate": 5.498904584301235e-07, "loss": 22.1346, "step": 432190 }, { "epoch": 0.8730713445945127, "grad_norm": 303.18170166015625, "learning_rate": 5.497313239547374e-07, "loss": 14.67, "step": 432200 }, { "epoch": 0.8730915452271966, 
"grad_norm": 280.4842529296875, "learning_rate": 5.49572211169423e-07, "loss": 24.5363, "step": 432210 }, { "epoch": 0.8731117458598804, "grad_norm": 332.1897277832031, "learning_rate": 5.49413120074952e-07, "loss": 21.0212, "step": 432220 }, { "epoch": 0.8731319464925641, "grad_norm": 602.5610961914062, "learning_rate": 5.492540506721033e-07, "loss": 16.7147, "step": 432230 }, { "epoch": 0.8731521471252479, "grad_norm": 0.8453227281570435, "learning_rate": 5.490950029616504e-07, "loss": 30.1204, "step": 432240 }, { "epoch": 0.8731723477579317, "grad_norm": 269.9468994140625, "learning_rate": 5.489359769443675e-07, "loss": 21.4242, "step": 432250 }, { "epoch": 0.8731925483906156, "grad_norm": 805.483154296875, "learning_rate": 5.487769726210318e-07, "loss": 16.6819, "step": 432260 }, { "epoch": 0.8732127490232994, "grad_norm": 526.4840698242188, "learning_rate": 5.486179899924171e-07, "loss": 12.0968, "step": 432270 }, { "epoch": 0.8732329496559832, "grad_norm": 212.0432891845703, "learning_rate": 5.484590290592979e-07, "loss": 21.728, "step": 432280 }, { "epoch": 0.873253150288667, "grad_norm": 108.55610656738281, "learning_rate": 5.483000898224494e-07, "loss": 10.9841, "step": 432290 }, { "epoch": 0.8732733509213508, "grad_norm": 398.4762878417969, "learning_rate": 5.48141172282648e-07, "loss": 55.5959, "step": 432300 }, { "epoch": 0.8732935515540347, "grad_norm": 319.7445983886719, "learning_rate": 5.479822764406645e-07, "loss": 23.2687, "step": 432310 }, { "epoch": 0.8733137521867185, "grad_norm": 391.5088195800781, "learning_rate": 5.478234022972756e-07, "loss": 14.2518, "step": 432320 }, { "epoch": 0.8733339528194023, "grad_norm": 249.58383178710938, "learning_rate": 5.476645498532567e-07, "loss": 11.0005, "step": 432330 }, { "epoch": 0.8733541534520861, "grad_norm": 648.7304077148438, "learning_rate": 5.475057191093808e-07, "loss": 16.0999, "step": 432340 }, { "epoch": 0.8733743540847699, "grad_norm": 190.3310089111328, "learning_rate": 5.473469100664208e-07, "loss": 19.4721, "step": 432350 }, { "epoch": 0.8733945547174538, "grad_norm": 461.1125183105469, "learning_rate": 5.471881227251518e-07, "loss": 14.6923, "step": 432360 }, { "epoch": 0.8734147553501376, "grad_norm": 504.35113525390625, "learning_rate": 5.470293570863499e-07, "loss": 20.7561, "step": 432370 }, { "epoch": 0.8734349559828214, "grad_norm": 109.82476043701172, "learning_rate": 5.46870613150785e-07, "loss": 5.3293, "step": 432380 }, { "epoch": 0.8734551566155052, "grad_norm": 343.56341552734375, "learning_rate": 5.467118909192326e-07, "loss": 22.4527, "step": 432390 }, { "epoch": 0.873475357248189, "grad_norm": 465.1142578125, "learning_rate": 5.46553190392467e-07, "loss": 16.6082, "step": 432400 }, { "epoch": 0.8734955578808729, "grad_norm": 420.0114440917969, "learning_rate": 5.46394511571261e-07, "loss": 11.3463, "step": 432410 }, { "epoch": 0.8735157585135567, "grad_norm": 172.47666931152344, "learning_rate": 5.462358544563873e-07, "loss": 14.4073, "step": 432420 }, { "epoch": 0.8735359591462405, "grad_norm": 168.5302734375, "learning_rate": 5.460772190486208e-07, "loss": 13.0114, "step": 432430 }, { "epoch": 0.8735561597789243, "grad_norm": 69.3399429321289, "learning_rate": 5.459186053487336e-07, "loss": 14.7425, "step": 432440 }, { "epoch": 0.8735763604116081, "grad_norm": 761.65478515625, "learning_rate": 5.457600133574987e-07, "loss": 24.4876, "step": 432450 }, { "epoch": 0.873596561044292, "grad_norm": 211.42430114746094, "learning_rate": 5.456014430756895e-07, "loss": 15.9545, "step": 432460 }, { "epoch": 
0.8736167616769758, "grad_norm": 216.61505126953125, "learning_rate": 5.454428945040774e-07, "loss": 12.5682, "step": 432470 }, { "epoch": 0.8736369623096596, "grad_norm": 194.28504943847656, "learning_rate": 5.452843676434377e-07, "loss": 17.7277, "step": 432480 }, { "epoch": 0.8736571629423433, "grad_norm": 80.29365539550781, "learning_rate": 5.45125862494541e-07, "loss": 20.0275, "step": 432490 }, { "epoch": 0.8736773635750271, "grad_norm": 152.5752716064453, "learning_rate": 5.449673790581611e-07, "loss": 24.6487, "step": 432500 }, { "epoch": 0.8736975642077109, "grad_norm": 554.6441650390625, "learning_rate": 5.448089173350696e-07, "loss": 9.1202, "step": 432510 }, { "epoch": 0.8737177648403948, "grad_norm": 277.0372314453125, "learning_rate": 5.446504773260386e-07, "loss": 34.0796, "step": 432520 }, { "epoch": 0.8737379654730786, "grad_norm": 0.0, "learning_rate": 5.44492059031842e-07, "loss": 14.672, "step": 432530 }, { "epoch": 0.8737581661057624, "grad_norm": 554.496337890625, "learning_rate": 5.443336624532492e-07, "loss": 18.5186, "step": 432540 }, { "epoch": 0.8737783667384462, "grad_norm": 546.7005615234375, "learning_rate": 5.44175287591035e-07, "loss": 16.7326, "step": 432550 }, { "epoch": 0.87379856737113, "grad_norm": 198.7493896484375, "learning_rate": 5.440169344459701e-07, "loss": 31.7625, "step": 432560 }, { "epoch": 0.8738187680038139, "grad_norm": 22.46010398864746, "learning_rate": 5.438586030188247e-07, "loss": 9.2043, "step": 432570 }, { "epoch": 0.8738389686364977, "grad_norm": 207.79080200195312, "learning_rate": 5.437002933103724e-07, "loss": 18.9447, "step": 432580 }, { "epoch": 0.8738591692691815, "grad_norm": 162.22372436523438, "learning_rate": 5.435420053213863e-07, "loss": 18.3355, "step": 432590 }, { "epoch": 0.8738793699018653, "grad_norm": 187.2777099609375, "learning_rate": 5.433837390526341e-07, "loss": 14.0661, "step": 432600 }, { "epoch": 0.8738995705345491, "grad_norm": 232.63430786132812, "learning_rate": 5.432254945048887e-07, "loss": 10.7602, "step": 432610 }, { "epoch": 0.873919771167233, "grad_norm": 56.217803955078125, "learning_rate": 5.430672716789232e-07, "loss": 10.8976, "step": 432620 }, { "epoch": 0.8739399717999168, "grad_norm": 368.2447509765625, "learning_rate": 5.429090705755069e-07, "loss": 11.1776, "step": 432630 }, { "epoch": 0.8739601724326006, "grad_norm": 455.3416748046875, "learning_rate": 5.427508911954105e-07, "loss": 13.8417, "step": 432640 }, { "epoch": 0.8739803730652844, "grad_norm": 212.73867797851562, "learning_rate": 5.425927335394054e-07, "loss": 17.5974, "step": 432650 }, { "epoch": 0.8740005736979682, "grad_norm": 362.45147705078125, "learning_rate": 5.424345976082645e-07, "loss": 18.1649, "step": 432660 }, { "epoch": 0.8740207743306521, "grad_norm": 493.18914794921875, "learning_rate": 5.42276483402755e-07, "loss": 39.1498, "step": 432670 }, { "epoch": 0.8740409749633359, "grad_norm": 467.24139404296875, "learning_rate": 5.421183909236494e-07, "loss": 12.8275, "step": 432680 }, { "epoch": 0.8740611755960197, "grad_norm": 296.6934814453125, "learning_rate": 5.419603201717189e-07, "loss": 22.2164, "step": 432690 }, { "epoch": 0.8740813762287035, "grad_norm": 17.59931182861328, "learning_rate": 5.418022711477333e-07, "loss": 19.6711, "step": 432700 }, { "epoch": 0.8741015768613873, "grad_norm": 393.2613220214844, "learning_rate": 5.416442438524616e-07, "loss": 38.0818, "step": 432710 }, { "epoch": 0.8741217774940712, "grad_norm": 290.41900634765625, "learning_rate": 5.414862382866759e-07, "loss": 17.1718, "step": 
432720 }, { "epoch": 0.874141978126755, "grad_norm": 296.6372375488281, "learning_rate": 5.413282544511455e-07, "loss": 12.0898, "step": 432730 }, { "epoch": 0.8741621787594387, "grad_norm": 202.93516540527344, "learning_rate": 5.4117029234664e-07, "loss": 14.88, "step": 432740 }, { "epoch": 0.8741823793921225, "grad_norm": 21.219100952148438, "learning_rate": 5.410123519739302e-07, "loss": 12.9098, "step": 432750 }, { "epoch": 0.8742025800248063, "grad_norm": 372.3573913574219, "learning_rate": 5.408544333337845e-07, "loss": 15.8375, "step": 432760 }, { "epoch": 0.8742227806574902, "grad_norm": 146.60308837890625, "learning_rate": 5.406965364269745e-07, "loss": 13.5756, "step": 432770 }, { "epoch": 0.874242981290174, "grad_norm": 507.54498291015625, "learning_rate": 5.405386612542685e-07, "loss": 8.8647, "step": 432780 }, { "epoch": 0.8742631819228578, "grad_norm": 238.99053955078125, "learning_rate": 5.403808078164358e-07, "loss": 8.5932, "step": 432790 }, { "epoch": 0.8742833825555416, "grad_norm": 383.38189697265625, "learning_rate": 5.402229761142464e-07, "loss": 16.0073, "step": 432800 }, { "epoch": 0.8743035831882254, "grad_norm": 186.34523010253906, "learning_rate": 5.400651661484684e-07, "loss": 7.5963, "step": 432810 }, { "epoch": 0.8743237838209093, "grad_norm": 380.567138671875, "learning_rate": 5.399073779198732e-07, "loss": 15.2616, "step": 432820 }, { "epoch": 0.8743439844535931, "grad_norm": 8.406536102294922, "learning_rate": 5.397496114292278e-07, "loss": 28.221, "step": 432830 }, { "epoch": 0.8743641850862769, "grad_norm": 315.7178955078125, "learning_rate": 5.395918666773026e-07, "loss": 17.5182, "step": 432840 }, { "epoch": 0.8743843857189607, "grad_norm": 289.23663330078125, "learning_rate": 5.394341436648653e-07, "loss": 22.2257, "step": 432850 }, { "epoch": 0.8744045863516445, "grad_norm": 133.96200561523438, "learning_rate": 5.392764423926844e-07, "loss": 19.679, "step": 432860 }, { "epoch": 0.8744247869843284, "grad_norm": 226.0999755859375, "learning_rate": 5.391187628615296e-07, "loss": 15.2748, "step": 432870 }, { "epoch": 0.8744449876170122, "grad_norm": 555.56201171875, "learning_rate": 5.389611050721694e-07, "loss": 21.7419, "step": 432880 }, { "epoch": 0.874465188249696, "grad_norm": 433.8108825683594, "learning_rate": 5.388034690253701e-07, "loss": 9.8576, "step": 432890 }, { "epoch": 0.8744853888823798, "grad_norm": 171.49124145507812, "learning_rate": 5.386458547219026e-07, "loss": 21.4585, "step": 432900 }, { "epoch": 0.8745055895150636, "grad_norm": 192.7606658935547, "learning_rate": 5.384882621625353e-07, "loss": 14.1203, "step": 432910 }, { "epoch": 0.8745257901477475, "grad_norm": 1570.7572021484375, "learning_rate": 5.383306913480335e-07, "loss": 32.1453, "step": 432920 }, { "epoch": 0.8745459907804313, "grad_norm": 495.3536376953125, "learning_rate": 5.381731422791664e-07, "loss": 25.808, "step": 432930 }, { "epoch": 0.8745661914131151, "grad_norm": 662.15869140625, "learning_rate": 5.380156149567034e-07, "loss": 17.0348, "step": 432940 }, { "epoch": 0.8745863920457989, "grad_norm": 261.1503601074219, "learning_rate": 5.378581093814112e-07, "loss": 9.5443, "step": 432950 }, { "epoch": 0.8746065926784827, "grad_norm": 349.7860412597656, "learning_rate": 5.377006255540562e-07, "loss": 20.0274, "step": 432960 }, { "epoch": 0.8746267933111666, "grad_norm": 153.81024169921875, "learning_rate": 5.375431634754074e-07, "loss": 16.5702, "step": 432970 }, { "epoch": 0.8746469939438504, "grad_norm": 734.0849609375, "learning_rate": 5.373857231462337e-07, 
"loss": 35.6784, "step": 432980 }, { "epoch": 0.8746671945765342, "grad_norm": 1650.5621337890625, "learning_rate": 5.372283045672994e-07, "loss": 32.2598, "step": 432990 }, { "epoch": 0.8746873952092179, "grad_norm": 35.7860107421875, "learning_rate": 5.370709077393721e-07, "loss": 24.6236, "step": 433000 }, { "epoch": 0.8747075958419017, "grad_norm": 258.3363952636719, "learning_rate": 5.369135326632219e-07, "loss": 27.3438, "step": 433010 }, { "epoch": 0.8747277964745855, "grad_norm": 46.31103515625, "learning_rate": 5.367561793396132e-07, "loss": 17.3969, "step": 433020 }, { "epoch": 0.8747479971072694, "grad_norm": 448.46282958984375, "learning_rate": 5.365988477693124e-07, "loss": 19.1224, "step": 433030 }, { "epoch": 0.8747681977399532, "grad_norm": 178.7939453125, "learning_rate": 5.364415379530891e-07, "loss": 25.5598, "step": 433040 }, { "epoch": 0.874788398372637, "grad_norm": 209.88671875, "learning_rate": 5.362842498917081e-07, "loss": 17.2414, "step": 433050 }, { "epoch": 0.8748085990053208, "grad_norm": 262.6157531738281, "learning_rate": 5.36126983585935e-07, "loss": 14.1692, "step": 433060 }, { "epoch": 0.8748287996380046, "grad_norm": 541.3820190429688, "learning_rate": 5.359697390365387e-07, "loss": 17.0255, "step": 433070 }, { "epoch": 0.8748490002706885, "grad_norm": 379.27484130859375, "learning_rate": 5.35812516244284e-07, "loss": 18.0919, "step": 433080 }, { "epoch": 0.8748692009033723, "grad_norm": 590.208251953125, "learning_rate": 5.356553152099381e-07, "loss": 24.4596, "step": 433090 }, { "epoch": 0.8748894015360561, "grad_norm": 436.2502746582031, "learning_rate": 5.354981359342659e-07, "loss": 19.3351, "step": 433100 }, { "epoch": 0.8749096021687399, "grad_norm": 154.7361297607422, "learning_rate": 5.353409784180352e-07, "loss": 6.1398, "step": 433110 }, { "epoch": 0.8749298028014237, "grad_norm": 5.425511360168457, "learning_rate": 5.35183842662011e-07, "loss": 16.9334, "step": 433120 }, { "epoch": 0.8749500034341076, "grad_norm": 375.8233947753906, "learning_rate": 5.350267286669585e-07, "loss": 10.4431, "step": 433130 }, { "epoch": 0.8749702040667914, "grad_norm": 1288.437744140625, "learning_rate": 5.348696364336448e-07, "loss": 17.4038, "step": 433140 }, { "epoch": 0.8749904046994752, "grad_norm": 644.7527465820312, "learning_rate": 5.347125659628344e-07, "loss": 18.2653, "step": 433150 }, { "epoch": 0.875010605332159, "grad_norm": 102.53956604003906, "learning_rate": 5.345555172552941e-07, "loss": 32.7191, "step": 433160 }, { "epoch": 0.8750308059648428, "grad_norm": 643.939208984375, "learning_rate": 5.343984903117889e-07, "loss": 10.4285, "step": 433170 }, { "epoch": 0.8750510065975267, "grad_norm": 478.85107421875, "learning_rate": 5.342414851330824e-07, "loss": 28.7579, "step": 433180 }, { "epoch": 0.8750712072302105, "grad_norm": 139.83828735351562, "learning_rate": 5.340845017199425e-07, "loss": 22.7458, "step": 433190 }, { "epoch": 0.8750914078628943, "grad_norm": 238.2321319580078, "learning_rate": 5.339275400731331e-07, "loss": 24.4583, "step": 433200 }, { "epoch": 0.8751116084955781, "grad_norm": 358.0097961425781, "learning_rate": 5.337706001934184e-07, "loss": 12.0504, "step": 433210 }, { "epoch": 0.8751318091282619, "grad_norm": 76.72681427001953, "learning_rate": 5.33613682081564e-07, "loss": 15.6278, "step": 433220 }, { "epoch": 0.8751520097609458, "grad_norm": 533.8015747070312, "learning_rate": 5.334567857383354e-07, "loss": 29.6115, "step": 433230 }, { "epoch": 0.8751722103936296, "grad_norm": 436.1002502441406, "learning_rate": 
5.332999111644971e-07, "loss": 17.0068, "step": 433240 }, { "epoch": 0.8751924110263134, "grad_norm": 372.8768310546875, "learning_rate": 5.331430583608122e-07, "loss": 17.8014, "step": 433250 }, { "epoch": 0.8752126116589971, "grad_norm": 635.5821533203125, "learning_rate": 5.329862273280462e-07, "loss": 28.728, "step": 433260 }, { "epoch": 0.8752328122916809, "grad_norm": 17.68754768371582, "learning_rate": 5.328294180669658e-07, "loss": 15.5665, "step": 433270 }, { "epoch": 0.8752530129243647, "grad_norm": 138.2192840576172, "learning_rate": 5.326726305783308e-07, "loss": 25.2673, "step": 433280 }, { "epoch": 0.8752732135570486, "grad_norm": 27.268882751464844, "learning_rate": 5.325158648629075e-07, "loss": 18.3581, "step": 433290 }, { "epoch": 0.8752934141897324, "grad_norm": 136.46170043945312, "learning_rate": 5.323591209214612e-07, "loss": 21.0438, "step": 433300 }, { "epoch": 0.8753136148224162, "grad_norm": 241.7820587158203, "learning_rate": 5.322023987547547e-07, "loss": 13.798, "step": 433310 }, { "epoch": 0.8753338154551, "grad_norm": 367.8775329589844, "learning_rate": 5.320456983635508e-07, "loss": 16.049, "step": 433320 }, { "epoch": 0.8753540160877838, "grad_norm": 531.8893432617188, "learning_rate": 5.318890197486154e-07, "loss": 20.8812, "step": 433330 }, { "epoch": 0.8753742167204677, "grad_norm": 166.06248474121094, "learning_rate": 5.317323629107108e-07, "loss": 11.5503, "step": 433340 }, { "epoch": 0.8753944173531515, "grad_norm": 269.6576232910156, "learning_rate": 5.315757278505995e-07, "loss": 18.7279, "step": 433350 }, { "epoch": 0.8754146179858353, "grad_norm": 50.614158630371094, "learning_rate": 5.314191145690473e-07, "loss": 27.3247, "step": 433360 }, { "epoch": 0.8754348186185191, "grad_norm": 360.9093017578125, "learning_rate": 5.312625230668155e-07, "loss": 22.016, "step": 433370 }, { "epoch": 0.875455019251203, "grad_norm": 285.93548583984375, "learning_rate": 5.311059533446694e-07, "loss": 13.3252, "step": 433380 }, { "epoch": 0.8754752198838868, "grad_norm": 201.05575561523438, "learning_rate": 5.309494054033704e-07, "loss": 11.7539, "step": 433390 }, { "epoch": 0.8754954205165706, "grad_norm": 509.9866638183594, "learning_rate": 5.307928792436812e-07, "loss": 22.6127, "step": 433400 }, { "epoch": 0.8755156211492544, "grad_norm": 513.7008056640625, "learning_rate": 5.306363748663668e-07, "loss": 26.1914, "step": 433410 }, { "epoch": 0.8755358217819382, "grad_norm": 431.09210205078125, "learning_rate": 5.304798922721871e-07, "loss": 15.9364, "step": 433420 }, { "epoch": 0.875556022414622, "grad_norm": 260.40850830078125, "learning_rate": 5.303234314619071e-07, "loss": 10.2867, "step": 433430 }, { "epoch": 0.8755762230473059, "grad_norm": 502.4874267578125, "learning_rate": 5.301669924362884e-07, "loss": 23.3825, "step": 433440 }, { "epoch": 0.8755964236799897, "grad_norm": 321.4576416015625, "learning_rate": 5.300105751960943e-07, "loss": 17.2701, "step": 433450 }, { "epoch": 0.8756166243126735, "grad_norm": 139.70773315429688, "learning_rate": 5.298541797420864e-07, "loss": 12.7987, "step": 433460 }, { "epoch": 0.8756368249453573, "grad_norm": 582.2942504882812, "learning_rate": 5.296978060750257e-07, "loss": 16.9464, "step": 433470 }, { "epoch": 0.8756570255780411, "grad_norm": 487.0493469238281, "learning_rate": 5.295414541956773e-07, "loss": 24.6904, "step": 433480 }, { "epoch": 0.875677226210725, "grad_norm": 231.55262756347656, "learning_rate": 5.293851241048015e-07, "loss": 11.211, "step": 433490 }, { "epoch": 0.8756974268434088, "grad_norm": 
110.7741928100586, "learning_rate": 5.292288158031595e-07, "loss": 15.8149, "step": 433500 }, { "epoch": 0.8757176274760925, "grad_norm": 195.20785522460938, "learning_rate": 5.290725292915138e-07, "loss": 15.7708, "step": 433510 }, { "epoch": 0.8757378281087763, "grad_norm": 195.65206909179688, "learning_rate": 5.28916264570628e-07, "loss": 20.5607, "step": 433520 }, { "epoch": 0.8757580287414601, "grad_norm": 98.87440490722656, "learning_rate": 5.287600216412609e-07, "loss": 21.9518, "step": 433530 }, { "epoch": 0.875778229374144, "grad_norm": 632.74267578125, "learning_rate": 5.286038005041744e-07, "loss": 27.4733, "step": 433540 }, { "epoch": 0.8757984300068278, "grad_norm": 154.83694458007812, "learning_rate": 5.28447601160132e-07, "loss": 30.9031, "step": 433550 }, { "epoch": 0.8758186306395116, "grad_norm": 567.2290649414062, "learning_rate": 5.28291423609894e-07, "loss": 20.3919, "step": 433560 }, { "epoch": 0.8758388312721954, "grad_norm": 287.53558349609375, "learning_rate": 5.281352678542195e-07, "loss": 13.9235, "step": 433570 }, { "epoch": 0.8758590319048792, "grad_norm": 43.357112884521484, "learning_rate": 5.279791338938717e-07, "loss": 22.6026, "step": 433580 }, { "epoch": 0.8758792325375631, "grad_norm": 369.96173095703125, "learning_rate": 5.278230217296132e-07, "loss": 26.5123, "step": 433590 }, { "epoch": 0.8758994331702469, "grad_norm": 405.46685791015625, "learning_rate": 5.276669313622013e-07, "loss": 18.1153, "step": 433600 }, { "epoch": 0.8759196338029307, "grad_norm": 311.51251220703125, "learning_rate": 5.275108627923975e-07, "loss": 21.1751, "step": 433610 }, { "epoch": 0.8759398344356145, "grad_norm": 322.4067077636719, "learning_rate": 5.273548160209651e-07, "loss": 20.5398, "step": 433620 }, { "epoch": 0.8759600350682983, "grad_norm": 423.6556396484375, "learning_rate": 5.271987910486625e-07, "loss": 20.9919, "step": 433630 }, { "epoch": 0.8759802357009822, "grad_norm": 623.9129638671875, "learning_rate": 5.270427878762496e-07, "loss": 16.2219, "step": 433640 }, { "epoch": 0.876000436333666, "grad_norm": 353.4367370605469, "learning_rate": 5.268868065044886e-07, "loss": 18.939, "step": 433650 }, { "epoch": 0.8760206369663498, "grad_norm": 227.28196716308594, "learning_rate": 5.267308469341387e-07, "loss": 20.1464, "step": 433660 }, { "epoch": 0.8760408375990336, "grad_norm": 672.3381958007812, "learning_rate": 5.265749091659589e-07, "loss": 21.6426, "step": 433670 }, { "epoch": 0.8760610382317174, "grad_norm": 361.6789855957031, "learning_rate": 5.264189932007119e-07, "loss": 13.6678, "step": 433680 }, { "epoch": 0.8760812388644013, "grad_norm": 427.6011657714844, "learning_rate": 5.262630990391549e-07, "loss": 19.2339, "step": 433690 }, { "epoch": 0.8761014394970851, "grad_norm": 503.2677307128906, "learning_rate": 5.2610722668205e-07, "loss": 19.9637, "step": 433700 }, { "epoch": 0.8761216401297689, "grad_norm": 466.8319396972656, "learning_rate": 5.259513761301549e-07, "loss": 18.7197, "step": 433710 }, { "epoch": 0.8761418407624527, "grad_norm": 389.1748046875, "learning_rate": 5.257955473842314e-07, "loss": 13.7403, "step": 433720 }, { "epoch": 0.8761620413951365, "grad_norm": 258.1947937011719, "learning_rate": 5.25639740445037e-07, "loss": 17.3214, "step": 433730 }, { "epoch": 0.8761822420278204, "grad_norm": 548.7359619140625, "learning_rate": 5.254839553133312e-07, "loss": 11.7706, "step": 433740 }, { "epoch": 0.8762024426605042, "grad_norm": 1.1916102170944214, "learning_rate": 5.253281919898751e-07, "loss": 31.5134, "step": 433750 }, { "epoch": 
0.876222643293188, "grad_norm": 550.4560546875, "learning_rate": 5.251724504754258e-07, "loss": 10.0177, "step": 433760 }, { "epoch": 0.8762428439258717, "grad_norm": 305.590576171875, "learning_rate": 5.250167307707437e-07, "loss": 17.4997, "step": 433770 }, { "epoch": 0.8762630445585555, "grad_norm": 482.72845458984375, "learning_rate": 5.24861032876588e-07, "loss": 22.2059, "step": 433780 }, { "epoch": 0.8762832451912393, "grad_norm": 302.8846435546875, "learning_rate": 5.247053567937155e-07, "loss": 21.9938, "step": 433790 }, { "epoch": 0.8763034458239232, "grad_norm": 25.524728775024414, "learning_rate": 5.245497025228874e-07, "loss": 32.624, "step": 433800 }, { "epoch": 0.876323646456607, "grad_norm": 563.2052612304688, "learning_rate": 5.243940700648609e-07, "loss": 14.6061, "step": 433810 }, { "epoch": 0.8763438470892908, "grad_norm": 260.39739990234375, "learning_rate": 5.242384594203942e-07, "loss": 29.1842, "step": 433820 }, { "epoch": 0.8763640477219746, "grad_norm": 326.4090270996094, "learning_rate": 5.240828705902462e-07, "loss": 6.721, "step": 433830 }, { "epoch": 0.8763842483546584, "grad_norm": 285.9054870605469, "learning_rate": 5.239273035751763e-07, "loss": 28.3131, "step": 433840 }, { "epoch": 0.8764044489873423, "grad_norm": 558.6888427734375, "learning_rate": 5.237717583759421e-07, "loss": 18.2302, "step": 433850 }, { "epoch": 0.8764246496200261, "grad_norm": 381.3103332519531, "learning_rate": 5.236162349933005e-07, "loss": 16.1051, "step": 433860 }, { "epoch": 0.8764448502527099, "grad_norm": 773.1160278320312, "learning_rate": 5.234607334280117e-07, "loss": 13.0957, "step": 433870 }, { "epoch": 0.8764650508853937, "grad_norm": 564.5609130859375, "learning_rate": 5.23305253680832e-07, "loss": 19.1361, "step": 433880 }, { "epoch": 0.8764852515180775, "grad_norm": 561.9713745117188, "learning_rate": 5.231497957525184e-07, "loss": 17.2799, "step": 433890 }, { "epoch": 0.8765054521507614, "grad_norm": 131.5213165283203, "learning_rate": 5.229943596438297e-07, "loss": 18.6086, "step": 433900 }, { "epoch": 0.8765256527834452, "grad_norm": 312.5356140136719, "learning_rate": 5.22838945355525e-07, "loss": 15.7515, "step": 433910 }, { "epoch": 0.876545853416129, "grad_norm": 225.6712646484375, "learning_rate": 5.2268355288836e-07, "loss": 7.8059, "step": 433920 }, { "epoch": 0.8765660540488128, "grad_norm": 158.82826232910156, "learning_rate": 5.225281822430911e-07, "loss": 17.0044, "step": 433930 }, { "epoch": 0.8765862546814966, "grad_norm": 908.42578125, "learning_rate": 5.22372833420478e-07, "loss": 33.0427, "step": 433940 }, { "epoch": 0.8766064553141805, "grad_norm": 103.19387817382812, "learning_rate": 5.222175064212764e-07, "loss": 9.7731, "step": 433950 }, { "epoch": 0.8766266559468643, "grad_norm": 747.3312377929688, "learning_rate": 5.220622012462429e-07, "loss": 25.4204, "step": 433960 }, { "epoch": 0.8766468565795481, "grad_norm": 188.887939453125, "learning_rate": 5.219069178961361e-07, "loss": 17.6252, "step": 433970 }, { "epoch": 0.8766670572122319, "grad_norm": 572.9087524414062, "learning_rate": 5.217516563717107e-07, "loss": 26.657, "step": 433980 }, { "epoch": 0.8766872578449157, "grad_norm": 1910.318603515625, "learning_rate": 5.215964166737258e-07, "loss": 22.2355, "step": 433990 }, { "epoch": 0.8767074584775996, "grad_norm": 343.5889587402344, "learning_rate": 5.214411988029355e-07, "loss": 14.9556, "step": 434000 }, { "epoch": 0.8767276591102834, "grad_norm": 452.5372619628906, "learning_rate": 5.212860027600986e-07, "loss": 11.9828, "step": 434010 
}, { "epoch": 0.8767478597429671, "grad_norm": 744.7176513671875, "learning_rate": 5.21130828545971e-07, "loss": 9.0645, "step": 434020 }, { "epoch": 0.8767680603756509, "grad_norm": 317.92132568359375, "learning_rate": 5.209756761613072e-07, "loss": 21.652, "step": 434030 }, { "epoch": 0.8767882610083347, "grad_norm": 102.97876739501953, "learning_rate": 5.208205456068655e-07, "loss": 20.3339, "step": 434040 }, { "epoch": 0.8768084616410186, "grad_norm": 526.6455078125, "learning_rate": 5.206654368834002e-07, "loss": 29.8651, "step": 434050 }, { "epoch": 0.8768286622737024, "grad_norm": 283.0181579589844, "learning_rate": 5.205103499916697e-07, "loss": 10.6281, "step": 434060 }, { "epoch": 0.8768488629063862, "grad_norm": 1044.4290771484375, "learning_rate": 5.203552849324284e-07, "loss": 18.4944, "step": 434070 }, { "epoch": 0.87686906353907, "grad_norm": 282.4901428222656, "learning_rate": 5.202002417064306e-07, "loss": 28.6363, "step": 434080 }, { "epoch": 0.8768892641717538, "grad_norm": 400.8611755371094, "learning_rate": 5.200452203144352e-07, "loss": 22.2381, "step": 434090 }, { "epoch": 0.8769094648044377, "grad_norm": 384.8634033203125, "learning_rate": 5.198902207571955e-07, "loss": 12.7948, "step": 434100 }, { "epoch": 0.8769296654371215, "grad_norm": 218.68125915527344, "learning_rate": 5.197352430354669e-07, "loss": 19.107, "step": 434110 }, { "epoch": 0.8769498660698053, "grad_norm": 254.05931091308594, "learning_rate": 5.19580287150005e-07, "loss": 33.2748, "step": 434120 }, { "epoch": 0.8769700667024891, "grad_norm": 872.2964477539062, "learning_rate": 5.194253531015675e-07, "loss": 17.5088, "step": 434130 }, { "epoch": 0.8769902673351729, "grad_norm": 439.51593017578125, "learning_rate": 5.192704408909055e-07, "loss": 17.5281, "step": 434140 }, { "epoch": 0.8770104679678568, "grad_norm": 149.76513671875, "learning_rate": 5.191155505187756e-07, "loss": 17.1009, "step": 434150 }, { "epoch": 0.8770306686005406, "grad_norm": 145.25714111328125, "learning_rate": 5.189606819859344e-07, "loss": 11.5807, "step": 434160 }, { "epoch": 0.8770508692332244, "grad_norm": 513.3689575195312, "learning_rate": 5.188058352931352e-07, "loss": 18.9419, "step": 434170 }, { "epoch": 0.8770710698659082, "grad_norm": 817.205322265625, "learning_rate": 5.186510104411319e-07, "loss": 21.3864, "step": 434180 }, { "epoch": 0.877091270498592, "grad_norm": 311.3988952636719, "learning_rate": 5.184962074306798e-07, "loss": 16.6342, "step": 434190 }, { "epoch": 0.8771114711312759, "grad_norm": 298.127197265625, "learning_rate": 5.183414262625364e-07, "loss": 12.2656, "step": 434200 }, { "epoch": 0.8771316717639597, "grad_norm": 143.401611328125, "learning_rate": 5.181866669374507e-07, "loss": 12.541, "step": 434210 }, { "epoch": 0.8771518723966435, "grad_norm": 945.0745239257812, "learning_rate": 5.180319294561797e-07, "loss": 20.0791, "step": 434220 }, { "epoch": 0.8771720730293273, "grad_norm": 256.93524169921875, "learning_rate": 5.178772138194782e-07, "loss": 15.6674, "step": 434230 }, { "epoch": 0.8771922736620111, "grad_norm": 766.1445922851562, "learning_rate": 5.177225200281e-07, "loss": 19.7, "step": 434240 }, { "epoch": 0.877212474294695, "grad_norm": 280.7933044433594, "learning_rate": 5.175678480827972e-07, "loss": 15.9839, "step": 434250 }, { "epoch": 0.8772326749273788, "grad_norm": 542.4453735351562, "learning_rate": 5.174131979843266e-07, "loss": 21.3026, "step": 434260 }, { "epoch": 0.8772528755600626, "grad_norm": 401.15228271484375, "learning_rate": 5.172585697334398e-07, "loss": 
22.7436, "step": 434270 }, { "epoch": 0.8772730761927463, "grad_norm": 469.0462341308594, "learning_rate": 5.171039633308905e-07, "loss": 14.0386, "step": 434280 }, { "epoch": 0.8772932768254301, "grad_norm": 233.45587158203125, "learning_rate": 5.169493787774338e-07, "loss": 13.6644, "step": 434290 }, { "epoch": 0.8773134774581139, "grad_norm": 294.5813293457031, "learning_rate": 5.167948160738206e-07, "loss": 10.5622, "step": 434300 }, { "epoch": 0.8773336780907978, "grad_norm": 11.028252601623535, "learning_rate": 5.166402752208071e-07, "loss": 8.3747, "step": 434310 }, { "epoch": 0.8773538787234816, "grad_norm": 437.0497741699219, "learning_rate": 5.164857562191439e-07, "loss": 24.6234, "step": 434320 }, { "epoch": 0.8773740793561654, "grad_norm": 663.6287231445312, "learning_rate": 5.163312590695869e-07, "loss": 21.4791, "step": 434330 }, { "epoch": 0.8773942799888492, "grad_norm": 246.58670043945312, "learning_rate": 5.161767837728871e-07, "loss": 21.7751, "step": 434340 }, { "epoch": 0.877414480621533, "grad_norm": 15.990674018859863, "learning_rate": 5.160223303297967e-07, "loss": 13.354, "step": 434350 }, { "epoch": 0.8774346812542169, "grad_norm": 264.93389892578125, "learning_rate": 5.15867898741071e-07, "loss": 17.3039, "step": 434360 }, { "epoch": 0.8774548818869007, "grad_norm": 337.698974609375, "learning_rate": 5.1571348900746e-07, "loss": 17.1138, "step": 434370 }, { "epoch": 0.8774750825195845, "grad_norm": 445.1646423339844, "learning_rate": 5.155591011297184e-07, "loss": 33.2997, "step": 434380 }, { "epoch": 0.8774952831522683, "grad_norm": 197.03204345703125, "learning_rate": 5.154047351085983e-07, "loss": 12.7977, "step": 434390 }, { "epoch": 0.8775154837849521, "grad_norm": 262.495361328125, "learning_rate": 5.152503909448503e-07, "loss": 20.8684, "step": 434400 }, { "epoch": 0.877535684417636, "grad_norm": 787.7115478515625, "learning_rate": 5.150960686392293e-07, "loss": 22.9129, "step": 434410 }, { "epoch": 0.8775558850503198, "grad_norm": 477.249755859375, "learning_rate": 5.149417681924856e-07, "loss": 20.5048, "step": 434420 }, { "epoch": 0.8775760856830036, "grad_norm": 42.68398666381836, "learning_rate": 5.147874896053711e-07, "loss": 14.379, "step": 434430 }, { "epoch": 0.8775962863156874, "grad_norm": 145.18858337402344, "learning_rate": 5.146332328786386e-07, "loss": 21.0583, "step": 434440 }, { "epoch": 0.8776164869483712, "grad_norm": 353.35400390625, "learning_rate": 5.144789980130404e-07, "loss": 19.2491, "step": 434450 }, { "epoch": 0.8776366875810551, "grad_norm": 766.2012939453125, "learning_rate": 5.143247850093274e-07, "loss": 22.5219, "step": 434460 }, { "epoch": 0.8776568882137389, "grad_norm": 329.0042419433594, "learning_rate": 5.141705938682506e-07, "loss": 20.0049, "step": 434470 }, { "epoch": 0.8776770888464227, "grad_norm": 256.41632080078125, "learning_rate": 5.140164245905633e-07, "loss": 13.8661, "step": 434480 }, { "epoch": 0.8776972894791065, "grad_norm": 472.8299255371094, "learning_rate": 5.138622771770157e-07, "loss": 19.9039, "step": 434490 }, { "epoch": 0.8777174901117903, "grad_norm": 310.2403564453125, "learning_rate": 5.137081516283582e-07, "loss": 14.9185, "step": 434500 }, { "epoch": 0.8777376907444742, "grad_norm": 534.4397583007812, "learning_rate": 5.135540479453432e-07, "loss": 17.3908, "step": 434510 }, { "epoch": 0.877757891377158, "grad_norm": 316.0336608886719, "learning_rate": 5.133999661287226e-07, "loss": 26.7963, "step": 434520 }, { "epoch": 0.8777780920098417, "grad_norm": 247.14096069335938, "learning_rate": 
5.13245906179246e-07, "loss": 28.1687, "step": 434530 }, { "epoch": 0.8777982926425255, "grad_norm": 113.46258544921875, "learning_rate": 5.130918680976643e-07, "loss": 13.7509, "step": 434540 }, { "epoch": 0.8778184932752093, "grad_norm": 235.147705078125, "learning_rate": 5.129378518847295e-07, "loss": 23.4125, "step": 434550 }, { "epoch": 0.8778386939078932, "grad_norm": 383.57684326171875, "learning_rate": 5.127838575411908e-07, "loss": 14.9999, "step": 434560 }, { "epoch": 0.877858894540577, "grad_norm": 352.58856201171875, "learning_rate": 5.126298850677991e-07, "loss": 13.527, "step": 434570 }, { "epoch": 0.8778790951732608, "grad_norm": 321.5787353515625, "learning_rate": 5.124759344653057e-07, "loss": 8.4193, "step": 434580 }, { "epoch": 0.8778992958059446, "grad_norm": 500.7260437011719, "learning_rate": 5.123220057344597e-07, "loss": 22.2562, "step": 434590 }, { "epoch": 0.8779194964386284, "grad_norm": 445.0096130371094, "learning_rate": 5.121680988760125e-07, "loss": 19.0204, "step": 434600 }, { "epoch": 0.8779396970713123, "grad_norm": 222.2227325439453, "learning_rate": 5.120142138907131e-07, "loss": 37.4008, "step": 434610 }, { "epoch": 0.8779598977039961, "grad_norm": 418.1203918457031, "learning_rate": 5.11860350779313e-07, "loss": 19.7539, "step": 434620 }, { "epoch": 0.8779800983366799, "grad_norm": 516.3267822265625, "learning_rate": 5.11706509542561e-07, "loss": 14.7627, "step": 434630 }, { "epoch": 0.8780002989693637, "grad_norm": 933.3505859375, "learning_rate": 5.115526901812062e-07, "loss": 25.2065, "step": 434640 }, { "epoch": 0.8780204996020475, "grad_norm": 410.081787109375, "learning_rate": 5.113988926960001e-07, "loss": 15.1906, "step": 434650 }, { "epoch": 0.8780407002347314, "grad_norm": 320.27349853515625, "learning_rate": 5.112451170876903e-07, "loss": 16.609, "step": 434660 }, { "epoch": 0.8780609008674152, "grad_norm": 259.0826110839844, "learning_rate": 5.110913633570286e-07, "loss": 31.0852, "step": 434670 }, { "epoch": 0.878081101500099, "grad_norm": 314.41046142578125, "learning_rate": 5.109376315047632e-07, "loss": 11.9574, "step": 434680 }, { "epoch": 0.8781013021327828, "grad_norm": 643.1636352539062, "learning_rate": 5.107839215316424e-07, "loss": 27.3094, "step": 434690 }, { "epoch": 0.8781215027654666, "grad_norm": 439.34320068359375, "learning_rate": 5.106302334384172e-07, "loss": 16.708, "step": 434700 }, { "epoch": 0.8781417033981505, "grad_norm": 979.7433471679688, "learning_rate": 5.104765672258355e-07, "loss": 25.1756, "step": 434710 }, { "epoch": 0.8781619040308343, "grad_norm": 422.1972961425781, "learning_rate": 5.103229228946455e-07, "loss": 11.8049, "step": 434720 }, { "epoch": 0.8781821046635181, "grad_norm": 145.0203857421875, "learning_rate": 5.101693004455977e-07, "loss": 10.6392, "step": 434730 }, { "epoch": 0.8782023052962019, "grad_norm": 377.1507568359375, "learning_rate": 5.100156998794415e-07, "loss": 18.1032, "step": 434740 }, { "epoch": 0.8782225059288857, "grad_norm": 272.1239929199219, "learning_rate": 5.098621211969224e-07, "loss": 21.4669, "step": 434750 }, { "epoch": 0.8782427065615696, "grad_norm": 46.512596130371094, "learning_rate": 5.09708564398791e-07, "loss": 13.3603, "step": 434760 }, { "epoch": 0.8782629071942534, "grad_norm": 281.3079528808594, "learning_rate": 5.095550294857959e-07, "loss": 14.2644, "step": 434770 }, { "epoch": 0.8782831078269372, "grad_norm": 374.24896240234375, "learning_rate": 5.094015164586852e-07, "loss": 14.84, "step": 434780 }, { "epoch": 0.8783033084596209, "grad_norm": 
364.0821533203125, "learning_rate": 5.092480253182058e-07, "loss": 23.2679, "step": 434790 }, { "epoch": 0.8783235090923047, "grad_norm": 162.59280395507812, "learning_rate": 5.090945560651073e-07, "loss": 15.5259, "step": 434800 }, { "epoch": 0.8783437097249885, "grad_norm": 276.9872741699219, "learning_rate": 5.08941108700139e-07, "loss": 8.1555, "step": 434810 }, { "epoch": 0.8783639103576724, "grad_norm": 326.7541198730469, "learning_rate": 5.087876832240446e-07, "loss": 27.8575, "step": 434820 }, { "epoch": 0.8783841109903562, "grad_norm": 33.11380386352539, "learning_rate": 5.086342796375749e-07, "loss": 7.0672, "step": 434830 }, { "epoch": 0.87840431162304, "grad_norm": 153.7439727783203, "learning_rate": 5.084808979414779e-07, "loss": 14.9328, "step": 434840 }, { "epoch": 0.8784245122557238, "grad_norm": 171.47793579101562, "learning_rate": 5.083275381364999e-07, "loss": 26.4409, "step": 434850 }, { "epoch": 0.8784447128884076, "grad_norm": 350.4378662109375, "learning_rate": 5.081742002233881e-07, "loss": 16.2276, "step": 434860 }, { "epoch": 0.8784649135210915, "grad_norm": 450.28155517578125, "learning_rate": 5.080208842028911e-07, "loss": 25.9278, "step": 434870 }, { "epoch": 0.8784851141537753, "grad_norm": 448.4052734375, "learning_rate": 5.078675900757557e-07, "loss": 20.3733, "step": 434880 }, { "epoch": 0.8785053147864591, "grad_norm": 350.4577941894531, "learning_rate": 5.07714317842728e-07, "loss": 12.8354, "step": 434890 }, { "epoch": 0.8785255154191429, "grad_norm": 610.2189331054688, "learning_rate": 5.075610675045567e-07, "loss": 17.948, "step": 434900 }, { "epoch": 0.8785457160518267, "grad_norm": 399.9798889160156, "learning_rate": 5.074078390619869e-07, "loss": 21.844, "step": 434910 }, { "epoch": 0.8785659166845106, "grad_norm": 383.8321838378906, "learning_rate": 5.072546325157673e-07, "loss": 13.7429, "step": 434920 }, { "epoch": 0.8785861173171944, "grad_norm": 216.4840850830078, "learning_rate": 5.071014478666425e-07, "loss": 14.4042, "step": 434930 }, { "epoch": 0.8786063179498782, "grad_norm": 457.54327392578125, "learning_rate": 5.069482851153618e-07, "loss": 18.8779, "step": 434940 }, { "epoch": 0.878626518582562, "grad_norm": 32.08904266357422, "learning_rate": 5.0679514426267e-07, "loss": 15.2914, "step": 434950 }, { "epoch": 0.8786467192152458, "grad_norm": 35.61134719848633, "learning_rate": 5.06642025309313e-07, "loss": 11.8779, "step": 434960 }, { "epoch": 0.8786669198479297, "grad_norm": 723.7139282226562, "learning_rate": 5.064889282560382e-07, "loss": 20.9908, "step": 434970 }, { "epoch": 0.8786871204806135, "grad_norm": 102.7997817993164, "learning_rate": 5.063358531035906e-07, "loss": 13.4488, "step": 434980 }, { "epoch": 0.8787073211132973, "grad_norm": 352.2797546386719, "learning_rate": 5.06182799852718e-07, "loss": 13.2957, "step": 434990 }, { "epoch": 0.8787275217459811, "grad_norm": 886.9483032226562, "learning_rate": 5.06029768504166e-07, "loss": 22.359, "step": 435000 }, { "epoch": 0.8787477223786649, "grad_norm": 218.36572265625, "learning_rate": 5.058767590586783e-07, "loss": 19.1126, "step": 435010 }, { "epoch": 0.8787679230113488, "grad_norm": 287.1523742675781, "learning_rate": 5.057237715170032e-07, "loss": 14.2723, "step": 435020 }, { "epoch": 0.8787881236440326, "grad_norm": 420.1017761230469, "learning_rate": 5.055708058798853e-07, "loss": 27.4834, "step": 435030 }, { "epoch": 0.8788083242767164, "grad_norm": 525.8872680664062, "learning_rate": 5.054178621480694e-07, "loss": 16.9919, "step": 435040 }, { "epoch": 
0.8788285249094001, "grad_norm": 232.071533203125, "learning_rate": 5.052649403223015e-07, "loss": 18.4302, "step": 435050 }, { "epoch": 0.8788487255420839, "grad_norm": 342.8013610839844, "learning_rate": 5.051120404033283e-07, "loss": 18.3008, "step": 435060 }, { "epoch": 0.8788689261747678, "grad_norm": 6.585102081298828, "learning_rate": 5.049591623918937e-07, "loss": 26.1853, "step": 435070 }, { "epoch": 0.8788891268074516, "grad_norm": 368.1114501953125, "learning_rate": 5.04806306288742e-07, "loss": 25.3293, "step": 435080 }, { "epoch": 0.8789093274401354, "grad_norm": 515.1712036132812, "learning_rate": 5.046534720946206e-07, "loss": 16.9136, "step": 435090 }, { "epoch": 0.8789295280728192, "grad_norm": 16.89857292175293, "learning_rate": 5.045006598102725e-07, "loss": 25.171, "step": 435100 }, { "epoch": 0.878949728705503, "grad_norm": 346.8264465332031, "learning_rate": 5.043478694364423e-07, "loss": 16.4898, "step": 435110 }, { "epoch": 0.8789699293381869, "grad_norm": 332.0619201660156, "learning_rate": 5.04195100973875e-07, "loss": 17.1752, "step": 435120 }, { "epoch": 0.8789901299708707, "grad_norm": 107.52465057373047, "learning_rate": 5.040423544233164e-07, "loss": 17.1065, "step": 435130 }, { "epoch": 0.8790103306035545, "grad_norm": 433.8957824707031, "learning_rate": 5.0388962978551e-07, "loss": 15.4823, "step": 435140 }, { "epoch": 0.8790305312362383, "grad_norm": 242.65370178222656, "learning_rate": 5.037369270611997e-07, "loss": 14.8826, "step": 435150 }, { "epoch": 0.8790507318689221, "grad_norm": 471.4053649902344, "learning_rate": 5.035842462511309e-07, "loss": 33.3922, "step": 435160 }, { "epoch": 0.879070932501606, "grad_norm": 67.55804443359375, "learning_rate": 5.034315873560475e-07, "loss": 15.3738, "step": 435170 }, { "epoch": 0.8790911331342898, "grad_norm": 534.8261108398438, "learning_rate": 5.032789503766922e-07, "loss": 19.2829, "step": 435180 }, { "epoch": 0.8791113337669736, "grad_norm": 511.7774353027344, "learning_rate": 5.031263353138105e-07, "loss": 27.3329, "step": 435190 }, { "epoch": 0.8791315343996574, "grad_norm": 279.4249267578125, "learning_rate": 5.029737421681446e-07, "loss": 28.538, "step": 435200 }, { "epoch": 0.8791517350323412, "grad_norm": 345.3414306640625, "learning_rate": 5.028211709404407e-07, "loss": 24.7293, "step": 435210 }, { "epoch": 0.879171935665025, "grad_norm": 673.9608764648438, "learning_rate": 5.026686216314397e-07, "loss": 16.0036, "step": 435220 }, { "epoch": 0.8791921362977089, "grad_norm": 195.92652893066406, "learning_rate": 5.025160942418872e-07, "loss": 16.4735, "step": 435230 }, { "epoch": 0.8792123369303927, "grad_norm": 606.4151611328125, "learning_rate": 5.023635887725259e-07, "loss": 17.4006, "step": 435240 }, { "epoch": 0.8792325375630765, "grad_norm": 447.26043701171875, "learning_rate": 5.022111052240985e-07, "loss": 21.1823, "step": 435250 }, { "epoch": 0.8792527381957603, "grad_norm": 122.67220306396484, "learning_rate": 5.020586435973491e-07, "loss": 25.98, "step": 435260 }, { "epoch": 0.8792729388284442, "grad_norm": 458.9827575683594, "learning_rate": 5.019062038930195e-07, "loss": 21.6993, "step": 435270 }, { "epoch": 0.879293139461128, "grad_norm": 315.67913818359375, "learning_rate": 5.017537861118543e-07, "loss": 27.674, "step": 435280 }, { "epoch": 0.8793133400938118, "grad_norm": 158.3207244873047, "learning_rate": 5.016013902545957e-07, "loss": 13.9432, "step": 435290 }, { "epoch": 0.8793335407264955, "grad_norm": 261.7013244628906, "learning_rate": 5.014490163219854e-07, "loss": 16.5056, 
"step": 435300 }, { "epoch": 0.8793537413591793, "grad_norm": 59.75825500488281, "learning_rate": 5.012966643147682e-07, "loss": 10.7603, "step": 435310 }, { "epoch": 0.8793739419918631, "grad_norm": 483.52099609375, "learning_rate": 5.011443342336852e-07, "loss": 8.7847, "step": 435320 }, { "epoch": 0.879394142624547, "grad_norm": 192.5576934814453, "learning_rate": 5.009920260794782e-07, "loss": 9.5417, "step": 435330 }, { "epoch": 0.8794143432572308, "grad_norm": 318.4565124511719, "learning_rate": 5.008397398528903e-07, "loss": 13.3563, "step": 435340 }, { "epoch": 0.8794345438899146, "grad_norm": 380.82891845703125, "learning_rate": 5.006874755546654e-07, "loss": 17.2505, "step": 435350 }, { "epoch": 0.8794547445225984, "grad_norm": 852.404296875, "learning_rate": 5.005352331855423e-07, "loss": 22.5348, "step": 435360 }, { "epoch": 0.8794749451552822, "grad_norm": 502.7873229980469, "learning_rate": 5.00383012746265e-07, "loss": 34.0941, "step": 435370 }, { "epoch": 0.8794951457879661, "grad_norm": 190.71434020996094, "learning_rate": 5.002308142375762e-07, "loss": 13.873, "step": 435380 }, { "epoch": 0.8795153464206499, "grad_norm": 77.80549621582031, "learning_rate": 5.000786376602162e-07, "loss": 12.7138, "step": 435390 }, { "epoch": 0.8795355470533337, "grad_norm": 485.7001037597656, "learning_rate": 4.99926483014927e-07, "loss": 13.9821, "step": 435400 }, { "epoch": 0.8795557476860175, "grad_norm": 265.3295593261719, "learning_rate": 4.997743503024494e-07, "loss": 19.1495, "step": 435410 }, { "epoch": 0.8795759483187013, "grad_norm": 141.0780792236328, "learning_rate": 4.996222395235283e-07, "loss": 22.0904, "step": 435420 }, { "epoch": 0.8795961489513852, "grad_norm": 273.9278869628906, "learning_rate": 4.994701506789007e-07, "loss": 17.5997, "step": 435430 }, { "epoch": 0.879616349584069, "grad_norm": 353.7311706542969, "learning_rate": 4.99318083769309e-07, "loss": 13.0439, "step": 435440 }, { "epoch": 0.8796365502167528, "grad_norm": 721.9375610351562, "learning_rate": 4.991660387954967e-07, "loss": 22.3529, "step": 435450 }, { "epoch": 0.8796567508494366, "grad_norm": 301.629638671875, "learning_rate": 4.990140157582036e-07, "loss": 14.4415, "step": 435460 }, { "epoch": 0.8796769514821204, "grad_norm": 149.6297607421875, "learning_rate": 4.988620146581685e-07, "loss": 16.6529, "step": 435470 }, { "epoch": 0.8796971521148043, "grad_norm": 219.41262817382812, "learning_rate": 4.987100354961355e-07, "loss": 14.8243, "step": 435480 }, { "epoch": 0.8797173527474881, "grad_norm": 404.56951904296875, "learning_rate": 4.985580782728433e-07, "loss": 18.9657, "step": 435490 }, { "epoch": 0.8797375533801719, "grad_norm": 428.6075744628906, "learning_rate": 4.984061429890324e-07, "loss": 13.3605, "step": 435500 }, { "epoch": 0.8797577540128557, "grad_norm": 303.6961975097656, "learning_rate": 4.98254229645444e-07, "loss": 23.9782, "step": 435510 }, { "epoch": 0.8797779546455395, "grad_norm": 307.2866516113281, "learning_rate": 4.981023382428196e-07, "loss": 22.7079, "step": 435520 }, { "epoch": 0.8797981552782234, "grad_norm": 423.6874084472656, "learning_rate": 4.979504687818987e-07, "loss": 21.0905, "step": 435530 }, { "epoch": 0.8798183559109072, "grad_norm": 481.9783020019531, "learning_rate": 4.977986212634195e-07, "loss": 21.2299, "step": 435540 }, { "epoch": 0.879838556543591, "grad_norm": 610.5613403320312, "learning_rate": 4.976467956881254e-07, "loss": 20.8345, "step": 435550 }, { "epoch": 0.8798587571762747, "grad_norm": 238.90846252441406, "learning_rate": 
4.97494992056754e-07, "loss": 18.398, "step": 435560 }, { "epoch": 0.8798789578089585, "grad_norm": 228.98049926757812, "learning_rate": 4.973432103700454e-07, "loss": 25.6139, "step": 435570 }, { "epoch": 0.8798991584416423, "grad_norm": 269.6488037109375, "learning_rate": 4.971914506287407e-07, "loss": 21.9952, "step": 435580 }, { "epoch": 0.8799193590743262, "grad_norm": 191.76803588867188, "learning_rate": 4.97039712833578e-07, "loss": 30.8179, "step": 435590 }, { "epoch": 0.87993955970701, "grad_norm": 510.7918701171875, "learning_rate": 4.968879969852985e-07, "loss": 21.6079, "step": 435600 }, { "epoch": 0.8799597603396938, "grad_norm": 22.066001892089844, "learning_rate": 4.967363030846406e-07, "loss": 22.5894, "step": 435610 }, { "epoch": 0.8799799609723776, "grad_norm": 355.6514892578125, "learning_rate": 4.965846311323431e-07, "loss": 24.1495, "step": 435620 }, { "epoch": 0.8800001616050614, "grad_norm": 226.7723388671875, "learning_rate": 4.964329811291463e-07, "loss": 11.3769, "step": 435630 }, { "epoch": 0.8800203622377453, "grad_norm": 76.03665161132812, "learning_rate": 4.962813530757893e-07, "loss": 14.7804, "step": 435640 }, { "epoch": 0.8800405628704291, "grad_norm": 199.2616729736328, "learning_rate": 4.961297469730097e-07, "loss": 13.2704, "step": 435650 }, { "epoch": 0.8800607635031129, "grad_norm": 465.0576171875, "learning_rate": 4.959781628215476e-07, "loss": 19.5031, "step": 435660 }, { "epoch": 0.8800809641357967, "grad_norm": 108.87104797363281, "learning_rate": 4.95826600622143e-07, "loss": 15.921, "step": 435670 }, { "epoch": 0.8801011647684805, "grad_norm": 103.40303802490234, "learning_rate": 4.956750603755328e-07, "loss": 13.7844, "step": 435680 }, { "epoch": 0.8801213654011644, "grad_norm": 320.8612365722656, "learning_rate": 4.95523542082455e-07, "loss": 15.81, "step": 435690 }, { "epoch": 0.8801415660338482, "grad_norm": 696.59326171875, "learning_rate": 4.9537204574365e-07, "loss": 24.1962, "step": 435700 }, { "epoch": 0.880161766666532, "grad_norm": 989.5188598632812, "learning_rate": 4.952205713598557e-07, "loss": 21.6382, "step": 435710 }, { "epoch": 0.8801819672992158, "grad_norm": 198.38470458984375, "learning_rate": 4.950691189318086e-07, "loss": 12.5203, "step": 435720 }, { "epoch": 0.8802021679318996, "grad_norm": 232.7144317626953, "learning_rate": 4.949176884602486e-07, "loss": 40.752, "step": 435730 }, { "epoch": 0.8802223685645835, "grad_norm": 314.38751220703125, "learning_rate": 4.947662799459152e-07, "loss": 19.3201, "step": 435740 }, { "epoch": 0.8802425691972673, "grad_norm": 347.6485595703125, "learning_rate": 4.946148933895423e-07, "loss": 22.7622, "step": 435750 }, { "epoch": 0.8802627698299511, "grad_norm": 87.8727035522461, "learning_rate": 4.944635287918703e-07, "loss": 10.1187, "step": 435760 }, { "epoch": 0.8802829704626349, "grad_norm": 192.29734802246094, "learning_rate": 4.943121861536376e-07, "loss": 20.3241, "step": 435770 }, { "epoch": 0.8803031710953187, "grad_norm": 254.23399353027344, "learning_rate": 4.941608654755808e-07, "loss": 10.8488, "step": 435780 }, { "epoch": 0.8803233717280026, "grad_norm": 515.1815185546875, "learning_rate": 4.940095667584366e-07, "loss": 21.5697, "step": 435790 }, { "epoch": 0.8803435723606864, "grad_norm": 158.14749145507812, "learning_rate": 4.938582900029437e-07, "loss": 21.8464, "step": 435800 }, { "epoch": 0.8803637729933701, "grad_norm": 957.723876953125, "learning_rate": 4.937070352098384e-07, "loss": 24.3145, "step": 435810 }, { "epoch": 0.8803839736260539, "grad_norm": 
341.580322265625, "learning_rate": 4.935558023798592e-07, "loss": 18.2231, "step": 435820 }, { "epoch": 0.8804041742587377, "grad_norm": 276.53717041015625, "learning_rate": 4.934045915137419e-07, "loss": 9.082, "step": 435830 }, { "epoch": 0.8804243748914216, "grad_norm": 673.6719970703125, "learning_rate": 4.932534026122249e-07, "loss": 12.0788, "step": 435840 }, { "epoch": 0.8804445755241054, "grad_norm": 303.8342590332031, "learning_rate": 4.931022356760439e-07, "loss": 22.0684, "step": 435850 }, { "epoch": 0.8804647761567892, "grad_norm": 380.82440185546875, "learning_rate": 4.929510907059354e-07, "loss": 20.524, "step": 435860 }, { "epoch": 0.880484976789473, "grad_norm": 22.417442321777344, "learning_rate": 4.927999677026374e-07, "loss": 14.9668, "step": 435870 }, { "epoch": 0.8805051774221568, "grad_norm": 0.30903351306915283, "learning_rate": 4.926488666668844e-07, "loss": 12.1088, "step": 435880 }, { "epoch": 0.8805253780548407, "grad_norm": 529.1118774414062, "learning_rate": 4.924977875994159e-07, "loss": 19.5259, "step": 435890 }, { "epoch": 0.8805455786875245, "grad_norm": 357.0335388183594, "learning_rate": 4.92346730500966e-07, "loss": 9.633, "step": 435900 }, { "epoch": 0.8805657793202083, "grad_norm": 185.8363037109375, "learning_rate": 4.921956953722701e-07, "loss": 9.942, "step": 435910 }, { "epoch": 0.8805859799528921, "grad_norm": 256.4708557128906, "learning_rate": 4.920446822140673e-07, "loss": 10.8701, "step": 435920 }, { "epoch": 0.8806061805855759, "grad_norm": 474.8191833496094, "learning_rate": 4.918936910270916e-07, "loss": 12.3728, "step": 435930 }, { "epoch": 0.8806263812182598, "grad_norm": 217.32952880859375, "learning_rate": 4.917427218120785e-07, "loss": 21.8445, "step": 435940 }, { "epoch": 0.8806465818509436, "grad_norm": 248.072265625, "learning_rate": 4.915917745697645e-07, "loss": 20.431, "step": 435950 }, { "epoch": 0.8806667824836274, "grad_norm": 288.0353698730469, "learning_rate": 4.914408493008871e-07, "loss": 25.1107, "step": 435960 }, { "epoch": 0.8806869831163112, "grad_norm": 106.0359115600586, "learning_rate": 4.912899460061787e-07, "loss": 12.067, "step": 435970 }, { "epoch": 0.880707183748995, "grad_norm": 354.09881591796875, "learning_rate": 4.911390646863757e-07, "loss": 17.9086, "step": 435980 }, { "epoch": 0.8807273843816789, "grad_norm": 540.8861694335938, "learning_rate": 4.909882053422154e-07, "loss": 18.2216, "step": 435990 }, { "epoch": 0.8807475850143627, "grad_norm": 184.07806396484375, "learning_rate": 4.908373679744316e-07, "loss": 17.5554, "step": 436000 }, { "epoch": 0.8807677856470465, "grad_norm": 177.03599548339844, "learning_rate": 4.90686552583759e-07, "loss": 18.7642, "step": 436010 }, { "epoch": 0.8807879862797303, "grad_norm": 166.44651794433594, "learning_rate": 4.905357591709325e-07, "loss": 43.1835, "step": 436020 }, { "epoch": 0.8808081869124141, "grad_norm": 321.760986328125, "learning_rate": 4.9038498773669e-07, "loss": 16.6634, "step": 436030 }, { "epoch": 0.880828387545098, "grad_norm": 341.0688171386719, "learning_rate": 4.902342382817626e-07, "loss": 32.7243, "step": 436040 }, { "epoch": 0.8808485881777818, "grad_norm": 302.72125244140625, "learning_rate": 4.900835108068863e-07, "loss": 16.4093, "step": 436050 }, { "epoch": 0.8808687888104656, "grad_norm": 336.9021301269531, "learning_rate": 4.899328053127966e-07, "loss": 21.199, "step": 436060 }, { "epoch": 0.8808889894431493, "grad_norm": 434.4256896972656, "learning_rate": 4.89782121800228e-07, "loss": 14.7242, "step": 436070 }, { "epoch": 
0.8809091900758331, "grad_norm": 167.6675567626953, "learning_rate": 4.896314602699126e-07, "loss": 15.5623, "step": 436080 }, { "epoch": 0.880929390708517, "grad_norm": 460.88494873046875, "learning_rate": 4.894808207225882e-07, "loss": 21.3126, "step": 436090 }, { "epoch": 0.8809495913412008, "grad_norm": 345.532958984375, "learning_rate": 4.893302031589864e-07, "loss": 14.3713, "step": 436100 }, { "epoch": 0.8809697919738846, "grad_norm": 349.746826171875, "learning_rate": 4.891796075798416e-07, "loss": 39.5377, "step": 436110 }, { "epoch": 0.8809899926065684, "grad_norm": 411.3088684082031, "learning_rate": 4.890290339858883e-07, "loss": 13.9997, "step": 436120 }, { "epoch": 0.8810101932392522, "grad_norm": 324.4735107421875, "learning_rate": 4.888784823778614e-07, "loss": 15.4449, "step": 436130 }, { "epoch": 0.881030393871936, "grad_norm": 259.8567199707031, "learning_rate": 4.887279527564936e-07, "loss": 9.0276, "step": 436140 }, { "epoch": 0.8810505945046199, "grad_norm": 444.4129943847656, "learning_rate": 4.885774451225178e-07, "loss": 15.4865, "step": 436150 }, { "epoch": 0.8810707951373037, "grad_norm": 343.4646911621094, "learning_rate": 4.884269594766689e-07, "loss": 18.7254, "step": 436160 }, { "epoch": 0.8810909957699875, "grad_norm": 232.9482421875, "learning_rate": 4.8827649581968e-07, "loss": 16.868, "step": 436170 }, { "epoch": 0.8811111964026713, "grad_norm": 549.541015625, "learning_rate": 4.881260541522831e-07, "loss": 28.0799, "step": 436180 }, { "epoch": 0.8811313970353551, "grad_norm": 612.6248168945312, "learning_rate": 4.87975634475214e-07, "loss": 20.9709, "step": 436190 }, { "epoch": 0.881151597668039, "grad_norm": 501.504638671875, "learning_rate": 4.878252367892033e-07, "loss": 16.7092, "step": 436200 }, { "epoch": 0.8811717983007228, "grad_norm": 297.49261474609375, "learning_rate": 4.87674861094986e-07, "loss": 19.5409, "step": 436210 }, { "epoch": 0.8811919989334066, "grad_norm": 224.5021209716797, "learning_rate": 4.875245073932944e-07, "loss": 10.7612, "step": 436220 }, { "epoch": 0.8812121995660904, "grad_norm": 361.38916015625, "learning_rate": 4.873741756848594e-07, "loss": 23.1716, "step": 436230 }, { "epoch": 0.8812324001987742, "grad_norm": 224.0435333251953, "learning_rate": 4.87223865970417e-07, "loss": 12.7234, "step": 436240 }, { "epoch": 0.8812526008314581, "grad_norm": 342.0216369628906, "learning_rate": 4.87073578250698e-07, "loss": 13.4458, "step": 436250 }, { "epoch": 0.8812728014641419, "grad_norm": 313.01409912109375, "learning_rate": 4.869233125264339e-07, "loss": 8.6048, "step": 436260 }, { "epoch": 0.8812930020968257, "grad_norm": 258.0538330078125, "learning_rate": 4.867730687983585e-07, "loss": 15.3131, "step": 436270 }, { "epoch": 0.8813132027295095, "grad_norm": 425.1286926269531, "learning_rate": 4.866228470672041e-07, "loss": 11.7544, "step": 436280 }, { "epoch": 0.8813334033621933, "grad_norm": 173.1059112548828, "learning_rate": 4.864726473337034e-07, "loss": 21.4789, "step": 436290 }, { "epoch": 0.8813536039948772, "grad_norm": 401.54052734375, "learning_rate": 4.863224695985858e-07, "loss": 23.8768, "step": 436300 }, { "epoch": 0.881373804627561, "grad_norm": 436.904052734375, "learning_rate": 4.861723138625862e-07, "loss": 18.8533, "step": 436310 }, { "epoch": 0.8813940052602448, "grad_norm": 455.9186706542969, "learning_rate": 4.860221801264358e-07, "loss": 17.9445, "step": 436320 }, { "epoch": 0.8814142058929285, "grad_norm": 319.8497314453125, "learning_rate": 4.858720683908646e-07, "loss": 21.2574, "step": 436330 }, { 
"epoch": 0.8814344065256123, "grad_norm": 468.0010986328125, "learning_rate": 4.857219786566053e-07, "loss": 24.4979, "step": 436340 }, { "epoch": 0.8814546071582962, "grad_norm": 550.1976928710938, "learning_rate": 4.855719109243917e-07, "loss": 23.3478, "step": 436350 }, { "epoch": 0.88147480779098, "grad_norm": 190.78244018554688, "learning_rate": 4.85421865194951e-07, "loss": 25.4633, "step": 436360 }, { "epoch": 0.8814950084236638, "grad_norm": 798.060302734375, "learning_rate": 4.852718414690166e-07, "loss": 11.6966, "step": 436370 }, { "epoch": 0.8815152090563476, "grad_norm": 268.206298828125, "learning_rate": 4.851218397473206e-07, "loss": 20.4789, "step": 436380 }, { "epoch": 0.8815354096890314, "grad_norm": 858.3386840820312, "learning_rate": 4.84971860030593e-07, "loss": 25.3801, "step": 436390 }, { "epoch": 0.8815556103217153, "grad_norm": 158.20469665527344, "learning_rate": 4.848219023195644e-07, "loss": 9.441, "step": 436400 }, { "epoch": 0.8815758109543991, "grad_norm": 214.66708374023438, "learning_rate": 4.846719666149668e-07, "loss": 14.5835, "step": 436410 }, { "epoch": 0.8815960115870829, "grad_norm": 338.07586669921875, "learning_rate": 4.845220529175304e-07, "loss": 16.8058, "step": 436420 }, { "epoch": 0.8816162122197667, "grad_norm": 421.33880615234375, "learning_rate": 4.84372161227985e-07, "loss": 13.6267, "step": 436430 }, { "epoch": 0.8816364128524505, "grad_norm": 0.18723739683628082, "learning_rate": 4.842222915470618e-07, "loss": 11.3648, "step": 436440 }, { "epoch": 0.8816566134851344, "grad_norm": 26.688398361206055, "learning_rate": 4.840724438754929e-07, "loss": 23.6005, "step": 436450 }, { "epoch": 0.8816768141178182, "grad_norm": 433.8655700683594, "learning_rate": 4.839226182140072e-07, "loss": 14.6859, "step": 436460 }, { "epoch": 0.881697014750502, "grad_norm": 489.1601867675781, "learning_rate": 4.837728145633335e-07, "loss": 19.8474, "step": 436470 }, { "epoch": 0.8817172153831858, "grad_norm": 393.7753601074219, "learning_rate": 4.836230329242042e-07, "loss": 20.2308, "step": 436480 }, { "epoch": 0.8817374160158696, "grad_norm": 130.21282958984375, "learning_rate": 4.83473273297348e-07, "loss": 17.8237, "step": 436490 }, { "epoch": 0.8817576166485535, "grad_norm": 400.9236755371094, "learning_rate": 4.833235356834959e-07, "loss": 15.2621, "step": 436500 }, { "epoch": 0.8817778172812373, "grad_norm": 395.96966552734375, "learning_rate": 4.831738200833775e-07, "loss": 17.6387, "step": 436510 }, { "epoch": 0.8817980179139211, "grad_norm": 605.7783813476562, "learning_rate": 4.830241264977209e-07, "loss": 19.7262, "step": 436520 }, { "epoch": 0.8818182185466049, "grad_norm": 163.8187713623047, "learning_rate": 4.828744549272579e-07, "loss": 21.9274, "step": 436530 }, { "epoch": 0.8818384191792887, "grad_norm": 571.732421875, "learning_rate": 4.827248053727168e-07, "loss": 18.2143, "step": 436540 }, { "epoch": 0.8818586198119726, "grad_norm": 313.611083984375, "learning_rate": 4.825751778348259e-07, "loss": 16.4337, "step": 436550 }, { "epoch": 0.8818788204446564, "grad_norm": 463.8468933105469, "learning_rate": 4.824255723143162e-07, "loss": 17.2454, "step": 436560 }, { "epoch": 0.8818990210773402, "grad_norm": 476.1033020019531, "learning_rate": 4.822759888119171e-07, "loss": 15.0617, "step": 436570 }, { "epoch": 0.8819192217100239, "grad_norm": 248.8014678955078, "learning_rate": 4.821264273283566e-07, "loss": 10.7048, "step": 436580 }, { "epoch": 0.8819394223427077, "grad_norm": 682.4763793945312, "learning_rate": 4.819768878643633e-07, "loss": 
23.8681, "step": 436590 }, { "epoch": 0.8819596229753915, "grad_norm": 253.24656677246094, "learning_rate": 4.818273704206678e-07, "loss": 11.7182, "step": 436600 }, { "epoch": 0.8819798236080754, "grad_norm": 95.55328369140625, "learning_rate": 4.816778749979973e-07, "loss": 16.1974, "step": 436610 }, { "epoch": 0.8820000242407592, "grad_norm": 72.05003356933594, "learning_rate": 4.815284015970801e-07, "loss": 10.8611, "step": 436620 }, { "epoch": 0.882020224873443, "grad_norm": 675.322998046875, "learning_rate": 4.813789502186456e-07, "loss": 12.9505, "step": 436630 }, { "epoch": 0.8820404255061268, "grad_norm": 151.66297912597656, "learning_rate": 4.812295208634238e-07, "loss": 21.0599, "step": 436640 }, { "epoch": 0.8820606261388106, "grad_norm": 343.0258483886719, "learning_rate": 4.810801135321391e-07, "loss": 28.378, "step": 436650 }, { "epoch": 0.8820808267714945, "grad_norm": 415.1305236816406, "learning_rate": 4.809307282255221e-07, "loss": 14.8969, "step": 436660 }, { "epoch": 0.8821010274041783, "grad_norm": 659.5724487304688, "learning_rate": 4.807813649443016e-07, "loss": 24.2443, "step": 436670 }, { "epoch": 0.8821212280368621, "grad_norm": 838.9188232421875, "learning_rate": 4.806320236892048e-07, "loss": 24.2559, "step": 436680 }, { "epoch": 0.8821414286695459, "grad_norm": 328.0303039550781, "learning_rate": 4.804827044609578e-07, "loss": 18.7561, "step": 436690 }, { "epoch": 0.8821616293022297, "grad_norm": 458.027099609375, "learning_rate": 4.803334072602917e-07, "loss": 16.5193, "step": 436700 }, { "epoch": 0.8821818299349136, "grad_norm": 705.2664184570312, "learning_rate": 4.801841320879319e-07, "loss": 24.2991, "step": 436710 }, { "epoch": 0.8822020305675974, "grad_norm": 440.40521240234375, "learning_rate": 4.800348789446058e-07, "loss": 17.4962, "step": 436720 }, { "epoch": 0.8822222312002812, "grad_norm": 82.1038818359375, "learning_rate": 4.798856478310409e-07, "loss": 17.8321, "step": 436730 }, { "epoch": 0.882242431832965, "grad_norm": 691.0404052734375, "learning_rate": 4.797364387479664e-07, "loss": 28.2128, "step": 436740 }, { "epoch": 0.8822626324656488, "grad_norm": 657.4496459960938, "learning_rate": 4.795872516961087e-07, "loss": 15.4127, "step": 436750 }, { "epoch": 0.8822828330983327, "grad_norm": 584.3892822265625, "learning_rate": 4.794380866761928e-07, "loss": 13.4703, "step": 436760 }, { "epoch": 0.8823030337310165, "grad_norm": 325.6146545410156, "learning_rate": 4.792889436889487e-07, "loss": 7.1147, "step": 436770 }, { "epoch": 0.8823232343637003, "grad_norm": 118.1163101196289, "learning_rate": 4.791398227351024e-07, "loss": 14.4561, "step": 436780 }, { "epoch": 0.8823434349963841, "grad_norm": 243.56419372558594, "learning_rate": 4.789907238153785e-07, "loss": 21.6599, "step": 436790 }, { "epoch": 0.8823636356290679, "grad_norm": 760.7359619140625, "learning_rate": 4.788416469305068e-07, "loss": 25.8602, "step": 436800 }, { "epoch": 0.8823838362617518, "grad_norm": 113.09996032714844, "learning_rate": 4.786925920812119e-07, "loss": 12.4008, "step": 436810 }, { "epoch": 0.8824040368944356, "grad_norm": 326.0145263671875, "learning_rate": 4.78543559268222e-07, "loss": 22.3032, "step": 436820 }, { "epoch": 0.8824242375271194, "grad_norm": 238.98001098632812, "learning_rate": 4.78394548492262e-07, "loss": 20.334, "step": 436830 }, { "epoch": 0.8824444381598031, "grad_norm": 637.3267822265625, "learning_rate": 4.782455597540576e-07, "loss": 30.5467, "step": 436840 }, { "epoch": 0.8824646387924869, "grad_norm": 487.647216796875, "learning_rate": 
4.780965930543369e-07, "loss": 12.0064, "step": 436850 }, { "epoch": 0.8824848394251708, "grad_norm": 367.8666687011719, "learning_rate": 4.779476483938251e-07, "loss": 13.7615, "step": 436860 }, { "epoch": 0.8825050400578546, "grad_norm": 679.5595092773438, "learning_rate": 4.777987257732469e-07, "loss": 20.4513, "step": 436870 }, { "epoch": 0.8825252406905384, "grad_norm": 102.08183288574219, "learning_rate": 4.776498251933292e-07, "loss": 21.0092, "step": 436880 }, { "epoch": 0.8825454413232222, "grad_norm": 578.4793701171875, "learning_rate": 4.775009466547986e-07, "loss": 20.5826, "step": 436890 }, { "epoch": 0.882565641955906, "grad_norm": 64.65514373779297, "learning_rate": 4.773520901583801e-07, "loss": 19.3511, "step": 436900 }, { "epoch": 0.8825858425885899, "grad_norm": 629.3908081054688, "learning_rate": 4.772032557047984e-07, "loss": 24.7698, "step": 436910 }, { "epoch": 0.8826060432212737, "grad_norm": 752.3409423828125, "learning_rate": 4.770544432947799e-07, "loss": 17.0448, "step": 436920 }, { "epoch": 0.8826262438539575, "grad_norm": 471.2159118652344, "learning_rate": 4.769056529290495e-07, "loss": 16.0433, "step": 436930 }, { "epoch": 0.8826464444866413, "grad_norm": 449.1194763183594, "learning_rate": 4.7675688460833145e-07, "loss": 24.9503, "step": 436940 }, { "epoch": 0.8826666451193251, "grad_norm": 366.4239807128906, "learning_rate": 4.766081383333521e-07, "loss": 29.5621, "step": 436950 }, { "epoch": 0.882686845752009, "grad_norm": 390.2115173339844, "learning_rate": 4.7645941410483733e-07, "loss": 16.0112, "step": 436960 }, { "epoch": 0.8827070463846928, "grad_norm": 189.2827606201172, "learning_rate": 4.7631071192350943e-07, "loss": 19.8507, "step": 436970 }, { "epoch": 0.8827272470173766, "grad_norm": 124.29317474365234, "learning_rate": 4.7616203179009445e-07, "loss": 20.3787, "step": 436980 }, { "epoch": 0.8827474476500604, "grad_norm": 133.61338806152344, "learning_rate": 4.760133737053174e-07, "loss": 7.5316, "step": 436990 }, { "epoch": 0.8827676482827442, "grad_norm": 119.6464614868164, "learning_rate": 4.758647376699033e-07, "loss": 16.7015, "step": 437000 }, { "epoch": 0.882787848915428, "grad_norm": 175.40997314453125, "learning_rate": 4.757161236845742e-07, "loss": 13.1048, "step": 437010 }, { "epoch": 0.8828080495481119, "grad_norm": 1037.2186279296875, "learning_rate": 4.755675317500569e-07, "loss": 30.6978, "step": 437020 }, { "epoch": 0.8828282501807957, "grad_norm": 235.47128295898438, "learning_rate": 4.7541896186707517e-07, "loss": 17.5572, "step": 437030 }, { "epoch": 0.8828484508134795, "grad_norm": 591.5972290039062, "learning_rate": 4.752704140363512e-07, "loss": 25.8775, "step": 437040 }, { "epoch": 0.8828686514461633, "grad_norm": 420.414794921875, "learning_rate": 4.751218882586106e-07, "loss": 30.0815, "step": 437050 }, { "epoch": 0.8828888520788472, "grad_norm": 216.95462036132812, "learning_rate": 4.749733845345783e-07, "loss": 17.1518, "step": 437060 }, { "epoch": 0.882909052711531, "grad_norm": 728.5482177734375, "learning_rate": 4.748249028649765e-07, "loss": 23.1488, "step": 437070 }, { "epoch": 0.8829292533442148, "grad_norm": 530.2343139648438, "learning_rate": 4.7467644325052855e-07, "loss": 16.5789, "step": 437080 }, { "epoch": 0.8829494539768985, "grad_norm": 219.0844268798828, "learning_rate": 4.7452800569195987e-07, "loss": 13.1149, "step": 437090 }, { "epoch": 0.8829696546095823, "grad_norm": 440.1877746582031, "learning_rate": 4.743795901899928e-07, "loss": 21.1264, "step": 437100 }, { "epoch": 0.8829898552422661, 
"grad_norm": 140.57644653320312, "learning_rate": 4.742311967453495e-07, "loss": 14.4177, "step": 437110 }, { "epoch": 0.88301005587495, "grad_norm": 552.50537109375, "learning_rate": 4.7408282535875593e-07, "loss": 29.2932, "step": 437120 }, { "epoch": 0.8830302565076338, "grad_norm": 141.6434326171875, "learning_rate": 4.739344760309322e-07, "loss": 14.1547, "step": 437130 }, { "epoch": 0.8830504571403176, "grad_norm": 53.88914108276367, "learning_rate": 4.737861487626039e-07, "loss": 11.355, "step": 437140 }, { "epoch": 0.8830706577730014, "grad_norm": 602.4818115234375, "learning_rate": 4.7363784355449303e-07, "loss": 21.5469, "step": 437150 }, { "epoch": 0.8830908584056852, "grad_norm": 606.8235473632812, "learning_rate": 4.734895604073214e-07, "loss": 14.414, "step": 437160 }, { "epoch": 0.8831110590383691, "grad_norm": 586.7005004882812, "learning_rate": 4.7334129932181283e-07, "loss": 34.3757, "step": 437170 }, { "epoch": 0.8831312596710529, "grad_norm": 158.1629638671875, "learning_rate": 4.731930602986906e-07, "loss": 9.9822, "step": 437180 }, { "epoch": 0.8831514603037367, "grad_norm": 417.0316162109375, "learning_rate": 4.730448433386764e-07, "loss": 14.5091, "step": 437190 }, { "epoch": 0.8831716609364205, "grad_norm": 861.2413940429688, "learning_rate": 4.728966484424913e-07, "loss": 26.3821, "step": 437200 }, { "epoch": 0.8831918615691043, "grad_norm": 502.9021301269531, "learning_rate": 4.727484756108602e-07, "loss": 14.3446, "step": 437210 }, { "epoch": 0.8832120622017882, "grad_norm": 319.57952880859375, "learning_rate": 4.726003248445038e-07, "loss": 17.3094, "step": 437220 }, { "epoch": 0.883232262834472, "grad_norm": 108.85161590576172, "learning_rate": 4.724521961441436e-07, "loss": 14.5947, "step": 437230 }, { "epoch": 0.8832524634671558, "grad_norm": 199.23394775390625, "learning_rate": 4.723040895105019e-07, "loss": 23.1646, "step": 437240 }, { "epoch": 0.8832726640998396, "grad_norm": 285.6567687988281, "learning_rate": 4.72156004944303e-07, "loss": 13.2874, "step": 437250 }, { "epoch": 0.8832928647325234, "grad_norm": 457.18017578125, "learning_rate": 4.720079424462648e-07, "loss": 20.3675, "step": 437260 }, { "epoch": 0.8833130653652073, "grad_norm": 428.0270080566406, "learning_rate": 4.718599020171105e-07, "loss": 18.1246, "step": 437270 }, { "epoch": 0.8833332659978911, "grad_norm": 94.82398223876953, "learning_rate": 4.7171188365756235e-07, "loss": 21.3042, "step": 437280 }, { "epoch": 0.8833534666305749, "grad_norm": 424.24517822265625, "learning_rate": 4.71563887368342e-07, "loss": 13.0863, "step": 437290 }, { "epoch": 0.8833736672632587, "grad_norm": 0.6462593078613281, "learning_rate": 4.714159131501689e-07, "loss": 8.5118, "step": 437300 }, { "epoch": 0.8833938678959425, "grad_norm": 181.89942932128906, "learning_rate": 4.7126796100376625e-07, "loss": 16.9697, "step": 437310 }, { "epoch": 0.8834140685286264, "grad_norm": 22.54423713684082, "learning_rate": 4.7112003092985414e-07, "loss": 11.9234, "step": 437320 }, { "epoch": 0.8834342691613102, "grad_norm": 212.46315002441406, "learning_rate": 4.7097212292915307e-07, "loss": 18.5752, "step": 437330 }, { "epoch": 0.883454469793994, "grad_norm": 273.62518310546875, "learning_rate": 4.7082423700238413e-07, "loss": 12.7955, "step": 437340 }, { "epoch": 0.8834746704266777, "grad_norm": 337.732421875, "learning_rate": 4.7067637315027005e-07, "loss": 11.8375, "step": 437350 }, { "epoch": 0.8834948710593615, "grad_norm": 203.64193725585938, "learning_rate": 4.705285313735297e-07, "loss": 9.4601, "step": 437360 
}, { "epoch": 0.8835150716920454, "grad_norm": 468.43768310546875, "learning_rate": 4.703807116728831e-07, "loss": 14.4001, "step": 437370 }, { "epoch": 0.8835352723247292, "grad_norm": 310.4579772949219, "learning_rate": 4.7023291404905245e-07, "loss": 13.6832, "step": 437380 }, { "epoch": 0.883555472957413, "grad_norm": 489.82421875, "learning_rate": 4.700851385027566e-07, "loss": 18.8303, "step": 437390 }, { "epoch": 0.8835756735900968, "grad_norm": 307.5848083496094, "learning_rate": 4.699373850347161e-07, "loss": 23.059, "step": 437400 }, { "epoch": 0.8835958742227806, "grad_norm": 587.0242919921875, "learning_rate": 4.69789653645652e-07, "loss": 21.7008, "step": 437410 }, { "epoch": 0.8836160748554645, "grad_norm": 445.4830322265625, "learning_rate": 4.6964194433628317e-07, "loss": 10.5014, "step": 437420 }, { "epoch": 0.8836362754881483, "grad_norm": 395.11370849609375, "learning_rate": 4.6949425710733076e-07, "loss": 9.3218, "step": 437430 }, { "epoch": 0.8836564761208321, "grad_norm": 300.8310852050781, "learning_rate": 4.693465919595136e-07, "loss": 10.4542, "step": 437440 }, { "epoch": 0.8836766767535159, "grad_norm": 375.7260437011719, "learning_rate": 4.691989488935511e-07, "loss": 17.3353, "step": 437450 }, { "epoch": 0.8836968773861997, "grad_norm": 503.7796325683594, "learning_rate": 4.690513279101638e-07, "loss": 17.4916, "step": 437460 }, { "epoch": 0.8837170780188836, "grad_norm": 682.6212158203125, "learning_rate": 4.689037290100712e-07, "loss": 22.9069, "step": 437470 }, { "epoch": 0.8837372786515674, "grad_norm": 276.8441162109375, "learning_rate": 4.687561521939915e-07, "loss": 14.4692, "step": 437480 }, { "epoch": 0.8837574792842512, "grad_norm": 87.65137481689453, "learning_rate": 4.686085974626442e-07, "loss": 25.7139, "step": 437490 }, { "epoch": 0.883777679916935, "grad_norm": 647.8373413085938, "learning_rate": 4.6846106481675035e-07, "loss": 35.8543, "step": 437500 }, { "epoch": 0.8837978805496188, "grad_norm": 23.456647872924805, "learning_rate": 4.683135542570277e-07, "loss": 15.9017, "step": 437510 }, { "epoch": 0.8838180811823027, "grad_norm": 539.7153930664062, "learning_rate": 4.681660657841941e-07, "loss": 12.9271, "step": 437520 }, { "epoch": 0.8838382818149865, "grad_norm": 442.73876953125, "learning_rate": 4.6801859939896997e-07, "loss": 19.9689, "step": 437530 }, { "epoch": 0.8838584824476703, "grad_norm": 325.82708740234375, "learning_rate": 4.678711551020743e-07, "loss": 19.9856, "step": 437540 }, { "epoch": 0.8838786830803541, "grad_norm": 2447.803466796875, "learning_rate": 4.677237328942236e-07, "loss": 16.8712, "step": 437550 }, { "epoch": 0.8838988837130379, "grad_norm": 508.7763671875, "learning_rate": 4.6757633277613734e-07, "loss": 15.6815, "step": 437560 }, { "epoch": 0.8839190843457218, "grad_norm": 592.2590942382812, "learning_rate": 4.674289547485367e-07, "loss": 19.2942, "step": 437570 }, { "epoch": 0.8839392849784056, "grad_norm": 809.1892700195312, "learning_rate": 4.672815988121354e-07, "loss": 19.2552, "step": 437580 }, { "epoch": 0.8839594856110894, "grad_norm": 3.522529125213623, "learning_rate": 4.6713426496765413e-07, "loss": 19.2758, "step": 437590 }, { "epoch": 0.8839796862437731, "grad_norm": 0.0, "learning_rate": 4.6698695321581165e-07, "loss": 19.1922, "step": 437600 }, { "epoch": 0.8839998868764569, "grad_norm": 352.0887145996094, "learning_rate": 4.6683966355732466e-07, "loss": 12.766, "step": 437610 }, { "epoch": 0.8840200875091407, "grad_norm": 241.43849182128906, "learning_rate": 4.6669239599291093e-07, "loss": 
17.8853, "step": 437620 }, { "epoch": 0.8840402881418246, "grad_norm": 608.0235595703125, "learning_rate": 4.665451505232882e-07, "loss": 14.7198, "step": 437630 }, { "epoch": 0.8840604887745084, "grad_norm": 361.3467712402344, "learning_rate": 4.663979271491764e-07, "loss": 21.5801, "step": 437640 }, { "epoch": 0.8840806894071922, "grad_norm": 461.13983154296875, "learning_rate": 4.662507258712895e-07, "loss": 12.0084, "step": 437650 }, { "epoch": 0.884100890039876, "grad_norm": 745.8554077148438, "learning_rate": 4.6610354669034686e-07, "loss": 23.2169, "step": 437660 }, { "epoch": 0.8841210906725598, "grad_norm": 785.3743896484375, "learning_rate": 4.6595638960706624e-07, "loss": 19.8191, "step": 437670 }, { "epoch": 0.8841412913052437, "grad_norm": 748.1947631835938, "learning_rate": 4.6580925462216487e-07, "loss": 21.8141, "step": 437680 }, { "epoch": 0.8841614919379275, "grad_norm": 268.165283203125, "learning_rate": 4.656621417363577e-07, "loss": 33.7911, "step": 437690 }, { "epoch": 0.8841816925706113, "grad_norm": 100.24395751953125, "learning_rate": 4.655150509503642e-07, "loss": 7.0177, "step": 437700 }, { "epoch": 0.8842018932032951, "grad_norm": 439.3194885253906, "learning_rate": 4.65367982264901e-07, "loss": 13.1335, "step": 437710 }, { "epoch": 0.8842220938359789, "grad_norm": 580.4093627929688, "learning_rate": 4.6522093568068307e-07, "loss": 21.3427, "step": 437720 }, { "epoch": 0.8842422944686628, "grad_norm": 221.25843811035156, "learning_rate": 4.650739111984287e-07, "loss": 17.7892, "step": 437730 }, { "epoch": 0.8842624951013466, "grad_norm": 306.7304992675781, "learning_rate": 4.649269088188535e-07, "loss": 11.2845, "step": 437740 }, { "epoch": 0.8842826957340304, "grad_norm": 288.1169738769531, "learning_rate": 4.647799285426757e-07, "loss": 19.4869, "step": 437750 }, { "epoch": 0.8843028963667142, "grad_norm": 726.6117553710938, "learning_rate": 4.646329703706104e-07, "loss": 37.4337, "step": 437760 }, { "epoch": 0.884323096999398, "grad_norm": 282.6123962402344, "learning_rate": 4.644860343033725e-07, "loss": 8.5612, "step": 437770 }, { "epoch": 0.8843432976320819, "grad_norm": 184.43309020996094, "learning_rate": 4.6433912034168083e-07, "loss": 10.7296, "step": 437780 }, { "epoch": 0.8843634982647657, "grad_norm": 313.0046691894531, "learning_rate": 4.6419222848624933e-07, "loss": 13.1905, "step": 437790 }, { "epoch": 0.8843836988974495, "grad_norm": 424.1170349121094, "learning_rate": 4.640453587377958e-07, "loss": 17.5901, "step": 437800 }, { "epoch": 0.8844038995301333, "grad_norm": 543.5725708007812, "learning_rate": 4.63898511097034e-07, "loss": 20.5456, "step": 437810 }, { "epoch": 0.8844241001628171, "grad_norm": 267.98577880859375, "learning_rate": 4.6375168556468175e-07, "loss": 27.1274, "step": 437820 }, { "epoch": 0.884444300795501, "grad_norm": 689.3540649414062, "learning_rate": 4.636048821414535e-07, "loss": 17.7252, "step": 437830 }, { "epoch": 0.8844645014281848, "grad_norm": 445.6986999511719, "learning_rate": 4.6345810082806363e-07, "loss": 18.1701, "step": 437840 }, { "epoch": 0.8844847020608686, "grad_norm": 373.1318359375, "learning_rate": 4.6331134162522994e-07, "loss": 15.1325, "step": 437850 }, { "epoch": 0.8845049026935523, "grad_norm": 377.1880187988281, "learning_rate": 4.631646045336663e-07, "loss": 16.7919, "step": 437860 }, { "epoch": 0.8845251033262361, "grad_norm": 185.0105438232422, "learning_rate": 4.6301788955408765e-07, "loss": 27.8121, "step": 437870 }, { "epoch": 0.88454530395892, "grad_norm": 563.5680541992188, 
"learning_rate": 4.62871196687209e-07, "loss": 18.6658, "step": 437880 }, { "epoch": 0.8845655045916038, "grad_norm": 231.3992919921875, "learning_rate": 4.6272452593374763e-07, "loss": 15.211, "step": 437890 }, { "epoch": 0.8845857052242876, "grad_norm": 88.52085876464844, "learning_rate": 4.625778772944156e-07, "loss": 17.0757, "step": 437900 }, { "epoch": 0.8846059058569714, "grad_norm": 181.04515075683594, "learning_rate": 4.6243125076992857e-07, "loss": 9.0486, "step": 437910 }, { "epoch": 0.8846261064896552, "grad_norm": 112.04206085205078, "learning_rate": 4.62284646361002e-07, "loss": 14.2957, "step": 437920 }, { "epoch": 0.884646307122339, "grad_norm": 370.3331604003906, "learning_rate": 4.6213806406834926e-07, "loss": 25.6922, "step": 437930 }, { "epoch": 0.8846665077550229, "grad_norm": 485.3958740234375, "learning_rate": 4.6199150389268476e-07, "loss": 13.4734, "step": 437940 }, { "epoch": 0.8846867083877067, "grad_norm": 288.86383056640625, "learning_rate": 4.6184496583472293e-07, "loss": 32.1188, "step": 437950 }, { "epoch": 0.8847069090203905, "grad_norm": 38.7264518737793, "learning_rate": 4.616984498951793e-07, "loss": 12.0045, "step": 437960 }, { "epoch": 0.8847271096530743, "grad_norm": 810.9624633789062, "learning_rate": 4.6155195607476723e-07, "loss": 14.6303, "step": 437970 }, { "epoch": 0.8847473102857581, "grad_norm": 460.4407958984375, "learning_rate": 4.614054843741994e-07, "loss": 9.7327, "step": 437980 }, { "epoch": 0.884767510918442, "grad_norm": 91.19581604003906, "learning_rate": 4.61259034794192e-07, "loss": 10.1597, "step": 437990 }, { "epoch": 0.8847877115511258, "grad_norm": 616.4536743164062, "learning_rate": 4.6111260733545714e-07, "loss": 28.4931, "step": 438000 }, { "epoch": 0.8848079121838096, "grad_norm": 162.45799255371094, "learning_rate": 4.6096620199870824e-07, "loss": 8.3742, "step": 438010 }, { "epoch": 0.8848281128164934, "grad_norm": 215.92393493652344, "learning_rate": 4.6081981878466077e-07, "loss": 18.8802, "step": 438020 }, { "epoch": 0.8848483134491772, "grad_norm": 255.54925537109375, "learning_rate": 4.606734576940253e-07, "loss": 16.6796, "step": 438030 }, { "epoch": 0.8848685140818611, "grad_norm": 0.0, "learning_rate": 4.6052711872751843e-07, "loss": 10.8918, "step": 438040 }, { "epoch": 0.8848887147145449, "grad_norm": 235.79869079589844, "learning_rate": 4.6038080188585135e-07, "loss": 21.5738, "step": 438050 }, { "epoch": 0.8849089153472287, "grad_norm": 143.0699920654297, "learning_rate": 4.602345071697373e-07, "loss": 13.8974, "step": 438060 }, { "epoch": 0.8849291159799125, "grad_norm": 160.64614868164062, "learning_rate": 4.600882345798902e-07, "loss": 24.4522, "step": 438070 }, { "epoch": 0.8849493166125963, "grad_norm": 119.09025573730469, "learning_rate": 4.599419841170216e-07, "loss": 17.8335, "step": 438080 }, { "epoch": 0.8849695172452802, "grad_norm": 638.122802734375, "learning_rate": 4.5979575578184554e-07, "loss": 18.7437, "step": 438090 }, { "epoch": 0.884989717877964, "grad_norm": 433.4074401855469, "learning_rate": 4.5964954957507414e-07, "loss": 22.9923, "step": 438100 }, { "epoch": 0.8850099185106478, "grad_norm": 442.2541809082031, "learning_rate": 4.595033654974207e-07, "loss": 11.311, "step": 438110 }, { "epoch": 0.8850301191433315, "grad_norm": 338.8622131347656, "learning_rate": 4.593572035495969e-07, "loss": 16.6048, "step": 438120 }, { "epoch": 0.8850503197760153, "grad_norm": 254.80043029785156, "learning_rate": 4.592110637323149e-07, "loss": 9.9581, "step": 438130 }, { "epoch": 0.8850705204086992, 
"grad_norm": 468.2026062011719, "learning_rate": 4.5906494604628816e-07, "loss": 17.1238, "step": 438140 }, { "epoch": 0.885090721041383, "grad_norm": 607.1072998046875, "learning_rate": 4.5891885049222815e-07, "loss": 10.1863, "step": 438150 }, { "epoch": 0.8851109216740668, "grad_norm": 680.0186157226562, "learning_rate": 4.587727770708461e-07, "loss": 15.1077, "step": 438160 }, { "epoch": 0.8851311223067506, "grad_norm": 596.2591552734375, "learning_rate": 4.5862672578285475e-07, "loss": 18.6161, "step": 438170 }, { "epoch": 0.8851513229394344, "grad_norm": 493.0876159667969, "learning_rate": 4.5848069662896786e-07, "loss": 18.7473, "step": 438180 }, { "epoch": 0.8851715235721183, "grad_norm": 283.8135070800781, "learning_rate": 4.5833468960989333e-07, "loss": 19.7269, "step": 438190 }, { "epoch": 0.8851917242048021, "grad_norm": 155.3894805908203, "learning_rate": 4.581887047263445e-07, "loss": 36.0125, "step": 438200 }, { "epoch": 0.8852119248374859, "grad_norm": 399.869384765625, "learning_rate": 4.5804274197903396e-07, "loss": 11.8754, "step": 438210 }, { "epoch": 0.8852321254701697, "grad_norm": 407.58941650390625, "learning_rate": 4.5789680136867245e-07, "loss": 14.0652, "step": 438220 }, { "epoch": 0.8852523261028535, "grad_norm": 232.7967071533203, "learning_rate": 4.577508828959698e-07, "loss": 12.1223, "step": 438230 }, { "epoch": 0.8852725267355374, "grad_norm": 279.1619873046875, "learning_rate": 4.5760498656163886e-07, "loss": 30.3401, "step": 438240 }, { "epoch": 0.8852927273682212, "grad_norm": 716.46875, "learning_rate": 4.5745911236639186e-07, "loss": 25.7322, "step": 438250 }, { "epoch": 0.885312928000905, "grad_norm": 555.6453857421875, "learning_rate": 4.5731326031093645e-07, "loss": 20.943, "step": 438260 }, { "epoch": 0.8853331286335888, "grad_norm": 371.71453857421875, "learning_rate": 4.57167430395985e-07, "loss": 17.1643, "step": 438270 }, { "epoch": 0.8853533292662726, "grad_norm": 604.136962890625, "learning_rate": 4.5702162262224957e-07, "loss": 15.5282, "step": 438280 }, { "epoch": 0.8853735298989565, "grad_norm": 481.5717468261719, "learning_rate": 4.5687583699044027e-07, "loss": 20.4883, "step": 438290 }, { "epoch": 0.8853937305316403, "grad_norm": 913.0048828125, "learning_rate": 4.567300735012653e-07, "loss": 19.11, "step": 438300 }, { "epoch": 0.8854139311643241, "grad_norm": 522.569580078125, "learning_rate": 4.565843321554386e-07, "loss": 7.5831, "step": 438310 }, { "epoch": 0.8854341317970079, "grad_norm": 355.49493408203125, "learning_rate": 4.5643861295366854e-07, "loss": 22.0143, "step": 438320 }, { "epoch": 0.8854543324296917, "grad_norm": 793.0252075195312, "learning_rate": 4.562929158966645e-07, "loss": 19.7702, "step": 438330 }, { "epoch": 0.8854745330623756, "grad_norm": 668.8243408203125, "learning_rate": 4.561472409851386e-07, "loss": 29.0895, "step": 438340 }, { "epoch": 0.8854947336950594, "grad_norm": 279.86346435546875, "learning_rate": 4.5600158821979933e-07, "loss": 23.1765, "step": 438350 }, { "epoch": 0.8855149343277432, "grad_norm": 11.049134254455566, "learning_rate": 4.5585595760135825e-07, "loss": 12.0037, "step": 438360 }, { "epoch": 0.8855351349604269, "grad_norm": 108.68246459960938, "learning_rate": 4.557103491305237e-07, "loss": 22.9215, "step": 438370 }, { "epoch": 0.8855553355931107, "grad_norm": 414.8169250488281, "learning_rate": 4.555647628080051e-07, "loss": 8.2424, "step": 438380 }, { "epoch": 0.8855755362257945, "grad_norm": 265.68408203125, "learning_rate": 4.554191986345136e-07, "loss": 17.2626, "step": 438390 }, 
{ "epoch": 0.8855957368584784, "grad_norm": 139.4207000732422, "learning_rate": 4.552736566107563e-07, "loss": 9.0719, "step": 438400 }, { "epoch": 0.8856159374911622, "grad_norm": 15.755369186401367, "learning_rate": 4.551281367374455e-07, "loss": 8.6049, "step": 438410 }, { "epoch": 0.885636138123846, "grad_norm": 264.4195556640625, "learning_rate": 4.5498263901528784e-07, "loss": 13.7838, "step": 438420 }, { "epoch": 0.8856563387565298, "grad_norm": 201.75054931640625, "learning_rate": 4.548371634449944e-07, "loss": 10.8078, "step": 438430 }, { "epoch": 0.8856765393892136, "grad_norm": 353.6030578613281, "learning_rate": 4.546917100272735e-07, "loss": 8.0673, "step": 438440 }, { "epoch": 0.8856967400218975, "grad_norm": 319.56085205078125, "learning_rate": 4.5454627876283295e-07, "loss": 18.6831, "step": 438450 }, { "epoch": 0.8857169406545813, "grad_norm": 398.4009094238281, "learning_rate": 4.5440086965238326e-07, "loss": 12.8228, "step": 438460 }, { "epoch": 0.8857371412872651, "grad_norm": 1338.2640380859375, "learning_rate": 4.542554826966328e-07, "loss": 14.2016, "step": 438470 }, { "epoch": 0.8857573419199489, "grad_norm": 801.7337646484375, "learning_rate": 4.541101178962887e-07, "loss": 23.2571, "step": 438480 }, { "epoch": 0.8857775425526327, "grad_norm": 412.7310485839844, "learning_rate": 4.539647752520604e-07, "loss": 16.6522, "step": 438490 }, { "epoch": 0.8857977431853166, "grad_norm": 84.2537841796875, "learning_rate": 4.538194547646574e-07, "loss": 29.8909, "step": 438500 }, { "epoch": 0.8858179438180004, "grad_norm": 494.4392395019531, "learning_rate": 4.5367415643478683e-07, "loss": 27.2246, "step": 438510 }, { "epoch": 0.8858381444506842, "grad_norm": 822.8221435546875, "learning_rate": 4.5352888026315654e-07, "loss": 27.4212, "step": 438520 }, { "epoch": 0.885858345083368, "grad_norm": 83.42070007324219, "learning_rate": 4.533836262504759e-07, "loss": 15.1378, "step": 438530 }, { "epoch": 0.8858785457160518, "grad_norm": 133.17446899414062, "learning_rate": 4.5323839439745163e-07, "loss": 42.554, "step": 438540 }, { "epoch": 0.8858987463487357, "grad_norm": 579.7252807617188, "learning_rate": 4.5309318470479144e-07, "loss": 18.9857, "step": 438550 }, { "epoch": 0.8859189469814195, "grad_norm": 142.4523162841797, "learning_rate": 4.529479971732031e-07, "loss": 15.7356, "step": 438560 }, { "epoch": 0.8859391476141033, "grad_norm": 598.0130004882812, "learning_rate": 4.528028318033961e-07, "loss": 20.5556, "step": 438570 }, { "epoch": 0.8859593482467871, "grad_norm": 274.994384765625, "learning_rate": 4.526576885960765e-07, "loss": 14.0988, "step": 438580 }, { "epoch": 0.885979548879471, "grad_norm": 882.8165893554688, "learning_rate": 4.5251256755195093e-07, "loss": 17.3218, "step": 438590 }, { "epoch": 0.8859997495121548, "grad_norm": 356.706787109375, "learning_rate": 4.523674686717283e-07, "loss": 13.0502, "step": 438600 }, { "epoch": 0.8860199501448386, "grad_norm": 363.36566162109375, "learning_rate": 4.522223919561153e-07, "loss": 7.7626, "step": 438610 }, { "epoch": 0.8860401507775224, "grad_norm": 416.0433654785156, "learning_rate": 4.520773374058179e-07, "loss": 14.5136, "step": 438620 }, { "epoch": 0.8860603514102061, "grad_norm": 344.8145446777344, "learning_rate": 4.519323050215446e-07, "loss": 9.0433, "step": 438630 }, { "epoch": 0.8860805520428899, "grad_norm": 2461.770263671875, "learning_rate": 4.5178729480400084e-07, "loss": 44.5676, "step": 438640 }, { "epoch": 0.8861007526755738, "grad_norm": 302.51043701171875, "learning_rate": 
4.51642306753895e-07, "loss": 11.8257, "step": 438650 }, { "epoch": 0.8861209533082576, "grad_norm": 52.07389831542969, "learning_rate": 4.514973408719331e-07, "loss": 23.1165, "step": 438660 }, { "epoch": 0.8861411539409414, "grad_norm": 351.7831115722656, "learning_rate": 4.513523971588202e-07, "loss": 15.1223, "step": 438670 }, { "epoch": 0.8861613545736252, "grad_norm": 761.6973876953125, "learning_rate": 4.512074756152651e-07, "loss": 88.1393, "step": 438680 }, { "epoch": 0.886181555206309, "grad_norm": 458.8019104003906, "learning_rate": 4.5106257624197237e-07, "loss": 20.716, "step": 438690 }, { "epoch": 0.8862017558389929, "grad_norm": 499.322998046875, "learning_rate": 4.5091769903964965e-07, "loss": 18.8507, "step": 438700 }, { "epoch": 0.8862219564716767, "grad_norm": 28.082361221313477, "learning_rate": 4.5077284400900147e-07, "loss": 32.0778, "step": 438710 }, { "epoch": 0.8862421571043605, "grad_norm": 210.52737426757812, "learning_rate": 4.5062801115073607e-07, "loss": 17.0969, "step": 438720 }, { "epoch": 0.8862623577370443, "grad_norm": 71.61561584472656, "learning_rate": 4.504832004655574e-07, "loss": 10.2539, "step": 438730 }, { "epoch": 0.8862825583697281, "grad_norm": 459.04461669921875, "learning_rate": 4.503384119541709e-07, "loss": 13.2212, "step": 438740 }, { "epoch": 0.886302759002412, "grad_norm": 210.68026733398438, "learning_rate": 4.501936456172845e-07, "loss": 28.052, "step": 438750 }, { "epoch": 0.8863229596350958, "grad_norm": 465.93450927734375, "learning_rate": 4.50048901455602e-07, "loss": 21.7153, "step": 438760 }, { "epoch": 0.8863431602677796, "grad_norm": 143.4450225830078, "learning_rate": 4.4990417946982836e-07, "loss": 10.5338, "step": 438770 }, { "epoch": 0.8863633609004634, "grad_norm": 1098.5234375, "learning_rate": 4.4975947966067023e-07, "loss": 29.5691, "step": 438780 }, { "epoch": 0.8863835615331472, "grad_norm": 390.75439453125, "learning_rate": 4.4961480202883434e-07, "loss": 20.115, "step": 438790 }, { "epoch": 0.886403762165831, "grad_norm": 428.8589782714844, "learning_rate": 4.494701465750217e-07, "loss": 25.0542, "step": 438800 }, { "epoch": 0.8864239627985149, "grad_norm": 109.70050811767578, "learning_rate": 4.4932551329994023e-07, "loss": 13.2468, "step": 438810 }, { "epoch": 0.8864441634311987, "grad_norm": 313.089111328125, "learning_rate": 4.4918090220429476e-07, "loss": 11.3645, "step": 438820 }, { "epoch": 0.8864643640638825, "grad_norm": 143.77685546875, "learning_rate": 4.490363132887904e-07, "loss": 11.8531, "step": 438830 }, { "epoch": 0.8864845646965663, "grad_norm": 922.7874145507812, "learning_rate": 4.4889174655412924e-07, "loss": 20.0964, "step": 438840 }, { "epoch": 0.8865047653292502, "grad_norm": 204.7127685546875, "learning_rate": 4.487472020010181e-07, "loss": 9.0205, "step": 438850 }, { "epoch": 0.886524965961934, "grad_norm": 307.23052978515625, "learning_rate": 4.4860267963016293e-07, "loss": 19.5553, "step": 438860 }, { "epoch": 0.8865451665946178, "grad_norm": 117.01416778564453, "learning_rate": 4.484581794422643e-07, "loss": 15.8075, "step": 438870 }, { "epoch": 0.8865653672273015, "grad_norm": 903.5155029296875, "learning_rate": 4.48313701438029e-07, "loss": 22.7876, "step": 438880 }, { "epoch": 0.8865855678599853, "grad_norm": 392.1251220703125, "learning_rate": 4.4816924561816076e-07, "loss": 15.337, "step": 438890 }, { "epoch": 0.8866057684926691, "grad_norm": 690.133056640625, "learning_rate": 4.480248119833641e-07, "loss": 12.9416, "step": 438900 }, { "epoch": 0.886625969125353, "grad_norm": 
261.4015197753906, "learning_rate": 4.4788040053434124e-07, "loss": 16.3067, "step": 438910 }, { "epoch": 0.8866461697580368, "grad_norm": 405.3455810546875, "learning_rate": 4.477360112717982e-07, "loss": 21.6496, "step": 438920 }, { "epoch": 0.8866663703907206, "grad_norm": 555.2881469726562, "learning_rate": 4.475916441964379e-07, "loss": 18.7047, "step": 438930 }, { "epoch": 0.8866865710234044, "grad_norm": 474.69940185546875, "learning_rate": 4.474472993089629e-07, "loss": 15.6627, "step": 438940 }, { "epoch": 0.8867067716560882, "grad_norm": 689.9981689453125, "learning_rate": 4.473029766100784e-07, "loss": 37.1921, "step": 438950 }, { "epoch": 0.8867269722887721, "grad_norm": 55.59153747558594, "learning_rate": 4.471586761004859e-07, "loss": 6.5716, "step": 438960 }, { "epoch": 0.8867471729214559, "grad_norm": 115.98567199707031, "learning_rate": 4.4701439778089105e-07, "loss": 18.5614, "step": 438970 }, { "epoch": 0.8867673735541397, "grad_norm": 6.016637802124023, "learning_rate": 4.4687014165199547e-07, "loss": 12.5303, "step": 438980 }, { "epoch": 0.8867875741868235, "grad_norm": 264.12884521484375, "learning_rate": 4.46725907714502e-07, "loss": 36.1123, "step": 438990 }, { "epoch": 0.8868077748195073, "grad_norm": 352.121826171875, "learning_rate": 4.4658169596911493e-07, "loss": 18.2095, "step": 439000 }, { "epoch": 0.8868279754521912, "grad_norm": 496.9523620605469, "learning_rate": 4.464375064165355e-07, "loss": 17.7458, "step": 439010 }, { "epoch": 0.886848176084875, "grad_norm": 726.5328979492188, "learning_rate": 4.4629333905746864e-07, "loss": 24.0773, "step": 439020 }, { "epoch": 0.8868683767175588, "grad_norm": 915.4989624023438, "learning_rate": 4.461491938926144e-07, "loss": 25.1332, "step": 439030 }, { "epoch": 0.8868885773502426, "grad_norm": 90.11317443847656, "learning_rate": 4.4600507092267767e-07, "loss": 15.5745, "step": 439040 }, { "epoch": 0.8869087779829264, "grad_norm": 420.9406433105469, "learning_rate": 4.4586097014836017e-07, "loss": 11.2216, "step": 439050 }, { "epoch": 0.8869289786156103, "grad_norm": 325.19110107421875, "learning_rate": 4.4571689157036244e-07, "loss": 9.7093, "step": 439060 }, { "epoch": 0.8869491792482941, "grad_norm": 156.01129150390625, "learning_rate": 4.455728351893895e-07, "loss": 19.7621, "step": 439070 }, { "epoch": 0.8869693798809779, "grad_norm": 1102.5689697265625, "learning_rate": 4.454288010061425e-07, "loss": 33.3687, "step": 439080 }, { "epoch": 0.8869895805136617, "grad_norm": 310.2066955566406, "learning_rate": 4.4528478902132187e-07, "loss": 12.1877, "step": 439090 }, { "epoch": 0.8870097811463455, "grad_norm": 66.99678039550781, "learning_rate": 4.4514079923563103e-07, "loss": 15.1627, "step": 439100 }, { "epoch": 0.8870299817790294, "grad_norm": 365.5875244140625, "learning_rate": 4.449968316497721e-07, "loss": 15.9574, "step": 439110 }, { "epoch": 0.8870501824117132, "grad_norm": 398.55133056640625, "learning_rate": 4.448528862644458e-07, "loss": 12.7694, "step": 439120 }, { "epoch": 0.887070383044397, "grad_norm": 465.5559387207031, "learning_rate": 4.447089630803536e-07, "loss": 25.2559, "step": 439130 }, { "epoch": 0.8870905836770807, "grad_norm": 1330.308837890625, "learning_rate": 4.445650620981984e-07, "loss": 17.1388, "step": 439140 }, { "epoch": 0.8871107843097645, "grad_norm": 277.4283752441406, "learning_rate": 4.444211833186807e-07, "loss": 15.4694, "step": 439150 }, { "epoch": 0.8871309849424484, "grad_norm": 719.4064331054688, "learning_rate": 4.4427732674250045e-07, "loss": 19.9625, "step": 439160 
}, { "epoch": 0.8871511855751322, "grad_norm": 396.2528076171875, "learning_rate": 4.4413349237036e-07, "loss": 15.4088, "step": 439170 }, { "epoch": 0.887171386207816, "grad_norm": 250.67013549804688, "learning_rate": 4.4398968020296143e-07, "loss": 20.414, "step": 439180 }, { "epoch": 0.8871915868404998, "grad_norm": 535.2349853515625, "learning_rate": 4.4384589024100423e-07, "loss": 28.5496, "step": 439190 }, { "epoch": 0.8872117874731836, "grad_norm": 499.7408447265625, "learning_rate": 4.4370212248518895e-07, "loss": 18.146, "step": 439200 }, { "epoch": 0.8872319881058675, "grad_norm": 579.1659545898438, "learning_rate": 4.4355837693621786e-07, "loss": 22.3271, "step": 439210 }, { "epoch": 0.8872521887385513, "grad_norm": 432.2957458496094, "learning_rate": 4.434146535947903e-07, "loss": 20.5266, "step": 439220 }, { "epoch": 0.8872723893712351, "grad_norm": 116.518310546875, "learning_rate": 4.4327095246160636e-07, "loss": 15.6317, "step": 439230 }, { "epoch": 0.8872925900039189, "grad_norm": 329.53875732421875, "learning_rate": 4.4312727353736816e-07, "loss": 48.8665, "step": 439240 }, { "epoch": 0.8873127906366027, "grad_norm": 527.8027954101562, "learning_rate": 4.4298361682277355e-07, "loss": 17.8723, "step": 439250 }, { "epoch": 0.8873329912692866, "grad_norm": 389.74847412109375, "learning_rate": 4.428399823185253e-07, "loss": 16.563, "step": 439260 }, { "epoch": 0.8873531919019704, "grad_norm": 277.07861328125, "learning_rate": 4.426963700253223e-07, "loss": 19.9283, "step": 439270 }, { "epoch": 0.8873733925346542, "grad_norm": 542.4605712890625, "learning_rate": 4.425527799438639e-07, "loss": 12.7354, "step": 439280 }, { "epoch": 0.887393593167338, "grad_norm": 369.82061767578125, "learning_rate": 4.4240921207485077e-07, "loss": 30.9794, "step": 439290 }, { "epoch": 0.8874137938000218, "grad_norm": 419.03533935546875, "learning_rate": 4.4226566641898173e-07, "loss": 10.5437, "step": 439300 }, { "epoch": 0.8874339944327057, "grad_norm": 9.967693328857422, "learning_rate": 4.421221429769579e-07, "loss": 13.8394, "step": 439310 }, { "epoch": 0.8874541950653895, "grad_norm": 428.9529113769531, "learning_rate": 4.4197864174947755e-07, "loss": 11.1389, "step": 439320 }, { "epoch": 0.8874743956980733, "grad_norm": 293.9654235839844, "learning_rate": 4.418351627372408e-07, "loss": 20.6097, "step": 439330 }, { "epoch": 0.8874945963307571, "grad_norm": 1044.8182373046875, "learning_rate": 4.416917059409465e-07, "loss": 22.0248, "step": 439340 }, { "epoch": 0.8875147969634409, "grad_norm": 149.45436096191406, "learning_rate": 4.415482713612934e-07, "loss": 15.2085, "step": 439350 }, { "epoch": 0.8875349975961248, "grad_norm": 171.68978881835938, "learning_rate": 4.414048589989822e-07, "loss": 14.8334, "step": 439360 }, { "epoch": 0.8875551982288086, "grad_norm": 443.9935607910156, "learning_rate": 4.4126146885471067e-07, "loss": 22.9411, "step": 439370 }, { "epoch": 0.8875753988614924, "grad_norm": 236.66659545898438, "learning_rate": 4.411181009291765e-07, "loss": 28.307, "step": 439380 }, { "epoch": 0.8875955994941762, "grad_norm": 205.0904541015625, "learning_rate": 4.409747552230803e-07, "loss": 13.7862, "step": 439390 }, { "epoch": 0.8876158001268599, "grad_norm": 697.9765625, "learning_rate": 4.4083143173712207e-07, "loss": 15.5063, "step": 439400 }, { "epoch": 0.8876360007595437, "grad_norm": 472.9891052246094, "learning_rate": 4.406881304719962e-07, "loss": 16.3077, "step": 439410 }, { "epoch": 0.8876562013922276, "grad_norm": 50.7934684753418, "learning_rate": 
4.405448514284039e-07, "loss": 32.6937, "step": 439420 }, { "epoch": 0.8876764020249114, "grad_norm": 613.9843139648438, "learning_rate": 4.404015946070439e-07, "loss": 22.2959, "step": 439430 }, { "epoch": 0.8876966026575952, "grad_norm": 435.0783996582031, "learning_rate": 4.40258360008613e-07, "loss": 27.7951, "step": 439440 }, { "epoch": 0.887716803290279, "grad_norm": 777.6680908203125, "learning_rate": 4.401151476338095e-07, "loss": 42.459, "step": 439450 }, { "epoch": 0.8877370039229628, "grad_norm": 434.35491943359375, "learning_rate": 4.3997195748333113e-07, "loss": 11.0759, "step": 439460 }, { "epoch": 0.8877572045556467, "grad_norm": 22.04349136352539, "learning_rate": 4.3982878955787844e-07, "loss": 14.6108, "step": 439470 }, { "epoch": 0.8877774051883305, "grad_norm": 79.72455596923828, "learning_rate": 4.396856438581454e-07, "loss": 32.3698, "step": 439480 }, { "epoch": 0.8877976058210143, "grad_norm": 269.5208435058594, "learning_rate": 4.395425203848314e-07, "loss": 16.2407, "step": 439490 }, { "epoch": 0.8878178064536981, "grad_norm": 467.6042785644531, "learning_rate": 4.3939941913863525e-07, "loss": 17.5712, "step": 439500 }, { "epoch": 0.8878380070863819, "grad_norm": 170.37184143066406, "learning_rate": 4.392563401202526e-07, "loss": 27.6455, "step": 439510 }, { "epoch": 0.8878582077190658, "grad_norm": 74.18937683105469, "learning_rate": 4.391132833303807e-07, "loss": 19.3106, "step": 439520 }, { "epoch": 0.8878784083517496, "grad_norm": 343.9047546386719, "learning_rate": 4.389702487697189e-07, "loss": 12.2741, "step": 439530 }, { "epoch": 0.8878986089844334, "grad_norm": 33.56159973144531, "learning_rate": 4.388272364389623e-07, "loss": 12.6946, "step": 439540 }, { "epoch": 0.8879188096171172, "grad_norm": 324.51470947265625, "learning_rate": 4.38684246338808e-07, "loss": 22.468, "step": 439550 }, { "epoch": 0.887939010249801, "grad_norm": 296.24420166015625, "learning_rate": 4.385412784699544e-07, "loss": 32.3633, "step": 439560 }, { "epoch": 0.8879592108824849, "grad_norm": 791.6322021484375, "learning_rate": 4.3839833283309597e-07, "loss": 28.1926, "step": 439570 }, { "epoch": 0.8879794115151687, "grad_norm": 354.2049255371094, "learning_rate": 4.3825540942893206e-07, "loss": 19.6999, "step": 439580 }, { "epoch": 0.8879996121478525, "grad_norm": 276.34783935546875, "learning_rate": 4.381125082581583e-07, "loss": 20.3004, "step": 439590 }, { "epoch": 0.8880198127805363, "grad_norm": 525.9163818359375, "learning_rate": 4.379696293214697e-07, "loss": 21.0685, "step": 439600 }, { "epoch": 0.8880400134132201, "grad_norm": 405.0637512207031, "learning_rate": 4.378267726195645e-07, "loss": 16.6269, "step": 439610 }, { "epoch": 0.888060214045904, "grad_norm": 875.94873046875, "learning_rate": 4.3768393815313723e-07, "loss": 20.4529, "step": 439620 }, { "epoch": 0.8880804146785878, "grad_norm": 78.52611541748047, "learning_rate": 4.375411259228868e-07, "loss": 16.3166, "step": 439630 }, { "epoch": 0.8881006153112716, "grad_norm": 154.6929931640625, "learning_rate": 4.373983359295059e-07, "loss": 18.9563, "step": 439640 }, { "epoch": 0.8881208159439553, "grad_norm": 183.49374389648438, "learning_rate": 4.372555681736934e-07, "loss": 23.9325, "step": 439650 }, { "epoch": 0.8881410165766391, "grad_norm": 517.3644409179688, "learning_rate": 4.3711282265614385e-07, "loss": 17.1077, "step": 439660 }, { "epoch": 0.888161217209323, "grad_norm": 266.4991455078125, "learning_rate": 4.369700993775522e-07, "loss": 11.7718, "step": 439670 }, { "epoch": 0.8881814178420068, "grad_norm": 
103.00946807861328, "learning_rate": 4.368273983386157e-07, "loss": 28.2479, "step": 439680 }, { "epoch": 0.8882016184746906, "grad_norm": 126.4054946899414, "learning_rate": 4.3668471954002864e-07, "loss": 15.4523, "step": 439690 }, { "epoch": 0.8882218191073744, "grad_norm": 629.47265625, "learning_rate": 4.3654206298248625e-07, "loss": 17.5671, "step": 439700 }, { "epoch": 0.8882420197400582, "grad_norm": 250.27529907226562, "learning_rate": 4.363994286666845e-07, "loss": 20.4415, "step": 439710 }, { "epoch": 0.888262220372742, "grad_norm": 315.4422607421875, "learning_rate": 4.3625681659331895e-07, "loss": 18.5077, "step": 439720 }, { "epoch": 0.8882824210054259, "grad_norm": 205.74395751953125, "learning_rate": 4.3611422676308413e-07, "loss": 19.0239, "step": 439730 }, { "epoch": 0.8883026216381097, "grad_norm": 144.6514892578125, "learning_rate": 4.359716591766744e-07, "loss": 10.1121, "step": 439740 }, { "epoch": 0.8883228222707935, "grad_norm": 94.45883178710938, "learning_rate": 4.3582911383478646e-07, "loss": 23.2051, "step": 439750 }, { "epoch": 0.8883430229034773, "grad_norm": 108.36637878417969, "learning_rate": 4.3568659073811306e-07, "loss": 14.929, "step": 439760 }, { "epoch": 0.8883632235361612, "grad_norm": 606.2430419921875, "learning_rate": 4.355440898873492e-07, "loss": 10.0484, "step": 439770 }, { "epoch": 0.888383424168845, "grad_norm": 298.37127685546875, "learning_rate": 4.354016112831899e-07, "loss": 7.4637, "step": 439780 }, { "epoch": 0.8884036248015288, "grad_norm": 760.9266357421875, "learning_rate": 4.352591549263302e-07, "loss": 12.9479, "step": 439790 }, { "epoch": 0.8884238254342126, "grad_norm": 621.0365600585938, "learning_rate": 4.3511672081746393e-07, "loss": 14.3445, "step": 439800 }, { "epoch": 0.8884440260668964, "grad_norm": 407.5010070800781, "learning_rate": 4.3497430895728444e-07, "loss": 15.4422, "step": 439810 }, { "epoch": 0.8884642266995803, "grad_norm": 734.0892333984375, "learning_rate": 4.348319193464867e-07, "loss": 36.5453, "step": 439820 }, { "epoch": 0.8884844273322641, "grad_norm": 274.6903991699219, "learning_rate": 4.3468955198576524e-07, "loss": 13.928, "step": 439830 }, { "epoch": 0.8885046279649479, "grad_norm": 403.5230712890625, "learning_rate": 4.3454720687581165e-07, "loss": 23.6129, "step": 439840 }, { "epoch": 0.8885248285976317, "grad_norm": 741.6392211914062, "learning_rate": 4.344048840173226e-07, "loss": 13.0167, "step": 439850 }, { "epoch": 0.8885450292303155, "grad_norm": 252.17994689941406, "learning_rate": 4.3426258341098925e-07, "loss": 6.1833, "step": 439860 }, { "epoch": 0.8885652298629994, "grad_norm": 793.151611328125, "learning_rate": 4.341203050575077e-07, "loss": 20.799, "step": 439870 }, { "epoch": 0.8885854304956832, "grad_norm": 447.8883056640625, "learning_rate": 4.3397804895756957e-07, "loss": 25.9996, "step": 439880 }, { "epoch": 0.888605631128367, "grad_norm": 0.031039610505104065, "learning_rate": 4.338358151118677e-07, "loss": 9.6498, "step": 439890 }, { "epoch": 0.8886258317610508, "grad_norm": 219.12828063964844, "learning_rate": 4.33693603521097e-07, "loss": 6.6652, "step": 439900 }, { "epoch": 0.8886460323937345, "grad_norm": 318.5906677246094, "learning_rate": 4.3355141418594926e-07, "loss": 21.3146, "step": 439910 }, { "epoch": 0.8886662330264183, "grad_norm": 336.47918701171875, "learning_rate": 4.334092471071194e-07, "loss": 17.1244, "step": 439920 }, { "epoch": 0.8886864336591022, "grad_norm": 483.5345764160156, "learning_rate": 4.3326710228529746e-07, "loss": 16.6284, "step": 439930 }, { 
"epoch": 0.888706634291786, "grad_norm": 651.7122192382812, "learning_rate": 4.3312497972117895e-07, "loss": 16.1954, "step": 439940 }, { "epoch": 0.8887268349244698, "grad_norm": 283.4456481933594, "learning_rate": 4.32982879415455e-07, "loss": 19.7755, "step": 439950 }, { "epoch": 0.8887470355571536, "grad_norm": 185.06582641601562, "learning_rate": 4.3284080136881847e-07, "loss": 25.6656, "step": 439960 }, { "epoch": 0.8887672361898374, "grad_norm": 255.2400360107422, "learning_rate": 4.32698745581962e-07, "loss": 14.7366, "step": 439970 }, { "epoch": 0.8887874368225213, "grad_norm": 445.416015625, "learning_rate": 4.325567120555785e-07, "loss": 11.0359, "step": 439980 }, { "epoch": 0.8888076374552051, "grad_norm": 338.63629150390625, "learning_rate": 4.324147007903584e-07, "loss": 17.1912, "step": 439990 }, { "epoch": 0.8888278380878889, "grad_norm": 143.6701202392578, "learning_rate": 4.322727117869951e-07, "loss": 9.3195, "step": 440000 }, { "epoch": 0.8888480387205727, "grad_norm": 117.09489440917969, "learning_rate": 4.3213074504618256e-07, "loss": 12.1255, "step": 440010 }, { "epoch": 0.8888682393532565, "grad_norm": 437.0494689941406, "learning_rate": 4.31988800568609e-07, "loss": 21.9564, "step": 440020 }, { "epoch": 0.8888884399859404, "grad_norm": 12.793342590332031, "learning_rate": 4.3184687835496784e-07, "loss": 21.4166, "step": 440030 }, { "epoch": 0.8889086406186242, "grad_norm": 170.0322723388672, "learning_rate": 4.317049784059518e-07, "loss": 12.801, "step": 440040 }, { "epoch": 0.888928841251308, "grad_norm": 349.74859619140625, "learning_rate": 4.315631007222515e-07, "loss": 14.7523, "step": 440050 }, { "epoch": 0.8889490418839918, "grad_norm": 217.57176208496094, "learning_rate": 4.31421245304558e-07, "loss": 19.6759, "step": 440060 }, { "epoch": 0.8889692425166756, "grad_norm": 264.9936828613281, "learning_rate": 4.3127941215356296e-07, "loss": 18.8373, "step": 440070 }, { "epoch": 0.8889894431493595, "grad_norm": 885.3839721679688, "learning_rate": 4.3113760126995974e-07, "loss": 22.7676, "step": 440080 }, { "epoch": 0.8890096437820433, "grad_norm": 178.55599975585938, "learning_rate": 4.309958126544361e-07, "loss": 19.6819, "step": 440090 }, { "epoch": 0.8890298444147271, "grad_norm": 23.563514709472656, "learning_rate": 4.308540463076849e-07, "loss": 8.1549, "step": 440100 }, { "epoch": 0.8890500450474109, "grad_norm": 475.083251953125, "learning_rate": 4.3071230223039774e-07, "loss": 22.2255, "step": 440110 }, { "epoch": 0.8890702456800947, "grad_norm": 310.59912109375, "learning_rate": 4.3057058042326407e-07, "loss": 18.2259, "step": 440120 }, { "epoch": 0.8890904463127786, "grad_norm": 249.81739807128906, "learning_rate": 4.30428880886975e-07, "loss": 9.7312, "step": 440130 }, { "epoch": 0.8891106469454624, "grad_norm": 157.1270751953125, "learning_rate": 4.3028720362222166e-07, "loss": 22.6873, "step": 440140 }, { "epoch": 0.8891308475781462, "grad_norm": 368.5738220214844, "learning_rate": 4.301455486296946e-07, "loss": 21.0228, "step": 440150 }, { "epoch": 0.8891510482108299, "grad_norm": 474.0268249511719, "learning_rate": 4.300039159100827e-07, "loss": 18.5053, "step": 440160 }, { "epoch": 0.8891712488435137, "grad_norm": 402.1085510253906, "learning_rate": 4.298623054640788e-07, "loss": 14.8153, "step": 440170 }, { "epoch": 0.8891914494761975, "grad_norm": 357.016357421875, "learning_rate": 4.2972071729237065e-07, "loss": 11.0594, "step": 440180 }, { "epoch": 0.8892116501088814, "grad_norm": 11.80178165435791, "learning_rate": 4.295791513956504e-07, 
"loss": 18.0563, "step": 440190 }, { "epoch": 0.8892318507415652, "grad_norm": 288.9800109863281, "learning_rate": 4.29437607774606e-07, "loss": 17.9754, "step": 440200 }, { "epoch": 0.889252051374249, "grad_norm": 327.2669677734375, "learning_rate": 4.2929608642992894e-07, "loss": 28.796, "step": 440210 }, { "epoch": 0.8892722520069328, "grad_norm": 452.38348388671875, "learning_rate": 4.291545873623087e-07, "loss": 16.7511, "step": 440220 }, { "epoch": 0.8892924526396166, "grad_norm": 530.0560302734375, "learning_rate": 4.2901311057243377e-07, "loss": 10.8369, "step": 440230 }, { "epoch": 0.8893126532723005, "grad_norm": 489.7251892089844, "learning_rate": 4.2887165606099513e-07, "loss": 18.7304, "step": 440240 }, { "epoch": 0.8893328539049843, "grad_norm": 356.1235656738281, "learning_rate": 4.2873022382868115e-07, "loss": 18.1181, "step": 440250 }, { "epoch": 0.8893530545376681, "grad_norm": 407.93499755859375, "learning_rate": 4.2858881387618235e-07, "loss": 16.1849, "step": 440260 }, { "epoch": 0.8893732551703519, "grad_norm": 684.4132080078125, "learning_rate": 4.284474262041871e-07, "loss": 24.9747, "step": 440270 }, { "epoch": 0.8893934558030357, "grad_norm": 216.8534698486328, "learning_rate": 4.283060608133843e-07, "loss": 17.7716, "step": 440280 }, { "epoch": 0.8894136564357196, "grad_norm": 476.00628662109375, "learning_rate": 4.2816471770446343e-07, "loss": 14.6045, "step": 440290 }, { "epoch": 0.8894338570684034, "grad_norm": 329.6189880371094, "learning_rate": 4.280233968781139e-07, "loss": 14.645, "step": 440300 }, { "epoch": 0.8894540577010872, "grad_norm": 216.16668701171875, "learning_rate": 4.2788209833502237e-07, "loss": 15.3206, "step": 440310 }, { "epoch": 0.889474258333771, "grad_norm": 514.588134765625, "learning_rate": 4.277408220758794e-07, "loss": 20.1569, "step": 440320 }, { "epoch": 0.8894944589664548, "grad_norm": 256.3499755859375, "learning_rate": 4.275995681013745e-07, "loss": 30.1647, "step": 440330 }, { "epoch": 0.8895146595991387, "grad_norm": 737.529296875, "learning_rate": 4.2745833641219317e-07, "loss": 16.7913, "step": 440340 }, { "epoch": 0.8895348602318225, "grad_norm": 97.90251159667969, "learning_rate": 4.273171270090254e-07, "loss": 6.7977, "step": 440350 }, { "epoch": 0.8895550608645063, "grad_norm": 356.3243408203125, "learning_rate": 4.271759398925601e-07, "loss": 13.1676, "step": 440360 }, { "epoch": 0.8895752614971901, "grad_norm": 435.6765441894531, "learning_rate": 4.270347750634846e-07, "loss": 18.4452, "step": 440370 }, { "epoch": 0.889595462129874, "grad_norm": 663.2234497070312, "learning_rate": 4.2689363252248595e-07, "loss": 21.1937, "step": 440380 }, { "epoch": 0.8896156627625578, "grad_norm": 155.16162109375, "learning_rate": 4.2675251227025315e-07, "loss": 19.0472, "step": 440390 }, { "epoch": 0.8896358633952416, "grad_norm": 344.0484313964844, "learning_rate": 4.266114143074751e-07, "loss": 10.4041, "step": 440400 }, { "epoch": 0.8896560640279254, "grad_norm": 165.12008666992188, "learning_rate": 4.264703386348384e-07, "loss": 10.6243, "step": 440410 }, { "epoch": 0.8896762646606091, "grad_norm": 223.59613037109375, "learning_rate": 4.263292852530293e-07, "loss": 23.7532, "step": 440420 }, { "epoch": 0.8896964652932929, "grad_norm": 111.74337768554688, "learning_rate": 4.261882541627377e-07, "loss": 7.5019, "step": 440430 }, { "epoch": 0.8897166659259768, "grad_norm": 448.3424377441406, "learning_rate": 4.260472453646497e-07, "loss": 27.4821, "step": 440440 }, { "epoch": 0.8897368665586606, "grad_norm": 318.8260192871094, 
"learning_rate": 4.2590625885945205e-07, "loss": 18.8889, "step": 440450 }, { "epoch": 0.8897570671913444, "grad_norm": 331.6166687011719, "learning_rate": 4.25765294647833e-07, "loss": 29.4402, "step": 440460 }, { "epoch": 0.8897772678240282, "grad_norm": 598.9876708984375, "learning_rate": 4.256243527304782e-07, "loss": 18.679, "step": 440470 }, { "epoch": 0.889797468456712, "grad_norm": 435.41290283203125, "learning_rate": 4.2548343310807704e-07, "loss": 17.4909, "step": 440480 }, { "epoch": 0.8898176690893959, "grad_norm": 481.29132080078125, "learning_rate": 4.25342535781314e-07, "loss": 17.0876, "step": 440490 }, { "epoch": 0.8898378697220797, "grad_norm": 258.2563781738281, "learning_rate": 4.2520166075087635e-07, "loss": 14.378, "step": 440500 }, { "epoch": 0.8898580703547635, "grad_norm": 452.7650451660156, "learning_rate": 4.250608080174512e-07, "loss": 19.2819, "step": 440510 }, { "epoch": 0.8898782709874473, "grad_norm": 223.28135681152344, "learning_rate": 4.249199775817242e-07, "loss": 16.1098, "step": 440520 }, { "epoch": 0.8898984716201311, "grad_norm": 80.29216003417969, "learning_rate": 4.247791694443837e-07, "loss": 23.5703, "step": 440530 }, { "epoch": 0.889918672252815, "grad_norm": 555.3062744140625, "learning_rate": 4.24638383606113e-07, "loss": 22.0077, "step": 440540 }, { "epoch": 0.8899388728854988, "grad_norm": 1100.7147216796875, "learning_rate": 4.24497620067601e-07, "loss": 39.7739, "step": 440550 }, { "epoch": 0.8899590735181826, "grad_norm": 675.494873046875, "learning_rate": 4.2435687882953327e-07, "loss": 17.158, "step": 440560 }, { "epoch": 0.8899792741508664, "grad_norm": 292.1600341796875, "learning_rate": 4.242161598925937e-07, "loss": 15.5541, "step": 440570 }, { "epoch": 0.8899994747835502, "grad_norm": 274.9527282714844, "learning_rate": 4.240754632574706e-07, "loss": 27.1441, "step": 440580 }, { "epoch": 0.8900196754162341, "grad_norm": 48.35358428955078, "learning_rate": 4.239347889248485e-07, "loss": 13.2044, "step": 440590 }, { "epoch": 0.8900398760489179, "grad_norm": 235.7996368408203, "learning_rate": 4.237941368954124e-07, "loss": 22.3997, "step": 440600 }, { "epoch": 0.8900600766816017, "grad_norm": 11.193168640136719, "learning_rate": 4.236535071698489e-07, "loss": 17.1348, "step": 440610 }, { "epoch": 0.8900802773142855, "grad_norm": 379.49041748046875, "learning_rate": 4.2351289974884467e-07, "loss": 23.3766, "step": 440620 }, { "epoch": 0.8901004779469693, "grad_norm": 255.96241760253906, "learning_rate": 4.2337231463308147e-07, "loss": 14.9062, "step": 440630 }, { "epoch": 0.8901206785796532, "grad_norm": 757.0723876953125, "learning_rate": 4.2323175182324706e-07, "loss": 29.2287, "step": 440640 }, { "epoch": 0.890140879212337, "grad_norm": 357.3234558105469, "learning_rate": 4.2309121132002695e-07, "loss": 11.862, "step": 440650 }, { "epoch": 0.8901610798450208, "grad_norm": 25.004135131835938, "learning_rate": 4.2295069312410455e-07, "loss": 18.1373, "step": 440660 }, { "epoch": 0.8901812804777045, "grad_norm": 36.95526123046875, "learning_rate": 4.228101972361648e-07, "loss": 7.3536, "step": 440670 }, { "epoch": 0.8902014811103883, "grad_norm": 207.34742736816406, "learning_rate": 4.226697236568933e-07, "loss": 16.4848, "step": 440680 }, { "epoch": 0.8902216817430721, "grad_norm": 526.7403564453125, "learning_rate": 4.225292723869762e-07, "loss": 20.7376, "step": 440690 }, { "epoch": 0.890241882375756, "grad_norm": 572.5574951171875, "learning_rate": 4.2238884342709397e-07, "loss": 23.5109, "step": 440700 }, { "epoch": 
0.8902620830084398, "grad_norm": 486.1275329589844, "learning_rate": 4.222484367779334e-07, "loss": 37.0887, "step": 440710 }, { "epoch": 0.8902822836411236, "grad_norm": 314.53375244140625, "learning_rate": 4.2210805244017993e-07, "loss": 24.3133, "step": 440720 }, { "epoch": 0.8903024842738074, "grad_norm": 374.93328857421875, "learning_rate": 4.219676904145165e-07, "loss": 31.6426, "step": 440730 }, { "epoch": 0.8903226849064912, "grad_norm": 113.07835388183594, "learning_rate": 4.218273507016263e-07, "loss": 32.312, "step": 440740 }, { "epoch": 0.8903428855391751, "grad_norm": 478.1295471191406, "learning_rate": 4.2168703330219494e-07, "loss": 22.5987, "step": 440750 }, { "epoch": 0.8903630861718589, "grad_norm": 360.760498046875, "learning_rate": 4.2154673821690585e-07, "loss": 22.2875, "step": 440760 }, { "epoch": 0.8903832868045427, "grad_norm": 177.20535278320312, "learning_rate": 4.2140646544644227e-07, "loss": 16.4268, "step": 440770 }, { "epoch": 0.8904034874372265, "grad_norm": 525.95361328125, "learning_rate": 4.212662149914887e-07, "loss": 13.4369, "step": 440780 }, { "epoch": 0.8904236880699103, "grad_norm": 216.5105438232422, "learning_rate": 4.211259868527273e-07, "loss": 27.3728, "step": 440790 }, { "epoch": 0.8904438887025942, "grad_norm": 634.1834716796875, "learning_rate": 4.2098578103084376e-07, "loss": 22.5864, "step": 440800 }, { "epoch": 0.890464089335278, "grad_norm": 52.97663116455078, "learning_rate": 4.208455975265191e-07, "loss": 18.4716, "step": 440810 }, { "epoch": 0.8904842899679618, "grad_norm": 56.63849639892578, "learning_rate": 4.2070543634043834e-07, "loss": 9.3955, "step": 440820 }, { "epoch": 0.8905044906006456, "grad_norm": 550.6646118164062, "learning_rate": 4.205652974732838e-07, "loss": 18.9752, "step": 440830 }, { "epoch": 0.8905246912333294, "grad_norm": 559.0628051757812, "learning_rate": 4.2042518092573814e-07, "loss": 27.9919, "step": 440840 }, { "epoch": 0.8905448918660133, "grad_norm": 128.8303680419922, "learning_rate": 4.202850866984853e-07, "loss": 27.2158, "step": 440850 }, { "epoch": 0.8905650924986971, "grad_norm": 343.2501220703125, "learning_rate": 4.201450147922065e-07, "loss": 16.7919, "step": 440860 }, { "epoch": 0.8905852931313809, "grad_norm": 185.21302795410156, "learning_rate": 4.200049652075866e-07, "loss": 20.326, "step": 440870 }, { "epoch": 0.8906054937640647, "grad_norm": 244.53485107421875, "learning_rate": 4.198649379453068e-07, "loss": 12.922, "step": 440880 }, { "epoch": 0.8906256943967485, "grad_norm": 532.2025756835938, "learning_rate": 4.1972493300604877e-07, "loss": 22.2626, "step": 440890 }, { "epoch": 0.8906458950294324, "grad_norm": 89.53042602539062, "learning_rate": 4.195849503904975e-07, "loss": 12.9347, "step": 440900 }, { "epoch": 0.8906660956621162, "grad_norm": 171.7433624267578, "learning_rate": 4.1944499009933303e-07, "loss": 8.9002, "step": 440910 }, { "epoch": 0.8906862962948, "grad_norm": 118.01786041259766, "learning_rate": 4.19305052133237e-07, "loss": 7.3701, "step": 440920 }, { "epoch": 0.8907064969274837, "grad_norm": 339.9176940917969, "learning_rate": 4.1916513649289334e-07, "loss": 17.031, "step": 440930 }, { "epoch": 0.8907266975601675, "grad_norm": 315.4590759277344, "learning_rate": 4.1902524317898427e-07, "loss": 20.6522, "step": 440940 }, { "epoch": 0.8907468981928514, "grad_norm": 231.2949676513672, "learning_rate": 4.188853721921893e-07, "loss": 17.2781, "step": 440950 }, { "epoch": 0.8907670988255352, "grad_norm": 404.14935302734375, "learning_rate": 4.1874552353319107e-07, "loss": 
20.6696, "step": 440960 }, { "epoch": 0.890787299458219, "grad_norm": 1402.7301025390625, "learning_rate": 4.186056972026725e-07, "loss": 37.9456, "step": 440970 }, { "epoch": 0.8908075000909028, "grad_norm": 425.55401611328125, "learning_rate": 4.1846589320131415e-07, "loss": 15.6599, "step": 440980 }, { "epoch": 0.8908277007235866, "grad_norm": 712.2496337890625, "learning_rate": 4.1832611152979655e-07, "loss": 21.0699, "step": 440990 }, { "epoch": 0.8908479013562705, "grad_norm": 487.39239501953125, "learning_rate": 4.1818635218880186e-07, "loss": 17.5509, "step": 441000 }, { "epoch": 0.8908681019889543, "grad_norm": 75.24030303955078, "learning_rate": 4.1804661517901244e-07, "loss": 41.9425, "step": 441010 }, { "epoch": 0.8908883026216381, "grad_norm": 488.7297058105469, "learning_rate": 4.179069005011066e-07, "loss": 14.4235, "step": 441020 }, { "epoch": 0.8909085032543219, "grad_norm": 662.9674072265625, "learning_rate": 4.177672081557671e-07, "loss": 12.6769, "step": 441030 }, { "epoch": 0.8909287038870057, "grad_norm": 744.5567626953125, "learning_rate": 4.176275381436751e-07, "loss": 12.1562, "step": 441040 }, { "epoch": 0.8909489045196896, "grad_norm": 169.5181427001953, "learning_rate": 4.1748789046551055e-07, "loss": 20.7962, "step": 441050 }, { "epoch": 0.8909691051523734, "grad_norm": 230.7576141357422, "learning_rate": 4.173482651219535e-07, "loss": 12.6017, "step": 441060 }, { "epoch": 0.8909893057850572, "grad_norm": 511.5341491699219, "learning_rate": 4.1720866211368615e-07, "loss": 23.1607, "step": 441070 }, { "epoch": 0.891009506417741, "grad_norm": 373.091552734375, "learning_rate": 4.1706908144138804e-07, "loss": 14.3656, "step": 441080 }, { "epoch": 0.8910297070504248, "grad_norm": 284.5761413574219, "learning_rate": 4.1692952310573854e-07, "loss": 21.3658, "step": 441090 }, { "epoch": 0.8910499076831087, "grad_norm": 570.6357421875, "learning_rate": 4.1678998710741936e-07, "loss": 19.4725, "step": 441100 }, { "epoch": 0.8910701083157925, "grad_norm": 967.0919189453125, "learning_rate": 4.1665047344710887e-07, "loss": 26.8212, "step": 441110 }, { "epoch": 0.8910903089484763, "grad_norm": 259.86822509765625, "learning_rate": 4.1651098212548923e-07, "loss": 12.6998, "step": 441120 }, { "epoch": 0.8911105095811601, "grad_norm": 206.50607299804688, "learning_rate": 4.163715131432383e-07, "loss": 10.5511, "step": 441130 }, { "epoch": 0.8911307102138439, "grad_norm": 466.2878723144531, "learning_rate": 4.162320665010372e-07, "loss": 22.2893, "step": 441140 }, { "epoch": 0.8911509108465278, "grad_norm": 11.808757781982422, "learning_rate": 4.160926421995648e-07, "loss": 19.4246, "step": 441150 }, { "epoch": 0.8911711114792116, "grad_norm": 793.69775390625, "learning_rate": 4.159532402395011e-07, "loss": 27.9781, "step": 441160 }, { "epoch": 0.8911913121118954, "grad_norm": 0.3489563763141632, "learning_rate": 4.158138606215256e-07, "loss": 9.1859, "step": 441170 }, { "epoch": 0.8912115127445792, "grad_norm": 0.0, "learning_rate": 4.1567450334631667e-07, "loss": 21.4238, "step": 441180 }, { "epoch": 0.8912317133772629, "grad_norm": 1309.4345703125, "learning_rate": 4.155351684145548e-07, "loss": 15.2316, "step": 441190 }, { "epoch": 0.8912519140099467, "grad_norm": 204.02719116210938, "learning_rate": 4.153958558269189e-07, "loss": 14.4283, "step": 441200 }, { "epoch": 0.8912721146426306, "grad_norm": 98.7807846069336, "learning_rate": 4.1525656558408624e-07, "loss": 11.4204, "step": 441210 }, { "epoch": 0.8912923152753144, "grad_norm": 407.64825439453125, "learning_rate": 
4.151172976867374e-07, "loss": 13.8096, "step": 441220 }, { "epoch": 0.8913125159079982, "grad_norm": 458.1145324707031, "learning_rate": 4.149780521355523e-07, "loss": 14.3765, "step": 441230 }, { "epoch": 0.891332716540682, "grad_norm": 275.7145690917969, "learning_rate": 4.1483882893120606e-07, "loss": 11.479, "step": 441240 }, { "epoch": 0.8913529171733658, "grad_norm": 315.7733154296875, "learning_rate": 4.146996280743798e-07, "loss": 28.4609, "step": 441250 }, { "epoch": 0.8913731178060497, "grad_norm": 152.9192657470703, "learning_rate": 4.145604495657518e-07, "loss": 4.4852, "step": 441260 }, { "epoch": 0.8913933184387335, "grad_norm": 29.09087562561035, "learning_rate": 4.144212934060005e-07, "loss": 21.8849, "step": 441270 }, { "epoch": 0.8914135190714173, "grad_norm": 118.08297729492188, "learning_rate": 4.142821595958024e-07, "loss": 24.1273, "step": 441280 }, { "epoch": 0.8914337197041011, "grad_norm": 202.7879638671875, "learning_rate": 4.1414304813583663e-07, "loss": 25.2596, "step": 441290 }, { "epoch": 0.8914539203367849, "grad_norm": 70.14551544189453, "learning_rate": 4.140039590267836e-07, "loss": 21.5711, "step": 441300 }, { "epoch": 0.8914741209694688, "grad_norm": 55.28828048706055, "learning_rate": 4.1386489226931723e-07, "loss": 21.0769, "step": 441310 }, { "epoch": 0.8914943216021526, "grad_norm": 189.6409149169922, "learning_rate": 4.137258478641176e-07, "loss": 21.3108, "step": 441320 }, { "epoch": 0.8915145222348364, "grad_norm": 401.5644836425781, "learning_rate": 4.135868258118625e-07, "loss": 12.4659, "step": 441330 }, { "epoch": 0.8915347228675202, "grad_norm": 46.09809112548828, "learning_rate": 4.1344782611322855e-07, "loss": 21.3478, "step": 441340 }, { "epoch": 0.891554923500204, "grad_norm": 237.2362823486328, "learning_rate": 4.13308848768893e-07, "loss": 20.2728, "step": 441350 }, { "epoch": 0.8915751241328879, "grad_norm": 277.4466857910156, "learning_rate": 4.1316989377953477e-07, "loss": 16.9617, "step": 441360 }, { "epoch": 0.8915953247655717, "grad_norm": 219.892822265625, "learning_rate": 4.1303096114583e-07, "loss": 19.2814, "step": 441370 }, { "epoch": 0.8916155253982555, "grad_norm": 448.60009765625, "learning_rate": 4.128920508684553e-07, "loss": 24.5058, "step": 441380 }, { "epoch": 0.8916357260309393, "grad_norm": 351.42901611328125, "learning_rate": 4.127531629480891e-07, "loss": 19.3832, "step": 441390 }, { "epoch": 0.8916559266636231, "grad_norm": 642.4378662109375, "learning_rate": 4.1261429738540694e-07, "loss": 31.0099, "step": 441400 }, { "epoch": 0.891676127296307, "grad_norm": 496.5330810546875, "learning_rate": 4.1247545418108715e-07, "loss": 21.5189, "step": 441410 }, { "epoch": 0.8916963279289908, "grad_norm": 206.7541046142578, "learning_rate": 4.1233663333580474e-07, "loss": 10.9033, "step": 441420 }, { "epoch": 0.8917165285616746, "grad_norm": 428.3406677246094, "learning_rate": 4.121978348502381e-07, "loss": 27.4461, "step": 441430 }, { "epoch": 0.8917367291943583, "grad_norm": 265.78924560546875, "learning_rate": 4.1205905872506224e-07, "loss": 17.8106, "step": 441440 }, { "epoch": 0.8917569298270421, "grad_norm": 439.1598815917969, "learning_rate": 4.119203049609538e-07, "loss": 19.014, "step": 441450 }, { "epoch": 0.891777130459726, "grad_norm": 4.566817760467529, "learning_rate": 4.1178157355859005e-07, "loss": 13.6683, "step": 441460 }, { "epoch": 0.8917973310924098, "grad_norm": 406.9350891113281, "learning_rate": 4.1164286451864543e-07, "loss": 17.9122, "step": 441470 }, { "epoch": 0.8918175317250936, "grad_norm": 
272.4143371582031, "learning_rate": 4.1150417784179776e-07, "loss": 21.5863, "step": 441480 }, { "epoch": 0.8918377323577774, "grad_norm": 664.770263671875, "learning_rate": 4.1136551352872256e-07, "loss": 16.9244, "step": 441490 }, { "epoch": 0.8918579329904612, "grad_norm": 657.5719604492188, "learning_rate": 4.112268715800943e-07, "loss": 23.7679, "step": 441500 }, { "epoch": 0.891878133623145, "grad_norm": 150.0564727783203, "learning_rate": 4.1108825199659087e-07, "loss": 16.1455, "step": 441510 }, { "epoch": 0.8918983342558289, "grad_norm": 410.1813659667969, "learning_rate": 4.1094965477888605e-07, "loss": 17.7182, "step": 441520 }, { "epoch": 0.8919185348885127, "grad_norm": 194.68238830566406, "learning_rate": 4.1081107992765546e-07, "loss": 17.0886, "step": 441530 }, { "epoch": 0.8919387355211965, "grad_norm": 333.56903076171875, "learning_rate": 4.1067252744357524e-07, "loss": 17.374, "step": 441540 }, { "epoch": 0.8919589361538803, "grad_norm": 348.90771484375, "learning_rate": 4.10533997327322e-07, "loss": 10.7891, "step": 441550 }, { "epoch": 0.8919791367865642, "grad_norm": 272.50164794921875, "learning_rate": 4.1039548957956807e-07, "loss": 16.9159, "step": 441560 }, { "epoch": 0.891999337419248, "grad_norm": 54.76359558105469, "learning_rate": 4.102570042009896e-07, "loss": 17.3863, "step": 441570 }, { "epoch": 0.8920195380519318, "grad_norm": 255.55389404296875, "learning_rate": 4.101185411922626e-07, "loss": 15.8582, "step": 441580 }, { "epoch": 0.8920397386846156, "grad_norm": 70.89956665039062, "learning_rate": 4.099801005540616e-07, "loss": 17.2675, "step": 441590 }, { "epoch": 0.8920599393172994, "grad_norm": 311.2065124511719, "learning_rate": 4.0984168228705934e-07, "loss": 13.5224, "step": 441600 }, { "epoch": 0.8920801399499833, "grad_norm": 296.6814880371094, "learning_rate": 4.0970328639193255e-07, "loss": 17.2601, "step": 441610 }, { "epoch": 0.8921003405826671, "grad_norm": 348.70703125, "learning_rate": 4.0956491286935687e-07, "loss": 40.7721, "step": 441620 }, { "epoch": 0.8921205412153509, "grad_norm": 760.1993408203125, "learning_rate": 4.0942656172000273e-07, "loss": 19.1502, "step": 441630 }, { "epoch": 0.8921407418480347, "grad_norm": 331.21807861328125, "learning_rate": 4.0928823294454743e-07, "loss": 27.8179, "step": 441640 }, { "epoch": 0.8921609424807185, "grad_norm": 431.77606201171875, "learning_rate": 4.091499265436649e-07, "loss": 14.2931, "step": 441650 }, { "epoch": 0.8921811431134024, "grad_norm": 814.52001953125, "learning_rate": 4.0901164251802905e-07, "loss": 16.915, "step": 441660 }, { "epoch": 0.8922013437460862, "grad_norm": 197.07962036132812, "learning_rate": 4.088733808683132e-07, "loss": 19.4393, "step": 441670 }, { "epoch": 0.89222154437877, "grad_norm": 327.1533203125, "learning_rate": 4.087351415951918e-07, "loss": 25.7358, "step": 441680 }, { "epoch": 0.8922417450114538, "grad_norm": 617.658935546875, "learning_rate": 4.085969246993388e-07, "loss": 21.5491, "step": 441690 }, { "epoch": 0.8922619456441375, "grad_norm": 484.689208984375, "learning_rate": 4.084587301814269e-07, "loss": 17.0248, "step": 441700 }, { "epoch": 0.8922821462768213, "grad_norm": 317.9181823730469, "learning_rate": 4.0832055804212957e-07, "loss": 12.5236, "step": 441710 }, { "epoch": 0.8923023469095052, "grad_norm": 6.746350288391113, "learning_rate": 4.081824082821223e-07, "loss": 11.206, "step": 441720 }, { "epoch": 0.892322547542189, "grad_norm": 467.7158203125, "learning_rate": 4.080442809020774e-07, "loss": 15.5601, "step": 441730 }, { "epoch": 
0.8923427481748728, "grad_norm": 175.54934692382812, "learning_rate": 4.079061759026659e-07, "loss": 19.0913, "step": 441740 }, { "epoch": 0.8923629488075566, "grad_norm": 490.88568115234375, "learning_rate": 4.0776809328456455e-07, "loss": 14.066, "step": 441750 }, { "epoch": 0.8923831494402404, "grad_norm": 570.3674926757812, "learning_rate": 4.0763003304844395e-07, "loss": 12.0989, "step": 441760 }, { "epoch": 0.8924033500729243, "grad_norm": 483.3099670410156, "learning_rate": 4.0749199519497686e-07, "loss": 14.4682, "step": 441770 }, { "epoch": 0.8924235507056081, "grad_norm": 312.3144836425781, "learning_rate": 4.073539797248377e-07, "loss": 21.0393, "step": 441780 }, { "epoch": 0.8924437513382919, "grad_norm": 95.38319396972656, "learning_rate": 4.0721598663869764e-07, "loss": 18.0344, "step": 441790 }, { "epoch": 0.8924639519709757, "grad_norm": 414.2052001953125, "learning_rate": 4.0707801593723006e-07, "loss": 33.5571, "step": 441800 }, { "epoch": 0.8924841526036595, "grad_norm": 379.32366943359375, "learning_rate": 4.069400676211077e-07, "loss": 19.56, "step": 441810 }, { "epoch": 0.8925043532363434, "grad_norm": 185.80258178710938, "learning_rate": 4.0680214169100117e-07, "loss": 14.8039, "step": 441820 }, { "epoch": 0.8925245538690272, "grad_norm": 760.356689453125, "learning_rate": 4.0666423814758436e-07, "loss": 28.8158, "step": 441830 }, { "epoch": 0.892544754501711, "grad_norm": 153.5394287109375, "learning_rate": 4.065263569915301e-07, "loss": 16.4005, "step": 441840 }, { "epoch": 0.8925649551343948, "grad_norm": 312.73956298828125, "learning_rate": 4.063884982235078e-07, "loss": 21.2115, "step": 441850 }, { "epoch": 0.8925851557670786, "grad_norm": 129.17160034179688, "learning_rate": 4.062506618441908e-07, "loss": 14.3664, "step": 441860 }, { "epoch": 0.8926053563997625, "grad_norm": 255.9687042236328, "learning_rate": 4.06112847854252e-07, "loss": 28.4689, "step": 441870 }, { "epoch": 0.8926255570324463, "grad_norm": 344.6517639160156, "learning_rate": 4.059750562543618e-07, "loss": 42.3723, "step": 441880 }, { "epoch": 0.8926457576651301, "grad_norm": 280.7841796875, "learning_rate": 4.05837287045191e-07, "loss": 14.4088, "step": 441890 }, { "epoch": 0.8926659582978139, "grad_norm": 324.40618896484375, "learning_rate": 4.056995402274122e-07, "loss": 22.3527, "step": 441900 }, { "epoch": 0.8926861589304977, "grad_norm": 194.77801513671875, "learning_rate": 4.0556181580169885e-07, "loss": 15.2484, "step": 441910 }, { "epoch": 0.8927063595631816, "grad_norm": 433.3874206542969, "learning_rate": 4.054241137687176e-07, "loss": 22.2178, "step": 441920 }, { "epoch": 0.8927265601958654, "grad_norm": 102.00749969482422, "learning_rate": 4.052864341291418e-07, "loss": 11.2987, "step": 441930 }, { "epoch": 0.8927467608285492, "grad_norm": 348.9513854980469, "learning_rate": 4.051487768836443e-07, "loss": 22.3919, "step": 441940 }, { "epoch": 0.8927669614612329, "grad_norm": 229.98158264160156, "learning_rate": 4.0501114203289395e-07, "loss": 17.3857, "step": 441950 }, { "epoch": 0.8927871620939167, "grad_norm": 416.3073425292969, "learning_rate": 4.048735295775608e-07, "loss": 16.0174, "step": 441960 }, { "epoch": 0.8928073627266006, "grad_norm": 374.8637390136719, "learning_rate": 4.0473593951831814e-07, "loss": 12.0551, "step": 441970 }, { "epoch": 0.8928275633592844, "grad_norm": 544.44580078125, "learning_rate": 4.0459837185583497e-07, "loss": 10.0, "step": 441980 }, { "epoch": 0.8928477639919682, "grad_norm": 578.8648071289062, "learning_rate": 4.044608265907807e-07, 
"loss": 23.768, "step": 441990 }, { "epoch": 0.892867964624652, "grad_norm": 93.16361236572266, "learning_rate": 4.043233037238281e-07, "loss": 22.4546, "step": 442000 }, { "epoch": 0.8928881652573358, "grad_norm": 862.9176025390625, "learning_rate": 4.041858032556456e-07, "loss": 19.5332, "step": 442010 }, { "epoch": 0.8929083658900197, "grad_norm": 319.9920654296875, "learning_rate": 4.040483251869054e-07, "loss": 12.6, "step": 442020 }, { "epoch": 0.8929285665227035, "grad_norm": 28.0956974029541, "learning_rate": 4.0391086951827474e-07, "loss": 31.4243, "step": 442030 }, { "epoch": 0.8929487671553873, "grad_norm": 325.6099548339844, "learning_rate": 4.0377343625042587e-07, "loss": 11.6039, "step": 442040 }, { "epoch": 0.8929689677880711, "grad_norm": 81.76892852783203, "learning_rate": 4.0363602538402823e-07, "loss": 11.1132, "step": 442050 }, { "epoch": 0.8929891684207549, "grad_norm": 668.8286743164062, "learning_rate": 4.034986369197502e-07, "loss": 26.1809, "step": 442060 }, { "epoch": 0.8930093690534388, "grad_norm": 663.0156860351562, "learning_rate": 4.0336127085826294e-07, "loss": 22.1773, "step": 442070 }, { "epoch": 0.8930295696861226, "grad_norm": 353.7117919921875, "learning_rate": 4.032239272002347e-07, "loss": 14.2357, "step": 442080 }, { "epoch": 0.8930497703188064, "grad_norm": 571.19384765625, "learning_rate": 4.030866059463362e-07, "loss": 24.5401, "step": 442090 }, { "epoch": 0.8930699709514902, "grad_norm": 277.9085388183594, "learning_rate": 4.029493070972362e-07, "loss": 15.2518, "step": 442100 }, { "epoch": 0.893090171584174, "grad_norm": 212.6479949951172, "learning_rate": 4.0281203065360265e-07, "loss": 15.6186, "step": 442110 }, { "epoch": 0.8931103722168579, "grad_norm": 3.895404100418091, "learning_rate": 4.026747766161071e-07, "loss": 14.8128, "step": 442120 }, { "epoch": 0.8931305728495417, "grad_norm": 3.034308433532715, "learning_rate": 4.025375449854163e-07, "loss": 10.7396, "step": 442130 }, { "epoch": 0.8931507734822255, "grad_norm": 376.6468200683594, "learning_rate": 4.0240033576219974e-07, "loss": 22.8036, "step": 442140 }, { "epoch": 0.8931709741149093, "grad_norm": 591.7930297851562, "learning_rate": 4.022631489471257e-07, "loss": 21.005, "step": 442150 }, { "epoch": 0.8931911747475931, "grad_norm": 67.9887924194336, "learning_rate": 4.0212598454086596e-07, "loss": 17.2758, "step": 442160 }, { "epoch": 0.893211375380277, "grad_norm": 611.8180541992188, "learning_rate": 4.019888425440838e-07, "loss": 18.577, "step": 442170 }, { "epoch": 0.8932315760129608, "grad_norm": 460.660400390625, "learning_rate": 4.018517229574509e-07, "loss": 21.4927, "step": 442180 }, { "epoch": 0.8932517766456446, "grad_norm": 286.05133056640625, "learning_rate": 4.0171462578163624e-07, "loss": 17.3168, "step": 442190 }, { "epoch": 0.8932719772783284, "grad_norm": 289.0408020019531, "learning_rate": 4.0157755101730645e-07, "loss": 13.6396, "step": 442200 }, { "epoch": 0.8932921779110121, "grad_norm": 433.1505432128906, "learning_rate": 4.014404986651288e-07, "loss": 18.2587, "step": 442210 }, { "epoch": 0.8933123785436959, "grad_norm": 505.6806945800781, "learning_rate": 4.013034687257727e-07, "loss": 21.5438, "step": 442220 }, { "epoch": 0.8933325791763798, "grad_norm": 330.5233154296875, "learning_rate": 4.011664611999072e-07, "loss": 30.2277, "step": 442230 }, { "epoch": 0.8933527798090636, "grad_norm": 633.8748779296875, "learning_rate": 4.010294760881972e-07, "loss": 25.41, "step": 442240 }, { "epoch": 0.8933729804417474, "grad_norm": 301.0047607421875, 
"learning_rate": 4.0089251339131164e-07, "loss": 21.5253, "step": 442250 }, { "epoch": 0.8933931810744312, "grad_norm": 2.294178009033203, "learning_rate": 4.0075557310991886e-07, "loss": 21.8428, "step": 442260 }, { "epoch": 0.893413381707115, "grad_norm": 519.116455078125, "learning_rate": 4.006186552446861e-07, "loss": 18.2456, "step": 442270 }, { "epoch": 0.8934335823397989, "grad_norm": 256.85467529296875, "learning_rate": 4.00481759796279e-07, "loss": 19.9572, "step": 442280 }, { "epoch": 0.8934537829724827, "grad_norm": 501.7436828613281, "learning_rate": 4.003448867653664e-07, "loss": 18.7082, "step": 442290 }, { "epoch": 0.8934739836051665, "grad_norm": 430.889404296875, "learning_rate": 4.002080361526156e-07, "loss": 16.348, "step": 442300 }, { "epoch": 0.8934941842378503, "grad_norm": 591.6133422851562, "learning_rate": 4.000712079586916e-07, "loss": 16.6457, "step": 442310 }, { "epoch": 0.8935143848705341, "grad_norm": 604.29736328125, "learning_rate": 3.999344021842627e-07, "loss": 23.2564, "step": 442320 }, { "epoch": 0.893534585503218, "grad_norm": 216.2506103515625, "learning_rate": 3.997976188299968e-07, "loss": 16.2214, "step": 442330 }, { "epoch": 0.8935547861359018, "grad_norm": 949.240966796875, "learning_rate": 3.996608578965594e-07, "loss": 22.458, "step": 442340 }, { "epoch": 0.8935749867685856, "grad_norm": 247.2509307861328, "learning_rate": 3.9952411938461557e-07, "loss": 18.5836, "step": 442350 }, { "epoch": 0.8935951874012694, "grad_norm": 260.24273681640625, "learning_rate": 3.9938740329483473e-07, "loss": 20.2889, "step": 442360 }, { "epoch": 0.8936153880339532, "grad_norm": 475.4487609863281, "learning_rate": 3.992507096278814e-07, "loss": 23.5179, "step": 442370 }, { "epoch": 0.8936355886666371, "grad_norm": 317.2352600097656, "learning_rate": 3.991140383844211e-07, "loss": 9.8048, "step": 442380 }, { "epoch": 0.8936557892993209, "grad_norm": 841.1236572265625, "learning_rate": 3.989773895651222e-07, "loss": 18.5649, "step": 442390 }, { "epoch": 0.8936759899320047, "grad_norm": 498.5187072753906, "learning_rate": 3.9884076317064813e-07, "loss": 20.668, "step": 442400 }, { "epoch": 0.8936961905646885, "grad_norm": 311.369384765625, "learning_rate": 3.9870415920166715e-07, "loss": 16.4844, "step": 442410 }, { "epoch": 0.8937163911973723, "grad_norm": 283.80987548828125, "learning_rate": 3.9856757765884436e-07, "loss": 10.47, "step": 442420 }, { "epoch": 0.8937365918300562, "grad_norm": 37.27119445800781, "learning_rate": 3.984310185428442e-07, "loss": 33.6831, "step": 442430 }, { "epoch": 0.89375679246274, "grad_norm": 382.70458984375, "learning_rate": 3.9829448185433385e-07, "loss": 25.1369, "step": 442440 }, { "epoch": 0.8937769930954238, "grad_norm": 424.489501953125, "learning_rate": 3.9815796759397783e-07, "loss": 8.1771, "step": 442450 }, { "epoch": 0.8937971937281076, "grad_norm": 0.0, "learning_rate": 3.980214757624412e-07, "loss": 20.0538, "step": 442460 }, { "epoch": 0.8938173943607913, "grad_norm": 232.93911743164062, "learning_rate": 3.978850063603895e-07, "loss": 18.2064, "step": 442470 }, { "epoch": 0.8938375949934751, "grad_norm": 588.0669555664062, "learning_rate": 3.977485593884889e-07, "loss": 27.8043, "step": 442480 }, { "epoch": 0.893857795626159, "grad_norm": 289.8110656738281, "learning_rate": 3.9761213484740435e-07, "loss": 18.8844, "step": 442490 }, { "epoch": 0.8938779962588428, "grad_norm": 455.50518798828125, "learning_rate": 3.9747573273779816e-07, "loss": 13.1274, "step": 442500 }, { "epoch": 0.8938981968915266, "grad_norm": 
263.20697021484375, "learning_rate": 3.9733935306033756e-07, "loss": 16.1889, "step": 442510 }, { "epoch": 0.8939183975242104, "grad_norm": 463.04376220703125, "learning_rate": 3.9720299581568865e-07, "loss": 21.7175, "step": 442520 }, { "epoch": 0.8939385981568942, "grad_norm": 302.4626770019531, "learning_rate": 3.970666610045121e-07, "loss": 19.4688, "step": 442530 }, { "epoch": 0.8939587987895781, "grad_norm": 769.6148681640625, "learning_rate": 3.969303486274745e-07, "loss": 27.4903, "step": 442540 }, { "epoch": 0.8939789994222619, "grad_norm": 316.3883361816406, "learning_rate": 3.967940586852409e-07, "loss": 12.6395, "step": 442550 }, { "epoch": 0.8939992000549457, "grad_norm": 183.7150421142578, "learning_rate": 3.966577911784747e-07, "loss": 20.832, "step": 442560 }, { "epoch": 0.8940194006876295, "grad_norm": 590.2752685546875, "learning_rate": 3.965215461078392e-07, "loss": 15.9476, "step": 442570 }, { "epoch": 0.8940396013203133, "grad_norm": 534.8192138671875, "learning_rate": 3.963853234740006e-07, "loss": 19.1567, "step": 442580 }, { "epoch": 0.8940598019529972, "grad_norm": 597.6304931640625, "learning_rate": 3.962491232776211e-07, "loss": 20.072, "step": 442590 }, { "epoch": 0.894080002585681, "grad_norm": 381.5980529785156, "learning_rate": 3.961129455193641e-07, "loss": 32.0533, "step": 442600 }, { "epoch": 0.8941002032183648, "grad_norm": 256.7167663574219, "learning_rate": 3.959767901998957e-07, "loss": 11.408, "step": 442610 }, { "epoch": 0.8941204038510486, "grad_norm": 65.94268798828125, "learning_rate": 3.958406573198764e-07, "loss": 18.7583, "step": 442620 }, { "epoch": 0.8941406044837324, "grad_norm": 288.7595520019531, "learning_rate": 3.957045468799725e-07, "loss": 22.4417, "step": 442630 }, { "epoch": 0.8941608051164163, "grad_norm": 427.774169921875, "learning_rate": 3.955684588808456e-07, "loss": 29.2313, "step": 442640 }, { "epoch": 0.8941810057491001, "grad_norm": 105.23033142089844, "learning_rate": 3.954323933231602e-07, "loss": 24.7779, "step": 442650 }, { "epoch": 0.8942012063817839, "grad_norm": 648.6617431640625, "learning_rate": 3.952963502075791e-07, "loss": 23.5429, "step": 442660 }, { "epoch": 0.8942214070144677, "grad_norm": 456.74700927734375, "learning_rate": 3.951603295347639e-07, "loss": 21.8411, "step": 442670 }, { "epoch": 0.8942416076471515, "grad_norm": 0.0741969645023346, "learning_rate": 3.9502433130537977e-07, "loss": 10.3931, "step": 442680 }, { "epoch": 0.8942618082798354, "grad_norm": 482.71612548828125, "learning_rate": 3.9488835552008773e-07, "loss": 20.5551, "step": 442690 }, { "epoch": 0.8942820089125192, "grad_norm": 61.67448806762695, "learning_rate": 3.947524021795518e-07, "loss": 18.076, "step": 442700 }, { "epoch": 0.894302209545203, "grad_norm": 291.5075988769531, "learning_rate": 3.946164712844347e-07, "loss": 10.6352, "step": 442710 }, { "epoch": 0.8943224101778867, "grad_norm": 536.745361328125, "learning_rate": 3.9448056283539704e-07, "loss": 19.3627, "step": 442720 }, { "epoch": 0.8943426108105705, "grad_norm": 359.7989196777344, "learning_rate": 3.9434467683310327e-07, "loss": 13.5057, "step": 442730 }, { "epoch": 0.8943628114432544, "grad_norm": 391.85369873046875, "learning_rate": 3.942088132782157e-07, "loss": 16.9426, "step": 442740 }, { "epoch": 0.8943830120759382, "grad_norm": 234.9928741455078, "learning_rate": 3.9407297217139427e-07, "loss": 12.7332, "step": 442750 }, { "epoch": 0.894403212708622, "grad_norm": 301.238037109375, "learning_rate": 3.9393715351330243e-07, "loss": 11.6003, "step": 442760 }, { 
"epoch": 0.8944234133413058, "grad_norm": 177.99615478515625, "learning_rate": 3.9380135730460347e-07, "loss": 9.4544, "step": 442770 }, { "epoch": 0.8944436139739896, "grad_norm": 565.592041015625, "learning_rate": 3.9366558354595797e-07, "loss": 23.9906, "step": 442780 }, { "epoch": 0.8944638146066735, "grad_norm": 177.5765838623047, "learning_rate": 3.935298322380271e-07, "loss": 10.1951, "step": 442790 }, { "epoch": 0.8944840152393573, "grad_norm": 451.1468505859375, "learning_rate": 3.9339410338147363e-07, "loss": 17.273, "step": 442800 }, { "epoch": 0.8945042158720411, "grad_norm": 234.44549560546875, "learning_rate": 3.9325839697695877e-07, "loss": 11.3077, "step": 442810 }, { "epoch": 0.8945244165047249, "grad_norm": 601.57275390625, "learning_rate": 3.931227130251425e-07, "loss": 16.1628, "step": 442820 }, { "epoch": 0.8945446171374087, "grad_norm": 50.89834976196289, "learning_rate": 3.929870515266876e-07, "loss": 21.8412, "step": 442830 }, { "epoch": 0.8945648177700926, "grad_norm": 312.83880615234375, "learning_rate": 3.928514124822569e-07, "loss": 18.6505, "step": 442840 }, { "epoch": 0.8945850184027764, "grad_norm": 22.50575065612793, "learning_rate": 3.9271579589250817e-07, "loss": 27.6935, "step": 442850 }, { "epoch": 0.8946052190354602, "grad_norm": 371.71063232421875, "learning_rate": 3.925802017581032e-07, "loss": 14.2174, "step": 442860 }, { "epoch": 0.894625419668144, "grad_norm": 322.22113037109375, "learning_rate": 3.924446300797052e-07, "loss": 11.8918, "step": 442870 }, { "epoch": 0.8946456203008278, "grad_norm": 245.89785766601562, "learning_rate": 3.923090808579727e-07, "loss": 19.6123, "step": 442880 }, { "epoch": 0.8946658209335117, "grad_norm": 521.1366577148438, "learning_rate": 3.9217355409356614e-07, "loss": 26.2525, "step": 442890 }, { "epoch": 0.8946860215661955, "grad_norm": 397.6199645996094, "learning_rate": 3.920380497871473e-07, "loss": 15.6646, "step": 442900 }, { "epoch": 0.8947062221988793, "grad_norm": 391.6208801269531, "learning_rate": 3.9190256793937675e-07, "loss": 20.3865, "step": 442910 }, { "epoch": 0.8947264228315631, "grad_norm": 289.1727294921875, "learning_rate": 3.9176710855091283e-07, "loss": 9.2943, "step": 442920 }, { "epoch": 0.8947466234642469, "grad_norm": 239.43304443359375, "learning_rate": 3.916316716224172e-07, "loss": 16.7488, "step": 442930 }, { "epoch": 0.8947668240969308, "grad_norm": 349.6020202636719, "learning_rate": 3.9149625715455107e-07, "loss": 20.8104, "step": 442940 }, { "epoch": 0.8947870247296146, "grad_norm": 96.1251449584961, "learning_rate": 3.913608651479733e-07, "loss": 26.9883, "step": 442950 }, { "epoch": 0.8948072253622984, "grad_norm": 623.001953125, "learning_rate": 3.912254956033423e-07, "loss": 19.0216, "step": 442960 }, { "epoch": 0.8948274259949822, "grad_norm": 338.7909851074219, "learning_rate": 3.9109014852132035e-07, "loss": 10.8443, "step": 442970 }, { "epoch": 0.8948476266276659, "grad_norm": 490.4385070800781, "learning_rate": 3.9095482390256624e-07, "loss": 17.8714, "step": 442980 }, { "epoch": 0.8948678272603497, "grad_norm": 648.5219116210938, "learning_rate": 3.908195217477384e-07, "loss": 19.2579, "step": 442990 }, { "epoch": 0.8948880278930336, "grad_norm": 679.5826416015625, "learning_rate": 3.90684242057498e-07, "loss": 34.5945, "step": 443000 }, { "epoch": 0.8949082285257174, "grad_norm": 291.563232421875, "learning_rate": 3.9054898483250224e-07, "loss": 20.5633, "step": 443010 }, { "epoch": 0.8949284291584012, "grad_norm": 510.5856628417969, "learning_rate": 
3.904137500734129e-07, "loss": 22.5774, "step": 443020 }, { "epoch": 0.894948629791085, "grad_norm": 551.2908325195312, "learning_rate": 3.902785377808882e-07, "loss": 26.2614, "step": 443030 }, { "epoch": 0.8949688304237688, "grad_norm": 251.3923797607422, "learning_rate": 3.901433479555855e-07, "loss": 18.5808, "step": 443040 }, { "epoch": 0.8949890310564527, "grad_norm": 289.2093200683594, "learning_rate": 3.9000818059816593e-07, "loss": 24.0075, "step": 443050 }, { "epoch": 0.8950092316891365, "grad_norm": 123.8757553100586, "learning_rate": 3.898730357092878e-07, "loss": 28.818, "step": 443060 }, { "epoch": 0.8950294323218203, "grad_norm": 592.0748291015625, "learning_rate": 3.8973791328960786e-07, "loss": 18.4389, "step": 443070 }, { "epoch": 0.8950496329545041, "grad_norm": 339.29248046875, "learning_rate": 3.8960281333978667e-07, "loss": 19.7948, "step": 443080 }, { "epoch": 0.895069833587188, "grad_norm": 218.08489990234375, "learning_rate": 3.894677358604826e-07, "loss": 14.6797, "step": 443090 }, { "epoch": 0.8950900342198718, "grad_norm": 459.9266662597656, "learning_rate": 3.89332680852354e-07, "loss": 17.3519, "step": 443100 }, { "epoch": 0.8951102348525556, "grad_norm": 314.08953857421875, "learning_rate": 3.8919764831605754e-07, "loss": 9.2347, "step": 443110 }, { "epoch": 0.8951304354852394, "grad_norm": 176.02169799804688, "learning_rate": 3.890626382522539e-07, "loss": 21.5893, "step": 443120 }, { "epoch": 0.8951506361179232, "grad_norm": 822.2457275390625, "learning_rate": 3.889276506615991e-07, "loss": 14.4411, "step": 443130 }, { "epoch": 0.895170836750607, "grad_norm": 414.7174987792969, "learning_rate": 3.88792685544751e-07, "loss": 23.4973, "step": 443140 }, { "epoch": 0.8951910373832909, "grad_norm": 176.8641357421875, "learning_rate": 3.88657742902368e-07, "loss": 14.6913, "step": 443150 }, { "epoch": 0.8952112380159747, "grad_norm": 465.1305847167969, "learning_rate": 3.88522822735109e-07, "loss": 25.9285, "step": 443160 }, { "epoch": 0.8952314386486585, "grad_norm": 268.416748046875, "learning_rate": 3.8838792504363066e-07, "loss": 8.6215, "step": 443170 }, { "epoch": 0.8952516392813423, "grad_norm": 1195.952392578125, "learning_rate": 3.882530498285886e-07, "loss": 16.573, "step": 443180 }, { "epoch": 0.8952718399140261, "grad_norm": 293.72906494140625, "learning_rate": 3.8811819709064336e-07, "loss": 14.2375, "step": 443190 }, { "epoch": 0.89529204054671, "grad_norm": 47.482784271240234, "learning_rate": 3.879833668304506e-07, "loss": 29.3211, "step": 443200 }, { "epoch": 0.8953122411793938, "grad_norm": 433.0750427246094, "learning_rate": 3.8784855904866637e-07, "loss": 26.0019, "step": 443210 }, { "epoch": 0.8953324418120776, "grad_norm": 113.45503234863281, "learning_rate": 3.877137737459502e-07, "loss": 11.3939, "step": 443220 }, { "epoch": 0.8953526424447613, "grad_norm": 362.42205810546875, "learning_rate": 3.875790109229566e-07, "loss": 25.6576, "step": 443230 }, { "epoch": 0.8953728430774451, "grad_norm": 107.12870788574219, "learning_rate": 3.8744427058034384e-07, "loss": 13.7933, "step": 443240 }, { "epoch": 0.895393043710129, "grad_norm": 554.9472045898438, "learning_rate": 3.8730955271876813e-07, "loss": 10.5045, "step": 443250 }, { "epoch": 0.8954132443428128, "grad_norm": 5.183191299438477, "learning_rate": 3.871748573388867e-07, "loss": 19.209, "step": 443260 }, { "epoch": 0.8954334449754966, "grad_norm": 622.285400390625, "learning_rate": 3.870401844413557e-07, "loss": 20.5912, "step": 443270 }, { "epoch": 0.8954536456081804, "grad_norm": 
608.5679931640625, "learning_rate": 3.8690553402683015e-07, "loss": 21.6407, "step": 443280 }, { "epoch": 0.8954738462408642, "grad_norm": 224.35610961914062, "learning_rate": 3.86770906095969e-07, "loss": 21.7842, "step": 443290 }, { "epoch": 0.895494046873548, "grad_norm": 360.945556640625, "learning_rate": 3.866363006494256e-07, "loss": 17.557, "step": 443300 }, { "epoch": 0.8955142475062319, "grad_norm": 458.71636962890625, "learning_rate": 3.8650171768785826e-07, "loss": 30.2733, "step": 443310 }, { "epoch": 0.8955344481389157, "grad_norm": 509.4033508300781, "learning_rate": 3.863671572119221e-07, "loss": 13.7747, "step": 443320 }, { "epoch": 0.8955546487715995, "grad_norm": 390.75848388671875, "learning_rate": 3.8623261922227204e-07, "loss": 26.8211, "step": 443330 }, { "epoch": 0.8955748494042833, "grad_norm": 336.63623046875, "learning_rate": 3.8609810371956544e-07, "loss": 9.9372, "step": 443340 }, { "epoch": 0.8955950500369672, "grad_norm": 26.759260177612305, "learning_rate": 3.859636107044573e-07, "loss": 24.4349, "step": 443350 }, { "epoch": 0.895615250669651, "grad_norm": 317.5001220703125, "learning_rate": 3.8582914017760154e-07, "loss": 21.1117, "step": 443360 }, { "epoch": 0.8956354513023348, "grad_norm": 460.7093811035156, "learning_rate": 3.856946921396554e-07, "loss": 15.5941, "step": 443370 }, { "epoch": 0.8956556519350186, "grad_norm": 315.0159606933594, "learning_rate": 3.8556026659127445e-07, "loss": 7.0699, "step": 443380 }, { "epoch": 0.8956758525677024, "grad_norm": 358.8883972167969, "learning_rate": 3.8542586353311264e-07, "loss": 14.2727, "step": 443390 }, { "epoch": 0.8956960532003863, "grad_norm": 316.111572265625, "learning_rate": 3.85291482965825e-07, "loss": 15.5937, "step": 443400 }, { "epoch": 0.8957162538330701, "grad_norm": 223.5146942138672, "learning_rate": 3.851571248900676e-07, "loss": 15.3269, "step": 443410 }, { "epoch": 0.8957364544657539, "grad_norm": 420.58526611328125, "learning_rate": 3.8502278930649506e-07, "loss": 18.9996, "step": 443420 }, { "epoch": 0.8957566550984377, "grad_norm": 426.0105285644531, "learning_rate": 3.8488847621576066e-07, "loss": 8.0604, "step": 443430 }, { "epoch": 0.8957768557311215, "grad_norm": 114.0346450805664, "learning_rate": 3.8475418561851996e-07, "loss": 43.2814, "step": 443440 }, { "epoch": 0.8957970563638054, "grad_norm": 466.63580322265625, "learning_rate": 3.846199175154297e-07, "loss": 20.6006, "step": 443450 }, { "epoch": 0.8958172569964892, "grad_norm": 123.13581848144531, "learning_rate": 3.8448567190713993e-07, "loss": 22.305, "step": 443460 }, { "epoch": 0.895837457629173, "grad_norm": 208.05831909179688, "learning_rate": 3.843514487943079e-07, "loss": 17.5501, "step": 443470 }, { "epoch": 0.8958576582618568, "grad_norm": 190.33071899414062, "learning_rate": 3.8421724817758745e-07, "loss": 21.5826, "step": 443480 }, { "epoch": 0.8958778588945405, "grad_norm": 356.87127685546875, "learning_rate": 3.84083070057632e-07, "loss": 23.349, "step": 443490 }, { "epoch": 0.8958980595272243, "grad_norm": 371.1936340332031, "learning_rate": 3.8394891443509554e-07, "loss": 7.139, "step": 443500 }, { "epoch": 0.8959182601599082, "grad_norm": 329.1784973144531, "learning_rate": 3.83814781310633e-07, "loss": 17.2144, "step": 443510 }, { "epoch": 0.895938460792592, "grad_norm": 22.805368423461914, "learning_rate": 3.8368067068489724e-07, "loss": 18.3239, "step": 443520 }, { "epoch": 0.8959586614252758, "grad_norm": 343.87689208984375, "learning_rate": 3.8354658255854105e-07, "loss": 16.9245, "step": 443530 }, { 
"epoch": 0.8959788620579596, "grad_norm": 511.1064147949219, "learning_rate": 3.8341251693221893e-07, "loss": 19.1064, "step": 443540 }, { "epoch": 0.8959990626906434, "grad_norm": 370.7943420410156, "learning_rate": 3.832784738065853e-07, "loss": 18.6469, "step": 443550 }, { "epoch": 0.8960192633233273, "grad_norm": 338.9887390136719, "learning_rate": 3.83144453182292e-07, "loss": 9.248, "step": 443560 }, { "epoch": 0.8960394639560111, "grad_norm": 596.9765625, "learning_rate": 3.830104550599922e-07, "loss": 15.9077, "step": 443570 }, { "epoch": 0.8960596645886949, "grad_norm": 96.01427459716797, "learning_rate": 3.8287647944034054e-07, "loss": 12.1708, "step": 443580 }, { "epoch": 0.8960798652213787, "grad_norm": 276.3226013183594, "learning_rate": 3.827425263239887e-07, "loss": 15.0657, "step": 443590 }, { "epoch": 0.8961000658540625, "grad_norm": 450.3712158203125, "learning_rate": 3.8260859571158883e-07, "loss": 22.2363, "step": 443600 }, { "epoch": 0.8961202664867464, "grad_norm": 348.3032531738281, "learning_rate": 3.824746876037955e-07, "loss": 14.9362, "step": 443610 }, { "epoch": 0.8961404671194302, "grad_norm": 542.25146484375, "learning_rate": 3.8234080200125977e-07, "loss": 17.2952, "step": 443620 }, { "epoch": 0.896160667752114, "grad_norm": 302.9388732910156, "learning_rate": 3.822069389046357e-07, "loss": 21.2011, "step": 443630 }, { "epoch": 0.8961808683847978, "grad_norm": 261.0054016113281, "learning_rate": 3.8207309831457485e-07, "loss": 11.77, "step": 443640 }, { "epoch": 0.8962010690174816, "grad_norm": 107.35267639160156, "learning_rate": 3.8193928023172897e-07, "loss": 29.1692, "step": 443650 }, { "epoch": 0.8962212696501655, "grad_norm": 248.1295166015625, "learning_rate": 3.818054846567515e-07, "loss": 16.7975, "step": 443660 }, { "epoch": 0.8962414702828493, "grad_norm": 457.2428894042969, "learning_rate": 3.8167171159029405e-07, "loss": 15.9782, "step": 443670 }, { "epoch": 0.8962616709155331, "grad_norm": 429.57025146484375, "learning_rate": 3.815379610330078e-07, "loss": 14.5478, "step": 443680 }, { "epoch": 0.8962818715482169, "grad_norm": 473.4178466796875, "learning_rate": 3.814042329855455e-07, "loss": 17.089, "step": 443690 }, { "epoch": 0.8963020721809007, "grad_norm": 374.85650634765625, "learning_rate": 3.812705274485595e-07, "loss": 22.4911, "step": 443700 }, { "epoch": 0.8963222728135846, "grad_norm": 525.5431518554688, "learning_rate": 3.811368444227009e-07, "loss": 21.8245, "step": 443710 }, { "epoch": 0.8963424734462684, "grad_norm": 626.8782348632812, "learning_rate": 3.8100318390862033e-07, "loss": 11.766, "step": 443720 }, { "epoch": 0.8963626740789522, "grad_norm": 276.6632080078125, "learning_rate": 3.8086954590697057e-07, "loss": 21.9943, "step": 443730 }, { "epoch": 0.8963828747116359, "grad_norm": 272.9100646972656, "learning_rate": 3.8073593041840274e-07, "loss": 40.214, "step": 443740 }, { "epoch": 0.8964030753443197, "grad_norm": 561.2411499023438, "learning_rate": 3.8060233744356634e-07, "loss": 18.5295, "step": 443750 }, { "epoch": 0.8964232759770036, "grad_norm": 11.287782669067383, "learning_rate": 3.804687669831142e-07, "loss": 11.435, "step": 443760 }, { "epoch": 0.8964434766096874, "grad_norm": 321.518798828125, "learning_rate": 3.80335219037698e-07, "loss": 15.9047, "step": 443770 }, { "epoch": 0.8964636772423712, "grad_norm": 562.2626953125, "learning_rate": 3.802016936079678e-07, "loss": 21.6205, "step": 443780 }, { "epoch": 0.896483877875055, "grad_norm": 276.7866516113281, "learning_rate": 3.8006819069457304e-07, "loss": 
20.1359, "step": 443790 }, { "epoch": 0.8965040785077388, "grad_norm": 219.55706787109375, "learning_rate": 3.7993471029816653e-07, "loss": 10.2999, "step": 443800 }, { "epoch": 0.8965242791404227, "grad_norm": 369.7362060546875, "learning_rate": 3.798012524193978e-07, "loss": 31.9167, "step": 443810 }, { "epoch": 0.8965444797731065, "grad_norm": 241.03692626953125, "learning_rate": 3.7966781705891684e-07, "loss": 24.1727, "step": 443820 }, { "epoch": 0.8965646804057903, "grad_norm": 726.6835327148438, "learning_rate": 3.7953440421737433e-07, "loss": 16.6032, "step": 443830 }, { "epoch": 0.8965848810384741, "grad_norm": 285.6444091796875, "learning_rate": 3.794010138954213e-07, "loss": 33.959, "step": 443840 }, { "epoch": 0.8966050816711579, "grad_norm": 587.40283203125, "learning_rate": 3.792676460937078e-07, "loss": 23.2916, "step": 443850 }, { "epoch": 0.8966252823038418, "grad_norm": 6.033504486083984, "learning_rate": 3.791343008128823e-07, "loss": 11.7179, "step": 443860 }, { "epoch": 0.8966454829365256, "grad_norm": 12.601014137268066, "learning_rate": 3.790009780535969e-07, "loss": 19.0735, "step": 443870 }, { "epoch": 0.8966656835692094, "grad_norm": 82.12911987304688, "learning_rate": 3.7886767781650016e-07, "loss": 22.1259, "step": 443880 }, { "epoch": 0.8966858842018932, "grad_norm": 154.22366333007812, "learning_rate": 3.787344001022408e-07, "loss": 12.7269, "step": 443890 }, { "epoch": 0.896706084834577, "grad_norm": 318.2230224609375, "learning_rate": 3.7860114491147017e-07, "loss": 15.6227, "step": 443900 }, { "epoch": 0.8967262854672609, "grad_norm": 235.8320770263672, "learning_rate": 3.784679122448365e-07, "loss": 29.1175, "step": 443910 }, { "epoch": 0.8967464860999447, "grad_norm": 15.09432601928711, "learning_rate": 3.783347021029904e-07, "loss": 10.9162, "step": 443920 }, { "epoch": 0.8967666867326285, "grad_norm": 90.33223724365234, "learning_rate": 3.782015144865808e-07, "loss": 16.4666, "step": 443930 }, { "epoch": 0.8967868873653123, "grad_norm": 423.8289489746094, "learning_rate": 3.780683493962556e-07, "loss": 13.6356, "step": 443940 }, { "epoch": 0.8968070879979961, "grad_norm": 230.84378051757812, "learning_rate": 3.779352068326653e-07, "loss": 11.9636, "step": 443950 }, { "epoch": 0.89682728863068, "grad_norm": 1077.435546875, "learning_rate": 3.7780208679645826e-07, "loss": 25.3632, "step": 443960 }, { "epoch": 0.8968474892633638, "grad_norm": 17.35983657836914, "learning_rate": 3.776689892882823e-07, "loss": 20.2653, "step": 443970 }, { "epoch": 0.8968676898960476, "grad_norm": 436.32354736328125, "learning_rate": 3.77535914308787e-07, "loss": 17.2695, "step": 443980 }, { "epoch": 0.8968878905287314, "grad_norm": 282.3138122558594, "learning_rate": 3.774028618586217e-07, "loss": 25.9139, "step": 443990 }, { "epoch": 0.8969080911614151, "grad_norm": 650.9126586914062, "learning_rate": 3.772698319384349e-07, "loss": 13.6121, "step": 444000 }, { "epoch": 0.8969282917940989, "grad_norm": 65.5379409790039, "learning_rate": 3.7713682454887266e-07, "loss": 11.2433, "step": 444010 }, { "epoch": 0.8969484924267828, "grad_norm": 320.1227722167969, "learning_rate": 3.770038396905862e-07, "loss": 14.2495, "step": 444020 }, { "epoch": 0.8969686930594666, "grad_norm": 357.67852783203125, "learning_rate": 3.768708773642221e-07, "loss": 16.8157, "step": 444030 }, { "epoch": 0.8969888936921504, "grad_norm": 276.79132080078125, "learning_rate": 3.767379375704278e-07, "loss": 16.1594, "step": 444040 }, { "epoch": 0.8970090943248342, "grad_norm": 167.3631134033203, 
"learning_rate": 3.7660502030985203e-07, "loss": 14.0351, "step": 444050 }, { "epoch": 0.897029294957518, "grad_norm": 344.3874206542969, "learning_rate": 3.7647212558314493e-07, "loss": 10.6284, "step": 444060 }, { "epoch": 0.8970494955902019, "grad_norm": 355.57635498046875, "learning_rate": 3.7633925339094936e-07, "loss": 4.788, "step": 444070 }, { "epoch": 0.8970696962228857, "grad_norm": 234.05596923828125, "learning_rate": 3.762064037339158e-07, "loss": 26.8288, "step": 444080 }, { "epoch": 0.8970898968555695, "grad_norm": 573.5048217773438, "learning_rate": 3.760735766126927e-07, "loss": 18.9453, "step": 444090 }, { "epoch": 0.8971100974882533, "grad_norm": 306.1864318847656, "learning_rate": 3.759407720279257e-07, "loss": 17.8384, "step": 444100 }, { "epoch": 0.8971302981209371, "grad_norm": 763.8611450195312, "learning_rate": 3.758079899802619e-07, "loss": 17.9444, "step": 444110 }, { "epoch": 0.897150498753621, "grad_norm": 241.2154998779297, "learning_rate": 3.756752304703498e-07, "loss": 20.6118, "step": 444120 }, { "epoch": 0.8971706993863048, "grad_norm": 474.2724609375, "learning_rate": 3.755424934988355e-07, "loss": 9.7833, "step": 444130 }, { "epoch": 0.8971909000189886, "grad_norm": 71.91261291503906, "learning_rate": 3.7540977906636576e-07, "loss": 13.4105, "step": 444140 }, { "epoch": 0.8972111006516724, "grad_norm": 111.76239776611328, "learning_rate": 3.752770871735878e-07, "loss": 10.913, "step": 444150 }, { "epoch": 0.8972313012843562, "grad_norm": 187.22528076171875, "learning_rate": 3.751444178211494e-07, "loss": 7.838, "step": 444160 }, { "epoch": 0.8972515019170401, "grad_norm": 277.2666320800781, "learning_rate": 3.7501177100969566e-07, "loss": 8.9107, "step": 444170 }, { "epoch": 0.8972717025497239, "grad_norm": 444.3712463378906, "learning_rate": 3.748791467398732e-07, "loss": 12.3827, "step": 444180 }, { "epoch": 0.8972919031824077, "grad_norm": 361.9163818359375, "learning_rate": 3.747465450123294e-07, "loss": 19.0128, "step": 444190 }, { "epoch": 0.8973121038150915, "grad_norm": 334.4684143066406, "learning_rate": 3.7461396582771035e-07, "loss": 20.5581, "step": 444200 }, { "epoch": 0.8973323044477753, "grad_norm": 313.4148864746094, "learning_rate": 3.744814091866605e-07, "loss": 23.7055, "step": 444210 }, { "epoch": 0.8973525050804592, "grad_norm": 403.1631164550781, "learning_rate": 3.7434887508982886e-07, "loss": 16.993, "step": 444220 }, { "epoch": 0.897372705713143, "grad_norm": 43.625972747802734, "learning_rate": 3.7421636353785815e-07, "loss": 17.8234, "step": 444230 }, { "epoch": 0.8973929063458268, "grad_norm": 342.0977478027344, "learning_rate": 3.740838745313974e-07, "loss": 12.4247, "step": 444240 }, { "epoch": 0.8974131069785106, "grad_norm": 238.92608642578125, "learning_rate": 3.739514080710899e-07, "loss": 10.3439, "step": 444250 }, { "epoch": 0.8974333076111943, "grad_norm": 943.6799926757812, "learning_rate": 3.738189641575818e-07, "loss": 27.6635, "step": 444260 }, { "epoch": 0.8974535082438782, "grad_norm": 413.5370788574219, "learning_rate": 3.7368654279151985e-07, "loss": 41.3092, "step": 444270 }, { "epoch": 0.897473708876562, "grad_norm": 118.44355773925781, "learning_rate": 3.7355414397354796e-07, "loss": 10.9965, "step": 444280 }, { "epoch": 0.8974939095092458, "grad_norm": 392.2132263183594, "learning_rate": 3.7342176770431284e-07, "loss": 15.963, "step": 444290 }, { "epoch": 0.8975141101419296, "grad_norm": 335.9568176269531, "learning_rate": 3.732894139844578e-07, "loss": 40.2798, "step": 444300 }, { "epoch": 
0.8975343107746134, "grad_norm": 109.2363510131836, "learning_rate": 3.731570828146297e-07, "loss": 17.8185, "step": 444310 }, { "epoch": 0.8975545114072973, "grad_norm": 73.78199768066406, "learning_rate": 3.730247741954729e-07, "loss": 16.4353, "step": 444320 }, { "epoch": 0.8975747120399811, "grad_norm": 389.6304626464844, "learning_rate": 3.7289248812763137e-07, "loss": 10.4385, "step": 444330 }, { "epoch": 0.8975949126726649, "grad_norm": 545.3394165039062, "learning_rate": 3.727602246117518e-07, "loss": 23.9497, "step": 444340 }, { "epoch": 0.8976151133053487, "grad_norm": 512.56640625, "learning_rate": 3.7262798364847753e-07, "loss": 23.9551, "step": 444350 }, { "epoch": 0.8976353139380325, "grad_norm": 228.80422973632812, "learning_rate": 3.72495765238452e-07, "loss": 15.2482, "step": 444360 }, { "epoch": 0.8976555145707164, "grad_norm": 306.053955078125, "learning_rate": 3.723635693823213e-07, "loss": 21.9044, "step": 444370 }, { "epoch": 0.8976757152034002, "grad_norm": 275.0199279785156, "learning_rate": 3.7223139608073e-07, "loss": 18.1763, "step": 444380 }, { "epoch": 0.897695915836084, "grad_norm": 606.3357543945312, "learning_rate": 3.720992453343214e-07, "loss": 22.7662, "step": 444390 }, { "epoch": 0.8977161164687678, "grad_norm": 194.8509979248047, "learning_rate": 3.7196711714373947e-07, "loss": 12.8268, "step": 444400 }, { "epoch": 0.8977363171014516, "grad_norm": 116.74224853515625, "learning_rate": 3.7183501150962863e-07, "loss": 15.3617, "step": 444410 }, { "epoch": 0.8977565177341355, "grad_norm": 469.42413330078125, "learning_rate": 3.7170292843263347e-07, "loss": 21.9017, "step": 444420 }, { "epoch": 0.8977767183668193, "grad_norm": 342.9283142089844, "learning_rate": 3.715708679133956e-07, "loss": 10.503, "step": 444430 }, { "epoch": 0.8977969189995031, "grad_norm": 269.0628662109375, "learning_rate": 3.714388299525595e-07, "loss": 12.1823, "step": 444440 }, { "epoch": 0.8978171196321869, "grad_norm": 332.29168701171875, "learning_rate": 3.713068145507709e-07, "loss": 12.9783, "step": 444450 }, { "epoch": 0.8978373202648707, "grad_norm": 60.27352523803711, "learning_rate": 3.7117482170867083e-07, "loss": 16.7351, "step": 444460 }, { "epoch": 0.8978575208975546, "grad_norm": 413.5735168457031, "learning_rate": 3.710428514269027e-07, "loss": 11.7485, "step": 444470 }, { "epoch": 0.8978777215302384, "grad_norm": 770.4952392578125, "learning_rate": 3.7091090370611093e-07, "loss": 17.4015, "step": 444480 }, { "epoch": 0.8978979221629222, "grad_norm": 228.45712280273438, "learning_rate": 3.707789785469379e-07, "loss": 16.2267, "step": 444490 }, { "epoch": 0.897918122795606, "grad_norm": 236.81741333007812, "learning_rate": 3.7064707595002636e-07, "loss": 27.0402, "step": 444500 }, { "epoch": 0.8979383234282897, "grad_norm": 83.5274887084961, "learning_rate": 3.705151959160197e-07, "loss": 18.8256, "step": 444510 }, { "epoch": 0.8979585240609735, "grad_norm": 512.5786743164062, "learning_rate": 3.703833384455602e-07, "loss": 20.8874, "step": 444520 }, { "epoch": 0.8979787246936574, "grad_norm": 270.5484619140625, "learning_rate": 3.702515035392912e-07, "loss": 28.6608, "step": 444530 }, { "epoch": 0.8979989253263412, "grad_norm": 0.0, "learning_rate": 3.7011969119785496e-07, "loss": 4.3968, "step": 444540 }, { "epoch": 0.898019125959025, "grad_norm": 415.77490234375, "learning_rate": 3.6998790142189324e-07, "loss": 12.6397, "step": 444550 }, { "epoch": 0.8980393265917088, "grad_norm": 994.0740966796875, "learning_rate": 3.698561342120499e-07, "loss": 16.8994, "step": 
444560 }, { "epoch": 0.8980595272243926, "grad_norm": 106.81134033203125, "learning_rate": 3.6972438956896563e-07, "loss": 10.136, "step": 444570 }, { "epoch": 0.8980797278570765, "grad_norm": 483.38348388671875, "learning_rate": 3.695926674932826e-07, "loss": 33.2317, "step": 444580 }, { "epoch": 0.8980999284897603, "grad_norm": 474.3923034667969, "learning_rate": 3.694609679856431e-07, "loss": 28.5039, "step": 444590 }, { "epoch": 0.8981201291224441, "grad_norm": 798.9323120117188, "learning_rate": 3.693292910466906e-07, "loss": 21.2564, "step": 444600 }, { "epoch": 0.8981403297551279, "grad_norm": 342.9609680175781, "learning_rate": 3.69197636677065e-07, "loss": 12.9522, "step": 444610 }, { "epoch": 0.8981605303878117, "grad_norm": 172.19468688964844, "learning_rate": 3.690660048774075e-07, "loss": 12.2965, "step": 444620 }, { "epoch": 0.8981807310204956, "grad_norm": 224.4616241455078, "learning_rate": 3.6893439564836155e-07, "loss": 8.3202, "step": 444630 }, { "epoch": 0.8982009316531794, "grad_norm": 38.49509048461914, "learning_rate": 3.688028089905682e-07, "loss": 17.5146, "step": 444640 }, { "epoch": 0.8982211322858632, "grad_norm": 54.56721115112305, "learning_rate": 3.6867124490466697e-07, "loss": 15.7033, "step": 444650 }, { "epoch": 0.898241332918547, "grad_norm": 315.0137634277344, "learning_rate": 3.685397033913002e-07, "loss": 14.4688, "step": 444660 }, { "epoch": 0.8982615335512308, "grad_norm": 285.6546936035156, "learning_rate": 3.6840818445111114e-07, "loss": 17.9773, "step": 444670 }, { "epoch": 0.8982817341839147, "grad_norm": 588.5462036132812, "learning_rate": 3.6827668808473714e-07, "loss": 10.5859, "step": 444680 }, { "epoch": 0.8983019348165985, "grad_norm": 444.90185546875, "learning_rate": 3.68145214292821e-07, "loss": 15.1751, "step": 444690 }, { "epoch": 0.8983221354492823, "grad_norm": 1217.4012451171875, "learning_rate": 3.680137630760039e-07, "loss": 23.9297, "step": 444700 }, { "epoch": 0.8983423360819661, "grad_norm": 432.7891845703125, "learning_rate": 3.6788233443492583e-07, "loss": 22.0646, "step": 444710 }, { "epoch": 0.8983625367146499, "grad_norm": 320.3438720703125, "learning_rate": 3.6775092837022685e-07, "loss": 19.194, "step": 444720 }, { "epoch": 0.8983827373473338, "grad_norm": 209.52174377441406, "learning_rate": 3.676195448825487e-07, "loss": 14.6774, "step": 444730 }, { "epoch": 0.8984029379800176, "grad_norm": 575.9804077148438, "learning_rate": 3.674881839725314e-07, "loss": 28.4419, "step": 444740 }, { "epoch": 0.8984231386127014, "grad_norm": 313.8929138183594, "learning_rate": 3.6735684564081385e-07, "loss": 20.3377, "step": 444750 }, { "epoch": 0.8984433392453852, "grad_norm": 266.3089904785156, "learning_rate": 3.672255298880367e-07, "loss": 33.5205, "step": 444760 }, { "epoch": 0.8984635398780689, "grad_norm": 223.70948791503906, "learning_rate": 3.670942367148417e-07, "loss": 21.5937, "step": 444770 }, { "epoch": 0.8984837405107527, "grad_norm": 568.1279907226562, "learning_rate": 3.669629661218671e-07, "loss": 25.2434, "step": 444780 }, { "epoch": 0.8985039411434366, "grad_norm": 15.84461498260498, "learning_rate": 3.66831718109753e-07, "loss": 14.5056, "step": 444790 }, { "epoch": 0.8985241417761204, "grad_norm": 378.42156982421875, "learning_rate": 3.6670049267913954e-07, "loss": 15.9576, "step": 444800 }, { "epoch": 0.8985443424088042, "grad_norm": 416.7359313964844, "learning_rate": 3.665692898306655e-07, "loss": 18.7411, "step": 444810 }, { "epoch": 0.898564543041488, "grad_norm": 205.28900146484375, "learning_rate": 
3.664381095649705e-07, "loss": 12.982, "step": 444820 }, { "epoch": 0.8985847436741718, "grad_norm": 116.66765594482422, "learning_rate": 3.6630695188269505e-07, "loss": 10.0713, "step": 444830 }, { "epoch": 0.8986049443068557, "grad_norm": 293.1413269042969, "learning_rate": 3.6617581678447647e-07, "loss": 14.8839, "step": 444840 }, { "epoch": 0.8986251449395395, "grad_norm": 92.39081573486328, "learning_rate": 3.6604470427095587e-07, "loss": 13.498, "step": 444850 }, { "epoch": 0.8986453455722233, "grad_norm": 314.4815979003906, "learning_rate": 3.6591361434277105e-07, "loss": 21.9132, "step": 444860 }, { "epoch": 0.8986655462049071, "grad_norm": 375.6960144042969, "learning_rate": 3.6578254700056107e-07, "loss": 8.0686, "step": 444870 }, { "epoch": 0.898685746837591, "grad_norm": 254.2244873046875, "learning_rate": 3.6565150224496525e-07, "loss": 8.0898, "step": 444880 }, { "epoch": 0.8987059474702748, "grad_norm": 192.85308837890625, "learning_rate": 3.65520480076621e-07, "loss": 12.1381, "step": 444890 }, { "epoch": 0.8987261481029586, "grad_norm": 262.3988952636719, "learning_rate": 3.6538948049616886e-07, "loss": 11.3097, "step": 444900 }, { "epoch": 0.8987463487356424, "grad_norm": 374.4770812988281, "learning_rate": 3.6525850350424554e-07, "loss": 8.4125, "step": 444910 }, { "epoch": 0.8987665493683262, "grad_norm": 919.95458984375, "learning_rate": 3.651275491014905e-07, "loss": 28.0506, "step": 444920 }, { "epoch": 0.89878675000101, "grad_norm": 256.9989013671875, "learning_rate": 3.649966172885422e-07, "loss": 18.5785, "step": 444930 }, { "epoch": 0.8988069506336939, "grad_norm": 302.6526184082031, "learning_rate": 3.648657080660373e-07, "loss": 14.164, "step": 444940 }, { "epoch": 0.8988271512663777, "grad_norm": 138.86810302734375, "learning_rate": 3.6473482143461523e-07, "loss": 15.4201, "step": 444950 }, { "epoch": 0.8988473518990615, "grad_norm": 253.0792999267578, "learning_rate": 3.6460395739491337e-07, "loss": 8.506, "step": 444960 }, { "epoch": 0.8988675525317453, "grad_norm": 267.2154846191406, "learning_rate": 3.644731159475695e-07, "loss": 14.1381, "step": 444970 }, { "epoch": 0.8988877531644291, "grad_norm": 773.7963256835938, "learning_rate": 3.643422970932209e-07, "loss": 28.2256, "step": 444980 }, { "epoch": 0.898907953797113, "grad_norm": 183.3919219970703, "learning_rate": 3.6421150083250754e-07, "loss": 11.0542, "step": 444990 }, { "epoch": 0.8989281544297968, "grad_norm": 614.067138671875, "learning_rate": 3.6408072716606346e-07, "loss": 20.9825, "step": 445000 }, { "epoch": 0.8989483550624806, "grad_norm": 728.6051025390625, "learning_rate": 3.6394997609452755e-07, "loss": 13.9006, "step": 445010 }, { "epoch": 0.8989685556951643, "grad_norm": 354.2909851074219, "learning_rate": 3.6381924761853814e-07, "loss": 13.5888, "step": 445020 }, { "epoch": 0.8989887563278481, "grad_norm": 600.506591796875, "learning_rate": 3.6368854173873094e-07, "loss": 20.5429, "step": 445030 }, { "epoch": 0.899008956960532, "grad_norm": 345.0184631347656, "learning_rate": 3.635578584557431e-07, "loss": 25.2817, "step": 445040 }, { "epoch": 0.8990291575932158, "grad_norm": 493.048095703125, "learning_rate": 3.6342719777021194e-07, "loss": 23.0403, "step": 445050 }, { "epoch": 0.8990493582258996, "grad_norm": 174.3533477783203, "learning_rate": 3.6329655968277477e-07, "loss": 16.8303, "step": 445060 }, { "epoch": 0.8990695588585834, "grad_norm": 57.79670333862305, "learning_rate": 3.6316594419406826e-07, "loss": 14.3608, "step": 445070 }, { "epoch": 0.8990897594912672, "grad_norm": 
422.2472229003906, "learning_rate": 3.6303535130472743e-07, "loss": 24.412, "step": 445080 }, { "epoch": 0.8991099601239511, "grad_norm": 244.47982788085938, "learning_rate": 3.6290478101539073e-07, "loss": 11.1764, "step": 445090 }, { "epoch": 0.8991301607566349, "grad_norm": 318.8597412109375, "learning_rate": 3.627742333266937e-07, "loss": 11.5762, "step": 445100 }, { "epoch": 0.8991503613893187, "grad_norm": 132.2611083984375, "learning_rate": 3.6264370823927196e-07, "loss": 18.6923, "step": 445110 }, { "epoch": 0.8991705620220025, "grad_norm": 481.2422180175781, "learning_rate": 3.6251320575376336e-07, "loss": 23.3867, "step": 445120 }, { "epoch": 0.8991907626546863, "grad_norm": 274.683837890625, "learning_rate": 3.6238272587080183e-07, "loss": 15.6587, "step": 445130 }, { "epoch": 0.8992109632873702, "grad_norm": 292.89007568359375, "learning_rate": 3.6225226859102515e-07, "loss": 8.5681, "step": 445140 }, { "epoch": 0.899231163920054, "grad_norm": 596.2623901367188, "learning_rate": 3.621218339150684e-07, "loss": 17.0533, "step": 445150 }, { "epoch": 0.8992513645527378, "grad_norm": 728.558837890625, "learning_rate": 3.619914218435666e-07, "loss": 16.9225, "step": 445160 }, { "epoch": 0.8992715651854216, "grad_norm": 299.84503173828125, "learning_rate": 3.6186103237715706e-07, "loss": 12.8559, "step": 445170 }, { "epoch": 0.8992917658181054, "grad_norm": 665.6764526367188, "learning_rate": 3.617306655164743e-07, "loss": 37.6737, "step": 445180 }, { "epoch": 0.8993119664507893, "grad_norm": 377.08441162109375, "learning_rate": 3.6160032126215274e-07, "loss": 15.4841, "step": 445190 }, { "epoch": 0.8993321670834731, "grad_norm": 668.6402587890625, "learning_rate": 3.614699996148285e-07, "loss": 24.6529, "step": 445200 }, { "epoch": 0.8993523677161569, "grad_norm": 458.1636657714844, "learning_rate": 3.613397005751379e-07, "loss": 26.3734, "step": 445210 }, { "epoch": 0.8993725683488407, "grad_norm": 363.49127197265625, "learning_rate": 3.612094241437153e-07, "loss": 22.0163, "step": 445220 }, { "epoch": 0.8993927689815245, "grad_norm": 333.63128662109375, "learning_rate": 3.610791703211941e-07, "loss": 14.0242, "step": 445230 }, { "epoch": 0.8994129696142084, "grad_norm": 389.77337646484375, "learning_rate": 3.6094893910821103e-07, "loss": 20.8929, "step": 445240 }, { "epoch": 0.8994331702468922, "grad_norm": 279.6188049316406, "learning_rate": 3.608187305054006e-07, "loss": 11.8537, "step": 445250 }, { "epoch": 0.899453370879576, "grad_norm": 544.8707885742188, "learning_rate": 3.606885445133962e-07, "loss": 12.3577, "step": 445260 }, { "epoch": 0.8994735715122598, "grad_norm": 241.91265869140625, "learning_rate": 3.605583811328328e-07, "loss": 23.5405, "step": 445270 }, { "epoch": 0.8994937721449435, "grad_norm": 481.5022277832031, "learning_rate": 3.604282403643472e-07, "loss": 24.3286, "step": 445280 }, { "epoch": 0.8995139727776273, "grad_norm": 165.5880584716797, "learning_rate": 3.6029812220857e-07, "loss": 18.6106, "step": 445290 }, { "epoch": 0.8995341734103112, "grad_norm": 96.1946792602539, "learning_rate": 3.601680266661367e-07, "loss": 17.9782, "step": 445300 }, { "epoch": 0.899554374042995, "grad_norm": 529.3019409179688, "learning_rate": 3.6003795373768303e-07, "loss": 18.7, "step": 445310 }, { "epoch": 0.8995745746756788, "grad_norm": 1.002054214477539, "learning_rate": 3.5990790342384117e-07, "loss": 18.6463, "step": 445320 }, { "epoch": 0.8995947753083626, "grad_norm": 678.5360717773438, "learning_rate": 3.5977787572524457e-07, "loss": 16.6345, "step": 445330 }, { 
"epoch": 0.8996149759410464, "grad_norm": 79.41068267822266, "learning_rate": 3.596478706425277e-07, "loss": 16.8962, "step": 445340 }, { "epoch": 0.8996351765737303, "grad_norm": 608.1316528320312, "learning_rate": 3.5951788817632615e-07, "loss": 15.5022, "step": 445350 }, { "epoch": 0.8996553772064141, "grad_norm": 404.8101806640625, "learning_rate": 3.5938792832726996e-07, "loss": 23.5742, "step": 445360 }, { "epoch": 0.8996755778390979, "grad_norm": 316.1697998046875, "learning_rate": 3.5925799109599426e-07, "loss": 15.4585, "step": 445370 }, { "epoch": 0.8996957784717817, "grad_norm": 362.8455505371094, "learning_rate": 3.5912807648313285e-07, "loss": 19.0565, "step": 445380 }, { "epoch": 0.8997159791044655, "grad_norm": 341.9504699707031, "learning_rate": 3.5899818448931865e-07, "loss": 20.8177, "step": 445390 }, { "epoch": 0.8997361797371494, "grad_norm": 401.0293273925781, "learning_rate": 3.5886831511518336e-07, "loss": 24.3106, "step": 445400 }, { "epoch": 0.8997563803698332, "grad_norm": 161.62619018554688, "learning_rate": 3.5873846836136204e-07, "loss": 25.0842, "step": 445410 }, { "epoch": 0.899776581002517, "grad_norm": 0.0, "learning_rate": 3.586086442284864e-07, "loss": 22.1347, "step": 445420 }, { "epoch": 0.8997967816352008, "grad_norm": 324.30487060546875, "learning_rate": 3.5847884271718814e-07, "loss": 21.2996, "step": 445430 }, { "epoch": 0.8998169822678846, "grad_norm": 198.55274963378906, "learning_rate": 3.583490638281023e-07, "loss": 24.4018, "step": 445440 }, { "epoch": 0.8998371829005685, "grad_norm": 240.75051879882812, "learning_rate": 3.5821930756185894e-07, "loss": 15.4742, "step": 445450 }, { "epoch": 0.8998573835332523, "grad_norm": 526.855224609375, "learning_rate": 3.5808957391909315e-07, "loss": 14.6135, "step": 445460 }, { "epoch": 0.8998775841659361, "grad_norm": 506.0526123046875, "learning_rate": 3.579598629004355e-07, "loss": 14.1117, "step": 445470 }, { "epoch": 0.8998977847986199, "grad_norm": 545.1388549804688, "learning_rate": 3.5783017450651714e-07, "loss": 12.2958, "step": 445480 }, { "epoch": 0.8999179854313037, "grad_norm": 520.304443359375, "learning_rate": 3.5770050873797314e-07, "loss": 19.0794, "step": 445490 }, { "epoch": 0.8999381860639876, "grad_norm": 142.5303955078125, "learning_rate": 3.575708655954324e-07, "loss": 8.911, "step": 445500 }, { "epoch": 0.8999583866966714, "grad_norm": 366.2143249511719, "learning_rate": 3.5744124507952895e-07, "loss": 15.2595, "step": 445510 }, { "epoch": 0.8999785873293552, "grad_norm": 171.73390197753906, "learning_rate": 3.573116471908933e-07, "loss": 21.8395, "step": 445520 }, { "epoch": 0.899998787962039, "grad_norm": 449.6515197753906, "learning_rate": 3.571820719301583e-07, "loss": 17.7359, "step": 445530 }, { "epoch": 0.9000189885947227, "grad_norm": 827.51953125, "learning_rate": 3.570525192979546e-07, "loss": 15.0506, "step": 445540 }, { "epoch": 0.9000391892274066, "grad_norm": 465.2867126464844, "learning_rate": 3.569229892949133e-07, "loss": 21.303, "step": 445550 }, { "epoch": 0.9000593898600904, "grad_norm": 218.71087646484375, "learning_rate": 3.5679348192166675e-07, "loss": 8.2386, "step": 445560 }, { "epoch": 0.9000795904927742, "grad_norm": 721.7833251953125, "learning_rate": 3.5666399717884604e-07, "loss": 20.4058, "step": 445570 }, { "epoch": 0.900099791125458, "grad_norm": 141.5386505126953, "learning_rate": 3.565345350670807e-07, "loss": 12.856, "step": 445580 }, { "epoch": 0.9001199917581418, "grad_norm": 508.2934875488281, "learning_rate": 3.56405095587003e-07, "loss": 
12.1547, "step": 445590 }, { "epoch": 0.9001401923908257, "grad_norm": 132.60581970214844, "learning_rate": 3.562756787392452e-07, "loss": 23.756, "step": 445600 }, { "epoch": 0.9001603930235095, "grad_norm": 340.5688781738281, "learning_rate": 3.561462845244351e-07, "loss": 13.3342, "step": 445610 }, { "epoch": 0.9001805936561933, "grad_norm": 351.4344482421875, "learning_rate": 3.560169129432045e-07, "loss": 26.1856, "step": 445620 }, { "epoch": 0.9002007942888771, "grad_norm": 527.7457275390625, "learning_rate": 3.5588756399618507e-07, "loss": 9.6738, "step": 445630 }, { "epoch": 0.9002209949215609, "grad_norm": 262.84136962890625, "learning_rate": 3.557582376840063e-07, "loss": 14.2681, "step": 445640 }, { "epoch": 0.9002411955542448, "grad_norm": 526.7639770507812, "learning_rate": 3.556289340072977e-07, "loss": 10.0684, "step": 445650 }, { "epoch": 0.9002613961869286, "grad_norm": 321.4812316894531, "learning_rate": 3.55499652966691e-07, "loss": 17.8272, "step": 445660 }, { "epoch": 0.9002815968196124, "grad_norm": 91.7345962524414, "learning_rate": 3.5537039456281674e-07, "loss": 13.1638, "step": 445670 }, { "epoch": 0.9003017974522962, "grad_norm": 111.93982696533203, "learning_rate": 3.5524115879630225e-07, "loss": 7.6087, "step": 445680 }, { "epoch": 0.90032199808498, "grad_norm": 481.2381286621094, "learning_rate": 3.551119456677793e-07, "loss": 15.0273, "step": 445690 }, { "epoch": 0.9003421987176639, "grad_norm": 403.2091979980469, "learning_rate": 3.5498275517787783e-07, "loss": 33.3146, "step": 445700 }, { "epoch": 0.9003623993503477, "grad_norm": 309.87310791015625, "learning_rate": 3.5485358732722743e-07, "loss": 17.5942, "step": 445710 }, { "epoch": 0.9003825999830315, "grad_norm": 732.3163452148438, "learning_rate": 3.547244421164564e-07, "loss": 20.9732, "step": 445720 }, { "epoch": 0.9004028006157153, "grad_norm": 365.7209167480469, "learning_rate": 3.545953195461954e-07, "loss": 22.0166, "step": 445730 }, { "epoch": 0.9004230012483991, "grad_norm": 365.7203369140625, "learning_rate": 3.5446621961707284e-07, "loss": 14.7776, "step": 445740 }, { "epoch": 0.900443201881083, "grad_norm": 545.6244506835938, "learning_rate": 3.5433714232971927e-07, "loss": 26.0447, "step": 445750 }, { "epoch": 0.9004634025137668, "grad_norm": 219.1865234375, "learning_rate": 3.5420808768476313e-07, "loss": 17.1418, "step": 445760 }, { "epoch": 0.9004836031464506, "grad_norm": 876.4417114257812, "learning_rate": 3.540790556828327e-07, "loss": 14.8119, "step": 445770 }, { "epoch": 0.9005038037791344, "grad_norm": 154.30516052246094, "learning_rate": 3.539500463245582e-07, "loss": 42.6618, "step": 445780 }, { "epoch": 0.9005240044118181, "grad_norm": 280.67626953125, "learning_rate": 3.5382105961056735e-07, "loss": 24.9225, "step": 445790 }, { "epoch": 0.9005442050445019, "grad_norm": 122.18487548828125, "learning_rate": 3.5369209554148854e-07, "loss": 17.0533, "step": 445800 }, { "epoch": 0.9005644056771858, "grad_norm": 348.6451110839844, "learning_rate": 3.535631541179507e-07, "loss": 15.0391, "step": 445810 }, { "epoch": 0.9005846063098696, "grad_norm": 140.8256072998047, "learning_rate": 3.534342353405834e-07, "loss": 13.4767, "step": 445820 }, { "epoch": 0.9006048069425534, "grad_norm": 281.6974182128906, "learning_rate": 3.533053392100144e-07, "loss": 24.3823, "step": 445830 }, { "epoch": 0.9006250075752372, "grad_norm": 448.6886901855469, "learning_rate": 3.531764657268705e-07, "loss": 15.1547, "step": 445840 }, { "epoch": 0.900645208207921, "grad_norm": 455.422119140625, 
"learning_rate": 3.530476148917816e-07, "loss": 31.2984, "step": 445850 }, { "epoch": 0.9006654088406049, "grad_norm": 555.0156860351562, "learning_rate": 3.5291878670537516e-07, "loss": 22.9467, "step": 445860 }, { "epoch": 0.9006856094732887, "grad_norm": 285.40625, "learning_rate": 3.5278998116827835e-07, "loss": 23.5238, "step": 445870 }, { "epoch": 0.9007058101059725, "grad_norm": 291.3361511230469, "learning_rate": 3.5266119828111953e-07, "loss": 21.3014, "step": 445880 }, { "epoch": 0.9007260107386563, "grad_norm": 621.8134155273438, "learning_rate": 3.525324380445277e-07, "loss": 15.1114, "step": 445890 }, { "epoch": 0.9007462113713401, "grad_norm": 316.3350524902344, "learning_rate": 3.524037004591274e-07, "loss": 15.3083, "step": 445900 }, { "epoch": 0.900766412004024, "grad_norm": 235.22181701660156, "learning_rate": 3.5227498552554805e-07, "loss": 18.8735, "step": 445910 }, { "epoch": 0.9007866126367078, "grad_norm": 680.9277954101562, "learning_rate": 3.5214629324441754e-07, "loss": 33.483, "step": 445920 }, { "epoch": 0.9008068132693916, "grad_norm": 646.0338745117188, "learning_rate": 3.5201762361636195e-07, "loss": 12.3207, "step": 445930 }, { "epoch": 0.9008270139020754, "grad_norm": 601.1619873046875, "learning_rate": 3.5188897664200804e-07, "loss": 17.9275, "step": 445940 }, { "epoch": 0.9008472145347592, "grad_norm": 357.0928039550781, "learning_rate": 3.5176035232198367e-07, "loss": 20.5941, "step": 445950 }, { "epoch": 0.9008674151674431, "grad_norm": 431.9078674316406, "learning_rate": 3.516317506569172e-07, "loss": 20.7034, "step": 445960 }, { "epoch": 0.9008876158001269, "grad_norm": 188.78501892089844, "learning_rate": 3.515031716474321e-07, "loss": 17.5412, "step": 445970 }, { "epoch": 0.9009078164328107, "grad_norm": 616.5831298828125, "learning_rate": 3.513746152941572e-07, "loss": 15.5337, "step": 445980 }, { "epoch": 0.9009280170654945, "grad_norm": 245.98434448242188, "learning_rate": 3.5124608159771864e-07, "loss": 13.5908, "step": 445990 }, { "epoch": 0.9009482176981783, "grad_norm": 259.31622314453125, "learning_rate": 3.511175705587433e-07, "loss": 21.3198, "step": 446000 }, { "epoch": 0.9009684183308622, "grad_norm": 614.026123046875, "learning_rate": 3.509890821778561e-07, "loss": 17.3189, "step": 446010 }, { "epoch": 0.900988618963546, "grad_norm": 1304.6748046875, "learning_rate": 3.508606164556855e-07, "loss": 22.5737, "step": 446020 }, { "epoch": 0.9010088195962298, "grad_norm": 445.1016845703125, "learning_rate": 3.507321733928559e-07, "loss": 13.0387, "step": 446030 }, { "epoch": 0.9010290202289136, "grad_norm": 323.90216064453125, "learning_rate": 3.5060375298999303e-07, "loss": 26.3852, "step": 446040 }, { "epoch": 0.9010492208615973, "grad_norm": 340.41754150390625, "learning_rate": 3.5047535524772467e-07, "loss": 23.3956, "step": 446050 }, { "epoch": 0.9010694214942812, "grad_norm": 309.1763916015625, "learning_rate": 3.5034698016667423e-07, "loss": 13.9331, "step": 446060 }, { "epoch": 0.901089622126965, "grad_norm": 299.2520751953125, "learning_rate": 3.5021862774747007e-07, "loss": 17.1172, "step": 446070 }, { "epoch": 0.9011098227596488, "grad_norm": 73.07524108886719, "learning_rate": 3.500902979907356e-07, "loss": 15.6339, "step": 446080 }, { "epoch": 0.9011300233923326, "grad_norm": 607.2282104492188, "learning_rate": 3.4996199089709695e-07, "loss": 13.3263, "step": 446090 }, { "epoch": 0.9011502240250164, "grad_norm": 414.443359375, "learning_rate": 3.498337064671803e-07, "loss": 21.8615, "step": 446100 }, { "epoch": 
0.9011704246577003, "grad_norm": 176.03738403320312, "learning_rate": 3.4970544470160905e-07, "loss": 14.6029, "step": 446110 }, { "epoch": 0.9011906252903841, "grad_norm": 243.4033203125, "learning_rate": 3.495772056010105e-07, "loss": 21.5409, "step": 446120 }, { "epoch": 0.9012108259230679, "grad_norm": 423.4161071777344, "learning_rate": 3.4944898916600743e-07, "loss": 27.8391, "step": 446130 }, { "epoch": 0.9012310265557517, "grad_norm": 488.76904296875, "learning_rate": 3.493207953972272e-07, "loss": 12.8866, "step": 446140 }, { "epoch": 0.9012512271884355, "grad_norm": 815.1055297851562, "learning_rate": 3.491926242952931e-07, "loss": 20.7648, "step": 446150 }, { "epoch": 0.9012714278211194, "grad_norm": 744.4523315429688, "learning_rate": 3.4906447586082917e-07, "loss": 27.846, "step": 446160 }, { "epoch": 0.9012916284538032, "grad_norm": 186.1259002685547, "learning_rate": 3.48936350094462e-07, "loss": 11.0203, "step": 446170 }, { "epoch": 0.901311829086487, "grad_norm": 751.5745849609375, "learning_rate": 3.488082469968146e-07, "loss": 27.7282, "step": 446180 }, { "epoch": 0.9013320297191708, "grad_norm": 876.5944213867188, "learning_rate": 3.4868016656851135e-07, "loss": 20.8903, "step": 446190 }, { "epoch": 0.9013522303518546, "grad_norm": 248.3055419921875, "learning_rate": 3.4855210881017675e-07, "loss": 18.6062, "step": 446200 }, { "epoch": 0.9013724309845385, "grad_norm": 534.772216796875, "learning_rate": 3.4842407372243646e-07, "loss": 30.0212, "step": 446210 }, { "epoch": 0.9013926316172223, "grad_norm": 178.67788696289062, "learning_rate": 3.482960613059111e-07, "loss": 16.603, "step": 446220 }, { "epoch": 0.9014128322499061, "grad_norm": 169.78196716308594, "learning_rate": 3.481680715612273e-07, "loss": 10.8747, "step": 446230 }, { "epoch": 0.9014330328825899, "grad_norm": 949.7888793945312, "learning_rate": 3.480401044890086e-07, "loss": 16.0835, "step": 446240 }, { "epoch": 0.9014532335152737, "grad_norm": 310.1080017089844, "learning_rate": 3.479121600898777e-07, "loss": 22.9904, "step": 446250 }, { "epoch": 0.9014734341479576, "grad_norm": 533.2051391601562, "learning_rate": 3.477842383644586e-07, "loss": 14.9268, "step": 446260 }, { "epoch": 0.9014936347806414, "grad_norm": 290.5344543457031, "learning_rate": 3.476563393133747e-07, "loss": 19.1723, "step": 446270 }, { "epoch": 0.9015138354133252, "grad_norm": 248.2886962890625, "learning_rate": 3.475284629372511e-07, "loss": 25.3621, "step": 446280 }, { "epoch": 0.901534036046009, "grad_norm": 496.6837463378906, "learning_rate": 3.474006092367077e-07, "loss": 12.9381, "step": 446290 }, { "epoch": 0.9015542366786927, "grad_norm": 8.167790412902832, "learning_rate": 3.472727782123697e-07, "loss": 5.335, "step": 446300 }, { "epoch": 0.9015744373113765, "grad_norm": 494.1587829589844, "learning_rate": 3.4714496986486045e-07, "loss": 14.7211, "step": 446310 }, { "epoch": 0.9015946379440604, "grad_norm": 496.6618957519531, "learning_rate": 3.470171841948022e-07, "loss": 15.1881, "step": 446320 }, { "epoch": 0.9016148385767442, "grad_norm": 318.4088134765625, "learning_rate": 3.468894212028173e-07, "loss": 25.2526, "step": 446330 }, { "epoch": 0.901635039209428, "grad_norm": 424.84149169921875, "learning_rate": 3.467616808895302e-07, "loss": 17.9956, "step": 446340 }, { "epoch": 0.9016552398421118, "grad_norm": 295.3107604980469, "learning_rate": 3.4663396325556154e-07, "loss": 8.7933, "step": 446350 }, { "epoch": 0.9016754404747956, "grad_norm": 235.19976806640625, "learning_rate": 3.465062683015341e-07, "loss": 
15.9845, "step": 446360 }, { "epoch": 0.9016956411074795, "grad_norm": 327.4865417480469, "learning_rate": 3.463785960280719e-07, "loss": 17.9148, "step": 446370 }, { "epoch": 0.9017158417401633, "grad_norm": 313.4536437988281, "learning_rate": 3.462509464357944e-07, "loss": 28.1946, "step": 446380 }, { "epoch": 0.9017360423728471, "grad_norm": 201.65939331054688, "learning_rate": 3.461233195253266e-07, "loss": 18.0275, "step": 446390 }, { "epoch": 0.9017562430055309, "grad_norm": 977.5045166015625, "learning_rate": 3.459957152972887e-07, "loss": 28.939, "step": 446400 }, { "epoch": 0.9017764436382147, "grad_norm": 0.0, "learning_rate": 3.45868133752304e-07, "loss": 17.0587, "step": 446410 }, { "epoch": 0.9017966442708986, "grad_norm": 593.5664672851562, "learning_rate": 3.45740574890992e-07, "loss": 18.1876, "step": 446420 }, { "epoch": 0.9018168449035824, "grad_norm": 122.75834655761719, "learning_rate": 3.456130387139778e-07, "loss": 40.8926, "step": 446430 }, { "epoch": 0.9018370455362662, "grad_norm": 281.06292724609375, "learning_rate": 3.454855252218803e-07, "loss": 21.9502, "step": 446440 }, { "epoch": 0.90185724616895, "grad_norm": 272.9182434082031, "learning_rate": 3.4535803441532125e-07, "loss": 15.2092, "step": 446450 }, { "epoch": 0.9018774468016338, "grad_norm": 274.5888671875, "learning_rate": 3.4523056629492344e-07, "loss": 25.7554, "step": 446460 }, { "epoch": 0.9018976474343177, "grad_norm": 422.311767578125, "learning_rate": 3.451031208613076e-07, "loss": 10.7451, "step": 446470 }, { "epoch": 0.9019178480670015, "grad_norm": 267.4585266113281, "learning_rate": 3.449756981150931e-07, "loss": 14.6757, "step": 446480 }, { "epoch": 0.9019380486996853, "grad_norm": 369.5973205566406, "learning_rate": 3.448482980569029e-07, "loss": 29.1553, "step": 446490 }, { "epoch": 0.9019582493323691, "grad_norm": 713.7677001953125, "learning_rate": 3.4472092068735917e-07, "loss": 31.0512, "step": 446500 }, { "epoch": 0.9019784499650529, "grad_norm": 286.2923889160156, "learning_rate": 3.4459356600707925e-07, "loss": 9.0444, "step": 446510 }, { "epoch": 0.9019986505977368, "grad_norm": 692.444580078125, "learning_rate": 3.44466234016686e-07, "loss": 15.3244, "step": 446520 }, { "epoch": 0.9020188512304206, "grad_norm": 233.64529418945312, "learning_rate": 3.443389247168e-07, "loss": 16.3657, "step": 446530 }, { "epoch": 0.9020390518631044, "grad_norm": 156.468994140625, "learning_rate": 3.442116381080418e-07, "loss": 13.9561, "step": 446540 }, { "epoch": 0.9020592524957882, "grad_norm": 376.273193359375, "learning_rate": 3.4408437419103047e-07, "loss": 9.307, "step": 446550 }, { "epoch": 0.9020794531284719, "grad_norm": 253.2138671875, "learning_rate": 3.4395713296638713e-07, "loss": 18.5868, "step": 446560 }, { "epoch": 0.9020996537611558, "grad_norm": 474.3890380859375, "learning_rate": 3.4382991443473403e-07, "loss": 20.5897, "step": 446570 }, { "epoch": 0.9021198543938396, "grad_norm": 670.932373046875, "learning_rate": 3.437027185966868e-07, "loss": 13.156, "step": 446580 }, { "epoch": 0.9021400550265234, "grad_norm": 267.61737060546875, "learning_rate": 3.4357554545286833e-07, "loss": 20.705, "step": 446590 }, { "epoch": 0.9021602556592072, "grad_norm": 19.1137752532959, "learning_rate": 3.434483950038986e-07, "loss": 10.2194, "step": 446600 }, { "epoch": 0.902180456291891, "grad_norm": 818.1859741210938, "learning_rate": 3.433212672503966e-07, "loss": 19.5881, "step": 446610 }, { "epoch": 0.9022006569245749, "grad_norm": 497.45947265625, "learning_rate": 3.431941621929813e-07, 
"loss": 22.4659, "step": 446620 }, { "epoch": 0.9022208575572587, "grad_norm": 109.84841918945312, "learning_rate": 3.430670798322733e-07, "loss": 12.4741, "step": 446630 }, { "epoch": 0.9022410581899425, "grad_norm": 232.27517700195312, "learning_rate": 3.4294002016889206e-07, "loss": 15.562, "step": 446640 }, { "epoch": 0.9022612588226263, "grad_norm": 204.5904998779297, "learning_rate": 3.428129832034549e-07, "loss": 18.9749, "step": 446650 }, { "epoch": 0.9022814594553101, "grad_norm": 271.8900146484375, "learning_rate": 3.426859689365836e-07, "loss": 11.7842, "step": 446660 }, { "epoch": 0.902301660087994, "grad_norm": 203.10791015625, "learning_rate": 3.425589773688953e-07, "loss": 20.7852, "step": 446670 }, { "epoch": 0.9023218607206778, "grad_norm": 189.75350952148438, "learning_rate": 3.424320085010102e-07, "loss": 19.1101, "step": 446680 }, { "epoch": 0.9023420613533616, "grad_norm": 96.21739959716797, "learning_rate": 3.423050623335467e-07, "loss": 10.193, "step": 446690 }, { "epoch": 0.9023622619860454, "grad_norm": 410.38580322265625, "learning_rate": 3.421781388671225e-07, "loss": 13.8715, "step": 446700 }, { "epoch": 0.9023824626187292, "grad_norm": 29.13329315185547, "learning_rate": 3.420512381023583e-07, "loss": 19.4322, "step": 446710 }, { "epoch": 0.902402663251413, "grad_norm": 76.03953552246094, "learning_rate": 3.419243600398703e-07, "loss": 12.2015, "step": 446720 }, { "epoch": 0.9024228638840969, "grad_norm": 233.65858459472656, "learning_rate": 3.4179750468027906e-07, "loss": 15.4427, "step": 446730 }, { "epoch": 0.9024430645167807, "grad_norm": 392.72564697265625, "learning_rate": 3.416706720242008e-07, "loss": 15.2899, "step": 446740 }, { "epoch": 0.9024632651494645, "grad_norm": 527.7315673828125, "learning_rate": 3.415438620722555e-07, "loss": 17.6316, "step": 446750 }, { "epoch": 0.9024834657821483, "grad_norm": 222.16433715820312, "learning_rate": 3.4141707482506056e-07, "loss": 16.3429, "step": 446760 }, { "epoch": 0.9025036664148322, "grad_norm": 687.5732421875, "learning_rate": 3.412903102832327e-07, "loss": 10.8618, "step": 446770 }, { "epoch": 0.902523867047516, "grad_norm": 1229.32275390625, "learning_rate": 3.4116356844739184e-07, "loss": 28.2971, "step": 446780 }, { "epoch": 0.9025440676801998, "grad_norm": 58.414588928222656, "learning_rate": 3.4103684931815483e-07, "loss": 17.7443, "step": 446790 }, { "epoch": 0.9025642683128836, "grad_norm": 371.36328125, "learning_rate": 3.409101528961378e-07, "loss": 5.0725, "step": 446800 }, { "epoch": 0.9025844689455673, "grad_norm": 140.0763397216797, "learning_rate": 3.407834791819603e-07, "loss": 15.3257, "step": 446810 }, { "epoch": 0.9026046695782511, "grad_norm": 291.6254577636719, "learning_rate": 3.4065682817624015e-07, "loss": 17.0342, "step": 446820 }, { "epoch": 0.902624870210935, "grad_norm": 433.16741943359375, "learning_rate": 3.4053019987959234e-07, "loss": 23.7829, "step": 446830 }, { "epoch": 0.9026450708436188, "grad_norm": 360.67669677734375, "learning_rate": 3.404035942926348e-07, "loss": 14.3795, "step": 446840 }, { "epoch": 0.9026652714763026, "grad_norm": 400.54913330078125, "learning_rate": 3.402770114159859e-07, "loss": 25.7854, "step": 446850 }, { "epoch": 0.9026854721089864, "grad_norm": 313.03179931640625, "learning_rate": 3.401504512502618e-07, "loss": 18.2124, "step": 446860 }, { "epoch": 0.9027056727416702, "grad_norm": 448.4454650878906, "learning_rate": 3.4002391379607815e-07, "loss": 26.2969, "step": 446870 }, { "epoch": 0.9027258733743541, "grad_norm": 15.552080154418945, 
"learning_rate": 3.3989739905405326e-07, "loss": 10.4564, "step": 446880 }, { "epoch": 0.9027460740070379, "grad_norm": 462.7173156738281, "learning_rate": 3.3977090702480455e-07, "loss": 22.8206, "step": 446890 }, { "epoch": 0.9027662746397217, "grad_norm": 673.148193359375, "learning_rate": 3.396444377089453e-07, "loss": 21.1259, "step": 446900 }, { "epoch": 0.9027864752724055, "grad_norm": 480.64093017578125, "learning_rate": 3.395179911070945e-07, "loss": 20.9275, "step": 446910 }, { "epoch": 0.9028066759050893, "grad_norm": 125.34583282470703, "learning_rate": 3.3939156721986777e-07, "loss": 15.184, "step": 446920 }, { "epoch": 0.9028268765377732, "grad_norm": 139.38539123535156, "learning_rate": 3.3926516604788185e-07, "loss": 23.2434, "step": 446930 }, { "epoch": 0.902847077170457, "grad_norm": 422.3639221191406, "learning_rate": 3.3913878759175124e-07, "loss": 24.0939, "step": 446940 }, { "epoch": 0.9028672778031408, "grad_norm": 721.1874389648438, "learning_rate": 3.3901243185209375e-07, "loss": 20.7313, "step": 446950 }, { "epoch": 0.9028874784358246, "grad_norm": 594.5277709960938, "learning_rate": 3.388860988295245e-07, "loss": 18.999, "step": 446960 }, { "epoch": 0.9029076790685084, "grad_norm": 317.29876708984375, "learning_rate": 3.3875978852465795e-07, "loss": 15.7928, "step": 446970 }, { "epoch": 0.9029278797011923, "grad_norm": 22.906665802001953, "learning_rate": 3.3863350093811196e-07, "loss": 13.0489, "step": 446980 }, { "epoch": 0.9029480803338761, "grad_norm": 198.2176971435547, "learning_rate": 3.3850723607049994e-07, "loss": 14.2384, "step": 446990 }, { "epoch": 0.9029682809665599, "grad_norm": 149.88609313964844, "learning_rate": 3.3838099392243915e-07, "loss": 19.9989, "step": 447000 }, { "epoch": 0.9029884815992437, "grad_norm": 35.93222427368164, "learning_rate": 3.382547744945436e-07, "loss": 16.6192, "step": 447010 }, { "epoch": 0.9030086822319275, "grad_norm": 271.7751159667969, "learning_rate": 3.3812857778742935e-07, "loss": 11.4213, "step": 447020 }, { "epoch": 0.9030288828646114, "grad_norm": 524.922119140625, "learning_rate": 3.3800240380171046e-07, "loss": 14.8176, "step": 447030 }, { "epoch": 0.9030490834972952, "grad_norm": 583.8279418945312, "learning_rate": 3.3787625253800247e-07, "loss": 13.3672, "step": 447040 }, { "epoch": 0.903069284129979, "grad_norm": 301.9650573730469, "learning_rate": 3.3775012399692055e-07, "loss": 22.4438, "step": 447050 }, { "epoch": 0.9030894847626628, "grad_norm": 724.0348510742188, "learning_rate": 3.3762401817907795e-07, "loss": 18.063, "step": 447060 }, { "epoch": 0.9031096853953465, "grad_norm": 301.4600524902344, "learning_rate": 3.374979350850921e-07, "loss": 24.1437, "step": 447070 }, { "epoch": 0.9031298860280303, "grad_norm": 89.9062728881836, "learning_rate": 3.373718747155752e-07, "loss": 24.9831, "step": 447080 }, { "epoch": 0.9031500866607142, "grad_norm": 147.34152221679688, "learning_rate": 3.372458370711412e-07, "loss": 10.8046, "step": 447090 }, { "epoch": 0.903170287293398, "grad_norm": 258.792236328125, "learning_rate": 3.371198221524069e-07, "loss": 13.8453, "step": 447100 }, { "epoch": 0.9031904879260818, "grad_norm": 426.54119873046875, "learning_rate": 3.3699382995998455e-07, "loss": 16.5044, "step": 447110 }, { "epoch": 0.9032106885587656, "grad_norm": 264.06488037109375, "learning_rate": 3.368678604944886e-07, "loss": 13.1876, "step": 447120 }, { "epoch": 0.9032308891914494, "grad_norm": 298.4723815917969, "learning_rate": 3.3674191375653255e-07, "loss": 10.8668, "step": 447130 }, { "epoch": 
0.9032510898241333, "grad_norm": 236.72854614257812, "learning_rate": 3.366159897467314e-07, "loss": 14.6112, "step": 447140 }, { "epoch": 0.9032712904568171, "grad_norm": 278.7016296386719, "learning_rate": 3.364900884656991e-07, "loss": 16.1786, "step": 447150 }, { "epoch": 0.9032914910895009, "grad_norm": 394.2326354980469, "learning_rate": 3.3636420991404686e-07, "loss": 9.8431, "step": 447160 }, { "epoch": 0.9033116917221847, "grad_norm": 443.133544921875, "learning_rate": 3.3623835409239023e-07, "loss": 21.7817, "step": 447170 }, { "epoch": 0.9033318923548685, "grad_norm": 417.1814880371094, "learning_rate": 3.361125210013438e-07, "loss": 15.9135, "step": 447180 }, { "epoch": 0.9033520929875524, "grad_norm": 0.0, "learning_rate": 3.3598671064151767e-07, "loss": 18.1624, "step": 447190 }, { "epoch": 0.9033722936202362, "grad_norm": 309.14569091796875, "learning_rate": 3.358609230135268e-07, "loss": 13.8954, "step": 447200 }, { "epoch": 0.90339249425292, "grad_norm": 253.5623016357422, "learning_rate": 3.357351581179846e-07, "loss": 23.6011, "step": 447210 }, { "epoch": 0.9034126948856038, "grad_norm": 218.31854248046875, "learning_rate": 3.35609415955504e-07, "loss": 16.3643, "step": 447220 }, { "epoch": 0.9034328955182876, "grad_norm": 628.333740234375, "learning_rate": 3.354836965266961e-07, "loss": 49.4682, "step": 447230 }, { "epoch": 0.9034530961509715, "grad_norm": 351.5813293457031, "learning_rate": 3.35357999832176e-07, "loss": 17.8232, "step": 447240 }, { "epoch": 0.9034732967836553, "grad_norm": 8.600955963134766, "learning_rate": 3.352323258725554e-07, "loss": 13.7669, "step": 447250 }, { "epoch": 0.9034934974163391, "grad_norm": 387.5688781738281, "learning_rate": 3.351066746484455e-07, "loss": 18.2632, "step": 447260 }, { "epoch": 0.9035136980490229, "grad_norm": 518.1428833007812, "learning_rate": 3.349810461604608e-07, "loss": 22.6909, "step": 447270 }, { "epoch": 0.9035338986817067, "grad_norm": 448.3558654785156, "learning_rate": 3.3485544040921194e-07, "loss": 33.6627, "step": 447280 }, { "epoch": 0.9035540993143906, "grad_norm": 583.00341796875, "learning_rate": 3.347298573953128e-07, "loss": 20.9727, "step": 447290 }, { "epoch": 0.9035742999470744, "grad_norm": 18.169967651367188, "learning_rate": 3.3460429711937417e-07, "loss": 19.8996, "step": 447300 }, { "epoch": 0.9035945005797582, "grad_norm": 311.3398132324219, "learning_rate": 3.344787595820076e-07, "loss": 13.1239, "step": 447310 }, { "epoch": 0.903614701212442, "grad_norm": 567.7097778320312, "learning_rate": 3.343532447838266e-07, "loss": 17.0992, "step": 447320 }, { "epoch": 0.9036349018451257, "grad_norm": 484.4988098144531, "learning_rate": 3.3422775272544115e-07, "loss": 16.0481, "step": 447330 }, { "epoch": 0.9036551024778096, "grad_norm": 355.2326354980469, "learning_rate": 3.3410228340746475e-07, "loss": 15.4506, "step": 447340 }, { "epoch": 0.9036753031104934, "grad_norm": 338.78253173828125, "learning_rate": 3.3397683683050685e-07, "loss": 27.8149, "step": 447350 }, { "epoch": 0.9036955037431772, "grad_norm": 374.1416931152344, "learning_rate": 3.338514129951809e-07, "loss": 6.0169, "step": 447360 }, { "epoch": 0.903715704375861, "grad_norm": 189.8123321533203, "learning_rate": 3.337260119020974e-07, "loss": 22.418, "step": 447370 }, { "epoch": 0.9037359050085448, "grad_norm": 403.3528747558594, "learning_rate": 3.33600633551866e-07, "loss": 25.0211, "step": 447380 }, { "epoch": 0.9037561056412287, "grad_norm": 108.13957977294922, "learning_rate": 3.334752779451006e-07, "loss": 12.1229, "step": 
447390 }, { "epoch": 0.9037763062739125, "grad_norm": 207.4993438720703, "learning_rate": 3.3334994508241013e-07, "loss": 16.8677, "step": 447400 }, { "epoch": 0.9037965069065963, "grad_norm": 612.2901000976562, "learning_rate": 3.332246349644058e-07, "loss": 32.5601, "step": 447410 }, { "epoch": 0.9038167075392801, "grad_norm": 235.7354278564453, "learning_rate": 3.3309934759169825e-07, "loss": 20.5226, "step": 447420 }, { "epoch": 0.9038369081719639, "grad_norm": 312.7841796875, "learning_rate": 3.3297408296489973e-07, "loss": 13.5229, "step": 447430 }, { "epoch": 0.9038571088046478, "grad_norm": 346.403076171875, "learning_rate": 3.328488410846187e-07, "loss": 21.7707, "step": 447440 }, { "epoch": 0.9038773094373316, "grad_norm": 612.0426635742188, "learning_rate": 3.327236219514657e-07, "loss": 15.0607, "step": 447450 }, { "epoch": 0.9038975100700154, "grad_norm": 130.6298828125, "learning_rate": 3.325984255660525e-07, "loss": 29.4215, "step": 447460 }, { "epoch": 0.9039177107026992, "grad_norm": 374.779296875, "learning_rate": 3.324732519289886e-07, "loss": 15.5523, "step": 447470 }, { "epoch": 0.903937911335383, "grad_norm": 244.9932403564453, "learning_rate": 3.3234810104088356e-07, "loss": 14.7736, "step": 447480 }, { "epoch": 0.9039581119680669, "grad_norm": 402.7099914550781, "learning_rate": 3.322229729023474e-07, "loss": 11.1433, "step": 447490 }, { "epoch": 0.9039783126007507, "grad_norm": 646.601806640625, "learning_rate": 3.320978675139919e-07, "loss": 19.4331, "step": 447500 }, { "epoch": 0.9039985132334345, "grad_norm": 16.859086990356445, "learning_rate": 3.319727848764237e-07, "loss": 22.936, "step": 447510 }, { "epoch": 0.9040187138661183, "grad_norm": 434.27294921875, "learning_rate": 3.318477249902541e-07, "loss": 12.9803, "step": 447520 }, { "epoch": 0.9040389144988021, "grad_norm": 109.24701690673828, "learning_rate": 3.317226878560931e-07, "loss": 16.8664, "step": 447530 }, { "epoch": 0.904059115131486, "grad_norm": 533.0873413085938, "learning_rate": 3.3159767347454963e-07, "loss": 28.9964, "step": 447540 }, { "epoch": 0.9040793157641698, "grad_norm": 255.40516662597656, "learning_rate": 3.3147268184623216e-07, "loss": 13.5238, "step": 447550 }, { "epoch": 0.9040995163968536, "grad_norm": 294.3439636230469, "learning_rate": 3.3134771297175127e-07, "loss": 21.6402, "step": 447560 }, { "epoch": 0.9041197170295374, "grad_norm": 246.0892333984375, "learning_rate": 3.3122276685171593e-07, "loss": 22.777, "step": 447570 }, { "epoch": 0.9041399176622211, "grad_norm": 598.0889892578125, "learning_rate": 3.3109784348673293e-07, "loss": 17.5916, "step": 447580 }, { "epoch": 0.904160118294905, "grad_norm": 875.310791015625, "learning_rate": 3.309729428774144e-07, "loss": 17.8856, "step": 447590 }, { "epoch": 0.9041803189275888, "grad_norm": 216.2782745361328, "learning_rate": 3.3084806502436617e-07, "loss": 17.0905, "step": 447600 }, { "epoch": 0.9042005195602726, "grad_norm": 251.59776306152344, "learning_rate": 3.3072320992819875e-07, "loss": 22.732, "step": 447610 }, { "epoch": 0.9042207201929564, "grad_norm": 118.15159606933594, "learning_rate": 3.3059837758951995e-07, "loss": 12.7717, "step": 447620 }, { "epoch": 0.9042409208256402, "grad_norm": 723.6868896484375, "learning_rate": 3.3047356800893826e-07, "loss": 24.9406, "step": 447630 }, { "epoch": 0.904261121458324, "grad_norm": 495.8370666503906, "learning_rate": 3.303487811870626e-07, "loss": 12.215, "step": 447640 }, { "epoch": 0.9042813220910079, "grad_norm": 214.63211059570312, "learning_rate": 
3.3022401712450025e-07, "loss": 14.264, "step": 447650 }, { "epoch": 0.9043015227236917, "grad_norm": 1940.4654541015625, "learning_rate": 3.3009927582185965e-07, "loss": 36.2257, "step": 447660 }, { "epoch": 0.9043217233563755, "grad_norm": 94.8907470703125, "learning_rate": 3.2997455727974856e-07, "loss": 17.4319, "step": 447670 }, { "epoch": 0.9043419239890593, "grad_norm": 51.35108947753906, "learning_rate": 3.2984986149877554e-07, "loss": 7.2657, "step": 447680 }, { "epoch": 0.9043621246217431, "grad_norm": 0.6754915118217468, "learning_rate": 3.297251884795477e-07, "loss": 19.2647, "step": 447690 }, { "epoch": 0.904382325254427, "grad_norm": 611.1425170898438, "learning_rate": 3.2960053822267245e-07, "loss": 18.7656, "step": 447700 }, { "epoch": 0.9044025258871108, "grad_norm": 124.38903045654297, "learning_rate": 3.294759107287582e-07, "loss": 10.8771, "step": 447710 }, { "epoch": 0.9044227265197946, "grad_norm": 308.4328918457031, "learning_rate": 3.293513059984121e-07, "loss": 15.8337, "step": 447720 }, { "epoch": 0.9044429271524784, "grad_norm": 346.4642333984375, "learning_rate": 3.2922672403224053e-07, "loss": 20.7543, "step": 447730 }, { "epoch": 0.9044631277851622, "grad_norm": 415.1618347167969, "learning_rate": 3.2910216483085125e-07, "loss": 15.1491, "step": 447740 }, { "epoch": 0.9044833284178461, "grad_norm": 471.6772155761719, "learning_rate": 3.289776283948526e-07, "loss": 16.6599, "step": 447750 }, { "epoch": 0.9045035290505299, "grad_norm": 338.21728515625, "learning_rate": 3.2885311472485025e-07, "loss": 15.285, "step": 447760 }, { "epoch": 0.9045237296832137, "grad_norm": 173.7224578857422, "learning_rate": 3.287286238214504e-07, "loss": 12.2094, "step": 447770 }, { "epoch": 0.9045439303158975, "grad_norm": 188.3278350830078, "learning_rate": 3.286041556852615e-07, "loss": 11.3011, "step": 447780 }, { "epoch": 0.9045641309485813, "grad_norm": 178.1429443359375, "learning_rate": 3.2847971031688963e-07, "loss": 32.7256, "step": 447790 }, { "epoch": 0.9045843315812652, "grad_norm": 302.8177490234375, "learning_rate": 3.283552877169399e-07, "loss": 18.2551, "step": 447800 }, { "epoch": 0.904604532213949, "grad_norm": 243.9778594970703, "learning_rate": 3.282308878860202e-07, "loss": 11.2811, "step": 447810 }, { "epoch": 0.9046247328466328, "grad_norm": 298.0887145996094, "learning_rate": 3.281065108247372e-07, "loss": 49.3632, "step": 447820 }, { "epoch": 0.9046449334793166, "grad_norm": 426.9590759277344, "learning_rate": 3.279821565336966e-07, "loss": 11.56, "step": 447830 }, { "epoch": 0.9046651341120003, "grad_norm": 115.5376968383789, "learning_rate": 3.2785782501350284e-07, "loss": 20.0251, "step": 447840 }, { "epoch": 0.9046853347446842, "grad_norm": 229.4214630126953, "learning_rate": 3.277335162647649e-07, "loss": 30.5869, "step": 447850 }, { "epoch": 0.904705535377368, "grad_norm": 498.9197082519531, "learning_rate": 3.276092302880868e-07, "loss": 18.484, "step": 447860 }, { "epoch": 0.9047257360100518, "grad_norm": 226.56736755371094, "learning_rate": 3.274849670840741e-07, "loss": 7.8482, "step": 447870 }, { "epoch": 0.9047459366427356, "grad_norm": 256.080810546875, "learning_rate": 3.2736072665333353e-07, "loss": 16.6454, "step": 447880 }, { "epoch": 0.9047661372754194, "grad_norm": 244.22299194335938, "learning_rate": 3.272365089964691e-07, "loss": 11.4553, "step": 447890 }, { "epoch": 0.9047863379081033, "grad_norm": 233.81263732910156, "learning_rate": 3.271123141140886e-07, "loss": 16.5892, "step": 447900 }, { "epoch": 0.9048065385407871, "grad_norm": 
615.6817626953125, "learning_rate": 3.269881420067944e-07, "loss": 17.0083, "step": 447910 }, { "epoch": 0.9048267391734709, "grad_norm": 204.99356079101562, "learning_rate": 3.268639926751943e-07, "loss": 11.7022, "step": 447920 }, { "epoch": 0.9048469398061547, "grad_norm": 595.6871948242188, "learning_rate": 3.267398661198923e-07, "loss": 26.3127, "step": 447930 }, { "epoch": 0.9048671404388385, "grad_norm": 300.2926940917969, "learning_rate": 3.2661576234149285e-07, "loss": 14.0718, "step": 447940 }, { "epoch": 0.9048873410715224, "grad_norm": 243.31668090820312, "learning_rate": 3.264916813406022e-07, "loss": 19.1018, "step": 447950 }, { "epoch": 0.9049075417042062, "grad_norm": 388.39508056640625, "learning_rate": 3.263676231178231e-07, "loss": 9.4069, "step": 447960 }, { "epoch": 0.90492774233689, "grad_norm": 5.497354984283447, "learning_rate": 3.262435876737624e-07, "loss": 16.1466, "step": 447970 }, { "epoch": 0.9049479429695738, "grad_norm": 346.11614990234375, "learning_rate": 3.2611957500902345e-07, "loss": 26.7694, "step": 447980 }, { "epoch": 0.9049681436022576, "grad_norm": 1.7630181312561035, "learning_rate": 3.2599558512421024e-07, "loss": 16.1983, "step": 447990 }, { "epoch": 0.9049883442349415, "grad_norm": 228.85594177246094, "learning_rate": 3.258716180199278e-07, "loss": 19.5428, "step": 448000 }, { "epoch": 0.9050085448676253, "grad_norm": 92.16657257080078, "learning_rate": 3.2574767369678073e-07, "loss": 15.1785, "step": 448010 }, { "epoch": 0.9050287455003091, "grad_norm": 402.4857177734375, "learning_rate": 3.2562375215537176e-07, "loss": 11.6245, "step": 448020 }, { "epoch": 0.9050489461329929, "grad_norm": 345.69921875, "learning_rate": 3.2549985339630606e-07, "loss": 22.8108, "step": 448030 }, { "epoch": 0.9050691467656767, "grad_norm": 150.28077697753906, "learning_rate": 3.253759774201881e-07, "loss": 17.1309, "step": 448040 }, { "epoch": 0.9050893473983606, "grad_norm": 653.8369140625, "learning_rate": 3.252521242276191e-07, "loss": 29.8362, "step": 448050 }, { "epoch": 0.9051095480310444, "grad_norm": 420.880615234375, "learning_rate": 3.2512829381920463e-07, "loss": 21.5998, "step": 448060 }, { "epoch": 0.9051297486637282, "grad_norm": 253.64968872070312, "learning_rate": 3.250044861955487e-07, "loss": 24.7224, "step": 448070 }, { "epoch": 0.905149949296412, "grad_norm": 10.839777946472168, "learning_rate": 3.248807013572536e-07, "loss": 21.1545, "step": 448080 }, { "epoch": 0.9051701499290957, "grad_norm": 449.986083984375, "learning_rate": 3.2475693930492214e-07, "loss": 11.1076, "step": 448090 }, { "epoch": 0.9051903505617795, "grad_norm": 435.3336486816406, "learning_rate": 3.246332000391583e-07, "loss": 15.9453, "step": 448100 }, { "epoch": 0.9052105511944634, "grad_norm": 679.3292846679688, "learning_rate": 3.245094835605667e-07, "loss": 16.2423, "step": 448110 }, { "epoch": 0.9052307518271472, "grad_norm": 145.24484252929688, "learning_rate": 3.2438578986974776e-07, "loss": 19.8766, "step": 448120 }, { "epoch": 0.905250952459831, "grad_norm": 776.5805053710938, "learning_rate": 3.242621189673051e-07, "loss": 32.9737, "step": 448130 }, { "epoch": 0.9052711530925148, "grad_norm": 196.9553680419922, "learning_rate": 3.2413847085384256e-07, "loss": 13.7633, "step": 448140 }, { "epoch": 0.9052913537251986, "grad_norm": 300.7092590332031, "learning_rate": 3.240148455299619e-07, "loss": 12.6026, "step": 448150 }, { "epoch": 0.9053115543578825, "grad_norm": 359.0980529785156, "learning_rate": 3.2389124299626483e-07, "loss": 24.2891, "step": 448160 }, { 
"epoch": 0.9053317549905663, "grad_norm": 576.0731811523438, "learning_rate": 3.237676632533554e-07, "loss": 14.0114, "step": 448170 }, { "epoch": 0.9053519556232501, "grad_norm": 501.6863708496094, "learning_rate": 3.2364410630183587e-07, "loss": 14.6857, "step": 448180 }, { "epoch": 0.9053721562559339, "grad_norm": 275.0650634765625, "learning_rate": 3.2352057214230623e-07, "loss": 11.8622, "step": 448190 }, { "epoch": 0.9053923568886177, "grad_norm": 12.33811092376709, "learning_rate": 3.233970607753717e-07, "loss": 18.6745, "step": 448200 }, { "epoch": 0.9054125575213016, "grad_norm": 254.0646514892578, "learning_rate": 3.2327357220163116e-07, "loss": 15.4009, "step": 448210 }, { "epoch": 0.9054327581539854, "grad_norm": 294.08221435546875, "learning_rate": 3.231501064216891e-07, "loss": 27.6999, "step": 448220 }, { "epoch": 0.9054529587866692, "grad_norm": 194.0894775390625, "learning_rate": 3.2302666343614565e-07, "loss": 13.6976, "step": 448230 }, { "epoch": 0.905473159419353, "grad_norm": 417.09747314453125, "learning_rate": 3.2290324324560363e-07, "loss": 18.6556, "step": 448240 }, { "epoch": 0.9054933600520368, "grad_norm": 286.4114685058594, "learning_rate": 3.227798458506637e-07, "loss": 24.0155, "step": 448250 }, { "epoch": 0.9055135606847207, "grad_norm": 399.8034362792969, "learning_rate": 3.22656471251927e-07, "loss": 22.4191, "step": 448260 }, { "epoch": 0.9055337613174045, "grad_norm": 260.3201599121094, "learning_rate": 3.225331194499964e-07, "loss": 10.4566, "step": 448270 }, { "epoch": 0.9055539619500883, "grad_norm": 192.796142578125, "learning_rate": 3.2240979044547095e-07, "loss": 14.5858, "step": 448280 }, { "epoch": 0.9055741625827721, "grad_norm": 601.3679809570312, "learning_rate": 3.2228648423895335e-07, "loss": 19.8696, "step": 448290 }, { "epoch": 0.9055943632154559, "grad_norm": 512.9625854492188, "learning_rate": 3.2216320083104434e-07, "loss": 18.3414, "step": 448300 }, { "epoch": 0.9056145638481398, "grad_norm": 388.1852722167969, "learning_rate": 3.2203994022234396e-07, "loss": 19.741, "step": 448310 }, { "epoch": 0.9056347644808236, "grad_norm": 282.55877685546875, "learning_rate": 3.2191670241345395e-07, "loss": 23.1342, "step": 448320 }, { "epoch": 0.9056549651135074, "grad_norm": 1288.4505615234375, "learning_rate": 3.2179348740497494e-07, "loss": 15.1686, "step": 448330 }, { "epoch": 0.9056751657461912, "grad_norm": 421.3667907714844, "learning_rate": 3.216702951975059e-07, "loss": 17.3516, "step": 448340 }, { "epoch": 0.9056953663788749, "grad_norm": 95.43819427490234, "learning_rate": 3.2154712579164913e-07, "loss": 7.5615, "step": 448350 }, { "epoch": 0.9057155670115588, "grad_norm": 406.94879150390625, "learning_rate": 3.2142397918800416e-07, "loss": 19.1469, "step": 448360 }, { "epoch": 0.9057357676442426, "grad_norm": 99.41961669921875, "learning_rate": 3.213008553871716e-07, "loss": 11.4991, "step": 448370 }, { "epoch": 0.9057559682769264, "grad_norm": 403.7974853515625, "learning_rate": 3.2117775438975096e-07, "loss": 19.0258, "step": 448380 }, { "epoch": 0.9057761689096102, "grad_norm": 141.62965393066406, "learning_rate": 3.2105467619634234e-07, "loss": 13.6665, "step": 448390 }, { "epoch": 0.905796369542294, "grad_norm": 194.33724975585938, "learning_rate": 3.2093162080754634e-07, "loss": 10.719, "step": 448400 }, { "epoch": 0.9058165701749779, "grad_norm": 289.2017517089844, "learning_rate": 3.208085882239614e-07, "loss": 11.4255, "step": 448410 }, { "epoch": 0.9058367708076617, "grad_norm": 219.3064727783203, "learning_rate": 
3.206855784461876e-07, "loss": 18.4352, "step": 448420 }, { "epoch": 0.9058569714403455, "grad_norm": 170.38917541503906, "learning_rate": 3.205625914748256e-07, "loss": 22.2926, "step": 448430 }, { "epoch": 0.9058771720730293, "grad_norm": 533.3335571289062, "learning_rate": 3.2043962731047373e-07, "loss": 10.2524, "step": 448440 }, { "epoch": 0.9058973727057131, "grad_norm": 7.176573753356934, "learning_rate": 3.20316685953731e-07, "loss": 28.7266, "step": 448450 }, { "epoch": 0.905917573338397, "grad_norm": 258.6211242675781, "learning_rate": 3.20193767405198e-07, "loss": 11.7974, "step": 448460 }, { "epoch": 0.9059377739710808, "grad_norm": 153.90350341796875, "learning_rate": 3.2007087166547325e-07, "loss": 11.3183, "step": 448470 }, { "epoch": 0.9059579746037646, "grad_norm": 189.8888397216797, "learning_rate": 3.199479987351545e-07, "loss": 13.7309, "step": 448480 }, { "epoch": 0.9059781752364484, "grad_norm": 615.9976806640625, "learning_rate": 3.1982514861484184e-07, "loss": 17.5584, "step": 448490 }, { "epoch": 0.9059983758691322, "grad_norm": 1100.916015625, "learning_rate": 3.1970232130513365e-07, "loss": 18.7667, "step": 448500 }, { "epoch": 0.906018576501816, "grad_norm": 349.28466796875, "learning_rate": 3.19579516806629e-07, "loss": 22.8324, "step": 448510 }, { "epoch": 0.9060387771344999, "grad_norm": 385.8545227050781, "learning_rate": 3.194567351199257e-07, "loss": 24.0253, "step": 448520 }, { "epoch": 0.9060589777671837, "grad_norm": 657.9740600585938, "learning_rate": 3.193339762456232e-07, "loss": 12.212, "step": 448530 }, { "epoch": 0.9060791783998675, "grad_norm": 757.3938598632812, "learning_rate": 3.1921124018431946e-07, "loss": 19.3436, "step": 448540 }, { "epoch": 0.9060993790325513, "grad_norm": 373.80242919921875, "learning_rate": 3.1908852693661116e-07, "loss": 27.6082, "step": 448550 }, { "epoch": 0.9061195796652352, "grad_norm": 371.020751953125, "learning_rate": 3.1896583650309896e-07, "loss": 20.1392, "step": 448560 }, { "epoch": 0.906139780297919, "grad_norm": 597.7250366210938, "learning_rate": 3.188431688843785e-07, "loss": 22.1884, "step": 448570 }, { "epoch": 0.9061599809306028, "grad_norm": 571.2300415039062, "learning_rate": 3.187205240810493e-07, "loss": 21.1181, "step": 448580 }, { "epoch": 0.9061801815632866, "grad_norm": 228.69024658203125, "learning_rate": 3.1859790209370855e-07, "loss": 16.468, "step": 448590 }, { "epoch": 0.9062003821959704, "grad_norm": 60.642494201660156, "learning_rate": 3.1847530292295313e-07, "loss": 9.7498, "step": 448600 }, { "epoch": 0.9062205828286541, "grad_norm": 327.65728759765625, "learning_rate": 3.18352726569382e-07, "loss": 26.8853, "step": 448610 }, { "epoch": 0.906240783461338, "grad_norm": 418.5385437011719, "learning_rate": 3.1823017303359185e-07, "loss": 16.8091, "step": 448620 }, { "epoch": 0.9062609840940218, "grad_norm": 602.8416748046875, "learning_rate": 3.181076423161794e-07, "loss": 28.1128, "step": 448630 }, { "epoch": 0.9062811847267056, "grad_norm": 307.3411865234375, "learning_rate": 3.179851344177426e-07, "loss": 15.6654, "step": 448640 }, { "epoch": 0.9063013853593894, "grad_norm": 315.0074157714844, "learning_rate": 3.1786264933887977e-07, "loss": 10.2402, "step": 448650 }, { "epoch": 0.9063215859920732, "grad_norm": 312.59918212890625, "learning_rate": 3.1774018708018493e-07, "loss": 14.1066, "step": 448660 }, { "epoch": 0.9063417866247571, "grad_norm": 371.5028076171875, "learning_rate": 3.176177476422565e-07, "loss": 11.442, "step": 448670 }, { "epoch": 0.9063619872574409, "grad_norm": 
310.7154235839844, "learning_rate": 3.1749533102569176e-07, "loss": 8.6138, "step": 448680 }, { "epoch": 0.9063821878901247, "grad_norm": 11.721882820129395, "learning_rate": 3.173729372310874e-07, "loss": 10.4029, "step": 448690 }, { "epoch": 0.9064023885228085, "grad_norm": 372.51806640625, "learning_rate": 3.172505662590386e-07, "loss": 12.4118, "step": 448700 }, { "epoch": 0.9064225891554923, "grad_norm": 541.5244750976562, "learning_rate": 3.1712821811014205e-07, "loss": 19.0972, "step": 448710 }, { "epoch": 0.9064427897881762, "grad_norm": 290.11285400390625, "learning_rate": 3.170058927849967e-07, "loss": 9.8545, "step": 448720 }, { "epoch": 0.90646299042086, "grad_norm": 413.8747863769531, "learning_rate": 3.168835902841949e-07, "loss": 11.5055, "step": 448730 }, { "epoch": 0.9064831910535438, "grad_norm": 268.87640380859375, "learning_rate": 3.167613106083345e-07, "loss": 17.3808, "step": 448740 }, { "epoch": 0.9065033916862276, "grad_norm": 547.7677001953125, "learning_rate": 3.166390537580122e-07, "loss": 18.9562, "step": 448750 }, { "epoch": 0.9065235923189114, "grad_norm": 8.598103523254395, "learning_rate": 3.165168197338231e-07, "loss": 25.9563, "step": 448760 }, { "epoch": 0.9065437929515953, "grad_norm": 537.5119018554688, "learning_rate": 3.1639460853636226e-07, "loss": 16.9571, "step": 448770 }, { "epoch": 0.9065639935842791, "grad_norm": 21.0839900970459, "learning_rate": 3.162724201662265e-07, "loss": 13.5353, "step": 448780 }, { "epoch": 0.9065841942169629, "grad_norm": 6.599519729614258, "learning_rate": 3.161502546240114e-07, "loss": 13.7822, "step": 448790 }, { "epoch": 0.9066043948496467, "grad_norm": 888.1553344726562, "learning_rate": 3.160281119103109e-07, "loss": 30.5156, "step": 448800 }, { "epoch": 0.9066245954823305, "grad_norm": 198.2985076904297, "learning_rate": 3.159059920257218e-07, "loss": 15.3852, "step": 448810 }, { "epoch": 0.9066447961150144, "grad_norm": 103.79512786865234, "learning_rate": 3.157838949708386e-07, "loss": 18.8775, "step": 448820 }, { "epoch": 0.9066649967476982, "grad_norm": 1511.2833251953125, "learning_rate": 3.1566182074625693e-07, "loss": 25.0839, "step": 448830 }, { "epoch": 0.906685197380382, "grad_norm": 641.9132080078125, "learning_rate": 3.155397693525708e-07, "loss": 27.815, "step": 448840 }, { "epoch": 0.9067053980130658, "grad_norm": 455.2959899902344, "learning_rate": 3.1541774079037635e-07, "loss": 24.2859, "step": 448850 }, { "epoch": 0.9067255986457495, "grad_norm": 527.1464233398438, "learning_rate": 3.1529573506026757e-07, "loss": 28.662, "step": 448860 }, { "epoch": 0.9067457992784334, "grad_norm": 25.249774932861328, "learning_rate": 3.151737521628384e-07, "loss": 12.2626, "step": 448870 }, { "epoch": 0.9067659999111172, "grad_norm": 164.37081909179688, "learning_rate": 3.150517920986851e-07, "loss": 12.5367, "step": 448880 }, { "epoch": 0.906786200543801, "grad_norm": 476.541259765625, "learning_rate": 3.1492985486840044e-07, "loss": 11.9349, "step": 448890 }, { "epoch": 0.9068064011764848, "grad_norm": 286.9355773925781, "learning_rate": 3.148079404725801e-07, "loss": 13.8341, "step": 448900 }, { "epoch": 0.9068266018091686, "grad_norm": 356.0955810546875, "learning_rate": 3.1468604891181755e-07, "loss": 10.4007, "step": 448910 }, { "epoch": 0.9068468024418525, "grad_norm": 518.3862915039062, "learning_rate": 3.145641801867061e-07, "loss": 18.039, "step": 448920 }, { "epoch": 0.9068670030745363, "grad_norm": 17.225505828857422, "learning_rate": 3.1444233429784145e-07, "loss": 16.9743, "step": 448930 }, { 
"epoch": 0.9068872037072201, "grad_norm": 309.4004211425781, "learning_rate": 3.14320511245817e-07, "loss": 18.3492, "step": 448940 }, { "epoch": 0.9069074043399039, "grad_norm": 307.60687255859375, "learning_rate": 3.1419871103122447e-07, "loss": 16.0927, "step": 448950 }, { "epoch": 0.9069276049725877, "grad_norm": 96.79744720458984, "learning_rate": 3.1407693365465954e-07, "loss": 19.2152, "step": 448960 }, { "epoch": 0.9069478056052716, "grad_norm": 557.51611328125, "learning_rate": 3.1395517911671613e-07, "loss": 29.1107, "step": 448970 }, { "epoch": 0.9069680062379554, "grad_norm": 78.58000946044922, "learning_rate": 3.1383344741798716e-07, "loss": 19.1292, "step": 448980 }, { "epoch": 0.9069882068706392, "grad_norm": 305.63470458984375, "learning_rate": 3.137117385590643e-07, "loss": 21.5092, "step": 448990 }, { "epoch": 0.907008407503323, "grad_norm": 89.49698638916016, "learning_rate": 3.135900525405428e-07, "loss": 10.2221, "step": 449000 }, { "epoch": 0.9070286081360068, "grad_norm": 123.39061737060547, "learning_rate": 3.134683893630153e-07, "loss": 20.3347, "step": 449010 }, { "epoch": 0.9070488087686907, "grad_norm": 265.69366455078125, "learning_rate": 3.133467490270736e-07, "loss": 10.337, "step": 449020 }, { "epoch": 0.9070690094013745, "grad_norm": 636.016845703125, "learning_rate": 3.1322513153331124e-07, "loss": 19.651, "step": 449030 }, { "epoch": 0.9070892100340583, "grad_norm": 1288.184814453125, "learning_rate": 3.1310353688232207e-07, "loss": 18.6391, "step": 449040 }, { "epoch": 0.9071094106667421, "grad_norm": 191.1102294921875, "learning_rate": 3.1298196507469737e-07, "loss": 28.4142, "step": 449050 }, { "epoch": 0.9071296112994259, "grad_norm": 140.29869079589844, "learning_rate": 3.128604161110299e-07, "loss": 7.4167, "step": 449060 }, { "epoch": 0.9071498119321098, "grad_norm": 300.2296142578125, "learning_rate": 3.1273888999191314e-07, "loss": 18.4172, "step": 449070 }, { "epoch": 0.9071700125647936, "grad_norm": 1085.9041748046875, "learning_rate": 3.126173867179383e-07, "loss": 25.2115, "step": 449080 }, { "epoch": 0.9071902131974774, "grad_norm": 149.21746826171875, "learning_rate": 3.1249590628969707e-07, "loss": 17.9784, "step": 449090 }, { "epoch": 0.9072104138301612, "grad_norm": 429.34661865234375, "learning_rate": 3.123744487077829e-07, "loss": 26.9559, "step": 449100 }, { "epoch": 0.907230614462845, "grad_norm": 245.41632080078125, "learning_rate": 3.122530139727864e-07, "loss": 22.1495, "step": 449110 }, { "epoch": 0.9072508150955287, "grad_norm": 282.68389892578125, "learning_rate": 3.12131602085301e-07, "loss": 10.4131, "step": 449120 }, { "epoch": 0.9072710157282126, "grad_norm": 448.1305236816406, "learning_rate": 3.1201021304591684e-07, "loss": 17.2096, "step": 449130 }, { "epoch": 0.9072912163608964, "grad_norm": 308.39117431640625, "learning_rate": 3.118888468552267e-07, "loss": 6.6042, "step": 449140 }, { "epoch": 0.9073114169935802, "grad_norm": 370.9839172363281, "learning_rate": 3.1176750351382235e-07, "loss": 16.0175, "step": 449150 }, { "epoch": 0.907331617626264, "grad_norm": 7.8283562660217285, "learning_rate": 3.116461830222933e-07, "loss": 13.4259, "step": 449160 }, { "epoch": 0.9073518182589478, "grad_norm": 560.1171264648438, "learning_rate": 3.11524885381233e-07, "loss": 36.5528, "step": 449170 }, { "epoch": 0.9073720188916317, "grad_norm": 238.0440673828125, "learning_rate": 3.11403610591231e-07, "loss": 14.3345, "step": 449180 }, { "epoch": 0.9073922195243155, "grad_norm": 222.44174194335938, "learning_rate": 
3.1128235865288013e-07, "loss": 16.2855, "step": 449190 }, { "epoch": 0.9074124201569993, "grad_norm": 247.97122192382812, "learning_rate": 3.1116112956677045e-07, "loss": 10.4193, "step": 449200 }, { "epoch": 0.9074326207896831, "grad_norm": 179.41934204101562, "learning_rate": 3.1103992333349153e-07, "loss": 16.3914, "step": 449210 }, { "epoch": 0.9074528214223669, "grad_norm": 470.7078552246094, "learning_rate": 3.1091873995363677e-07, "loss": 17.0916, "step": 449220 }, { "epoch": 0.9074730220550508, "grad_norm": 270.0218811035156, "learning_rate": 3.1079757942779453e-07, "loss": 16.3383, "step": 449230 }, { "epoch": 0.9074932226877346, "grad_norm": 367.3122863769531, "learning_rate": 3.106764417565561e-07, "loss": 12.3758, "step": 449240 }, { "epoch": 0.9075134233204184, "grad_norm": 306.461181640625, "learning_rate": 3.105553269405115e-07, "loss": 18.3363, "step": 449250 }, { "epoch": 0.9075336239531022, "grad_norm": 522.5321655273438, "learning_rate": 3.1043423498025303e-07, "loss": 24.1793, "step": 449260 }, { "epoch": 0.907553824585786, "grad_norm": 516.6013793945312, "learning_rate": 3.1031316587636805e-07, "loss": 17.7558, "step": 449270 }, { "epoch": 0.9075740252184699, "grad_norm": 1105.093994140625, "learning_rate": 3.101921196294477e-07, "loss": 33.2699, "step": 449280 }, { "epoch": 0.9075942258511537, "grad_norm": 102.50092315673828, "learning_rate": 3.1007109624008326e-07, "loss": 25.3437, "step": 449290 }, { "epoch": 0.9076144264838375, "grad_norm": 565.497314453125, "learning_rate": 3.0995009570886305e-07, "loss": 24.174, "step": 449300 }, { "epoch": 0.9076346271165213, "grad_norm": 291.22540283203125, "learning_rate": 3.098291180363766e-07, "loss": 16.3335, "step": 449310 }, { "epoch": 0.9076548277492051, "grad_norm": 256.3533935546875, "learning_rate": 3.097081632232141e-07, "loss": 12.1066, "step": 449320 }, { "epoch": 0.907675028381889, "grad_norm": 300.76544189453125, "learning_rate": 3.095872312699666e-07, "loss": 9.9195, "step": 449330 }, { "epoch": 0.9076952290145728, "grad_norm": 374.5771789550781, "learning_rate": 3.094663221772209e-07, "loss": 20.0803, "step": 449340 }, { "epoch": 0.9077154296472566, "grad_norm": 457.07659912109375, "learning_rate": 3.093454359455672e-07, "loss": 18.5713, "step": 449350 }, { "epoch": 0.9077356302799404, "grad_norm": 456.6804504394531, "learning_rate": 3.09224572575596e-07, "loss": 17.623, "step": 449360 }, { "epoch": 0.9077558309126241, "grad_norm": 631.7473754882812, "learning_rate": 3.091037320678947e-07, "loss": 19.8148, "step": 449370 }, { "epoch": 0.907776031545308, "grad_norm": 283.96368408203125, "learning_rate": 3.089829144230527e-07, "loss": 16.0926, "step": 449380 }, { "epoch": 0.9077962321779918, "grad_norm": 319.5601806640625, "learning_rate": 3.088621196416597e-07, "loss": 10.865, "step": 449390 }, { "epoch": 0.9078164328106756, "grad_norm": 254.3486785888672, "learning_rate": 3.0874134772430344e-07, "loss": 10.6656, "step": 449400 }, { "epoch": 0.9078366334433594, "grad_norm": 1269.4559326171875, "learning_rate": 3.0862059867157237e-07, "loss": 25.0295, "step": 449410 }, { "epoch": 0.9078568340760432, "grad_norm": 524.7383422851562, "learning_rate": 3.08499872484056e-07, "loss": 9.9924, "step": 449420 }, { "epoch": 0.907877034708727, "grad_norm": 284.661865234375, "learning_rate": 3.0837916916234166e-07, "loss": 37.4764, "step": 449430 }, { "epoch": 0.9078972353414109, "grad_norm": 367.37078857421875, "learning_rate": 3.0825848870701893e-07, "loss": 18.6231, "step": 449440 }, { "epoch": 0.9079174359740947, 
"grad_norm": 6.0677595138549805, "learning_rate": 3.08137831118675e-07, "loss": 15.7651, "step": 449450 }, { "epoch": 0.9079376366067785, "grad_norm": 330.1292724609375, "learning_rate": 3.080171963978984e-07, "loss": 22.6161, "step": 449460 }, { "epoch": 0.9079578372394623, "grad_norm": 170.20848083496094, "learning_rate": 3.078965845452769e-07, "loss": 8.8626, "step": 449470 }, { "epoch": 0.9079780378721461, "grad_norm": 458.19586181640625, "learning_rate": 3.077759955613979e-07, "loss": 17.3684, "step": 449480 }, { "epoch": 0.90799823850483, "grad_norm": 236.87313842773438, "learning_rate": 3.0765542944685036e-07, "loss": 22.9826, "step": 449490 }, { "epoch": 0.9080184391375138, "grad_norm": 444.2785339355469, "learning_rate": 3.0753488620222037e-07, "loss": 29.2181, "step": 449500 }, { "epoch": 0.9080386397701976, "grad_norm": 6.273552417755127, "learning_rate": 3.07414365828097e-07, "loss": 20.8284, "step": 449510 }, { "epoch": 0.9080588404028814, "grad_norm": 569.6155395507812, "learning_rate": 3.0729386832506647e-07, "loss": 22.2681, "step": 449520 }, { "epoch": 0.9080790410355652, "grad_norm": 717.0222778320312, "learning_rate": 3.07173393693716e-07, "loss": 22.9611, "step": 449530 }, { "epoch": 0.9080992416682491, "grad_norm": 583.5347900390625, "learning_rate": 3.0705294193463406e-07, "loss": 18.084, "step": 449540 }, { "epoch": 0.9081194423009329, "grad_norm": 611.0958862304688, "learning_rate": 3.069325130484069e-07, "loss": 20.9226, "step": 449550 }, { "epoch": 0.9081396429336167, "grad_norm": 88.22270202636719, "learning_rate": 3.068121070356206e-07, "loss": 18.2759, "step": 449560 }, { "epoch": 0.9081598435663005, "grad_norm": 147.03045654296875, "learning_rate": 3.066917238968631e-07, "loss": 11.1902, "step": 449570 }, { "epoch": 0.9081800441989843, "grad_norm": 39.146305084228516, "learning_rate": 3.065713636327211e-07, "loss": 19.9076, "step": 449580 }, { "epoch": 0.9082002448316682, "grad_norm": 243.39291381835938, "learning_rate": 3.0645102624378144e-07, "loss": 19.0416, "step": 449590 }, { "epoch": 0.908220445464352, "grad_norm": 473.9781799316406, "learning_rate": 3.0633071173062966e-07, "loss": 12.5497, "step": 449600 }, { "epoch": 0.9082406460970358, "grad_norm": 149.61248779296875, "learning_rate": 3.0621042009385313e-07, "loss": 19.2441, "step": 449610 }, { "epoch": 0.9082608467297196, "grad_norm": 420.84954833984375, "learning_rate": 3.0609015133403806e-07, "loss": 19.7314, "step": 449620 }, { "epoch": 0.9082810473624033, "grad_norm": 262.05914306640625, "learning_rate": 3.0596990545176895e-07, "loss": 17.1844, "step": 449630 }, { "epoch": 0.9083012479950872, "grad_norm": 387.1336669921875, "learning_rate": 3.058496824476337e-07, "loss": 10.2942, "step": 449640 }, { "epoch": 0.908321448627771, "grad_norm": 243.58253479003906, "learning_rate": 3.057294823222184e-07, "loss": 21.0373, "step": 449650 }, { "epoch": 0.9083416492604548, "grad_norm": 281.2779846191406, "learning_rate": 3.056093050761083e-07, "loss": 14.0664, "step": 449660 }, { "epoch": 0.9083618498931386, "grad_norm": 339.8753356933594, "learning_rate": 3.0548915070988837e-07, "loss": 13.4907, "step": 449670 }, { "epoch": 0.9083820505258224, "grad_norm": 271.4891052246094, "learning_rate": 3.0536901922414543e-07, "loss": 22.4855, "step": 449680 }, { "epoch": 0.9084022511585063, "grad_norm": 451.68890380859375, "learning_rate": 3.052489106194645e-07, "loss": 31.894, "step": 449690 }, { "epoch": 0.9084224517911901, "grad_norm": 644.14111328125, "learning_rate": 3.051288248964307e-07, "loss": 22.8247, 
"step": 449700 }, { "epoch": 0.9084426524238739, "grad_norm": 158.24072265625, "learning_rate": 3.050087620556302e-07, "loss": 7.1228, "step": 449710 }, { "epoch": 0.9084628530565577, "grad_norm": 788.8622436523438, "learning_rate": 3.0488872209764654e-07, "loss": 29.4907, "step": 449720 }, { "epoch": 0.9084830536892415, "grad_norm": 38.16522979736328, "learning_rate": 3.047687050230663e-07, "loss": 13.5508, "step": 449730 }, { "epoch": 0.9085032543219254, "grad_norm": 369.5571594238281, "learning_rate": 3.046487108324736e-07, "loss": 18.3055, "step": 449740 }, { "epoch": 0.9085234549546092, "grad_norm": 458.87786865234375, "learning_rate": 3.0452873952645455e-07, "loss": 11.2209, "step": 449750 }, { "epoch": 0.908543655587293, "grad_norm": 659.783935546875, "learning_rate": 3.0440879110559263e-07, "loss": 34.188, "step": 449760 }, { "epoch": 0.9085638562199768, "grad_norm": 36.0201530456543, "learning_rate": 3.0428886557047176e-07, "loss": 19.4572, "step": 449770 }, { "epoch": 0.9085840568526606, "grad_norm": 552.7836303710938, "learning_rate": 3.0416896292167873e-07, "loss": 24.2485, "step": 449780 }, { "epoch": 0.9086042574853445, "grad_norm": 214.62286376953125, "learning_rate": 3.0404908315979587e-07, "loss": 17.428, "step": 449790 }, { "epoch": 0.9086244581180283, "grad_norm": 499.6221008300781, "learning_rate": 3.0392922628540875e-07, "loss": 21.9665, "step": 449800 }, { "epoch": 0.9086446587507121, "grad_norm": 429.6669921875, "learning_rate": 3.0380939229910087e-07, "loss": 22.1935, "step": 449810 }, { "epoch": 0.9086648593833959, "grad_norm": 387.1067810058594, "learning_rate": 3.036895812014556e-07, "loss": 14.9835, "step": 449820 }, { "epoch": 0.9086850600160797, "grad_norm": 452.1629638671875, "learning_rate": 3.0356979299305867e-07, "loss": 19.1507, "step": 449830 }, { "epoch": 0.9087052606487636, "grad_norm": 265.0748291015625, "learning_rate": 3.0345002767449337e-07, "loss": 13.7882, "step": 449840 }, { "epoch": 0.9087254612814474, "grad_norm": 824.8062744140625, "learning_rate": 3.0333028524634156e-07, "loss": 17.5646, "step": 449850 }, { "epoch": 0.9087456619141312, "grad_norm": 16.399173736572266, "learning_rate": 3.0321056570918883e-07, "loss": 18.7888, "step": 449860 }, { "epoch": 0.908765862546815, "grad_norm": 1272.93701171875, "learning_rate": 3.030908690636192e-07, "loss": 27.8628, "step": 449870 }, { "epoch": 0.9087860631794987, "grad_norm": 344.57684326171875, "learning_rate": 3.029711953102138e-07, "loss": 21.5898, "step": 449880 }, { "epoch": 0.9088062638121825, "grad_norm": 346.8316955566406, "learning_rate": 3.028515444495572e-07, "loss": 10.6882, "step": 449890 }, { "epoch": 0.9088264644448664, "grad_norm": 240.88621520996094, "learning_rate": 3.027319164822329e-07, "loss": 16.0152, "step": 449900 }, { "epoch": 0.9088466650775502, "grad_norm": 113.99472045898438, "learning_rate": 3.0261231140882363e-07, "loss": 37.6736, "step": 449910 }, { "epoch": 0.908866865710234, "grad_norm": 153.38453674316406, "learning_rate": 3.024927292299118e-07, "loss": 21.8065, "step": 449920 }, { "epoch": 0.9088870663429178, "grad_norm": 444.3023986816406, "learning_rate": 3.0237316994608025e-07, "loss": 14.1541, "step": 449930 }, { "epoch": 0.9089072669756016, "grad_norm": 412.1969299316406, "learning_rate": 3.02253633557914e-07, "loss": 20.4504, "step": 449940 }, { "epoch": 0.9089274676082855, "grad_norm": 162.01190185546875, "learning_rate": 3.0213412006599216e-07, "loss": 11.6022, "step": 449950 }, { "epoch": 0.9089476682409693, "grad_norm": 0.0, "learning_rate": 
3.0201462947089865e-07, "loss": 24.5164, "step": 449960 }, { "epoch": 0.9089678688736531, "grad_norm": 684.73779296875, "learning_rate": 3.018951617732169e-07, "loss": 21.3613, "step": 449970 }, { "epoch": 0.9089880695063369, "grad_norm": 733.5957641601562, "learning_rate": 3.01775716973528e-07, "loss": 34.4216, "step": 449980 }, { "epoch": 0.9090082701390207, "grad_norm": 362.92449951171875, "learning_rate": 3.0165629507241446e-07, "loss": 13.8412, "step": 449990 }, { "epoch": 0.9090284707717046, "grad_norm": 392.89288330078125, "learning_rate": 3.015368960704584e-07, "loss": 15.1687, "step": 450000 }, { "epoch": 0.9090486714043884, "grad_norm": 420.2798767089844, "learning_rate": 3.014175199682418e-07, "loss": 15.1001, "step": 450010 }, { "epoch": 0.9090688720370722, "grad_norm": 494.7281799316406, "learning_rate": 3.012981667663456e-07, "loss": 14.4487, "step": 450020 }, { "epoch": 0.909089072669756, "grad_norm": 673.8734741210938, "learning_rate": 3.011788364653523e-07, "loss": 18.8924, "step": 450030 }, { "epoch": 0.9091092733024398, "grad_norm": 491.93609619140625, "learning_rate": 3.010595290658441e-07, "loss": 13.9405, "step": 450040 }, { "epoch": 0.9091294739351237, "grad_norm": 136.08999633789062, "learning_rate": 3.0094024456840176e-07, "loss": 12.4955, "step": 450050 }, { "epoch": 0.9091496745678075, "grad_norm": 411.83746337890625, "learning_rate": 3.008209829736064e-07, "loss": 19.9289, "step": 450060 }, { "epoch": 0.9091698752004913, "grad_norm": 226.57904052734375, "learning_rate": 3.007017442820398e-07, "loss": 21.5572, "step": 450070 }, { "epoch": 0.9091900758331751, "grad_norm": 651.72900390625, "learning_rate": 3.005825284942837e-07, "loss": 22.6409, "step": 450080 }, { "epoch": 0.909210276465859, "grad_norm": 8.027105331420898, "learning_rate": 3.004633356109171e-07, "loss": 25.1169, "step": 450090 }, { "epoch": 0.9092304770985428, "grad_norm": 294.9572448730469, "learning_rate": 3.003441656325229e-07, "loss": 12.6286, "step": 450100 }, { "epoch": 0.9092506777312266, "grad_norm": 374.8417663574219, "learning_rate": 3.002250185596806e-07, "loss": 19.1292, "step": 450110 }, { "epoch": 0.9092708783639104, "grad_norm": 58.905242919921875, "learning_rate": 3.0010589439297245e-07, "loss": 30.5942, "step": 450120 }, { "epoch": 0.9092910789965942, "grad_norm": 385.8377380371094, "learning_rate": 2.9998679313297807e-07, "loss": 17.9534, "step": 450130 }, { "epoch": 0.9093112796292779, "grad_norm": 214.30320739746094, "learning_rate": 2.99867714780277e-07, "loss": 17.9742, "step": 450140 }, { "epoch": 0.9093314802619618, "grad_norm": 166.50491333007812, "learning_rate": 2.9974865933545207e-07, "loss": 15.6885, "step": 450150 }, { "epoch": 0.9093516808946456, "grad_norm": 264.8522644042969, "learning_rate": 2.996296267990817e-07, "loss": 17.7719, "step": 450160 }, { "epoch": 0.9093718815273294, "grad_norm": 327.0448303222656, "learning_rate": 2.9951061717174543e-07, "loss": 18.7962, "step": 450170 }, { "epoch": 0.9093920821600132, "grad_norm": 425.8899841308594, "learning_rate": 2.9939163045402456e-07, "loss": 8.4987, "step": 450180 }, { "epoch": 0.909412282792697, "grad_norm": 132.53302001953125, "learning_rate": 2.992726666464996e-07, "loss": 17.0717, "step": 450190 }, { "epoch": 0.9094324834253809, "grad_norm": 338.8014221191406, "learning_rate": 2.99153725749749e-07, "loss": 19.2351, "step": 450200 }, { "epoch": 0.9094526840580647, "grad_norm": 340.30010986328125, "learning_rate": 2.990348077643529e-07, "loss": 13.3819, "step": 450210 }, { "epoch": 0.9094728846907485, 
"grad_norm": 61.42256546020508, "learning_rate": 2.989159126908914e-07, "loss": 15.6164, "step": 450220 }, { "epoch": 0.9094930853234323, "grad_norm": 432.1888122558594, "learning_rate": 2.9879704052994395e-07, "loss": 11.7148, "step": 450230 }, { "epoch": 0.9095132859561161, "grad_norm": 227.47276306152344, "learning_rate": 2.986781912820885e-07, "loss": 9.2523, "step": 450240 }, { "epoch": 0.9095334865888, "grad_norm": 834.7599487304688, "learning_rate": 2.9855936494790516e-07, "loss": 20.4919, "step": 450250 }, { "epoch": 0.9095536872214838, "grad_norm": 10.250986099243164, "learning_rate": 2.9844056152797505e-07, "loss": 15.58, "step": 450260 }, { "epoch": 0.9095738878541676, "grad_norm": 283.3591613769531, "learning_rate": 2.983217810228739e-07, "loss": 6.9049, "step": 450270 }, { "epoch": 0.9095940884868514, "grad_norm": 299.103515625, "learning_rate": 2.9820302343318177e-07, "loss": 18.7821, "step": 450280 }, { "epoch": 0.9096142891195352, "grad_norm": 216.5201416015625, "learning_rate": 2.9808428875947925e-07, "loss": 14.35, "step": 450290 }, { "epoch": 0.909634489752219, "grad_norm": 2.37345027923584, "learning_rate": 2.9796557700234317e-07, "loss": 24.4248, "step": 450300 }, { "epoch": 0.9096546903849029, "grad_norm": 201.43051147460938, "learning_rate": 2.9784688816235194e-07, "loss": 18.0874, "step": 450310 }, { "epoch": 0.9096748910175867, "grad_norm": 150.97740173339844, "learning_rate": 2.9772822224008515e-07, "loss": 14.5586, "step": 450320 }, { "epoch": 0.9096950916502705, "grad_norm": 190.00332641601562, "learning_rate": 2.976095792361211e-07, "loss": 44.8998, "step": 450330 }, { "epoch": 0.9097152922829543, "grad_norm": 350.0762939453125, "learning_rate": 2.9749095915103665e-07, "loss": 24.1158, "step": 450340 }, { "epoch": 0.9097354929156382, "grad_norm": 541.433349609375, "learning_rate": 2.9737236198541077e-07, "loss": 30.8052, "step": 450350 }, { "epoch": 0.909755693548322, "grad_norm": 118.30099487304688, "learning_rate": 2.9725378773982295e-07, "loss": 16.6455, "step": 450360 }, { "epoch": 0.9097758941810058, "grad_norm": 424.4774475097656, "learning_rate": 2.971352364148494e-07, "loss": 17.9009, "step": 450370 }, { "epoch": 0.9097960948136896, "grad_norm": 122.40388488769531, "learning_rate": 2.970167080110675e-07, "loss": 13.7767, "step": 450380 }, { "epoch": 0.9098162954463734, "grad_norm": 129.57290649414062, "learning_rate": 2.968982025290568e-07, "loss": 20.165, "step": 450390 }, { "epoch": 0.9098364960790571, "grad_norm": 307.5549011230469, "learning_rate": 2.967797199693928e-07, "loss": 22.9961, "step": 450400 }, { "epoch": 0.909856696711741, "grad_norm": 755.9302978515625, "learning_rate": 2.9666126033265517e-07, "loss": 17.0161, "step": 450410 }, { "epoch": 0.9098768973444248, "grad_norm": 232.53997802734375, "learning_rate": 2.9654282361941953e-07, "loss": 19.0928, "step": 450420 }, { "epoch": 0.9098970979771086, "grad_norm": 578.3982543945312, "learning_rate": 2.9642440983026324e-07, "loss": 23.7926, "step": 450430 }, { "epoch": 0.9099172986097924, "grad_norm": 685.7623901367188, "learning_rate": 2.963060189657646e-07, "loss": 18.2127, "step": 450440 }, { "epoch": 0.9099374992424762, "grad_norm": 157.11639404296875, "learning_rate": 2.961876510264999e-07, "loss": 19.3056, "step": 450450 }, { "epoch": 0.9099576998751601, "grad_norm": 142.48611450195312, "learning_rate": 2.9606930601304595e-07, "loss": 15.6247, "step": 450460 }, { "epoch": 0.9099779005078439, "grad_norm": 37.19289779663086, "learning_rate": 2.9595098392597887e-07, "loss": 11.6434, "step": 
450470 }, { "epoch": 0.9099981011405277, "grad_norm": 489.3031005859375, "learning_rate": 2.958326847658771e-07, "loss": 17.3193, "step": 450480 }, { "epoch": 0.9100183017732115, "grad_norm": 122.79801940917969, "learning_rate": 2.9571440853331634e-07, "loss": 19.2583, "step": 450490 }, { "epoch": 0.9100385024058953, "grad_norm": 422.8098449707031, "learning_rate": 2.9559615522887275e-07, "loss": 10.5503, "step": 450500 }, { "epoch": 0.9100587030385792, "grad_norm": 154.032958984375, "learning_rate": 2.954779248531231e-07, "loss": 14.2224, "step": 450510 }, { "epoch": 0.910078903671263, "grad_norm": 259.25714111328125, "learning_rate": 2.953597174066436e-07, "loss": 17.5647, "step": 450520 }, { "epoch": 0.9100991043039468, "grad_norm": 197.6894989013672, "learning_rate": 2.952415328900093e-07, "loss": 13.9, "step": 450530 }, { "epoch": 0.9101193049366306, "grad_norm": 140.98451232910156, "learning_rate": 2.951233713037971e-07, "loss": 10.3261, "step": 450540 }, { "epoch": 0.9101395055693144, "grad_norm": 530.427001953125, "learning_rate": 2.9500523264858473e-07, "loss": 17.0496, "step": 450550 }, { "epoch": 0.9101597062019983, "grad_norm": 1189.279296875, "learning_rate": 2.948871169249451e-07, "loss": 24.623, "step": 450560 }, { "epoch": 0.9101799068346821, "grad_norm": 255.91119384765625, "learning_rate": 2.9476902413345443e-07, "loss": 19.1389, "step": 450570 }, { "epoch": 0.9102001074673659, "grad_norm": 382.6887512207031, "learning_rate": 2.946509542746895e-07, "loss": 10.8728, "step": 450580 }, { "epoch": 0.9102203081000497, "grad_norm": 526.3858642578125, "learning_rate": 2.9453290734922537e-07, "loss": 27.7853, "step": 450590 }, { "epoch": 0.9102405087327335, "grad_norm": 301.6593017578125, "learning_rate": 2.9441488335763656e-07, "loss": 32.5232, "step": 450600 }, { "epoch": 0.9102607093654174, "grad_norm": 8.258715629577637, "learning_rate": 2.9429688230049934e-07, "loss": 14.6922, "step": 450610 }, { "epoch": 0.9102809099981012, "grad_norm": 467.5506286621094, "learning_rate": 2.941789041783888e-07, "loss": 7.9306, "step": 450620 }, { "epoch": 0.910301110630785, "grad_norm": 4.88645076751709, "learning_rate": 2.940609489918783e-07, "loss": 11.6321, "step": 450630 }, { "epoch": 0.9103213112634688, "grad_norm": 615.4234619140625, "learning_rate": 2.9394301674154413e-07, "loss": 14.5892, "step": 450640 }, { "epoch": 0.9103415118961525, "grad_norm": 232.69505310058594, "learning_rate": 2.938251074279619e-07, "loss": 15.8183, "step": 450650 }, { "epoch": 0.9103617125288364, "grad_norm": 698.8169555664062, "learning_rate": 2.9370722105170504e-07, "loss": 18.039, "step": 450660 }, { "epoch": 0.9103819131615202, "grad_norm": 29.231889724731445, "learning_rate": 2.935893576133475e-07, "loss": 24.1463, "step": 450670 }, { "epoch": 0.910402113794204, "grad_norm": 68.94104766845703, "learning_rate": 2.9347151711346556e-07, "loss": 11.9315, "step": 450680 }, { "epoch": 0.9104223144268878, "grad_norm": 363.8338317871094, "learning_rate": 2.933536995526326e-07, "loss": 29.146, "step": 450690 }, { "epoch": 0.9104425150595716, "grad_norm": 57.30198669433594, "learning_rate": 2.9323590493142206e-07, "loss": 15.7005, "step": 450700 }, { "epoch": 0.9104627156922555, "grad_norm": 235.6766357421875, "learning_rate": 2.931181332504096e-07, "loss": 15.6148, "step": 450710 }, { "epoch": 0.9104829163249393, "grad_norm": 136.07627868652344, "learning_rate": 2.930003845101681e-07, "loss": 12.1525, "step": 450720 }, { "epoch": 0.9105031169576231, "grad_norm": 347.1009216308594, "learning_rate": 
2.9288265871127206e-07, "loss": 13.9972, "step": 450730 }, { "epoch": 0.9105233175903069, "grad_norm": 58.31315231323242, "learning_rate": 2.927649558542955e-07, "loss": 17.731, "step": 450740 }, { "epoch": 0.9105435182229907, "grad_norm": 188.71400451660156, "learning_rate": 2.9264727593981024e-07, "loss": 17.885, "step": 450750 }, { "epoch": 0.9105637188556746, "grad_norm": 244.48216247558594, "learning_rate": 2.9252961896839236e-07, "loss": 16.7535, "step": 450760 }, { "epoch": 0.9105839194883584, "grad_norm": 595.94921875, "learning_rate": 2.9241198494061427e-07, "loss": 20.7789, "step": 450770 }, { "epoch": 0.9106041201210422, "grad_norm": 59.34998321533203, "learning_rate": 2.922943738570483e-07, "loss": 13.5088, "step": 450780 }, { "epoch": 0.910624320753726, "grad_norm": 384.7566833496094, "learning_rate": 2.921767857182689e-07, "loss": 16.75, "step": 450790 }, { "epoch": 0.9106445213864098, "grad_norm": 258.9685363769531, "learning_rate": 2.920592205248496e-07, "loss": 20.9006, "step": 450800 }, { "epoch": 0.9106647220190937, "grad_norm": 415.7431945800781, "learning_rate": 2.919416782773621e-07, "loss": 18.0212, "step": 450810 }, { "epoch": 0.9106849226517775, "grad_norm": 307.84735107421875, "learning_rate": 2.918241589763793e-07, "loss": 17.0118, "step": 450820 }, { "epoch": 0.9107051232844613, "grad_norm": 154.61444091796875, "learning_rate": 2.917066626224757e-07, "loss": 20.9167, "step": 450830 }, { "epoch": 0.9107253239171451, "grad_norm": 545.405029296875, "learning_rate": 2.9158918921622205e-07, "loss": 29.9494, "step": 450840 }, { "epoch": 0.9107455245498289, "grad_norm": 372.11822509765625, "learning_rate": 2.914717387581917e-07, "loss": 26.7919, "step": 450850 }, { "epoch": 0.9107657251825128, "grad_norm": 272.1129150390625, "learning_rate": 2.913543112489564e-07, "loss": 22.2206, "step": 450860 }, { "epoch": 0.9107859258151966, "grad_norm": 564.9196166992188, "learning_rate": 2.912369066890908e-07, "loss": 13.2435, "step": 450870 }, { "epoch": 0.9108061264478804, "grad_norm": 407.7785949707031, "learning_rate": 2.9111952507916375e-07, "loss": 24.8045, "step": 450880 }, { "epoch": 0.9108263270805642, "grad_norm": 240.93890380859375, "learning_rate": 2.910021664197493e-07, "loss": 23.5872, "step": 450890 }, { "epoch": 0.910846527713248, "grad_norm": 455.80938720703125, "learning_rate": 2.908848307114198e-07, "loss": 18.3362, "step": 450900 }, { "epoch": 0.9108667283459317, "grad_norm": 180.8559112548828, "learning_rate": 2.9076751795474647e-07, "loss": 11.052, "step": 450910 }, { "epoch": 0.9108869289786156, "grad_norm": 185.9855499267578, "learning_rate": 2.9065022815030044e-07, "loss": 10.145, "step": 450920 }, { "epoch": 0.9109071296112994, "grad_norm": 503.127685546875, "learning_rate": 2.905329612986546e-07, "loss": 19.2377, "step": 450930 }, { "epoch": 0.9109273302439832, "grad_norm": 231.20713806152344, "learning_rate": 2.9041571740037967e-07, "loss": 15.72, "step": 450940 }, { "epoch": 0.910947530876667, "grad_norm": 444.4071350097656, "learning_rate": 2.9029849645604735e-07, "loss": 16.6395, "step": 450950 }, { "epoch": 0.9109677315093508, "grad_norm": 311.00054931640625, "learning_rate": 2.9018129846622834e-07, "loss": 11.8376, "step": 450960 }, { "epoch": 0.9109879321420347, "grad_norm": 424.9436340332031, "learning_rate": 2.900641234314955e-07, "loss": 22.9413, "step": 450970 }, { "epoch": 0.9110081327747185, "grad_norm": 113.29547119140625, "learning_rate": 2.899469713524183e-07, "loss": 4.7459, "step": 450980 }, { "epoch": 0.9110283334074023, "grad_norm": 
8.436201095581055, "learning_rate": 2.898298422295681e-07, "loss": 22.1985, "step": 450990 }, { "epoch": 0.9110485340400861, "grad_norm": 239.64442443847656, "learning_rate": 2.8971273606351656e-07, "loss": 16.2286, "step": 451000 }, { "epoch": 0.9110687346727699, "grad_norm": 269.68243408203125, "learning_rate": 2.895956528548338e-07, "loss": 34.9558, "step": 451010 }, { "epoch": 0.9110889353054538, "grad_norm": 182.18995666503906, "learning_rate": 2.8947859260408997e-07, "loss": 14.533, "step": 451020 }, { "epoch": 0.9111091359381376, "grad_norm": 23.684764862060547, "learning_rate": 2.8936155531185675e-07, "loss": 29.4517, "step": 451030 }, { "epoch": 0.9111293365708214, "grad_norm": 405.1063537597656, "learning_rate": 2.892445409787037e-07, "loss": 30.9317, "step": 451040 }, { "epoch": 0.9111495372035052, "grad_norm": 154.00747680664062, "learning_rate": 2.891275496052015e-07, "loss": 18.9794, "step": 451050 }, { "epoch": 0.911169737836189, "grad_norm": 521.412353515625, "learning_rate": 2.8901058119192026e-07, "loss": 16.2786, "step": 451060 }, { "epoch": 0.9111899384688729, "grad_norm": 295.50860595703125, "learning_rate": 2.8889363573943006e-07, "loss": 11.4799, "step": 451070 }, { "epoch": 0.9112101391015567, "grad_norm": 461.7724609375, "learning_rate": 2.8877671324829994e-07, "loss": 17.0752, "step": 451080 }, { "epoch": 0.9112303397342405, "grad_norm": 307.6255798339844, "learning_rate": 2.886598137191021e-07, "loss": 22.2398, "step": 451090 }, { "epoch": 0.9112505403669243, "grad_norm": 112.7985610961914, "learning_rate": 2.8854293715240455e-07, "loss": 20.4865, "step": 451100 }, { "epoch": 0.9112707409996081, "grad_norm": 301.1622009277344, "learning_rate": 2.884260835487768e-07, "loss": 14.4974, "step": 451110 }, { "epoch": 0.911290941632292, "grad_norm": 166.6481475830078, "learning_rate": 2.8830925290878997e-07, "loss": 20.8534, "step": 451120 }, { "epoch": 0.9113111422649758, "grad_norm": 242.3641357421875, "learning_rate": 2.8819244523301206e-07, "loss": 20.7083, "step": 451130 }, { "epoch": 0.9113313428976596, "grad_norm": 162.1275177001953, "learning_rate": 2.880756605220114e-07, "loss": 14.867, "step": 451140 }, { "epoch": 0.9113515435303434, "grad_norm": 562.4708862304688, "learning_rate": 2.879588987763593e-07, "loss": 26.572, "step": 451150 }, { "epoch": 0.9113717441630271, "grad_norm": 410.82196044921875, "learning_rate": 2.878421599966252e-07, "loss": 20.0088, "step": 451160 }, { "epoch": 0.911391944795711, "grad_norm": 334.5708923339844, "learning_rate": 2.877254441833754e-07, "loss": 34.9807, "step": 451170 }, { "epoch": 0.9114121454283948, "grad_norm": 541.3951416015625, "learning_rate": 2.8760875133718003e-07, "loss": 45.1133, "step": 451180 }, { "epoch": 0.9114323460610786, "grad_norm": 848.431396484375, "learning_rate": 2.8749208145860907e-07, "loss": 13.6133, "step": 451190 }, { "epoch": 0.9114525466937624, "grad_norm": 182.5347442626953, "learning_rate": 2.8737543454822993e-07, "loss": 16.9643, "step": 451200 }, { "epoch": 0.9114727473264462, "grad_norm": 245.85494995117188, "learning_rate": 2.87258810606611e-07, "loss": 11.7495, "step": 451210 }, { "epoch": 0.91149294795913, "grad_norm": 480.86737060546875, "learning_rate": 2.8714220963432125e-07, "loss": 16.4133, "step": 451220 }, { "epoch": 0.9115131485918139, "grad_norm": 0.02471095696091652, "learning_rate": 2.870256316319292e-07, "loss": 24.8626, "step": 451230 }, { "epoch": 0.9115333492244977, "grad_norm": 480.7483825683594, "learning_rate": 2.8690907660000156e-07, "loss": 16.0808, "step": 451240 }, 
{ "epoch": 0.9115535498571815, "grad_norm": 106.62007904052734, "learning_rate": 2.867925445391079e-07, "loss": 11.1748, "step": 451250 }, { "epoch": 0.9115737504898653, "grad_norm": 517.0779418945312, "learning_rate": 2.8667603544981604e-07, "loss": 17.3558, "step": 451260 }, { "epoch": 0.9115939511225492, "grad_norm": 763.6796875, "learning_rate": 2.8655954933269395e-07, "loss": 25.0028, "step": 451270 }, { "epoch": 0.911614151755233, "grad_norm": 448.9400634765625, "learning_rate": 2.8644308618830775e-07, "loss": 30.3073, "step": 451280 }, { "epoch": 0.9116343523879168, "grad_norm": 110.02424621582031, "learning_rate": 2.86326646017227e-07, "loss": 12.3114, "step": 451290 }, { "epoch": 0.9116545530206006, "grad_norm": 60.046451568603516, "learning_rate": 2.862102288200186e-07, "loss": 8.2574, "step": 451300 }, { "epoch": 0.9116747536532844, "grad_norm": 163.764892578125, "learning_rate": 2.8609383459724915e-07, "loss": 11.8364, "step": 451310 }, { "epoch": 0.9116949542859683, "grad_norm": 462.7592468261719, "learning_rate": 2.8597746334948773e-07, "loss": 13.9652, "step": 451320 }, { "epoch": 0.9117151549186521, "grad_norm": 387.3420104980469, "learning_rate": 2.8586111507729887e-07, "loss": 17.1762, "step": 451330 }, { "epoch": 0.9117353555513359, "grad_norm": 213.70208740234375, "learning_rate": 2.8574478978125266e-07, "loss": 25.3365, "step": 451340 }, { "epoch": 0.9117555561840197, "grad_norm": 193.23422241210938, "learning_rate": 2.856284874619142e-07, "loss": 14.605, "step": 451350 }, { "epoch": 0.9117757568167035, "grad_norm": 355.51910400390625, "learning_rate": 2.855122081198503e-07, "loss": 14.1195, "step": 451360 }, { "epoch": 0.9117959574493874, "grad_norm": 309.6023254394531, "learning_rate": 2.8539595175562817e-07, "loss": 14.2535, "step": 451370 }, { "epoch": 0.9118161580820712, "grad_norm": 276.8885803222656, "learning_rate": 2.852797183698147e-07, "loss": 31.66, "step": 451380 }, { "epoch": 0.911836358714755, "grad_norm": 233.3388214111328, "learning_rate": 2.851635079629755e-07, "loss": 18.4705, "step": 451390 }, { "epoch": 0.9118565593474388, "grad_norm": 53.565189361572266, "learning_rate": 2.850473205356774e-07, "loss": 18.5574, "step": 451400 }, { "epoch": 0.9118767599801226, "grad_norm": 290.7619934082031, "learning_rate": 2.8493115608848764e-07, "loss": 24.058, "step": 451410 }, { "epoch": 0.9118969606128063, "grad_norm": 286.1277160644531, "learning_rate": 2.8481501462197137e-07, "loss": 12.422, "step": 451420 }, { "epoch": 0.9119171612454902, "grad_norm": 622.9802856445312, "learning_rate": 2.846988961366942e-07, "loss": 22.359, "step": 451430 }, { "epoch": 0.911937361878174, "grad_norm": 324.1952209472656, "learning_rate": 2.8458280063322353e-07, "loss": 23.3572, "step": 451440 }, { "epoch": 0.9119575625108578, "grad_norm": 410.7866516113281, "learning_rate": 2.844667281121244e-07, "loss": 16.6071, "step": 451450 }, { "epoch": 0.9119777631435416, "grad_norm": 475.54052734375, "learning_rate": 2.843506785739614e-07, "loss": 12.6368, "step": 451460 }, { "epoch": 0.9119979637762254, "grad_norm": 35.28129959106445, "learning_rate": 2.842346520193018e-07, "loss": 18.0663, "step": 451470 }, { "epoch": 0.9120181644089093, "grad_norm": 287.9037780761719, "learning_rate": 2.8411864844871184e-07, "loss": 14.8362, "step": 451480 }, { "epoch": 0.9120383650415931, "grad_norm": 250.30825805664062, "learning_rate": 2.8400266786275387e-07, "loss": 34.2517, "step": 451490 }, { "epoch": 0.9120585656742769, "grad_norm": 359.9864196777344, "learning_rate": 2.838867102619952e-07, 
"loss": 17.0671, "step": 451500 }, { "epoch": 0.9120787663069607, "grad_norm": 276.7231750488281, "learning_rate": 2.8377077564700094e-07, "loss": 8.9851, "step": 451510 }, { "epoch": 0.9120989669396445, "grad_norm": 338.2561340332031, "learning_rate": 2.8365486401833677e-07, "loss": 22.0601, "step": 451520 }, { "epoch": 0.9121191675723284, "grad_norm": 143.89337158203125, "learning_rate": 2.835389753765655e-07, "loss": 13.1001, "step": 451530 }, { "epoch": 0.9121393682050122, "grad_norm": 683.7655639648438, "learning_rate": 2.834231097222534e-07, "loss": 26.8933, "step": 451540 }, { "epoch": 0.912159568837696, "grad_norm": 487.2683410644531, "learning_rate": 2.833072670559661e-07, "loss": 21.1917, "step": 451550 }, { "epoch": 0.9121797694703798, "grad_norm": 561.84423828125, "learning_rate": 2.83191447378266e-07, "loss": 19.7071, "step": 451560 }, { "epoch": 0.9121999701030636, "grad_norm": 33.04185104370117, "learning_rate": 2.8307565068971867e-07, "loss": 18.7884, "step": 451570 }, { "epoch": 0.9122201707357475, "grad_norm": 215.38392639160156, "learning_rate": 2.829598769908892e-07, "loss": 24.2191, "step": 451580 }, { "epoch": 0.9122403713684313, "grad_norm": 159.6072998046875, "learning_rate": 2.8284412628234117e-07, "loss": 9.3734, "step": 451590 }, { "epoch": 0.9122605720011151, "grad_norm": 5.168828010559082, "learning_rate": 2.8272839856463783e-07, "loss": 12.7489, "step": 451600 }, { "epoch": 0.9122807726337989, "grad_norm": 598.6390380859375, "learning_rate": 2.8261269383834497e-07, "loss": 20.4365, "step": 451610 }, { "epoch": 0.9123009732664827, "grad_norm": 304.0203857421875, "learning_rate": 2.8249701210402603e-07, "loss": 17.0099, "step": 451620 }, { "epoch": 0.9123211738991666, "grad_norm": 471.2829895019531, "learning_rate": 2.823813533622438e-07, "loss": 15.8112, "step": 451630 }, { "epoch": 0.9123413745318504, "grad_norm": 779.8455810546875, "learning_rate": 2.822657176135629e-07, "loss": 27.6026, "step": 451640 }, { "epoch": 0.9123615751645342, "grad_norm": 398.429443359375, "learning_rate": 2.821501048585462e-07, "loss": 52.8884, "step": 451650 }, { "epoch": 0.912381775797218, "grad_norm": 173.2937774658203, "learning_rate": 2.8203451509775825e-07, "loss": 14.2675, "step": 451660 }, { "epoch": 0.9124019764299018, "grad_norm": 0.9098075032234192, "learning_rate": 2.819189483317625e-07, "loss": 17.9382, "step": 451670 }, { "epoch": 0.9124221770625855, "grad_norm": 557.1135864257812, "learning_rate": 2.818034045611201e-07, "loss": 19.7519, "step": 451680 }, { "epoch": 0.9124423776952694, "grad_norm": 325.0290832519531, "learning_rate": 2.816878837863968e-07, "loss": 10.5954, "step": 451690 }, { "epoch": 0.9124625783279532, "grad_norm": 497.5835876464844, "learning_rate": 2.815723860081537e-07, "loss": 16.6184, "step": 451700 }, { "epoch": 0.912482778960637, "grad_norm": 197.95315551757812, "learning_rate": 2.8145691122695496e-07, "loss": 20.7503, "step": 451710 }, { "epoch": 0.9125029795933208, "grad_norm": 424.4306945800781, "learning_rate": 2.8134145944336225e-07, "loss": 15.6808, "step": 451720 }, { "epoch": 0.9125231802260046, "grad_norm": 193.1788330078125, "learning_rate": 2.812260306579401e-07, "loss": 20.6186, "step": 451730 }, { "epoch": 0.9125433808586885, "grad_norm": 717.1135864257812, "learning_rate": 2.811106248712497e-07, "loss": 26.0767, "step": 451740 }, { "epoch": 0.9125635814913723, "grad_norm": 459.9844665527344, "learning_rate": 2.8099524208385297e-07, "loss": 29.406, "step": 451750 }, { "epoch": 0.9125837821240561, "grad_norm": 380.0570373535156, 
"learning_rate": 2.8087988229631325e-07, "loss": 11.3463, "step": 451760 }, { "epoch": 0.9126039827567399, "grad_norm": 562.419189453125, "learning_rate": 2.8076454550919397e-07, "loss": 21.1921, "step": 451770 }, { "epoch": 0.9126241833894237, "grad_norm": 874.0545043945312, "learning_rate": 2.8064923172305467e-07, "loss": 27.8008, "step": 451780 }, { "epoch": 0.9126443840221076, "grad_norm": 218.60658264160156, "learning_rate": 2.8053394093845833e-07, "loss": 15.5657, "step": 451790 }, { "epoch": 0.9126645846547914, "grad_norm": 72.1780014038086, "learning_rate": 2.804186731559677e-07, "loss": 11.2906, "step": 451800 }, { "epoch": 0.9126847852874752, "grad_norm": 530.6395263671875, "learning_rate": 2.8030342837614466e-07, "loss": 17.5073, "step": 451810 }, { "epoch": 0.912704985920159, "grad_norm": 183.9311981201172, "learning_rate": 2.8018820659954927e-07, "loss": 14.8489, "step": 451820 }, { "epoch": 0.9127251865528428, "grad_norm": 110.54678344726562, "learning_rate": 2.800730078267444e-07, "loss": 14.3235, "step": 451830 }, { "epoch": 0.9127453871855267, "grad_norm": 10.480724334716797, "learning_rate": 2.7995783205829185e-07, "loss": 14.0507, "step": 451840 }, { "epoch": 0.9127655878182105, "grad_norm": 188.81947326660156, "learning_rate": 2.798426792947517e-07, "loss": 18.8643, "step": 451850 }, { "epoch": 0.9127857884508943, "grad_norm": 501.7898254394531, "learning_rate": 2.7972754953668524e-07, "loss": 17.7551, "step": 451860 }, { "epoch": 0.9128059890835781, "grad_norm": 352.3951110839844, "learning_rate": 2.796124427846553e-07, "loss": 11.06, "step": 451870 }, { "epoch": 0.912826189716262, "grad_norm": 364.9849853515625, "learning_rate": 2.7949735903922195e-07, "loss": 16.1715, "step": 451880 }, { "epoch": 0.9128463903489458, "grad_norm": 404.40289306640625, "learning_rate": 2.7938229830094475e-07, "loss": 13.3248, "step": 451890 }, { "epoch": 0.9128665909816296, "grad_norm": 885.2887573242188, "learning_rate": 2.792672605703867e-07, "loss": 23.7981, "step": 451900 }, { "epoch": 0.9128867916143134, "grad_norm": 442.0524597167969, "learning_rate": 2.791522458481077e-07, "loss": 21.1115, "step": 451910 }, { "epoch": 0.9129069922469972, "grad_norm": 656.0565795898438, "learning_rate": 2.79037254134667e-07, "loss": 24.4948, "step": 451920 }, { "epoch": 0.9129271928796809, "grad_norm": 265.2845458984375, "learning_rate": 2.7892228543062725e-07, "loss": 8.6274, "step": 451930 }, { "epoch": 0.9129473935123648, "grad_norm": 973.04541015625, "learning_rate": 2.788073397365465e-07, "loss": 26.0115, "step": 451940 }, { "epoch": 0.9129675941450486, "grad_norm": 154.58677673339844, "learning_rate": 2.78692417052987e-07, "loss": 17.974, "step": 451950 }, { "epoch": 0.9129877947777324, "grad_norm": 412.13519287109375, "learning_rate": 2.785775173805083e-07, "loss": 26.1495, "step": 451960 }, { "epoch": 0.9130079954104162, "grad_norm": 714.805908203125, "learning_rate": 2.784626407196689e-07, "loss": 18.7356, "step": 451970 }, { "epoch": 0.9130281960431, "grad_norm": 215.3949432373047, "learning_rate": 2.7834778707103104e-07, "loss": 13.2698, "step": 451980 }, { "epoch": 0.9130483966757839, "grad_norm": 10.791740417480469, "learning_rate": 2.782329564351532e-07, "loss": 31.0468, "step": 451990 }, { "epoch": 0.9130685973084677, "grad_norm": 588.0308227539062, "learning_rate": 2.7811814881259503e-07, "loss": 32.0709, "step": 452000 }, { "epoch": 0.9130887979411515, "grad_norm": 636.9493408203125, "learning_rate": 2.7800336420391593e-07, "loss": 17.1677, "step": 452010 }, { "epoch": 
0.9131089985738353, "grad_norm": 351.098876953125, "learning_rate": 2.7788860260967665e-07, "loss": 21.9418, "step": 452020 }, { "epoch": 0.9131291992065191, "grad_norm": 490.53692626953125, "learning_rate": 2.77773864030435e-07, "loss": 17.6471, "step": 452030 }, { "epoch": 0.913149399839203, "grad_norm": 453.8436279296875, "learning_rate": 2.7765914846675067e-07, "loss": 26.3111, "step": 452040 }, { "epoch": 0.9131696004718868, "grad_norm": 264.65277099609375, "learning_rate": 2.775444559191837e-07, "loss": 16.9386, "step": 452050 }, { "epoch": 0.9131898011045706, "grad_norm": 389.0132751464844, "learning_rate": 2.774297863882919e-07, "loss": 21.6635, "step": 452060 }, { "epoch": 0.9132100017372544, "grad_norm": 171.96925354003906, "learning_rate": 2.773151398746338e-07, "loss": 8.1321, "step": 452070 }, { "epoch": 0.9132302023699382, "grad_norm": 102.16289520263672, "learning_rate": 2.772005163787689e-07, "loss": 10.6967, "step": 452080 }, { "epoch": 0.9132504030026221, "grad_norm": 133.51788330078125, "learning_rate": 2.770859159012579e-07, "loss": 12.4846, "step": 452090 }, { "epoch": 0.9132706036353059, "grad_norm": 315.4050598144531, "learning_rate": 2.7697133844265535e-07, "loss": 19.5759, "step": 452100 }, { "epoch": 0.9132908042679897, "grad_norm": 29.029733657836914, "learning_rate": 2.768567840035219e-07, "loss": 27.0451, "step": 452110 }, { "epoch": 0.9133110049006735, "grad_norm": 409.8620300292969, "learning_rate": 2.76742252584416e-07, "loss": 10.4875, "step": 452120 }, { "epoch": 0.9133312055333573, "grad_norm": 29.181663513183594, "learning_rate": 2.7662774418589555e-07, "loss": 10.0926, "step": 452130 }, { "epoch": 0.9133514061660412, "grad_norm": 651.210693359375, "learning_rate": 2.765132588085184e-07, "loss": 23.9028, "step": 452140 }, { "epoch": 0.913371606798725, "grad_norm": 248.01182556152344, "learning_rate": 2.763987964528425e-07, "loss": 14.2047, "step": 452150 }, { "epoch": 0.9133918074314088, "grad_norm": 453.2279052734375, "learning_rate": 2.7628435711942737e-07, "loss": 18.6502, "step": 452160 }, { "epoch": 0.9134120080640926, "grad_norm": 344.0987548828125, "learning_rate": 2.7616994080882754e-07, "loss": 24.7988, "step": 452170 }, { "epoch": 0.9134322086967764, "grad_norm": 455.11712646484375, "learning_rate": 2.7605554752160256e-07, "loss": 21.2957, "step": 452180 }, { "epoch": 0.9134524093294601, "grad_norm": 76.14151763916016, "learning_rate": 2.7594117725831096e-07, "loss": 7.8477, "step": 452190 }, { "epoch": 0.913472609962144, "grad_norm": 342.2422180175781, "learning_rate": 2.758268300195094e-07, "loss": 12.5107, "step": 452200 }, { "epoch": 0.9134928105948278, "grad_norm": 320.7900390625, "learning_rate": 2.757125058057536e-07, "loss": 10.5416, "step": 452210 }, { "epoch": 0.9135130112275116, "grad_norm": 340.1474609375, "learning_rate": 2.755982046176031e-07, "loss": 19.4271, "step": 452220 }, { "epoch": 0.9135332118601954, "grad_norm": 202.52476501464844, "learning_rate": 2.754839264556136e-07, "loss": 6.2309, "step": 452230 }, { "epoch": 0.9135534124928792, "grad_norm": 294.6946105957031, "learning_rate": 2.7536967132034186e-07, "loss": 11.8811, "step": 452240 }, { "epoch": 0.9135736131255631, "grad_norm": 639.5032958984375, "learning_rate": 2.752554392123463e-07, "loss": 23.6904, "step": 452250 }, { "epoch": 0.9135938137582469, "grad_norm": 500.483642578125, "learning_rate": 2.7514123013218153e-07, "loss": 22.7042, "step": 452260 }, { "epoch": 0.9136140143909307, "grad_norm": 311.4737548828125, "learning_rate": 2.750270440804065e-07, "loss": 
11.9557, "step": 452270 }, { "epoch": 0.9136342150236145, "grad_norm": 100.47586059570312, "learning_rate": 2.749128810575763e-07, "loss": 14.1911, "step": 452280 }, { "epoch": 0.9136544156562983, "grad_norm": 356.00457763671875, "learning_rate": 2.747987410642472e-07, "loss": 23.2069, "step": 452290 }, { "epoch": 0.9136746162889822, "grad_norm": 248.7677001953125, "learning_rate": 2.746846241009765e-07, "loss": 11.1875, "step": 452300 }, { "epoch": 0.913694816921666, "grad_norm": 617.8887329101562, "learning_rate": 2.745705301683188e-07, "loss": 15.8752, "step": 452310 }, { "epoch": 0.9137150175543498, "grad_norm": 313.4349060058594, "learning_rate": 2.7445645926683253e-07, "loss": 22.381, "step": 452320 }, { "epoch": 0.9137352181870336, "grad_norm": 353.8231201171875, "learning_rate": 2.7434241139707106e-07, "loss": 16.0729, "step": 452330 }, { "epoch": 0.9137554188197174, "grad_norm": 184.85678100585938, "learning_rate": 2.742283865595924e-07, "loss": 13.1759, "step": 452340 }, { "epoch": 0.9137756194524013, "grad_norm": 421.85113525390625, "learning_rate": 2.7411438475495155e-07, "loss": 16.8529, "step": 452350 }, { "epoch": 0.9137958200850851, "grad_norm": 700.2169189453125, "learning_rate": 2.740004059837031e-07, "loss": 18.6863, "step": 452360 }, { "epoch": 0.9138160207177689, "grad_norm": 408.6905517578125, "learning_rate": 2.738864502464045e-07, "loss": 19.4036, "step": 452370 }, { "epoch": 0.9138362213504527, "grad_norm": 341.9888000488281, "learning_rate": 2.737725175436101e-07, "loss": 14.0515, "step": 452380 }, { "epoch": 0.9138564219831365, "grad_norm": 401.954345703125, "learning_rate": 2.7365860787587405e-07, "loss": 9.6258, "step": 452390 }, { "epoch": 0.9138766226158204, "grad_norm": 23.526723861694336, "learning_rate": 2.735447212437531e-07, "loss": 27.0535, "step": 452400 }, { "epoch": 0.9138968232485042, "grad_norm": 1.1871169805526733, "learning_rate": 2.734308576478023e-07, "loss": 13.1915, "step": 452410 }, { "epoch": 0.913917023881188, "grad_norm": 668.0006103515625, "learning_rate": 2.733170170885768e-07, "loss": 21.8141, "step": 452420 }, { "epoch": 0.9139372245138718, "grad_norm": 413.64727783203125, "learning_rate": 2.7320319956662957e-07, "loss": 19.9112, "step": 452430 }, { "epoch": 0.9139574251465555, "grad_norm": 692.726318359375, "learning_rate": 2.730894050825178e-07, "loss": 6.8907, "step": 452440 }, { "epoch": 0.9139776257792394, "grad_norm": 22.914690017700195, "learning_rate": 2.72975633636795e-07, "loss": 11.7867, "step": 452450 }, { "epoch": 0.9139978264119232, "grad_norm": 361.56939697265625, "learning_rate": 2.728618852300147e-07, "loss": 10.5111, "step": 452460 }, { "epoch": 0.914018027044607, "grad_norm": 424.5031433105469, "learning_rate": 2.727481598627324e-07, "loss": 14.1596, "step": 452470 }, { "epoch": 0.9140382276772908, "grad_norm": 325.74468994140625, "learning_rate": 2.7263445753550275e-07, "loss": 6.4521, "step": 452480 }, { "epoch": 0.9140584283099746, "grad_norm": 468.8731384277344, "learning_rate": 2.725207782488792e-07, "loss": 18.0635, "step": 452490 }, { "epoch": 0.9140786289426585, "grad_norm": 428.7283020019531, "learning_rate": 2.724071220034158e-07, "loss": 22.1494, "step": 452500 }, { "epoch": 0.9140988295753423, "grad_norm": 658.4942626953125, "learning_rate": 2.72293488799667e-07, "loss": 23.3455, "step": 452510 }, { "epoch": 0.9141190302080261, "grad_norm": 221.12045288085938, "learning_rate": 2.7217987863818684e-07, "loss": 28.4971, "step": 452520 }, { "epoch": 0.9141392308407099, "grad_norm": 68.4049301147461, 
"learning_rate": 2.7206629151952715e-07, "loss": 17.7994, "step": 452530 }, { "epoch": 0.9141594314733937, "grad_norm": 116.92928314208984, "learning_rate": 2.7195272744424405e-07, "loss": 15.2393, "step": 452540 }, { "epoch": 0.9141796321060776, "grad_norm": 373.884521484375, "learning_rate": 2.7183918641288943e-07, "loss": 9.9782, "step": 452550 }, { "epoch": 0.9141998327387614, "grad_norm": 490.6842041015625, "learning_rate": 2.717256684260172e-07, "loss": 22.5185, "step": 452560 }, { "epoch": 0.9142200333714452, "grad_norm": 161.93197631835938, "learning_rate": 2.716121734841814e-07, "loss": 28.5874, "step": 452570 }, { "epoch": 0.914240234004129, "grad_norm": 130.264404296875, "learning_rate": 2.714987015879328e-07, "loss": 20.4128, "step": 452580 }, { "epoch": 0.9142604346368128, "grad_norm": 932.6390991210938, "learning_rate": 2.7138525273782746e-07, "loss": 23.6868, "step": 452590 }, { "epoch": 0.9142806352694967, "grad_norm": 156.4149627685547, "learning_rate": 2.712718269344161e-07, "loss": 14.7229, "step": 452600 }, { "epoch": 0.9143008359021805, "grad_norm": 90.23873138427734, "learning_rate": 2.711584241782528e-07, "loss": 13.9489, "step": 452610 }, { "epoch": 0.9143210365348643, "grad_norm": 210.1337127685547, "learning_rate": 2.7104504446988867e-07, "loss": 20.9073, "step": 452620 }, { "epoch": 0.9143412371675481, "grad_norm": 329.8204040527344, "learning_rate": 2.709316878098789e-07, "loss": 14.034, "step": 452630 }, { "epoch": 0.9143614378002319, "grad_norm": 465.3182067871094, "learning_rate": 2.708183541987741e-07, "loss": 19.5663, "step": 452640 }, { "epoch": 0.9143816384329158, "grad_norm": 193.5091094970703, "learning_rate": 2.707050436371267e-07, "loss": 16.4573, "step": 452650 }, { "epoch": 0.9144018390655996, "grad_norm": 298.7574462890625, "learning_rate": 2.7059175612548947e-07, "loss": 19.0406, "step": 452660 }, { "epoch": 0.9144220396982834, "grad_norm": 359.96295166015625, "learning_rate": 2.7047849166441487e-07, "loss": 31.8732, "step": 452670 }, { "epoch": 0.9144422403309672, "grad_norm": 387.7774963378906, "learning_rate": 2.703652502544535e-07, "loss": 23.7531, "step": 452680 }, { "epoch": 0.914462440963651, "grad_norm": 192.9261932373047, "learning_rate": 2.702520318961588e-07, "loss": 18.7476, "step": 452690 }, { "epoch": 0.9144826415963347, "grad_norm": 229.85899353027344, "learning_rate": 2.701388365900831e-07, "loss": 17.4647, "step": 452700 }, { "epoch": 0.9145028422290186, "grad_norm": 377.1651306152344, "learning_rate": 2.7002566433677547e-07, "loss": 16.6451, "step": 452710 }, { "epoch": 0.9145230428617024, "grad_norm": 153.7214813232422, "learning_rate": 2.699125151367893e-07, "loss": 17.1172, "step": 452720 }, { "epoch": 0.9145432434943862, "grad_norm": 193.17724609375, "learning_rate": 2.697993889906764e-07, "loss": 14.337, "step": 452730 }, { "epoch": 0.91456344412707, "grad_norm": 263.640869140625, "learning_rate": 2.6968628589898735e-07, "loss": 8.4065, "step": 452740 }, { "epoch": 0.9145836447597538, "grad_norm": 96.6507568359375, "learning_rate": 2.6957320586227354e-07, "loss": 25.2692, "step": 452750 }, { "epoch": 0.9146038453924377, "grad_norm": 7.664963245391846, "learning_rate": 2.694601488810855e-07, "loss": 19.9627, "step": 452760 }, { "epoch": 0.9146240460251215, "grad_norm": 634.0137329101562, "learning_rate": 2.6934711495597676e-07, "loss": 45.0058, "step": 452770 }, { "epoch": 0.9146442466578053, "grad_norm": 621.3983764648438, "learning_rate": 2.6923410408749516e-07, "loss": 17.0445, "step": 452780 }, { "epoch": 
0.9146644472904891, "grad_norm": 284.60247802734375, "learning_rate": 2.6912111627619255e-07, "loss": 21.5207, "step": 452790 }, { "epoch": 0.9146846479231729, "grad_norm": 216.87062072753906, "learning_rate": 2.690081515226206e-07, "loss": 23.3396, "step": 452800 }, { "epoch": 0.9147048485558568, "grad_norm": 274.0249938964844, "learning_rate": 2.6889520982732897e-07, "loss": 11.5307, "step": 452810 }, { "epoch": 0.9147250491885406, "grad_norm": 52.05552673339844, "learning_rate": 2.6878229119086776e-07, "loss": 8.4213, "step": 452820 }, { "epoch": 0.9147452498212244, "grad_norm": 188.5215606689453, "learning_rate": 2.6866939561378867e-07, "loss": 17.838, "step": 452830 }, { "epoch": 0.9147654504539082, "grad_norm": 419.7026062011719, "learning_rate": 2.685565230966408e-07, "loss": 17.6637, "step": 452840 }, { "epoch": 0.914785651086592, "grad_norm": 525.0553588867188, "learning_rate": 2.684436736399737e-07, "loss": 14.4383, "step": 452850 }, { "epoch": 0.9148058517192759, "grad_norm": 333.9505920410156, "learning_rate": 2.6833084724433965e-07, "loss": 25.6344, "step": 452860 }, { "epoch": 0.9148260523519597, "grad_norm": 261.0413513183594, "learning_rate": 2.6821804391028603e-07, "loss": 17.0195, "step": 452870 }, { "epoch": 0.9148462529846435, "grad_norm": 58.79597854614258, "learning_rate": 2.681052636383641e-07, "loss": 15.6878, "step": 452880 }, { "epoch": 0.9148664536173273, "grad_norm": 906.9078979492188, "learning_rate": 2.679925064291239e-07, "loss": 24.5474, "step": 452890 }, { "epoch": 0.9148866542500111, "grad_norm": 172.41342163085938, "learning_rate": 2.6787977228311336e-07, "loss": 12.6641, "step": 452900 }, { "epoch": 0.914906854882695, "grad_norm": 290.7709655761719, "learning_rate": 2.677670612008837e-07, "loss": 28.5541, "step": 452910 }, { "epoch": 0.9149270555153788, "grad_norm": 383.07562255859375, "learning_rate": 2.676543731829823e-07, "loss": 22.4324, "step": 452920 }, { "epoch": 0.9149472561480626, "grad_norm": 501.7777404785156, "learning_rate": 2.6754170822996026e-07, "loss": 11.6085, "step": 452930 }, { "epoch": 0.9149674567807464, "grad_norm": 1093.78369140625, "learning_rate": 2.6742906634236564e-07, "loss": 17.3962, "step": 452940 }, { "epoch": 0.9149876574134301, "grad_norm": 355.9870300292969, "learning_rate": 2.6731644752074846e-07, "loss": 16.4126, "step": 452950 }, { "epoch": 0.915007858046114, "grad_norm": 244.96832275390625, "learning_rate": 2.6720385176565664e-07, "loss": 10.274, "step": 452960 }, { "epoch": 0.9150280586787978, "grad_norm": 823.0773315429688, "learning_rate": 2.6709127907763864e-07, "loss": 30.8585, "step": 452970 }, { "epoch": 0.9150482593114816, "grad_norm": 54.019229888916016, "learning_rate": 2.6697872945724455e-07, "loss": 14.8269, "step": 452980 }, { "epoch": 0.9150684599441654, "grad_norm": 484.1335754394531, "learning_rate": 2.668662029050217e-07, "loss": 14.5493, "step": 452990 }, { "epoch": 0.9150886605768492, "grad_norm": 500.1659851074219, "learning_rate": 2.6675369942151864e-07, "loss": 22.1963, "step": 453000 }, { "epoch": 0.915108861209533, "grad_norm": 514.647216796875, "learning_rate": 2.666412190072837e-07, "loss": 30.4295, "step": 453010 }, { "epoch": 0.9151290618422169, "grad_norm": 443.3310546875, "learning_rate": 2.665287616628659e-07, "loss": 9.98, "step": 453020 }, { "epoch": 0.9151492624749007, "grad_norm": 290.30291748046875, "learning_rate": 2.6641632738881315e-07, "loss": 17.9727, "step": 453030 }, { "epoch": 0.9151694631075845, "grad_norm": 235.2632293701172, "learning_rate": 2.663039161856723e-07, 
"loss": 13.8964, "step": 453040 }, { "epoch": 0.9151896637402683, "grad_norm": 122.74141693115234, "learning_rate": 2.6619152805399286e-07, "loss": 21.4631, "step": 453050 }, { "epoch": 0.9152098643729522, "grad_norm": 160.0275421142578, "learning_rate": 2.660791629943216e-07, "loss": 32.995, "step": 453060 }, { "epoch": 0.915230065005636, "grad_norm": 74.91618347167969, "learning_rate": 2.659668210072058e-07, "loss": 10.9474, "step": 453070 }, { "epoch": 0.9152502656383198, "grad_norm": 386.40838623046875, "learning_rate": 2.658545020931935e-07, "loss": 31.9575, "step": 453080 }, { "epoch": 0.9152704662710036, "grad_norm": 303.7977600097656, "learning_rate": 2.657422062528325e-07, "loss": 18.1284, "step": 453090 }, { "epoch": 0.9152906669036874, "grad_norm": 856.51220703125, "learning_rate": 2.656299334866702e-07, "loss": 22.5285, "step": 453100 }, { "epoch": 0.9153108675363713, "grad_norm": 381.6043395996094, "learning_rate": 2.655176837952528e-07, "loss": 24.3001, "step": 453110 }, { "epoch": 0.9153310681690551, "grad_norm": 246.74102783203125, "learning_rate": 2.654054571791287e-07, "loss": 11.1632, "step": 453120 }, { "epoch": 0.9153512688017389, "grad_norm": 494.12615966796875, "learning_rate": 2.6529325363884364e-07, "loss": 17.3095, "step": 453130 }, { "epoch": 0.9153714694344227, "grad_norm": 129.7801055908203, "learning_rate": 2.651810731749449e-07, "loss": 12.1936, "step": 453140 }, { "epoch": 0.9153916700671065, "grad_norm": 249.99185180664062, "learning_rate": 2.650689157879799e-07, "loss": 8.0272, "step": 453150 }, { "epoch": 0.9154118706997904, "grad_norm": 524.1873168945312, "learning_rate": 2.649567814784937e-07, "loss": 37.0262, "step": 453160 }, { "epoch": 0.9154320713324742, "grad_norm": 212.4214630126953, "learning_rate": 2.6484467024703476e-07, "loss": 17.7013, "step": 453170 }, { "epoch": 0.915452271965158, "grad_norm": 446.6779479980469, "learning_rate": 2.647325820941488e-07, "loss": 15.0334, "step": 453180 }, { "epoch": 0.9154724725978418, "grad_norm": 331.09869384765625, "learning_rate": 2.6462051702038085e-07, "loss": 14.7586, "step": 453190 }, { "epoch": 0.9154926732305256, "grad_norm": 382.7553405761719, "learning_rate": 2.6450847502627883e-07, "loss": 24.1565, "step": 453200 }, { "epoch": 0.9155128738632093, "grad_norm": 365.6215515136719, "learning_rate": 2.6439645611238795e-07, "loss": 20.7367, "step": 453210 }, { "epoch": 0.9155330744958932, "grad_norm": 598.3663940429688, "learning_rate": 2.642844602792544e-07, "loss": 17.08, "step": 453220 }, { "epoch": 0.915553275128577, "grad_norm": 184.0043487548828, "learning_rate": 2.6417248752742374e-07, "loss": 17.1081, "step": 453230 }, { "epoch": 0.9155734757612608, "grad_norm": 380.4300231933594, "learning_rate": 2.640605378574429e-07, "loss": 26.9159, "step": 453240 }, { "epoch": 0.9155936763939446, "grad_norm": 263.845703125, "learning_rate": 2.639486112698564e-07, "loss": 20.9774, "step": 453250 }, { "epoch": 0.9156138770266284, "grad_norm": 513.1378173828125, "learning_rate": 2.6383670776520933e-07, "loss": 25.778, "step": 453260 }, { "epoch": 0.9156340776593123, "grad_norm": 421.67596435546875, "learning_rate": 2.637248273440479e-07, "loss": 20.1958, "step": 453270 }, { "epoch": 0.9156542782919961, "grad_norm": 430.4900207519531, "learning_rate": 2.6361297000691787e-07, "loss": 11.1612, "step": 453280 }, { "epoch": 0.9156744789246799, "grad_norm": 284.833984375, "learning_rate": 2.6350113575436266e-07, "loss": 8.8739, "step": 453290 }, { "epoch": 0.9156946795573637, "grad_norm": 409.1342468261719, 
"learning_rate": 2.6338932458692847e-07, "loss": 12.7456, "step": 453300 }, { "epoch": 0.9157148801900475, "grad_norm": 1214.3470458984375, "learning_rate": 2.6327753650516205e-07, "loss": 46.1228, "step": 453310 }, { "epoch": 0.9157350808227314, "grad_norm": 333.0218505859375, "learning_rate": 2.631657715096048e-07, "loss": 11.3531, "step": 453320 }, { "epoch": 0.9157552814554152, "grad_norm": 98.55928802490234, "learning_rate": 2.630540296008027e-07, "loss": 5.1327, "step": 453330 }, { "epoch": 0.915775482088099, "grad_norm": 674.3197021484375, "learning_rate": 2.629423107793022e-07, "loss": 23.4372, "step": 453340 }, { "epoch": 0.9157956827207828, "grad_norm": 350.0754089355469, "learning_rate": 2.6283061504564553e-07, "loss": 14.2001, "step": 453350 }, { "epoch": 0.9158158833534666, "grad_norm": 382.4748840332031, "learning_rate": 2.6271894240037785e-07, "loss": 17.2192, "step": 453360 }, { "epoch": 0.9158360839861505, "grad_norm": 423.0195617675781, "learning_rate": 2.626072928440432e-07, "loss": 16.7564, "step": 453370 }, { "epoch": 0.9158562846188343, "grad_norm": 386.2308654785156, "learning_rate": 2.6249566637718714e-07, "loss": 21.5422, "step": 453380 }, { "epoch": 0.9158764852515181, "grad_norm": 250.33633422851562, "learning_rate": 2.623840630003516e-07, "loss": 12.7983, "step": 453390 }, { "epoch": 0.9158966858842019, "grad_norm": 0.0, "learning_rate": 2.622724827140816e-07, "loss": 26.3335, "step": 453400 }, { "epoch": 0.9159168865168857, "grad_norm": 266.47052001953125, "learning_rate": 2.6216092551892116e-07, "loss": 17.5531, "step": 453410 }, { "epoch": 0.9159370871495696, "grad_norm": 374.290771484375, "learning_rate": 2.6204939141541376e-07, "loss": 17.8065, "step": 453420 }, { "epoch": 0.9159572877822534, "grad_norm": 368.16546630859375, "learning_rate": 2.6193788040410286e-07, "loss": 22.8737, "step": 453430 }, { "epoch": 0.9159774884149372, "grad_norm": 354.1971435546875, "learning_rate": 2.618263924855324e-07, "loss": 18.6407, "step": 453440 }, { "epoch": 0.915997689047621, "grad_norm": 318.41448974609375, "learning_rate": 2.617149276602454e-07, "loss": 26.7618, "step": 453450 }, { "epoch": 0.9160178896803048, "grad_norm": 4.6045918464660645, "learning_rate": 2.616034859287847e-07, "loss": 19.446, "step": 453460 }, { "epoch": 0.9160380903129886, "grad_norm": 251.6952362060547, "learning_rate": 2.614920672916943e-07, "loss": 13.2661, "step": 453470 }, { "epoch": 0.9160582909456724, "grad_norm": 194.96925354003906, "learning_rate": 2.61380671749516e-07, "loss": 12.1911, "step": 453480 }, { "epoch": 0.9160784915783562, "grad_norm": 76.82911682128906, "learning_rate": 2.6126929930279486e-07, "loss": 18.5787, "step": 453490 }, { "epoch": 0.91609869221104, "grad_norm": 472.5380859375, "learning_rate": 2.611579499520722e-07, "loss": 24.2646, "step": 453500 }, { "epoch": 0.9161188928437238, "grad_norm": 149.053466796875, "learning_rate": 2.610466236978898e-07, "loss": 21.6734, "step": 453510 }, { "epoch": 0.9161390934764077, "grad_norm": 177.4880828857422, "learning_rate": 2.6093532054079276e-07, "loss": 17.4667, "step": 453520 }, { "epoch": 0.9161592941090915, "grad_norm": 147.05726623535156, "learning_rate": 2.6082404048132114e-07, "loss": 12.7033, "step": 453530 }, { "epoch": 0.9161794947417753, "grad_norm": 410.37933349609375, "learning_rate": 2.6071278352001904e-07, "loss": 18.072, "step": 453540 }, { "epoch": 0.9161996953744591, "grad_norm": 114.84696197509766, "learning_rate": 2.606015496574277e-07, "loss": 20.3603, "step": 453550 }, { "epoch": 0.9162198960071429, 
"grad_norm": 125.01507568359375, "learning_rate": 2.604903388940899e-07, "loss": 23.2023, "step": 453560 }, { "epoch": 0.9162400966398268, "grad_norm": 164.6056365966797, "learning_rate": 2.60379151230547e-07, "loss": 13.9271, "step": 453570 }, { "epoch": 0.9162602972725106, "grad_norm": 285.9284973144531, "learning_rate": 2.602679866673413e-07, "loss": 18.8505, "step": 453580 }, { "epoch": 0.9162804979051944, "grad_norm": 394.2331848144531, "learning_rate": 2.601568452050146e-07, "loss": 27.4004, "step": 453590 }, { "epoch": 0.9163006985378782, "grad_norm": 301.9587707519531, "learning_rate": 2.600457268441092e-07, "loss": 21.1456, "step": 453600 }, { "epoch": 0.916320899170562, "grad_norm": 417.49981689453125, "learning_rate": 2.599346315851653e-07, "loss": 17.002, "step": 453610 }, { "epoch": 0.9163410998032459, "grad_norm": 1888.8367919921875, "learning_rate": 2.598235594287246e-07, "loss": 53.7972, "step": 453620 }, { "epoch": 0.9163613004359297, "grad_norm": 2.622469186782837, "learning_rate": 2.597125103753301e-07, "loss": 17.3838, "step": 453630 }, { "epoch": 0.9163815010686135, "grad_norm": 417.6042785644531, "learning_rate": 2.596014844255218e-07, "loss": 17.2987, "step": 453640 }, { "epoch": 0.9164017017012973, "grad_norm": 831.579833984375, "learning_rate": 2.594904815798399e-07, "loss": 15.5263, "step": 453650 }, { "epoch": 0.9164219023339811, "grad_norm": 425.7539978027344, "learning_rate": 2.5937950183882734e-07, "loss": 21.3763, "step": 453660 }, { "epoch": 0.916442102966665, "grad_norm": 59.693458557128906, "learning_rate": 2.5926854520302414e-07, "loss": 30.4629, "step": 453670 }, { "epoch": 0.9164623035993488, "grad_norm": 582.9328002929688, "learning_rate": 2.591576116729705e-07, "loss": 27.7754, "step": 453680 }, { "epoch": 0.9164825042320326, "grad_norm": 186.99293518066406, "learning_rate": 2.590467012492076e-07, "loss": 20.5637, "step": 453690 }, { "epoch": 0.9165027048647164, "grad_norm": 334.56634521484375, "learning_rate": 2.589358139322767e-07, "loss": 21.4026, "step": 453700 }, { "epoch": 0.9165229054974002, "grad_norm": 414.5623779296875, "learning_rate": 2.5882494972271746e-07, "loss": 10.8386, "step": 453710 }, { "epoch": 0.9165431061300839, "grad_norm": 290.22833251953125, "learning_rate": 2.587141086210698e-07, "loss": 10.4992, "step": 453720 }, { "epoch": 0.9165633067627678, "grad_norm": 399.252197265625, "learning_rate": 2.586032906278757e-07, "loss": 16.0113, "step": 453730 }, { "epoch": 0.9165835073954516, "grad_norm": 763.1583862304688, "learning_rate": 2.584924957436735e-07, "loss": 19.304, "step": 453740 }, { "epoch": 0.9166037080281354, "grad_norm": 534.4200439453125, "learning_rate": 2.583817239690034e-07, "loss": 13.5357, "step": 453750 }, { "epoch": 0.9166239086608192, "grad_norm": 162.89605712890625, "learning_rate": 2.5827097530440605e-07, "loss": 19.0077, "step": 453760 }, { "epoch": 0.916644109293503, "grad_norm": 399.8823547363281, "learning_rate": 2.581602497504204e-07, "loss": 13.0953, "step": 453770 }, { "epoch": 0.9166643099261869, "grad_norm": 227.8401336669922, "learning_rate": 2.580495473075878e-07, "loss": 12.9742, "step": 453780 }, { "epoch": 0.9166845105588707, "grad_norm": 545.9440307617188, "learning_rate": 2.579388679764455e-07, "loss": 20.8741, "step": 453790 }, { "epoch": 0.9167047111915545, "grad_norm": 400.27392578125, "learning_rate": 2.578282117575343e-07, "loss": 20.9395, "step": 453800 }, { "epoch": 0.9167249118242383, "grad_norm": 30.04183578491211, "learning_rate": 2.577175786513936e-07, "loss": 33.2292, "step": 
453810 }, { "epoch": 0.9167451124569221, "grad_norm": 386.2145080566406, "learning_rate": 2.576069686585614e-07, "loss": 11.1608, "step": 453820 }, { "epoch": 0.916765313089606, "grad_norm": 608.4188232421875, "learning_rate": 2.5749638177957834e-07, "loss": 24.0531, "step": 453830 }, { "epoch": 0.9167855137222898, "grad_norm": 363.9738464355469, "learning_rate": 2.5738581801498187e-07, "loss": 25.8142, "step": 453840 }, { "epoch": 0.9168057143549736, "grad_norm": 498.20318603515625, "learning_rate": 2.5727527736531256e-07, "loss": 22.7052, "step": 453850 }, { "epoch": 0.9168259149876574, "grad_norm": 287.67144775390625, "learning_rate": 2.571647598311089e-07, "loss": 28.2199, "step": 453860 }, { "epoch": 0.9168461156203412, "grad_norm": 356.0575866699219, "learning_rate": 2.5705426541290765e-07, "loss": 28.7977, "step": 453870 }, { "epoch": 0.9168663162530251, "grad_norm": 364.90789794921875, "learning_rate": 2.56943794111249e-07, "loss": 13.9711, "step": 453880 }, { "epoch": 0.9168865168857089, "grad_norm": 352.82049560546875, "learning_rate": 2.5683334592667195e-07, "loss": 17.8761, "step": 453890 }, { "epoch": 0.9169067175183927, "grad_norm": 500.10650634765625, "learning_rate": 2.5672292085971276e-07, "loss": 19.1851, "step": 453900 }, { "epoch": 0.9169269181510765, "grad_norm": 606.4287719726562, "learning_rate": 2.5661251891091087e-07, "loss": 29.4943, "step": 453910 }, { "epoch": 0.9169471187837603, "grad_norm": 287.93060302734375, "learning_rate": 2.5650214008080544e-07, "loss": 11.703, "step": 453920 }, { "epoch": 0.9169673194164442, "grad_norm": 257.391357421875, "learning_rate": 2.5639178436993205e-07, "loss": 7.1054, "step": 453930 }, { "epoch": 0.916987520049128, "grad_norm": 299.9127502441406, "learning_rate": 2.5628145177882926e-07, "loss": 14.3462, "step": 453940 }, { "epoch": 0.9170077206818118, "grad_norm": 271.0744934082031, "learning_rate": 2.561711423080365e-07, "loss": 16.4024, "step": 453950 }, { "epoch": 0.9170279213144956, "grad_norm": 438.03692626953125, "learning_rate": 2.5606085595809015e-07, "loss": 19.034, "step": 453960 }, { "epoch": 0.9170481219471794, "grad_norm": 61.587188720703125, "learning_rate": 2.559505927295275e-07, "loss": 13.6311, "step": 453970 }, { "epoch": 0.9170683225798631, "grad_norm": 294.6064758300781, "learning_rate": 2.5584035262288585e-07, "loss": 15.7928, "step": 453980 }, { "epoch": 0.917088523212547, "grad_norm": 643.0828247070312, "learning_rate": 2.557301356387043e-07, "loss": 15.0719, "step": 453990 }, { "epoch": 0.9171087238452308, "grad_norm": 880.5946044921875, "learning_rate": 2.556199417775174e-07, "loss": 20.1695, "step": 454000 }, { "epoch": 0.9171289244779146, "grad_norm": 547.711669921875, "learning_rate": 2.555097710398635e-07, "loss": 16.7549, "step": 454010 }, { "epoch": 0.9171491251105984, "grad_norm": 849.7205200195312, "learning_rate": 2.553996234262801e-07, "loss": 19.2328, "step": 454020 }, { "epoch": 0.9171693257432822, "grad_norm": 251.0746307373047, "learning_rate": 2.5528949893730393e-07, "loss": 11.8855, "step": 454030 }, { "epoch": 0.9171895263759661, "grad_norm": 719.6102905273438, "learning_rate": 2.551793975734701e-07, "loss": 24.0901, "step": 454040 }, { "epoch": 0.9172097270086499, "grad_norm": 897.33056640625, "learning_rate": 2.550693193353171e-07, "loss": 19.3591, "step": 454050 }, { "epoch": 0.9172299276413337, "grad_norm": 1040.27001953125, "learning_rate": 2.5495926422338115e-07, "loss": 31.5606, "step": 454060 }, { "epoch": 0.9172501282740175, "grad_norm": 364.1861572265625, "learning_rate": 
2.548492322381968e-07, "loss": 18.4093, "step": 454070 }, { "epoch": 0.9172703289067013, "grad_norm": 278.04632568359375, "learning_rate": 2.547392233803031e-07, "loss": 10.8303, "step": 454080 }, { "epoch": 0.9172905295393852, "grad_norm": 7105.796875, "learning_rate": 2.5462923765023404e-07, "loss": 99.3802, "step": 454090 }, { "epoch": 0.917310730172069, "grad_norm": 169.97467041015625, "learning_rate": 2.5451927504852757e-07, "loss": 15.1717, "step": 454100 }, { "epoch": 0.9173309308047528, "grad_norm": 299.2624206542969, "learning_rate": 2.544093355757171e-07, "loss": 13.147, "step": 454110 }, { "epoch": 0.9173511314374366, "grad_norm": 228.32669067382812, "learning_rate": 2.5429941923234114e-07, "loss": 18.2561, "step": 454120 }, { "epoch": 0.9173713320701204, "grad_norm": 1094.8414306640625, "learning_rate": 2.541895260189342e-07, "loss": 21.1897, "step": 454130 }, { "epoch": 0.9173915327028043, "grad_norm": 386.129638671875, "learning_rate": 2.5407965593603147e-07, "loss": 9.8599, "step": 454140 }, { "epoch": 0.9174117333354881, "grad_norm": 22.72983169555664, "learning_rate": 2.539698089841691e-07, "loss": 28.813, "step": 454150 }, { "epoch": 0.9174319339681719, "grad_norm": 307.68890380859375, "learning_rate": 2.538599851638818e-07, "loss": 13.8743, "step": 454160 }, { "epoch": 0.9174521346008557, "grad_norm": 477.8509216308594, "learning_rate": 2.537501844757062e-07, "loss": 11.512, "step": 454170 }, { "epoch": 0.9174723352335395, "grad_norm": 852.4013061523438, "learning_rate": 2.5364040692017644e-07, "loss": 17.6096, "step": 454180 }, { "epoch": 0.9174925358662234, "grad_norm": 0.0, "learning_rate": 2.5353065249782647e-07, "loss": 19.7076, "step": 454190 }, { "epoch": 0.9175127364989072, "grad_norm": 74.62406921386719, "learning_rate": 2.534209212091937e-07, "loss": 19.8793, "step": 454200 }, { "epoch": 0.917532937131591, "grad_norm": 248.11647033691406, "learning_rate": 2.5331121305481154e-07, "loss": 13.3752, "step": 454210 }, { "epoch": 0.9175531377642748, "grad_norm": 529.7850952148438, "learning_rate": 2.53201528035214e-07, "loss": 17.367, "step": 454220 }, { "epoch": 0.9175733383969585, "grad_norm": 640.7024536132812, "learning_rate": 2.530918661509368e-07, "loss": 16.6216, "step": 454230 }, { "epoch": 0.9175935390296424, "grad_norm": 394.8306579589844, "learning_rate": 2.529822274025151e-07, "loss": 34.8331, "step": 454240 }, { "epoch": 0.9176137396623262, "grad_norm": 270.83837890625, "learning_rate": 2.5287261179048117e-07, "loss": 14.5951, "step": 454250 }, { "epoch": 0.91763394029501, "grad_norm": 38.40205001831055, "learning_rate": 2.5276301931537015e-07, "loss": 11.557, "step": 454260 }, { "epoch": 0.9176541409276938, "grad_norm": 283.07025146484375, "learning_rate": 2.5265344997771726e-07, "loss": 13.8433, "step": 454270 }, { "epoch": 0.9176743415603776, "grad_norm": 469.7500915527344, "learning_rate": 2.525439037780558e-07, "loss": 18.4683, "step": 454280 }, { "epoch": 0.9176945421930615, "grad_norm": 574.3396606445312, "learning_rate": 2.5243438071691826e-07, "loss": 11.2677, "step": 454290 }, { "epoch": 0.9177147428257453, "grad_norm": 244.5355682373047, "learning_rate": 2.523248807948403e-07, "loss": 19.2707, "step": 454300 }, { "epoch": 0.9177349434584291, "grad_norm": 128.16741943359375, "learning_rate": 2.522154040123559e-07, "loss": 31.2086, "step": 454310 }, { "epoch": 0.9177551440911129, "grad_norm": 91.43888854980469, "learning_rate": 2.521059503699974e-07, "loss": 50.0533, "step": 454320 }, { "epoch": 0.9177753447237967, "grad_norm": 
523.4545288085938, "learning_rate": 2.5199651986829777e-07, "loss": 9.7866, "step": 454330 }, { "epoch": 0.9177955453564806, "grad_norm": 693.93310546875, "learning_rate": 2.518871125077926e-07, "loss": 27.2713, "step": 454340 }, { "epoch": 0.9178157459891644, "grad_norm": 682.9830932617188, "learning_rate": 2.5177772828901327e-07, "loss": 17.2413, "step": 454350 }, { "epoch": 0.9178359466218482, "grad_norm": 181.77784729003906, "learning_rate": 2.5166836721249254e-07, "loss": 23.461, "step": 454360 }, { "epoch": 0.917856147254532, "grad_norm": 505.5752258300781, "learning_rate": 2.515590292787656e-07, "loss": 14.8428, "step": 454370 }, { "epoch": 0.9178763478872158, "grad_norm": 173.8253173828125, "learning_rate": 2.5144971448836263e-07, "loss": 19.8574, "step": 454380 }, { "epoch": 0.9178965485198997, "grad_norm": 465.9610595703125, "learning_rate": 2.5134042284181927e-07, "loss": 11.3948, "step": 454390 }, { "epoch": 0.9179167491525835, "grad_norm": 168.74269104003906, "learning_rate": 2.5123115433966615e-07, "loss": 16.2418, "step": 454400 }, { "epoch": 0.9179369497852673, "grad_norm": 270.9518737792969, "learning_rate": 2.5112190898243627e-07, "loss": 26.7706, "step": 454410 }, { "epoch": 0.9179571504179511, "grad_norm": 157.22084045410156, "learning_rate": 2.5101268677066247e-07, "loss": 16.8187, "step": 454420 }, { "epoch": 0.9179773510506349, "grad_norm": 1096.5396728515625, "learning_rate": 2.5090348770487604e-07, "loss": 17.5447, "step": 454430 }, { "epoch": 0.9179975516833188, "grad_norm": 61.21339797973633, "learning_rate": 2.50794311785611e-07, "loss": 14.8221, "step": 454440 }, { "epoch": 0.9180177523160026, "grad_norm": 505.9404602050781, "learning_rate": 2.5068515901339794e-07, "loss": 15.3988, "step": 454450 }, { "epoch": 0.9180379529486864, "grad_norm": 299.4237365722656, "learning_rate": 2.505760293887699e-07, "loss": 10.9603, "step": 454460 }, { "epoch": 0.9180581535813702, "grad_norm": 218.8555145263672, "learning_rate": 2.5046692291225803e-07, "loss": 5.6634, "step": 454470 }, { "epoch": 0.918078354214054, "grad_norm": 427.2490539550781, "learning_rate": 2.503578395843936e-07, "loss": 15.9759, "step": 454480 }, { "epoch": 0.9180985548467377, "grad_norm": 130.83230590820312, "learning_rate": 2.502487794057101e-07, "loss": 20.5652, "step": 454490 }, { "epoch": 0.9181187554794216, "grad_norm": 522.4658203125, "learning_rate": 2.5013974237673824e-07, "loss": 21.8345, "step": 454500 }, { "epoch": 0.9181389561121054, "grad_norm": 0.0, "learning_rate": 2.50030728498008e-07, "loss": 18.9641, "step": 454510 }, { "epoch": 0.9181591567447892, "grad_norm": 367.7684020996094, "learning_rate": 2.499217377700519e-07, "loss": 19.4399, "step": 454520 }, { "epoch": 0.918179357377473, "grad_norm": 331.11248779296875, "learning_rate": 2.498127701934022e-07, "loss": 13.3787, "step": 454530 }, { "epoch": 0.9181995580101568, "grad_norm": 173.78582763671875, "learning_rate": 2.49703825768588e-07, "loss": 15.3256, "step": 454540 }, { "epoch": 0.9182197586428407, "grad_norm": 425.071533203125, "learning_rate": 2.49594904496141e-07, "loss": 23.0658, "step": 454550 }, { "epoch": 0.9182399592755245, "grad_norm": 405.1095275878906, "learning_rate": 2.494860063765936e-07, "loss": 31.5979, "step": 454560 }, { "epoch": 0.9182601599082083, "grad_norm": 200.28211975097656, "learning_rate": 2.493771314104743e-07, "loss": 12.9257, "step": 454570 }, { "epoch": 0.9182803605408921, "grad_norm": 636.622802734375, "learning_rate": 2.492682795983148e-07, "loss": 26.3459, "step": 454580 }, { "epoch": 
0.918300561173576, "grad_norm": 449.2371520996094, "learning_rate": 2.4915945094064476e-07, "loss": 21.1839, "step": 454590 }, { "epoch": 0.9183207618062598, "grad_norm": 494.1687927246094, "learning_rate": 2.4905064543799706e-07, "loss": 34.0085, "step": 454600 }, { "epoch": 0.9183409624389436, "grad_norm": 625.0794067382812, "learning_rate": 2.4894186309089906e-07, "loss": 28.0983, "step": 454610 }, { "epoch": 0.9183611630716274, "grad_norm": 321.6091613769531, "learning_rate": 2.48833103899882e-07, "loss": 10.3564, "step": 454620 }, { "epoch": 0.9183813637043112, "grad_norm": 101.4336166381836, "learning_rate": 2.487243678654772e-07, "loss": 24.6452, "step": 454630 }, { "epoch": 0.918401564336995, "grad_norm": 319.6427001953125, "learning_rate": 2.486156549882135e-07, "loss": 24.801, "step": 454640 }, { "epoch": 0.9184217649696789, "grad_norm": 511.2293701171875, "learning_rate": 2.485069652686195e-07, "loss": 21.2323, "step": 454650 }, { "epoch": 0.9184419656023627, "grad_norm": 1020.9444580078125, "learning_rate": 2.4839829870722756e-07, "loss": 20.0905, "step": 454660 }, { "epoch": 0.9184621662350465, "grad_norm": 81.75325012207031, "learning_rate": 2.482896553045661e-07, "loss": 14.3172, "step": 454670 }, { "epoch": 0.9184823668677303, "grad_norm": 431.46844482421875, "learning_rate": 2.4818103506116355e-07, "loss": 19.0111, "step": 454680 }, { "epoch": 0.9185025675004141, "grad_norm": 302.5388488769531, "learning_rate": 2.4807243797755064e-07, "loss": 17.3512, "step": 454690 }, { "epoch": 0.918522768133098, "grad_norm": 289.9342041015625, "learning_rate": 2.479638640542564e-07, "loss": 21.5833, "step": 454700 }, { "epoch": 0.9185429687657818, "grad_norm": 292.45037841796875, "learning_rate": 2.478553132918099e-07, "loss": 15.3687, "step": 454710 }, { "epoch": 0.9185631693984656, "grad_norm": 312.3753356933594, "learning_rate": 2.477467856907401e-07, "loss": 21.3034, "step": 454720 }, { "epoch": 0.9185833700311494, "grad_norm": 407.63671875, "learning_rate": 2.4763828125157654e-07, "loss": 21.7776, "step": 454730 }, { "epoch": 0.9186035706638332, "grad_norm": 203.3709716796875, "learning_rate": 2.4752979997484774e-07, "loss": 27.606, "step": 454740 }, { "epoch": 0.918623771296517, "grad_norm": 219.9977264404297, "learning_rate": 2.474213418610816e-07, "loss": 10.9427, "step": 454750 }, { "epoch": 0.9186439719292008, "grad_norm": 118.15936279296875, "learning_rate": 2.4731290691080766e-07, "loss": 14.6467, "step": 454760 }, { "epoch": 0.9186641725618846, "grad_norm": 804.6563110351562, "learning_rate": 2.472044951245539e-07, "loss": 14.8232, "step": 454770 }, { "epoch": 0.9186843731945684, "grad_norm": 118.620849609375, "learning_rate": 2.470961065028499e-07, "loss": 8.4102, "step": 454780 }, { "epoch": 0.9187045738272522, "grad_norm": 88.24150085449219, "learning_rate": 2.4698774104622235e-07, "loss": 16.5455, "step": 454790 }, { "epoch": 0.918724774459936, "grad_norm": 339.7093200683594, "learning_rate": 2.4687939875519984e-07, "loss": 12.3436, "step": 454800 }, { "epoch": 0.9187449750926199, "grad_norm": 1048.246826171875, "learning_rate": 2.4677107963031134e-07, "loss": 22.8728, "step": 454810 }, { "epoch": 0.9187651757253037, "grad_norm": 0.4774147868156433, "learning_rate": 2.4666278367208417e-07, "loss": 9.4854, "step": 454820 }, { "epoch": 0.9187853763579875, "grad_norm": 353.9394226074219, "learning_rate": 2.465545108810452e-07, "loss": 12.8216, "step": 454830 }, { "epoch": 0.9188055769906713, "grad_norm": 17.323734283447266, "learning_rate": 2.464462612577234e-07, "loss": 
20.1536, "step": 454840 }, { "epoch": 0.9188257776233552, "grad_norm": 464.7160339355469, "learning_rate": 2.463380348026467e-07, "loss": 19.4973, "step": 454850 }, { "epoch": 0.918845978256039, "grad_norm": 355.2198486328125, "learning_rate": 2.4622983151634083e-07, "loss": 22.1437, "step": 454860 }, { "epoch": 0.9188661788887228, "grad_norm": 98.9280776977539, "learning_rate": 2.461216513993342e-07, "loss": 7.2628, "step": 454870 }, { "epoch": 0.9188863795214066, "grad_norm": 291.2118225097656, "learning_rate": 2.460134944521547e-07, "loss": 15.7866, "step": 454880 }, { "epoch": 0.9189065801540904, "grad_norm": 68.61520385742188, "learning_rate": 2.459053606753292e-07, "loss": 27.5811, "step": 454890 }, { "epoch": 0.9189267807867743, "grad_norm": 555.388671875, "learning_rate": 2.457972500693834e-07, "loss": 27.5479, "step": 454900 }, { "epoch": 0.9189469814194581, "grad_norm": 220.7833251953125, "learning_rate": 2.456891626348451e-07, "loss": 11.7957, "step": 454910 }, { "epoch": 0.9189671820521419, "grad_norm": 337.0918884277344, "learning_rate": 2.455810983722429e-07, "loss": 21.3151, "step": 454920 }, { "epoch": 0.9189873826848257, "grad_norm": 540.9188842773438, "learning_rate": 2.4547305728210015e-07, "loss": 19.5621, "step": 454930 }, { "epoch": 0.9190075833175095, "grad_norm": 21.81239891052246, "learning_rate": 2.453650393649448e-07, "loss": 19.4124, "step": 454940 }, { "epoch": 0.9190277839501934, "grad_norm": 145.65936279296875, "learning_rate": 2.4525704462130485e-07, "loss": 10.6751, "step": 454950 }, { "epoch": 0.9190479845828772, "grad_norm": 368.0899658203125, "learning_rate": 2.4514907305170476e-07, "loss": 13.6209, "step": 454960 }, { "epoch": 0.919068185215561, "grad_norm": 228.02255249023438, "learning_rate": 2.4504112465667085e-07, "loss": 15.9716, "step": 454970 }, { "epoch": 0.9190883858482448, "grad_norm": 207.96987915039062, "learning_rate": 2.44933199436731e-07, "loss": 9.5194, "step": 454980 }, { "epoch": 0.9191085864809286, "grad_norm": 123.552490234375, "learning_rate": 2.448252973924087e-07, "loss": 18.464, "step": 454990 }, { "epoch": 0.9191287871136123, "grad_norm": 89.00115203857422, "learning_rate": 2.447174185242324e-07, "loss": 16.1351, "step": 455000 }, { "epoch": 0.9191489877462962, "grad_norm": 24.298992156982422, "learning_rate": 2.446095628327261e-07, "loss": 20.8026, "step": 455010 }, { "epoch": 0.91916918837898, "grad_norm": 263.0375671386719, "learning_rate": 2.4450173031841607e-07, "loss": 18.0441, "step": 455020 }, { "epoch": 0.9191893890116638, "grad_norm": 294.327392578125, "learning_rate": 2.4439392098182804e-07, "loss": 19.281, "step": 455030 }, { "epoch": 0.9192095896443476, "grad_norm": 4.164112091064453, "learning_rate": 2.442861348234865e-07, "loss": 20.7177, "step": 455040 }, { "epoch": 0.9192297902770314, "grad_norm": 289.1728210449219, "learning_rate": 2.4417837184391833e-07, "loss": 18.8782, "step": 455050 }, { "epoch": 0.9192499909097153, "grad_norm": 171.1140899658203, "learning_rate": 2.4407063204364703e-07, "loss": 16.9614, "step": 455060 }, { "epoch": 0.9192701915423991, "grad_norm": 142.26199340820312, "learning_rate": 2.4396291542319985e-07, "loss": 9.5314, "step": 455070 }, { "epoch": 0.9192903921750829, "grad_norm": 285.46533203125, "learning_rate": 2.438552219831003e-07, "loss": 19.4052, "step": 455080 }, { "epoch": 0.9193105928077667, "grad_norm": 98.38275909423828, "learning_rate": 2.43747551723873e-07, "loss": 13.5849, "step": 455090 }, { "epoch": 0.9193307934404505, "grad_norm": 71.43067169189453, "learning_rate": 
2.4363990464604357e-07, "loss": 13.79, "step": 455100 }, { "epoch": 0.9193509940731344, "grad_norm": 370.7507019042969, "learning_rate": 2.435322807501367e-07, "loss": 11.0951, "step": 455110 }, { "epoch": 0.9193711947058182, "grad_norm": 230.6715850830078, "learning_rate": 2.4342468003667576e-07, "loss": 13.7054, "step": 455120 }, { "epoch": 0.919391395338502, "grad_norm": 126.7361068725586, "learning_rate": 2.4331710250618647e-07, "loss": 13.2743, "step": 455130 }, { "epoch": 0.9194115959711858, "grad_norm": 314.3231201171875, "learning_rate": 2.432095481591934e-07, "loss": 15.3865, "step": 455140 }, { "epoch": 0.9194317966038696, "grad_norm": 329.8556213378906, "learning_rate": 2.4310201699621896e-07, "loss": 6.9519, "step": 455150 }, { "epoch": 0.9194519972365535, "grad_norm": 1005.0066528320312, "learning_rate": 2.429945090177888e-07, "loss": 16.0441, "step": 455160 }, { "epoch": 0.9194721978692373, "grad_norm": 642.8953247070312, "learning_rate": 2.4288702422442633e-07, "loss": 21.0964, "step": 455170 }, { "epoch": 0.9194923985019211, "grad_norm": 365.2342224121094, "learning_rate": 2.4277956261665624e-07, "loss": 23.2442, "step": 455180 }, { "epoch": 0.9195125991346049, "grad_norm": 618.9346923828125, "learning_rate": 2.426721241950003e-07, "loss": 20.1788, "step": 455190 }, { "epoch": 0.9195327997672887, "grad_norm": 18.485822677612305, "learning_rate": 2.4256470895998363e-07, "loss": 10.9366, "step": 455200 }, { "epoch": 0.9195530003999726, "grad_norm": 779.13916015625, "learning_rate": 2.4245731691213137e-07, "loss": 21.7145, "step": 455210 }, { "epoch": 0.9195732010326564, "grad_norm": 243.8010711669922, "learning_rate": 2.423499480519631e-07, "loss": 12.4242, "step": 455220 }, { "epoch": 0.9195934016653402, "grad_norm": 560.136474609375, "learning_rate": 2.4224260238000454e-07, "loss": 17.1233, "step": 455230 }, { "epoch": 0.919613602298024, "grad_norm": 554.8250122070312, "learning_rate": 2.421352798967791e-07, "loss": 21.4487, "step": 455240 }, { "epoch": 0.9196338029307078, "grad_norm": 26.497678756713867, "learning_rate": 2.420279806028092e-07, "loss": 16.912, "step": 455250 }, { "epoch": 0.9196540035633916, "grad_norm": 335.10186767578125, "learning_rate": 2.4192070449861717e-07, "loss": 17.4943, "step": 455260 }, { "epoch": 0.9196742041960754, "grad_norm": 429.57232666015625, "learning_rate": 2.418134515847276e-07, "loss": 16.5135, "step": 455270 }, { "epoch": 0.9196944048287592, "grad_norm": 488.39642333984375, "learning_rate": 2.417062218616617e-07, "loss": 18.5194, "step": 455280 }, { "epoch": 0.919714605461443, "grad_norm": 113.37300872802734, "learning_rate": 2.415990153299419e-07, "loss": 9.4051, "step": 455290 }, { "epoch": 0.9197348060941268, "grad_norm": 277.68145751953125, "learning_rate": 2.414918319900922e-07, "loss": 15.3822, "step": 455300 }, { "epoch": 0.9197550067268107, "grad_norm": 112.81552124023438, "learning_rate": 2.413846718426338e-07, "loss": 8.4589, "step": 455310 }, { "epoch": 0.9197752073594945, "grad_norm": 36.12300491333008, "learning_rate": 2.412775348880897e-07, "loss": 11.6502, "step": 455320 }, { "epoch": 0.9197954079921783, "grad_norm": 292.3298034667969, "learning_rate": 2.4117042112698107e-07, "loss": 22.0428, "step": 455330 }, { "epoch": 0.9198156086248621, "grad_norm": 442.1577453613281, "learning_rate": 2.410633305598309e-07, "loss": 10.4833, "step": 455340 }, { "epoch": 0.9198358092575459, "grad_norm": 704.8218383789062, "learning_rate": 2.4095626318716146e-07, "loss": 17.864, "step": 455350 }, { "epoch": 0.9198560098902298, 
"grad_norm": 6.83566951751709, "learning_rate": 2.40849219009493e-07, "loss": 16.1764, "step": 455360 }, { "epoch": 0.9198762105229136, "grad_norm": 534.9774780273438, "learning_rate": 2.407421980273489e-07, "loss": 15.982, "step": 455370 }, { "epoch": 0.9198964111555974, "grad_norm": 246.0690460205078, "learning_rate": 2.406352002412499e-07, "loss": 12.9856, "step": 455380 }, { "epoch": 0.9199166117882812, "grad_norm": 541.630126953125, "learning_rate": 2.4052822565171775e-07, "loss": 35.701, "step": 455390 }, { "epoch": 0.919936812420965, "grad_norm": 363.7542419433594, "learning_rate": 2.404212742592743e-07, "loss": 11.3771, "step": 455400 }, { "epoch": 0.9199570130536489, "grad_norm": 228.14138793945312, "learning_rate": 2.4031434606443914e-07, "loss": 18.2189, "step": 455410 }, { "epoch": 0.9199772136863327, "grad_norm": 1217.557861328125, "learning_rate": 2.4020744106773573e-07, "loss": 16.9061, "step": 455420 }, { "epoch": 0.9199974143190165, "grad_norm": 165.05870056152344, "learning_rate": 2.401005592696837e-07, "loss": 5.6709, "step": 455430 }, { "epoch": 0.9200176149517003, "grad_norm": 351.95660400390625, "learning_rate": 2.399937006708036e-07, "loss": 15.8542, "step": 455440 }, { "epoch": 0.9200378155843841, "grad_norm": 201.84127807617188, "learning_rate": 2.3988686527161686e-07, "loss": 10.515, "step": 455450 }, { "epoch": 0.920058016217068, "grad_norm": 519.5714111328125, "learning_rate": 2.3978005307264517e-07, "loss": 15.5738, "step": 455460 }, { "epoch": 0.9200782168497518, "grad_norm": 326.5455627441406, "learning_rate": 2.396732640744076e-07, "loss": 16.2957, "step": 455470 }, { "epoch": 0.9200984174824356, "grad_norm": 346.3192443847656, "learning_rate": 2.395664982774243e-07, "loss": 10.8901, "step": 455480 }, { "epoch": 0.9201186181151194, "grad_norm": 306.8151550292969, "learning_rate": 2.3945975568221814e-07, "loss": 12.2765, "step": 455490 }, { "epoch": 0.9201388187478032, "grad_norm": 392.6286315917969, "learning_rate": 2.3935303628930705e-07, "loss": 19.6382, "step": 455500 }, { "epoch": 0.9201590193804869, "grad_norm": 130.42257690429688, "learning_rate": 2.392463400992112e-07, "loss": 26.4354, "step": 455510 }, { "epoch": 0.9201792200131708, "grad_norm": 803.9397583007812, "learning_rate": 2.3913966711245185e-07, "loss": 24.5252, "step": 455520 }, { "epoch": 0.9201994206458546, "grad_norm": 530.8009033203125, "learning_rate": 2.390330173295491e-07, "loss": 17.6739, "step": 455530 }, { "epoch": 0.9202196212785384, "grad_norm": 526.763671875, "learning_rate": 2.389263907510209e-07, "loss": 21.4294, "step": 455540 }, { "epoch": 0.9202398219112222, "grad_norm": 629.4849853515625, "learning_rate": 2.388197873773879e-07, "loss": 19.7043, "step": 455550 }, { "epoch": 0.920260022543906, "grad_norm": 201.6699981689453, "learning_rate": 2.387132072091708e-07, "loss": 17.9511, "step": 455560 }, { "epoch": 0.9202802231765899, "grad_norm": 579.7809448242188, "learning_rate": 2.3860665024688757e-07, "loss": 18.4522, "step": 455570 }, { "epoch": 0.9203004238092737, "grad_norm": 169.97409057617188, "learning_rate": 2.3850011649105774e-07, "loss": 16.2272, "step": 455580 }, { "epoch": 0.9203206244419575, "grad_norm": 380.0137023925781, "learning_rate": 2.3839360594220094e-07, "loss": 8.5757, "step": 455590 }, { "epoch": 0.9203408250746413, "grad_norm": 528.6257934570312, "learning_rate": 2.3828711860083676e-07, "loss": 11.0365, "step": 455600 }, { "epoch": 0.9203610257073251, "grad_norm": 328.374755859375, "learning_rate": 2.3818065446748306e-07, "loss": 15.4559, "step": 
455610 }, { "epoch": 0.920381226340009, "grad_norm": 558.6610717773438, "learning_rate": 2.380742135426589e-07, "loss": 28.1747, "step": 455620 }, { "epoch": 0.9204014269726928, "grad_norm": 217.7898712158203, "learning_rate": 2.3796779582688444e-07, "loss": 28.7116, "step": 455630 }, { "epoch": 0.9204216276053766, "grad_norm": 835.9548950195312, "learning_rate": 2.3786140132067703e-07, "loss": 14.7242, "step": 455640 }, { "epoch": 0.9204418282380604, "grad_norm": 386.291748046875, "learning_rate": 2.3775503002455514e-07, "loss": 17.8009, "step": 455650 }, { "epoch": 0.9204620288707442, "grad_norm": 310.6231384277344, "learning_rate": 2.3764868193903835e-07, "loss": 36.5693, "step": 455660 }, { "epoch": 0.9204822295034281, "grad_norm": 138.1139373779297, "learning_rate": 2.3754235706464457e-07, "loss": 14.5568, "step": 455670 }, { "epoch": 0.9205024301361119, "grad_norm": 622.2490844726562, "learning_rate": 2.3743605540189063e-07, "loss": 23.2112, "step": 455680 }, { "epoch": 0.9205226307687957, "grad_norm": 221.71322631835938, "learning_rate": 2.3732977695129612e-07, "loss": 17.7805, "step": 455690 }, { "epoch": 0.9205428314014795, "grad_norm": 127.730712890625, "learning_rate": 2.3722352171337836e-07, "loss": 22.0893, "step": 455700 }, { "epoch": 0.9205630320341633, "grad_norm": 697.8600463867188, "learning_rate": 2.3711728968865643e-07, "loss": 21.791, "step": 455710 }, { "epoch": 0.9205832326668472, "grad_norm": 485.99853515625, "learning_rate": 2.3701108087764657e-07, "loss": 9.2866, "step": 455720 }, { "epoch": 0.920603433299531, "grad_norm": 415.9108581542969, "learning_rate": 2.3690489528086668e-07, "loss": 10.3244, "step": 455730 }, { "epoch": 0.9206236339322148, "grad_norm": 396.4830322265625, "learning_rate": 2.367987328988347e-07, "loss": 12.8008, "step": 455740 }, { "epoch": 0.9206438345648986, "grad_norm": 450.2491760253906, "learning_rate": 2.366925937320691e-07, "loss": 23.0683, "step": 455750 }, { "epoch": 0.9206640351975824, "grad_norm": 267.2088623046875, "learning_rate": 2.36586477781085e-07, "loss": 25.6832, "step": 455760 }, { "epoch": 0.9206842358302662, "grad_norm": 849.0910034179688, "learning_rate": 2.3648038504640036e-07, "loss": 15.9315, "step": 455770 }, { "epoch": 0.92070443646295, "grad_norm": 1106.893798828125, "learning_rate": 2.3637431552853363e-07, "loss": 17.4516, "step": 455780 }, { "epoch": 0.9207246370956338, "grad_norm": 865.7428588867188, "learning_rate": 2.362682692280005e-07, "loss": 26.9862, "step": 455790 }, { "epoch": 0.9207448377283176, "grad_norm": 594.6658325195312, "learning_rate": 2.361622461453178e-07, "loss": 19.4022, "step": 455800 }, { "epoch": 0.9207650383610014, "grad_norm": 248.6200714111328, "learning_rate": 2.3605624628100178e-07, "loss": 15.1621, "step": 455810 }, { "epoch": 0.9207852389936853, "grad_norm": 509.28375244140625, "learning_rate": 2.3595026963557145e-07, "loss": 18.1621, "step": 455820 }, { "epoch": 0.9208054396263691, "grad_norm": 231.5879669189453, "learning_rate": 2.3584431620954085e-07, "loss": 17.4094, "step": 455830 }, { "epoch": 0.9208256402590529, "grad_norm": 151.7647705078125, "learning_rate": 2.357383860034268e-07, "loss": 16.7027, "step": 455840 }, { "epoch": 0.9208458408917367, "grad_norm": 314.00775146484375, "learning_rate": 2.3563247901774666e-07, "loss": 11.1552, "step": 455850 }, { "epoch": 0.9208660415244205, "grad_norm": 340.83489990234375, "learning_rate": 2.3552659525301557e-07, "loss": 13.9794, "step": 455860 }, { "epoch": 0.9208862421571044, "grad_norm": 376.7004089355469, "learning_rate": 
2.354207347097498e-07, "loss": 19.8883, "step": 455870 }, { "epoch": 0.9209064427897882, "grad_norm": 568.572265625, "learning_rate": 2.3531489738846613e-07, "loss": 28.6371, "step": 455880 }, { "epoch": 0.920926643422472, "grad_norm": 324.4543762207031, "learning_rate": 2.3520908328968027e-07, "loss": 21.9086, "step": 455890 }, { "epoch": 0.9209468440551558, "grad_norm": 622.8432006835938, "learning_rate": 2.351032924139063e-07, "loss": 17.3053, "step": 455900 }, { "epoch": 0.9209670446878396, "grad_norm": 20.084917068481445, "learning_rate": 2.349975247616615e-07, "loss": 22.7659, "step": 455910 }, { "epoch": 0.9209872453205235, "grad_norm": 2.691534996032715, "learning_rate": 2.3489178033345994e-07, "loss": 14.0358, "step": 455920 }, { "epoch": 0.9210074459532073, "grad_norm": 561.95361328125, "learning_rate": 2.34786059129819e-07, "loss": 15.8221, "step": 455930 }, { "epoch": 0.9210276465858911, "grad_norm": 415.3448791503906, "learning_rate": 2.3468036115125215e-07, "loss": 15.3055, "step": 455940 }, { "epoch": 0.9210478472185749, "grad_norm": 369.7100830078125, "learning_rate": 2.3457468639827563e-07, "loss": 22.8397, "step": 455950 }, { "epoch": 0.9210680478512587, "grad_norm": 155.63955688476562, "learning_rate": 2.344690348714046e-07, "loss": 14.0282, "step": 455960 }, { "epoch": 0.9210882484839426, "grad_norm": 211.77499389648438, "learning_rate": 2.3436340657115253e-07, "loss": 25.2555, "step": 455970 }, { "epoch": 0.9211084491166264, "grad_norm": 341.98150634765625, "learning_rate": 2.3425780149803623e-07, "loss": 8.1026, "step": 455980 }, { "epoch": 0.9211286497493102, "grad_norm": 704.5994873046875, "learning_rate": 2.3415221965256807e-07, "loss": 26.6054, "step": 455990 }, { "epoch": 0.921148850381994, "grad_norm": 0.6574216485023499, "learning_rate": 2.3404666103526542e-07, "loss": 18.6659, "step": 456000 }, { "epoch": 0.9211690510146778, "grad_norm": 143.26748657226562, "learning_rate": 2.3394112564664062e-07, "loss": 15.0257, "step": 456010 }, { "epoch": 0.9211892516473615, "grad_norm": 477.011474609375, "learning_rate": 2.338356134872083e-07, "loss": 18.866, "step": 456020 }, { "epoch": 0.9212094522800454, "grad_norm": 669.2011108398438, "learning_rate": 2.3373012455748356e-07, "loss": 23.3207, "step": 456030 }, { "epoch": 0.9212296529127292, "grad_norm": 387.2029724121094, "learning_rate": 2.3362465885798046e-07, "loss": 19.2357, "step": 456040 }, { "epoch": 0.921249853545413, "grad_norm": 1630.5396728515625, "learning_rate": 2.3351921638921193e-07, "loss": 23.0036, "step": 456050 }, { "epoch": 0.9212700541780968, "grad_norm": 55.501487731933594, "learning_rate": 2.3341379715169254e-07, "loss": 8.9039, "step": 456060 }, { "epoch": 0.9212902548107806, "grad_norm": 258.55023193359375, "learning_rate": 2.33308401145938e-07, "loss": 8.9358, "step": 456070 }, { "epoch": 0.9213104554434645, "grad_norm": 803.1054077148438, "learning_rate": 2.3320302837245846e-07, "loss": 20.5455, "step": 456080 }, { "epoch": 0.9213306560761483, "grad_norm": 273.6131286621094, "learning_rate": 2.3309767883176903e-07, "loss": 28.0867, "step": 456090 }, { "epoch": 0.9213508567088321, "grad_norm": 248.03652954101562, "learning_rate": 2.3299235252438434e-07, "loss": 30.396, "step": 456100 }, { "epoch": 0.9213710573415159, "grad_norm": 518.1532592773438, "learning_rate": 2.3288704945081675e-07, "loss": 24.2605, "step": 456110 }, { "epoch": 0.9213912579741997, "grad_norm": 412.0128479003906, "learning_rate": 2.327817696115786e-07, "loss": 23.6396, "step": 456120 }, { "epoch": 0.9214114586068836, 
"grad_norm": 296.6305236816406, "learning_rate": 2.3267651300718397e-07, "loss": 11.4003, "step": 456130 }, { "epoch": 0.9214316592395674, "grad_norm": 601.3193359375, "learning_rate": 2.325712796381474e-07, "loss": 34.8444, "step": 456140 }, { "epoch": 0.9214518598722512, "grad_norm": 1319.228515625, "learning_rate": 2.3246606950497851e-07, "loss": 17.4915, "step": 456150 }, { "epoch": 0.921472060504935, "grad_norm": 450.4251708984375, "learning_rate": 2.3236088260819188e-07, "loss": 19.1523, "step": 456160 }, { "epoch": 0.9214922611376188, "grad_norm": 232.65257263183594, "learning_rate": 2.3225571894830047e-07, "loss": 32.4943, "step": 456170 }, { "epoch": 0.9215124617703027, "grad_norm": 534.1532592773438, "learning_rate": 2.3215057852581712e-07, "loss": 22.4241, "step": 456180 }, { "epoch": 0.9215326624029865, "grad_norm": 448.429443359375, "learning_rate": 2.3204546134125207e-07, "loss": 13.9421, "step": 456190 }, { "epoch": 0.9215528630356703, "grad_norm": 756.3300170898438, "learning_rate": 2.319403673951204e-07, "loss": 30.2919, "step": 456200 }, { "epoch": 0.9215730636683541, "grad_norm": 176.592041015625, "learning_rate": 2.3183529668793282e-07, "loss": 16.056, "step": 456210 }, { "epoch": 0.9215932643010379, "grad_norm": 563.3243408203125, "learning_rate": 2.3173024922020114e-07, "loss": 27.7949, "step": 456220 }, { "epoch": 0.9216134649337218, "grad_norm": 545.1439819335938, "learning_rate": 2.3162522499243833e-07, "loss": 18.0718, "step": 456230 }, { "epoch": 0.9216336655664056, "grad_norm": 2.2020576000213623, "learning_rate": 2.3152022400515561e-07, "loss": 9.7643, "step": 456240 }, { "epoch": 0.9216538661990894, "grad_norm": 372.8847351074219, "learning_rate": 2.314152462588659e-07, "loss": 14.4089, "step": 456250 }, { "epoch": 0.9216740668317732, "grad_norm": 359.3384094238281, "learning_rate": 2.3131029175407883e-07, "loss": 16.9152, "step": 456260 }, { "epoch": 0.921694267464457, "grad_norm": 105.58753204345703, "learning_rate": 2.3120536049130727e-07, "loss": 15.5293, "step": 456270 }, { "epoch": 0.9217144680971407, "grad_norm": 756.6207885742188, "learning_rate": 2.3110045247106305e-07, "loss": 17.3203, "step": 456280 }, { "epoch": 0.9217346687298246, "grad_norm": 200.3162384033203, "learning_rate": 2.3099556769385578e-07, "loss": 20.6643, "step": 456290 }, { "epoch": 0.9217548693625084, "grad_norm": 505.3739318847656, "learning_rate": 2.3089070616019838e-07, "loss": 26.1185, "step": 456300 }, { "epoch": 0.9217750699951922, "grad_norm": 161.13502502441406, "learning_rate": 2.3078586787060098e-07, "loss": 18.3574, "step": 456310 }, { "epoch": 0.921795270627876, "grad_norm": 254.5853729248047, "learning_rate": 2.306810528255754e-07, "loss": 20.247, "step": 456320 }, { "epoch": 0.9218154712605598, "grad_norm": 281.9202575683594, "learning_rate": 2.3057626102563125e-07, "loss": 18.4267, "step": 456330 }, { "epoch": 0.9218356718932437, "grad_norm": 411.31146240234375, "learning_rate": 2.3047149247127975e-07, "loss": 19.3035, "step": 456340 }, { "epoch": 0.9218558725259275, "grad_norm": 285.017822265625, "learning_rate": 2.3036674716303277e-07, "loss": 11.5934, "step": 456350 }, { "epoch": 0.9218760731586113, "grad_norm": 234.5925750732422, "learning_rate": 2.3026202510139928e-07, "loss": 26.1673, "step": 456360 }, { "epoch": 0.9218962737912951, "grad_norm": 180.01597595214844, "learning_rate": 2.3015732628688948e-07, "loss": 20.587, "step": 456370 }, { "epoch": 0.921916474423979, "grad_norm": 229.5282745361328, "learning_rate": 2.300526507200146e-07, "loss": 20.5006, 
"step": 456380 }, { "epoch": 0.9219366750566628, "grad_norm": 141.17848205566406, "learning_rate": 2.2994799840128533e-07, "loss": 10.3158, "step": 456390 }, { "epoch": 0.9219568756893466, "grad_norm": 196.68496704101562, "learning_rate": 2.2984336933121076e-07, "loss": 19.1464, "step": 456400 }, { "epoch": 0.9219770763220304, "grad_norm": 318.13580322265625, "learning_rate": 2.2973876351030046e-07, "loss": 20.5453, "step": 456410 }, { "epoch": 0.9219972769547142, "grad_norm": 117.87385559082031, "learning_rate": 2.2963418093906453e-07, "loss": 13.3332, "step": 456420 }, { "epoch": 0.922017477587398, "grad_norm": 398.493896484375, "learning_rate": 2.2952962161801485e-07, "loss": 20.5647, "step": 456430 }, { "epoch": 0.9220376782200819, "grad_norm": 551.0594482421875, "learning_rate": 2.2942508554765764e-07, "loss": 27.3807, "step": 456440 }, { "epoch": 0.9220578788527657, "grad_norm": 418.9258117675781, "learning_rate": 2.2932057272850416e-07, "loss": 22.1432, "step": 456450 }, { "epoch": 0.9220780794854495, "grad_norm": 318.01776123046875, "learning_rate": 2.2921608316106402e-07, "loss": 13.1443, "step": 456460 }, { "epoch": 0.9220982801181333, "grad_norm": 500.9718322753906, "learning_rate": 2.2911161684584626e-07, "loss": 18.2248, "step": 456470 }, { "epoch": 0.9221184807508171, "grad_norm": 678.5086059570312, "learning_rate": 2.290071737833588e-07, "loss": 12.8109, "step": 456480 }, { "epoch": 0.922138681383501, "grad_norm": 999.7238159179688, "learning_rate": 2.2890275397411288e-07, "loss": 27.8825, "step": 456490 }, { "epoch": 0.9221588820161848, "grad_norm": 319.5143127441406, "learning_rate": 2.287983574186159e-07, "loss": 17.5575, "step": 456500 }, { "epoch": 0.9221790826488686, "grad_norm": 279.6608581542969, "learning_rate": 2.2869398411737687e-07, "loss": 8.8441, "step": 456510 }, { "epoch": 0.9221992832815524, "grad_norm": 411.6119079589844, "learning_rate": 2.2858963407090484e-07, "loss": 16.573, "step": 456520 }, { "epoch": 0.9222194839142362, "grad_norm": 264.779541015625, "learning_rate": 2.2848530727970775e-07, "loss": 13.1386, "step": 456530 }, { "epoch": 0.92223968454692, "grad_norm": 456.6824035644531, "learning_rate": 2.2838100374429518e-07, "loss": 26.1654, "step": 456540 }, { "epoch": 0.9222598851796038, "grad_norm": 541.8452758789062, "learning_rate": 2.2827672346517448e-07, "loss": 19.0031, "step": 456550 }, { "epoch": 0.9222800858122876, "grad_norm": 209.32427978515625, "learning_rate": 2.2817246644285472e-07, "loss": 13.9763, "step": 456560 }, { "epoch": 0.9223002864449714, "grad_norm": 547.7611083984375, "learning_rate": 2.2806823267784327e-07, "loss": 13.4791, "step": 456570 }, { "epoch": 0.9223204870776552, "grad_norm": 513.3222045898438, "learning_rate": 2.2796402217064806e-07, "loss": 43.8032, "step": 456580 }, { "epoch": 0.9223406877103391, "grad_norm": 145.3455810546875, "learning_rate": 2.2785983492177867e-07, "loss": 35.2751, "step": 456590 }, { "epoch": 0.9223608883430229, "grad_norm": 489.8966064453125, "learning_rate": 2.2775567093174022e-07, "loss": 38.8586, "step": 456600 }, { "epoch": 0.9223810889757067, "grad_norm": 584.9359130859375, "learning_rate": 2.2765153020104292e-07, "loss": 16.7876, "step": 456610 }, { "epoch": 0.9224012896083905, "grad_norm": 157.879638671875, "learning_rate": 2.27547412730193e-07, "loss": 22.6318, "step": 456620 }, { "epoch": 0.9224214902410743, "grad_norm": 298.29937744140625, "learning_rate": 2.274433185196978e-07, "loss": 26.4337, "step": 456630 }, { "epoch": 0.9224416908737582, "grad_norm": 90.5885238647461, 
"learning_rate": 2.2733924757006531e-07, "loss": 16.6558, "step": 456640 }, { "epoch": 0.922461891506442, "grad_norm": 346.80889892578125, "learning_rate": 2.2723519988180232e-07, "loss": 27.9793, "step": 456650 }, { "epoch": 0.9224820921391258, "grad_norm": 355.554931640625, "learning_rate": 2.2713117545541618e-07, "loss": 8.9129, "step": 456660 }, { "epoch": 0.9225022927718096, "grad_norm": 307.0997619628906, "learning_rate": 2.270271742914132e-07, "loss": 19.7816, "step": 456670 }, { "epoch": 0.9225224934044934, "grad_norm": 178.32456970214844, "learning_rate": 2.269231963903018e-07, "loss": 13.3781, "step": 456680 }, { "epoch": 0.9225426940371773, "grad_norm": 317.56158447265625, "learning_rate": 2.2681924175258773e-07, "loss": 14.2739, "step": 456690 }, { "epoch": 0.9225628946698611, "grad_norm": 227.15536499023438, "learning_rate": 2.2671531037877724e-07, "loss": 7.1665, "step": 456700 }, { "epoch": 0.9225830953025449, "grad_norm": 186.4401397705078, "learning_rate": 2.2661140226937773e-07, "loss": 15.9246, "step": 456710 }, { "epoch": 0.9226032959352287, "grad_norm": 461.26959228515625, "learning_rate": 2.2650751742489542e-07, "loss": 16.1091, "step": 456720 }, { "epoch": 0.9226234965679125, "grad_norm": 550.1011352539062, "learning_rate": 2.2640365584583602e-07, "loss": 12.9847, "step": 456730 }, { "epoch": 0.9226436972005964, "grad_norm": 301.6156005859375, "learning_rate": 2.2629981753270636e-07, "loss": 25.3048, "step": 456740 }, { "epoch": 0.9226638978332802, "grad_norm": 535.1401977539062, "learning_rate": 2.2619600248601327e-07, "loss": 17.4736, "step": 456750 }, { "epoch": 0.922684098465964, "grad_norm": 250.36090087890625, "learning_rate": 2.2609221070626132e-07, "loss": 22.4407, "step": 456760 }, { "epoch": 0.9227042990986478, "grad_norm": 615.8826293945312, "learning_rate": 2.259884421939562e-07, "loss": 15.605, "step": 456770 }, { "epoch": 0.9227244997313316, "grad_norm": 851.8646850585938, "learning_rate": 2.2588469694960535e-07, "loss": 19.2829, "step": 456780 }, { "epoch": 0.9227447003640153, "grad_norm": 488.8655700683594, "learning_rate": 2.2578097497371333e-07, "loss": 12.2094, "step": 456790 }, { "epoch": 0.9227649009966992, "grad_norm": 184.11412048339844, "learning_rate": 2.2567727626678527e-07, "loss": 16.5717, "step": 456800 }, { "epoch": 0.922785101629383, "grad_norm": 157.20469665527344, "learning_rate": 2.2557360082932745e-07, "loss": 21.0107, "step": 456810 }, { "epoch": 0.9228053022620668, "grad_norm": 558.7533569335938, "learning_rate": 2.2546994866184557e-07, "loss": 14.0995, "step": 456820 }, { "epoch": 0.9228255028947506, "grad_norm": 219.511474609375, "learning_rate": 2.253663197648426e-07, "loss": 22.1468, "step": 456830 }, { "epoch": 0.9228457035274344, "grad_norm": 254.12046813964844, "learning_rate": 2.2526271413882528e-07, "loss": 10.6055, "step": 456840 }, { "epoch": 0.9228659041601183, "grad_norm": 282.03717041015625, "learning_rate": 2.2515913178429937e-07, "loss": 13.9128, "step": 456850 }, { "epoch": 0.9228861047928021, "grad_norm": 658.669189453125, "learning_rate": 2.2505557270176837e-07, "loss": 21.0541, "step": 456860 }, { "epoch": 0.9229063054254859, "grad_norm": 399.0304260253906, "learning_rate": 2.249520368917374e-07, "loss": 15.579, "step": 456870 }, { "epoch": 0.9229265060581697, "grad_norm": 888.0962524414062, "learning_rate": 2.2484852435471106e-07, "loss": 26.448, "step": 456880 }, { "epoch": 0.9229467066908535, "grad_norm": 123.26486206054688, "learning_rate": 2.2474503509119394e-07, "loss": 16.2698, "step": 456890 }, { 
"epoch": 0.9229669073235374, "grad_norm": 622.527099609375, "learning_rate": 2.2464156910168954e-07, "loss": 21.5968, "step": 456900 }, { "epoch": 0.9229871079562212, "grad_norm": 94.87808990478516, "learning_rate": 2.2453812638670413e-07, "loss": 16.1051, "step": 456910 }, { "epoch": 0.923007308588905, "grad_norm": 231.9738006591797, "learning_rate": 2.2443470694673953e-07, "loss": 12.1589, "step": 456920 }, { "epoch": 0.9230275092215888, "grad_norm": 812.3882446289062, "learning_rate": 2.2433131078230196e-07, "loss": 36.6796, "step": 456930 }, { "epoch": 0.9230477098542726, "grad_norm": 472.25018310546875, "learning_rate": 2.242279378938944e-07, "loss": 21.6882, "step": 456940 }, { "epoch": 0.9230679104869565, "grad_norm": 667.198974609375, "learning_rate": 2.2412458828201977e-07, "loss": 23.4013, "step": 456950 }, { "epoch": 0.9230881111196403, "grad_norm": 68.68064880371094, "learning_rate": 2.2402126194718322e-07, "loss": 13.6757, "step": 456960 }, { "epoch": 0.9231083117523241, "grad_norm": 491.9569396972656, "learning_rate": 2.2391795888988822e-07, "loss": 28.9148, "step": 456970 }, { "epoch": 0.9231285123850079, "grad_norm": 482.0792541503906, "learning_rate": 2.2381467911063658e-07, "loss": 31.3768, "step": 456980 }, { "epoch": 0.9231487130176917, "grad_norm": 214.32330322265625, "learning_rate": 2.237114226099335e-07, "loss": 51.9785, "step": 456990 }, { "epoch": 0.9231689136503756, "grad_norm": 533.4446411132812, "learning_rate": 2.2360818938828189e-07, "loss": 20.4225, "step": 457000 }, { "epoch": 0.9231891142830594, "grad_norm": 268.1573181152344, "learning_rate": 2.2350497944618466e-07, "loss": 15.3793, "step": 457010 }, { "epoch": 0.9232093149157432, "grad_norm": 460.7972717285156, "learning_rate": 2.234017927841442e-07, "loss": 20.3809, "step": 457020 }, { "epoch": 0.923229515548427, "grad_norm": 72.43126678466797, "learning_rate": 2.2329862940266511e-07, "loss": 24.144, "step": 457030 }, { "epoch": 0.9232497161811108, "grad_norm": 144.76942443847656, "learning_rate": 2.2319548930224865e-07, "loss": 9.2042, "step": 457040 }, { "epoch": 0.9232699168137946, "grad_norm": 414.5937194824219, "learning_rate": 2.2309237248339776e-07, "loss": 18.7369, "step": 457050 }, { "epoch": 0.9232901174464784, "grad_norm": 315.70648193359375, "learning_rate": 2.2298927894661481e-07, "loss": 18.66, "step": 457060 }, { "epoch": 0.9233103180791622, "grad_norm": 640.033203125, "learning_rate": 2.2288620869240384e-07, "loss": 23.0898, "step": 457070 }, { "epoch": 0.923330518711846, "grad_norm": 205.42617797851562, "learning_rate": 2.2278316172126612e-07, "loss": 15.6564, "step": 457080 }, { "epoch": 0.9233507193445298, "grad_norm": 1400.28759765625, "learning_rate": 2.2268013803370292e-07, "loss": 27.0673, "step": 457090 }, { "epoch": 0.9233709199772137, "grad_norm": 338.8489685058594, "learning_rate": 2.2257713763021826e-07, "loss": 20.6615, "step": 457100 }, { "epoch": 0.9233911206098975, "grad_norm": 253.6118927001953, "learning_rate": 2.2247416051131288e-07, "loss": 13.7819, "step": 457110 }, { "epoch": 0.9234113212425813, "grad_norm": 162.47174072265625, "learning_rate": 2.2237120667748856e-07, "loss": 14.0294, "step": 457120 }, { "epoch": 0.9234315218752651, "grad_norm": 318.33233642578125, "learning_rate": 2.2226827612924774e-07, "loss": 19.7768, "step": 457130 }, { "epoch": 0.9234517225079489, "grad_norm": 17.8799991607666, "learning_rate": 2.221653688670916e-07, "loss": 17.8933, "step": 457140 }, { "epoch": 0.9234719231406328, "grad_norm": 501.81207275390625, "learning_rate": 
2.220624848915226e-07, "loss": 21.2872, "step": 457150 }, { "epoch": 0.9234921237733166, "grad_norm": 313.23614501953125, "learning_rate": 2.2195962420304083e-07, "loss": 14.9458, "step": 457160 }, { "epoch": 0.9235123244060004, "grad_norm": 307.3273010253906, "learning_rate": 2.2185678680214927e-07, "loss": 23.8175, "step": 457170 }, { "epoch": 0.9235325250386842, "grad_norm": 324.7360534667969, "learning_rate": 2.2175397268934807e-07, "loss": 20.4862, "step": 457180 }, { "epoch": 0.923552725671368, "grad_norm": 141.25596618652344, "learning_rate": 2.216511818651379e-07, "loss": 19.4876, "step": 457190 }, { "epoch": 0.9235729263040519, "grad_norm": 443.9954528808594, "learning_rate": 2.2154841433002062e-07, "loss": 16.8223, "step": 457200 }, { "epoch": 0.9235931269367357, "grad_norm": 601.5424194335938, "learning_rate": 2.2144567008449636e-07, "loss": 29.5105, "step": 457210 }, { "epoch": 0.9236133275694195, "grad_norm": 606.4437255859375, "learning_rate": 2.2134294912906696e-07, "loss": 25.3341, "step": 457220 }, { "epoch": 0.9236335282021033, "grad_norm": 327.7691955566406, "learning_rate": 2.2124025146423255e-07, "loss": 16.0972, "step": 457230 }, { "epoch": 0.9236537288347871, "grad_norm": 0.0, "learning_rate": 2.2113757709049277e-07, "loss": 10.7911, "step": 457240 }, { "epoch": 0.923673929467471, "grad_norm": 412.32818603515625, "learning_rate": 2.210349260083494e-07, "loss": 19.2258, "step": 457250 }, { "epoch": 0.9236941301001548, "grad_norm": 642.1405639648438, "learning_rate": 2.2093229821830263e-07, "loss": 12.5188, "step": 457260 }, { "epoch": 0.9237143307328386, "grad_norm": 217.98617553710938, "learning_rate": 2.208296937208515e-07, "loss": 7.6809, "step": 457270 }, { "epoch": 0.9237345313655224, "grad_norm": 428.9268493652344, "learning_rate": 2.2072711251649615e-07, "loss": 7.977, "step": 457280 }, { "epoch": 0.9237547319982062, "grad_norm": 475.7462158203125, "learning_rate": 2.2062455460573838e-07, "loss": 20.2152, "step": 457290 }, { "epoch": 0.9237749326308899, "grad_norm": 620.5607299804688, "learning_rate": 2.2052201998907673e-07, "loss": 16.3221, "step": 457300 }, { "epoch": 0.9237951332635738, "grad_norm": 581.6460571289062, "learning_rate": 2.2041950866701078e-07, "loss": 32.3453, "step": 457310 }, { "epoch": 0.9238153338962576, "grad_norm": 61.62921142578125, "learning_rate": 2.2031702064004067e-07, "loss": 11.4661, "step": 457320 }, { "epoch": 0.9238355345289414, "grad_norm": 357.42230224609375, "learning_rate": 2.2021455590866546e-07, "loss": 26.6565, "step": 457330 }, { "epoch": 0.9238557351616252, "grad_norm": 72.99015045166016, "learning_rate": 2.2011211447338477e-07, "loss": 20.1441, "step": 457340 }, { "epoch": 0.923875935794309, "grad_norm": 806.2169189453125, "learning_rate": 2.200096963346976e-07, "loss": 22.0562, "step": 457350 }, { "epoch": 0.9238961364269929, "grad_norm": 346.07818603515625, "learning_rate": 2.199073014931047e-07, "loss": 15.2492, "step": 457360 }, { "epoch": 0.9239163370596767, "grad_norm": 3.393545150756836, "learning_rate": 2.198049299491023e-07, "loss": 17.5839, "step": 457370 }, { "epoch": 0.9239365376923605, "grad_norm": 210.43228149414062, "learning_rate": 2.1970258170319114e-07, "loss": 8.5199, "step": 457380 }, { "epoch": 0.9239567383250443, "grad_norm": 230.42808532714844, "learning_rate": 2.1960025675587082e-07, "loss": 11.0523, "step": 457390 }, { "epoch": 0.9239769389577281, "grad_norm": 283.653076171875, "learning_rate": 2.1949795510763872e-07, "loss": 16.4459, "step": 457400 }, { "epoch": 0.923997139590412, 
"grad_norm": 348.6254577636719, "learning_rate": 2.1939567675899333e-07, "loss": 25.9321, "step": 457410 }, { "epoch": 0.9240173402230958, "grad_norm": 506.9590759277344, "learning_rate": 2.1929342171043366e-07, "loss": 32.5383, "step": 457420 }, { "epoch": 0.9240375408557796, "grad_norm": 166.4855499267578, "learning_rate": 2.191911899624588e-07, "loss": 27.0463, "step": 457430 }, { "epoch": 0.9240577414884634, "grad_norm": 386.7478942871094, "learning_rate": 2.1908898151556502e-07, "loss": 17.4471, "step": 457440 }, { "epoch": 0.9240779421211472, "grad_norm": 499.1949462890625, "learning_rate": 2.189867963702519e-07, "loss": 16.7372, "step": 457450 }, { "epoch": 0.9240981427538311, "grad_norm": 165.86109924316406, "learning_rate": 2.188846345270179e-07, "loss": 18.4938, "step": 457460 }, { "epoch": 0.9241183433865149, "grad_norm": 238.35037231445312, "learning_rate": 2.1878249598636047e-07, "loss": 15.6179, "step": 457470 }, { "epoch": 0.9241385440191987, "grad_norm": 72.76551055908203, "learning_rate": 2.186803807487764e-07, "loss": 14.1343, "step": 457480 }, { "epoch": 0.9241587446518825, "grad_norm": 494.52740478515625, "learning_rate": 2.1857828881476472e-07, "loss": 25.6341, "step": 457490 }, { "epoch": 0.9241789452845663, "grad_norm": 260.92047119140625, "learning_rate": 2.1847622018482283e-07, "loss": 25.9921, "step": 457500 }, { "epoch": 0.9241991459172502, "grad_norm": 423.1890869140625, "learning_rate": 2.1837417485944755e-07, "loss": 20.9144, "step": 457510 }, { "epoch": 0.924219346549934, "grad_norm": 1008.9845581054688, "learning_rate": 2.1827215283913683e-07, "loss": 33.1877, "step": 457520 }, { "epoch": 0.9242395471826178, "grad_norm": 773.0634765625, "learning_rate": 2.1817015412438692e-07, "loss": 31.0697, "step": 457530 }, { "epoch": 0.9242597478153016, "grad_norm": 716.7244873046875, "learning_rate": 2.1806817871569686e-07, "loss": 26.2957, "step": 457540 }, { "epoch": 0.9242799484479854, "grad_norm": 288.569091796875, "learning_rate": 2.1796622661356238e-07, "loss": 25.4398, "step": 457550 }, { "epoch": 0.9243001490806692, "grad_norm": 189.87271118164062, "learning_rate": 2.1786429781847972e-07, "loss": 25.3806, "step": 457560 }, { "epoch": 0.924320349713353, "grad_norm": 470.0531005859375, "learning_rate": 2.1776239233094687e-07, "loss": 10.8642, "step": 457570 }, { "epoch": 0.9243405503460368, "grad_norm": 243.01356506347656, "learning_rate": 2.176605101514606e-07, "loss": 25.1575, "step": 457580 }, { "epoch": 0.9243607509787206, "grad_norm": 802.8110961914062, "learning_rate": 2.175586512805161e-07, "loss": 30.1035, "step": 457590 }, { "epoch": 0.9243809516114044, "grad_norm": 338.3279113769531, "learning_rate": 2.174568157186102e-07, "loss": 20.7881, "step": 457600 }, { "epoch": 0.9244011522440883, "grad_norm": 1601.8643798828125, "learning_rate": 2.1735500346624083e-07, "loss": 18.3868, "step": 457610 }, { "epoch": 0.9244213528767721, "grad_norm": 473.4765930175781, "learning_rate": 2.1725321452390314e-07, "loss": 17.7341, "step": 457620 }, { "epoch": 0.9244415535094559, "grad_norm": 257.49365234375, "learning_rate": 2.1715144889209284e-07, "loss": 20.9138, "step": 457630 }, { "epoch": 0.9244617541421397, "grad_norm": 537.02880859375, "learning_rate": 2.1704970657130675e-07, "loss": 14.2191, "step": 457640 }, { "epoch": 0.9244819547748235, "grad_norm": 323.56304931640625, "learning_rate": 2.1694798756204005e-07, "loss": 14.642, "step": 457650 }, { "epoch": 0.9245021554075074, "grad_norm": 1487.6925048828125, "learning_rate": 2.1684629186478846e-07, "loss": 
14.8847, "step": 457660 }, { "epoch": 0.9245223560401912, "grad_norm": 214.20962524414062, "learning_rate": 2.1674461948004766e-07, "loss": 16.7686, "step": 457670 }, { "epoch": 0.924542556672875, "grad_norm": 241.1480255126953, "learning_rate": 2.1664297040831394e-07, "loss": 17.2618, "step": 457680 }, { "epoch": 0.9245627573055588, "grad_norm": 87.69023132324219, "learning_rate": 2.1654134465008247e-07, "loss": 8.7376, "step": 457690 }, { "epoch": 0.9245829579382426, "grad_norm": 602.7846069335938, "learning_rate": 2.1643974220584729e-07, "loss": 24.1448, "step": 457700 }, { "epoch": 0.9246031585709265, "grad_norm": 238.993408203125, "learning_rate": 2.1633816307610577e-07, "loss": 7.3158, "step": 457710 }, { "epoch": 0.9246233592036103, "grad_norm": 375.7080078125, "learning_rate": 2.1623660726135197e-07, "loss": 20.0468, "step": 457720 }, { "epoch": 0.9246435598362941, "grad_norm": 201.77499389648438, "learning_rate": 2.161350747620794e-07, "loss": 14.484, "step": 457730 }, { "epoch": 0.9246637604689779, "grad_norm": 403.94195556640625, "learning_rate": 2.1603356557878486e-07, "loss": 12.3051, "step": 457740 }, { "epoch": 0.9246839611016617, "grad_norm": 305.6134338378906, "learning_rate": 2.1593207971196296e-07, "loss": 15.9938, "step": 457750 }, { "epoch": 0.9247041617343456, "grad_norm": 513.3170166015625, "learning_rate": 2.1583061716210774e-07, "loss": 11.5397, "step": 457760 }, { "epoch": 0.9247243623670294, "grad_norm": 155.95716857910156, "learning_rate": 2.1572917792971326e-07, "loss": 20.2176, "step": 457770 }, { "epoch": 0.9247445629997132, "grad_norm": 194.8069610595703, "learning_rate": 2.1562776201527525e-07, "loss": 12.5153, "step": 457780 }, { "epoch": 0.924764763632397, "grad_norm": 1246.8511962890625, "learning_rate": 2.1552636941928717e-07, "loss": 18.2852, "step": 457790 }, { "epoch": 0.9247849642650808, "grad_norm": 706.98828125, "learning_rate": 2.154250001422431e-07, "loss": 18.9501, "step": 457800 }, { "epoch": 0.9248051648977647, "grad_norm": 322.3863830566406, "learning_rate": 2.1532365418463708e-07, "loss": 8.0768, "step": 457810 }, { "epoch": 0.9248253655304484, "grad_norm": 513.7962036132812, "learning_rate": 2.1522233154696314e-07, "loss": 9.9633, "step": 457820 }, { "epoch": 0.9248455661631322, "grad_norm": 401.7316589355469, "learning_rate": 2.151210322297159e-07, "loss": 22.6052, "step": 457830 }, { "epoch": 0.924865766795816, "grad_norm": 16.721694946289062, "learning_rate": 2.1501975623338833e-07, "loss": 11.0103, "step": 457840 }, { "epoch": 0.9248859674284998, "grad_norm": 110.01323699951172, "learning_rate": 2.1491850355847332e-07, "loss": 16.0218, "step": 457850 }, { "epoch": 0.9249061680611836, "grad_norm": 219.98196411132812, "learning_rate": 2.1481727420546605e-07, "loss": 8.403, "step": 457860 }, { "epoch": 0.9249263686938675, "grad_norm": 379.1888427734375, "learning_rate": 2.147160681748589e-07, "loss": 18.7612, "step": 457870 }, { "epoch": 0.9249465693265513, "grad_norm": 298.0424499511719, "learning_rate": 2.1461488546714425e-07, "loss": 17.5678, "step": 457880 }, { "epoch": 0.9249667699592351, "grad_norm": 253.58570861816406, "learning_rate": 2.1451372608281674e-07, "loss": 8.7051, "step": 457890 }, { "epoch": 0.9249869705919189, "grad_norm": 341.6390075683594, "learning_rate": 2.1441259002236924e-07, "loss": 17.6478, "step": 457900 }, { "epoch": 0.9250071712246027, "grad_norm": 258.5384826660156, "learning_rate": 2.1431147728629476e-07, "loss": 14.3431, "step": 457910 }, { "epoch": 0.9250273718572866, "grad_norm": 365.05657958984375, 
"learning_rate": 2.1421038787508508e-07, "loss": 14.0344, "step": 457920 }, { "epoch": 0.9250475724899704, "grad_norm": 327.6850280761719, "learning_rate": 2.1410932178923372e-07, "loss": 17.4474, "step": 457930 }, { "epoch": 0.9250677731226542, "grad_norm": 425.10919189453125, "learning_rate": 2.1400827902923304e-07, "loss": 24.1575, "step": 457940 }, { "epoch": 0.925087973755338, "grad_norm": 132.63491821289062, "learning_rate": 2.1390725959557546e-07, "loss": 13.1877, "step": 457950 }, { "epoch": 0.9251081743880218, "grad_norm": 263.4458923339844, "learning_rate": 2.1380626348875278e-07, "loss": 16.6089, "step": 457960 }, { "epoch": 0.9251283750207057, "grad_norm": 155.18031311035156, "learning_rate": 2.137052907092596e-07, "loss": 13.4668, "step": 457970 }, { "epoch": 0.9251485756533895, "grad_norm": 387.36309814453125, "learning_rate": 2.13604341257585e-07, "loss": 21.6578, "step": 457980 }, { "epoch": 0.9251687762860733, "grad_norm": 305.8255920410156, "learning_rate": 2.135034151342219e-07, "loss": 17.9192, "step": 457990 }, { "epoch": 0.9251889769187571, "grad_norm": 212.56101989746094, "learning_rate": 2.134025123396638e-07, "loss": 17.149, "step": 458000 }, { "epoch": 0.9252091775514409, "grad_norm": 318.2418518066406, "learning_rate": 2.1330163287440087e-07, "loss": 9.732, "step": 458010 }, { "epoch": 0.9252293781841248, "grad_norm": 1.686371088027954, "learning_rate": 2.1320077673892493e-07, "loss": 18.1357, "step": 458020 }, { "epoch": 0.9252495788168086, "grad_norm": 362.7011413574219, "learning_rate": 2.1309994393372836e-07, "loss": 19.8476, "step": 458030 }, { "epoch": 0.9252697794494924, "grad_norm": 32.95444107055664, "learning_rate": 2.1299913445930242e-07, "loss": 17.4755, "step": 458040 }, { "epoch": 0.9252899800821762, "grad_norm": 139.97511291503906, "learning_rate": 2.1289834831613675e-07, "loss": 12.3224, "step": 458050 }, { "epoch": 0.92531018071486, "grad_norm": 455.064208984375, "learning_rate": 2.127975855047243e-07, "loss": 12.8526, "step": 458060 }, { "epoch": 0.9253303813475438, "grad_norm": 443.9643859863281, "learning_rate": 2.126968460255563e-07, "loss": 22.1814, "step": 458070 }, { "epoch": 0.9253505819802276, "grad_norm": 151.95689392089844, "learning_rate": 2.1259612987912348e-07, "loss": 50.2585, "step": 458080 }, { "epoch": 0.9253707826129114, "grad_norm": 207.3853759765625, "learning_rate": 2.1249543706591602e-07, "loss": 7.4737, "step": 458090 }, { "epoch": 0.9253909832455952, "grad_norm": 126.56194305419922, "learning_rate": 2.123947675864252e-07, "loss": 10.355, "step": 458100 }, { "epoch": 0.925411183878279, "grad_norm": 161.03704833984375, "learning_rate": 2.1229412144114225e-07, "loss": 11.7268, "step": 458110 }, { "epoch": 0.9254313845109629, "grad_norm": 341.4384765625, "learning_rate": 2.121934986305557e-07, "loss": 18.7943, "step": 458120 }, { "epoch": 0.9254515851436467, "grad_norm": 219.3982391357422, "learning_rate": 2.120928991551585e-07, "loss": 17.0994, "step": 458130 }, { "epoch": 0.9254717857763305, "grad_norm": 286.6707458496094, "learning_rate": 2.1199232301543915e-07, "loss": 18.727, "step": 458140 }, { "epoch": 0.9254919864090143, "grad_norm": 211.01681518554688, "learning_rate": 2.1189177021188888e-07, "loss": 38.938, "step": 458150 }, { "epoch": 0.9255121870416981, "grad_norm": 172.27032470703125, "learning_rate": 2.117912407449979e-07, "loss": 12.6474, "step": 458160 }, { "epoch": 0.925532387674382, "grad_norm": 29.137935638427734, "learning_rate": 2.116907346152547e-07, "loss": 17.0718, "step": 458170 }, { "epoch": 
0.9255525883070658, "grad_norm": 170.0179443359375, "learning_rate": 2.1159025182315052e-07, "loss": 16.2877, "step": 458180 }, { "epoch": 0.9255727889397496, "grad_norm": 187.5997314453125, "learning_rate": 2.11489792369175e-07, "loss": 15.7404, "step": 458190 }, { "epoch": 0.9255929895724334, "grad_norm": 167.6175537109375, "learning_rate": 2.1138935625381663e-07, "loss": 15.2215, "step": 458200 }, { "epoch": 0.9256131902051172, "grad_norm": 530.850830078125, "learning_rate": 2.1128894347756613e-07, "loss": 18.4094, "step": 458210 }, { "epoch": 0.925633390837801, "grad_norm": 381.7887268066406, "learning_rate": 2.1118855404091253e-07, "loss": 24.7909, "step": 458220 }, { "epoch": 0.9256535914704849, "grad_norm": 29.789928436279297, "learning_rate": 2.110881879443455e-07, "loss": 13.2872, "step": 458230 }, { "epoch": 0.9256737921031687, "grad_norm": 217.49542236328125, "learning_rate": 2.1098784518835292e-07, "loss": 18.055, "step": 458240 }, { "epoch": 0.9256939927358525, "grad_norm": 535.6318969726562, "learning_rate": 2.1088752577342607e-07, "loss": 27.8291, "step": 458250 }, { "epoch": 0.9257141933685363, "grad_norm": 926.4777221679688, "learning_rate": 2.1078722970005182e-07, "loss": 16.9594, "step": 458260 }, { "epoch": 0.9257343940012202, "grad_norm": 346.7973937988281, "learning_rate": 2.1068695696871922e-07, "loss": 15.6104, "step": 458270 }, { "epoch": 0.925754594633904, "grad_norm": 359.69110107421875, "learning_rate": 2.1058670757991783e-07, "loss": 14.7547, "step": 458280 }, { "epoch": 0.9257747952665878, "grad_norm": 451.11236572265625, "learning_rate": 2.104864815341362e-07, "loss": 18.7919, "step": 458290 }, { "epoch": 0.9257949958992716, "grad_norm": 196.26596069335938, "learning_rate": 2.103862788318628e-07, "loss": 14.8491, "step": 458300 }, { "epoch": 0.9258151965319554, "grad_norm": 308.1374206542969, "learning_rate": 2.102860994735856e-07, "loss": 10.3378, "step": 458310 }, { "epoch": 0.9258353971646393, "grad_norm": 489.496826171875, "learning_rate": 2.1018594345979305e-07, "loss": 23.6785, "step": 458320 }, { "epoch": 0.925855597797323, "grad_norm": 394.7153625488281, "learning_rate": 2.1008581079097312e-07, "loss": 22.2725, "step": 458330 }, { "epoch": 0.9258757984300068, "grad_norm": 380.5003662109375, "learning_rate": 2.0998570146761376e-07, "loss": 18.5849, "step": 458340 }, { "epoch": 0.9258959990626906, "grad_norm": 218.2222442626953, "learning_rate": 2.098856154902029e-07, "loss": 14.9699, "step": 458350 }, { "epoch": 0.9259161996953744, "grad_norm": 324.1954345703125, "learning_rate": 2.0978555285922963e-07, "loss": 29.3252, "step": 458360 }, { "epoch": 0.9259364003280582, "grad_norm": 354.8207092285156, "learning_rate": 2.0968551357518018e-07, "loss": 13.6946, "step": 458370 }, { "epoch": 0.9259566009607421, "grad_norm": 1145.4552001953125, "learning_rate": 2.0958549763854196e-07, "loss": 24.0101, "step": 458380 }, { "epoch": 0.9259768015934259, "grad_norm": 261.1099853515625, "learning_rate": 2.0948550504980403e-07, "loss": 13.9914, "step": 458390 }, { "epoch": 0.9259970022261097, "grad_norm": 170.25189208984375, "learning_rate": 2.0938553580945208e-07, "loss": 24.7193, "step": 458400 }, { "epoch": 0.9260172028587935, "grad_norm": 437.9764099121094, "learning_rate": 2.092855899179741e-07, "loss": 9.5411, "step": 458410 }, { "epoch": 0.9260374034914773, "grad_norm": 94.67361450195312, "learning_rate": 2.0918566737585688e-07, "loss": 24.0387, "step": 458420 }, { "epoch": 0.9260576041241612, "grad_norm": 157.25180053710938, "learning_rate": 
2.0908576818358783e-07, "loss": 14.9054, "step": 458430 }, { "epoch": 0.926077804756845, "grad_norm": 154.60018920898438, "learning_rate": 2.0898589234165378e-07, "loss": 13.5259, "step": 458440 }, { "epoch": 0.9260980053895288, "grad_norm": 464.2149658203125, "learning_rate": 2.0888603985054156e-07, "loss": 19.8272, "step": 458450 }, { "epoch": 0.9261182060222126, "grad_norm": 252.5354461669922, "learning_rate": 2.0878621071073745e-07, "loss": 10.8964, "step": 458460 }, { "epoch": 0.9261384066548964, "grad_norm": 138.6220703125, "learning_rate": 2.086864049227283e-07, "loss": 15.92, "step": 458470 }, { "epoch": 0.9261586072875803, "grad_norm": 651.3563232421875, "learning_rate": 2.085866224870009e-07, "loss": 12.8114, "step": 458480 }, { "epoch": 0.9261788079202641, "grad_norm": 445.91790771484375, "learning_rate": 2.0848686340404045e-07, "loss": 34.6725, "step": 458490 }, { "epoch": 0.9261990085529479, "grad_norm": 325.5871887207031, "learning_rate": 2.083871276743338e-07, "loss": 20.4321, "step": 458500 }, { "epoch": 0.9262192091856317, "grad_norm": 373.8263854980469, "learning_rate": 2.0828741529836771e-07, "loss": 15.8434, "step": 458510 }, { "epoch": 0.9262394098183155, "grad_norm": 28.93181037902832, "learning_rate": 2.0818772627662743e-07, "loss": 11.6755, "step": 458520 }, { "epoch": 0.9262596104509994, "grad_norm": 15.074884414672852, "learning_rate": 2.0808806060959864e-07, "loss": 27.5151, "step": 458530 }, { "epoch": 0.9262798110836832, "grad_norm": 386.1090087890625, "learning_rate": 2.0798841829776816e-07, "loss": 9.6054, "step": 458540 }, { "epoch": 0.926300011716367, "grad_norm": 159.1573028564453, "learning_rate": 2.0788879934162064e-07, "loss": 17.2663, "step": 458550 }, { "epoch": 0.9263202123490508, "grad_norm": 262.0688781738281, "learning_rate": 2.077892037416418e-07, "loss": 19.3492, "step": 458560 }, { "epoch": 0.9263404129817346, "grad_norm": 194.11447143554688, "learning_rate": 2.0768963149831678e-07, "loss": 14.6876, "step": 458570 }, { "epoch": 0.9263606136144183, "grad_norm": 365.3988342285156, "learning_rate": 2.0759008261213242e-07, "loss": 16.0998, "step": 458580 }, { "epoch": 0.9263808142471022, "grad_norm": 192.0506591796875, "learning_rate": 2.0749055708357168e-07, "loss": 26.9038, "step": 458590 }, { "epoch": 0.926401014879786, "grad_norm": 250.0546875, "learning_rate": 2.0739105491312028e-07, "loss": 17.8981, "step": 458600 }, { "epoch": 0.9264212155124698, "grad_norm": 996.9682006835938, "learning_rate": 2.0729157610126448e-07, "loss": 27.0404, "step": 458610 }, { "epoch": 0.9264414161451536, "grad_norm": 122.38219451904297, "learning_rate": 2.0719212064848838e-07, "loss": 19.0454, "step": 458620 }, { "epoch": 0.9264616167778374, "grad_norm": 393.2940368652344, "learning_rate": 2.07092688555276e-07, "loss": 18.0674, "step": 458630 }, { "epoch": 0.9264818174105213, "grad_norm": 224.41012573242188, "learning_rate": 2.0699327982211304e-07, "loss": 13.0098, "step": 458640 }, { "epoch": 0.9265020180432051, "grad_norm": 460.50335693359375, "learning_rate": 2.068938944494836e-07, "loss": 19.5449, "step": 458650 }, { "epoch": 0.9265222186758889, "grad_norm": 692.4585571289062, "learning_rate": 2.0679453243787174e-07, "loss": 12.5094, "step": 458660 }, { "epoch": 0.9265424193085727, "grad_norm": 759.0761108398438, "learning_rate": 2.0669519378776147e-07, "loss": 22.3652, "step": 458670 }, { "epoch": 0.9265626199412565, "grad_norm": 336.0935363769531, "learning_rate": 2.0659587849963801e-07, "loss": 21.5044, "step": 458680 }, { "epoch": 0.9265828205739404, 
"grad_norm": 345.7869873046875, "learning_rate": 2.0649658657398487e-07, "loss": 11.5162, "step": 458690 }, { "epoch": 0.9266030212066242, "grad_norm": 578.116943359375, "learning_rate": 2.0639731801128603e-07, "loss": 19.6053, "step": 458700 }, { "epoch": 0.926623221839308, "grad_norm": 552.098388671875, "learning_rate": 2.0629807281202508e-07, "loss": 22.3361, "step": 458710 }, { "epoch": 0.9266434224719918, "grad_norm": 314.4300537109375, "learning_rate": 2.0619885097668658e-07, "loss": 10.5781, "step": 458720 }, { "epoch": 0.9266636231046756, "grad_norm": 577.6607666015625, "learning_rate": 2.0609965250575237e-07, "loss": 22.9499, "step": 458730 }, { "epoch": 0.9266838237373595, "grad_norm": 0.4380476176738739, "learning_rate": 2.0600047739970762e-07, "loss": 11.5809, "step": 458740 }, { "epoch": 0.9267040243700433, "grad_norm": 208.62179565429688, "learning_rate": 2.0590132565903475e-07, "loss": 10.554, "step": 458750 }, { "epoch": 0.9267242250027271, "grad_norm": 527.3123168945312, "learning_rate": 2.058021972842178e-07, "loss": 19.0069, "step": 458760 }, { "epoch": 0.9267444256354109, "grad_norm": 499.30670166015625, "learning_rate": 2.057030922757397e-07, "loss": 19.0369, "step": 458770 }, { "epoch": 0.9267646262680947, "grad_norm": 314.04437255859375, "learning_rate": 2.056040106340823e-07, "loss": 10.8281, "step": 458780 }, { "epoch": 0.9267848269007786, "grad_norm": 123.98068237304688, "learning_rate": 2.0550495235973023e-07, "loss": 12.4546, "step": 458790 }, { "epoch": 0.9268050275334624, "grad_norm": 198.2677001953125, "learning_rate": 2.054059174531653e-07, "loss": 20.5061, "step": 458800 }, { "epoch": 0.9268252281661462, "grad_norm": 154.2901153564453, "learning_rate": 2.0530690591487047e-07, "loss": 19.8547, "step": 458810 }, { "epoch": 0.92684542879883, "grad_norm": 277.7265930175781, "learning_rate": 2.0520791774532757e-07, "loss": 24.4116, "step": 458820 }, { "epoch": 0.9268656294315138, "grad_norm": 328.3410949707031, "learning_rate": 2.0510895294502066e-07, "loss": 14.0775, "step": 458830 }, { "epoch": 0.9268858300641976, "grad_norm": 317.104736328125, "learning_rate": 2.0501001151443156e-07, "loss": 17.5873, "step": 458840 }, { "epoch": 0.9269060306968814, "grad_norm": 226.73709106445312, "learning_rate": 2.0491109345404102e-07, "loss": 15.4212, "step": 458850 }, { "epoch": 0.9269262313295652, "grad_norm": 849.9632568359375, "learning_rate": 2.0481219876433257e-07, "loss": 18.5426, "step": 458860 }, { "epoch": 0.926946431962249, "grad_norm": 363.4132385253906, "learning_rate": 2.0471332744578853e-07, "loss": 25.3132, "step": 458870 }, { "epoch": 0.9269666325949328, "grad_norm": 785.9321899414062, "learning_rate": 2.0461447949888912e-07, "loss": 13.8512, "step": 458880 }, { "epoch": 0.9269868332276167, "grad_norm": 338.12884521484375, "learning_rate": 2.0451565492411672e-07, "loss": 21.3447, "step": 458890 }, { "epoch": 0.9270070338603005, "grad_norm": 235.50070190429688, "learning_rate": 2.0441685372195487e-07, "loss": 16.1099, "step": 458900 }, { "epoch": 0.9270272344929843, "grad_norm": 402.45147705078125, "learning_rate": 2.043180758928831e-07, "loss": 27.9963, "step": 458910 }, { "epoch": 0.9270474351256681, "grad_norm": 154.576904296875, "learning_rate": 2.0421932143738276e-07, "loss": 12.2917, "step": 458920 }, { "epoch": 0.9270676357583519, "grad_norm": 344.50531005859375, "learning_rate": 2.041205903559368e-07, "loss": 14.6842, "step": 458930 }, { "epoch": 0.9270878363910358, "grad_norm": 366.5665588378906, "learning_rate": 2.0402188264902533e-07, "loss": 
22.528, "step": 458940 }, { "epoch": 0.9271080370237196, "grad_norm": 477.3027038574219, "learning_rate": 2.039231983171286e-07, "loss": 16.5182, "step": 458950 }, { "epoch": 0.9271282376564034, "grad_norm": 0.0, "learning_rate": 2.0382453736072838e-07, "loss": 16.6952, "step": 458960 }, { "epoch": 0.9271484382890872, "grad_norm": 306.124755859375, "learning_rate": 2.0372589978030654e-07, "loss": 11.9331, "step": 458970 }, { "epoch": 0.927168638921771, "grad_norm": 754.0401611328125, "learning_rate": 2.0362728557634327e-07, "loss": 32.1713, "step": 458980 }, { "epoch": 0.9271888395544549, "grad_norm": 123.714599609375, "learning_rate": 2.0352869474931758e-07, "loss": 12.2411, "step": 458990 }, { "epoch": 0.9272090401871387, "grad_norm": 302.0909423828125, "learning_rate": 2.0343012729971244e-07, "loss": 10.5212, "step": 459000 }, { "epoch": 0.9272292408198225, "grad_norm": 466.5577087402344, "learning_rate": 2.0333158322800696e-07, "loss": 15.9164, "step": 459010 }, { "epoch": 0.9272494414525063, "grad_norm": 263.16961669921875, "learning_rate": 2.0323306253468123e-07, "loss": 10.4473, "step": 459020 }, { "epoch": 0.9272696420851901, "grad_norm": 584.4795532226562, "learning_rate": 2.0313456522021603e-07, "loss": 16.174, "step": 459030 }, { "epoch": 0.927289842717874, "grad_norm": 429.5994567871094, "learning_rate": 2.0303609128509038e-07, "loss": 22.1154, "step": 459040 }, { "epoch": 0.9273100433505578, "grad_norm": 389.5309143066406, "learning_rate": 2.0293764072978618e-07, "loss": 22.5208, "step": 459050 }, { "epoch": 0.9273302439832416, "grad_norm": 372.05609130859375, "learning_rate": 2.0283921355478187e-07, "loss": 18.7915, "step": 459060 }, { "epoch": 0.9273504446159254, "grad_norm": 289.4825744628906, "learning_rate": 2.0274080976055655e-07, "loss": 14.1784, "step": 459070 }, { "epoch": 0.9273706452486092, "grad_norm": 477.81732177734375, "learning_rate": 2.0264242934759147e-07, "loss": 22.1646, "step": 459080 }, { "epoch": 0.927390845881293, "grad_norm": 390.359130859375, "learning_rate": 2.025440723163652e-07, "loss": 24.2576, "step": 459090 }, { "epoch": 0.9274110465139768, "grad_norm": 163.69305419921875, "learning_rate": 2.0244573866735673e-07, "loss": 19.6924, "step": 459100 }, { "epoch": 0.9274312471466606, "grad_norm": 304.3630065917969, "learning_rate": 2.0234742840104627e-07, "loss": 17.2062, "step": 459110 }, { "epoch": 0.9274514477793444, "grad_norm": 149.70547485351562, "learning_rate": 2.0224914151791285e-07, "loss": 9.3478, "step": 459120 }, { "epoch": 0.9274716484120282, "grad_norm": 283.55181884765625, "learning_rate": 2.0215087801843504e-07, "loss": 18.1078, "step": 459130 }, { "epoch": 0.927491849044712, "grad_norm": 709.7905883789062, "learning_rate": 2.0205263790309125e-07, "loss": 13.3553, "step": 459140 }, { "epoch": 0.9275120496773959, "grad_norm": 871.2809448242188, "learning_rate": 2.0195442117236176e-07, "loss": 31.3566, "step": 459150 }, { "epoch": 0.9275322503100797, "grad_norm": 387.3518371582031, "learning_rate": 2.0185622782672497e-07, "loss": 16.4249, "step": 459160 }, { "epoch": 0.9275524509427635, "grad_norm": 516.0538940429688, "learning_rate": 2.0175805786665782e-07, "loss": 15.3209, "step": 459170 }, { "epoch": 0.9275726515754473, "grad_norm": 313.6719665527344, "learning_rate": 2.0165991129263984e-07, "loss": 10.7569, "step": 459180 }, { "epoch": 0.9275928522081311, "grad_norm": 368.5505065917969, "learning_rate": 2.0156178810515127e-07, "loss": 22.9218, "step": 459190 }, { "epoch": 0.927613052840815, "grad_norm": 476.4034729003906, 
"learning_rate": 2.0146368830466668e-07, "loss": 26.4644, "step": 459200 }, { "epoch": 0.9276332534734988, "grad_norm": 604.5199584960938, "learning_rate": 2.0136561189166682e-07, "loss": 12.8561, "step": 459210 }, { "epoch": 0.9276534541061826, "grad_norm": 173.3835906982422, "learning_rate": 2.0126755886662907e-07, "loss": 14.4322, "step": 459220 }, { "epoch": 0.9276736547388664, "grad_norm": 331.8222961425781, "learning_rate": 2.0116952923003142e-07, "loss": 14.6574, "step": 459230 }, { "epoch": 0.9276938553715502, "grad_norm": 756.9157104492188, "learning_rate": 2.0107152298235067e-07, "loss": 21.7451, "step": 459240 }, { "epoch": 0.9277140560042341, "grad_norm": 45.36907958984375, "learning_rate": 2.0097354012406535e-07, "loss": 23.7887, "step": 459250 }, { "epoch": 0.9277342566369179, "grad_norm": 711.8092041015625, "learning_rate": 2.0087558065565394e-07, "loss": 25.7945, "step": 459260 }, { "epoch": 0.9277544572696017, "grad_norm": 285.94219970703125, "learning_rate": 2.007776445775922e-07, "loss": 17.119, "step": 459270 }, { "epoch": 0.9277746579022855, "grad_norm": 445.1518249511719, "learning_rate": 2.006797318903575e-07, "loss": 17.844, "step": 459280 }, { "epoch": 0.9277948585349693, "grad_norm": 286.44287109375, "learning_rate": 2.0058184259442893e-07, "loss": 17.7038, "step": 459290 }, { "epoch": 0.9278150591676532, "grad_norm": 206.69505310058594, "learning_rate": 2.0048397669028164e-07, "loss": 25.5775, "step": 459300 }, { "epoch": 0.927835259800337, "grad_norm": 343.9249267578125, "learning_rate": 2.003861341783936e-07, "loss": 15.4368, "step": 459310 }, { "epoch": 0.9278554604330208, "grad_norm": 146.1588592529297, "learning_rate": 2.0028831505924162e-07, "loss": 21.2046, "step": 459320 }, { "epoch": 0.9278756610657046, "grad_norm": 526.7974853515625, "learning_rate": 2.0019051933330204e-07, "loss": 14.5463, "step": 459330 }, { "epoch": 0.9278958616983884, "grad_norm": 53.01992416381836, "learning_rate": 2.000927470010511e-07, "loss": 12.1168, "step": 459340 }, { "epoch": 0.9279160623310722, "grad_norm": 1.5485777854919434, "learning_rate": 1.9999499806296674e-07, "loss": 11.8297, "step": 459350 }, { "epoch": 0.927936262963756, "grad_norm": 233.57095336914062, "learning_rate": 1.9989727251952418e-07, "loss": 32.4009, "step": 459360 }, { "epoch": 0.9279564635964398, "grad_norm": 552.9609985351562, "learning_rate": 1.9979957037120078e-07, "loss": 22.0227, "step": 459370 }, { "epoch": 0.9279766642291236, "grad_norm": 629.5794677734375, "learning_rate": 1.9970189161847175e-07, "loss": 15.866, "step": 459380 }, { "epoch": 0.9279968648618074, "grad_norm": 189.53878784179688, "learning_rate": 1.996042362618128e-07, "loss": 17.6634, "step": 459390 }, { "epoch": 0.9280170654944913, "grad_norm": 389.4222412109375, "learning_rate": 1.995066043017013e-07, "loss": 38.8011, "step": 459400 }, { "epoch": 0.9280372661271751, "grad_norm": 289.399658203125, "learning_rate": 1.9940899573861195e-07, "loss": 12.5329, "step": 459410 }, { "epoch": 0.9280574667598589, "grad_norm": 482.4100646972656, "learning_rate": 1.993114105730215e-07, "loss": 20.3436, "step": 459420 }, { "epoch": 0.9280776673925427, "grad_norm": 151.21099853515625, "learning_rate": 1.9921384880540406e-07, "loss": 16.3865, "step": 459430 }, { "epoch": 0.9280978680252265, "grad_norm": 391.5841369628906, "learning_rate": 1.9911631043623704e-07, "loss": 16.6259, "step": 459440 }, { "epoch": 0.9281180686579104, "grad_norm": 311.5986328125, "learning_rate": 1.99018795465995e-07, "loss": 16.96, "step": 459450 }, { "epoch": 
0.9281382692905942, "grad_norm": 299.59454345703125, "learning_rate": 1.9892130389515207e-07, "loss": 13.1875, "step": 459460 }, { "epoch": 0.928158469923278, "grad_norm": 135.6160125732422, "learning_rate": 1.9882383572418508e-07, "loss": 8.0025, "step": 459470 }, { "epoch": 0.9281786705559618, "grad_norm": 845.6554565429688, "learning_rate": 1.987263909535686e-07, "loss": 26.8887, "step": 459480 }, { "epoch": 0.9281988711886456, "grad_norm": 530.5870361328125, "learning_rate": 1.986289695837762e-07, "loss": 25.8121, "step": 459490 }, { "epoch": 0.9282190718213295, "grad_norm": 299.4232177734375, "learning_rate": 1.9853157161528468e-07, "loss": 17.5788, "step": 459500 }, { "epoch": 0.9282392724540133, "grad_norm": 230.8065948486328, "learning_rate": 1.984341970485687e-07, "loss": 11.3624, "step": 459510 }, { "epoch": 0.9282594730866971, "grad_norm": 250.2640838623047, "learning_rate": 1.9833684588410062e-07, "loss": 17.0651, "step": 459520 }, { "epoch": 0.9282796737193809, "grad_norm": 1162.2672119140625, "learning_rate": 1.9823951812235675e-07, "loss": 32.3245, "step": 459530 }, { "epoch": 0.9282998743520647, "grad_norm": 648.0301513671875, "learning_rate": 1.981422137638117e-07, "loss": 26.6975, "step": 459540 }, { "epoch": 0.9283200749847486, "grad_norm": 192.74911499023438, "learning_rate": 1.98044932808939e-07, "loss": 17.8849, "step": 459550 }, { "epoch": 0.9283402756174324, "grad_norm": 3.202186346054077, "learning_rate": 1.9794767525821212e-07, "loss": 13.659, "step": 459560 }, { "epoch": 0.9283604762501162, "grad_norm": 437.1767883300781, "learning_rate": 1.9785044111210627e-07, "loss": 13.1538, "step": 459570 }, { "epoch": 0.9283806768828, "grad_norm": 588.3291625976562, "learning_rate": 1.977532303710955e-07, "loss": 18.7623, "step": 459580 }, { "epoch": 0.9284008775154838, "grad_norm": 872.955322265625, "learning_rate": 1.9765604303565223e-07, "loss": 22.6115, "step": 459590 }, { "epoch": 0.9284210781481677, "grad_norm": 207.5331268310547, "learning_rate": 1.9755887910625103e-07, "loss": 16.759, "step": 459600 }, { "epoch": 0.9284412787808514, "grad_norm": 433.579833984375, "learning_rate": 1.9746173858336604e-07, "loss": 16.0027, "step": 459610 }, { "epoch": 0.9284614794135352, "grad_norm": 119.8254623413086, "learning_rate": 1.9736462146747015e-07, "loss": 15.5961, "step": 459620 }, { "epoch": 0.928481680046219, "grad_norm": 346.081787109375, "learning_rate": 1.972675277590358e-07, "loss": 18.0993, "step": 459630 }, { "epoch": 0.9285018806789028, "grad_norm": 251.54295349121094, "learning_rate": 1.9717045745853758e-07, "loss": 13.2372, "step": 459640 }, { "epoch": 0.9285220813115866, "grad_norm": 453.9295654296875, "learning_rate": 1.9707341056644737e-07, "loss": 30.9079, "step": 459650 }, { "epoch": 0.9285422819442705, "grad_norm": 333.4501647949219, "learning_rate": 1.9697638708323918e-07, "loss": 14.9811, "step": 459660 }, { "epoch": 0.9285624825769543, "grad_norm": 230.99989318847656, "learning_rate": 1.9687938700938602e-07, "loss": 40.2869, "step": 459670 }, { "epoch": 0.9285826832096381, "grad_norm": 225.25244140625, "learning_rate": 1.967824103453597e-07, "loss": 28.2683, "step": 459680 }, { "epoch": 0.9286028838423219, "grad_norm": 387.0373840332031, "learning_rate": 1.9668545709163378e-07, "loss": 18.3746, "step": 459690 }, { "epoch": 0.9286230844750057, "grad_norm": 612.067138671875, "learning_rate": 1.9658852724868005e-07, "loss": 24.9054, "step": 459700 }, { "epoch": 0.9286432851076896, "grad_norm": 357.3955993652344, "learning_rate": 1.9649162081697094e-07, 
"loss": 24.4012, "step": 459710 }, { "epoch": 0.9286634857403734, "grad_norm": 536.5983276367188, "learning_rate": 1.963947377969788e-07, "loss": 12.6208, "step": 459720 }, { "epoch": 0.9286836863730572, "grad_norm": 267.8538513183594, "learning_rate": 1.9629787818917722e-07, "loss": 16.5168, "step": 459730 }, { "epoch": 0.928703887005741, "grad_norm": 31.454442977905273, "learning_rate": 1.9620104199403688e-07, "loss": 19.1965, "step": 459740 }, { "epoch": 0.9287240876384248, "grad_norm": 337.57623291015625, "learning_rate": 1.961042292120291e-07, "loss": 14.2129, "step": 459750 }, { "epoch": 0.9287442882711087, "grad_norm": 220.30328369140625, "learning_rate": 1.9600743984362792e-07, "loss": 13.5387, "step": 459760 }, { "epoch": 0.9287644889037925, "grad_norm": 252.01829528808594, "learning_rate": 1.959106738893035e-07, "loss": 10.7286, "step": 459770 }, { "epoch": 0.9287846895364763, "grad_norm": 173.19248962402344, "learning_rate": 1.958139313495272e-07, "loss": 16.17, "step": 459780 }, { "epoch": 0.9288048901691601, "grad_norm": 5.145406723022461, "learning_rate": 1.957172122247708e-07, "loss": 30.5028, "step": 459790 }, { "epoch": 0.9288250908018439, "grad_norm": 237.4205780029297, "learning_rate": 1.9562051651550784e-07, "loss": 11.8499, "step": 459800 }, { "epoch": 0.9288452914345278, "grad_norm": 157.3616943359375, "learning_rate": 1.9552384422220627e-07, "loss": 12.6332, "step": 459810 }, { "epoch": 0.9288654920672116, "grad_norm": 286.0095520019531, "learning_rate": 1.954271953453385e-07, "loss": 18.3174, "step": 459820 }, { "epoch": 0.9288856926998954, "grad_norm": 1239.24462890625, "learning_rate": 1.953305698853769e-07, "loss": 25.9679, "step": 459830 }, { "epoch": 0.9289058933325792, "grad_norm": 665.3642578125, "learning_rate": 1.9523396784279114e-07, "loss": 23.8822, "step": 459840 }, { "epoch": 0.928926093965263, "grad_norm": 508.5174865722656, "learning_rate": 1.9513738921805192e-07, "loss": 17.3589, "step": 459850 }, { "epoch": 0.9289462945979468, "grad_norm": 316.22100830078125, "learning_rate": 1.9504083401162999e-07, "loss": 22.6244, "step": 459860 }, { "epoch": 0.9289664952306306, "grad_norm": 284.92230224609375, "learning_rate": 1.9494430222399774e-07, "loss": 18.12, "step": 459870 }, { "epoch": 0.9289866958633144, "grad_norm": 188.45223999023438, "learning_rate": 1.948477938556226e-07, "loss": 13.7546, "step": 459880 }, { "epoch": 0.9290068964959982, "grad_norm": 294.2447814941406, "learning_rate": 1.9475130890697691e-07, "loss": 29.014, "step": 459890 }, { "epoch": 0.929027097128682, "grad_norm": 181.1422882080078, "learning_rate": 1.9465484737853092e-07, "loss": 22.4655, "step": 459900 }, { "epoch": 0.9290472977613659, "grad_norm": 217.6072235107422, "learning_rate": 1.945584092707542e-07, "loss": 14.1615, "step": 459910 }, { "epoch": 0.9290674983940497, "grad_norm": 657.130126953125, "learning_rate": 1.944619945841164e-07, "loss": 11.1402, "step": 459920 }, { "epoch": 0.9290876990267335, "grad_norm": 399.0269470214844, "learning_rate": 1.9436560331908882e-07, "loss": 10.5638, "step": 459930 }, { "epoch": 0.9291078996594173, "grad_norm": 359.1075134277344, "learning_rate": 1.9426923547614052e-07, "loss": 12.1312, "step": 459940 }, { "epoch": 0.9291281002921011, "grad_norm": 851.9827880859375, "learning_rate": 1.9417289105574054e-07, "loss": 27.9834, "step": 459950 }, { "epoch": 0.929148300924785, "grad_norm": 248.04933166503906, "learning_rate": 1.9407657005835967e-07, "loss": 19.4236, "step": 459960 }, { "epoch": 0.9291685015574688, "grad_norm": 
225.03952026367188, "learning_rate": 1.9398027248446582e-07, "loss": 11.7057, "step": 459970 }, { "epoch": 0.9291887021901526, "grad_norm": 383.68414306640625, "learning_rate": 1.9388399833452974e-07, "loss": 18.9307, "step": 459980 }, { "epoch": 0.9292089028228364, "grad_norm": 298.77899169921875, "learning_rate": 1.9378774760902052e-07, "loss": 11.7764, "step": 459990 }, { "epoch": 0.9292291034555202, "grad_norm": 239.48939514160156, "learning_rate": 1.9369152030840553e-07, "loss": 16.0167, "step": 460000 }, { "epoch": 0.929249304088204, "grad_norm": 472.67279052734375, "learning_rate": 1.9359531643315665e-07, "loss": 19.6229, "step": 460010 }, { "epoch": 0.9292695047208879, "grad_norm": 348.67791748046875, "learning_rate": 1.9349913598374014e-07, "loss": 15.113, "step": 460020 }, { "epoch": 0.9292897053535717, "grad_norm": 697.9534301757812, "learning_rate": 1.9340297896062676e-07, "loss": 16.396, "step": 460030 }, { "epoch": 0.9293099059862555, "grad_norm": 561.0146484375, "learning_rate": 1.9330684536428335e-07, "loss": 18.4987, "step": 460040 }, { "epoch": 0.9293301066189393, "grad_norm": 609.9745483398438, "learning_rate": 1.9321073519518007e-07, "loss": 19.547, "step": 460050 }, { "epoch": 0.9293503072516232, "grad_norm": 38.030887603759766, "learning_rate": 1.9311464845378492e-07, "loss": 11.6924, "step": 460060 }, { "epoch": 0.929370507884307, "grad_norm": 112.8908462524414, "learning_rate": 1.9301858514056527e-07, "loss": 9.4522, "step": 460070 }, { "epoch": 0.9293907085169908, "grad_norm": 187.12997436523438, "learning_rate": 1.9292254525599075e-07, "loss": 17.3489, "step": 460080 }, { "epoch": 0.9294109091496746, "grad_norm": 392.20379638671875, "learning_rate": 1.928265288005282e-07, "loss": 18.9361, "step": 460090 }, { "epoch": 0.9294311097823584, "grad_norm": 281.1587219238281, "learning_rate": 1.927305357746462e-07, "loss": 16.0612, "step": 460100 }, { "epoch": 0.9294513104150423, "grad_norm": 273.9291076660156, "learning_rate": 1.9263456617881203e-07, "loss": 11.1165, "step": 460110 }, { "epoch": 0.929471511047726, "grad_norm": 327.94281005859375, "learning_rate": 1.9253862001349543e-07, "loss": 11.9103, "step": 460120 }, { "epoch": 0.9294917116804098, "grad_norm": 285.5328674316406, "learning_rate": 1.9244269727916097e-07, "loss": 10.3836, "step": 460130 }, { "epoch": 0.9295119123130936, "grad_norm": 176.0340118408203, "learning_rate": 1.9234679797627832e-07, "loss": 16.9426, "step": 460140 }, { "epoch": 0.9295321129457774, "grad_norm": 317.98980712890625, "learning_rate": 1.9225092210531425e-07, "loss": 19.7461, "step": 460150 }, { "epoch": 0.9295523135784612, "grad_norm": 614.9310302734375, "learning_rate": 1.9215506966673624e-07, "loss": 22.1396, "step": 460160 }, { "epoch": 0.9295725142111451, "grad_norm": 347.4130859375, "learning_rate": 1.9205924066101057e-07, "loss": 10.2269, "step": 460170 }, { "epoch": 0.9295927148438289, "grad_norm": 427.9936218261719, "learning_rate": 1.9196343508860515e-07, "loss": 22.8617, "step": 460180 }, { "epoch": 0.9296129154765127, "grad_norm": 386.7332458496094, "learning_rate": 1.9186765294998855e-07, "loss": 18.1745, "step": 460190 }, { "epoch": 0.9296331161091965, "grad_norm": 421.6540832519531, "learning_rate": 1.917718942456237e-07, "loss": 15.6033, "step": 460200 }, { "epoch": 0.9296533167418803, "grad_norm": 387.5469665527344, "learning_rate": 1.9167615897598023e-07, "loss": 12.7935, "step": 460210 }, { "epoch": 0.9296735173745642, "grad_norm": 550.24853515625, "learning_rate": 1.9158044714152447e-07, "loss": 23.7764, "step": 
460220 }, { "epoch": 0.929693718007248, "grad_norm": 1298.991455078125, "learning_rate": 1.914847587427221e-07, "loss": 32.2464, "step": 460230 }, { "epoch": 0.9297139186399318, "grad_norm": 668.077880859375, "learning_rate": 1.9138909378003946e-07, "loss": 17.404, "step": 460240 }, { "epoch": 0.9297341192726156, "grad_norm": 339.6650695800781, "learning_rate": 1.9129345225394335e-07, "loss": 18.9665, "step": 460250 }, { "epoch": 0.9297543199052994, "grad_norm": 773.9869384765625, "learning_rate": 1.9119783416490013e-07, "loss": 29.5222, "step": 460260 }, { "epoch": 0.9297745205379833, "grad_norm": 61.7031135559082, "learning_rate": 1.9110223951337492e-07, "loss": 18.3, "step": 460270 }, { "epoch": 0.9297947211706671, "grad_norm": 233.27320861816406, "learning_rate": 1.910066682998346e-07, "loss": 22.573, "step": 460280 }, { "epoch": 0.9298149218033509, "grad_norm": 261.3985595703125, "learning_rate": 1.909111205247438e-07, "loss": 24.1993, "step": 460290 }, { "epoch": 0.9298351224360347, "grad_norm": 256.5817565917969, "learning_rate": 1.9081559618856938e-07, "loss": 25.1584, "step": 460300 }, { "epoch": 0.9298553230687185, "grad_norm": 443.31829833984375, "learning_rate": 1.907200952917765e-07, "loss": 22.7134, "step": 460310 }, { "epoch": 0.9298755237014024, "grad_norm": 368.3848571777344, "learning_rate": 1.9062461783483034e-07, "loss": 13.8993, "step": 460320 }, { "epoch": 0.9298957243340862, "grad_norm": 602.7681274414062, "learning_rate": 1.9052916381819664e-07, "loss": 21.0812, "step": 460330 }, { "epoch": 0.92991592496677, "grad_norm": 1414.91796875, "learning_rate": 1.904337332423406e-07, "loss": 36.7006, "step": 460340 }, { "epoch": 0.9299361255994538, "grad_norm": 475.8883056640625, "learning_rate": 1.903383261077274e-07, "loss": 28.129, "step": 460350 }, { "epoch": 0.9299563262321376, "grad_norm": 8.253231048583984, "learning_rate": 1.9024294241482112e-07, "loss": 15.3869, "step": 460360 }, { "epoch": 0.9299765268648214, "grad_norm": 116.25457000732422, "learning_rate": 1.9014758216408803e-07, "loss": 18.6597, "step": 460370 }, { "epoch": 0.9299967274975052, "grad_norm": 79.83399963378906, "learning_rate": 1.900522453559922e-07, "loss": 16.284, "step": 460380 }, { "epoch": 0.930016928130189, "grad_norm": 1928.6744384765625, "learning_rate": 1.899569319909983e-07, "loss": 35.3755, "step": 460390 }, { "epoch": 0.9300371287628728, "grad_norm": 159.0091094970703, "learning_rate": 1.8986164206957037e-07, "loss": 15.8112, "step": 460400 }, { "epoch": 0.9300573293955566, "grad_norm": 728.5242919921875, "learning_rate": 1.897663755921747e-07, "loss": 25.4743, "step": 460410 }, { "epoch": 0.9300775300282405, "grad_norm": 291.23748779296875, "learning_rate": 1.8967113255927315e-07, "loss": 17.5053, "step": 460420 }, { "epoch": 0.9300977306609243, "grad_norm": 305.28753662109375, "learning_rate": 1.8957591297133093e-07, "loss": 10.2362, "step": 460430 }, { "epoch": 0.9301179312936081, "grad_norm": 413.1041564941406, "learning_rate": 1.894807168288132e-07, "loss": 17.3874, "step": 460440 }, { "epoch": 0.9301381319262919, "grad_norm": 93.40693664550781, "learning_rate": 1.8938554413218292e-07, "loss": 14.6845, "step": 460450 }, { "epoch": 0.9301583325589757, "grad_norm": 265.3533020019531, "learning_rate": 1.8929039488190304e-07, "loss": 10.0248, "step": 460460 }, { "epoch": 0.9301785331916596, "grad_norm": 1233.372802734375, "learning_rate": 1.8919526907843876e-07, "loss": 50.5303, "step": 460470 }, { "epoch": 0.9301987338243434, "grad_norm": 11.834299087524414, "learning_rate": 
1.8910016672225418e-07, "loss": 14.3973, "step": 460480 }, { "epoch": 0.9302189344570272, "grad_norm": 288.27978515625, "learning_rate": 1.8900508781381056e-07, "loss": 14.8687, "step": 460490 }, { "epoch": 0.930239135089711, "grad_norm": 186.37252807617188, "learning_rate": 1.8891003235357307e-07, "loss": 18.5956, "step": 460500 }, { "epoch": 0.9302593357223948, "grad_norm": 793.5770874023438, "learning_rate": 1.8881500034200473e-07, "loss": 15.8723, "step": 460510 }, { "epoch": 0.9302795363550787, "grad_norm": 255.04364013671875, "learning_rate": 1.88719991779569e-07, "loss": 40.3052, "step": 460520 }, { "epoch": 0.9302997369877625, "grad_norm": 5.66730260848999, "learning_rate": 1.8862500666672778e-07, "loss": 9.9851, "step": 460530 }, { "epoch": 0.9303199376204463, "grad_norm": 385.5716552734375, "learning_rate": 1.8853004500394512e-07, "loss": 20.0818, "step": 460540 }, { "epoch": 0.9303401382531301, "grad_norm": 10.681656837463379, "learning_rate": 1.8843510679168341e-07, "loss": 13.3622, "step": 460550 }, { "epoch": 0.9303603388858139, "grad_norm": 0.04340120032429695, "learning_rate": 1.883401920304051e-07, "loss": 10.5599, "step": 460560 }, { "epoch": 0.9303805395184978, "grad_norm": 323.4036560058594, "learning_rate": 1.8824530072057369e-07, "loss": 25.3733, "step": 460570 }, { "epoch": 0.9304007401511816, "grad_norm": 161.68948364257812, "learning_rate": 1.8815043286265044e-07, "loss": 17.8008, "step": 460580 }, { "epoch": 0.9304209407838654, "grad_norm": 98.37507629394531, "learning_rate": 1.8805558845709894e-07, "loss": 10.1326, "step": 460590 }, { "epoch": 0.9304411414165492, "grad_norm": 1050.3140869140625, "learning_rate": 1.8796076750438096e-07, "loss": 29.5644, "step": 460600 }, { "epoch": 0.930461342049233, "grad_norm": 4.681258201599121, "learning_rate": 1.878659700049579e-07, "loss": 21.6032, "step": 460610 }, { "epoch": 0.9304815426819169, "grad_norm": 902.1006469726562, "learning_rate": 1.8777119595929315e-07, "loss": 15.341, "step": 460620 }, { "epoch": 0.9305017433146006, "grad_norm": 584.0853271484375, "learning_rate": 1.8767644536784703e-07, "loss": 16.4857, "step": 460630 }, { "epoch": 0.9305219439472844, "grad_norm": 605.7371826171875, "learning_rate": 1.8758171823108295e-07, "loss": 13.6837, "step": 460640 }, { "epoch": 0.9305421445799682, "grad_norm": 346.3061828613281, "learning_rate": 1.874870145494617e-07, "loss": 24.9194, "step": 460650 }, { "epoch": 0.930562345212652, "grad_norm": 471.7580261230469, "learning_rate": 1.8739233432344518e-07, "loss": 18.2733, "step": 460660 }, { "epoch": 0.9305825458453358, "grad_norm": 226.48004150390625, "learning_rate": 1.8729767755349514e-07, "loss": 18.7975, "step": 460670 }, { "epoch": 0.9306027464780197, "grad_norm": 829.1776733398438, "learning_rate": 1.872030442400713e-07, "loss": 28.4234, "step": 460680 }, { "epoch": 0.9306229471107035, "grad_norm": 331.8815612792969, "learning_rate": 1.8710843438363713e-07, "loss": 36.9377, "step": 460690 }, { "epoch": 0.9306431477433873, "grad_norm": 0.0, "learning_rate": 1.8701384798465284e-07, "loss": 13.0251, "step": 460700 }, { "epoch": 0.9306633483760711, "grad_norm": 418.47802734375, "learning_rate": 1.8691928504357858e-07, "loss": 25.0024, "step": 460710 }, { "epoch": 0.9306835490087549, "grad_norm": 308.30731201171875, "learning_rate": 1.868247455608757e-07, "loss": 10.4745, "step": 460720 }, { "epoch": 0.9307037496414388, "grad_norm": 447.0386657714844, "learning_rate": 1.867302295370066e-07, "loss": 15.2201, "step": 460730 }, { "epoch": 0.9307239502741226, "grad_norm": 
355.3406677246094, "learning_rate": 1.8663573697242977e-07, "loss": 15.954, "step": 460740 }, { "epoch": 0.9307441509068064, "grad_norm": 252.26039123535156, "learning_rate": 1.8654126786760597e-07, "loss": 37.0269, "step": 460750 }, { "epoch": 0.9307643515394902, "grad_norm": 0.0, "learning_rate": 1.8644682222299703e-07, "loss": 9.7798, "step": 460760 }, { "epoch": 0.930784552172174, "grad_norm": 65.33013153076172, "learning_rate": 1.8635240003906264e-07, "loss": 14.7183, "step": 460770 }, { "epoch": 0.9308047528048579, "grad_norm": 246.04112243652344, "learning_rate": 1.8625800131626236e-07, "loss": 32.6438, "step": 460780 }, { "epoch": 0.9308249534375417, "grad_norm": 92.7275161743164, "learning_rate": 1.8616362605505645e-07, "loss": 8.0405, "step": 460790 }, { "epoch": 0.9308451540702255, "grad_norm": 412.2235107421875, "learning_rate": 1.8606927425590616e-07, "loss": 19.9027, "step": 460800 }, { "epoch": 0.9308653547029093, "grad_norm": 122.2205810546875, "learning_rate": 1.8597494591926946e-07, "loss": 15.9204, "step": 460810 }, { "epoch": 0.9308855553355931, "grad_norm": 530.8219604492188, "learning_rate": 1.858806410456071e-07, "loss": 16.7315, "step": 460820 }, { "epoch": 0.930905755968277, "grad_norm": 323.177001953125, "learning_rate": 1.8578635963537926e-07, "loss": 13.0048, "step": 460830 }, { "epoch": 0.9309259566009608, "grad_norm": 388.21942138671875, "learning_rate": 1.856921016890445e-07, "loss": 20.7814, "step": 460840 }, { "epoch": 0.9309461572336446, "grad_norm": 462.2232971191406, "learning_rate": 1.8559786720706185e-07, "loss": 16.8715, "step": 460850 }, { "epoch": 0.9309663578663284, "grad_norm": 595.4361572265625, "learning_rate": 1.8550365618989207e-07, "loss": 16.301, "step": 460860 }, { "epoch": 0.9309865584990122, "grad_norm": 652.5562744140625, "learning_rate": 1.854094686379937e-07, "loss": 18.9976, "step": 460870 }, { "epoch": 0.9310067591316961, "grad_norm": 611.486328125, "learning_rate": 1.8531530455182522e-07, "loss": 14.5472, "step": 460880 }, { "epoch": 0.9310269597643798, "grad_norm": 11.874302864074707, "learning_rate": 1.852211639318463e-07, "loss": 8.2873, "step": 460890 }, { "epoch": 0.9310471603970636, "grad_norm": 143.15005493164062, "learning_rate": 1.8512704677851489e-07, "loss": 8.2499, "step": 460900 }, { "epoch": 0.9310673610297474, "grad_norm": 231.83935546875, "learning_rate": 1.8503295309229065e-07, "loss": 10.2527, "step": 460910 }, { "epoch": 0.9310875616624312, "grad_norm": 354.39215087890625, "learning_rate": 1.8493888287363148e-07, "loss": 17.5189, "step": 460920 }, { "epoch": 0.931107762295115, "grad_norm": 64.90702056884766, "learning_rate": 1.8484483612299654e-07, "loss": 23.2972, "step": 460930 }, { "epoch": 0.9311279629277989, "grad_norm": 9.676826477050781, "learning_rate": 1.8475081284084428e-07, "loss": 13.254, "step": 460940 }, { "epoch": 0.9311481635604827, "grad_norm": 393.5041809082031, "learning_rate": 1.846568130276316e-07, "loss": 20.8727, "step": 460950 }, { "epoch": 0.9311683641931665, "grad_norm": 633.0675659179688, "learning_rate": 1.8456283668381814e-07, "loss": 15.9627, "step": 460960 }, { "epoch": 0.9311885648258503, "grad_norm": 109.91011047363281, "learning_rate": 1.8446888380986128e-07, "loss": 25.1295, "step": 460970 }, { "epoch": 0.9312087654585341, "grad_norm": 222.28253173828125, "learning_rate": 1.84374954406219e-07, "loss": 21.0881, "step": 460980 }, { "epoch": 0.931228966091218, "grad_norm": 410.1101989746094, "learning_rate": 1.8428104847334927e-07, "loss": 8.0959, "step": 460990 }, { "epoch": 
0.9312491667239018, "grad_norm": 428.9569396972656, "learning_rate": 1.841871660117095e-07, "loss": 14.7619, "step": 461000 }, { "epoch": 0.9312693673565856, "grad_norm": 594.7449340820312, "learning_rate": 1.8409330702175764e-07, "loss": 16.7418, "step": 461010 }, { "epoch": 0.9312895679892694, "grad_norm": 154.09713745117188, "learning_rate": 1.8399947150395058e-07, "loss": 25.5965, "step": 461020 }, { "epoch": 0.9313097686219532, "grad_norm": 237.8426055908203, "learning_rate": 1.8390565945874572e-07, "loss": 19.8066, "step": 461030 }, { "epoch": 0.9313299692546371, "grad_norm": 260.109130859375, "learning_rate": 1.8381187088660046e-07, "loss": 18.0444, "step": 461040 }, { "epoch": 0.9313501698873209, "grad_norm": 103.0996322631836, "learning_rate": 1.8371810578797277e-07, "loss": 12.4614, "step": 461050 }, { "epoch": 0.9313703705200047, "grad_norm": 20.84394645690918, "learning_rate": 1.8362436416331896e-07, "loss": 12.9066, "step": 461060 }, { "epoch": 0.9313905711526885, "grad_norm": 246.29661560058594, "learning_rate": 1.8353064601309533e-07, "loss": 12.1289, "step": 461070 }, { "epoch": 0.9314107717853723, "grad_norm": 95.33872985839844, "learning_rate": 1.8343695133775874e-07, "loss": 25.977, "step": 461080 }, { "epoch": 0.9314309724180562, "grad_norm": 189.6843719482422, "learning_rate": 1.833432801377677e-07, "loss": 17.1384, "step": 461090 }, { "epoch": 0.93145117305074, "grad_norm": 294.8585205078125, "learning_rate": 1.832496324135763e-07, "loss": 14.8602, "step": 461100 }, { "epoch": 0.9314713736834238, "grad_norm": 403.48773193359375, "learning_rate": 1.8315600816564137e-07, "loss": 21.2338, "step": 461110 }, { "epoch": 0.9314915743161076, "grad_norm": 142.48876953125, "learning_rate": 1.8306240739442094e-07, "loss": 19.6496, "step": 461120 }, { "epoch": 0.9315117749487914, "grad_norm": 399.75579833984375, "learning_rate": 1.8296883010037014e-07, "loss": 17.4055, "step": 461130 }, { "epoch": 0.9315319755814752, "grad_norm": 204.31796264648438, "learning_rate": 1.8287527628394418e-07, "loss": 10.361, "step": 461140 }, { "epoch": 0.931552176214159, "grad_norm": 174.668701171875, "learning_rate": 1.827817459456005e-07, "loss": 11.47, "step": 461150 }, { "epoch": 0.9315723768468428, "grad_norm": 326.95880126953125, "learning_rate": 1.826882390857948e-07, "loss": 18.0679, "step": 461160 }, { "epoch": 0.9315925774795266, "grad_norm": 178.13429260253906, "learning_rate": 1.825947557049812e-07, "loss": 22.8237, "step": 461170 }, { "epoch": 0.9316127781122104, "grad_norm": 647.4390258789062, "learning_rate": 1.825012958036171e-07, "loss": 37.3266, "step": 461180 }, { "epoch": 0.9316329787448943, "grad_norm": 589.9456176757812, "learning_rate": 1.824078593821571e-07, "loss": 16.358, "step": 461190 }, { "epoch": 0.9316531793775781, "grad_norm": 18.983449935913086, "learning_rate": 1.8231444644105755e-07, "loss": 16.9788, "step": 461200 }, { "epoch": 0.9316733800102619, "grad_norm": 755.7958374023438, "learning_rate": 1.8222105698077253e-07, "loss": 25.4583, "step": 461210 }, { "epoch": 0.9316935806429457, "grad_norm": 346.2664794921875, "learning_rate": 1.8212769100175774e-07, "loss": 24.498, "step": 461220 }, { "epoch": 0.9317137812756295, "grad_norm": 390.356201171875, "learning_rate": 1.8203434850446844e-07, "loss": 22.6047, "step": 461230 }, { "epoch": 0.9317339819083134, "grad_norm": 265.6792297363281, "learning_rate": 1.819410294893592e-07, "loss": 29.0851, "step": 461240 }, { "epoch": 0.9317541825409972, "grad_norm": 3.6130383014678955, "learning_rate": 1.8184773395688527e-07, 
"loss": 10.6757, "step": 461250 }, { "epoch": 0.931774383173681, "grad_norm": 434.55145263671875, "learning_rate": 1.8175446190750068e-07, "loss": 17.189, "step": 461260 }, { "epoch": 0.9317945838063648, "grad_norm": 693.389404296875, "learning_rate": 1.816612133416612e-07, "loss": 14.9552, "step": 461270 }, { "epoch": 0.9318147844390486, "grad_norm": 259.1981506347656, "learning_rate": 1.8156798825982035e-07, "loss": 9.5073, "step": 461280 }, { "epoch": 0.9318349850717325, "grad_norm": 193.0907745361328, "learning_rate": 1.814747866624317e-07, "loss": 17.5511, "step": 461290 }, { "epoch": 0.9318551857044163, "grad_norm": 376.73968505859375, "learning_rate": 1.8138160854995145e-07, "loss": 25.6786, "step": 461300 }, { "epoch": 0.9318753863371001, "grad_norm": 282.88330078125, "learning_rate": 1.8128845392283324e-07, "loss": 14.7452, "step": 461310 }, { "epoch": 0.9318955869697839, "grad_norm": 447.7366027832031, "learning_rate": 1.8119532278152996e-07, "loss": 33.4463, "step": 461320 }, { "epoch": 0.9319157876024677, "grad_norm": 370.9266662597656, "learning_rate": 1.8110221512649573e-07, "loss": 13.9933, "step": 461330 }, { "epoch": 0.9319359882351516, "grad_norm": 500.27410888671875, "learning_rate": 1.8100913095818627e-07, "loss": 19.469, "step": 461340 }, { "epoch": 0.9319561888678354, "grad_norm": 306.4193115234375, "learning_rate": 1.8091607027705293e-07, "loss": 15.5763, "step": 461350 }, { "epoch": 0.9319763895005192, "grad_norm": 447.197265625, "learning_rate": 1.8082303308354976e-07, "loss": 19.6914, "step": 461360 }, { "epoch": 0.931996590133203, "grad_norm": 125.23076629638672, "learning_rate": 1.8073001937813138e-07, "loss": 16.8651, "step": 461370 }, { "epoch": 0.9320167907658868, "grad_norm": 91.94493865966797, "learning_rate": 1.8063702916125025e-07, "loss": 9.5325, "step": 461380 }, { "epoch": 0.9320369913985707, "grad_norm": 202.62930297851562, "learning_rate": 1.805440624333593e-07, "loss": 26.8626, "step": 461390 }, { "epoch": 0.9320571920312544, "grad_norm": 514.9032592773438, "learning_rate": 1.804511191949121e-07, "loss": 21.1308, "step": 461400 }, { "epoch": 0.9320773926639382, "grad_norm": 172.77285766601562, "learning_rate": 1.8035819944636269e-07, "loss": 25.8753, "step": 461410 }, { "epoch": 0.932097593296622, "grad_norm": 289.017822265625, "learning_rate": 1.8026530318816183e-07, "loss": 17.1092, "step": 461420 }, { "epoch": 0.9321177939293058, "grad_norm": 6.599430561065674, "learning_rate": 1.8017243042076304e-07, "loss": 18.2395, "step": 461430 }, { "epoch": 0.9321379945619896, "grad_norm": 280.7936706542969, "learning_rate": 1.8007958114462042e-07, "loss": 13.1732, "step": 461440 }, { "epoch": 0.9321581951946735, "grad_norm": 894.9324340820312, "learning_rate": 1.7998675536018474e-07, "loss": 21.2707, "step": 461450 }, { "epoch": 0.9321783958273573, "grad_norm": 80.70378112792969, "learning_rate": 1.7989395306790835e-07, "loss": 43.2938, "step": 461460 }, { "epoch": 0.9321985964600411, "grad_norm": 301.1913757324219, "learning_rate": 1.798011742682454e-07, "loss": 18.9873, "step": 461470 }, { "epoch": 0.9322187970927249, "grad_norm": 164.97093200683594, "learning_rate": 1.7970841896164658e-07, "loss": 27.1778, "step": 461480 }, { "epoch": 0.9322389977254087, "grad_norm": 172.13999938964844, "learning_rate": 1.7961568714856382e-07, "loss": 22.4734, "step": 461490 }, { "epoch": 0.9322591983580926, "grad_norm": 228.5173797607422, "learning_rate": 1.7952297882945e-07, "loss": 35.3497, "step": 461500 }, { "epoch": 0.9322793989907764, "grad_norm": 
513.8005981445312, "learning_rate": 1.7943029400475598e-07, "loss": 19.494, "step": 461510 }, { "epoch": 0.9322995996234602, "grad_norm": 236.8550567626953, "learning_rate": 1.7933763267493465e-07, "loss": 11.9686, "step": 461520 }, { "epoch": 0.932319800256144, "grad_norm": 3.990077018737793, "learning_rate": 1.7924499484043622e-07, "loss": 15.6638, "step": 461530 }, { "epoch": 0.9323400008888278, "grad_norm": 267.17132568359375, "learning_rate": 1.7915238050171367e-07, "loss": 19.3243, "step": 461540 }, { "epoch": 0.9323602015215117, "grad_norm": 175.84994506835938, "learning_rate": 1.7905978965921778e-07, "loss": 23.0182, "step": 461550 }, { "epoch": 0.9323804021541955, "grad_norm": 455.0528259277344, "learning_rate": 1.7896722231339925e-07, "loss": 21.7424, "step": 461560 }, { "epoch": 0.9324006027868793, "grad_norm": 239.96661376953125, "learning_rate": 1.788746784647105e-07, "loss": 13.5123, "step": 461570 }, { "epoch": 0.9324208034195631, "grad_norm": 418.77294921875, "learning_rate": 1.7878215811360068e-07, "loss": 17.4203, "step": 461580 }, { "epoch": 0.932441004052247, "grad_norm": 564.48095703125, "learning_rate": 1.7868966126052323e-07, "loss": 9.9037, "step": 461590 }, { "epoch": 0.9324612046849308, "grad_norm": 132.60113525390625, "learning_rate": 1.785971879059273e-07, "loss": 18.6507, "step": 461600 }, { "epoch": 0.9324814053176146, "grad_norm": 358.73638916015625, "learning_rate": 1.7850473805026304e-07, "loss": 22.1932, "step": 461610 }, { "epoch": 0.9325016059502984, "grad_norm": 105.70510864257812, "learning_rate": 1.7841231169398287e-07, "loss": 11.7789, "step": 461620 }, { "epoch": 0.9325218065829822, "grad_norm": 426.2632751464844, "learning_rate": 1.7831990883753592e-07, "loss": 16.3437, "step": 461630 }, { "epoch": 0.932542007215666, "grad_norm": 410.3416748046875, "learning_rate": 1.7822752948137289e-07, "loss": 16.3283, "step": 461640 }, { "epoch": 0.9325622078483498, "grad_norm": 988.7979736328125, "learning_rate": 1.7813517362594347e-07, "loss": 26.9564, "step": 461650 }, { "epoch": 0.9325824084810336, "grad_norm": 334.42529296875, "learning_rate": 1.7804284127169946e-07, "loss": 8.4331, "step": 461660 }, { "epoch": 0.9326026091137174, "grad_norm": 243.21121215820312, "learning_rate": 1.7795053241908943e-07, "loss": 10.7253, "step": 461670 }, { "epoch": 0.9326228097464012, "grad_norm": 570.7203369140625, "learning_rate": 1.7785824706856303e-07, "loss": 12.1912, "step": 461680 }, { "epoch": 0.932643010379085, "grad_norm": 534.68994140625, "learning_rate": 1.7776598522057154e-07, "loss": 27.0854, "step": 461690 }, { "epoch": 0.9326632110117689, "grad_norm": 371.4281921386719, "learning_rate": 1.7767374687556405e-07, "loss": 31.2355, "step": 461700 }, { "epoch": 0.9326834116444527, "grad_norm": 163.0079803466797, "learning_rate": 1.7758153203398853e-07, "loss": 26.3914, "step": 461710 }, { "epoch": 0.9327036122771365, "grad_norm": 99.2337875366211, "learning_rate": 1.774893406962963e-07, "loss": 22.1929, "step": 461720 }, { "epoch": 0.9327238129098203, "grad_norm": 719.2616577148438, "learning_rate": 1.7739717286293644e-07, "loss": 15.3715, "step": 461730 }, { "epoch": 0.9327440135425041, "grad_norm": 360.2846374511719, "learning_rate": 1.7730502853435805e-07, "loss": 13.2564, "step": 461740 }, { "epoch": 0.932764214175188, "grad_norm": 130.13404846191406, "learning_rate": 1.7721290771100964e-07, "loss": 11.8033, "step": 461750 }, { "epoch": 0.9327844148078718, "grad_norm": 130.27134704589844, "learning_rate": 1.7712081039334083e-07, "loss": 8.8699, "step": 461760 
}, { "epoch": 0.9328046154405556, "grad_norm": 40.666603088378906, "learning_rate": 1.770287365818002e-07, "loss": 11.4024, "step": 461770 }, { "epoch": 0.9328248160732394, "grad_norm": 350.0940856933594, "learning_rate": 1.7693668627683625e-07, "loss": 10.8917, "step": 461780 }, { "epoch": 0.9328450167059232, "grad_norm": 260.5531921386719, "learning_rate": 1.7684465947889806e-07, "loss": 14.8456, "step": 461790 }, { "epoch": 0.9328652173386071, "grad_norm": 369.9488525390625, "learning_rate": 1.7675265618843361e-07, "loss": 11.4153, "step": 461800 }, { "epoch": 0.9328854179712909, "grad_norm": 265.1435241699219, "learning_rate": 1.7666067640589256e-07, "loss": 31.7913, "step": 461810 }, { "epoch": 0.9329056186039747, "grad_norm": 607.055419921875, "learning_rate": 1.7656872013172176e-07, "loss": 19.5087, "step": 461820 }, { "epoch": 0.9329258192366585, "grad_norm": 319.4716796875, "learning_rate": 1.764767873663703e-07, "loss": 37.0232, "step": 461830 }, { "epoch": 0.9329460198693423, "grad_norm": 467.063232421875, "learning_rate": 1.7638487811028616e-07, "loss": 17.9041, "step": 461840 }, { "epoch": 0.9329662205020262, "grad_norm": 175.1415557861328, "learning_rate": 1.7629299236391616e-07, "loss": 15.5899, "step": 461850 }, { "epoch": 0.93298642113471, "grad_norm": 1434.3819580078125, "learning_rate": 1.7620113012771002e-07, "loss": 23.1167, "step": 461860 }, { "epoch": 0.9330066217673938, "grad_norm": 619.7500610351562, "learning_rate": 1.7610929140211397e-07, "loss": 27.0966, "step": 461870 }, { "epoch": 0.9330268224000776, "grad_norm": 457.38543701171875, "learning_rate": 1.760174761875766e-07, "loss": 10.2659, "step": 461880 }, { "epoch": 0.9330470230327614, "grad_norm": 312.8133544921875, "learning_rate": 1.7592568448454528e-07, "loss": 24.0141, "step": 461890 }, { "epoch": 0.9330672236654453, "grad_norm": 113.47441101074219, "learning_rate": 1.758339162934658e-07, "loss": 21.0164, "step": 461900 }, { "epoch": 0.933087424298129, "grad_norm": 468.087646484375, "learning_rate": 1.757421716147878e-07, "loss": 21.749, "step": 461910 }, { "epoch": 0.9331076249308128, "grad_norm": 730.168212890625, "learning_rate": 1.7565045044895756e-07, "loss": 20.517, "step": 461920 }, { "epoch": 0.9331278255634966, "grad_norm": 459.88446044921875, "learning_rate": 1.7555875279642087e-07, "loss": 16.8315, "step": 461930 }, { "epoch": 0.9331480261961804, "grad_norm": 299.3412780761719, "learning_rate": 1.754670786576257e-07, "loss": 11.309, "step": 461940 }, { "epoch": 0.9331682268288642, "grad_norm": 255.61477661132812, "learning_rate": 1.7537542803302e-07, "loss": 8.1569, "step": 461950 }, { "epoch": 0.9331884274615481, "grad_norm": 303.9642028808594, "learning_rate": 1.7528380092304842e-07, "loss": 26.4091, "step": 461960 }, { "epoch": 0.9332086280942319, "grad_norm": 880.2850341796875, "learning_rate": 1.751921973281584e-07, "loss": 30.5622, "step": 461970 }, { "epoch": 0.9332288287269157, "grad_norm": 446.8388671875, "learning_rate": 1.7510061724879678e-07, "loss": 17.2565, "step": 461980 }, { "epoch": 0.9332490293595995, "grad_norm": 195.17286682128906, "learning_rate": 1.750090606854099e-07, "loss": 22.5183, "step": 461990 }, { "epoch": 0.9332692299922833, "grad_norm": 234.3126983642578, "learning_rate": 1.7491752763844294e-07, "loss": 14.1387, "step": 462000 }, { "epoch": 0.9332894306249672, "grad_norm": 184.8792724609375, "learning_rate": 1.7482601810834276e-07, "loss": 12.4313, "step": 462010 }, { "epoch": 0.933309631257651, "grad_norm": 96.02690124511719, "learning_rate": 
1.7473453209555625e-07, "loss": 14.9553, "step": 462020 }, { "epoch": 0.9333298318903348, "grad_norm": 613.595947265625, "learning_rate": 1.7464306960052746e-07, "loss": 14.9738, "step": 462030 }, { "epoch": 0.9333500325230186, "grad_norm": 386.20684814453125, "learning_rate": 1.7455163062370273e-07, "loss": 11.3136, "step": 462040 }, { "epoch": 0.9333702331557024, "grad_norm": 408.2582702636719, "learning_rate": 1.744602151655289e-07, "loss": 18.7055, "step": 462050 }, { "epoch": 0.9333904337883863, "grad_norm": 124.63929748535156, "learning_rate": 1.743688232264512e-07, "loss": 8.8279, "step": 462060 }, { "epoch": 0.9334106344210701, "grad_norm": 418.02581787109375, "learning_rate": 1.742774548069137e-07, "loss": 12.0346, "step": 462070 }, { "epoch": 0.9334308350537539, "grad_norm": 400.3048400878906, "learning_rate": 1.7418610990736273e-07, "loss": 16.3988, "step": 462080 }, { "epoch": 0.9334510356864377, "grad_norm": 161.02871704101562, "learning_rate": 1.7409478852824402e-07, "loss": 30.2902, "step": 462090 }, { "epoch": 0.9334712363191215, "grad_norm": 321.67498779296875, "learning_rate": 1.740034906700011e-07, "loss": 14.7172, "step": 462100 }, { "epoch": 0.9334914369518054, "grad_norm": 422.029296875, "learning_rate": 1.7391221633308032e-07, "loss": 18.4577, "step": 462110 }, { "epoch": 0.9335116375844892, "grad_norm": 616.5631103515625, "learning_rate": 1.7382096551792572e-07, "loss": 12.381, "step": 462120 }, { "epoch": 0.933531838217173, "grad_norm": 594.563720703125, "learning_rate": 1.7372973822498252e-07, "loss": 16.8806, "step": 462130 }, { "epoch": 0.9335520388498568, "grad_norm": 6.52410364151001, "learning_rate": 1.7363853445469482e-07, "loss": 16.316, "step": 462140 }, { "epoch": 0.9335722394825406, "grad_norm": 363.2400817871094, "learning_rate": 1.7354735420750835e-07, "loss": 17.689, "step": 462150 }, { "epoch": 0.9335924401152244, "grad_norm": 481.3193359375, "learning_rate": 1.7345619748386666e-07, "loss": 22.4928, "step": 462160 }, { "epoch": 0.9336126407479082, "grad_norm": 216.18240356445312, "learning_rate": 1.733650642842133e-07, "loss": 15.536, "step": 462170 }, { "epoch": 0.933632841380592, "grad_norm": 661.4515991210938, "learning_rate": 1.73273954608994e-07, "loss": 27.2659, "step": 462180 }, { "epoch": 0.9336530420132758, "grad_norm": 69.99722290039062, "learning_rate": 1.7318286845865174e-07, "loss": 25.4236, "step": 462190 }, { "epoch": 0.9336732426459596, "grad_norm": 110.41122436523438, "learning_rate": 1.7309180583363062e-07, "loss": 21.1707, "step": 462200 }, { "epoch": 0.9336934432786435, "grad_norm": 328.7599792480469, "learning_rate": 1.7300076673437526e-07, "loss": 9.7369, "step": 462210 }, { "epoch": 0.9337136439113273, "grad_norm": 602.4974975585938, "learning_rate": 1.7290975116132756e-07, "loss": 24.9123, "step": 462220 }, { "epoch": 0.9337338445440111, "grad_norm": 209.95286560058594, "learning_rate": 1.728187591149333e-07, "loss": 12.9717, "step": 462230 }, { "epoch": 0.9337540451766949, "grad_norm": 150.35626220703125, "learning_rate": 1.7272779059563483e-07, "loss": 22.0588, "step": 462240 }, { "epoch": 0.9337742458093787, "grad_norm": 237.76243591308594, "learning_rate": 1.7263684560387518e-07, "loss": 19.1027, "step": 462250 }, { "epoch": 0.9337944464420626, "grad_norm": 186.2238311767578, "learning_rate": 1.7254592414009785e-07, "loss": 16.0267, "step": 462260 }, { "epoch": 0.9338146470747464, "grad_norm": 1390.9752197265625, "learning_rate": 1.7245502620474643e-07, "loss": 21.0884, "step": 462270 }, { "epoch": 0.9338348477074302, 
"grad_norm": 373.25543212890625, "learning_rate": 1.7236415179826438e-07, "loss": 20.0606, "step": 462280 }, { "epoch": 0.933855048340114, "grad_norm": 115.53531646728516, "learning_rate": 1.7227330092109306e-07, "loss": 18.2338, "step": 462290 }, { "epoch": 0.9338752489727978, "grad_norm": 155.42364501953125, "learning_rate": 1.7218247357367656e-07, "loss": 15.553, "step": 462300 }, { "epoch": 0.9338954496054817, "grad_norm": 14.017396926879883, "learning_rate": 1.720916697564573e-07, "loss": 8.9427, "step": 462310 }, { "epoch": 0.9339156502381655, "grad_norm": 613.1043701171875, "learning_rate": 1.7200088946987713e-07, "loss": 29.1203, "step": 462320 }, { "epoch": 0.9339358508708493, "grad_norm": 1064.6912841796875, "learning_rate": 1.7191013271437908e-07, "loss": 23.2569, "step": 462330 }, { "epoch": 0.9339560515035331, "grad_norm": 644.6098022460938, "learning_rate": 1.7181939949040606e-07, "loss": 17.197, "step": 462340 }, { "epoch": 0.9339762521362169, "grad_norm": 735.906982421875, "learning_rate": 1.717286897983994e-07, "loss": 27.4059, "step": 462350 }, { "epoch": 0.9339964527689008, "grad_norm": 194.6940155029297, "learning_rate": 1.7163800363880102e-07, "loss": 8.4069, "step": 462360 }, { "epoch": 0.9340166534015846, "grad_norm": 446.6946105957031, "learning_rate": 1.715473410120544e-07, "loss": 28.9146, "step": 462370 }, { "epoch": 0.9340368540342684, "grad_norm": 454.2542419433594, "learning_rate": 1.7145670191859977e-07, "loss": 12.4825, "step": 462380 }, { "epoch": 0.9340570546669522, "grad_norm": 210.14657592773438, "learning_rate": 1.7136608635887952e-07, "loss": 18.0583, "step": 462390 }, { "epoch": 0.934077255299636, "grad_norm": 1262.3385009765625, "learning_rate": 1.7127549433333557e-07, "loss": 26.256, "step": 462400 }, { "epoch": 0.9340974559323199, "grad_norm": 194.8001251220703, "learning_rate": 1.7118492584240865e-07, "loss": 15.3081, "step": 462410 }, { "epoch": 0.9341176565650036, "grad_norm": 210.71812438964844, "learning_rate": 1.7109438088654173e-07, "loss": 14.9052, "step": 462420 }, { "epoch": 0.9341378571976874, "grad_norm": 427.1445617675781, "learning_rate": 1.7100385946617393e-07, "loss": 20.4259, "step": 462430 }, { "epoch": 0.9341580578303712, "grad_norm": 1009.92626953125, "learning_rate": 1.7091336158174877e-07, "loss": 22.6844, "step": 462440 }, { "epoch": 0.934178258463055, "grad_norm": 234.84991455078125, "learning_rate": 1.7082288723370587e-07, "loss": 4.9438, "step": 462450 }, { "epoch": 0.9341984590957388, "grad_norm": 272.6453857421875, "learning_rate": 1.7073243642248605e-07, "loss": 16.662, "step": 462460 }, { "epoch": 0.9342186597284227, "grad_norm": 554.6612548828125, "learning_rate": 1.7064200914853112e-07, "loss": 26.6758, "step": 462470 }, { "epoch": 0.9342388603611065, "grad_norm": 171.58334350585938, "learning_rate": 1.7055160541228077e-07, "loss": 7.0108, "step": 462480 }, { "epoch": 0.9342590609937903, "grad_norm": 761.4239501953125, "learning_rate": 1.7046122521417686e-07, "loss": 10.5315, "step": 462490 }, { "epoch": 0.9342792616264741, "grad_norm": 279.9809875488281, "learning_rate": 1.7037086855465902e-07, "loss": 29.0353, "step": 462500 }, { "epoch": 0.9342994622591579, "grad_norm": 130.5390625, "learning_rate": 1.702805354341669e-07, "loss": 26.2922, "step": 462510 }, { "epoch": 0.9343196628918418, "grad_norm": 1399.39794921875, "learning_rate": 1.7019022585314293e-07, "loss": 19.9626, "step": 462520 }, { "epoch": 0.9343398635245256, "grad_norm": 965.174560546875, "learning_rate": 1.7009993981202567e-07, "loss": 26.338, 
"step": 462530 }, { "epoch": 0.9343600641572094, "grad_norm": 168.63499450683594, "learning_rate": 1.7000967731125472e-07, "loss": 19.967, "step": 462540 }, { "epoch": 0.9343802647898932, "grad_norm": 222.2827911376953, "learning_rate": 1.699194383512709e-07, "loss": 7.6039, "step": 462550 }, { "epoch": 0.934400465422577, "grad_norm": 277.81793212890625, "learning_rate": 1.6982922293251548e-07, "loss": 43.1463, "step": 462560 }, { "epoch": 0.9344206660552609, "grad_norm": 207.72872924804688, "learning_rate": 1.6973903105542533e-07, "loss": 32.8887, "step": 462570 }, { "epoch": 0.9344408666879447, "grad_norm": 741.337158203125, "learning_rate": 1.6964886272044069e-07, "loss": 24.0368, "step": 462580 }, { "epoch": 0.9344610673206285, "grad_norm": 294.7828369140625, "learning_rate": 1.6955871792800283e-07, "loss": 13.5807, "step": 462590 }, { "epoch": 0.9344812679533123, "grad_norm": 463.4609680175781, "learning_rate": 1.6946859667854977e-07, "loss": 34.091, "step": 462600 }, { "epoch": 0.9345014685859961, "grad_norm": 118.23023223876953, "learning_rate": 1.6937849897252056e-07, "loss": 8.3565, "step": 462610 }, { "epoch": 0.93452166921868, "grad_norm": 76.01818084716797, "learning_rate": 1.6928842481035436e-07, "loss": 9.2868, "step": 462620 }, { "epoch": 0.9345418698513638, "grad_norm": 468.1135559082031, "learning_rate": 1.691983741924913e-07, "loss": 16.4288, "step": 462630 }, { "epoch": 0.9345620704840476, "grad_norm": 322.39031982421875, "learning_rate": 1.6910834711936886e-07, "loss": 15.4933, "step": 462640 }, { "epoch": 0.9345822711167314, "grad_norm": 523.301025390625, "learning_rate": 1.690183435914261e-07, "loss": 12.5366, "step": 462650 }, { "epoch": 0.9346024717494152, "grad_norm": 577.0751342773438, "learning_rate": 1.689283636091027e-07, "loss": 19.5234, "step": 462660 }, { "epoch": 0.9346226723820991, "grad_norm": 452.67889404296875, "learning_rate": 1.688384071728366e-07, "loss": 31.4996, "step": 462670 }, { "epoch": 0.9346428730147828, "grad_norm": 11.344707489013672, "learning_rate": 1.6874847428306583e-07, "loss": 20.8121, "step": 462680 }, { "epoch": 0.9346630736474666, "grad_norm": 550.50927734375, "learning_rate": 1.6865856494022892e-07, "loss": 14.4299, "step": 462690 }, { "epoch": 0.9346832742801504, "grad_norm": 397.3772888183594, "learning_rate": 1.6856867914476492e-07, "loss": 8.819, "step": 462700 }, { "epoch": 0.9347034749128342, "grad_norm": 6.457852840423584, "learning_rate": 1.684788168971102e-07, "loss": 12.4718, "step": 462710 }, { "epoch": 0.934723675545518, "grad_norm": 464.8440856933594, "learning_rate": 1.6838897819770438e-07, "loss": 40.8515, "step": 462720 }, { "epoch": 0.9347438761782019, "grad_norm": 2413.400390625, "learning_rate": 1.682991630469838e-07, "loss": 42.2716, "step": 462730 }, { "epoch": 0.9347640768108857, "grad_norm": 624.918212890625, "learning_rate": 1.6820937144538807e-07, "loss": 16.1107, "step": 462740 }, { "epoch": 0.9347842774435695, "grad_norm": 635.3636474609375, "learning_rate": 1.6811960339335298e-07, "loss": 18.9638, "step": 462750 }, { "epoch": 0.9348044780762533, "grad_norm": 595.7736206054688, "learning_rate": 1.6802985889131762e-07, "loss": 22.7298, "step": 462760 }, { "epoch": 0.9348246787089372, "grad_norm": 233.24156188964844, "learning_rate": 1.6794013793971887e-07, "loss": 26.2768, "step": 462770 }, { "epoch": 0.934844879341621, "grad_norm": 545.78271484375, "learning_rate": 1.6785044053899302e-07, "loss": 15.6134, "step": 462780 }, { "epoch": 0.9348650799743048, "grad_norm": 230.40011596679688, "learning_rate": 
1.6776076668957864e-07, "loss": 16.2018, "step": 462790 }, { "epoch": 0.9348852806069886, "grad_norm": 242.9065704345703, "learning_rate": 1.6767111639191202e-07, "loss": 24.9229, "step": 462800 }, { "epoch": 0.9349054812396724, "grad_norm": 0.0, "learning_rate": 1.675814896464306e-07, "loss": 34.967, "step": 462810 }, { "epoch": 0.9349256818723563, "grad_norm": 229.31613159179688, "learning_rate": 1.6749188645357072e-07, "loss": 11.2219, "step": 462820 }, { "epoch": 0.9349458825050401, "grad_norm": 412.9171447753906, "learning_rate": 1.6740230681376867e-07, "loss": 35.8329, "step": 462830 }, { "epoch": 0.9349660831377239, "grad_norm": 12.56732177734375, "learning_rate": 1.6731275072746244e-07, "loss": 20.6205, "step": 462840 }, { "epoch": 0.9349862837704077, "grad_norm": 454.1410827636719, "learning_rate": 1.672232181950878e-07, "loss": 11.0535, "step": 462850 }, { "epoch": 0.9350064844030915, "grad_norm": 283.65576171875, "learning_rate": 1.6713370921708049e-07, "loss": 20.9141, "step": 462860 }, { "epoch": 0.9350266850357754, "grad_norm": 240.0868682861328, "learning_rate": 1.6704422379387685e-07, "loss": 16.6079, "step": 462870 }, { "epoch": 0.9350468856684592, "grad_norm": 0.09903652966022491, "learning_rate": 1.669547619259143e-07, "loss": 10.4518, "step": 462880 }, { "epoch": 0.935067086301143, "grad_norm": 383.5288391113281, "learning_rate": 1.6686532361362805e-07, "loss": 14.665, "step": 462890 }, { "epoch": 0.9350872869338268, "grad_norm": 133.67431640625, "learning_rate": 1.6677590885745388e-07, "loss": 30.3162, "step": 462900 }, { "epoch": 0.9351074875665106, "grad_norm": 161.3779296875, "learning_rate": 1.6668651765782806e-07, "loss": 27.869, "step": 462910 }, { "epoch": 0.9351276881991945, "grad_norm": 499.52880859375, "learning_rate": 1.6659715001518583e-07, "loss": 20.6727, "step": 462920 }, { "epoch": 0.9351478888318782, "grad_norm": 500.2755126953125, "learning_rate": 1.665078059299624e-07, "loss": 11.5379, "step": 462930 }, { "epoch": 0.935168089464562, "grad_norm": 345.6598815917969, "learning_rate": 1.6641848540259353e-07, "loss": 28.3758, "step": 462940 }, { "epoch": 0.9351882900972458, "grad_norm": 91.43672943115234, "learning_rate": 1.6632918843351554e-07, "loss": 8.8927, "step": 462950 }, { "epoch": 0.9352084907299296, "grad_norm": 75.08840942382812, "learning_rate": 1.662399150231625e-07, "loss": 18.547, "step": 462960 }, { "epoch": 0.9352286913626134, "grad_norm": 301.5865478515625, "learning_rate": 1.6615066517196965e-07, "loss": 21.6605, "step": 462970 }, { "epoch": 0.9352488919952973, "grad_norm": 436.82568359375, "learning_rate": 1.6606143888037219e-07, "loss": 12.5314, "step": 462980 }, { "epoch": 0.9352690926279811, "grad_norm": 358.45697021484375, "learning_rate": 1.659722361488053e-07, "loss": 23.522, "step": 462990 }, { "epoch": 0.9352892932606649, "grad_norm": 677.9505004882812, "learning_rate": 1.6588305697770313e-07, "loss": 23.7936, "step": 463000 }, { "epoch": 0.9353094938933487, "grad_norm": 140.39315795898438, "learning_rate": 1.6579390136750086e-07, "loss": 30.5268, "step": 463010 }, { "epoch": 0.9353296945260325, "grad_norm": 391.122802734375, "learning_rate": 1.6570476931863256e-07, "loss": 16.1737, "step": 463020 }, { "epoch": 0.9353498951587164, "grad_norm": 328.2913818359375, "learning_rate": 1.656156608315329e-07, "loss": 22.6559, "step": 463030 }, { "epoch": 0.9353700957914002, "grad_norm": 246.04940795898438, "learning_rate": 1.65526575906636e-07, "loss": 13.7928, "step": 463040 }, { "epoch": 0.935390296424084, "grad_norm": 
124.8285140991211, "learning_rate": 1.6543751454437708e-07, "loss": 14.0438, "step": 463050 }, { "epoch": 0.9354104970567678, "grad_norm": 247.01405334472656, "learning_rate": 1.6534847674518905e-07, "loss": 13.7093, "step": 463060 }, { "epoch": 0.9354306976894516, "grad_norm": 403.01171875, "learning_rate": 1.6525946250950553e-07, "loss": 14.5217, "step": 463070 }, { "epoch": 0.9354508983221355, "grad_norm": 196.5394744873047, "learning_rate": 1.651704718377617e-07, "loss": 12.1933, "step": 463080 }, { "epoch": 0.9354710989548193, "grad_norm": 820.8240356445312, "learning_rate": 1.650815047303894e-07, "loss": 23.4596, "step": 463090 }, { "epoch": 0.9354912995875031, "grad_norm": 45.13959503173828, "learning_rate": 1.6499256118782503e-07, "loss": 23.7143, "step": 463100 }, { "epoch": 0.9355115002201869, "grad_norm": 1102.887451171875, "learning_rate": 1.6490364121049984e-07, "loss": 23.7886, "step": 463110 }, { "epoch": 0.9355317008528707, "grad_norm": 180.792724609375, "learning_rate": 1.648147447988474e-07, "loss": 19.873, "step": 463120 }, { "epoch": 0.9355519014855546, "grad_norm": 553.9746704101562, "learning_rate": 1.6472587195330236e-07, "loss": 27.4306, "step": 463130 }, { "epoch": 0.9355721021182384, "grad_norm": 309.197509765625, "learning_rate": 1.6463702267429659e-07, "loss": 15.8393, "step": 463140 }, { "epoch": 0.9355923027509222, "grad_norm": 319.75726318359375, "learning_rate": 1.645481969622631e-07, "loss": 20.5699, "step": 463150 }, { "epoch": 0.935612503383606, "grad_norm": 260.9181213378906, "learning_rate": 1.644593948176354e-07, "loss": 23.9048, "step": 463160 }, { "epoch": 0.9356327040162898, "grad_norm": 283.957763671875, "learning_rate": 1.6437061624084704e-07, "loss": 15.1584, "step": 463170 }, { "epoch": 0.9356529046489737, "grad_norm": 216.19720458984375, "learning_rate": 1.6428186123232826e-07, "loss": 14.2853, "step": 463180 }, { "epoch": 0.9356731052816574, "grad_norm": 282.28033447265625, "learning_rate": 1.6419312979251368e-07, "loss": 26.9816, "step": 463190 }, { "epoch": 0.9356933059143412, "grad_norm": 547.8955688476562, "learning_rate": 1.6410442192183574e-07, "loss": 13.523, "step": 463200 }, { "epoch": 0.935713506547025, "grad_norm": 317.2925720214844, "learning_rate": 1.6401573762072631e-07, "loss": 10.4762, "step": 463210 }, { "epoch": 0.9357337071797088, "grad_norm": 226.99322509765625, "learning_rate": 1.6392707688961728e-07, "loss": 17.5634, "step": 463220 }, { "epoch": 0.9357539078123926, "grad_norm": 104.4507064819336, "learning_rate": 1.638384397289411e-07, "loss": 22.4312, "step": 463230 }, { "epoch": 0.9357741084450765, "grad_norm": 216.3778533935547, "learning_rate": 1.6374982613913072e-07, "loss": 17.19, "step": 463240 }, { "epoch": 0.9357943090777603, "grad_norm": 299.8779296875, "learning_rate": 1.6366123612061636e-07, "loss": 26.81, "step": 463250 }, { "epoch": 0.9358145097104441, "grad_norm": 167.1317596435547, "learning_rate": 1.635726696738299e-07, "loss": 19.6392, "step": 463260 }, { "epoch": 0.9358347103431279, "grad_norm": 415.87091064453125, "learning_rate": 1.6348412679920488e-07, "loss": 10.1537, "step": 463270 }, { "epoch": 0.9358549109758117, "grad_norm": 165.0482177734375, "learning_rate": 1.6339560749717154e-07, "loss": 9.7739, "step": 463280 }, { "epoch": 0.9358751116084956, "grad_norm": 236.4598388671875, "learning_rate": 1.633071117681606e-07, "loss": 14.7218, "step": 463290 }, { "epoch": 0.9358953122411794, "grad_norm": 644.8948974609375, "learning_rate": 1.6321863961260452e-07, "loss": 18.5992, "step": 463300 }, { 
"epoch": 0.9359155128738632, "grad_norm": 419.4942626953125, "learning_rate": 1.6313019103093463e-07, "loss": 16.4838, "step": 463310 }, { "epoch": 0.935935713506547, "grad_norm": 219.58193969726562, "learning_rate": 1.6304176602358056e-07, "loss": 13.8679, "step": 463320 }, { "epoch": 0.9359559141392308, "grad_norm": 338.3202819824219, "learning_rate": 1.6295336459097532e-07, "loss": 15.4373, "step": 463330 }, { "epoch": 0.9359761147719147, "grad_norm": 377.089111328125, "learning_rate": 1.62864986733548e-07, "loss": 14.0342, "step": 463340 }, { "epoch": 0.9359963154045985, "grad_norm": 299.72210693359375, "learning_rate": 1.6277663245173047e-07, "loss": 11.9394, "step": 463350 }, { "epoch": 0.9360165160372823, "grad_norm": 290.8778076171875, "learning_rate": 1.6268830174595242e-07, "loss": 14.5878, "step": 463360 }, { "epoch": 0.9360367166699661, "grad_norm": 491.1105651855469, "learning_rate": 1.6259999461664567e-07, "loss": 23.7471, "step": 463370 }, { "epoch": 0.93605691730265, "grad_norm": 42.898990631103516, "learning_rate": 1.6251171106423935e-07, "loss": 15.818, "step": 463380 }, { "epoch": 0.9360771179353338, "grad_norm": 205.88906860351562, "learning_rate": 1.6242345108916424e-07, "loss": 13.1447, "step": 463390 }, { "epoch": 0.9360973185680176, "grad_norm": 438.9242248535156, "learning_rate": 1.6233521469185054e-07, "loss": 22.0241, "step": 463400 }, { "epoch": 0.9361175192007014, "grad_norm": 303.1636962890625, "learning_rate": 1.6224700187272792e-07, "loss": 18.267, "step": 463410 }, { "epoch": 0.9361377198333852, "grad_norm": 295.252685546875, "learning_rate": 1.621588126322271e-07, "loss": 13.4704, "step": 463420 }, { "epoch": 0.936157920466069, "grad_norm": 1018.2053833007812, "learning_rate": 1.620706469707778e-07, "loss": 39.882, "step": 463430 }, { "epoch": 0.9361781210987528, "grad_norm": 461.65521240234375, "learning_rate": 1.619825048888085e-07, "loss": 12.3608, "step": 463440 }, { "epoch": 0.9361983217314366, "grad_norm": 297.8987121582031, "learning_rate": 1.618943863867506e-07, "loss": 23.6289, "step": 463450 }, { "epoch": 0.9362185223641204, "grad_norm": 34.65291213989258, "learning_rate": 1.6180629146503256e-07, "loss": 10.5408, "step": 463460 }, { "epoch": 0.9362387229968042, "grad_norm": 454.8813781738281, "learning_rate": 1.61718220124083e-07, "loss": 11.0426, "step": 463470 }, { "epoch": 0.936258923629488, "grad_norm": 0.6589401960372925, "learning_rate": 1.6163017236433265e-07, "loss": 17.0109, "step": 463480 }, { "epoch": 0.9362791242621719, "grad_norm": 688.1834106445312, "learning_rate": 1.6154214818621007e-07, "loss": 21.8056, "step": 463490 }, { "epoch": 0.9362993248948557, "grad_norm": 224.41502380371094, "learning_rate": 1.6145414759014433e-07, "loss": 13.6119, "step": 463500 }, { "epoch": 0.9363195255275395, "grad_norm": 252.64492797851562, "learning_rate": 1.6136617057656344e-07, "loss": 12.8251, "step": 463510 }, { "epoch": 0.9363397261602233, "grad_norm": 197.5618896484375, "learning_rate": 1.6127821714589763e-07, "loss": 14.6395, "step": 463520 }, { "epoch": 0.9363599267929071, "grad_norm": 646.2914428710938, "learning_rate": 1.6119028729857545e-07, "loss": 14.4647, "step": 463530 }, { "epoch": 0.936380127425591, "grad_norm": 217.630126953125, "learning_rate": 1.6110238103502374e-07, "loss": 21.2792, "step": 463540 }, { "epoch": 0.9364003280582748, "grad_norm": 221.92062377929688, "learning_rate": 1.6101449835567273e-07, "loss": 11.9105, "step": 463550 }, { "epoch": 0.9364205286909586, "grad_norm": 686.2666015625, "learning_rate": 
1.6092663926094987e-07, "loss": 21.9448, "step": 463560 }, { "epoch": 0.9364407293236424, "grad_norm": 395.3066101074219, "learning_rate": 1.6083880375128424e-07, "loss": 17.1408, "step": 463570 }, { "epoch": 0.9364609299563262, "grad_norm": 663.0675659179688, "learning_rate": 1.6075099182710274e-07, "loss": 34.2614, "step": 463580 }, { "epoch": 0.9364811305890101, "grad_norm": 228.4241943359375, "learning_rate": 1.6066320348883448e-07, "loss": 20.2467, "step": 463590 }, { "epoch": 0.9365013312216939, "grad_norm": 198.4999237060547, "learning_rate": 1.6057543873690685e-07, "loss": 11.6937, "step": 463600 }, { "epoch": 0.9365215318543777, "grad_norm": 1286.03173828125, "learning_rate": 1.604876975717473e-07, "loss": 17.0578, "step": 463610 }, { "epoch": 0.9365417324870615, "grad_norm": 179.70726013183594, "learning_rate": 1.6039997999378388e-07, "loss": 9.2406, "step": 463620 }, { "epoch": 0.9365619331197453, "grad_norm": 201.22686767578125, "learning_rate": 1.603122860034434e-07, "loss": 18.0075, "step": 463630 }, { "epoch": 0.9365821337524292, "grad_norm": 345.80712890625, "learning_rate": 1.6022461560115498e-07, "loss": 22.7832, "step": 463640 }, { "epoch": 0.936602334385113, "grad_norm": 495.47772216796875, "learning_rate": 1.6013696878734385e-07, "loss": 10.4317, "step": 463650 }, { "epoch": 0.9366225350177968, "grad_norm": 253.85330200195312, "learning_rate": 1.6004934556243857e-07, "loss": 8.4572, "step": 463660 }, { "epoch": 0.9366427356504806, "grad_norm": 395.35296630859375, "learning_rate": 1.5996174592686598e-07, "loss": 31.3012, "step": 463670 }, { "epoch": 0.9366629362831644, "grad_norm": 491.93597412109375, "learning_rate": 1.5987416988105188e-07, "loss": 25.3924, "step": 463680 }, { "epoch": 0.9366831369158483, "grad_norm": 1044.8165283203125, "learning_rate": 1.5978661742542477e-07, "loss": 23.4883, "step": 463690 }, { "epoch": 0.936703337548532, "grad_norm": 443.22705078125, "learning_rate": 1.596990885604105e-07, "loss": 12.8588, "step": 463700 }, { "epoch": 0.9367235381812158, "grad_norm": 350.1383972167969, "learning_rate": 1.596115832864359e-07, "loss": 35.5491, "step": 463710 }, { "epoch": 0.9367437388138996, "grad_norm": 461.406005859375, "learning_rate": 1.5952410160392784e-07, "loss": 23.4766, "step": 463720 }, { "epoch": 0.9367639394465834, "grad_norm": 426.9395751953125, "learning_rate": 1.59436643513311e-07, "loss": 31.4879, "step": 463730 }, { "epoch": 0.9367841400792672, "grad_norm": 142.09671020507812, "learning_rate": 1.5934920901501395e-07, "loss": 12.2829, "step": 463740 }, { "epoch": 0.9368043407119511, "grad_norm": 467.48260498046875, "learning_rate": 1.5926179810946185e-07, "loss": 20.8934, "step": 463750 }, { "epoch": 0.9368245413446349, "grad_norm": 128.65975952148438, "learning_rate": 1.5917441079707942e-07, "loss": 17.7847, "step": 463760 }, { "epoch": 0.9368447419773187, "grad_norm": 333.0611267089844, "learning_rate": 1.5908704707829458e-07, "loss": 10.7977, "step": 463770 }, { "epoch": 0.9368649426100025, "grad_norm": 593.1852416992188, "learning_rate": 1.5899970695353262e-07, "loss": 17.1089, "step": 463780 }, { "epoch": 0.9368851432426863, "grad_norm": 347.4171142578125, "learning_rate": 1.5891239042321871e-07, "loss": 9.5736, "step": 463790 }, { "epoch": 0.9369053438753702, "grad_norm": 362.99835205078125, "learning_rate": 1.5882509748777809e-07, "loss": 23.4353, "step": 463800 }, { "epoch": 0.936925544508054, "grad_norm": 219.54135131835938, "learning_rate": 1.5873782814763762e-07, "loss": 38.4928, "step": 463810 }, { "epoch": 
0.9369457451407378, "grad_norm": 166.92816162109375, "learning_rate": 1.586505824032214e-07, "loss": 27.4917, "step": 463820 }, { "epoch": 0.9369659457734216, "grad_norm": 479.5160827636719, "learning_rate": 1.5856336025495466e-07, "loss": 9.695, "step": 463830 }, { "epoch": 0.9369861464061054, "grad_norm": 526.8687133789062, "learning_rate": 1.5847616170326318e-07, "loss": 24.6402, "step": 463840 }, { "epoch": 0.9370063470387893, "grad_norm": 471.7763366699219, "learning_rate": 1.5838898674857273e-07, "loss": 13.5469, "step": 463850 }, { "epoch": 0.9370265476714731, "grad_norm": 414.0970458984375, "learning_rate": 1.5830183539130574e-07, "loss": 11.267, "step": 463860 }, { "epoch": 0.9370467483041569, "grad_norm": 319.959716796875, "learning_rate": 1.582147076318885e-07, "loss": 13.8998, "step": 463870 }, { "epoch": 0.9370669489368407, "grad_norm": 175.54379272460938, "learning_rate": 1.581276034707463e-07, "loss": 20.5544, "step": 463880 }, { "epoch": 0.9370871495695245, "grad_norm": 416.506591796875, "learning_rate": 1.5804052290830262e-07, "loss": 16.798, "step": 463890 }, { "epoch": 0.9371073502022084, "grad_norm": 728.6934814453125, "learning_rate": 1.5795346594498162e-07, "loss": 19.4924, "step": 463900 }, { "epoch": 0.9371275508348922, "grad_norm": 333.4419250488281, "learning_rate": 1.5786643258120905e-07, "loss": 18.2137, "step": 463910 }, { "epoch": 0.937147751467576, "grad_norm": 11.038262367248535, "learning_rate": 1.5777942281740789e-07, "loss": 18.4305, "step": 463920 }, { "epoch": 0.9371679521002598, "grad_norm": 327.39752197265625, "learning_rate": 1.5769243665400224e-07, "loss": 28.4707, "step": 463930 }, { "epoch": 0.9371881527329436, "grad_norm": 425.0711364746094, "learning_rate": 1.5760547409141626e-07, "loss": 19.2452, "step": 463940 }, { "epoch": 0.9372083533656275, "grad_norm": 120.11178588867188, "learning_rate": 1.5751853513007454e-07, "loss": 12.1289, "step": 463950 }, { "epoch": 0.9372285539983112, "grad_norm": 14.883577346801758, "learning_rate": 1.5743161977039954e-07, "loss": 17.47, "step": 463960 }, { "epoch": 0.937248754630995, "grad_norm": 333.3983154296875, "learning_rate": 1.5734472801281543e-07, "loss": 15.0493, "step": 463970 }, { "epoch": 0.9372689552636788, "grad_norm": 384.79547119140625, "learning_rate": 1.5725785985774623e-07, "loss": 17.9263, "step": 463980 }, { "epoch": 0.9372891558963626, "grad_norm": 323.10943603515625, "learning_rate": 1.5717101530561497e-07, "loss": 10.0074, "step": 463990 }, { "epoch": 0.9373093565290465, "grad_norm": 400.6250305175781, "learning_rate": 1.5708419435684463e-07, "loss": 21.9798, "step": 464000 }, { "epoch": 0.9373295571617303, "grad_norm": 287.2420349121094, "learning_rate": 1.5699739701185878e-07, "loss": 40.5435, "step": 464010 }, { "epoch": 0.9373497577944141, "grad_norm": 474.51348876953125, "learning_rate": 1.5691062327107932e-07, "loss": 13.2892, "step": 464020 }, { "epoch": 0.9373699584270979, "grad_norm": 319.4916687011719, "learning_rate": 1.5682387313493086e-07, "loss": 15.7933, "step": 464030 }, { "epoch": 0.9373901590597817, "grad_norm": 614.4664306640625, "learning_rate": 1.5673714660383532e-07, "loss": 17.009, "step": 464040 }, { "epoch": 0.9374103596924656, "grad_norm": 623.9253540039062, "learning_rate": 1.5665044367821513e-07, "loss": 17.0484, "step": 464050 }, { "epoch": 0.9374305603251494, "grad_norm": 486.2925720214844, "learning_rate": 1.5656376435849385e-07, "loss": 24.1836, "step": 464060 }, { "epoch": 0.9374507609578332, "grad_norm": 343.29730224609375, "learning_rate": 
1.5647710864509336e-07, "loss": 18.7584, "step": 464070 }, { "epoch": 0.937470961590517, "grad_norm": 500.9264221191406, "learning_rate": 1.5639047653843554e-07, "loss": 15.1472, "step": 464080 }, { "epoch": 0.9374911622232008, "grad_norm": 171.4511260986328, "learning_rate": 1.563038680389428e-07, "loss": 10.5716, "step": 464090 }, { "epoch": 0.9375113628558847, "grad_norm": 52.38894271850586, "learning_rate": 1.5621728314703822e-07, "loss": 17.9345, "step": 464100 }, { "epoch": 0.9375315634885685, "grad_norm": 151.2301788330078, "learning_rate": 1.5613072186314304e-07, "loss": 13.4225, "step": 464110 }, { "epoch": 0.9375517641212523, "grad_norm": 661.763427734375, "learning_rate": 1.560441841876792e-07, "loss": 24.8922, "step": 464120 }, { "epoch": 0.9375719647539361, "grad_norm": 323.34417724609375, "learning_rate": 1.5595767012106856e-07, "loss": 20.4487, "step": 464130 }, { "epoch": 0.9375921653866199, "grad_norm": 433.3529052734375, "learning_rate": 1.5587117966373244e-07, "loss": 10.2313, "step": 464140 }, { "epoch": 0.9376123660193038, "grad_norm": 414.6815185546875, "learning_rate": 1.5578471281609274e-07, "loss": 17.6942, "step": 464150 }, { "epoch": 0.9376325666519876, "grad_norm": 320.7261047363281, "learning_rate": 1.5569826957857027e-07, "loss": 18.6855, "step": 464160 }, { "epoch": 0.9376527672846714, "grad_norm": 142.53854370117188, "learning_rate": 1.556118499515885e-07, "loss": 19.6049, "step": 464170 }, { "epoch": 0.9376729679173552, "grad_norm": 573.0814819335938, "learning_rate": 1.555254539355655e-07, "loss": 11.8, "step": 464180 }, { "epoch": 0.937693168550039, "grad_norm": 382.5942687988281, "learning_rate": 1.5543908153092424e-07, "loss": 28.2042, "step": 464190 }, { "epoch": 0.9377133691827229, "grad_norm": 489.61602783203125, "learning_rate": 1.553527327380855e-07, "loss": 27.2857, "step": 464200 }, { "epoch": 0.9377335698154066, "grad_norm": 398.268310546875, "learning_rate": 1.5526640755747003e-07, "loss": 18.6053, "step": 464210 }, { "epoch": 0.9377537704480904, "grad_norm": 689.2642822265625, "learning_rate": 1.5518010598949807e-07, "loss": 17.124, "step": 464220 }, { "epoch": 0.9377739710807742, "grad_norm": 391.16351318359375, "learning_rate": 1.5509382803459149e-07, "loss": 16.5513, "step": 464230 }, { "epoch": 0.937794171713458, "grad_norm": 324.4962463378906, "learning_rate": 1.5500757369316888e-07, "loss": 26.283, "step": 464240 }, { "epoch": 0.9378143723461418, "grad_norm": 243.7255859375, "learning_rate": 1.5492134296565264e-07, "loss": 54.337, "step": 464250 }, { "epoch": 0.9378345729788257, "grad_norm": 180.85537719726562, "learning_rate": 1.5483513585246135e-07, "loss": 19.1971, "step": 464260 }, { "epoch": 0.9378547736115095, "grad_norm": 870.2200927734375, "learning_rate": 1.5474895235401688e-07, "loss": 21.2145, "step": 464270 }, { "epoch": 0.9378749742441933, "grad_norm": 397.47149658203125, "learning_rate": 1.546627924707378e-07, "loss": 19.5186, "step": 464280 }, { "epoch": 0.9378951748768771, "grad_norm": 120.85033416748047, "learning_rate": 1.545766562030443e-07, "loss": 21.5627, "step": 464290 }, { "epoch": 0.937915375509561, "grad_norm": 370.54620361328125, "learning_rate": 1.5449054355135718e-07, "loss": 20.4738, "step": 464300 }, { "epoch": 0.9379355761422448, "grad_norm": 237.71800231933594, "learning_rate": 1.54404454516095e-07, "loss": 21.7886, "step": 464310 }, { "epoch": 0.9379557767749286, "grad_norm": 425.63330078125, "learning_rate": 1.5431838909767793e-07, "loss": 18.739, "step": 464320 }, { "epoch": 0.9379759774076124, 
"grad_norm": 454.5373840332031, "learning_rate": 1.542323472965257e-07, "loss": 15.3442, "step": 464330 }, { "epoch": 0.9379961780402962, "grad_norm": 304.43927001953125, "learning_rate": 1.5414632911305683e-07, "loss": 18.2253, "step": 464340 }, { "epoch": 0.93801637867298, "grad_norm": 516.6799926757812, "learning_rate": 1.5406033454769154e-07, "loss": 11.4711, "step": 464350 }, { "epoch": 0.9380365793056639, "grad_norm": 366.4046325683594, "learning_rate": 1.5397436360084784e-07, "loss": 18.1866, "step": 464360 }, { "epoch": 0.9380567799383477, "grad_norm": 405.16986083984375, "learning_rate": 1.5388841627294536e-07, "loss": 25.2115, "step": 464370 }, { "epoch": 0.9380769805710315, "grad_norm": 702.1383666992188, "learning_rate": 1.5380249256440272e-07, "loss": 23.2144, "step": 464380 }, { "epoch": 0.9380971812037153, "grad_norm": 471.3887939453125, "learning_rate": 1.5371659247564063e-07, "loss": 19.3548, "step": 464390 }, { "epoch": 0.9381173818363991, "grad_norm": 681.3546752929688, "learning_rate": 1.5363071600707435e-07, "loss": 18.2053, "step": 464400 }, { "epoch": 0.938137582469083, "grad_norm": 247.7911834716797, "learning_rate": 1.5354486315912408e-07, "loss": 18.0673, "step": 464410 }, { "epoch": 0.9381577831017668, "grad_norm": 511.078125, "learning_rate": 1.534590339322095e-07, "loss": 12.9879, "step": 464420 }, { "epoch": 0.9381779837344506, "grad_norm": 401.2061767578125, "learning_rate": 1.533732283267475e-07, "loss": 24.2374, "step": 464430 }, { "epoch": 0.9381981843671344, "grad_norm": 348.04296875, "learning_rate": 1.532874463431555e-07, "loss": 10.0772, "step": 464440 }, { "epoch": 0.9382183849998182, "grad_norm": 193.95880126953125, "learning_rate": 1.532016879818532e-07, "loss": 17.9981, "step": 464450 }, { "epoch": 0.9382385856325021, "grad_norm": 133.47679138183594, "learning_rate": 1.5311595324325912e-07, "loss": 14.5344, "step": 464460 }, { "epoch": 0.9382587862651858, "grad_norm": 349.5655212402344, "learning_rate": 1.5303024212778905e-07, "loss": 25.1919, "step": 464470 }, { "epoch": 0.9382789868978696, "grad_norm": 495.6358947753906, "learning_rate": 1.5294455463586157e-07, "loss": 11.76, "step": 464480 }, { "epoch": 0.9382991875305534, "grad_norm": 852.6415405273438, "learning_rate": 1.528588907678946e-07, "loss": 22.6186, "step": 464490 }, { "epoch": 0.9383193881632372, "grad_norm": 338.9753723144531, "learning_rate": 1.5277325052430569e-07, "loss": 10.316, "step": 464500 }, { "epoch": 0.938339588795921, "grad_norm": 182.22496032714844, "learning_rate": 1.5268763390551167e-07, "loss": 8.8562, "step": 464510 }, { "epoch": 0.9383597894286049, "grad_norm": 697.7536010742188, "learning_rate": 1.526020409119311e-07, "loss": 16.601, "step": 464520 }, { "epoch": 0.9383799900612887, "grad_norm": 471.80670166015625, "learning_rate": 1.5251647154397975e-07, "loss": 17.9513, "step": 464530 }, { "epoch": 0.9384001906939725, "grad_norm": 438.54632568359375, "learning_rate": 1.5243092580207507e-07, "loss": 21.5101, "step": 464540 }, { "epoch": 0.9384203913266563, "grad_norm": 160.27978515625, "learning_rate": 1.5234540368663343e-07, "loss": 13.6464, "step": 464550 }, { "epoch": 0.9384405919593402, "grad_norm": 483.20751953125, "learning_rate": 1.5225990519807332e-07, "loss": 12.8111, "step": 464560 }, { "epoch": 0.938460792592024, "grad_norm": 539.7047119140625, "learning_rate": 1.5217443033681058e-07, "loss": 20.0048, "step": 464570 }, { "epoch": 0.9384809932247078, "grad_norm": 290.4250793457031, "learning_rate": 1.5208897910326092e-07, "loss": 13.4598, "step": 464580 
}, { "epoch": 0.9385011938573916, "grad_norm": 410.6996765136719, "learning_rate": 1.520035514978424e-07, "loss": 14.0252, "step": 464590 }, { "epoch": 0.9385213944900754, "grad_norm": 470.7203369140625, "learning_rate": 1.5191814752097024e-07, "loss": 10.0053, "step": 464600 }, { "epoch": 0.9385415951227593, "grad_norm": 427.7999572753906, "learning_rate": 1.5183276717306072e-07, "loss": 16.5089, "step": 464610 }, { "epoch": 0.9385617957554431, "grad_norm": 862.2379150390625, "learning_rate": 1.517474104545308e-07, "loss": 13.5312, "step": 464620 }, { "epoch": 0.9385819963881269, "grad_norm": 554.513916015625, "learning_rate": 1.5166207736579564e-07, "loss": 24.5063, "step": 464630 }, { "epoch": 0.9386021970208107, "grad_norm": 620.202880859375, "learning_rate": 1.515767679072716e-07, "loss": 13.7802, "step": 464640 }, { "epoch": 0.9386223976534945, "grad_norm": 556.294921875, "learning_rate": 1.5149148207937447e-07, "loss": 20.9563, "step": 464650 }, { "epoch": 0.9386425982861784, "grad_norm": 317.4458923339844, "learning_rate": 1.5140621988251947e-07, "loss": 5.6833, "step": 464660 }, { "epoch": 0.9386627989188622, "grad_norm": 381.5221252441406, "learning_rate": 1.513209813171229e-07, "loss": 15.0624, "step": 464670 }, { "epoch": 0.938682999551546, "grad_norm": 317.9993591308594, "learning_rate": 1.5123576638360004e-07, "loss": 16.5072, "step": 464680 }, { "epoch": 0.9387032001842298, "grad_norm": 118.84877014160156, "learning_rate": 1.5115057508236498e-07, "loss": 18.7588, "step": 464690 }, { "epoch": 0.9387234008169136, "grad_norm": 1289.9464111328125, "learning_rate": 1.5106540741383402e-07, "loss": 25.8103, "step": 464700 }, { "epoch": 0.9387436014495975, "grad_norm": 153.5814666748047, "learning_rate": 1.5098026337842297e-07, "loss": 24.5178, "step": 464710 }, { "epoch": 0.9387638020822812, "grad_norm": 646.8268432617188, "learning_rate": 1.5089514297654594e-07, "loss": 22.356, "step": 464720 }, { "epoch": 0.938784002714965, "grad_norm": 461.1291809082031, "learning_rate": 1.5081004620861706e-07, "loss": 18.6647, "step": 464730 }, { "epoch": 0.9388042033476488, "grad_norm": 489.087890625, "learning_rate": 1.5072497307505263e-07, "loss": 17.1713, "step": 464740 }, { "epoch": 0.9388244039803326, "grad_norm": 315.9326171875, "learning_rate": 1.5063992357626623e-07, "loss": 35.0541, "step": 464750 }, { "epoch": 0.9388446046130164, "grad_norm": 529.4815673828125, "learning_rate": 1.5055489771267252e-07, "loss": 24.0619, "step": 464760 }, { "epoch": 0.9388648052457003, "grad_norm": 470.8222961425781, "learning_rate": 1.5046989548468616e-07, "loss": 14.294, "step": 464770 }, { "epoch": 0.9388850058783841, "grad_norm": 345.0228271484375, "learning_rate": 1.503849168927224e-07, "loss": 26.5148, "step": 464780 }, { "epoch": 0.9389052065110679, "grad_norm": 425.7090148925781, "learning_rate": 1.502999619371931e-07, "loss": 23.6067, "step": 464790 }, { "epoch": 0.9389254071437517, "grad_norm": 333.5506286621094, "learning_rate": 1.502150306185135e-07, "loss": 18.3616, "step": 464800 }, { "epoch": 0.9389456077764355, "grad_norm": 141.08529663085938, "learning_rate": 1.5013012293709828e-07, "loss": 10.1888, "step": 464810 }, { "epoch": 0.9389658084091194, "grad_norm": 280.8174743652344, "learning_rate": 1.5004523889336042e-07, "loss": 18.8, "step": 464820 }, { "epoch": 0.9389860090418032, "grad_norm": 197.2073516845703, "learning_rate": 1.499603784877135e-07, "loss": 11.635, "step": 464830 }, { "epoch": 0.939006209674487, "grad_norm": 554.6068725585938, "learning_rate": 1.4987554172057216e-07, 
"loss": 14.0745, "step": 464840 }, { "epoch": 0.9390264103071708, "grad_norm": 622.3172607421875, "learning_rate": 1.497907285923489e-07, "loss": 17.9825, "step": 464850 }, { "epoch": 0.9390466109398546, "grad_norm": 118.02326965332031, "learning_rate": 1.4970593910345665e-07, "loss": 8.7162, "step": 464860 }, { "epoch": 0.9390668115725385, "grad_norm": 379.7081604003906, "learning_rate": 1.4962117325431013e-07, "loss": 24.6652, "step": 464870 }, { "epoch": 0.9390870122052223, "grad_norm": 115.3721923828125, "learning_rate": 1.495364310453218e-07, "loss": 25.8597, "step": 464880 }, { "epoch": 0.9391072128379061, "grad_norm": 453.2753601074219, "learning_rate": 1.494517124769046e-07, "loss": 11.1597, "step": 464890 }, { "epoch": 0.9391274134705899, "grad_norm": 118.36872100830078, "learning_rate": 1.4936701754947104e-07, "loss": 12.8013, "step": 464900 }, { "epoch": 0.9391476141032737, "grad_norm": 366.5028076171875, "learning_rate": 1.4928234626343464e-07, "loss": 9.0016, "step": 464910 }, { "epoch": 0.9391678147359576, "grad_norm": 417.77398681640625, "learning_rate": 1.4919769861920785e-07, "loss": 14.6703, "step": 464920 }, { "epoch": 0.9391880153686414, "grad_norm": 295.6487121582031, "learning_rate": 1.491130746172026e-07, "loss": 16.5016, "step": 464930 }, { "epoch": 0.9392082160013252, "grad_norm": 459.5295104980469, "learning_rate": 1.490284742578324e-07, "loss": 30.716, "step": 464940 }, { "epoch": 0.939228416634009, "grad_norm": 387.3690185546875, "learning_rate": 1.4894389754150862e-07, "loss": 29.2588, "step": 464950 }, { "epoch": 0.9392486172666928, "grad_norm": 396.13812255859375, "learning_rate": 1.4885934446864425e-07, "loss": 19.497, "step": 464960 }, { "epoch": 0.9392688178993767, "grad_norm": 133.84405517578125, "learning_rate": 1.487748150396512e-07, "loss": 13.2698, "step": 464970 }, { "epoch": 0.9392890185320604, "grad_norm": 130.9963836669922, "learning_rate": 1.4869030925494077e-07, "loss": 22.2032, "step": 464980 }, { "epoch": 0.9393092191647442, "grad_norm": 646.0631103515625, "learning_rate": 1.4860582711492544e-07, "loss": 25.5472, "step": 464990 }, { "epoch": 0.939329419797428, "grad_norm": 182.46316528320312, "learning_rate": 1.4852136862001766e-07, "loss": 25.9811, "step": 465000 }, { "epoch": 0.9393496204301118, "grad_norm": 329.9434509277344, "learning_rate": 1.4843693377062818e-07, "loss": 17.1915, "step": 465010 }, { "epoch": 0.9393698210627957, "grad_norm": 404.30364990234375, "learning_rate": 1.483525225671678e-07, "loss": 18.5547, "step": 465020 }, { "epoch": 0.9393900216954795, "grad_norm": 116.88551330566406, "learning_rate": 1.4826813501004954e-07, "loss": 15.3164, "step": 465030 }, { "epoch": 0.9394102223281633, "grad_norm": 680.893798828125, "learning_rate": 1.4818377109968417e-07, "loss": 18.4572, "step": 465040 }, { "epoch": 0.9394304229608471, "grad_norm": 601.853515625, "learning_rate": 1.4809943083648194e-07, "loss": 16.359, "step": 465050 }, { "epoch": 0.9394506235935309, "grad_norm": 455.3320617675781, "learning_rate": 1.480151142208547e-07, "loss": 22.9655, "step": 465060 }, { "epoch": 0.9394708242262148, "grad_norm": 201.93214416503906, "learning_rate": 1.4793082125321435e-07, "loss": 20.6204, "step": 465070 }, { "epoch": 0.9394910248588986, "grad_norm": 148.03668212890625, "learning_rate": 1.4784655193396947e-07, "loss": 16.3354, "step": 465080 }, { "epoch": 0.9395112254915824, "grad_norm": 234.11648559570312, "learning_rate": 1.4776230626353193e-07, "loss": 16.4417, "step": 465090 }, { "epoch": 0.9395314261242662, "grad_norm": 
346.0733642578125, "learning_rate": 1.4767808424231312e-07, "loss": 12.3764, "step": 465100 }, { "epoch": 0.93955162675695, "grad_norm": 55.60728454589844, "learning_rate": 1.4759388587072266e-07, "loss": 18.4479, "step": 465110 }, { "epoch": 0.9395718273896339, "grad_norm": 111.06137084960938, "learning_rate": 1.475097111491708e-07, "loss": 11.6848, "step": 465120 }, { "epoch": 0.9395920280223177, "grad_norm": 300.3351135253906, "learning_rate": 1.474255600780683e-07, "loss": 13.9835, "step": 465130 }, { "epoch": 0.9396122286550015, "grad_norm": 363.2767028808594, "learning_rate": 1.473414326578254e-07, "loss": 9.6612, "step": 465140 }, { "epoch": 0.9396324292876853, "grad_norm": 927.3482055664062, "learning_rate": 1.4725732888885126e-07, "loss": 20.8591, "step": 465150 }, { "epoch": 0.9396526299203691, "grad_norm": 472.299072265625, "learning_rate": 1.4717324877155603e-07, "loss": 6.7122, "step": 465160 }, { "epoch": 0.939672830553053, "grad_norm": 161.1034698486328, "learning_rate": 1.4708919230635054e-07, "loss": 14.2039, "step": 465170 }, { "epoch": 0.9396930311857368, "grad_norm": 361.7558288574219, "learning_rate": 1.4700515949364337e-07, "loss": 28.8626, "step": 465180 }, { "epoch": 0.9397132318184206, "grad_norm": 246.94151306152344, "learning_rate": 1.4692115033384468e-07, "loss": 25.2142, "step": 465190 }, { "epoch": 0.9397334324511044, "grad_norm": 314.4781494140625, "learning_rate": 1.4683716482736364e-07, "loss": 21.4019, "step": 465200 }, { "epoch": 0.9397536330837882, "grad_norm": 495.0483703613281, "learning_rate": 1.4675320297460994e-07, "loss": 22.743, "step": 465210 }, { "epoch": 0.939773833716472, "grad_norm": 0.047875095158815384, "learning_rate": 1.4666926477599153e-07, "loss": 9.238, "step": 465220 }, { "epoch": 0.9397940343491558, "grad_norm": 3.2310094833374023, "learning_rate": 1.4658535023191922e-07, "loss": 18.7415, "step": 465230 }, { "epoch": 0.9398142349818396, "grad_norm": 279.0162353515625, "learning_rate": 1.4650145934280103e-07, "loss": 25.614, "step": 465240 }, { "epoch": 0.9398344356145234, "grad_norm": 96.9708023071289, "learning_rate": 1.4641759210904605e-07, "loss": 16.596, "step": 465250 }, { "epoch": 0.9398546362472072, "grad_norm": 684.9998779296875, "learning_rate": 1.463337485310634e-07, "loss": 18.1348, "step": 465260 }, { "epoch": 0.939874836879891, "grad_norm": 10.25330924987793, "learning_rate": 1.4624992860926112e-07, "loss": 12.523, "step": 465270 }, { "epoch": 0.9398950375125749, "grad_norm": 2057.43212890625, "learning_rate": 1.461661323440483e-07, "loss": 37.1617, "step": 465280 }, { "epoch": 0.9399152381452587, "grad_norm": 313.6214904785156, "learning_rate": 1.4608235973583296e-07, "loss": 16.6247, "step": 465290 }, { "epoch": 0.9399354387779425, "grad_norm": 133.1804656982422, "learning_rate": 1.459986107850231e-07, "loss": 29.5971, "step": 465300 }, { "epoch": 0.9399556394106263, "grad_norm": 0.5061958432197571, "learning_rate": 1.4591488549202725e-07, "loss": 21.5198, "step": 465310 }, { "epoch": 0.9399758400433101, "grad_norm": 390.5429992675781, "learning_rate": 1.4583118385725402e-07, "loss": 10.8836, "step": 465320 }, { "epoch": 0.939996040675994, "grad_norm": 153.8983917236328, "learning_rate": 1.4574750588111085e-07, "loss": 10.1329, "step": 465330 }, { "epoch": 0.9400162413086778, "grad_norm": 102.71835327148438, "learning_rate": 1.4566385156400463e-07, "loss": 19.8331, "step": 465340 }, { "epoch": 0.9400364419413616, "grad_norm": 165.04196166992188, "learning_rate": 1.4558022090634504e-07, "loss": 9.5658, "step": 465350 }, 
{ "epoch": 0.9400566425740454, "grad_norm": 144.1915283203125, "learning_rate": 1.4549661390853897e-07, "loss": 22.7668, "step": 465360 }, { "epoch": 0.9400768432067292, "grad_norm": 269.35174560546875, "learning_rate": 1.4541303057099275e-07, "loss": 17.217, "step": 465370 }, { "epoch": 0.9400970438394131, "grad_norm": 341.9735412597656, "learning_rate": 1.4532947089411443e-07, "loss": 14.102, "step": 465380 }, { "epoch": 0.9401172444720969, "grad_norm": 342.3262939453125, "learning_rate": 1.452459348783125e-07, "loss": 14.8285, "step": 465390 }, { "epoch": 0.9401374451047807, "grad_norm": 195.041748046875, "learning_rate": 1.4516242252399227e-07, "loss": 21.6281, "step": 465400 }, { "epoch": 0.9401576457374645, "grad_norm": 653.010986328125, "learning_rate": 1.450789338315617e-07, "loss": 16.6789, "step": 465410 }, { "epoch": 0.9401778463701483, "grad_norm": 429.4823913574219, "learning_rate": 1.4499546880142823e-07, "loss": 20.1362, "step": 465420 }, { "epoch": 0.9401980470028322, "grad_norm": 472.930419921875, "learning_rate": 1.4491202743399767e-07, "loss": 16.6625, "step": 465430 }, { "epoch": 0.940218247635516, "grad_norm": 598.2315063476562, "learning_rate": 1.448286097296764e-07, "loss": 22.673, "step": 465440 }, { "epoch": 0.9402384482681998, "grad_norm": 560.1913452148438, "learning_rate": 1.4474521568887178e-07, "loss": 20.8818, "step": 465450 }, { "epoch": 0.9402586489008836, "grad_norm": 308.62799072265625, "learning_rate": 1.4466184531199135e-07, "loss": 10.2614, "step": 465460 }, { "epoch": 0.9402788495335674, "grad_norm": 633.8780517578125, "learning_rate": 1.4457849859943862e-07, "loss": 20.54, "step": 465470 }, { "epoch": 0.9402990501662513, "grad_norm": 394.0419616699219, "learning_rate": 1.4449517555162163e-07, "loss": 11.4615, "step": 465480 }, { "epoch": 0.940319250798935, "grad_norm": 340.50732421875, "learning_rate": 1.4441187616894724e-07, "loss": 16.5588, "step": 465490 }, { "epoch": 0.9403394514316188, "grad_norm": 202.9910125732422, "learning_rate": 1.4432860045182019e-07, "loss": 27.8608, "step": 465500 }, { "epoch": 0.9403596520643026, "grad_norm": 115.48960876464844, "learning_rate": 1.4424534840064563e-07, "loss": 16.095, "step": 465510 }, { "epoch": 0.9403798526969864, "grad_norm": 159.15509033203125, "learning_rate": 1.4416212001583163e-07, "loss": 13.0288, "step": 465520 }, { "epoch": 0.9404000533296702, "grad_norm": 198.47335815429688, "learning_rate": 1.4407891529778172e-07, "loss": 11.6673, "step": 465530 }, { "epoch": 0.9404202539623541, "grad_norm": 702.14794921875, "learning_rate": 1.4399573424690227e-07, "loss": 26.2657, "step": 465540 }, { "epoch": 0.9404404545950379, "grad_norm": 347.3752746582031, "learning_rate": 1.4391257686359906e-07, "loss": 21.5979, "step": 465550 }, { "epoch": 0.9404606552277217, "grad_norm": 139.51553344726562, "learning_rate": 1.438294431482762e-07, "loss": 27.9939, "step": 465560 }, { "epoch": 0.9404808558604055, "grad_norm": 472.1897277832031, "learning_rate": 1.4374633310134057e-07, "loss": 16.1521, "step": 465570 }, { "epoch": 0.9405010564930893, "grad_norm": 373.1190490722656, "learning_rate": 1.4366324672319575e-07, "loss": 27.0018, "step": 465580 }, { "epoch": 0.9405212571257732, "grad_norm": 204.26011657714844, "learning_rate": 1.43580184014247e-07, "loss": 18.4251, "step": 465590 }, { "epoch": 0.940541457758457, "grad_norm": 159.9972686767578, "learning_rate": 1.4349714497490009e-07, "loss": 11.4422, "step": 465600 }, { "epoch": 0.9405616583911408, "grad_norm": 307.0250549316406, "learning_rate": 
1.4341412960555855e-07, "loss": 15.6385, "step": 465610 }, { "epoch": 0.9405818590238246, "grad_norm": 491.42803955078125, "learning_rate": 1.4333113790662822e-07, "loss": 18.4737, "step": 465620 }, { "epoch": 0.9406020596565084, "grad_norm": 243.17962646484375, "learning_rate": 1.432481698785121e-07, "loss": 4.4355, "step": 465630 }, { "epoch": 0.9406222602891923, "grad_norm": 17.453372955322266, "learning_rate": 1.4316522552161593e-07, "loss": 18.5871, "step": 465640 }, { "epoch": 0.9406424609218761, "grad_norm": 4.891506671905518, "learning_rate": 1.4308230483634334e-07, "loss": 28.1658, "step": 465650 }, { "epoch": 0.9406626615545599, "grad_norm": 328.7502136230469, "learning_rate": 1.4299940782309785e-07, "loss": 12.0737, "step": 465660 }, { "epoch": 0.9406828621872437, "grad_norm": 356.3871765136719, "learning_rate": 1.4291653448228416e-07, "loss": 14.3334, "step": 465670 }, { "epoch": 0.9407030628199275, "grad_norm": 29.75800132751465, "learning_rate": 1.4283368481430747e-07, "loss": 21.4017, "step": 465680 }, { "epoch": 0.9407232634526114, "grad_norm": 62.24608612060547, "learning_rate": 1.427508588195692e-07, "loss": 15.4066, "step": 465690 }, { "epoch": 0.9407434640852952, "grad_norm": 540.0977172851562, "learning_rate": 1.4266805649847392e-07, "loss": 16.5002, "step": 465700 }, { "epoch": 0.940763664717979, "grad_norm": 419.4856872558594, "learning_rate": 1.425852778514264e-07, "loss": 10.3622, "step": 465710 }, { "epoch": 0.9407838653506628, "grad_norm": 501.3851318359375, "learning_rate": 1.4250252287882848e-07, "loss": 16.8594, "step": 465720 }, { "epoch": 0.9408040659833466, "grad_norm": 234.9951934814453, "learning_rate": 1.4241979158108433e-07, "loss": 16.2693, "step": 465730 }, { "epoch": 0.9408242666160305, "grad_norm": 13.653524398803711, "learning_rate": 1.4233708395859692e-07, "loss": 25.7325, "step": 465740 }, { "epoch": 0.9408444672487142, "grad_norm": 139.67691040039062, "learning_rate": 1.4225440001176983e-07, "loss": 15.5495, "step": 465750 }, { "epoch": 0.940864667881398, "grad_norm": 470.8349609375, "learning_rate": 1.421717397410044e-07, "loss": 25.8463, "step": 465760 }, { "epoch": 0.9408848685140818, "grad_norm": 453.8491516113281, "learning_rate": 1.420891031467053e-07, "loss": 22.6746, "step": 465770 }, { "epoch": 0.9409050691467656, "grad_norm": 224.24273681640625, "learning_rate": 1.4200649022927505e-07, "loss": 11.2414, "step": 465780 }, { "epoch": 0.9409252697794495, "grad_norm": 47.68784713745117, "learning_rate": 1.41923900989116e-07, "loss": 15.038, "step": 465790 }, { "epoch": 0.9409454704121333, "grad_norm": 472.7624206542969, "learning_rate": 1.4184133542663014e-07, "loss": 14.2832, "step": 465800 }, { "epoch": 0.9409656710448171, "grad_norm": 157.7672882080078, "learning_rate": 1.41758793542221e-07, "loss": 12.4509, "step": 465810 }, { "epoch": 0.9409858716775009, "grad_norm": 438.72222900390625, "learning_rate": 1.4167627533628992e-07, "loss": 17.5392, "step": 465820 }, { "epoch": 0.9410060723101847, "grad_norm": 0.7373473048210144, "learning_rate": 1.4159378080923936e-07, "loss": 23.9596, "step": 465830 }, { "epoch": 0.9410262729428686, "grad_norm": 465.291015625, "learning_rate": 1.4151130996147177e-07, "loss": 22.3183, "step": 465840 }, { "epoch": 0.9410464735755524, "grad_norm": 515.6854248046875, "learning_rate": 1.4142886279338852e-07, "loss": 28.089, "step": 465850 }, { "epoch": 0.9410666742082362, "grad_norm": 241.45852661132812, "learning_rate": 1.4134643930539204e-07, "loss": 13.2914, "step": 465860 }, { "epoch": 0.94108687484092, 
"grad_norm": 210.69989013671875, "learning_rate": 1.4126403949788369e-07, "loss": 14.6817, "step": 465870 }, { "epoch": 0.9411070754736038, "grad_norm": 14.873430252075195, "learning_rate": 1.4118166337126428e-07, "loss": 23.0126, "step": 465880 }, { "epoch": 0.9411272761062877, "grad_norm": 133.8224334716797, "learning_rate": 1.4109931092593732e-07, "loss": 25.0196, "step": 465890 }, { "epoch": 0.9411474767389715, "grad_norm": 605.4613647460938, "learning_rate": 1.4101698216230254e-07, "loss": 11.0159, "step": 465900 }, { "epoch": 0.9411676773716553, "grad_norm": 308.15338134765625, "learning_rate": 1.4093467708076126e-07, "loss": 17.0492, "step": 465910 }, { "epoch": 0.9411878780043391, "grad_norm": 569.3749389648438, "learning_rate": 1.4085239568171483e-07, "loss": 13.6531, "step": 465920 }, { "epoch": 0.9412080786370229, "grad_norm": 5.422422885894775, "learning_rate": 1.4077013796556515e-07, "loss": 27.1837, "step": 465930 }, { "epoch": 0.9412282792697068, "grad_norm": 260.8473205566406, "learning_rate": 1.406879039327125e-07, "loss": 27.4344, "step": 465940 }, { "epoch": 0.9412484799023906, "grad_norm": 385.05035400390625, "learning_rate": 1.4060569358355703e-07, "loss": 28.6004, "step": 465950 }, { "epoch": 0.9412686805350744, "grad_norm": 113.09442138671875, "learning_rate": 1.405235069185007e-07, "loss": 13.7099, "step": 465960 }, { "epoch": 0.9412888811677582, "grad_norm": 390.7118835449219, "learning_rate": 1.4044134393794373e-07, "loss": 16.4347, "step": 465970 }, { "epoch": 0.941309081800442, "grad_norm": 317.0025634765625, "learning_rate": 1.4035920464228525e-07, "loss": 10.5919, "step": 465980 }, { "epoch": 0.9413292824331259, "grad_norm": 301.4737548828125, "learning_rate": 1.4027708903192662e-07, "loss": 24.9793, "step": 465990 }, { "epoch": 0.9413494830658096, "grad_norm": 348.3053283691406, "learning_rate": 1.4019499710726913e-07, "loss": 17.0418, "step": 466000 }, { "epoch": 0.9413696836984934, "grad_norm": 103.71463775634766, "learning_rate": 1.4011292886871086e-07, "loss": 14.3418, "step": 466010 }, { "epoch": 0.9413898843311772, "grad_norm": 543.97265625, "learning_rate": 1.4003088431665312e-07, "loss": 11.213, "step": 466020 }, { "epoch": 0.941410084963861, "grad_norm": 645.0778198242188, "learning_rate": 1.3994886345149504e-07, "loss": 17.445, "step": 466030 }, { "epoch": 0.9414302855965448, "grad_norm": 707.9461059570312, "learning_rate": 1.3986686627363744e-07, "loss": 33.2789, "step": 466040 }, { "epoch": 0.9414504862292287, "grad_norm": 12.873198509216309, "learning_rate": 1.3978489278347883e-07, "loss": 28.9894, "step": 466050 }, { "epoch": 0.9414706868619125, "grad_norm": 318.5113830566406, "learning_rate": 1.397029429814184e-07, "loss": 22.1279, "step": 466060 }, { "epoch": 0.9414908874945963, "grad_norm": 123.84893798828125, "learning_rate": 1.39621016867858e-07, "loss": 21.4548, "step": 466070 }, { "epoch": 0.9415110881272801, "grad_norm": 315.24322509765625, "learning_rate": 1.39539114443194e-07, "loss": 23.2591, "step": 466080 }, { "epoch": 0.941531288759964, "grad_norm": 444.9673156738281, "learning_rate": 1.3945723570782722e-07, "loss": 22.2276, "step": 466090 }, { "epoch": 0.9415514893926478, "grad_norm": 99.54863739013672, "learning_rate": 1.3937538066215672e-07, "loss": 22.1537, "step": 466100 }, { "epoch": 0.9415716900253316, "grad_norm": 248.89920043945312, "learning_rate": 1.3929354930658112e-07, "loss": 11.2812, "step": 466110 }, { "epoch": 0.9415918906580154, "grad_norm": 295.01171875, "learning_rate": 1.3921174164149842e-07, "loss": 23.3916, 
"step": 466120 }, { "epoch": 0.9416120912906992, "grad_norm": 364.608154296875, "learning_rate": 1.3912995766730887e-07, "loss": 9.879, "step": 466130 }, { "epoch": 0.941632291923383, "grad_norm": 156.15719604492188, "learning_rate": 1.3904819738441043e-07, "loss": 28.2597, "step": 466140 }, { "epoch": 0.9416524925560669, "grad_norm": 570.7730712890625, "learning_rate": 1.3896646079320064e-07, "loss": 18.1528, "step": 466150 }, { "epoch": 0.9416726931887507, "grad_norm": 200.7433319091797, "learning_rate": 1.388847478940797e-07, "loss": 19.8137, "step": 466160 }, { "epoch": 0.9416928938214345, "grad_norm": 213.10841369628906, "learning_rate": 1.3880305868744392e-07, "loss": 20.0374, "step": 466170 }, { "epoch": 0.9417130944541183, "grad_norm": 244.504150390625, "learning_rate": 1.3872139317369304e-07, "loss": 10.2179, "step": 466180 }, { "epoch": 0.9417332950868021, "grad_norm": 319.9541015625, "learning_rate": 1.3863975135322505e-07, "loss": 12.6198, "step": 466190 }, { "epoch": 0.941753495719486, "grad_norm": 201.5258331298828, "learning_rate": 1.385581332264363e-07, "loss": 15.1341, "step": 466200 }, { "epoch": 0.9417736963521698, "grad_norm": 192.09957885742188, "learning_rate": 1.3847653879372646e-07, "loss": 18.1536, "step": 466210 }, { "epoch": 0.9417938969848536, "grad_norm": 217.1121826171875, "learning_rate": 1.3839496805549136e-07, "loss": 12.6363, "step": 466220 }, { "epoch": 0.9418140976175374, "grad_norm": 646.8970947265625, "learning_rate": 1.383134210121301e-07, "loss": 13.4812, "step": 466230 }, { "epoch": 0.9418342982502212, "grad_norm": 209.90310668945312, "learning_rate": 1.3823189766403954e-07, "loss": 22.6369, "step": 466240 }, { "epoch": 0.9418544988829051, "grad_norm": 597.7464599609375, "learning_rate": 1.3815039801161723e-07, "loss": 14.8995, "step": 466250 }, { "epoch": 0.9418746995155888, "grad_norm": 583.8340454101562, "learning_rate": 1.3806892205526e-07, "loss": 12.6547, "step": 466260 }, { "epoch": 0.9418949001482726, "grad_norm": 146.20486450195312, "learning_rate": 1.3798746979536482e-07, "loss": 20.1029, "step": 466270 }, { "epoch": 0.9419151007809564, "grad_norm": 462.0206604003906, "learning_rate": 1.3790604123232966e-07, "loss": 11.2156, "step": 466280 }, { "epoch": 0.9419353014136402, "grad_norm": 243.73023986816406, "learning_rate": 1.3782463636655087e-07, "loss": 21.2671, "step": 466290 }, { "epoch": 0.9419555020463241, "grad_norm": 127.55465698242188, "learning_rate": 1.3774325519842423e-07, "loss": 14.346, "step": 466300 }, { "epoch": 0.9419757026790079, "grad_norm": 122.92948913574219, "learning_rate": 1.376618977283478e-07, "loss": 20.0084, "step": 466310 }, { "epoch": 0.9419959033116917, "grad_norm": 609.1244506835938, "learning_rate": 1.3758056395671738e-07, "loss": 17.6589, "step": 466320 }, { "epoch": 0.9420161039443755, "grad_norm": 322.552001953125, "learning_rate": 1.374992538839298e-07, "loss": 19.4366, "step": 466330 }, { "epoch": 0.9420363045770593, "grad_norm": 110.1209716796875, "learning_rate": 1.3741796751038095e-07, "loss": 22.9075, "step": 466340 }, { "epoch": 0.9420565052097432, "grad_norm": 380.9493713378906, "learning_rate": 1.373367048364671e-07, "loss": 14.092, "step": 466350 }, { "epoch": 0.942076705842427, "grad_norm": 0.42919933795928955, "learning_rate": 1.3725546586258464e-07, "loss": 13.1575, "step": 466360 }, { "epoch": 0.9420969064751108, "grad_norm": 1916.9405517578125, "learning_rate": 1.3717425058912882e-07, "loss": 18.7603, "step": 466370 }, { "epoch": 0.9421171071077946, "grad_norm": 14.732674598693848, 
"learning_rate": 1.3709305901649594e-07, "loss": 38.1083, "step": 466380 }, { "epoch": 0.9421373077404784, "grad_norm": 512.1130981445312, "learning_rate": 1.370118911450824e-07, "loss": 18.5798, "step": 466390 }, { "epoch": 0.9421575083731623, "grad_norm": 434.47039794921875, "learning_rate": 1.3693074697528231e-07, "loss": 19.878, "step": 466400 }, { "epoch": 0.9421777090058461, "grad_norm": 20.22287940979004, "learning_rate": 1.36849626507492e-07, "loss": 21.2979, "step": 466410 }, { "epoch": 0.9421979096385299, "grad_norm": 260.6330261230469, "learning_rate": 1.367685297421073e-07, "loss": 22.6538, "step": 466420 }, { "epoch": 0.9422181102712137, "grad_norm": 635.8558959960938, "learning_rate": 1.366874566795229e-07, "loss": 25.0508, "step": 466430 }, { "epoch": 0.9422383109038975, "grad_norm": 382.78399658203125, "learning_rate": 1.3660640732013342e-07, "loss": 9.9655, "step": 466440 }, { "epoch": 0.9422585115365814, "grad_norm": 202.0198211669922, "learning_rate": 1.3652538166433527e-07, "loss": 20.7105, "step": 466450 }, { "epoch": 0.9422787121692652, "grad_norm": 86.01641082763672, "learning_rate": 1.3644437971252144e-07, "loss": 23.0091, "step": 466460 }, { "epoch": 0.942298912801949, "grad_norm": 148.82080078125, "learning_rate": 1.3636340146508886e-07, "loss": 17.164, "step": 466470 }, { "epoch": 0.9423191134346328, "grad_norm": 490.79486083984375, "learning_rate": 1.362824469224311e-07, "loss": 26.0232, "step": 466480 }, { "epoch": 0.9423393140673166, "grad_norm": 759.5617065429688, "learning_rate": 1.362015160849417e-07, "loss": 17.5195, "step": 466490 }, { "epoch": 0.9423595147000005, "grad_norm": 160.0380401611328, "learning_rate": 1.3612060895301759e-07, "loss": 10.9043, "step": 466500 }, { "epoch": 0.9423797153326842, "grad_norm": 81.51010131835938, "learning_rate": 1.360397255270507e-07, "loss": 12.4245, "step": 466510 }, { "epoch": 0.942399915965368, "grad_norm": 9.42785930633545, "learning_rate": 1.3595886580743677e-07, "loss": 15.6158, "step": 466520 }, { "epoch": 0.9424201165980518, "grad_norm": 254.63711547851562, "learning_rate": 1.3587802979456888e-07, "loss": 21.2009, "step": 466530 }, { "epoch": 0.9424403172307356, "grad_norm": 488.95220947265625, "learning_rate": 1.3579721748884222e-07, "loss": 21.6403, "step": 466540 }, { "epoch": 0.9424605178634194, "grad_norm": 34.97917556762695, "learning_rate": 1.3571642889064984e-07, "loss": 15.1119, "step": 466550 }, { "epoch": 0.9424807184961033, "grad_norm": 238.89625549316406, "learning_rate": 1.356356640003853e-07, "loss": 8.2208, "step": 466560 }, { "epoch": 0.9425009191287871, "grad_norm": 247.09791564941406, "learning_rate": 1.3555492281844273e-07, "loss": 16.6098, "step": 466570 }, { "epoch": 0.9425211197614709, "grad_norm": 292.3550109863281, "learning_rate": 1.354742053452157e-07, "loss": 25.4076, "step": 466580 }, { "epoch": 0.9425413203941547, "grad_norm": 550.693115234375, "learning_rate": 1.353935115810967e-07, "loss": 13.479, "step": 466590 }, { "epoch": 0.9425615210268385, "grad_norm": 150.84622192382812, "learning_rate": 1.3531284152647983e-07, "loss": 26.4981, "step": 466600 }, { "epoch": 0.9425817216595224, "grad_norm": 135.9732666015625, "learning_rate": 1.3523219518175924e-07, "loss": 19.2011, "step": 466610 }, { "epoch": 0.9426019222922062, "grad_norm": 411.6714172363281, "learning_rate": 1.351515725473257e-07, "loss": 13.1952, "step": 466620 }, { "epoch": 0.94262212292489, "grad_norm": 691.3521728515625, "learning_rate": 1.3507097362357392e-07, "loss": 23.5884, "step": 466630 }, { "epoch": 
0.9426423235575738, "grad_norm": 435.2999572753906, "learning_rate": 1.349903984108958e-07, "loss": 19.8776, "step": 466640 }, { "epoch": 0.9426625241902576, "grad_norm": 522.2403564453125, "learning_rate": 1.3490984690968488e-07, "loss": 25.458, "step": 466650 }, { "epoch": 0.9426827248229415, "grad_norm": 790.474609375, "learning_rate": 1.3482931912033314e-07, "loss": 21.0078, "step": 466660 }, { "epoch": 0.9427029254556253, "grad_norm": 1263.0888671875, "learning_rate": 1.3474881504323301e-07, "loss": 19.8605, "step": 466670 }, { "epoch": 0.9427231260883091, "grad_norm": 590.8986206054688, "learning_rate": 1.346683346787775e-07, "loss": 13.3168, "step": 466680 }, { "epoch": 0.9427433267209929, "grad_norm": 232.91049194335938, "learning_rate": 1.3458787802735794e-07, "loss": 21.9886, "step": 466690 }, { "epoch": 0.9427635273536767, "grad_norm": 438.4366455078125, "learning_rate": 1.3450744508936687e-07, "loss": 18.1001, "step": 466700 }, { "epoch": 0.9427837279863606, "grad_norm": 492.16668701171875, "learning_rate": 1.3442703586519724e-07, "loss": 10.5851, "step": 466710 }, { "epoch": 0.9428039286190444, "grad_norm": 277.2815246582031, "learning_rate": 1.3434665035523985e-07, "loss": 30.349, "step": 466720 }, { "epoch": 0.9428241292517282, "grad_norm": 177.5471954345703, "learning_rate": 1.342662885598861e-07, "loss": 11.7614, "step": 466730 }, { "epoch": 0.942844329884412, "grad_norm": 438.42755126953125, "learning_rate": 1.3418595047952897e-07, "loss": 12.5441, "step": 466740 }, { "epoch": 0.9428645305170958, "grad_norm": 639.2652587890625, "learning_rate": 1.341056361145593e-07, "loss": 15.6022, "step": 466750 }, { "epoch": 0.9428847311497797, "grad_norm": 40.974517822265625, "learning_rate": 1.3402534546536783e-07, "loss": 17.9016, "step": 466760 }, { "epoch": 0.9429049317824634, "grad_norm": 227.44757080078125, "learning_rate": 1.3394507853234763e-07, "loss": 19.5819, "step": 466770 }, { "epoch": 0.9429251324151472, "grad_norm": 461.1067810058594, "learning_rate": 1.3386483531588834e-07, "loss": 21.6301, "step": 466780 }, { "epoch": 0.942945333047831, "grad_norm": 502.9984130859375, "learning_rate": 1.337846158163819e-07, "loss": 12.9162, "step": 466790 }, { "epoch": 0.9429655336805148, "grad_norm": 606.315673828125, "learning_rate": 1.3370442003421913e-07, "loss": 25.3759, "step": 466800 }, { "epoch": 0.9429857343131987, "grad_norm": 275.4271545410156, "learning_rate": 1.336242479697908e-07, "loss": 35.1638, "step": 466810 }, { "epoch": 0.9430059349458825, "grad_norm": 544.243408203125, "learning_rate": 1.335440996234877e-07, "loss": 11.1995, "step": 466820 }, { "epoch": 0.9430261355785663, "grad_norm": 200.02992248535156, "learning_rate": 1.334639749956995e-07, "loss": 12.549, "step": 466830 }, { "epoch": 0.9430463362112501, "grad_norm": 487.49444580078125, "learning_rate": 1.3338387408681875e-07, "loss": 13.481, "step": 466840 }, { "epoch": 0.9430665368439339, "grad_norm": 738.1172485351562, "learning_rate": 1.333037968972345e-07, "loss": 15.7329, "step": 466850 }, { "epoch": 0.9430867374766178, "grad_norm": 99.85303497314453, "learning_rate": 1.33223743427337e-07, "loss": 19.0156, "step": 466860 }, { "epoch": 0.9431069381093016, "grad_norm": 357.34100341796875, "learning_rate": 1.331437136775171e-07, "loss": 18.5052, "step": 466870 }, { "epoch": 0.9431271387419854, "grad_norm": 166.46453857421875, "learning_rate": 1.330637076481639e-07, "loss": 13.3199, "step": 466880 }, { "epoch": 0.9431473393746692, "grad_norm": 48.30439758300781, "learning_rate": 1.3298372533966874e-07, 
"loss": 11.878, "step": 466890 }, { "epoch": 0.943167540007353, "grad_norm": 324.0368347167969, "learning_rate": 1.3290376675242022e-07, "loss": 25.7629, "step": 466900 }, { "epoch": 0.9431877406400369, "grad_norm": 390.7507019042969, "learning_rate": 1.3282383188680802e-07, "loss": 21.2157, "step": 466910 }, { "epoch": 0.9432079412727207, "grad_norm": 57.96989059448242, "learning_rate": 1.327439207432224e-07, "loss": 15.7185, "step": 466920 }, { "epoch": 0.9432281419054045, "grad_norm": 202.033935546875, "learning_rate": 1.3266403332205248e-07, "loss": 9.7807, "step": 466930 }, { "epoch": 0.9432483425380883, "grad_norm": 108.84996795654297, "learning_rate": 1.3258416962368849e-07, "loss": 14.6172, "step": 466940 }, { "epoch": 0.9432685431707721, "grad_norm": 172.59718322753906, "learning_rate": 1.325043296485179e-07, "loss": 18.0242, "step": 466950 }, { "epoch": 0.943288743803456, "grad_norm": 434.2422180175781, "learning_rate": 1.3242451339693153e-07, "loss": 33.7853, "step": 466960 }, { "epoch": 0.9433089444361398, "grad_norm": 147.3890838623047, "learning_rate": 1.3234472086931738e-07, "loss": 26.3679, "step": 466970 }, { "epoch": 0.9433291450688236, "grad_norm": 10.948002815246582, "learning_rate": 1.322649520660646e-07, "loss": 13.5406, "step": 466980 }, { "epoch": 0.9433493457015074, "grad_norm": 424.71539306640625, "learning_rate": 1.3218520698756177e-07, "loss": 9.7036, "step": 466990 }, { "epoch": 0.9433695463341912, "grad_norm": 243.30010986328125, "learning_rate": 1.3210548563419857e-07, "loss": 13.9705, "step": 467000 }, { "epoch": 0.943389746966875, "grad_norm": 197.2886962890625, "learning_rate": 1.32025788006363e-07, "loss": 26.665, "step": 467010 }, { "epoch": 0.9434099475995589, "grad_norm": 610.0017700195312, "learning_rate": 1.3194611410444258e-07, "loss": 21.8821, "step": 467020 }, { "epoch": 0.9434301482322426, "grad_norm": 272.1587219238281, "learning_rate": 1.3186646392882696e-07, "loss": 8.3357, "step": 467030 }, { "epoch": 0.9434503488649264, "grad_norm": 167.1318817138672, "learning_rate": 1.3178683747990362e-07, "loss": 11.8705, "step": 467040 }, { "epoch": 0.9434705494976102, "grad_norm": 434.9255065917969, "learning_rate": 1.3170723475806003e-07, "loss": 23.161, "step": 467050 }, { "epoch": 0.943490750130294, "grad_norm": 79.04789733886719, "learning_rate": 1.3162765576368587e-07, "loss": 24.5077, "step": 467060 }, { "epoch": 0.9435109507629779, "grad_norm": 203.78448486328125, "learning_rate": 1.315481004971675e-07, "loss": 8.7908, "step": 467070 }, { "epoch": 0.9435311513956617, "grad_norm": 225.8023681640625, "learning_rate": 1.314685689588935e-07, "loss": 18.4575, "step": 467080 }, { "epoch": 0.9435513520283455, "grad_norm": 297.1622619628906, "learning_rate": 1.3138906114925133e-07, "loss": 10.182, "step": 467090 }, { "epoch": 0.9435715526610293, "grad_norm": 90.65361022949219, "learning_rate": 1.313095770686279e-07, "loss": 11.4575, "step": 467100 }, { "epoch": 0.9435917532937131, "grad_norm": 511.15032958984375, "learning_rate": 1.3123011671741183e-07, "loss": 17.2859, "step": 467110 }, { "epoch": 0.943611953926397, "grad_norm": 199.49876403808594, "learning_rate": 1.3115068009598886e-07, "loss": 15.8563, "step": 467120 }, { "epoch": 0.9436321545590808, "grad_norm": 403.6021728515625, "learning_rate": 1.3107126720474762e-07, "loss": 13.149, "step": 467130 }, { "epoch": 0.9436523551917646, "grad_norm": 256.2900695800781, "learning_rate": 1.3099187804407387e-07, "loss": 24.4645, "step": 467140 }, { "epoch": 0.9436725558244484, "grad_norm": 
254.575927734375, "learning_rate": 1.3091251261435568e-07, "loss": 7.1697, "step": 467150 }, { "epoch": 0.9436927564571322, "grad_norm": 467.6200866699219, "learning_rate": 1.3083317091597936e-07, "loss": 17.0296, "step": 467160 }, { "epoch": 0.9437129570898161, "grad_norm": 310.8859558105469, "learning_rate": 1.3075385294933129e-07, "loss": 14.5931, "step": 467170 }, { "epoch": 0.9437331577224999, "grad_norm": 312.9391174316406, "learning_rate": 1.306745587147984e-07, "loss": 17.0316, "step": 467180 }, { "epoch": 0.9437533583551837, "grad_norm": 322.968994140625, "learning_rate": 1.3059528821276758e-07, "loss": 18.964, "step": 467190 }, { "epoch": 0.9437735589878675, "grad_norm": 116.29562377929688, "learning_rate": 1.3051604144362407e-07, "loss": 15.6003, "step": 467200 }, { "epoch": 0.9437937596205513, "grad_norm": 424.7398986816406, "learning_rate": 1.304368184077548e-07, "loss": 21.4729, "step": 467210 }, { "epoch": 0.9438139602532352, "grad_norm": 406.6511535644531, "learning_rate": 1.3035761910554666e-07, "loss": 23.7003, "step": 467220 }, { "epoch": 0.943834160885919, "grad_norm": 431.1209716796875, "learning_rate": 1.302784435373844e-07, "loss": 11.4722, "step": 467230 }, { "epoch": 0.9438543615186028, "grad_norm": 277.345703125, "learning_rate": 1.3019929170365376e-07, "loss": 18.2024, "step": 467240 }, { "epoch": 0.9438745621512866, "grad_norm": 393.22003173828125, "learning_rate": 1.3012016360474223e-07, "loss": 6.3782, "step": 467250 }, { "epoch": 0.9438947627839704, "grad_norm": 107.62462615966797, "learning_rate": 1.3004105924103394e-07, "loss": 28.9039, "step": 467260 }, { "epoch": 0.9439149634166543, "grad_norm": 312.8868103027344, "learning_rate": 1.2996197861291472e-07, "loss": 20.6576, "step": 467270 }, { "epoch": 0.943935164049338, "grad_norm": 304.1863098144531, "learning_rate": 1.2988292172076977e-07, "loss": 18.5216, "step": 467280 }, { "epoch": 0.9439553646820218, "grad_norm": 190.7682647705078, "learning_rate": 1.2980388856498604e-07, "loss": 9.204, "step": 467290 }, { "epoch": 0.9439755653147056, "grad_norm": 121.52448272705078, "learning_rate": 1.29724879145946e-07, "loss": 12.4498, "step": 467300 }, { "epoch": 0.9439957659473894, "grad_norm": 229.15966796875, "learning_rate": 1.296458934640371e-07, "loss": 9.9259, "step": 467310 }, { "epoch": 0.9440159665800733, "grad_norm": 532.7434692382812, "learning_rate": 1.2956693151964296e-07, "loss": 19.2803, "step": 467320 }, { "epoch": 0.9440361672127571, "grad_norm": 372.7672424316406, "learning_rate": 1.2948799331314933e-07, "loss": 14.7937, "step": 467330 }, { "epoch": 0.9440563678454409, "grad_norm": 1146.291015625, "learning_rate": 1.2940907884494036e-07, "loss": 48.4152, "step": 467340 }, { "epoch": 0.9440765684781247, "grad_norm": 452.1363830566406, "learning_rate": 1.2933018811540078e-07, "loss": 18.9367, "step": 467350 }, { "epoch": 0.9440967691108085, "grad_norm": 391.53204345703125, "learning_rate": 1.2925132112491523e-07, "loss": 16.6104, "step": 467360 }, { "epoch": 0.9441169697434924, "grad_norm": 237.9326171875, "learning_rate": 1.2917247787386787e-07, "loss": 16.8393, "step": 467370 }, { "epoch": 0.9441371703761762, "grad_norm": 501.45037841796875, "learning_rate": 1.2909365836264287e-07, "loss": 21.6704, "step": 467380 }, { "epoch": 0.94415737100886, "grad_norm": 126.89668273925781, "learning_rate": 1.2901486259162488e-07, "loss": 19.5969, "step": 467390 }, { "epoch": 0.9441775716415438, "grad_norm": 428.07708740234375, "learning_rate": 1.289360905611975e-07, "loss": 10.1928, "step": 467400 }, { 
"epoch": 0.9441977722742276, "grad_norm": 126.64779663085938, "learning_rate": 1.288573422717454e-07, "loss": 39.1484, "step": 467410 }, { "epoch": 0.9442179729069115, "grad_norm": 75.78832244873047, "learning_rate": 1.287786177236511e-07, "loss": 15.7369, "step": 467420 }, { "epoch": 0.9442381735395953, "grad_norm": 851.9575805664062, "learning_rate": 1.2869991691729922e-07, "loss": 30.6076, "step": 467430 }, { "epoch": 0.9442583741722791, "grad_norm": 15.668920516967773, "learning_rate": 1.2862123985307284e-07, "loss": 11.1184, "step": 467440 }, { "epoch": 0.9442785748049629, "grad_norm": 314.4423828125, "learning_rate": 1.285425865313561e-07, "loss": 14.5962, "step": 467450 }, { "epoch": 0.9442987754376467, "grad_norm": 326.61346435546875, "learning_rate": 1.28463956952532e-07, "loss": 20.0917, "step": 467460 }, { "epoch": 0.9443189760703306, "grad_norm": 434.478271484375, "learning_rate": 1.2838535111698359e-07, "loss": 18.4376, "step": 467470 }, { "epoch": 0.9443391767030144, "grad_norm": 282.2841796875, "learning_rate": 1.2830676902509443e-07, "loss": 14.4992, "step": 467480 }, { "epoch": 0.9443593773356982, "grad_norm": 924.6843872070312, "learning_rate": 1.2822821067724643e-07, "loss": 37.9355, "step": 467490 }, { "epoch": 0.944379577968382, "grad_norm": 236.3780059814453, "learning_rate": 1.2814967607382433e-07, "loss": 7.6149, "step": 467500 }, { "epoch": 0.9443997786010658, "grad_norm": 343.4498596191406, "learning_rate": 1.2807116521520947e-07, "loss": 13.3588, "step": 467510 }, { "epoch": 0.9444199792337497, "grad_norm": 542.5467529296875, "learning_rate": 1.279926781017843e-07, "loss": 26.7331, "step": 467520 }, { "epoch": 0.9444401798664335, "grad_norm": 121.63961029052734, "learning_rate": 1.2791421473393184e-07, "loss": 10.3786, "step": 467530 }, { "epoch": 0.9444603804991172, "grad_norm": 289.2529296875, "learning_rate": 1.2783577511203515e-07, "loss": 14.1991, "step": 467540 }, { "epoch": 0.944480581131801, "grad_norm": 811.0139770507812, "learning_rate": 1.2775735923647614e-07, "loss": 21.7363, "step": 467550 }, { "epoch": 0.9445007817644848, "grad_norm": 155.37222290039062, "learning_rate": 1.2767896710763616e-07, "loss": 15.0662, "step": 467560 }, { "epoch": 0.9445209823971686, "grad_norm": 305.1764221191406, "learning_rate": 1.2760059872589824e-07, "loss": 21.6474, "step": 467570 }, { "epoch": 0.9445411830298525, "grad_norm": 359.0425109863281, "learning_rate": 1.2752225409164432e-07, "loss": 27.2246, "step": 467580 }, { "epoch": 0.9445613836625363, "grad_norm": 193.85736083984375, "learning_rate": 1.2744393320525573e-07, "loss": 9.8475, "step": 467590 }, { "epoch": 0.9445815842952201, "grad_norm": 462.73992919921875, "learning_rate": 1.2736563606711384e-07, "loss": 11.911, "step": 467600 }, { "epoch": 0.9446017849279039, "grad_norm": 240.19923400878906, "learning_rate": 1.2728736267760167e-07, "loss": 43.9946, "step": 467610 }, { "epoch": 0.9446219855605877, "grad_norm": 217.29193115234375, "learning_rate": 1.2720911303710004e-07, "loss": 11.255, "step": 467620 }, { "epoch": 0.9446421861932716, "grad_norm": 166.75306701660156, "learning_rate": 1.2713088714598974e-07, "loss": 10.818, "step": 467630 }, { "epoch": 0.9446623868259554, "grad_norm": 146.30332946777344, "learning_rate": 1.2705268500465274e-07, "loss": 14.2386, "step": 467640 }, { "epoch": 0.9446825874586392, "grad_norm": 37.514007568359375, "learning_rate": 1.2697450661347033e-07, "loss": 14.8072, "step": 467650 }, { "epoch": 0.944702788091323, "grad_norm": 697.3914794921875, "learning_rate": 
1.2689635197282224e-07, "loss": 24.3691, "step": 467660 }, { "epoch": 0.9447229887240068, "grad_norm": 776.3318481445312, "learning_rate": 1.2681822108309094e-07, "loss": 21.7652, "step": 467670 }, { "epoch": 0.9447431893566907, "grad_norm": 744.0573120117188, "learning_rate": 1.2674011394465614e-07, "loss": 29.1712, "step": 467680 }, { "epoch": 0.9447633899893745, "grad_norm": 237.00564575195312, "learning_rate": 1.2666203055789915e-07, "loss": 11.0158, "step": 467690 }, { "epoch": 0.9447835906220583, "grad_norm": 423.5158996582031, "learning_rate": 1.2658397092320028e-07, "loss": 18.9339, "step": 467700 }, { "epoch": 0.9448037912547421, "grad_norm": 1079.7054443359375, "learning_rate": 1.2650593504094034e-07, "loss": 16.8912, "step": 467710 }, { "epoch": 0.9448239918874259, "grad_norm": 400.0626525878906, "learning_rate": 1.2642792291149896e-07, "loss": 15.3153, "step": 467720 }, { "epoch": 0.9448441925201098, "grad_norm": 819.558349609375, "learning_rate": 1.2634993453525702e-07, "loss": 27.604, "step": 467730 }, { "epoch": 0.9448643931527936, "grad_norm": 203.84701538085938, "learning_rate": 1.2627196991259473e-07, "loss": 24.7013, "step": 467740 }, { "epoch": 0.9448845937854774, "grad_norm": 247.1985626220703, "learning_rate": 1.261940290438912e-07, "loss": 24.6028, "step": 467750 }, { "epoch": 0.9449047944181612, "grad_norm": 239.46533203125, "learning_rate": 1.2611611192952733e-07, "loss": 15.5538, "step": 467760 }, { "epoch": 0.944924995050845, "grad_norm": 20.467748641967773, "learning_rate": 1.2603821856988218e-07, "loss": 15.1277, "step": 467770 }, { "epoch": 0.9449451956835289, "grad_norm": 612.4995727539062, "learning_rate": 1.259603489653355e-07, "loss": 13.7239, "step": 467780 }, { "epoch": 0.9449653963162126, "grad_norm": 285.17694091796875, "learning_rate": 1.2588250311626693e-07, "loss": 19.4016, "step": 467790 }, { "epoch": 0.9449855969488964, "grad_norm": 300.2528991699219, "learning_rate": 1.258046810230562e-07, "loss": 30.4937, "step": 467800 }, { "epoch": 0.9450057975815802, "grad_norm": 406.70208740234375, "learning_rate": 1.257268826860819e-07, "loss": 25.9061, "step": 467810 }, { "epoch": 0.945025998214264, "grad_norm": 152.28329467773438, "learning_rate": 1.2564910810572317e-07, "loss": 10.9667, "step": 467820 }, { "epoch": 0.9450461988469478, "grad_norm": 489.2743225097656, "learning_rate": 1.255713572823608e-07, "loss": 13.7445, "step": 467830 }, { "epoch": 0.9450663994796317, "grad_norm": 551.2591552734375, "learning_rate": 1.2549363021637174e-07, "loss": 14.6357, "step": 467840 }, { "epoch": 0.9450866001123155, "grad_norm": 263.2279052734375, "learning_rate": 1.2541592690813508e-07, "loss": 17.0744, "step": 467850 }, { "epoch": 0.9451068007449993, "grad_norm": 324.44024658203125, "learning_rate": 1.2533824735803059e-07, "loss": 19.0699, "step": 467860 }, { "epoch": 0.9451270013776831, "grad_norm": 39.75548553466797, "learning_rate": 1.252605915664362e-07, "loss": 18.4461, "step": 467870 }, { "epoch": 0.945147202010367, "grad_norm": 417.4817810058594, "learning_rate": 1.2518295953373005e-07, "loss": 13.1373, "step": 467880 }, { "epoch": 0.9451674026430508, "grad_norm": 178.2801971435547, "learning_rate": 1.2510535126029067e-07, "loss": 15.0186, "step": 467890 }, { "epoch": 0.9451876032757346, "grad_norm": 16.9990177154541, "learning_rate": 1.2502776674649776e-07, "loss": 17.0969, "step": 467900 }, { "epoch": 0.9452078039084184, "grad_norm": 648.9572143554688, "learning_rate": 1.2495020599272766e-07, "loss": 16.402, "step": 467910 }, { "epoch": 
0.9452280045411022, "grad_norm": 249.67657470703125, "learning_rate": 1.2487266899935845e-07, "loss": 12.7127, "step": 467920 }, { "epoch": 0.945248205173786, "grad_norm": 551.3310546875, "learning_rate": 1.2479515576676925e-07, "loss": 21.3335, "step": 467930 }, { "epoch": 0.9452684058064699, "grad_norm": 407.8700256347656, "learning_rate": 1.24717666295337e-07, "loss": 13.2619, "step": 467940 }, { "epoch": 0.9452886064391537, "grad_norm": 116.41875457763672, "learning_rate": 1.2464020058543912e-07, "loss": 10.3969, "step": 467950 }, { "epoch": 0.9453088070718375, "grad_norm": 427.3218078613281, "learning_rate": 1.2456275863745426e-07, "loss": 14.2084, "step": 467960 }, { "epoch": 0.9453290077045213, "grad_norm": 1037.89501953125, "learning_rate": 1.2448534045175876e-07, "loss": 23.1926, "step": 467970 }, { "epoch": 0.9453492083372051, "grad_norm": 447.3141174316406, "learning_rate": 1.2440794602873064e-07, "loss": 14.0446, "step": 467980 }, { "epoch": 0.945369408969889, "grad_norm": 353.541015625, "learning_rate": 1.2433057536874682e-07, "loss": 8.0339, "step": 467990 }, { "epoch": 0.9453896096025728, "grad_norm": 710.0747680664062, "learning_rate": 1.2425322847218368e-07, "loss": 22.1698, "step": 468000 }, { "epoch": 0.9454098102352566, "grad_norm": 101.60120391845703, "learning_rate": 1.241759053394198e-07, "loss": 14.5833, "step": 468010 }, { "epoch": 0.9454300108679404, "grad_norm": 127.26005554199219, "learning_rate": 1.2409860597083102e-07, "loss": 9.7244, "step": 468020 }, { "epoch": 0.9454502115006242, "grad_norm": 360.7369384765625, "learning_rate": 1.240213303667942e-07, "loss": 6.6837, "step": 468030 }, { "epoch": 0.9454704121333081, "grad_norm": 610.5038452148438, "learning_rate": 1.239440785276863e-07, "loss": 30.8797, "step": 468040 }, { "epoch": 0.9454906127659918, "grad_norm": 677.2760620117188, "learning_rate": 1.2386685045388313e-07, "loss": 18.1932, "step": 468050 }, { "epoch": 0.9455108133986756, "grad_norm": 348.82879638671875, "learning_rate": 1.2378964614576162e-07, "loss": 23.3082, "step": 468060 }, { "epoch": 0.9455310140313594, "grad_norm": 282.5048828125, "learning_rate": 1.237124656036981e-07, "loss": 20.5746, "step": 468070 }, { "epoch": 0.9455512146640432, "grad_norm": 175.1044158935547, "learning_rate": 1.236353088280684e-07, "loss": 17.5479, "step": 468080 }, { "epoch": 0.9455714152967271, "grad_norm": 675.5391235351562, "learning_rate": 1.2355817581924945e-07, "loss": 11.3713, "step": 468090 }, { "epoch": 0.9455916159294109, "grad_norm": 466.1625061035156, "learning_rate": 1.2348106657761537e-07, "loss": 10.5448, "step": 468100 }, { "epoch": 0.9456118165620947, "grad_norm": 307.6192932128906, "learning_rate": 1.2340398110354424e-07, "loss": 15.6597, "step": 468110 }, { "epoch": 0.9456320171947785, "grad_norm": 307.61944580078125, "learning_rate": 1.2332691939741015e-07, "loss": 7.8915, "step": 468120 }, { "epoch": 0.9456522178274623, "grad_norm": 311.0517578125, "learning_rate": 1.2324988145958895e-07, "loss": 19.7036, "step": 468130 }, { "epoch": 0.9456724184601462, "grad_norm": 85.99491882324219, "learning_rate": 1.2317286729045586e-07, "loss": 21.8538, "step": 468140 }, { "epoch": 0.94569261909283, "grad_norm": 264.7533264160156, "learning_rate": 1.2309587689038783e-07, "loss": 26.0238, "step": 468150 }, { "epoch": 0.9457128197255138, "grad_norm": 210.65113830566406, "learning_rate": 1.2301891025975897e-07, "loss": 16.9391, "step": 468160 }, { "epoch": 0.9457330203581976, "grad_norm": 189.60183715820312, "learning_rate": 1.229419673989435e-07, "loss": 
17.2061, "step": 468170 }, { "epoch": 0.9457532209908814, "grad_norm": 383.91912841796875, "learning_rate": 1.2286504830831824e-07, "loss": 19.7732, "step": 468180 }, { "epoch": 0.9457734216235653, "grad_norm": 416.5213928222656, "learning_rate": 1.2278815298825742e-07, "loss": 25.9267, "step": 468190 }, { "epoch": 0.9457936222562491, "grad_norm": 327.18695068359375, "learning_rate": 1.2271128143913458e-07, "loss": 29.0271, "step": 468200 }, { "epoch": 0.9458138228889329, "grad_norm": 160.49041748046875, "learning_rate": 1.2263443366132555e-07, "loss": 7.8713, "step": 468210 }, { "epoch": 0.9458340235216167, "grad_norm": 510.4881896972656, "learning_rate": 1.2255760965520557e-07, "loss": 21.4823, "step": 468220 }, { "epoch": 0.9458542241543005, "grad_norm": 509.4342956542969, "learning_rate": 1.224808094211477e-07, "loss": 20.5117, "step": 468230 }, { "epoch": 0.9458744247869844, "grad_norm": 307.6473693847656, "learning_rate": 1.2240403295952662e-07, "loss": 9.3283, "step": 468240 }, { "epoch": 0.9458946254196682, "grad_norm": 509.9915771484375, "learning_rate": 1.2232728027071704e-07, "loss": 13.2376, "step": 468250 }, { "epoch": 0.945914826052352, "grad_norm": 95.58562469482422, "learning_rate": 1.222505513550931e-07, "loss": 9.5526, "step": 468260 }, { "epoch": 0.9459350266850358, "grad_norm": 732.8162231445312, "learning_rate": 1.221738462130273e-07, "loss": 26.2388, "step": 468270 }, { "epoch": 0.9459552273177196, "grad_norm": 83.42688751220703, "learning_rate": 1.2209716484489543e-07, "loss": 23.4167, "step": 468280 }, { "epoch": 0.9459754279504035, "grad_norm": 442.001220703125, "learning_rate": 1.2202050725106995e-07, "loss": 21.2767, "step": 468290 }, { "epoch": 0.9459956285830872, "grad_norm": 363.59063720703125, "learning_rate": 1.2194387343192504e-07, "loss": 16.9433, "step": 468300 }, { "epoch": 0.946015829215771, "grad_norm": 130.3279266357422, "learning_rate": 1.2186726338783427e-07, "loss": 10.2144, "step": 468310 }, { "epoch": 0.9460360298484548, "grad_norm": 277.4386901855469, "learning_rate": 1.2179067711917015e-07, "loss": 12.461, "step": 468320 }, { "epoch": 0.9460562304811386, "grad_norm": 47.01555633544922, "learning_rate": 1.2171411462630732e-07, "loss": 16.815, "step": 468330 }, { "epoch": 0.9460764311138224, "grad_norm": 436.15606689453125, "learning_rate": 1.216375759096178e-07, "loss": 36.5272, "step": 468340 }, { "epoch": 0.9460966317465063, "grad_norm": 708.5038452148438, "learning_rate": 1.2156106096947563e-07, "loss": 32.5021, "step": 468350 }, { "epoch": 0.9461168323791901, "grad_norm": 106.7961196899414, "learning_rate": 1.2148456980625223e-07, "loss": 14.8968, "step": 468360 }, { "epoch": 0.9461370330118739, "grad_norm": 335.14959716796875, "learning_rate": 1.214081024203223e-07, "loss": 15.8294, "step": 468370 }, { "epoch": 0.9461572336445577, "grad_norm": 152.71038818359375, "learning_rate": 1.2133165881205723e-07, "loss": 19.1025, "step": 468380 }, { "epoch": 0.9461774342772415, "grad_norm": 317.9998779296875, "learning_rate": 1.2125523898182945e-07, "loss": 19.3284, "step": 468390 }, { "epoch": 0.9461976349099254, "grad_norm": 164.97238159179688, "learning_rate": 1.211788429300126e-07, "loss": 18.7075, "step": 468400 }, { "epoch": 0.9462178355426092, "grad_norm": 481.8919372558594, "learning_rate": 1.21102470656978e-07, "loss": 22.586, "step": 468410 }, { "epoch": 0.946238036175293, "grad_norm": 427.742431640625, "learning_rate": 1.2102612216309816e-07, "loss": 16.2481, "step": 468420 }, { "epoch": 0.9462582368079768, "grad_norm": 82.76959991455078, 
"learning_rate": 1.2094979744874502e-07, "loss": 15.4469, "step": 468430 }, { "epoch": 0.9462784374406606, "grad_norm": 502.79638671875, "learning_rate": 1.2087349651429215e-07, "loss": 15.7268, "step": 468440 }, { "epoch": 0.9462986380733445, "grad_norm": 586.7080688476562, "learning_rate": 1.207972193601087e-07, "loss": 13.1939, "step": 468450 }, { "epoch": 0.9463188387060283, "grad_norm": 500.2250061035156, "learning_rate": 1.207209659865677e-07, "loss": 14.0395, "step": 468460 }, { "epoch": 0.9463390393387121, "grad_norm": 637.3262939453125, "learning_rate": 1.206447363940416e-07, "loss": 19.9108, "step": 468470 }, { "epoch": 0.9463592399713959, "grad_norm": 573.2239990234375, "learning_rate": 1.205685305829013e-07, "loss": 25.3511, "step": 468480 }, { "epoch": 0.9463794406040797, "grad_norm": 55.16313552856445, "learning_rate": 1.204923485535181e-07, "loss": 22.0333, "step": 468490 }, { "epoch": 0.9463996412367636, "grad_norm": 374.0496520996094, "learning_rate": 1.2041619030626283e-07, "loss": 21.0971, "step": 468500 }, { "epoch": 0.9464198418694474, "grad_norm": 87.39411163330078, "learning_rate": 1.2034005584150854e-07, "loss": 12.7182, "step": 468510 }, { "epoch": 0.9464400425021312, "grad_norm": 320.94329833984375, "learning_rate": 1.2026394515962382e-07, "loss": 28.4477, "step": 468520 }, { "epoch": 0.946460243134815, "grad_norm": 220.43370056152344, "learning_rate": 1.2018785826098057e-07, "loss": 9.1108, "step": 468530 }, { "epoch": 0.9464804437674988, "grad_norm": 135.63665771484375, "learning_rate": 1.2011179514595072e-07, "loss": 30.4911, "step": 468540 }, { "epoch": 0.9465006444001827, "grad_norm": 315.3052978515625, "learning_rate": 1.20035755814904e-07, "loss": 17.3008, "step": 468550 }, { "epoch": 0.9465208450328664, "grad_norm": 32.96565246582031, "learning_rate": 1.1995974026821066e-07, "loss": 24.2093, "step": 468560 }, { "epoch": 0.9465410456655502, "grad_norm": 148.7790069580078, "learning_rate": 1.1988374850624208e-07, "loss": 20.4896, "step": 468570 }, { "epoch": 0.946561246298234, "grad_norm": 930.547119140625, "learning_rate": 1.198077805293679e-07, "loss": 9.2134, "step": 468580 }, { "epoch": 0.9465814469309178, "grad_norm": 237.03515625, "learning_rate": 1.1973183633795849e-07, "loss": 52.5735, "step": 468590 }, { "epoch": 0.9466016475636017, "grad_norm": 17.60946273803711, "learning_rate": 1.1965591593238513e-07, "loss": 10.6422, "step": 468600 }, { "epoch": 0.9466218481962855, "grad_norm": 320.0501403808594, "learning_rate": 1.1958001931301587e-07, "loss": 13.3763, "step": 468610 }, { "epoch": 0.9466420488289693, "grad_norm": 115.49630737304688, "learning_rate": 1.195041464802227e-07, "loss": 18.8042, "step": 468620 }, { "epoch": 0.9466622494616531, "grad_norm": 32.85114669799805, "learning_rate": 1.19428297434373e-07, "loss": 22.0479, "step": 468630 }, { "epoch": 0.9466824500943369, "grad_norm": 259.37530517578125, "learning_rate": 1.1935247217583934e-07, "loss": 12.3792, "step": 468640 }, { "epoch": 0.9467026507270208, "grad_norm": 781.1555786132812, "learning_rate": 1.1927667070498916e-07, "loss": 15.1413, "step": 468650 }, { "epoch": 0.9467228513597046, "grad_norm": 385.81744384765625, "learning_rate": 1.1920089302219218e-07, "loss": 16.4108, "step": 468660 }, { "epoch": 0.9467430519923884, "grad_norm": 162.37913513183594, "learning_rate": 1.1912513912781864e-07, "loss": 17.8091, "step": 468670 }, { "epoch": 0.9467632526250722, "grad_norm": 371.38995361328125, "learning_rate": 1.1904940902223661e-07, "loss": 15.9771, "step": 468680 }, { "epoch": 
0.946783453257756, "grad_norm": 426.4790954589844, "learning_rate": 1.1897370270581632e-07, "loss": 25.6471, "step": 468690 }, { "epoch": 0.9468036538904399, "grad_norm": 6.888204574584961, "learning_rate": 1.1889802017892638e-07, "loss": 14.0545, "step": 468700 }, { "epoch": 0.9468238545231237, "grad_norm": 34.13553237915039, "learning_rate": 1.1882236144193482e-07, "loss": 19.7614, "step": 468710 }, { "epoch": 0.9468440551558075, "grad_norm": 369.91961669921875, "learning_rate": 1.1874672649521135e-07, "loss": 26.8512, "step": 468720 }, { "epoch": 0.9468642557884913, "grad_norm": 118.35027313232422, "learning_rate": 1.1867111533912457e-07, "loss": 10.5861, "step": 468730 }, { "epoch": 0.9468844564211751, "grad_norm": 473.2422180175781, "learning_rate": 1.1859552797404194e-07, "loss": 11.2697, "step": 468740 }, { "epoch": 0.946904657053859, "grad_norm": 0.0, "learning_rate": 1.185199644003332e-07, "loss": 18.9192, "step": 468750 }, { "epoch": 0.9469248576865428, "grad_norm": 975.2803955078125, "learning_rate": 1.1844442461836636e-07, "loss": 27.8736, "step": 468760 }, { "epoch": 0.9469450583192266, "grad_norm": 175.9191436767578, "learning_rate": 1.1836890862850892e-07, "loss": 19.3225, "step": 468770 }, { "epoch": 0.9469652589519104, "grad_norm": 246.40516662597656, "learning_rate": 1.1829341643112946e-07, "loss": 23.1092, "step": 468780 }, { "epoch": 0.9469854595845942, "grad_norm": 763.1865844726562, "learning_rate": 1.1821794802659603e-07, "loss": 14.0527, "step": 468790 }, { "epoch": 0.9470056602172781, "grad_norm": 257.7726135253906, "learning_rate": 1.1814250341527611e-07, "loss": 23.4074, "step": 468800 }, { "epoch": 0.9470258608499619, "grad_norm": 573.9570922851562, "learning_rate": 1.1806708259753718e-07, "loss": 20.0437, "step": 468810 }, { "epoch": 0.9470460614826456, "grad_norm": 201.02435302734375, "learning_rate": 1.179916855737473e-07, "loss": 16.4613, "step": 468820 }, { "epoch": 0.9470662621153294, "grad_norm": 245.06533813476562, "learning_rate": 1.1791631234427448e-07, "loss": 14.38, "step": 468830 }, { "epoch": 0.9470864627480132, "grad_norm": 862.9598999023438, "learning_rate": 1.1784096290948455e-07, "loss": 24.3121, "step": 468840 }, { "epoch": 0.947106663380697, "grad_norm": 560.0054321289062, "learning_rate": 1.177656372697461e-07, "loss": 15.7914, "step": 468850 }, { "epoch": 0.9471268640133809, "grad_norm": 175.73483276367188, "learning_rate": 1.1769033542542552e-07, "loss": 6.7914, "step": 468860 }, { "epoch": 0.9471470646460647, "grad_norm": 291.7281188964844, "learning_rate": 1.1761505737689082e-07, "loss": 18.2273, "step": 468870 }, { "epoch": 0.9471672652787485, "grad_norm": 338.5940856933594, "learning_rate": 1.175398031245073e-07, "loss": 12.2181, "step": 468880 }, { "epoch": 0.9471874659114323, "grad_norm": 272.7087707519531, "learning_rate": 1.1746457266864297e-07, "loss": 13.4214, "step": 468890 }, { "epoch": 0.9472076665441161, "grad_norm": 311.3247985839844, "learning_rate": 1.1738936600966366e-07, "loss": 14.7226, "step": 468900 }, { "epoch": 0.9472278671768, "grad_norm": 291.2148132324219, "learning_rate": 1.173141831479374e-07, "loss": 17.4352, "step": 468910 }, { "epoch": 0.9472480678094838, "grad_norm": 206.57376098632812, "learning_rate": 1.1723902408382892e-07, "loss": 20.1185, "step": 468920 }, { "epoch": 0.9472682684421676, "grad_norm": 440.7289733886719, "learning_rate": 1.1716388881770513e-07, "loss": 27.5187, "step": 468930 }, { "epoch": 0.9472884690748514, "grad_norm": 608.8489379882812, "learning_rate": 1.1708877734993296e-07, "loss": 
20.8657, "step": 468940 }, { "epoch": 0.9473086697075352, "grad_norm": 202.66183471679688, "learning_rate": 1.1701368968087711e-07, "loss": 7.8949, "step": 468950 }, { "epoch": 0.9473288703402191, "grad_norm": 296.52935791015625, "learning_rate": 1.1693862581090453e-07, "loss": 13.0502, "step": 468960 }, { "epoch": 0.9473490709729029, "grad_norm": 659.4253540039062, "learning_rate": 1.1686358574038104e-07, "loss": 20.7425, "step": 468970 }, { "epoch": 0.9473692716055867, "grad_norm": 132.4563751220703, "learning_rate": 1.1678856946967244e-07, "loss": 17.9443, "step": 468980 }, { "epoch": 0.9473894722382705, "grad_norm": 633.8014526367188, "learning_rate": 1.1671357699914343e-07, "loss": 15.5557, "step": 468990 }, { "epoch": 0.9474096728709543, "grad_norm": 66.96441650390625, "learning_rate": 1.166386083291604e-07, "loss": 15.0395, "step": 469000 }, { "epoch": 0.9474298735036382, "grad_norm": 582.8804931640625, "learning_rate": 1.1656366346008862e-07, "loss": 22.1104, "step": 469010 }, { "epoch": 0.947450074136322, "grad_norm": 347.63812255859375, "learning_rate": 1.1648874239229391e-07, "loss": 16.2601, "step": 469020 }, { "epoch": 0.9474702747690058, "grad_norm": 283.6161193847656, "learning_rate": 1.1641384512613985e-07, "loss": 12.2617, "step": 469030 }, { "epoch": 0.9474904754016896, "grad_norm": 430.0794677734375, "learning_rate": 1.1633897166199227e-07, "loss": 19.0489, "step": 469040 }, { "epoch": 0.9475106760343734, "grad_norm": 315.4210510253906, "learning_rate": 1.1626412200021697e-07, "loss": 11.05, "step": 469050 }, { "epoch": 0.9475308766670573, "grad_norm": 376.3665466308594, "learning_rate": 1.1618929614117757e-07, "loss": 21.5259, "step": 469060 }, { "epoch": 0.947551077299741, "grad_norm": 45.962337493896484, "learning_rate": 1.1611449408523879e-07, "loss": 21.0688, "step": 469070 }, { "epoch": 0.9475712779324248, "grad_norm": 248.42181396484375, "learning_rate": 1.1603971583276641e-07, "loss": 11.9986, "step": 469080 }, { "epoch": 0.9475914785651086, "grad_norm": 26.90751838684082, "learning_rate": 1.1596496138412405e-07, "loss": 28.1198, "step": 469090 }, { "epoch": 0.9476116791977924, "grad_norm": 678.9092407226562, "learning_rate": 1.1589023073967586e-07, "loss": 20.818, "step": 469100 }, { "epoch": 0.9476318798304763, "grad_norm": 7.6660566329956055, "learning_rate": 1.1581552389978601e-07, "loss": 9.1425, "step": 469110 }, { "epoch": 0.9476520804631601, "grad_norm": 273.488037109375, "learning_rate": 1.1574084086481973e-07, "loss": 23.9087, "step": 469120 }, { "epoch": 0.9476722810958439, "grad_norm": 672.782958984375, "learning_rate": 1.1566618163513954e-07, "loss": 12.4417, "step": 469130 }, { "epoch": 0.9476924817285277, "grad_norm": 28.96516990661621, "learning_rate": 1.1559154621110957e-07, "loss": 6.9446, "step": 469140 }, { "epoch": 0.9477126823612115, "grad_norm": 1065.329833984375, "learning_rate": 1.155169345930951e-07, "loss": 20.1584, "step": 469150 }, { "epoch": 0.9477328829938954, "grad_norm": 578.9161987304688, "learning_rate": 1.1544234678145805e-07, "loss": 22.021, "step": 469160 }, { "epoch": 0.9477530836265792, "grad_norm": 99.24224853515625, "learning_rate": 1.1536778277656258e-07, "loss": 11.5046, "step": 469170 }, { "epoch": 0.947773284259263, "grad_norm": 570.20166015625, "learning_rate": 1.1529324257877228e-07, "loss": 33.5162, "step": 469180 }, { "epoch": 0.9477934848919468, "grad_norm": 138.69076538085938, "learning_rate": 1.152187261884502e-07, "loss": 13.3612, "step": 469190 }, { "epoch": 0.9478136855246306, "grad_norm": 892.8467407226562, 
"learning_rate": 1.1514423360595939e-07, "loss": 14.4001, "step": 469200 }, { "epoch": 0.9478338861573145, "grad_norm": 1641.258544921875, "learning_rate": 1.1506976483166343e-07, "loss": 22.1854, "step": 469210 }, { "epoch": 0.9478540867899983, "grad_norm": 1146.048583984375, "learning_rate": 1.1499531986592482e-07, "loss": 25.4028, "step": 469220 }, { "epoch": 0.9478742874226821, "grad_norm": 272.59130859375, "learning_rate": 1.1492089870910662e-07, "loss": 17.5598, "step": 469230 }, { "epoch": 0.9478944880553659, "grad_norm": 321.302734375, "learning_rate": 1.1484650136157127e-07, "loss": 21.2706, "step": 469240 }, { "epoch": 0.9479146886880497, "grad_norm": 702.2314453125, "learning_rate": 1.1477212782368185e-07, "loss": 15.2294, "step": 469250 }, { "epoch": 0.9479348893207336, "grad_norm": 371.1092834472656, "learning_rate": 1.1469777809580084e-07, "loss": 21.5585, "step": 469260 }, { "epoch": 0.9479550899534174, "grad_norm": 443.08245849609375, "learning_rate": 1.1462345217828963e-07, "loss": 10.0246, "step": 469270 }, { "epoch": 0.9479752905861012, "grad_norm": 476.6809997558594, "learning_rate": 1.1454915007151179e-07, "loss": 17.9244, "step": 469280 }, { "epoch": 0.947995491218785, "grad_norm": 626.3543090820312, "learning_rate": 1.1447487177582816e-07, "loss": 18.527, "step": 469290 }, { "epoch": 0.9480156918514688, "grad_norm": 466.4951477050781, "learning_rate": 1.1440061729160235e-07, "loss": 21.489, "step": 469300 }, { "epoch": 0.9480358924841527, "grad_norm": 11.767196655273438, "learning_rate": 1.1432638661919515e-07, "loss": 10.4186, "step": 469310 }, { "epoch": 0.9480560931168365, "grad_norm": 308.8578796386719, "learning_rate": 1.1425217975896796e-07, "loss": 12.2751, "step": 469320 }, { "epoch": 0.9480762937495202, "grad_norm": 282.1939392089844, "learning_rate": 1.1417799671128327e-07, "loss": 15.9067, "step": 469330 }, { "epoch": 0.948096494382204, "grad_norm": 348.49114990234375, "learning_rate": 1.14103837476503e-07, "loss": 17.374, "step": 469340 }, { "epoch": 0.9481166950148878, "grad_norm": 595.0879516601562, "learning_rate": 1.1402970205498742e-07, "loss": 27.4214, "step": 469350 }, { "epoch": 0.9481368956475716, "grad_norm": 424.2049255371094, "learning_rate": 1.1395559044709848e-07, "loss": 11.7823, "step": 469360 }, { "epoch": 0.9481570962802555, "grad_norm": 488.8497009277344, "learning_rate": 1.1388150265319808e-07, "loss": 13.7173, "step": 469370 }, { "epoch": 0.9481772969129393, "grad_norm": 382.3506164550781, "learning_rate": 1.1380743867364596e-07, "loss": 17.2933, "step": 469380 }, { "epoch": 0.9481974975456231, "grad_norm": 171.2713623046875, "learning_rate": 1.1373339850880405e-07, "loss": 17.9502, "step": 469390 }, { "epoch": 0.9482176981783069, "grad_norm": 338.79010009765625, "learning_rate": 1.136593821590326e-07, "loss": 11.2293, "step": 469400 }, { "epoch": 0.9482378988109907, "grad_norm": 173.9486541748047, "learning_rate": 1.1358538962469356e-07, "loss": 21.067, "step": 469410 }, { "epoch": 0.9482580994436746, "grad_norm": 574.244873046875, "learning_rate": 1.1351142090614553e-07, "loss": 22.1407, "step": 469420 }, { "epoch": 0.9482783000763584, "grad_norm": 132.3253936767578, "learning_rate": 1.1343747600375044e-07, "loss": 12.5369, "step": 469430 }, { "epoch": 0.9482985007090422, "grad_norm": 91.43624114990234, "learning_rate": 1.1336355491786966e-07, "loss": 15.2691, "step": 469440 }, { "epoch": 0.948318701341726, "grad_norm": 191.69607543945312, "learning_rate": 1.1328965764886069e-07, "loss": 23.2678, "step": 469450 }, { "epoch": 
0.9483389019744098, "grad_norm": 117.98069763183594, "learning_rate": 1.1321578419708545e-07, "loss": 18.1865, "step": 469460 }, { "epoch": 0.9483591026070937, "grad_norm": 142.21937561035156, "learning_rate": 1.1314193456290424e-07, "loss": 22.8095, "step": 469470 }, { "epoch": 0.9483793032397775, "grad_norm": 17.312837600708008, "learning_rate": 1.1306810874667673e-07, "loss": 18.3314, "step": 469480 }, { "epoch": 0.9483995038724613, "grad_norm": 365.4279479980469, "learning_rate": 1.129943067487621e-07, "loss": 15.4938, "step": 469490 }, { "epoch": 0.9484197045051451, "grad_norm": 610.1925659179688, "learning_rate": 1.1292052856952063e-07, "loss": 25.1791, "step": 469500 }, { "epoch": 0.9484399051378289, "grad_norm": 28.289342880249023, "learning_rate": 1.1284677420931201e-07, "loss": 9.2474, "step": 469510 }, { "epoch": 0.9484601057705128, "grad_norm": 530.5923461914062, "learning_rate": 1.1277304366849539e-07, "loss": 16.9483, "step": 469520 }, { "epoch": 0.9484803064031966, "grad_norm": 376.1013488769531, "learning_rate": 1.1269933694742996e-07, "loss": 26.9136, "step": 469530 }, { "epoch": 0.9485005070358804, "grad_norm": 743.5037231445312, "learning_rate": 1.1262565404647485e-07, "loss": 16.8922, "step": 469540 }, { "epoch": 0.9485207076685642, "grad_norm": 290.6438293457031, "learning_rate": 1.1255199496599034e-07, "loss": 13.4206, "step": 469550 }, { "epoch": 0.948540908301248, "grad_norm": 591.5577392578125, "learning_rate": 1.1247835970633392e-07, "loss": 15.4233, "step": 469560 }, { "epoch": 0.9485611089339319, "grad_norm": 413.7424621582031, "learning_rate": 1.1240474826786585e-07, "loss": 11.5132, "step": 469570 }, { "epoch": 0.9485813095666156, "grad_norm": 81.66283416748047, "learning_rate": 1.1233116065094363e-07, "loss": 17.804, "step": 469580 }, { "epoch": 0.9486015101992994, "grad_norm": 712.2706909179688, "learning_rate": 1.1225759685592697e-07, "loss": 17.38, "step": 469590 }, { "epoch": 0.9486217108319832, "grad_norm": 451.8149108886719, "learning_rate": 1.1218405688317447e-07, "loss": 8.9866, "step": 469600 }, { "epoch": 0.948641911464667, "grad_norm": 286.841796875, "learning_rate": 1.1211054073304305e-07, "loss": 14.9223, "step": 469610 }, { "epoch": 0.9486621120973509, "grad_norm": 352.9850769042969, "learning_rate": 1.1203704840589247e-07, "loss": 13.3507, "step": 469620 }, { "epoch": 0.9486823127300347, "grad_norm": 10.075496673583984, "learning_rate": 1.1196357990208074e-07, "loss": 13.7039, "step": 469630 }, { "epoch": 0.9487025133627185, "grad_norm": 428.2384948730469, "learning_rate": 1.1189013522196479e-07, "loss": 24.0886, "step": 469640 }, { "epoch": 0.9487227139954023, "grad_norm": 123.3632583618164, "learning_rate": 1.118167143659038e-07, "loss": 9.5288, "step": 469650 }, { "epoch": 0.9487429146280861, "grad_norm": 335.2364196777344, "learning_rate": 1.1174331733425636e-07, "loss": 19.4018, "step": 469660 }, { "epoch": 0.94876311526077, "grad_norm": 423.3990173339844, "learning_rate": 1.1166994412737774e-07, "loss": 23.5129, "step": 469670 }, { "epoch": 0.9487833158934538, "grad_norm": 360.9956359863281, "learning_rate": 1.1159659474562712e-07, "loss": 13.3685, "step": 469680 }, { "epoch": 0.9488035165261376, "grad_norm": 478.0350341796875, "learning_rate": 1.1152326918936251e-07, "loss": 24.2518, "step": 469690 }, { "epoch": 0.9488237171588214, "grad_norm": 577.0175170898438, "learning_rate": 1.1144996745894033e-07, "loss": 28.4371, "step": 469700 }, { "epoch": 0.9488439177915052, "grad_norm": 551.7817993164062, "learning_rate": 
1.1137668955471803e-07, "loss": 10.8502, "step": 469710 }, { "epoch": 0.948864118424189, "grad_norm": 1055.144287109375, "learning_rate": 1.1130343547705257e-07, "loss": 26.8408, "step": 469720 }, { "epoch": 0.9488843190568729, "grad_norm": 444.7875061035156, "learning_rate": 1.1123020522630202e-07, "loss": 24.6248, "step": 469730 }, { "epoch": 0.9489045196895567, "grad_norm": 305.60711669921875, "learning_rate": 1.111569988028216e-07, "loss": 26.628, "step": 469740 }, { "epoch": 0.9489247203222405, "grad_norm": 499.5113220214844, "learning_rate": 1.1108381620696885e-07, "loss": 13.9902, "step": 469750 }, { "epoch": 0.9489449209549243, "grad_norm": 288.8736267089844, "learning_rate": 1.1101065743910122e-07, "loss": 15.2388, "step": 469760 }, { "epoch": 0.9489651215876082, "grad_norm": 506.89111328125, "learning_rate": 1.1093752249957512e-07, "loss": 22.3633, "step": 469770 }, { "epoch": 0.948985322220292, "grad_norm": 421.3926086425781, "learning_rate": 1.1086441138874581e-07, "loss": 35.1609, "step": 469780 }, { "epoch": 0.9490055228529758, "grad_norm": 636.1416015625, "learning_rate": 1.107913241069708e-07, "loss": 17.0688, "step": 469790 }, { "epoch": 0.9490257234856596, "grad_norm": 95.65436553955078, "learning_rate": 1.107182606546059e-07, "loss": 6.2457, "step": 469800 }, { "epoch": 0.9490459241183434, "grad_norm": 57.0366096496582, "learning_rate": 1.1064522103200636e-07, "loss": 10.7217, "step": 469810 }, { "epoch": 0.9490661247510273, "grad_norm": 331.9047546386719, "learning_rate": 1.1057220523953027e-07, "loss": 8.9685, "step": 469820 }, { "epoch": 0.9490863253837111, "grad_norm": 151.98440551757812, "learning_rate": 1.1049921327753121e-07, "loss": 9.8115, "step": 469830 }, { "epoch": 0.9491065260163948, "grad_norm": 602.0835571289062, "learning_rate": 1.1042624514636669e-07, "loss": 16.8972, "step": 469840 }, { "epoch": 0.9491267266490786, "grad_norm": 357.6507873535156, "learning_rate": 1.1035330084639084e-07, "loss": 9.9532, "step": 469850 }, { "epoch": 0.9491469272817624, "grad_norm": 330.49566650390625, "learning_rate": 1.1028038037796063e-07, "loss": 13.1317, "step": 469860 }, { "epoch": 0.9491671279144462, "grad_norm": 263.4090881347656, "learning_rate": 1.1020748374143075e-07, "loss": 17.9668, "step": 469870 }, { "epoch": 0.9491873285471301, "grad_norm": 231.7461700439453, "learning_rate": 1.1013461093715594e-07, "loss": 9.0968, "step": 469880 }, { "epoch": 0.9492075291798139, "grad_norm": 209.28048706054688, "learning_rate": 1.1006176196549256e-07, "loss": 6.7867, "step": 469890 }, { "epoch": 0.9492277298124977, "grad_norm": 297.8096008300781, "learning_rate": 1.0998893682679479e-07, "loss": 23.6673, "step": 469900 }, { "epoch": 0.9492479304451815, "grad_norm": 334.8628234863281, "learning_rate": 1.099161355214179e-07, "loss": 14.8809, "step": 469910 }, { "epoch": 0.9492681310778653, "grad_norm": 610.1708374023438, "learning_rate": 1.0984335804971713e-07, "loss": 16.09, "step": 469920 }, { "epoch": 0.9492883317105492, "grad_norm": 32.4946403503418, "learning_rate": 1.0977060441204612e-07, "loss": 12.6552, "step": 469930 }, { "epoch": 0.949308532343233, "grad_norm": 275.5940856933594, "learning_rate": 1.0969787460876013e-07, "loss": 13.3029, "step": 469940 }, { "epoch": 0.9493287329759168, "grad_norm": 345.1875305175781, "learning_rate": 1.0962516864021388e-07, "loss": 14.9644, "step": 469950 }, { "epoch": 0.9493489336086006, "grad_norm": 191.85562133789062, "learning_rate": 1.0955248650676154e-07, "loss": 14.6402, "step": 469960 }, { "epoch": 0.9493691342412844, 
"grad_norm": 325.9012145996094, "learning_rate": 1.0947982820875669e-07, "loss": 18.6594, "step": 469970 }, { "epoch": 0.9493893348739683, "grad_norm": 841.303955078125, "learning_rate": 1.0940719374655462e-07, "loss": 34.0621, "step": 469980 }, { "epoch": 0.9494095355066521, "grad_norm": 264.1408386230469, "learning_rate": 1.0933458312050837e-07, "loss": 19.1478, "step": 469990 }, { "epoch": 0.9494297361393359, "grad_norm": 6.626099109649658, "learning_rate": 1.0926199633097156e-07, "loss": 11.4435, "step": 470000 }, { "epoch": 0.9494499367720197, "grad_norm": 991.1474609375, "learning_rate": 1.0918943337829945e-07, "loss": 29.941, "step": 470010 }, { "epoch": 0.9494701374047035, "grad_norm": 58.44243240356445, "learning_rate": 1.091168942628451e-07, "loss": 9.7997, "step": 470020 }, { "epoch": 0.9494903380373874, "grad_norm": 160.3310089111328, "learning_rate": 1.09044378984961e-07, "loss": 13.4106, "step": 470030 }, { "epoch": 0.9495105386700712, "grad_norm": 809.5315551757812, "learning_rate": 1.0897188754500187e-07, "loss": 20.2929, "step": 470040 }, { "epoch": 0.949530739302755, "grad_norm": 105.98553466796875, "learning_rate": 1.0889941994332077e-07, "loss": 14.2779, "step": 470050 }, { "epoch": 0.9495509399354388, "grad_norm": 900.638671875, "learning_rate": 1.0882697618027016e-07, "loss": 15.7529, "step": 470060 }, { "epoch": 0.9495711405681226, "grad_norm": 212.1917266845703, "learning_rate": 1.0875455625620368e-07, "loss": 25.6759, "step": 470070 }, { "epoch": 0.9495913412008065, "grad_norm": 913.4801635742188, "learning_rate": 1.0868216017147437e-07, "loss": 29.1505, "step": 470080 }, { "epoch": 0.9496115418334903, "grad_norm": 361.5392150878906, "learning_rate": 1.0860978792643528e-07, "loss": 10.5928, "step": 470090 }, { "epoch": 0.949631742466174, "grad_norm": 346.39202880859375, "learning_rate": 1.0853743952143836e-07, "loss": 13.5348, "step": 470100 }, { "epoch": 0.9496519430988578, "grad_norm": 312.23748779296875, "learning_rate": 1.084651149568372e-07, "loss": 13.6943, "step": 470110 }, { "epoch": 0.9496721437315416, "grad_norm": 496.2864074707031, "learning_rate": 1.0839281423298375e-07, "loss": 13.6047, "step": 470120 }, { "epoch": 0.9496923443642254, "grad_norm": 494.9438781738281, "learning_rate": 1.0832053735022996e-07, "loss": 14.4703, "step": 470130 }, { "epoch": 0.9497125449969093, "grad_norm": 574.4812622070312, "learning_rate": 1.0824828430892831e-07, "loss": 28.3906, "step": 470140 }, { "epoch": 0.9497327456295931, "grad_norm": 630.9834594726562, "learning_rate": 1.0817605510943241e-07, "loss": 15.1082, "step": 470150 }, { "epoch": 0.9497529462622769, "grad_norm": 492.0447692871094, "learning_rate": 1.0810384975209254e-07, "loss": 17.1864, "step": 470160 }, { "epoch": 0.9497731468949607, "grad_norm": 292.03009033203125, "learning_rate": 1.0803166823726064e-07, "loss": 19.4169, "step": 470170 }, { "epoch": 0.9497933475276445, "grad_norm": 491.9415588378906, "learning_rate": 1.0795951056528974e-07, "loss": 21.1524, "step": 470180 }, { "epoch": 0.9498135481603284, "grad_norm": 447.0008850097656, "learning_rate": 1.0788737673653072e-07, "loss": 30.879, "step": 470190 }, { "epoch": 0.9498337487930122, "grad_norm": 475.52154541015625, "learning_rate": 1.0781526675133492e-07, "loss": 27.2527, "step": 470200 }, { "epoch": 0.949853949425696, "grad_norm": 681.0592651367188, "learning_rate": 1.0774318061005484e-07, "loss": 16.0728, "step": 470210 }, { "epoch": 0.9498741500583798, "grad_norm": 284.41815185546875, "learning_rate": 1.0767111831304022e-07, "loss": 16.9715, 
"step": 470220 }, { "epoch": 0.9498943506910636, "grad_norm": 381.9979248046875, "learning_rate": 1.0759907986064411e-07, "loss": 17.1089, "step": 470230 }, { "epoch": 0.9499145513237475, "grad_norm": 362.4442443847656, "learning_rate": 1.0752706525321622e-07, "loss": 11.5909, "step": 470240 }, { "epoch": 0.9499347519564313, "grad_norm": 117.89044952392578, "learning_rate": 1.0745507449110792e-07, "loss": 17.8032, "step": 470250 }, { "epoch": 0.9499549525891151, "grad_norm": 243.0460662841797, "learning_rate": 1.0738310757467064e-07, "loss": 18.7897, "step": 470260 }, { "epoch": 0.9499751532217989, "grad_norm": 157.89682006835938, "learning_rate": 1.0731116450425461e-07, "loss": 12.373, "step": 470270 }, { "epoch": 0.9499953538544827, "grad_norm": 399.5901184082031, "learning_rate": 1.0723924528021012e-07, "loss": 18.7036, "step": 470280 }, { "epoch": 0.9500155544871666, "grad_norm": 314.33355712890625, "learning_rate": 1.0716734990288801e-07, "loss": 21.0218, "step": 470290 }, { "epoch": 0.9500357551198504, "grad_norm": 707.6248779296875, "learning_rate": 1.0709547837263967e-07, "loss": 20.888, "step": 470300 }, { "epoch": 0.9500559557525342, "grad_norm": 880.1218872070312, "learning_rate": 1.0702363068981425e-07, "loss": 36.0962, "step": 470310 }, { "epoch": 0.950076156385218, "grad_norm": 407.9019775390625, "learning_rate": 1.0695180685476148e-07, "loss": 16.846, "step": 470320 }, { "epoch": 0.9500963570179018, "grad_norm": 439.6984558105469, "learning_rate": 1.0688000686783272e-07, "loss": 15.0265, "step": 470330 }, { "epoch": 0.9501165576505857, "grad_norm": 486.39654541015625, "learning_rate": 1.0680823072937774e-07, "loss": 20.4392, "step": 470340 }, { "epoch": 0.9501367582832694, "grad_norm": 271.5041198730469, "learning_rate": 1.067364784397451e-07, "loss": 23.7964, "step": 470350 }, { "epoch": 0.9501569589159532, "grad_norm": 1043.4071044921875, "learning_rate": 1.0666474999928566e-07, "loss": 27.549, "step": 470360 }, { "epoch": 0.950177159548637, "grad_norm": 495.0008239746094, "learning_rate": 1.0659304540834914e-07, "loss": 16.5035, "step": 470370 }, { "epoch": 0.9501973601813208, "grad_norm": 503.79620361328125, "learning_rate": 1.0652136466728468e-07, "loss": 18.311, "step": 470380 }, { "epoch": 0.9502175608140047, "grad_norm": 629.4742431640625, "learning_rate": 1.0644970777644093e-07, "loss": 8.9288, "step": 470390 }, { "epoch": 0.9502377614466885, "grad_norm": 632.9424438476562, "learning_rate": 1.0637807473616812e-07, "loss": 36.336, "step": 470400 }, { "epoch": 0.9502579620793723, "grad_norm": 541.6952514648438, "learning_rate": 1.0630646554681545e-07, "loss": 18.185, "step": 470410 }, { "epoch": 0.9502781627120561, "grad_norm": 427.49407958984375, "learning_rate": 1.0623488020873097e-07, "loss": 24.8249, "step": 470420 }, { "epoch": 0.9502983633447399, "grad_norm": 368.714599609375, "learning_rate": 1.0616331872226437e-07, "loss": 17.8403, "step": 470430 }, { "epoch": 0.9503185639774238, "grad_norm": 441.51666259765625, "learning_rate": 1.0609178108776375e-07, "loss": 14.8681, "step": 470440 }, { "epoch": 0.9503387646101076, "grad_norm": 1.7728757858276367, "learning_rate": 1.0602026730557879e-07, "loss": 16.306, "step": 470450 }, { "epoch": 0.9503589652427914, "grad_norm": 660.8228759765625, "learning_rate": 1.0594877737605702e-07, "loss": 13.973, "step": 470460 }, { "epoch": 0.9503791658754752, "grad_norm": 421.2904357910156, "learning_rate": 1.0587731129954815e-07, "loss": 16.3852, "step": 470470 }, { "epoch": 0.950399366508159, "grad_norm": 426.2768249511719, 
"learning_rate": 1.0580586907639912e-07, "loss": 14.3896, "step": 470480 }, { "epoch": 0.9504195671408429, "grad_norm": 1023.1954956054688, "learning_rate": 1.0573445070695853e-07, "loss": 15.0185, "step": 470490 }, { "epoch": 0.9504397677735267, "grad_norm": 269.4640808105469, "learning_rate": 1.0566305619157502e-07, "loss": 20.0318, "step": 470500 }, { "epoch": 0.9504599684062105, "grad_norm": 215.77854919433594, "learning_rate": 1.0559168553059551e-07, "loss": 26.3668, "step": 470510 }, { "epoch": 0.9504801690388943, "grad_norm": 280.6916809082031, "learning_rate": 1.0552033872436917e-07, "loss": 13.8537, "step": 470520 }, { "epoch": 0.9505003696715781, "grad_norm": 436.7329406738281, "learning_rate": 1.0544901577324351e-07, "loss": 15.7911, "step": 470530 }, { "epoch": 0.950520570304262, "grad_norm": 314.02001953125, "learning_rate": 1.0537771667756436e-07, "loss": 16.0215, "step": 470540 }, { "epoch": 0.9505407709369458, "grad_norm": 383.9037170410156, "learning_rate": 1.0530644143768143e-07, "loss": 18.3283, "step": 470550 }, { "epoch": 0.9505609715696296, "grad_norm": 140.77203369140625, "learning_rate": 1.0523519005394167e-07, "loss": 18.8596, "step": 470560 }, { "epoch": 0.9505811722023134, "grad_norm": 368.7960205078125, "learning_rate": 1.0516396252669092e-07, "loss": 19.5221, "step": 470570 }, { "epoch": 0.9506013728349972, "grad_norm": 476.341796875, "learning_rate": 1.0509275885627779e-07, "loss": 14.5584, "step": 470580 }, { "epoch": 0.9506215734676811, "grad_norm": 769.5809326171875, "learning_rate": 1.0502157904304866e-07, "loss": 15.2152, "step": 470590 }, { "epoch": 0.9506417741003649, "grad_norm": 667.5062255859375, "learning_rate": 1.0495042308735104e-07, "loss": 20.4751, "step": 470600 }, { "epoch": 0.9506619747330486, "grad_norm": 14.904691696166992, "learning_rate": 1.0487929098953131e-07, "loss": 12.7858, "step": 470610 }, { "epoch": 0.9506821753657324, "grad_norm": 1079.2874755859375, "learning_rate": 1.0480818274993587e-07, "loss": 11.7952, "step": 470620 }, { "epoch": 0.9507023759984162, "grad_norm": 685.9215087890625, "learning_rate": 1.0473709836891222e-07, "loss": 11.9369, "step": 470630 }, { "epoch": 0.9507225766311, "grad_norm": 289.9880065917969, "learning_rate": 1.0466603784680562e-07, "loss": 15.8206, "step": 470640 }, { "epoch": 0.9507427772637839, "grad_norm": 980.9338989257812, "learning_rate": 1.0459500118396304e-07, "loss": 19.923, "step": 470650 }, { "epoch": 0.9507629778964677, "grad_norm": 817.2350463867188, "learning_rate": 1.0452398838073141e-07, "loss": 11.4897, "step": 470660 }, { "epoch": 0.9507831785291515, "grad_norm": 450.24676513671875, "learning_rate": 1.0445299943745546e-07, "loss": 18.5014, "step": 470670 }, { "epoch": 0.9508033791618353, "grad_norm": 589.586669921875, "learning_rate": 1.0438203435448157e-07, "loss": 42.9503, "step": 470680 }, { "epoch": 0.9508235797945191, "grad_norm": 976.338623046875, "learning_rate": 1.0431109313215671e-07, "loss": 30.7082, "step": 470690 }, { "epoch": 0.950843780427203, "grad_norm": 527.5633544921875, "learning_rate": 1.0424017577082556e-07, "loss": 13.4547, "step": 470700 }, { "epoch": 0.9508639810598868, "grad_norm": 1180.8414306640625, "learning_rate": 1.0416928227083345e-07, "loss": 31.8732, "step": 470710 }, { "epoch": 0.9508841816925706, "grad_norm": 434.7847900390625, "learning_rate": 1.0409841263252673e-07, "loss": 18.8544, "step": 470720 }, { "epoch": 0.9509043823252544, "grad_norm": 335.68133544921875, "learning_rate": 1.040275668562507e-07, "loss": 19.671, "step": 470730 }, { "epoch": 
0.9509245829579382, "grad_norm": 181.6182403564453, "learning_rate": 1.0395674494235064e-07, "loss": 24.6191, "step": 470740 }, { "epoch": 0.9509447835906221, "grad_norm": 255.40704345703125, "learning_rate": 1.038859468911707e-07, "loss": 7.3884, "step": 470750 }, { "epoch": 0.9509649842233059, "grad_norm": 12.816692352294922, "learning_rate": 1.0381517270305786e-07, "loss": 31.8007, "step": 470760 }, { "epoch": 0.9509851848559897, "grad_norm": 0.5190161466598511, "learning_rate": 1.0374442237835625e-07, "loss": 8.8657, "step": 470770 }, { "epoch": 0.9510053854886735, "grad_norm": 197.76315307617188, "learning_rate": 1.036736959174095e-07, "loss": 15.5982, "step": 470780 }, { "epoch": 0.9510255861213573, "grad_norm": 188.831787109375, "learning_rate": 1.03602993320564e-07, "loss": 16.2393, "step": 470790 }, { "epoch": 0.9510457867540412, "grad_norm": 853.7542724609375, "learning_rate": 1.0353231458816338e-07, "loss": 14.2301, "step": 470800 }, { "epoch": 0.951065987386725, "grad_norm": 827.4303588867188, "learning_rate": 1.0346165972055233e-07, "loss": 37.7539, "step": 470810 }, { "epoch": 0.9510861880194088, "grad_norm": 324.36322021484375, "learning_rate": 1.0339102871807505e-07, "loss": 16.0894, "step": 470820 }, { "epoch": 0.9511063886520926, "grad_norm": 1509.9539794921875, "learning_rate": 1.0332042158107624e-07, "loss": 20.8904, "step": 470830 }, { "epoch": 0.9511265892847764, "grad_norm": 88.76578521728516, "learning_rate": 1.032498383099001e-07, "loss": 17.4222, "step": 470840 }, { "epoch": 0.9511467899174603, "grad_norm": 53.25017547607422, "learning_rate": 1.0317927890489021e-07, "loss": 12.9788, "step": 470850 }, { "epoch": 0.951166990550144, "grad_norm": 131.33226013183594, "learning_rate": 1.0310874336639021e-07, "loss": 30.0801, "step": 470860 }, { "epoch": 0.9511871911828278, "grad_norm": 426.4961853027344, "learning_rate": 1.030382316947448e-07, "loss": 13.2985, "step": 470870 }, { "epoch": 0.9512073918155116, "grad_norm": 243.90689086914062, "learning_rate": 1.0296774389029707e-07, "loss": 22.1258, "step": 470880 }, { "epoch": 0.9512275924481954, "grad_norm": 55.12568283081055, "learning_rate": 1.0289727995339005e-07, "loss": 9.4031, "step": 470890 }, { "epoch": 0.9512477930808793, "grad_norm": 184.26426696777344, "learning_rate": 1.0282683988436792e-07, "loss": 12.5956, "step": 470900 }, { "epoch": 0.9512679937135631, "grad_norm": 108.84278869628906, "learning_rate": 1.027564236835743e-07, "loss": 16.8787, "step": 470910 }, { "epoch": 0.9512881943462469, "grad_norm": 94.78299713134766, "learning_rate": 1.0268603135135169e-07, "loss": 12.0234, "step": 470920 }, { "epoch": 0.9513083949789307, "grad_norm": 524.7290649414062, "learning_rate": 1.0261566288804315e-07, "loss": 20.1096, "step": 470930 }, { "epoch": 0.9513285956116145, "grad_norm": 404.2486572265625, "learning_rate": 1.0254531829399228e-07, "loss": 13.8545, "step": 470940 }, { "epoch": 0.9513487962442984, "grad_norm": 185.98550415039062, "learning_rate": 1.024749975695416e-07, "loss": 14.0839, "step": 470950 }, { "epoch": 0.9513689968769822, "grad_norm": 461.5027770996094, "learning_rate": 1.0240470071503306e-07, "loss": 12.5585, "step": 470960 }, { "epoch": 0.951389197509666, "grad_norm": 258.36834716796875, "learning_rate": 1.0233442773081026e-07, "loss": 22.2929, "step": 470970 }, { "epoch": 0.9514093981423498, "grad_norm": 346.71075439453125, "learning_rate": 1.0226417861721571e-07, "loss": 8.3865, "step": 470980 }, { "epoch": 0.9514295987750336, "grad_norm": 485.3686828613281, "learning_rate": 
1.0219395337459137e-07, "loss": 9.4013, "step": 470990 }, { "epoch": 0.9514497994077175, "grad_norm": 640.2971801757812, "learning_rate": 1.0212375200327973e-07, "loss": 18.4735, "step": 471000 }, { "epoch": 0.9514700000404013, "grad_norm": 171.5185546875, "learning_rate": 1.0205357450362275e-07, "loss": 12.5515, "step": 471010 }, { "epoch": 0.9514902006730851, "grad_norm": 8.77415657043457, "learning_rate": 1.0198342087596292e-07, "loss": 15.5475, "step": 471020 }, { "epoch": 0.9515104013057689, "grad_norm": 390.6858825683594, "learning_rate": 1.0191329112064164e-07, "loss": 16.3626, "step": 471030 }, { "epoch": 0.9515306019384527, "grad_norm": 611.0020751953125, "learning_rate": 1.0184318523800086e-07, "loss": 13.536, "step": 471040 }, { "epoch": 0.9515508025711366, "grad_norm": 441.7110290527344, "learning_rate": 1.0177310322838251e-07, "loss": 15.6641, "step": 471050 }, { "epoch": 0.9515710032038204, "grad_norm": 280.6241149902344, "learning_rate": 1.0170304509212803e-07, "loss": 21.3834, "step": 471060 }, { "epoch": 0.9515912038365042, "grad_norm": 260.3578796386719, "learning_rate": 1.0163301082957821e-07, "loss": 21.4385, "step": 471070 }, { "epoch": 0.951611404469188, "grad_norm": 502.2814025878906, "learning_rate": 1.0156300044107559e-07, "loss": 12.3369, "step": 471080 }, { "epoch": 0.9516316051018718, "grad_norm": 436.1946716308594, "learning_rate": 1.0149301392696098e-07, "loss": 17.8134, "step": 471090 }, { "epoch": 0.9516518057345557, "grad_norm": 191.00633239746094, "learning_rate": 1.0142305128757468e-07, "loss": 21.5534, "step": 471100 }, { "epoch": 0.9516720063672395, "grad_norm": 350.3988037109375, "learning_rate": 1.0135311252325863e-07, "loss": 25.5265, "step": 471110 }, { "epoch": 0.9516922069999232, "grad_norm": 326.69793701171875, "learning_rate": 1.0128319763435312e-07, "loss": 27.3422, "step": 471120 }, { "epoch": 0.951712407632607, "grad_norm": 208.7720947265625, "learning_rate": 1.0121330662119954e-07, "loss": 11.7793, "step": 471130 }, { "epoch": 0.9517326082652908, "grad_norm": 137.06358337402344, "learning_rate": 1.0114343948413818e-07, "loss": 13.8002, "step": 471140 }, { "epoch": 0.9517528088979746, "grad_norm": 801.980224609375, "learning_rate": 1.0107359622350877e-07, "loss": 30.6869, "step": 471150 }, { "epoch": 0.9517730095306585, "grad_norm": 206.8402099609375, "learning_rate": 1.0100377683965323e-07, "loss": 13.2018, "step": 471160 }, { "epoch": 0.9517932101633423, "grad_norm": 289.8222961425781, "learning_rate": 1.0093398133291132e-07, "loss": 17.1643, "step": 471170 }, { "epoch": 0.9518134107960261, "grad_norm": 523.9519653320312, "learning_rate": 1.0086420970362221e-07, "loss": 18.1259, "step": 471180 }, { "epoch": 0.9518336114287099, "grad_norm": 477.6012268066406, "learning_rate": 1.0079446195212728e-07, "loss": 27.6164, "step": 471190 }, { "epoch": 0.9518538120613937, "grad_norm": 233.2044219970703, "learning_rate": 1.007247380787657e-07, "loss": 29.1043, "step": 471200 }, { "epoch": 0.9518740126940776, "grad_norm": 843.2547607421875, "learning_rate": 1.0065503808387777e-07, "loss": 25.8573, "step": 471210 }, { "epoch": 0.9518942133267614, "grad_norm": 200.23541259765625, "learning_rate": 1.0058536196780266e-07, "loss": 11.6166, "step": 471220 }, { "epoch": 0.9519144139594452, "grad_norm": 330.73638916015625, "learning_rate": 1.0051570973088064e-07, "loss": 18.2436, "step": 471230 }, { "epoch": 0.951934614592129, "grad_norm": 169.0516815185547, "learning_rate": 1.0044608137345091e-07, "loss": 15.007, "step": 471240 }, { "epoch": 
0.9519548152248128, "grad_norm": 297.7073974609375, "learning_rate": 1.0037647689585207e-07, "loss": 13.2723, "step": 471250 }, { "epoch": 0.9519750158574967, "grad_norm": 577.2901000976562, "learning_rate": 1.0030689629842382e-07, "loss": 25.1775, "step": 471260 }, { "epoch": 0.9519952164901805, "grad_norm": 361.49591064453125, "learning_rate": 1.0023733958150706e-07, "loss": 18.4722, "step": 471270 }, { "epoch": 0.9520154171228643, "grad_norm": 455.4455261230469, "learning_rate": 1.0016780674543813e-07, "loss": 13.6768, "step": 471280 }, { "epoch": 0.9520356177555481, "grad_norm": 259.64111328125, "learning_rate": 1.0009829779055679e-07, "loss": 5.7533, "step": 471290 }, { "epoch": 0.952055818388232, "grad_norm": 324.5470886230469, "learning_rate": 1.0002881271720222e-07, "loss": 19.4292, "step": 471300 }, { "epoch": 0.9520760190209158, "grad_norm": 1066.47021484375, "learning_rate": 9.995935152571357e-08, "loss": 19.227, "step": 471310 }, { "epoch": 0.9520962196535996, "grad_norm": 455.80023193359375, "learning_rate": 9.988991421642779e-08, "loss": 16.4507, "step": 471320 }, { "epoch": 0.9521164202862834, "grad_norm": 139.85203552246094, "learning_rate": 9.98205007896852e-08, "loss": 11.9968, "step": 471330 }, { "epoch": 0.9521366209189672, "grad_norm": 218.869384765625, "learning_rate": 9.975111124582271e-08, "loss": 17.4944, "step": 471340 }, { "epoch": 0.952156821551651, "grad_norm": 0.0, "learning_rate": 9.968174558517895e-08, "loss": 9.4351, "step": 471350 }, { "epoch": 0.9521770221843349, "grad_norm": 360.7905578613281, "learning_rate": 9.961240380809201e-08, "loss": 17.8279, "step": 471360 }, { "epoch": 0.9521972228170186, "grad_norm": 610.8971557617188, "learning_rate": 9.954308591489991e-08, "loss": 26.3978, "step": 471370 }, { "epoch": 0.9522174234497024, "grad_norm": 0.0, "learning_rate": 9.947379190594076e-08, "loss": 23.4322, "step": 471380 }, { "epoch": 0.9522376240823862, "grad_norm": 613.8010864257812, "learning_rate": 9.940452178155147e-08, "loss": 20.5446, "step": 471390 }, { "epoch": 0.95225782471507, "grad_norm": 466.0672607421875, "learning_rate": 9.933527554207012e-08, "loss": 23.108, "step": 471400 }, { "epoch": 0.9522780253477539, "grad_norm": 515.3238525390625, "learning_rate": 9.926605318783477e-08, "loss": 21.6157, "step": 471410 }, { "epoch": 0.9522982259804377, "grad_norm": 495.3761291503906, "learning_rate": 9.919685471918183e-08, "loss": 34.2023, "step": 471420 }, { "epoch": 0.9523184266131215, "grad_norm": 262.55364990234375, "learning_rate": 9.912768013644936e-08, "loss": 19.4069, "step": 471430 }, { "epoch": 0.9523386272458053, "grad_norm": 674.4443969726562, "learning_rate": 9.905852943997374e-08, "loss": 14.3366, "step": 471440 }, { "epoch": 0.9523588278784891, "grad_norm": 322.1920166015625, "learning_rate": 9.898940263009304e-08, "loss": 18.1222, "step": 471450 }, { "epoch": 0.952379028511173, "grad_norm": 358.52862548828125, "learning_rate": 9.892029970714367e-08, "loss": 16.8095, "step": 471460 }, { "epoch": 0.9523992291438568, "grad_norm": 177.48074340820312, "learning_rate": 9.885122067146147e-08, "loss": 12.4875, "step": 471470 }, { "epoch": 0.9524194297765406, "grad_norm": 130.83697509765625, "learning_rate": 9.878216552338504e-08, "loss": 12.1451, "step": 471480 }, { "epoch": 0.9524396304092244, "grad_norm": 295.5693054199219, "learning_rate": 9.871313426324913e-08, "loss": 32.5697, "step": 471490 }, { "epoch": 0.9524598310419082, "grad_norm": 281.16351318359375, "learning_rate": 9.864412689139124e-08, "loss": 15.9962, "step": 471500 }, { 
"epoch": 0.952480031674592, "grad_norm": 359.3190612792969, "learning_rate": 9.857514340814667e-08, "loss": 13.375, "step": 471510 }, { "epoch": 0.9525002323072759, "grad_norm": 552.441162109375, "learning_rate": 9.850618381385346e-08, "loss": 12.7265, "step": 471520 }, { "epoch": 0.9525204329399597, "grad_norm": 328.9881286621094, "learning_rate": 9.843724810884636e-08, "loss": 11.7589, "step": 471530 }, { "epoch": 0.9525406335726435, "grad_norm": 326.3606872558594, "learning_rate": 9.836833629346121e-08, "loss": 12.6732, "step": 471540 }, { "epoch": 0.9525608342053273, "grad_norm": 527.2721557617188, "learning_rate": 9.82994483680344e-08, "loss": 20.0105, "step": 471550 }, { "epoch": 0.9525810348380112, "grad_norm": 212.74549865722656, "learning_rate": 9.823058433290178e-08, "loss": 15.8167, "step": 471560 }, { "epoch": 0.952601235470695, "grad_norm": 212.47674560546875, "learning_rate": 9.816174418839863e-08, "loss": 20.676, "step": 471570 }, { "epoch": 0.9526214361033788, "grad_norm": 208.3838348388672, "learning_rate": 9.809292793486025e-08, "loss": 12.077, "step": 471580 }, { "epoch": 0.9526416367360626, "grad_norm": 644.1743774414062, "learning_rate": 9.802413557262302e-08, "loss": 19.7677, "step": 471590 }, { "epoch": 0.9526618373687464, "grad_norm": 483.9356994628906, "learning_rate": 9.795536710202169e-08, "loss": 15.0092, "step": 471600 }, { "epoch": 0.9526820380014303, "grad_norm": 355.7208251953125, "learning_rate": 9.788662252339099e-08, "loss": 16.6463, "step": 471610 }, { "epoch": 0.9527022386341141, "grad_norm": 403.49383544921875, "learning_rate": 9.781790183706674e-08, "loss": 19.6125, "step": 471620 }, { "epoch": 0.9527224392667978, "grad_norm": 338.376708984375, "learning_rate": 9.774920504338315e-08, "loss": 28.9406, "step": 471630 }, { "epoch": 0.9527426398994816, "grad_norm": 0.0, "learning_rate": 9.768053214267548e-08, "loss": 18.7657, "step": 471640 }, { "epoch": 0.9527628405321654, "grad_norm": 458.7870788574219, "learning_rate": 9.761188313527792e-08, "loss": 17.2405, "step": 471650 }, { "epoch": 0.9527830411648492, "grad_norm": 83.17525482177734, "learning_rate": 9.754325802152575e-08, "loss": 9.8613, "step": 471660 }, { "epoch": 0.9528032417975331, "grad_norm": 143.07252502441406, "learning_rate": 9.747465680175316e-08, "loss": 14.7038, "step": 471670 }, { "epoch": 0.9528234424302169, "grad_norm": 373.1251220703125, "learning_rate": 9.740607947629433e-08, "loss": 16.7827, "step": 471680 }, { "epoch": 0.9528436430629007, "grad_norm": 725.1900024414062, "learning_rate": 9.733752604548397e-08, "loss": 23.8853, "step": 471690 }, { "epoch": 0.9528638436955845, "grad_norm": 702.7918701171875, "learning_rate": 9.726899650965626e-08, "loss": 28.6152, "step": 471700 }, { "epoch": 0.9528840443282683, "grad_norm": 432.33929443359375, "learning_rate": 9.720049086914374e-08, "loss": 21.788, "step": 471710 }, { "epoch": 0.9529042449609522, "grad_norm": 272.7337951660156, "learning_rate": 9.713200912428222e-08, "loss": 21.7312, "step": 471720 }, { "epoch": 0.952924445593636, "grad_norm": 105.20574951171875, "learning_rate": 9.706355127540423e-08, "loss": 12.223, "step": 471730 }, { "epoch": 0.9529446462263198, "grad_norm": 39.18534851074219, "learning_rate": 9.699511732284395e-08, "loss": 14.3647, "step": 471740 }, { "epoch": 0.9529648468590036, "grad_norm": 172.9857635498047, "learning_rate": 9.692670726693498e-08, "loss": 12.8175, "step": 471750 }, { "epoch": 0.9529850474916874, "grad_norm": 9.908214569091797, "learning_rate": 9.68583211080104e-08, "loss": 12.9484, "step": 
471760 }, { "epoch": 0.9530052481243713, "grad_norm": 11452.4755859375, "learning_rate": 9.678995884640385e-08, "loss": 29.713, "step": 471770 }, { "epoch": 0.9530254487570551, "grad_norm": 133.68270874023438, "learning_rate": 9.672162048244838e-08, "loss": 20.4403, "step": 471780 }, { "epoch": 0.9530456493897389, "grad_norm": 81.29830169677734, "learning_rate": 9.66533060164765e-08, "loss": 11.5564, "step": 471790 }, { "epoch": 0.9530658500224227, "grad_norm": 679.6661376953125, "learning_rate": 9.658501544882182e-08, "loss": 20.2916, "step": 471800 }, { "epoch": 0.9530860506551065, "grad_norm": 374.6275329589844, "learning_rate": 9.651674877981743e-08, "loss": 15.3965, "step": 471810 }, { "epoch": 0.9531062512877904, "grad_norm": 220.9196319580078, "learning_rate": 9.644850600979583e-08, "loss": 29.6173, "step": 471820 }, { "epoch": 0.9531264519204742, "grad_norm": 372.6571350097656, "learning_rate": 9.638028713908898e-08, "loss": 10.7237, "step": 471830 }, { "epoch": 0.953146652553158, "grad_norm": 210.60226440429688, "learning_rate": 9.63120921680305e-08, "loss": 18.6246, "step": 471840 }, { "epoch": 0.9531668531858418, "grad_norm": 299.7548522949219, "learning_rate": 9.62439210969518e-08, "loss": 18.4083, "step": 471850 }, { "epoch": 0.9531870538185256, "grad_norm": 381.3686828613281, "learning_rate": 9.617577392618538e-08, "loss": 17.6152, "step": 471860 }, { "epoch": 0.9532072544512095, "grad_norm": 281.87298583984375, "learning_rate": 9.61076506560632e-08, "loss": 14.8257, "step": 471870 }, { "epoch": 0.9532274550838933, "grad_norm": 464.02081298828125, "learning_rate": 9.603955128691833e-08, "loss": 15.0501, "step": 471880 }, { "epoch": 0.953247655716577, "grad_norm": 191.71810913085938, "learning_rate": 9.597147581908107e-08, "loss": 12.9511, "step": 471890 }, { "epoch": 0.9532678563492608, "grad_norm": 200.65492248535156, "learning_rate": 9.590342425288446e-08, "loss": 15.8451, "step": 471900 }, { "epoch": 0.9532880569819446, "grad_norm": 658.044677734375, "learning_rate": 9.583539658865992e-08, "loss": 13.1211, "step": 471910 }, { "epoch": 0.9533082576146285, "grad_norm": 286.3880615234375, "learning_rate": 9.576739282673886e-08, "loss": 14.6873, "step": 471920 }, { "epoch": 0.9533284582473123, "grad_norm": 544.264404296875, "learning_rate": 9.569941296745212e-08, "loss": 24.7345, "step": 471930 }, { "epoch": 0.9533486588799961, "grad_norm": 443.8762512207031, "learning_rate": 9.563145701113219e-08, "loss": 33.2531, "step": 471940 }, { "epoch": 0.9533688595126799, "grad_norm": 27.334545135498047, "learning_rate": 9.556352495810994e-08, "loss": 13.0649, "step": 471950 }, { "epoch": 0.9533890601453637, "grad_norm": 729.0777587890625, "learning_rate": 9.549561680871566e-08, "loss": 29.3194, "step": 471960 }, { "epoch": 0.9534092607780476, "grad_norm": 566.8876342773438, "learning_rate": 9.542773256328075e-08, "loss": 18.8543, "step": 471970 }, { "epoch": 0.9534294614107314, "grad_norm": 197.99618530273438, "learning_rate": 9.53598722221366e-08, "loss": 14.835, "step": 471980 }, { "epoch": 0.9534496620434152, "grad_norm": 13.030482292175293, "learning_rate": 9.529203578561353e-08, "loss": 16.2845, "step": 471990 }, { "epoch": 0.953469862676099, "grad_norm": 906.8263549804688, "learning_rate": 9.522422325404234e-08, "loss": 25.3349, "step": 472000 }, { "epoch": 0.9534900633087828, "grad_norm": 498.44171142578125, "learning_rate": 9.515643462775337e-08, "loss": 24.0487, "step": 472010 }, { "epoch": 0.9535102639414667, "grad_norm": 541.366943359375, "learning_rate": 
9.508866990707688e-08, "loss": 18.6442, "step": 472020 }, { "epoch": 0.9535304645741505, "grad_norm": 613.9030151367188, "learning_rate": 9.502092909234317e-08, "loss": 18.6889, "step": 472030 }, { "epoch": 0.9535506652068343, "grad_norm": 364.0337829589844, "learning_rate": 9.495321218388309e-08, "loss": 25.2835, "step": 472040 }, { "epoch": 0.9535708658395181, "grad_norm": 375.2162170410156, "learning_rate": 9.488551918202527e-08, "loss": 10.3971, "step": 472050 }, { "epoch": 0.9535910664722019, "grad_norm": 430.7357177734375, "learning_rate": 9.481785008710165e-08, "loss": 19.3899, "step": 472060 }, { "epoch": 0.9536112671048858, "grad_norm": 386.5377197265625, "learning_rate": 9.475020489944032e-08, "loss": 7.5816, "step": 472070 }, { "epoch": 0.9536314677375696, "grad_norm": 249.53707885742188, "learning_rate": 9.468258361937155e-08, "loss": 14.0372, "step": 472080 }, { "epoch": 0.9536516683702534, "grad_norm": 207.7149658203125, "learning_rate": 9.461498624722509e-08, "loss": 15.2406, "step": 472090 }, { "epoch": 0.9536718690029372, "grad_norm": 1044.557373046875, "learning_rate": 9.454741278333013e-08, "loss": 20.2056, "step": 472100 }, { "epoch": 0.953692069635621, "grad_norm": 274.39410400390625, "learning_rate": 9.447986322801583e-08, "loss": 17.5673, "step": 472110 }, { "epoch": 0.9537122702683049, "grad_norm": 79.78308868408203, "learning_rate": 9.441233758161139e-08, "loss": 12.5258, "step": 472120 }, { "epoch": 0.9537324709009887, "grad_norm": 32.28790283203125, "learning_rate": 9.434483584444709e-08, "loss": 12.0531, "step": 472130 }, { "epoch": 0.9537526715336724, "grad_norm": 162.91354370117188, "learning_rate": 9.427735801685101e-08, "loss": 12.2741, "step": 472140 }, { "epoch": 0.9537728721663562, "grad_norm": 1072.396484375, "learning_rate": 9.420990409915176e-08, "loss": 30.2842, "step": 472150 }, { "epoch": 0.95379307279904, "grad_norm": 776.0736083984375, "learning_rate": 9.414247409167854e-08, "loss": 22.6709, "step": 472160 }, { "epoch": 0.9538132734317238, "grad_norm": 681.9462280273438, "learning_rate": 9.407506799475996e-08, "loss": 19.9861, "step": 472170 }, { "epoch": 0.9538334740644077, "grad_norm": 22.479015350341797, "learning_rate": 9.400768580872411e-08, "loss": 13.1713, "step": 472180 }, { "epoch": 0.9538536746970915, "grad_norm": 194.88360595703125, "learning_rate": 9.394032753390014e-08, "loss": 11.1674, "step": 472190 }, { "epoch": 0.9538738753297753, "grad_norm": 1.7064508199691772, "learning_rate": 9.387299317061615e-08, "loss": 13.6525, "step": 472200 }, { "epoch": 0.9538940759624591, "grad_norm": 13.816083908081055, "learning_rate": 9.380568271919966e-08, "loss": 5.8827, "step": 472210 }, { "epoch": 0.9539142765951429, "grad_norm": 244.79507446289062, "learning_rate": 9.373839617997926e-08, "loss": 21.1322, "step": 472220 }, { "epoch": 0.9539344772278268, "grad_norm": 331.3328552246094, "learning_rate": 9.367113355328361e-08, "loss": 20.3777, "step": 472230 }, { "epoch": 0.9539546778605106, "grad_norm": 237.48936462402344, "learning_rate": 9.36038948394391e-08, "loss": 20.8298, "step": 472240 }, { "epoch": 0.9539748784931944, "grad_norm": 329.25421142578125, "learning_rate": 9.353668003877437e-08, "loss": 24.5786, "step": 472250 }, { "epoch": 0.9539950791258782, "grad_norm": 491.8194274902344, "learning_rate": 9.346948915161636e-08, "loss": 19.1928, "step": 472260 }, { "epoch": 0.954015279758562, "grad_norm": 353.59442138671875, "learning_rate": 9.340232217829371e-08, "loss": 14.4394, "step": 472270 }, { "epoch": 0.9540354803912459, "grad_norm": 
592.506103515625, "learning_rate": 9.333517911913281e-08, "loss": 15.0711, "step": 472280 }, { "epoch": 0.9540556810239297, "grad_norm": 246.64601135253906, "learning_rate": 9.326805997446065e-08, "loss": 31.8441, "step": 472290 }, { "epoch": 0.9540758816566135, "grad_norm": 420.1644287109375, "learning_rate": 9.320096474460527e-08, "loss": 14.5073, "step": 472300 }, { "epoch": 0.9540960822892973, "grad_norm": 676.580078125, "learning_rate": 9.31338934298931e-08, "loss": 17.2554, "step": 472310 }, { "epoch": 0.9541162829219811, "grad_norm": 452.257568359375, "learning_rate": 9.306684603065108e-08, "loss": 18.2278, "step": 472320 }, { "epoch": 0.954136483554665, "grad_norm": 362.83880615234375, "learning_rate": 9.299982254720674e-08, "loss": 12.9905, "step": 472330 }, { "epoch": 0.9541566841873488, "grad_norm": 603.9871215820312, "learning_rate": 9.293282297988537e-08, "loss": 25.1383, "step": 472340 }, { "epoch": 0.9541768848200326, "grad_norm": 383.53717041015625, "learning_rate": 9.28658473290145e-08, "loss": 19.0505, "step": 472350 }, { "epoch": 0.9541970854527164, "grad_norm": 301.22833251953125, "learning_rate": 9.27988955949205e-08, "loss": 20.0336, "step": 472360 }, { "epoch": 0.9542172860854002, "grad_norm": 209.2220458984375, "learning_rate": 9.273196777792926e-08, "loss": 19.8336, "step": 472370 }, { "epoch": 0.9542374867180841, "grad_norm": 757.2785034179688, "learning_rate": 9.266506387836771e-08, "loss": 17.0049, "step": 472380 }, { "epoch": 0.9542576873507679, "grad_norm": 260.1947937011719, "learning_rate": 9.259818389656117e-08, "loss": 10.2656, "step": 472390 }, { "epoch": 0.9542778879834516, "grad_norm": 363.085205078125, "learning_rate": 9.253132783283548e-08, "loss": 17.8424, "step": 472400 }, { "epoch": 0.9542980886161354, "grad_norm": 322.1111145019531, "learning_rate": 9.246449568751702e-08, "loss": 15.6732, "step": 472410 }, { "epoch": 0.9543182892488192, "grad_norm": 152.23348999023438, "learning_rate": 9.239768746093226e-08, "loss": 13.6807, "step": 472420 }, { "epoch": 0.954338489881503, "grad_norm": 496.6689758300781, "learning_rate": 9.233090315340532e-08, "loss": 17.5344, "step": 472430 }, { "epoch": 0.9543586905141869, "grad_norm": 209.66159057617188, "learning_rate": 9.226414276526208e-08, "loss": 20.2614, "step": 472440 }, { "epoch": 0.9543788911468707, "grad_norm": 116.48358917236328, "learning_rate": 9.219740629682838e-08, "loss": 17.466, "step": 472450 }, { "epoch": 0.9543990917795545, "grad_norm": 573.6477661132812, "learning_rate": 9.213069374842953e-08, "loss": 19.1514, "step": 472460 }, { "epoch": 0.9544192924122383, "grad_norm": 676.3889770507812, "learning_rate": 9.206400512039026e-08, "loss": 22.2085, "step": 472470 }, { "epoch": 0.9544394930449221, "grad_norm": 319.9701232910156, "learning_rate": 9.199734041303532e-08, "loss": 18.8759, "step": 472480 }, { "epoch": 0.954459693677606, "grad_norm": 307.66448974609375, "learning_rate": 9.19306996266911e-08, "loss": 13.4954, "step": 472490 }, { "epoch": 0.9544798943102898, "grad_norm": 592.2040405273438, "learning_rate": 9.186408276168012e-08, "loss": 12.9578, "step": 472500 }, { "epoch": 0.9545000949429736, "grad_norm": 497.1847229003906, "learning_rate": 9.179748981832881e-08, "loss": 17.0803, "step": 472510 }, { "epoch": 0.9545202955756574, "grad_norm": 368.4649353027344, "learning_rate": 9.173092079696188e-08, "loss": 10.285, "step": 472520 }, { "epoch": 0.9545404962083412, "grad_norm": 271.83099365234375, "learning_rate": 9.166437569790242e-08, "loss": 34.242, "step": 472530 }, { "epoch": 
0.9545606968410251, "grad_norm": 302.1800842285156, "learning_rate": 9.159785452147574e-08, "loss": 7.3704, "step": 472540 }, { "epoch": 0.9545808974737089, "grad_norm": 34.294837951660156, "learning_rate": 9.153135726800599e-08, "loss": 12.2237, "step": 472550 }, { "epoch": 0.9546010981063927, "grad_norm": 589.4528198242188, "learning_rate": 9.146488393781683e-08, "loss": 13.1811, "step": 472560 }, { "epoch": 0.9546212987390765, "grad_norm": 535.8561401367188, "learning_rate": 9.139843453123243e-08, "loss": 18.9766, "step": 472570 }, { "epoch": 0.9546414993717603, "grad_norm": 309.1181335449219, "learning_rate": 9.133200904857642e-08, "loss": 16.5908, "step": 472580 }, { "epoch": 0.9546617000044442, "grad_norm": 90.18566131591797, "learning_rate": 9.126560749017354e-08, "loss": 20.1748, "step": 472590 }, { "epoch": 0.954681900637128, "grad_norm": 374.27630615234375, "learning_rate": 9.119922985634633e-08, "loss": 14.8302, "step": 472600 }, { "epoch": 0.9547021012698118, "grad_norm": 202.9929656982422, "learning_rate": 9.113287614741895e-08, "loss": 19.8109, "step": 472610 }, { "epoch": 0.9547223019024956, "grad_norm": 497.937744140625, "learning_rate": 9.106654636371448e-08, "loss": 15.6629, "step": 472620 }, { "epoch": 0.9547425025351794, "grad_norm": 143.7318878173828, "learning_rate": 9.1000240505556e-08, "loss": 16.0604, "step": 472630 }, { "epoch": 0.9547627031678633, "grad_norm": 204.52513122558594, "learning_rate": 9.093395857326714e-08, "loss": 20.6228, "step": 472640 }, { "epoch": 0.954782903800547, "grad_norm": 343.8081970214844, "learning_rate": 9.086770056717099e-08, "loss": 18.4626, "step": 472650 }, { "epoch": 0.9548031044332308, "grad_norm": 177.4273223876953, "learning_rate": 9.080146648759003e-08, "loss": 38.9969, "step": 472660 }, { "epoch": 0.9548233050659146, "grad_norm": 262.6379089355469, "learning_rate": 9.073525633484737e-08, "loss": 11.4115, "step": 472670 }, { "epoch": 0.9548435056985984, "grad_norm": 410.4691467285156, "learning_rate": 9.066907010926551e-08, "loss": 19.5217, "step": 472680 }, { "epoch": 0.9548637063312823, "grad_norm": 372.9002990722656, "learning_rate": 9.060290781116698e-08, "loss": 35.4899, "step": 472690 }, { "epoch": 0.9548839069639661, "grad_norm": 734.9282836914062, "learning_rate": 9.053676944087542e-08, "loss": 29.2235, "step": 472700 }, { "epoch": 0.9549041075966499, "grad_norm": 508.56658935546875, "learning_rate": 9.04706549987111e-08, "loss": 17.9589, "step": 472710 }, { "epoch": 0.9549243082293337, "grad_norm": 352.7237854003906, "learning_rate": 9.040456448499769e-08, "loss": 18.5129, "step": 472720 }, { "epoch": 0.9549445088620175, "grad_norm": 788.5574951171875, "learning_rate": 9.03384979000571e-08, "loss": 24.0234, "step": 472730 }, { "epoch": 0.9549647094947014, "grad_norm": 231.6894989013672, "learning_rate": 9.027245524421135e-08, "loss": 16.0379, "step": 472740 }, { "epoch": 0.9549849101273852, "grad_norm": 354.6516418457031, "learning_rate": 9.020643651778183e-08, "loss": 26.0841, "step": 472750 }, { "epoch": 0.955005110760069, "grad_norm": 79.38582611083984, "learning_rate": 9.014044172109049e-08, "loss": 10.3183, "step": 472760 }, { "epoch": 0.9550253113927528, "grad_norm": 317.4454650878906, "learning_rate": 9.007447085445987e-08, "loss": 20.7183, "step": 472770 }, { "epoch": 0.9550455120254366, "grad_norm": 140.3369140625, "learning_rate": 9.00085239182108e-08, "loss": 14.6581, "step": 472780 }, { "epoch": 0.9550657126581205, "grad_norm": 209.433349609375, "learning_rate": 8.99426009126636e-08, "loss": 17.313, "step": 
472790 }, { "epoch": 0.9550859132908043, "grad_norm": 313.2442321777344, "learning_rate": 8.987670183814134e-08, "loss": 18.3114, "step": 472800 }, { "epoch": 0.9551061139234881, "grad_norm": 0.4536556601524353, "learning_rate": 8.981082669496433e-08, "loss": 20.4024, "step": 472810 }, { "epoch": 0.9551263145561719, "grad_norm": 458.67962646484375, "learning_rate": 8.974497548345396e-08, "loss": 26.9035, "step": 472820 }, { "epoch": 0.9551465151888557, "grad_norm": 449.9855651855469, "learning_rate": 8.967914820393108e-08, "loss": 14.6759, "step": 472830 }, { "epoch": 0.9551667158215396, "grad_norm": 200.32005310058594, "learning_rate": 8.961334485671657e-08, "loss": 10.905, "step": 472840 }, { "epoch": 0.9551869164542234, "grad_norm": 478.463134765625, "learning_rate": 8.954756544213128e-08, "loss": 10.1928, "step": 472850 }, { "epoch": 0.9552071170869072, "grad_norm": 410.7976989746094, "learning_rate": 8.948180996049493e-08, "loss": 26.1069, "step": 472860 }, { "epoch": 0.955227317719591, "grad_norm": 193.8374786376953, "learning_rate": 8.941607841212841e-08, "loss": 12.1814, "step": 472870 }, { "epoch": 0.9552475183522748, "grad_norm": 352.20159912109375, "learning_rate": 8.93503707973531e-08, "loss": 18.8841, "step": 472880 }, { "epoch": 0.9552677189849587, "grad_norm": 54.54193878173828, "learning_rate": 8.928468711648875e-08, "loss": 24.2221, "step": 472890 }, { "epoch": 0.9552879196176425, "grad_norm": 270.1540222167969, "learning_rate": 8.921902736985399e-08, "loss": 13.8838, "step": 472900 }, { "epoch": 0.9553081202503262, "grad_norm": 258.27935791015625, "learning_rate": 8.915339155777136e-08, "loss": 27.3903, "step": 472910 }, { "epoch": 0.95532832088301, "grad_norm": 851.5621948242188, "learning_rate": 8.908777968055893e-08, "loss": 24.5242, "step": 472920 }, { "epoch": 0.9553485215156938, "grad_norm": 504.7754211425781, "learning_rate": 8.902219173853699e-08, "loss": 16.8366, "step": 472930 }, { "epoch": 0.9553687221483776, "grad_norm": 763.4608154296875, "learning_rate": 8.895662773202529e-08, "loss": 18.1817, "step": 472940 }, { "epoch": 0.9553889227810615, "grad_norm": 500.566650390625, "learning_rate": 8.889108766134358e-08, "loss": 21.5876, "step": 472950 }, { "epoch": 0.9554091234137453, "grad_norm": 424.56103515625, "learning_rate": 8.882557152681104e-08, "loss": 12.3775, "step": 472960 }, { "epoch": 0.9554293240464291, "grad_norm": 52.139766693115234, "learning_rate": 8.876007932874686e-08, "loss": 9.1044, "step": 472970 }, { "epoch": 0.9554495246791129, "grad_norm": 101.28936004638672, "learning_rate": 8.869461106747024e-08, "loss": 10.6823, "step": 472980 }, { "epoch": 0.9554697253117967, "grad_norm": 415.1844482421875, "learning_rate": 8.862916674330091e-08, "loss": 25.4574, "step": 472990 }, { "epoch": 0.9554899259444806, "grad_norm": 635.3602905273438, "learning_rate": 8.856374635655696e-08, "loss": 14.1896, "step": 473000 }, { "epoch": 0.9555101265771644, "grad_norm": 320.00335693359375, "learning_rate": 8.849834990755757e-08, "loss": 17.2174, "step": 473010 }, { "epoch": 0.9555303272098482, "grad_norm": 408.1123046875, "learning_rate": 8.843297739662138e-08, "loss": 17.2332, "step": 473020 }, { "epoch": 0.955550527842532, "grad_norm": 37.39815139770508, "learning_rate": 8.836762882406757e-08, "loss": 12.6036, "step": 473030 }, { "epoch": 0.9555707284752158, "grad_norm": 118.26240539550781, "learning_rate": 8.830230419021424e-08, "loss": 26.1722, "step": 473040 }, { "epoch": 0.9555909291078997, "grad_norm": 264.1705017089844, "learning_rate": 
8.823700349537945e-08, "loss": 17.247, "step": 473050 }, { "epoch": 0.9556111297405835, "grad_norm": 483.496337890625, "learning_rate": 8.817172673988184e-08, "loss": 48.2525, "step": 473060 }, { "epoch": 0.9556313303732673, "grad_norm": 314.58447265625, "learning_rate": 8.810647392404004e-08, "loss": 15.7937, "step": 473070 }, { "epoch": 0.9556515310059511, "grad_norm": 426.01654052734375, "learning_rate": 8.804124504817046e-08, "loss": 20.1481, "step": 473080 }, { "epoch": 0.955671731638635, "grad_norm": 525.1240844726562, "learning_rate": 8.797604011259287e-08, "loss": 19.8814, "step": 473090 }, { "epoch": 0.9556919322713188, "grad_norm": 566.5120239257812, "learning_rate": 8.791085911762476e-08, "loss": 9.9254, "step": 473100 }, { "epoch": 0.9557121329040026, "grad_norm": 656.9776611328125, "learning_rate": 8.784570206358201e-08, "loss": 26.1737, "step": 473110 }, { "epoch": 0.9557323335366864, "grad_norm": 436.4179382324219, "learning_rate": 8.778056895078435e-08, "loss": 27.7056, "step": 473120 }, { "epoch": 0.9557525341693702, "grad_norm": 674.1171264648438, "learning_rate": 8.77154597795482e-08, "loss": 31.0846, "step": 473130 }, { "epoch": 0.955772734802054, "grad_norm": 75.30695343017578, "learning_rate": 8.765037455019165e-08, "loss": 17.1403, "step": 473140 }, { "epoch": 0.9557929354347379, "grad_norm": 637.0873413085938, "learning_rate": 8.758531326303054e-08, "loss": 12.4042, "step": 473150 }, { "epoch": 0.9558131360674217, "grad_norm": 203.11627197265625, "learning_rate": 8.752027591838352e-08, "loss": 12.956, "step": 473160 }, { "epoch": 0.9558333367001054, "grad_norm": 414.611328125, "learning_rate": 8.7455262516567e-08, "loss": 10.456, "step": 473170 }, { "epoch": 0.9558535373327892, "grad_norm": 177.26925659179688, "learning_rate": 8.739027305789682e-08, "loss": 9.3711, "step": 473180 }, { "epoch": 0.955873737965473, "grad_norm": 434.41229248046875, "learning_rate": 8.732530754269108e-08, "loss": 19.888, "step": 473190 }, { "epoch": 0.9558939385981569, "grad_norm": 155.48361206054688, "learning_rate": 8.726036597126619e-08, "loss": 27.929, "step": 473200 }, { "epoch": 0.9559141392308407, "grad_norm": 146.4423828125, "learning_rate": 8.719544834393855e-08, "loss": 11.0543, "step": 473210 }, { "epoch": 0.9559343398635245, "grad_norm": 6.916018486022949, "learning_rate": 8.713055466102349e-08, "loss": 10.1698, "step": 473220 }, { "epoch": 0.9559545404962083, "grad_norm": 46.781036376953125, "learning_rate": 8.706568492283907e-08, "loss": 22.7213, "step": 473230 }, { "epoch": 0.9559747411288921, "grad_norm": 42.242225646972656, "learning_rate": 8.700083912970058e-08, "loss": 14.4235, "step": 473240 }, { "epoch": 0.955994941761576, "grad_norm": 473.7392578125, "learning_rate": 8.693601728192392e-08, "loss": 19.8019, "step": 473250 }, { "epoch": 0.9560151423942598, "grad_norm": 480.6423034667969, "learning_rate": 8.687121937982545e-08, "loss": 22.3902, "step": 473260 }, { "epoch": 0.9560353430269436, "grad_norm": 186.20558166503906, "learning_rate": 8.680644542372052e-08, "loss": 15.3956, "step": 473270 }, { "epoch": 0.9560555436596274, "grad_norm": 345.1857604980469, "learning_rate": 8.674169541392552e-08, "loss": 12.7148, "step": 473280 }, { "epoch": 0.9560757442923112, "grad_norm": 428.6722717285156, "learning_rate": 8.66769693507552e-08, "loss": 13.5177, "step": 473290 }, { "epoch": 0.9560959449249951, "grad_norm": 675.354248046875, "learning_rate": 8.661226723452542e-08, "loss": 31.9146, "step": 473300 }, { "epoch": 0.9561161455576789, "grad_norm": 294.69195556640625, 
"learning_rate": 8.65475890655515e-08, "loss": 25.9512, "step": 473310 }, { "epoch": 0.9561363461903627, "grad_norm": 163.40345764160156, "learning_rate": 8.648293484414871e-08, "loss": 13.3244, "step": 473320 }, { "epoch": 0.9561565468230465, "grad_norm": 363.00103759765625, "learning_rate": 8.641830457063239e-08, "loss": 15.7228, "step": 473330 }, { "epoch": 0.9561767474557303, "grad_norm": 229.41160583496094, "learning_rate": 8.63536982453167e-08, "loss": 18.2286, "step": 473340 }, { "epoch": 0.9561969480884142, "grad_norm": 86.52777099609375, "learning_rate": 8.628911586851752e-08, "loss": 19.5806, "step": 473350 }, { "epoch": 0.956217148721098, "grad_norm": 348.8701171875, "learning_rate": 8.622455744054958e-08, "loss": 15.0217, "step": 473360 }, { "epoch": 0.9562373493537818, "grad_norm": 418.804443359375, "learning_rate": 8.616002296172654e-08, "loss": 22.1636, "step": 473370 }, { "epoch": 0.9562575499864656, "grad_norm": 265.16455078125, "learning_rate": 8.609551243236424e-08, "loss": 16.2389, "step": 473380 }, { "epoch": 0.9562777506191494, "grad_norm": 330.1594543457031, "learning_rate": 8.603102585277634e-08, "loss": 21.3645, "step": 473390 }, { "epoch": 0.9562979512518333, "grad_norm": 229.58462524414062, "learning_rate": 8.596656322327645e-08, "loss": 19.1739, "step": 473400 }, { "epoch": 0.9563181518845171, "grad_norm": 329.893310546875, "learning_rate": 8.59021245441799e-08, "loss": 26.2905, "step": 473410 }, { "epoch": 0.9563383525172008, "grad_norm": 271.0732421875, "learning_rate": 8.583770981580142e-08, "loss": 14.1466, "step": 473420 }, { "epoch": 0.9563585531498846, "grad_norm": 37.961769104003906, "learning_rate": 8.577331903845243e-08, "loss": 15.111, "step": 473430 }, { "epoch": 0.9563787537825684, "grad_norm": 156.81661987304688, "learning_rate": 8.57089522124488e-08, "loss": 22.9995, "step": 473440 }, { "epoch": 0.9563989544152522, "grad_norm": 514.8054809570312, "learning_rate": 8.564460933810414e-08, "loss": 15.597, "step": 473450 }, { "epoch": 0.9564191550479361, "grad_norm": 277.9557800292969, "learning_rate": 8.558029041573157e-08, "loss": 18.3253, "step": 473460 }, { "epoch": 0.9564393556806199, "grad_norm": 375.3038635253906, "learning_rate": 8.55159954456436e-08, "loss": 14.3698, "step": 473470 }, { "epoch": 0.9564595563133037, "grad_norm": 115.1312026977539, "learning_rate": 8.545172442815552e-08, "loss": 10.1309, "step": 473480 }, { "epoch": 0.9564797569459875, "grad_norm": 557.439697265625, "learning_rate": 8.538747736357933e-08, "loss": 21.8316, "step": 473490 }, { "epoch": 0.9564999575786713, "grad_norm": 112.39806365966797, "learning_rate": 8.53232542522292e-08, "loss": 23.8843, "step": 473500 }, { "epoch": 0.9565201582113552, "grad_norm": 99.22763061523438, "learning_rate": 8.525905509441656e-08, "loss": 16.9909, "step": 473510 }, { "epoch": 0.956540358844039, "grad_norm": 216.84629821777344, "learning_rate": 8.51948798904556e-08, "loss": 31.9568, "step": 473520 }, { "epoch": 0.9565605594767228, "grad_norm": 413.5600280761719, "learning_rate": 8.513072864065885e-08, "loss": 20.5399, "step": 473530 }, { "epoch": 0.9565807601094066, "grad_norm": 408.629150390625, "learning_rate": 8.506660134533828e-08, "loss": 16.17, "step": 473540 }, { "epoch": 0.9566009607420904, "grad_norm": 635.8742065429688, "learning_rate": 8.500249800480754e-08, "loss": 21.3376, "step": 473550 }, { "epoch": 0.9566211613747743, "grad_norm": 622.0107421875, "learning_rate": 8.493841861937802e-08, "loss": 35.623, "step": 473560 }, { "epoch": 0.9566413620074581, "grad_norm": 
311.6985778808594, "learning_rate": 8.487436318936282e-08, "loss": 22.0412, "step": 473570 }, { "epoch": 0.9566615626401419, "grad_norm": 505.4440612792969, "learning_rate": 8.481033171507391e-08, "loss": 22.9561, "step": 473580 }, { "epoch": 0.9566817632728257, "grad_norm": 416.3011474609375, "learning_rate": 8.474632419682327e-08, "loss": 10.0568, "step": 473590 }, { "epoch": 0.9567019639055095, "grad_norm": 246.4512481689453, "learning_rate": 8.468234063492287e-08, "loss": 12.2208, "step": 473600 }, { "epoch": 0.9567221645381934, "grad_norm": 388.3680725097656, "learning_rate": 8.461838102968467e-08, "loss": 27.4346, "step": 473610 }, { "epoch": 0.9567423651708772, "grad_norm": 199.04757690429688, "learning_rate": 8.45544453814201e-08, "loss": 17.5684, "step": 473620 }, { "epoch": 0.956762565803561, "grad_norm": 252.2554168701172, "learning_rate": 8.449053369044058e-08, "loss": 43.0305, "step": 473630 }, { "epoch": 0.9567827664362448, "grad_norm": 469.8954772949219, "learning_rate": 8.442664595705862e-08, "loss": 19.1467, "step": 473640 }, { "epoch": 0.9568029670689286, "grad_norm": 150.32028198242188, "learning_rate": 8.436278218158511e-08, "loss": 13.8263, "step": 473650 }, { "epoch": 0.9568231677016125, "grad_norm": 190.54705810546875, "learning_rate": 8.429894236433089e-08, "loss": 25.0537, "step": 473660 }, { "epoch": 0.9568433683342963, "grad_norm": 288.9213562011719, "learning_rate": 8.423512650560795e-08, "loss": 13.1843, "step": 473670 }, { "epoch": 0.95686356896698, "grad_norm": 79.70426177978516, "learning_rate": 8.417133460572658e-08, "loss": 13.2238, "step": 473680 }, { "epoch": 0.9568837695996638, "grad_norm": 143.64450073242188, "learning_rate": 8.410756666499709e-08, "loss": 10.8533, "step": 473690 }, { "epoch": 0.9569039702323476, "grad_norm": 439.2137756347656, "learning_rate": 8.404382268373145e-08, "loss": 30.3238, "step": 473700 }, { "epoch": 0.9569241708650315, "grad_norm": 410.85736083984375, "learning_rate": 8.39801026622411e-08, "loss": 11.4916, "step": 473710 }, { "epoch": 0.9569443714977153, "grad_norm": 465.1092529296875, "learning_rate": 8.391640660083411e-08, "loss": 17.738, "step": 473720 }, { "epoch": 0.9569645721303991, "grad_norm": 376.08203125, "learning_rate": 8.3852734499823e-08, "loss": 22.8946, "step": 473730 }, { "epoch": 0.9569847727630829, "grad_norm": 195.26583862304688, "learning_rate": 8.3789086359517e-08, "loss": 15.1908, "step": 473740 }, { "epoch": 0.9570049733957667, "grad_norm": 279.01898193359375, "learning_rate": 8.372546218022747e-08, "loss": 16.2743, "step": 473750 }, { "epoch": 0.9570251740284506, "grad_norm": 338.1087951660156, "learning_rate": 8.366186196226311e-08, "loss": 11.4824, "step": 473760 }, { "epoch": 0.9570453746611344, "grad_norm": 680.8377685546875, "learning_rate": 8.35982857059342e-08, "loss": 26.1408, "step": 473770 }, { "epoch": 0.9570655752938182, "grad_norm": 339.9266357421875, "learning_rate": 8.353473341155216e-08, "loss": 16.2152, "step": 473780 }, { "epoch": 0.957085775926502, "grad_norm": 435.17315673828125, "learning_rate": 8.347120507942453e-08, "loss": 24.8962, "step": 473790 }, { "epoch": 0.9571059765591858, "grad_norm": 554.631103515625, "learning_rate": 8.340770070986215e-08, "loss": 11.9722, "step": 473800 }, { "epoch": 0.9571261771918697, "grad_norm": 353.4429626464844, "learning_rate": 8.334422030317424e-08, "loss": 16.6615, "step": 473810 }, { "epoch": 0.9571463778245535, "grad_norm": 341.6706848144531, "learning_rate": 8.328076385967055e-08, "loss": 14.8122, "step": 473820 }, { "epoch": 
0.9571665784572373, "grad_norm": 367.8915710449219, "learning_rate": 8.321733137966026e-08, "loss": 16.9164, "step": 473830 }, { "epoch": 0.9571867790899211, "grad_norm": 143.0441436767578, "learning_rate": 8.315392286345203e-08, "loss": 12.8845, "step": 473840 }, { "epoch": 0.9572069797226049, "grad_norm": 177.54774475097656, "learning_rate": 8.30905383113556e-08, "loss": 29.3222, "step": 473850 }, { "epoch": 0.9572271803552888, "grad_norm": 143.09291076660156, "learning_rate": 8.302717772367908e-08, "loss": 15.2372, "step": 473860 }, { "epoch": 0.9572473809879726, "grad_norm": 857.23046875, "learning_rate": 8.296384110073164e-08, "loss": 20.7451, "step": 473870 }, { "epoch": 0.9572675816206564, "grad_norm": 20.85658836364746, "learning_rate": 8.290052844282248e-08, "loss": 24.1314, "step": 473880 }, { "epoch": 0.9572877822533402, "grad_norm": 423.4533386230469, "learning_rate": 8.283723975025971e-08, "loss": 11.0826, "step": 473890 }, { "epoch": 0.957307982886024, "grad_norm": 811.9452514648438, "learning_rate": 8.277397502335194e-08, "loss": 13.4805, "step": 473900 }, { "epoch": 0.9573281835187079, "grad_norm": 382.5894775390625, "learning_rate": 8.271073426240672e-08, "loss": 15.23, "step": 473910 }, { "epoch": 0.9573483841513917, "grad_norm": 535.0155029296875, "learning_rate": 8.264751746773381e-08, "loss": 18.9655, "step": 473920 }, { "epoch": 0.9573685847840754, "grad_norm": 13.196868896484375, "learning_rate": 8.258432463964016e-08, "loss": 9.4939, "step": 473930 }, { "epoch": 0.9573887854167592, "grad_norm": 94.76506042480469, "learning_rate": 8.252115577843444e-08, "loss": 17.6372, "step": 473940 }, { "epoch": 0.957408986049443, "grad_norm": 212.6458740234375, "learning_rate": 8.245801088442362e-08, "loss": 25.0338, "step": 473950 }, { "epoch": 0.9574291866821268, "grad_norm": 258.0672302246094, "learning_rate": 8.239488995791633e-08, "loss": 13.0235, "step": 473960 }, { "epoch": 0.9574493873148107, "grad_norm": 127.04428100585938, "learning_rate": 8.233179299922012e-08, "loss": 9.8082, "step": 473970 }, { "epoch": 0.9574695879474945, "grad_norm": 526.0514526367188, "learning_rate": 8.226872000864194e-08, "loss": 18.7706, "step": 473980 }, { "epoch": 0.9574897885801783, "grad_norm": 562.9580078125, "learning_rate": 8.22056709864899e-08, "loss": 27.8233, "step": 473990 }, { "epoch": 0.9575099892128621, "grad_norm": 26.61240005493164, "learning_rate": 8.214264593307097e-08, "loss": 15.2482, "step": 474000 }, { "epoch": 0.9575301898455459, "grad_norm": 424.70050048828125, "learning_rate": 8.207964484869158e-08, "loss": 23.444, "step": 474010 }, { "epoch": 0.9575503904782298, "grad_norm": 365.2879333496094, "learning_rate": 8.201666773365979e-08, "loss": 13.3401, "step": 474020 }, { "epoch": 0.9575705911109136, "grad_norm": 239.11878967285156, "learning_rate": 8.195371458828316e-08, "loss": 17.2312, "step": 474030 }, { "epoch": 0.9575907917435974, "grad_norm": 1522.04150390625, "learning_rate": 8.1890785412867e-08, "loss": 38.5878, "step": 474040 }, { "epoch": 0.9576109923762812, "grad_norm": 123.2011489868164, "learning_rate": 8.182788020771826e-08, "loss": 13.6253, "step": 474050 }, { "epoch": 0.957631193008965, "grad_norm": 466.6886901855469, "learning_rate": 8.176499897314505e-08, "loss": 18.2509, "step": 474060 }, { "epoch": 0.9576513936416489, "grad_norm": 331.8001708984375, "learning_rate": 8.170214170945212e-08, "loss": 20.0639, "step": 474070 }, { "epoch": 0.9576715942743327, "grad_norm": 618.6167602539062, "learning_rate": 8.163930841694589e-08, "loss": 11.9667, "step": 
474080 }, { "epoch": 0.9576917949070165, "grad_norm": 716.3890380859375, "learning_rate": 8.157649909593335e-08, "loss": 22.36, "step": 474090 }, { "epoch": 0.9577119955397003, "grad_norm": 433.8890686035156, "learning_rate": 8.151371374672146e-08, "loss": 25.017, "step": 474100 }, { "epoch": 0.9577321961723841, "grad_norm": 1074.271728515625, "learning_rate": 8.145095236961387e-08, "loss": 20.0564, "step": 474110 }, { "epoch": 0.957752396805068, "grad_norm": 633.5374755859375, "learning_rate": 8.13882149649181e-08, "loss": 19.4297, "step": 474120 }, { "epoch": 0.9577725974377518, "grad_norm": 354.6879577636719, "learning_rate": 8.132550153294005e-08, "loss": 15.8962, "step": 474130 }, { "epoch": 0.9577927980704356, "grad_norm": 583.9142456054688, "learning_rate": 8.1262812073985e-08, "loss": 32.1786, "step": 474140 }, { "epoch": 0.9578129987031194, "grad_norm": 79.23692321777344, "learning_rate": 8.120014658835828e-08, "loss": 18.3146, "step": 474150 }, { "epoch": 0.9578331993358032, "grad_norm": 208.16111755371094, "learning_rate": 8.11375050763652e-08, "loss": 9.589, "step": 474160 }, { "epoch": 0.9578533999684871, "grad_norm": 248.46812438964844, "learning_rate": 8.107488753831161e-08, "loss": 8.9652, "step": 474170 }, { "epoch": 0.9578736006011709, "grad_norm": 376.1187438964844, "learning_rate": 8.101229397450228e-08, "loss": 24.8758, "step": 474180 }, { "epoch": 0.9578938012338546, "grad_norm": 769.6351928710938, "learning_rate": 8.094972438524251e-08, "loss": 23.3915, "step": 474190 }, { "epoch": 0.9579140018665384, "grad_norm": 93.28047943115234, "learning_rate": 8.088717877083706e-08, "loss": 11.5412, "step": 474200 }, { "epoch": 0.9579342024992222, "grad_norm": 330.7162170410156, "learning_rate": 8.082465713159126e-08, "loss": 14.759, "step": 474210 }, { "epoch": 0.957954403131906, "grad_norm": 514.1710205078125, "learning_rate": 8.076215946780874e-08, "loss": 15.7291, "step": 474220 }, { "epoch": 0.9579746037645899, "grad_norm": 361.1650085449219, "learning_rate": 8.069968577979536e-08, "loss": 20.1469, "step": 474230 }, { "epoch": 0.9579948043972737, "grad_norm": 208.582275390625, "learning_rate": 8.063723606785478e-08, "loss": 11.3949, "step": 474240 }, { "epoch": 0.9580150050299575, "grad_norm": 294.64715576171875, "learning_rate": 8.057481033229176e-08, "loss": 12.0631, "step": 474250 }, { "epoch": 0.9580352056626413, "grad_norm": 946.9326782226562, "learning_rate": 8.051240857341102e-08, "loss": 23.1346, "step": 474260 }, { "epoch": 0.9580554062953252, "grad_norm": 424.99945068359375, "learning_rate": 8.045003079151514e-08, "loss": 14.3341, "step": 474270 }, { "epoch": 0.958075606928009, "grad_norm": 271.5953369140625, "learning_rate": 8.038767698690996e-08, "loss": 22.1632, "step": 474280 }, { "epoch": 0.9580958075606928, "grad_norm": 500.50146484375, "learning_rate": 8.032534715989859e-08, "loss": 16.352, "step": 474290 }, { "epoch": 0.9581160081933766, "grad_norm": 4.317878246307373, "learning_rate": 8.02630413107841e-08, "loss": 28.6135, "step": 474300 }, { "epoch": 0.9581362088260604, "grad_norm": 193.65724182128906, "learning_rate": 8.020075943987071e-08, "loss": 27.3813, "step": 474310 }, { "epoch": 0.9581564094587443, "grad_norm": 400.7613525390625, "learning_rate": 8.013850154746317e-08, "loss": 25.7266, "step": 474320 }, { "epoch": 0.9581766100914281, "grad_norm": 190.98426818847656, "learning_rate": 8.007626763386345e-08, "loss": 10.3764, "step": 474330 }, { "epoch": 0.9581968107241119, "grad_norm": 268.8352355957031, "learning_rate": 8.001405769937464e-08, 
"loss": 44.0869, "step": 474340 }, { "epoch": 0.9582170113567957, "grad_norm": 131.71649169921875, "learning_rate": 7.995187174430152e-08, "loss": 7.6256, "step": 474350 }, { "epoch": 0.9582372119894795, "grad_norm": 427.9704284667969, "learning_rate": 7.988970976894605e-08, "loss": 17.7285, "step": 474360 }, { "epoch": 0.9582574126221634, "grad_norm": 440.0675354003906, "learning_rate": 7.982757177361078e-08, "loss": 27.5012, "step": 474370 }, { "epoch": 0.9582776132548472, "grad_norm": 119.3633804321289, "learning_rate": 7.976545775859934e-08, "loss": 16.8517, "step": 474380 }, { "epoch": 0.958297813887531, "grad_norm": 484.5781555175781, "learning_rate": 7.970336772421483e-08, "loss": 10.7958, "step": 474390 }, { "epoch": 0.9583180145202148, "grad_norm": 140.41004943847656, "learning_rate": 7.964130167075923e-08, "loss": 27.1162, "step": 474400 }, { "epoch": 0.9583382151528986, "grad_norm": 40.1215705871582, "learning_rate": 7.957925959853452e-08, "loss": 16.428, "step": 474410 }, { "epoch": 0.9583584157855825, "grad_norm": 349.8250732421875, "learning_rate": 7.951724150784434e-08, "loss": 15.7237, "step": 474420 }, { "epoch": 0.9583786164182663, "grad_norm": 1398.2322998046875, "learning_rate": 7.945524739899069e-08, "loss": 16.3054, "step": 474430 }, { "epoch": 0.95839881705095, "grad_norm": 201.12515258789062, "learning_rate": 7.939327727227441e-08, "loss": 13.2908, "step": 474440 }, { "epoch": 0.9584190176836338, "grad_norm": 303.38531494140625, "learning_rate": 7.933133112799918e-08, "loss": 5.1576, "step": 474450 }, { "epoch": 0.9584392183163176, "grad_norm": 196.3020782470703, "learning_rate": 7.926940896646584e-08, "loss": 33.5378, "step": 474460 }, { "epoch": 0.9584594189490014, "grad_norm": 862.3712158203125, "learning_rate": 7.920751078797695e-08, "loss": 23.6772, "step": 474470 }, { "epoch": 0.9584796195816853, "grad_norm": 55.28944778442383, "learning_rate": 7.914563659283392e-08, "loss": 6.0035, "step": 474480 }, { "epoch": 0.9584998202143691, "grad_norm": 519.5173950195312, "learning_rate": 7.908378638133762e-08, "loss": 13.2152, "step": 474490 }, { "epoch": 0.9585200208470529, "grad_norm": 744.1380004882812, "learning_rate": 7.90219601537906e-08, "loss": 29.0983, "step": 474500 }, { "epoch": 0.9585402214797367, "grad_norm": 117.6964111328125, "learning_rate": 7.896015791049372e-08, "loss": 21.7835, "step": 474510 }, { "epoch": 0.9585604221124205, "grad_norm": 940.3536376953125, "learning_rate": 7.889837965174784e-08, "loss": 13.7541, "step": 474520 }, { "epoch": 0.9585806227451044, "grad_norm": 88.52216339111328, "learning_rate": 7.883662537785442e-08, "loss": 25.2124, "step": 474530 }, { "epoch": 0.9586008233777882, "grad_norm": 71.35601043701172, "learning_rate": 7.877489508911429e-08, "loss": 29.5819, "step": 474540 }, { "epoch": 0.958621024010472, "grad_norm": 405.7100830078125, "learning_rate": 7.871318878582889e-08, "loss": 18.056, "step": 474550 }, { "epoch": 0.9586412246431558, "grad_norm": 50.22018051147461, "learning_rate": 7.865150646829855e-08, "loss": 6.516, "step": 474560 }, { "epoch": 0.9586614252758396, "grad_norm": 324.6778869628906, "learning_rate": 7.858984813682357e-08, "loss": 13.7222, "step": 474570 }, { "epoch": 0.9586816259085235, "grad_norm": 395.4815368652344, "learning_rate": 7.852821379170538e-08, "loss": 26.4004, "step": 474580 }, { "epoch": 0.9587018265412073, "grad_norm": 268.73529052734375, "learning_rate": 7.846660343324263e-08, "loss": 19.3143, "step": 474590 }, { "epoch": 0.9587220271738911, "grad_norm": 604.1646728515625, 
"learning_rate": 7.840501706173786e-08, "loss": 19.3332, "step": 474600 }, { "epoch": 0.9587422278065749, "grad_norm": 353.2278747558594, "learning_rate": 7.834345467748972e-08, "loss": 17.0513, "step": 474610 }, { "epoch": 0.9587624284392587, "grad_norm": 295.6312561035156, "learning_rate": 7.828191628079851e-08, "loss": 19.6961, "step": 474620 }, { "epoch": 0.9587826290719426, "grad_norm": 174.08670043945312, "learning_rate": 7.8220401871964e-08, "loss": 25.454, "step": 474630 }, { "epoch": 0.9588028297046264, "grad_norm": 54.75748825073242, "learning_rate": 7.815891145128763e-08, "loss": 11.6366, "step": 474640 }, { "epoch": 0.9588230303373102, "grad_norm": 355.44708251953125, "learning_rate": 7.809744501906635e-08, "loss": 13.3484, "step": 474650 }, { "epoch": 0.958843230969994, "grad_norm": 343.7234191894531, "learning_rate": 7.803600257560162e-08, "loss": 29.4316, "step": 474660 }, { "epoch": 0.9588634316026778, "grad_norm": 491.80804443359375, "learning_rate": 7.797458412119264e-08, "loss": 14.72, "step": 474670 }, { "epoch": 0.9588836322353617, "grad_norm": 367.8491516113281, "learning_rate": 7.791318965613859e-08, "loss": 11.354, "step": 474680 }, { "epoch": 0.9589038328680455, "grad_norm": 538.6314697265625, "learning_rate": 7.785181918073814e-08, "loss": 14.5587, "step": 474690 }, { "epoch": 0.9589240335007292, "grad_norm": 453.12664794921875, "learning_rate": 7.779047269529105e-08, "loss": 28.2727, "step": 474700 }, { "epoch": 0.958944234133413, "grad_norm": 237.34593200683594, "learning_rate": 7.772915020009708e-08, "loss": 16.4047, "step": 474710 }, { "epoch": 0.9589644347660968, "grad_norm": 76.294921875, "learning_rate": 7.766785169545376e-08, "loss": 23.21, "step": 474720 }, { "epoch": 0.9589846353987806, "grad_norm": 149.80584716796875, "learning_rate": 7.760657718165976e-08, "loss": 22.3909, "step": 474730 }, { "epoch": 0.9590048360314645, "grad_norm": 376.0763854980469, "learning_rate": 7.754532665901482e-08, "loss": 29.0698, "step": 474740 }, { "epoch": 0.9590250366641483, "grad_norm": 468.72503662109375, "learning_rate": 7.748410012781705e-08, "loss": 11.0402, "step": 474750 }, { "epoch": 0.9590452372968321, "grad_norm": 140.60446166992188, "learning_rate": 7.742289758836452e-08, "loss": 13.6312, "step": 474760 }, { "epoch": 0.9590654379295159, "grad_norm": 254.8717498779297, "learning_rate": 7.736171904095591e-08, "loss": 33.6136, "step": 474770 }, { "epoch": 0.9590856385621997, "grad_norm": 121.12136840820312, "learning_rate": 7.73005644858893e-08, "loss": 22.7845, "step": 474780 }, { "epoch": 0.9591058391948836, "grad_norm": 93.96916198730469, "learning_rate": 7.723943392346223e-08, "loss": 20.9016, "step": 474790 }, { "epoch": 0.9591260398275674, "grad_norm": 10.694808959960938, "learning_rate": 7.717832735397335e-08, "loss": 6.3451, "step": 474800 }, { "epoch": 0.9591462404602512, "grad_norm": 118.19041442871094, "learning_rate": 7.71172447777202e-08, "loss": 11.2926, "step": 474810 }, { "epoch": 0.959166441092935, "grad_norm": 255.38201904296875, "learning_rate": 7.705618619500032e-08, "loss": 18.6042, "step": 474820 }, { "epoch": 0.9591866417256188, "grad_norm": 29.095916748046875, "learning_rate": 7.699515160611182e-08, "loss": 16.4324, "step": 474830 }, { "epoch": 0.9592068423583027, "grad_norm": 350.3114013671875, "learning_rate": 7.693414101135166e-08, "loss": 13.2131, "step": 474840 }, { "epoch": 0.9592270429909865, "grad_norm": 225.74880981445312, "learning_rate": 7.687315441101795e-08, "loss": 10.0032, "step": 474850 }, { "epoch": 0.9592472436236703, 
"grad_norm": 950.14892578125, "learning_rate": 7.681219180540655e-08, "loss": 18.5111, "step": 474860 }, { "epoch": 0.9592674442563541, "grad_norm": 336.2343444824219, "learning_rate": 7.675125319481614e-08, "loss": 36.791, "step": 474870 }, { "epoch": 0.959287644889038, "grad_norm": 241.7272186279297, "learning_rate": 7.669033857954255e-08, "loss": 6.8688, "step": 474880 }, { "epoch": 0.9593078455217218, "grad_norm": 182.2271270751953, "learning_rate": 7.662944795988337e-08, "loss": 18.8567, "step": 474890 }, { "epoch": 0.9593280461544056, "grad_norm": 396.48284912109375, "learning_rate": 7.656858133613498e-08, "loss": 15.0419, "step": 474900 }, { "epoch": 0.9593482467870894, "grad_norm": 544.12109375, "learning_rate": 7.65077387085944e-08, "loss": 18.7172, "step": 474910 }, { "epoch": 0.9593684474197732, "grad_norm": 0.0, "learning_rate": 7.64469200775575e-08, "loss": 7.0933, "step": 474920 }, { "epoch": 0.959388648052457, "grad_norm": 231.64244079589844, "learning_rate": 7.638612544332181e-08, "loss": 20.6236, "step": 474930 }, { "epoch": 0.9594088486851409, "grad_norm": 248.8718719482422, "learning_rate": 7.632535480618264e-08, "loss": 29.4236, "step": 474940 }, { "epoch": 0.9594290493178247, "grad_norm": 177.2399139404297, "learning_rate": 7.626460816643588e-08, "loss": 15.6347, "step": 474950 }, { "epoch": 0.9594492499505084, "grad_norm": 396.2425537109375, "learning_rate": 7.620388552437907e-08, "loss": 18.1908, "step": 474960 }, { "epoch": 0.9594694505831922, "grad_norm": 185.56700134277344, "learning_rate": 7.614318688030753e-08, "loss": 22.8882, "step": 474970 }, { "epoch": 0.959489651215876, "grad_norm": 348.06341552734375, "learning_rate": 7.608251223451601e-08, "loss": 14.1523, "step": 474980 }, { "epoch": 0.9595098518485599, "grad_norm": 102.40057373046875, "learning_rate": 7.602186158730152e-08, "loss": 23.2, "step": 474990 }, { "epoch": 0.9595300524812437, "grad_norm": 506.0945739746094, "learning_rate": 7.59612349389599e-08, "loss": 15.9318, "step": 475000 }, { "epoch": 0.9595502531139275, "grad_norm": 328.2109680175781, "learning_rate": 7.590063228978539e-08, "loss": 21.683, "step": 475010 }, { "epoch": 0.9595704537466113, "grad_norm": 660.8423461914062, "learning_rate": 7.584005364007386e-08, "loss": 19.7161, "step": 475020 }, { "epoch": 0.9595906543792951, "grad_norm": 1033.015869140625, "learning_rate": 7.577949899012116e-08, "loss": 18.3523, "step": 475030 }, { "epoch": 0.959610855011979, "grad_norm": 179.03768920898438, "learning_rate": 7.571896834022152e-08, "loss": 13.8454, "step": 475040 }, { "epoch": 0.9596310556446628, "grad_norm": 301.87847900390625, "learning_rate": 7.565846169067026e-08, "loss": 19.6426, "step": 475050 }, { "epoch": 0.9596512562773466, "grad_norm": 212.1444854736328, "learning_rate": 7.559797904176325e-08, "loss": 10.688, "step": 475060 }, { "epoch": 0.9596714569100304, "grad_norm": 205.0482635498047, "learning_rate": 7.553752039379359e-08, "loss": 21.518, "step": 475070 }, { "epoch": 0.9596916575427142, "grad_norm": 354.538818359375, "learning_rate": 7.547708574705714e-08, "loss": 9.6673, "step": 475080 }, { "epoch": 0.9597118581753981, "grad_norm": 517.3529663085938, "learning_rate": 7.541667510184813e-08, "loss": 11.3955, "step": 475090 }, { "epoch": 0.9597320588080819, "grad_norm": 348.29376220703125, "learning_rate": 7.535628845846077e-08, "loss": 22.1537, "step": 475100 }, { "epoch": 0.9597522594407657, "grad_norm": 358.4526672363281, "learning_rate": 7.529592581718981e-08, "loss": 20.0223, "step": 475110 }, { "epoch": 
0.9597724600734495, "grad_norm": 477.21746826171875, "learning_rate": 7.52355871783289e-08, "loss": 27.5024, "step": 475120 }, { "epoch": 0.9597926607061333, "grad_norm": 515.2587890625, "learning_rate": 7.517527254217282e-08, "loss": 13.9886, "step": 475130 }, { "epoch": 0.9598128613388172, "grad_norm": 370.2596130371094, "learning_rate": 7.511498190901467e-08, "loss": 17.2683, "step": 475140 }, { "epoch": 0.959833061971501, "grad_norm": 705.9306030273438, "learning_rate": 7.50547152791492e-08, "loss": 12.0799, "step": 475150 }, { "epoch": 0.9598532626041848, "grad_norm": 146.8015594482422, "learning_rate": 7.499447265286952e-08, "loss": 14.1286, "step": 475160 }, { "epoch": 0.9598734632368686, "grad_norm": 332.004638671875, "learning_rate": 7.493425403046928e-08, "loss": 10.444, "step": 475170 }, { "epoch": 0.9598936638695524, "grad_norm": 378.7012023925781, "learning_rate": 7.487405941224268e-08, "loss": 14.7322, "step": 475180 }, { "epoch": 0.9599138645022363, "grad_norm": 475.091552734375, "learning_rate": 7.481388879848228e-08, "loss": 14.2766, "step": 475190 }, { "epoch": 0.9599340651349201, "grad_norm": 111.34712982177734, "learning_rate": 7.475374218948118e-08, "loss": 17.2071, "step": 475200 }, { "epoch": 0.9599542657676038, "grad_norm": 187.49609375, "learning_rate": 7.469361958553356e-08, "loss": 7.586, "step": 475210 }, { "epoch": 0.9599744664002876, "grad_norm": 528.530517578125, "learning_rate": 7.463352098693199e-08, "loss": 9.4207, "step": 475220 }, { "epoch": 0.9599946670329714, "grad_norm": 682.244140625, "learning_rate": 7.457344639396902e-08, "loss": 15.9991, "step": 475230 }, { "epoch": 0.9600148676656552, "grad_norm": 180.2545623779297, "learning_rate": 7.451339580693718e-08, "loss": 15.8759, "step": 475240 }, { "epoch": 0.9600350682983391, "grad_norm": 314.72698974609375, "learning_rate": 7.445336922613067e-08, "loss": 20.9214, "step": 475250 }, { "epoch": 0.9600552689310229, "grad_norm": 167.43357849121094, "learning_rate": 7.439336665184039e-08, "loss": 19.3911, "step": 475260 }, { "epoch": 0.9600754695637067, "grad_norm": 223.72708129882812, "learning_rate": 7.433338808435941e-08, "loss": 16.5949, "step": 475270 }, { "epoch": 0.9600956701963905, "grad_norm": 348.5341491699219, "learning_rate": 7.427343352398031e-08, "loss": 40.8376, "step": 475280 }, { "epoch": 0.9601158708290743, "grad_norm": 1039.63427734375, "learning_rate": 7.421350297099505e-08, "loss": 24.5333, "step": 475290 }, { "epoch": 0.9601360714617582, "grad_norm": 505.9857482910156, "learning_rate": 7.415359642569564e-08, "loss": 43.2792, "step": 475300 }, { "epoch": 0.960156272094442, "grad_norm": 211.31011962890625, "learning_rate": 7.409371388837405e-08, "loss": 16.5823, "step": 475310 }, { "epoch": 0.9601764727271258, "grad_norm": 541.6423950195312, "learning_rate": 7.403385535932284e-08, "loss": 13.8085, "step": 475320 }, { "epoch": 0.9601966733598096, "grad_norm": 231.13262939453125, "learning_rate": 7.397402083883287e-08, "loss": 6.0951, "step": 475330 }, { "epoch": 0.9602168739924934, "grad_norm": 235.80392456054688, "learning_rate": 7.39142103271956e-08, "loss": 35.2601, "step": 475340 }, { "epoch": 0.9602370746251773, "grad_norm": 87.18898010253906, "learning_rate": 7.385442382470354e-08, "loss": 12.2211, "step": 475350 }, { "epoch": 0.9602572752578611, "grad_norm": 449.9172058105469, "learning_rate": 7.379466133164759e-08, "loss": 11.9918, "step": 475360 }, { "epoch": 0.9602774758905449, "grad_norm": 16.43218994140625, "learning_rate": 7.373492284831862e-08, "loss": 10.8565, "step": 
475370 }, { "epoch": 0.9602976765232287, "grad_norm": 662.5327758789062, "learning_rate": 7.367520837500808e-08, "loss": 18.2149, "step": 475380 }, { "epoch": 0.9603178771559125, "grad_norm": 198.49664306640625, "learning_rate": 7.361551791200794e-08, "loss": 13.8516, "step": 475390 }, { "epoch": 0.9603380777885964, "grad_norm": 309.06439208984375, "learning_rate": 7.355585145960743e-08, "loss": 20.8389, "step": 475400 }, { "epoch": 0.9603582784212802, "grad_norm": 412.4136962890625, "learning_rate": 7.34962090180985e-08, "loss": 13.8319, "step": 475410 }, { "epoch": 0.960378479053964, "grad_norm": 311.71685791015625, "learning_rate": 7.343659058777098e-08, "loss": 21.7253, "step": 475420 }, { "epoch": 0.9603986796866478, "grad_norm": 0.0, "learning_rate": 7.33769961689168e-08, "loss": 12.6646, "step": 475430 }, { "epoch": 0.9604188803193316, "grad_norm": 291.0174560546875, "learning_rate": 7.331742576182466e-08, "loss": 22.1376, "step": 475440 }, { "epoch": 0.9604390809520155, "grad_norm": 91.20379638671875, "learning_rate": 7.325787936678708e-08, "loss": 18.15, "step": 475450 }, { "epoch": 0.9604592815846993, "grad_norm": 171.52337646484375, "learning_rate": 7.319835698409217e-08, "loss": 16.8848, "step": 475460 }, { "epoch": 0.960479482217383, "grad_norm": 489.40252685546875, "learning_rate": 7.313885861403135e-08, "loss": 20.3697, "step": 475470 }, { "epoch": 0.9604996828500668, "grad_norm": 297.5575256347656, "learning_rate": 7.307938425689388e-08, "loss": 13.0444, "step": 475480 }, { "epoch": 0.9605198834827506, "grad_norm": 816.5662231445312, "learning_rate": 7.301993391297003e-08, "loss": 21.4931, "step": 475490 }, { "epoch": 0.9605400841154345, "grad_norm": 462.9900817871094, "learning_rate": 7.296050758254958e-08, "loss": 16.9646, "step": 475500 }, { "epoch": 0.9605602847481183, "grad_norm": 358.0087890625, "learning_rate": 7.290110526592231e-08, "loss": 13.6412, "step": 475510 }, { "epoch": 0.9605804853808021, "grad_norm": 226.2945098876953, "learning_rate": 7.284172696337688e-08, "loss": 16.7119, "step": 475520 }, { "epoch": 0.9606006860134859, "grad_norm": 57.92973709106445, "learning_rate": 7.27823726752036e-08, "loss": 10.6754, "step": 475530 }, { "epoch": 0.9606208866461697, "grad_norm": 444.6774597167969, "learning_rate": 7.272304240169115e-08, "loss": 13.8628, "step": 475540 }, { "epoch": 0.9606410872788536, "grad_norm": 343.57550048828125, "learning_rate": 7.266373614312927e-08, "loss": 10.9022, "step": 475550 }, { "epoch": 0.9606612879115374, "grad_norm": 770.028076171875, "learning_rate": 7.260445389980609e-08, "loss": 16.674, "step": 475560 }, { "epoch": 0.9606814885442212, "grad_norm": 388.7988586425781, "learning_rate": 7.25451956720119e-08, "loss": 23.1462, "step": 475570 }, { "epoch": 0.960701689176905, "grad_norm": 696.9945068359375, "learning_rate": 7.248596146003484e-08, "loss": 17.2478, "step": 475580 }, { "epoch": 0.9607218898095888, "grad_norm": 542.0843505859375, "learning_rate": 7.242675126416299e-08, "loss": 29.4484, "step": 475590 }, { "epoch": 0.9607420904422727, "grad_norm": 227.0558319091797, "learning_rate": 7.236756508468612e-08, "loss": 15.132, "step": 475600 }, { "epoch": 0.9607622910749565, "grad_norm": 386.5271301269531, "learning_rate": 7.230840292189179e-08, "loss": 9.2645, "step": 475610 }, { "epoch": 0.9607824917076403, "grad_norm": 256.40350341796875, "learning_rate": 7.224926477606864e-08, "loss": 21.749, "step": 475620 }, { "epoch": 0.9608026923403241, "grad_norm": 632.7886962890625, "learning_rate": 7.219015064750478e-08, "loss": 
19.2842, "step": 475630 }, { "epoch": 0.9608228929730079, "grad_norm": 666.5670166015625, "learning_rate": 7.213106053648889e-08, "loss": 15.0993, "step": 475640 }, { "epoch": 0.9608430936056918, "grad_norm": 11.963786125183105, "learning_rate": 7.207199444330847e-08, "loss": 14.6905, "step": 475650 }, { "epoch": 0.9608632942383756, "grad_norm": 266.03759765625, "learning_rate": 7.201295236825112e-08, "loss": 13.424, "step": 475660 }, { "epoch": 0.9608834948710594, "grad_norm": 662.6854248046875, "learning_rate": 7.195393431160491e-08, "loss": 12.8577, "step": 475670 }, { "epoch": 0.9609036955037432, "grad_norm": 32.202144622802734, "learning_rate": 7.189494027365795e-08, "loss": 25.5644, "step": 475680 }, { "epoch": 0.960923896136427, "grad_norm": 900.0670166015625, "learning_rate": 7.183597025469669e-08, "loss": 19.6551, "step": 475690 }, { "epoch": 0.9609440967691109, "grad_norm": 299.69903564453125, "learning_rate": 7.177702425500977e-08, "loss": 24.7179, "step": 475700 }, { "epoch": 0.9609642974017947, "grad_norm": 133.36920166015625, "learning_rate": 7.171810227488363e-08, "loss": 10.9328, "step": 475710 }, { "epoch": 0.9609844980344784, "grad_norm": 364.9358215332031, "learning_rate": 7.165920431460637e-08, "loss": 20.8803, "step": 475720 }, { "epoch": 0.9610046986671622, "grad_norm": 402.2028503417969, "learning_rate": 7.16003303744639e-08, "loss": 13.6, "step": 475730 }, { "epoch": 0.961024899299846, "grad_norm": 288.1083679199219, "learning_rate": 7.154148045474319e-08, "loss": 22.305, "step": 475740 }, { "epoch": 0.9610450999325298, "grad_norm": 284.3811950683594, "learning_rate": 7.148265455573233e-08, "loss": 14.0048, "step": 475750 }, { "epoch": 0.9610653005652137, "grad_norm": 295.71832275390625, "learning_rate": 7.142385267771667e-08, "loss": 23.9628, "step": 475760 }, { "epoch": 0.9610855011978975, "grad_norm": 337.1648254394531, "learning_rate": 7.136507482098375e-08, "loss": 17.5986, "step": 475770 }, { "epoch": 0.9611057018305813, "grad_norm": 39.2862434387207, "learning_rate": 7.130632098581947e-08, "loss": 20.2325, "step": 475780 }, { "epoch": 0.9611259024632651, "grad_norm": 1028.286865234375, "learning_rate": 7.124759117251078e-08, "loss": 31.2637, "step": 475790 }, { "epoch": 0.961146103095949, "grad_norm": 658.9299926757812, "learning_rate": 7.118888538134361e-08, "loss": 12.5292, "step": 475800 }, { "epoch": 0.9611663037286328, "grad_norm": 798.3705444335938, "learning_rate": 7.113020361260325e-08, "loss": 18.518, "step": 475810 }, { "epoch": 0.9611865043613166, "grad_norm": 51.175601959228516, "learning_rate": 7.107154586657727e-08, "loss": 14.5358, "step": 475820 }, { "epoch": 0.9612067049940004, "grad_norm": 144.55946350097656, "learning_rate": 7.101291214355043e-08, "loss": 15.6595, "step": 475830 }, { "epoch": 0.9612269056266842, "grad_norm": 248.81558227539062, "learning_rate": 7.095430244380863e-08, "loss": 11.5365, "step": 475840 }, { "epoch": 0.961247106259368, "grad_norm": 728.4669799804688, "learning_rate": 7.089571676763773e-08, "loss": 26.9842, "step": 475850 }, { "epoch": 0.9612673068920519, "grad_norm": 232.49505615234375, "learning_rate": 7.083715511532419e-08, "loss": 16.1763, "step": 475860 }, { "epoch": 0.9612875075247357, "grad_norm": 485.213134765625, "learning_rate": 7.077861748715165e-08, "loss": 14.5382, "step": 475870 }, { "epoch": 0.9613077081574195, "grad_norm": 15.343710899353027, "learning_rate": 7.072010388340656e-08, "loss": 7.3748, "step": 475880 }, { "epoch": 0.9613279087901033, "grad_norm": 376.9869384765625, "learning_rate": 
7.066161430437368e-08, "loss": 17.4395, "step": 475890 }, { "epoch": 0.9613481094227871, "grad_norm": 403.6138000488281, "learning_rate": 7.060314875033836e-08, "loss": 11.6849, "step": 475900 }, { "epoch": 0.961368310055471, "grad_norm": 160.56594848632812, "learning_rate": 7.054470722158535e-08, "loss": 17.0336, "step": 475910 }, { "epoch": 0.9613885106881548, "grad_norm": 440.259521484375, "learning_rate": 7.048628971839944e-08, "loss": 25.2777, "step": 475920 }, { "epoch": 0.9614087113208386, "grad_norm": 219.51797485351562, "learning_rate": 7.042789624106594e-08, "loss": 13.5234, "step": 475930 }, { "epoch": 0.9614289119535224, "grad_norm": 540.7685546875, "learning_rate": 7.036952678986852e-08, "loss": 25.3983, "step": 475940 }, { "epoch": 0.9614491125862062, "grad_norm": 333.6399841308594, "learning_rate": 7.031118136509196e-08, "loss": 19.2188, "step": 475950 }, { "epoch": 0.9614693132188901, "grad_norm": 218.74139404296875, "learning_rate": 7.025285996702158e-08, "loss": 25.5605, "step": 475960 }, { "epoch": 0.9614895138515739, "grad_norm": 297.5934753417969, "learning_rate": 7.019456259594049e-08, "loss": 19.9465, "step": 475970 }, { "epoch": 0.9615097144842576, "grad_norm": 229.1687774658203, "learning_rate": 7.01362892521329e-08, "loss": 16.26, "step": 475980 }, { "epoch": 0.9615299151169414, "grad_norm": 333.52423095703125, "learning_rate": 7.007803993588358e-08, "loss": 17.7302, "step": 475990 }, { "epoch": 0.9615501157496252, "grad_norm": 419.1000061035156, "learning_rate": 7.001981464747565e-08, "loss": 21.5595, "step": 476000 }, { "epoch": 0.961570316382309, "grad_norm": 307.7343444824219, "learning_rate": 6.996161338719332e-08, "loss": 21.56, "step": 476010 }, { "epoch": 0.9615905170149929, "grad_norm": 160.56182861328125, "learning_rate": 6.990343615532025e-08, "loss": 19.3788, "step": 476020 }, { "epoch": 0.9616107176476767, "grad_norm": 504.5084228515625, "learning_rate": 6.9845282952139e-08, "loss": 18.6281, "step": 476030 }, { "epoch": 0.9616309182803605, "grad_norm": 415.0246276855469, "learning_rate": 6.978715377793489e-08, "loss": 16.6749, "step": 476040 }, { "epoch": 0.9616511189130443, "grad_norm": 234.70310974121094, "learning_rate": 6.972904863298991e-08, "loss": 17.0805, "step": 476050 }, { "epoch": 0.9616713195457282, "grad_norm": 501.95538330078125, "learning_rate": 6.967096751758773e-08, "loss": 15.4388, "step": 476060 }, { "epoch": 0.961691520178412, "grad_norm": 480.3716735839844, "learning_rate": 6.961291043201145e-08, "loss": 17.3112, "step": 476070 }, { "epoch": 0.9617117208110958, "grad_norm": 252.87176513671875, "learning_rate": 6.955487737654309e-08, "loss": 13.0375, "step": 476080 }, { "epoch": 0.9617319214437796, "grad_norm": 326.9039001464844, "learning_rate": 6.949686835146685e-08, "loss": 15.2768, "step": 476090 }, { "epoch": 0.9617521220764634, "grad_norm": 370.8696594238281, "learning_rate": 6.943888335706472e-08, "loss": 23.2639, "step": 476100 }, { "epoch": 0.9617723227091473, "grad_norm": 819.5240478515625, "learning_rate": 6.938092239361982e-08, "loss": 12.4631, "step": 476110 }, { "epoch": 0.9617925233418311, "grad_norm": 419.8603820800781, "learning_rate": 6.932298546141413e-08, "loss": 13.5396, "step": 476120 }, { "epoch": 0.9618127239745149, "grad_norm": 370.8901062011719, "learning_rate": 6.926507256072967e-08, "loss": 22.5444, "step": 476130 }, { "epoch": 0.9618329246071987, "grad_norm": 274.3182067871094, "learning_rate": 6.920718369185009e-08, "loss": 14.7543, "step": 476140 }, { "epoch": 0.9618531252398825, "grad_norm": 
449.9986267089844, "learning_rate": 6.914931885505626e-08, "loss": 13.2917, "step": 476150 }, { "epoch": 0.9618733258725664, "grad_norm": 392.4185485839844, "learning_rate": 6.909147805063021e-08, "loss": 43.4365, "step": 476160 }, { "epoch": 0.9618935265052502, "grad_norm": 379.1201477050781, "learning_rate": 6.903366127885447e-08, "loss": 13.0509, "step": 476170 }, { "epoch": 0.961913727137934, "grad_norm": 269.5074157714844, "learning_rate": 6.897586854001048e-08, "loss": 28.5801, "step": 476180 }, { "epoch": 0.9619339277706178, "grad_norm": 428.8318176269531, "learning_rate": 6.89180998343808e-08, "loss": 23.163, "step": 476190 }, { "epoch": 0.9619541284033016, "grad_norm": 270.17315673828125, "learning_rate": 6.88603551622452e-08, "loss": 16.6752, "step": 476200 }, { "epoch": 0.9619743290359855, "grad_norm": 870.8080444335938, "learning_rate": 6.88026345238868e-08, "loss": 30.2955, "step": 476210 }, { "epoch": 0.9619945296686693, "grad_norm": 471.13787841796875, "learning_rate": 6.874493791958648e-08, "loss": 13.3997, "step": 476220 }, { "epoch": 0.9620147303013531, "grad_norm": 261.5706481933594, "learning_rate": 6.868726534962456e-08, "loss": 20.7547, "step": 476230 }, { "epoch": 0.9620349309340368, "grad_norm": 308.1227722167969, "learning_rate": 6.862961681428304e-08, "loss": 32.5355, "step": 476240 }, { "epoch": 0.9620551315667206, "grad_norm": 321.1436767578125, "learning_rate": 6.857199231384282e-08, "loss": 24.4721, "step": 476250 }, { "epoch": 0.9620753321994044, "grad_norm": 461.12042236328125, "learning_rate": 6.851439184858477e-08, "loss": 23.3178, "step": 476260 }, { "epoch": 0.9620955328320883, "grad_norm": 445.0748596191406, "learning_rate": 6.845681541878924e-08, "loss": 16.4683, "step": 476270 }, { "epoch": 0.9621157334647721, "grad_norm": 237.60678100585938, "learning_rate": 6.83992630247371e-08, "loss": 14.815, "step": 476280 }, { "epoch": 0.9621359340974559, "grad_norm": 29.770912170410156, "learning_rate": 6.834173466670923e-08, "loss": 13.1535, "step": 476290 }, { "epoch": 0.9621561347301397, "grad_norm": 1804.21337890625, "learning_rate": 6.828423034498488e-08, "loss": 13.4355, "step": 476300 }, { "epoch": 0.9621763353628235, "grad_norm": 363.8840637207031, "learning_rate": 6.822675005984547e-08, "loss": 12.4518, "step": 476310 }, { "epoch": 0.9621965359955074, "grad_norm": 387.9690856933594, "learning_rate": 6.816929381157023e-08, "loss": 14.038, "step": 476320 }, { "epoch": 0.9622167366281912, "grad_norm": 282.8274230957031, "learning_rate": 6.811186160044004e-08, "loss": 22.7182, "step": 476330 }, { "epoch": 0.962236937260875, "grad_norm": 395.19390869140625, "learning_rate": 6.805445342673467e-08, "loss": 16.2363, "step": 476340 }, { "epoch": 0.9622571378935588, "grad_norm": 541.1038208007812, "learning_rate": 6.799706929073335e-08, "loss": 17.5635, "step": 476350 }, { "epoch": 0.9622773385262426, "grad_norm": 34.52801513671875, "learning_rate": 6.793970919271642e-08, "loss": 19.4252, "step": 476360 }, { "epoch": 0.9622975391589265, "grad_norm": 540.6629028320312, "learning_rate": 6.788237313296309e-08, "loss": 21.7979, "step": 476370 }, { "epoch": 0.9623177397916103, "grad_norm": 432.80377197265625, "learning_rate": 6.782506111175313e-08, "loss": 21.8245, "step": 476380 }, { "epoch": 0.9623379404242941, "grad_norm": 271.496337890625, "learning_rate": 6.776777312936522e-08, "loss": 8.9662, "step": 476390 }, { "epoch": 0.9623581410569779, "grad_norm": 3.349520444869995, "learning_rate": 6.771050918607913e-08, "loss": 20.389, "step": 476400 }, { "epoch": 
0.9623783416896617, "grad_norm": 240.0352325439453, "learning_rate": 6.765326928217408e-08, "loss": 20.2974, "step": 476410 }, { "epoch": 0.9623985423223456, "grad_norm": 186.44142150878906, "learning_rate": 6.759605341792819e-08, "loss": 16.1324, "step": 476420 }, { "epoch": 0.9624187429550294, "grad_norm": 404.8953857421875, "learning_rate": 6.753886159362122e-08, "loss": 17.5296, "step": 476430 }, { "epoch": 0.9624389435877132, "grad_norm": 338.2667541503906, "learning_rate": 6.748169380953184e-08, "loss": 21.9625, "step": 476440 }, { "epoch": 0.962459144220397, "grad_norm": 359.7268371582031, "learning_rate": 6.742455006593762e-08, "loss": 22.326, "step": 476450 }, { "epoch": 0.9624793448530808, "grad_norm": 498.0181579589844, "learning_rate": 6.736743036311832e-08, "loss": 29.3099, "step": 476460 }, { "epoch": 0.9624995454857647, "grad_norm": 707.2603149414062, "learning_rate": 6.731033470135262e-08, "loss": 20.6502, "step": 476470 }, { "epoch": 0.9625197461184485, "grad_norm": 489.98052978515625, "learning_rate": 6.725326308091751e-08, "loss": 23.2261, "step": 476480 }, { "epoch": 0.9625399467511322, "grad_norm": 297.9427490234375, "learning_rate": 6.71962155020911e-08, "loss": 22.8428, "step": 476490 }, { "epoch": 0.962560147383816, "grad_norm": 706.0108642578125, "learning_rate": 6.713919196515317e-08, "loss": 24.6106, "step": 476500 }, { "epoch": 0.9625803480164998, "grad_norm": 370.75567626953125, "learning_rate": 6.708219247038017e-08, "loss": 18.245, "step": 476510 }, { "epoch": 0.9626005486491837, "grad_norm": 375.1898498535156, "learning_rate": 6.702521701804965e-08, "loss": 18.2819, "step": 476520 }, { "epoch": 0.9626207492818675, "grad_norm": 178.1335906982422, "learning_rate": 6.696826560844027e-08, "loss": 18.5652, "step": 476530 }, { "epoch": 0.9626409499145513, "grad_norm": 248.6165313720703, "learning_rate": 6.691133824183016e-08, "loss": 31.3355, "step": 476540 }, { "epoch": 0.9626611505472351, "grad_norm": 87.95904541015625, "learning_rate": 6.685443491849464e-08, "loss": 20.726, "step": 476550 }, { "epoch": 0.9626813511799189, "grad_norm": 90.31098175048828, "learning_rate": 6.679755563871292e-08, "loss": 14.6505, "step": 476560 }, { "epoch": 0.9627015518126028, "grad_norm": 547.4888305664062, "learning_rate": 6.674070040276148e-08, "loss": 22.3104, "step": 476570 }, { "epoch": 0.9627217524452866, "grad_norm": 423.2607421875, "learning_rate": 6.66838692109173e-08, "loss": 23.5625, "step": 476580 }, { "epoch": 0.9627419530779704, "grad_norm": 170.6096649169922, "learning_rate": 6.662706206345793e-08, "loss": 12.0225, "step": 476590 }, { "epoch": 0.9627621537106542, "grad_norm": 213.32203674316406, "learning_rate": 6.657027896065982e-08, "loss": 16.2692, "step": 476600 }, { "epoch": 0.962782354343338, "grad_norm": 164.5532684326172, "learning_rate": 6.651351990279997e-08, "loss": 3.9653, "step": 476610 }, { "epoch": 0.9628025549760219, "grad_norm": 221.3753204345703, "learning_rate": 6.645678489015428e-08, "loss": 18.0769, "step": 476620 }, { "epoch": 0.9628227556087057, "grad_norm": 445.78863525390625, "learning_rate": 6.64000739230003e-08, "loss": 27.9703, "step": 476630 }, { "epoch": 0.9628429562413895, "grad_norm": 475.8753356933594, "learning_rate": 6.634338700161392e-08, "loss": 24.063, "step": 476640 }, { "epoch": 0.9628631568740733, "grad_norm": 65.68719482421875, "learning_rate": 6.628672412627158e-08, "loss": 11.14, "step": 476650 }, { "epoch": 0.9628833575067571, "grad_norm": 330.69525146484375, "learning_rate": 6.623008529724917e-08, "loss": 17.0637, 
"step": 476660 }, { "epoch": 0.962903558139441, "grad_norm": 321.6661071777344, "learning_rate": 6.617347051482315e-08, "loss": 18.995, "step": 476670 }, { "epoch": 0.9629237587721248, "grad_norm": 493.0853271484375, "learning_rate": 6.611687977926939e-08, "loss": 14.4967, "step": 476680 }, { "epoch": 0.9629439594048086, "grad_norm": 328.06475830078125, "learning_rate": 6.606031309086269e-08, "loss": 17.7986, "step": 476690 }, { "epoch": 0.9629641600374924, "grad_norm": 445.1838684082031, "learning_rate": 6.60037704498806e-08, "loss": 24.9507, "step": 476700 }, { "epoch": 0.9629843606701762, "grad_norm": 174.0291748046875, "learning_rate": 6.594725185659734e-08, "loss": 18.0457, "step": 476710 }, { "epoch": 0.96300456130286, "grad_norm": 1234.39453125, "learning_rate": 6.58907573112888e-08, "loss": 14.8554, "step": 476720 }, { "epoch": 0.9630247619355439, "grad_norm": 286.1321716308594, "learning_rate": 6.583428681423032e-08, "loss": 27.2782, "step": 476730 }, { "epoch": 0.9630449625682277, "grad_norm": 372.0481262207031, "learning_rate": 6.577784036569668e-08, "loss": 15.7032, "step": 476740 }, { "epoch": 0.9630651632009114, "grad_norm": 583.0672607421875, "learning_rate": 6.572141796596376e-08, "loss": 18.4742, "step": 476750 }, { "epoch": 0.9630853638335952, "grad_norm": 189.72605895996094, "learning_rate": 6.566501961530636e-08, "loss": 13.0015, "step": 476760 }, { "epoch": 0.963105564466279, "grad_norm": 115.40235137939453, "learning_rate": 6.560864531399869e-08, "loss": 13.4514, "step": 476770 }, { "epoch": 0.9631257650989629, "grad_norm": 412.1432800292969, "learning_rate": 6.555229506231608e-08, "loss": 22.9218, "step": 476780 }, { "epoch": 0.9631459657316467, "grad_norm": 300.8472900390625, "learning_rate": 6.549596886053334e-08, "loss": 16.8421, "step": 476790 }, { "epoch": 0.9631661663643305, "grad_norm": 140.56094360351562, "learning_rate": 6.543966670892465e-08, "loss": 21.1918, "step": 476800 }, { "epoch": 0.9631863669970143, "grad_norm": 39.55817794799805, "learning_rate": 6.538338860776483e-08, "loss": 10.6026, "step": 476810 }, { "epoch": 0.9632065676296981, "grad_norm": 251.70135498046875, "learning_rate": 6.532713455732753e-08, "loss": 18.2131, "step": 476820 }, { "epoch": 0.963226768262382, "grad_norm": 437.0846862792969, "learning_rate": 6.527090455788754e-08, "loss": 13.2615, "step": 476830 }, { "epoch": 0.9632469688950658, "grad_norm": 323.1280212402344, "learning_rate": 6.521469860971852e-08, "loss": 15.5527, "step": 476840 }, { "epoch": 0.9632671695277496, "grad_norm": 469.2850341796875, "learning_rate": 6.515851671309414e-08, "loss": 23.856, "step": 476850 }, { "epoch": 0.9632873701604334, "grad_norm": 561.0780029296875, "learning_rate": 6.51023588682892e-08, "loss": 17.9284, "step": 476860 }, { "epoch": 0.9633075707931172, "grad_norm": 952.8590698242188, "learning_rate": 6.504622507557679e-08, "loss": 32.2216, "step": 476870 }, { "epoch": 0.9633277714258011, "grad_norm": 398.85455322265625, "learning_rate": 6.499011533523003e-08, "loss": 18.0433, "step": 476880 }, { "epoch": 0.9633479720584849, "grad_norm": 258.42388916015625, "learning_rate": 6.493402964752371e-08, "loss": 19.5101, "step": 476890 }, { "epoch": 0.9633681726911687, "grad_norm": 526.315185546875, "learning_rate": 6.487796801272983e-08, "loss": 16.5671, "step": 476900 }, { "epoch": 0.9633883733238525, "grad_norm": 0.0, "learning_rate": 6.482193043112206e-08, "loss": 10.8797, "step": 476910 }, { "epoch": 0.9634085739565363, "grad_norm": 394.5545654296875, "learning_rate": 6.476591690297407e-08, 
"loss": 11.1586, "step": 476920 }, { "epoch": 0.9634287745892202, "grad_norm": 235.354736328125, "learning_rate": 6.470992742855786e-08, "loss": 19.3661, "step": 476930 }, { "epoch": 0.963448975221904, "grad_norm": 375.6850280761719, "learning_rate": 6.465396200814766e-08, "loss": 18.9032, "step": 476940 }, { "epoch": 0.9634691758545878, "grad_norm": 444.804931640625, "learning_rate": 6.459802064201437e-08, "loss": 19.9718, "step": 476950 }, { "epoch": 0.9634893764872716, "grad_norm": 340.27679443359375, "learning_rate": 6.454210333043275e-08, "loss": 18.8783, "step": 476960 }, { "epoch": 0.9635095771199554, "grad_norm": 123.67523193359375, "learning_rate": 6.448621007367428e-08, "loss": 21.5001, "step": 476970 }, { "epoch": 0.9635297777526393, "grad_norm": 256.27716064453125, "learning_rate": 6.443034087201095e-08, "loss": 21.0008, "step": 476980 }, { "epoch": 0.9635499783853231, "grad_norm": 645.1035766601562, "learning_rate": 6.437449572571586e-08, "loss": 29.0721, "step": 476990 }, { "epoch": 0.9635701790180068, "grad_norm": 40.7523078918457, "learning_rate": 6.431867463506047e-08, "loss": 11.9315, "step": 477000 }, { "epoch": 0.9635903796506906, "grad_norm": 276.47442626953125, "learning_rate": 6.426287760031736e-08, "loss": 14.655, "step": 477010 }, { "epoch": 0.9636105802833744, "grad_norm": 95.3863525390625, "learning_rate": 6.42071046217585e-08, "loss": 8.3127, "step": 477020 }, { "epoch": 0.9636307809160582, "grad_norm": 156.61651611328125, "learning_rate": 6.415135569965536e-08, "loss": 18.0984, "step": 477030 }, { "epoch": 0.9636509815487421, "grad_norm": 303.97393798828125, "learning_rate": 6.40956308342805e-08, "loss": 26.0625, "step": 477040 }, { "epoch": 0.9636711821814259, "grad_norm": 451.6395568847656, "learning_rate": 6.403993002590425e-08, "loss": 13.9365, "step": 477050 }, { "epoch": 0.9636913828141097, "grad_norm": 103.19915008544922, "learning_rate": 6.398425327479863e-08, "loss": 12.9072, "step": 477060 }, { "epoch": 0.9637115834467935, "grad_norm": 421.86407470703125, "learning_rate": 6.392860058123506e-08, "loss": 14.673, "step": 477070 }, { "epoch": 0.9637317840794773, "grad_norm": 330.48236083984375, "learning_rate": 6.387297194548558e-08, "loss": 29.77, "step": 477080 }, { "epoch": 0.9637519847121612, "grad_norm": 211.17384338378906, "learning_rate": 6.381736736781996e-08, "loss": 11.6155, "step": 477090 }, { "epoch": 0.963772185344845, "grad_norm": 212.2564697265625, "learning_rate": 6.376178684850965e-08, "loss": 19.4258, "step": 477100 }, { "epoch": 0.9637923859775288, "grad_norm": 375.3379211425781, "learning_rate": 6.370623038782608e-08, "loss": 27.173, "step": 477110 }, { "epoch": 0.9638125866102126, "grad_norm": 27.419113159179688, "learning_rate": 6.365069798603962e-08, "loss": 31.2934, "step": 477120 }, { "epoch": 0.9638327872428964, "grad_norm": 335.3210144042969, "learning_rate": 6.359518964342059e-08, "loss": 12.9161, "step": 477130 }, { "epoch": 0.9638529878755803, "grad_norm": 405.4734191894531, "learning_rate": 6.353970536024045e-08, "loss": 16.8732, "step": 477140 }, { "epoch": 0.9638731885082641, "grad_norm": 583.7369384765625, "learning_rate": 6.348424513676898e-08, "loss": 17.2103, "step": 477150 }, { "epoch": 0.9638933891409479, "grad_norm": 540.9656982421875, "learning_rate": 6.342880897327597e-08, "loss": 24.3372, "step": 477160 }, { "epoch": 0.9639135897736317, "grad_norm": 146.86822509765625, "learning_rate": 6.337339687003286e-08, "loss": 13.8029, "step": 477170 }, { "epoch": 0.9639337904063155, "grad_norm": 539.55712890625, 
"learning_rate": 6.331800882730887e-08, "loss": 15.6876, "step": 477180 }, { "epoch": 0.9639539910389994, "grad_norm": 153.9258575439453, "learning_rate": 6.326264484537437e-08, "loss": 12.555, "step": 477190 }, { "epoch": 0.9639741916716832, "grad_norm": 533.649169921875, "learning_rate": 6.3207304924498e-08, "loss": 22.0611, "step": 477200 }, { "epoch": 0.963994392304367, "grad_norm": 455.38360595703125, "learning_rate": 6.315198906495179e-08, "loss": 21.8381, "step": 477210 }, { "epoch": 0.9640145929370508, "grad_norm": 396.9118347167969, "learning_rate": 6.30966972670033e-08, "loss": 22.9322, "step": 477220 }, { "epoch": 0.9640347935697346, "grad_norm": 515.7533569335938, "learning_rate": 6.304142953092285e-08, "loss": 17.8945, "step": 477230 }, { "epoch": 0.9640549942024185, "grad_norm": 493.3134460449219, "learning_rate": 6.298618585697968e-08, "loss": 13.7864, "step": 477240 }, { "epoch": 0.9640751948351023, "grad_norm": 143.59620666503906, "learning_rate": 6.293096624544304e-08, "loss": 8.9156, "step": 477250 }, { "epoch": 0.964095395467786, "grad_norm": 500.61334228515625, "learning_rate": 6.287577069658213e-08, "loss": 10.7181, "step": 477260 }, { "epoch": 0.9641155961004698, "grad_norm": 179.6245574951172, "learning_rate": 6.282059921066564e-08, "loss": 14.0273, "step": 477270 }, { "epoch": 0.9641357967331536, "grad_norm": 224.61199951171875, "learning_rate": 6.276545178796333e-08, "loss": 9.6322, "step": 477280 }, { "epoch": 0.9641559973658375, "grad_norm": 408.6536865234375, "learning_rate": 6.271032842874281e-08, "loss": 26.8837, "step": 477290 }, { "epoch": 0.9641761979985213, "grad_norm": 296.5220031738281, "learning_rate": 6.265522913327326e-08, "loss": 29.1121, "step": 477300 }, { "epoch": 0.9641963986312051, "grad_norm": 2035.961669921875, "learning_rate": 6.260015390182395e-08, "loss": 31.4718, "step": 477310 }, { "epoch": 0.9642165992638889, "grad_norm": 81.89179992675781, "learning_rate": 6.254510273466186e-08, "loss": 14.0888, "step": 477320 }, { "epoch": 0.9642367998965727, "grad_norm": 422.5835266113281, "learning_rate": 6.249007563205679e-08, "loss": 35.0419, "step": 477330 }, { "epoch": 0.9642570005292566, "grad_norm": 592.8697509765625, "learning_rate": 6.243507259427628e-08, "loss": 23.1118, "step": 477340 }, { "epoch": 0.9642772011619404, "grad_norm": 247.3590545654297, "learning_rate": 6.238009362158793e-08, "loss": 17.3722, "step": 477350 }, { "epoch": 0.9642974017946242, "grad_norm": 285.0309753417969, "learning_rate": 6.232513871426038e-08, "loss": 20.0719, "step": 477360 }, { "epoch": 0.964317602427308, "grad_norm": 174.8265380859375, "learning_rate": 6.227020787256122e-08, "loss": 16.8355, "step": 477370 }, { "epoch": 0.9643378030599918, "grad_norm": 287.5248718261719, "learning_rate": 6.2215301096758e-08, "loss": 11.1058, "step": 477380 }, { "epoch": 0.9643580036926757, "grad_norm": 673.0089721679688, "learning_rate": 6.216041838711828e-08, "loss": 23.4357, "step": 477390 }, { "epoch": 0.9643782043253595, "grad_norm": 94.95233917236328, "learning_rate": 6.210555974391075e-08, "loss": 24.3358, "step": 477400 }, { "epoch": 0.9643984049580433, "grad_norm": 0.0, "learning_rate": 6.205072516740129e-08, "loss": 9.14, "step": 477410 }, { "epoch": 0.9644186055907271, "grad_norm": 367.91717529296875, "learning_rate": 6.199591465785748e-08, "loss": 10.6455, "step": 477420 }, { "epoch": 0.9644388062234109, "grad_norm": 476.6819152832031, "learning_rate": 6.194112821554687e-08, "loss": 29.8962, "step": 477430 }, { "epoch": 0.9644590068560948, "grad_norm": 
690.882568359375, "learning_rate": 6.188636584073648e-08, "loss": 19.3234, "step": 477440 }, { "epoch": 0.9644792074887786, "grad_norm": 66.78436279296875, "learning_rate": 6.183162753369221e-08, "loss": 11.4298, "step": 477450 }, { "epoch": 0.9644994081214624, "grad_norm": 493.62298583984375, "learning_rate": 6.177691329468217e-08, "loss": 21.2334, "step": 477460 }, { "epoch": 0.9645196087541462, "grad_norm": 125.45512390136719, "learning_rate": 6.17222231239728e-08, "loss": 21.7523, "step": 477470 }, { "epoch": 0.96453980938683, "grad_norm": 481.461181640625, "learning_rate": 6.166755702183058e-08, "loss": 20.9767, "step": 477480 }, { "epoch": 0.9645600100195139, "grad_norm": 17.118911743164062, "learning_rate": 6.161291498852084e-08, "loss": 19.0655, "step": 477490 }, { "epoch": 0.9645802106521977, "grad_norm": 752.0841064453125, "learning_rate": 6.15582970243117e-08, "loss": 25.7584, "step": 477500 }, { "epoch": 0.9646004112848814, "grad_norm": 101.85774230957031, "learning_rate": 6.150370312946797e-08, "loss": 7.6507, "step": 477510 }, { "epoch": 0.9646206119175652, "grad_norm": 380.5328674316406, "learning_rate": 6.144913330425606e-08, "loss": 32.0278, "step": 477520 }, { "epoch": 0.964640812550249, "grad_norm": 403.0509033203125, "learning_rate": 6.139458754894245e-08, "loss": 21.2497, "step": 477530 }, { "epoch": 0.9646610131829328, "grad_norm": 670.5574340820312, "learning_rate": 6.134006586379249e-08, "loss": 15.94, "step": 477540 }, { "epoch": 0.9646812138156167, "grad_norm": 410.8387145996094, "learning_rate": 6.128556824907205e-08, "loss": 21.5452, "step": 477550 }, { "epoch": 0.9647014144483005, "grad_norm": 534.5658569335938, "learning_rate": 6.12310947050465e-08, "loss": 9.8214, "step": 477560 }, { "epoch": 0.9647216150809843, "grad_norm": 365.89556884765625, "learning_rate": 6.11766452319823e-08, "loss": 15.1667, "step": 477570 }, { "epoch": 0.9647418157136681, "grad_norm": 132.87437438964844, "learning_rate": 6.112221983014366e-08, "loss": 10.2982, "step": 477580 }, { "epoch": 0.964762016346352, "grad_norm": 169.1856689453125, "learning_rate": 6.106781849979648e-08, "loss": 15.1391, "step": 477590 }, { "epoch": 0.9647822169790358, "grad_norm": 393.377197265625, "learning_rate": 6.101344124120557e-08, "loss": 28.7703, "step": 477600 }, { "epoch": 0.9648024176117196, "grad_norm": 539.4514770507812, "learning_rate": 6.095908805463624e-08, "loss": 29.2519, "step": 477610 }, { "epoch": 0.9648226182444034, "grad_norm": 331.7099914550781, "learning_rate": 6.09047589403533e-08, "loss": 24.0463, "step": 477620 }, { "epoch": 0.9648428188770872, "grad_norm": 604.9713745117188, "learning_rate": 6.085045389862154e-08, "loss": 19.9847, "step": 477630 }, { "epoch": 0.964863019509771, "grad_norm": 330.50347900390625, "learning_rate": 6.079617292970519e-08, "loss": 9.4264, "step": 477640 }, { "epoch": 0.9648832201424549, "grad_norm": 481.7924499511719, "learning_rate": 6.074191603386958e-08, "loss": 23.5693, "step": 477650 }, { "epoch": 0.9649034207751387, "grad_norm": 245.63783264160156, "learning_rate": 6.068768321137897e-08, "loss": 10.665, "step": 477660 }, { "epoch": 0.9649236214078225, "grad_norm": 2.3313724994659424, "learning_rate": 6.0633474462497e-08, "loss": 8.234, "step": 477670 }, { "epoch": 0.9649438220405063, "grad_norm": 282.9562683105469, "learning_rate": 6.057928978748906e-08, "loss": 9.9405, "step": 477680 }, { "epoch": 0.9649640226731901, "grad_norm": 374.821044921875, "learning_rate": 6.052512918661879e-08, "loss": 15.405, "step": 477690 }, { "epoch": 
0.964984223305874, "grad_norm": 202.2202606201172, "learning_rate": 6.047099266014877e-08, "loss": 22.1854, "step": 477700 }, { "epoch": 0.9650044239385578, "grad_norm": 503.0718688964844, "learning_rate": 6.041688020834491e-08, "loss": 18.3147, "step": 477710 }, { "epoch": 0.9650246245712416, "grad_norm": 5.190357208251953, "learning_rate": 6.036279183146975e-08, "loss": 18.278, "step": 477720 }, { "epoch": 0.9650448252039254, "grad_norm": 569.5199584960938, "learning_rate": 6.030872752978756e-08, "loss": 18.4054, "step": 477730 }, { "epoch": 0.9650650258366092, "grad_norm": 393.5696105957031, "learning_rate": 6.025468730356144e-08, "loss": 16.5542, "step": 477740 }, { "epoch": 0.9650852264692931, "grad_norm": 1.4170738458633423, "learning_rate": 6.020067115305451e-08, "loss": 24.3471, "step": 477750 }, { "epoch": 0.9651054271019769, "grad_norm": 224.58607482910156, "learning_rate": 6.0146679078531e-08, "loss": 7.6774, "step": 477760 }, { "epoch": 0.9651256277346606, "grad_norm": 371.5082092285156, "learning_rate": 6.009271108025294e-08, "loss": 11.425, "step": 477770 }, { "epoch": 0.9651458283673444, "grad_norm": 591.8775634765625, "learning_rate": 6.003876715848345e-08, "loss": 18.0876, "step": 477780 }, { "epoch": 0.9651660290000282, "grad_norm": 681.1532592773438, "learning_rate": 5.998484731348675e-08, "loss": 11.5306, "step": 477790 }, { "epoch": 0.9651862296327121, "grad_norm": 209.39012145996094, "learning_rate": 5.993095154552431e-08, "loss": 12.4092, "step": 477800 }, { "epoch": 0.9652064302653959, "grad_norm": 232.25473022460938, "learning_rate": 5.987707985485925e-08, "loss": 23.2592, "step": 477810 }, { "epoch": 0.9652266308980797, "grad_norm": 493.2044677734375, "learning_rate": 5.982323224175468e-08, "loss": 12.4573, "step": 477820 }, { "epoch": 0.9652468315307635, "grad_norm": 163.55963134765625, "learning_rate": 5.976940870647207e-08, "loss": 29.157, "step": 477830 }, { "epoch": 0.9652670321634473, "grad_norm": 152.7935028076172, "learning_rate": 5.9715609249274e-08, "loss": 16.3347, "step": 477840 }, { "epoch": 0.9652872327961312, "grad_norm": 170.32513427734375, "learning_rate": 5.966183387042246e-08, "loss": 22.4016, "step": 477850 }, { "epoch": 0.965307433428815, "grad_norm": 350.372802734375, "learning_rate": 5.960808257018113e-08, "loss": 17.2876, "step": 477860 }, { "epoch": 0.9653276340614988, "grad_norm": 345.9002990722656, "learning_rate": 5.955435534881038e-08, "loss": 22.5925, "step": 477870 }, { "epoch": 0.9653478346941826, "grad_norm": 49.1010627746582, "learning_rate": 5.950065220657164e-08, "loss": 6.0972, "step": 477880 }, { "epoch": 0.9653680353268664, "grad_norm": 442.44207763671875, "learning_rate": 5.9446973143728605e-08, "loss": 22.0141, "step": 477890 }, { "epoch": 0.9653882359595503, "grad_norm": 59.3377799987793, "learning_rate": 5.939331816054161e-08, "loss": 25.7228, "step": 477900 }, { "epoch": 0.9654084365922341, "grad_norm": 461.59124755859375, "learning_rate": 5.9339687257272126e-08, "loss": 26.488, "step": 477910 }, { "epoch": 0.9654286372249179, "grad_norm": 312.3343505859375, "learning_rate": 5.92860804341816e-08, "loss": 26.8083, "step": 477920 }, { "epoch": 0.9654488378576017, "grad_norm": 451.34051513671875, "learning_rate": 5.9232497691531496e-08, "loss": 20.6006, "step": 477930 }, { "epoch": 0.9654690384902855, "grad_norm": 146.99241638183594, "learning_rate": 5.917893902958327e-08, "loss": 14.9505, "step": 477940 }, { "epoch": 0.9654892391229694, "grad_norm": 617.3377075195312, "learning_rate": 5.9125404448597825e-08, "loss": 
9.5861, "step": 477950 }, { "epoch": 0.9655094397556532, "grad_norm": 416.9510192871094, "learning_rate": 5.9071893948835505e-08, "loss": 26.2968, "step": 477960 }, { "epoch": 0.965529640388337, "grad_norm": 112.83805084228516, "learning_rate": 5.901840753055776e-08, "loss": 36.8032, "step": 477970 }, { "epoch": 0.9655498410210208, "grad_norm": 199.9210205078125, "learning_rate": 5.896494519402496e-08, "loss": 15.8749, "step": 477980 }, { "epoch": 0.9655700416537046, "grad_norm": 339.83465576171875, "learning_rate": 5.891150693949743e-08, "loss": 20.127, "step": 477990 }, { "epoch": 0.9655902422863885, "grad_norm": 863.1902465820312, "learning_rate": 5.8858092767236084e-08, "loss": 29.7857, "step": 478000 }, { "epoch": 0.9656104429190723, "grad_norm": 264.4561462402344, "learning_rate": 5.880470267750127e-08, "loss": 27.0998, "step": 478010 }, { "epoch": 0.9656306435517561, "grad_norm": 359.2437744140625, "learning_rate": 5.8751336670552775e-08, "loss": 19.5618, "step": 478020 }, { "epoch": 0.9656508441844398, "grad_norm": 617.5918579101562, "learning_rate": 5.8697994746650946e-08, "loss": 27.1238, "step": 478030 }, { "epoch": 0.9656710448171236, "grad_norm": 2867.60791015625, "learning_rate": 5.864467690605613e-08, "loss": 27.6647, "step": 478040 }, { "epoch": 0.9656912454498074, "grad_norm": 291.1172790527344, "learning_rate": 5.8591383149028126e-08, "loss": 32.4455, "step": 478050 }, { "epoch": 0.9657114460824913, "grad_norm": 40.02949523925781, "learning_rate": 5.8538113475825606e-08, "loss": 23.7419, "step": 478060 }, { "epoch": 0.9657316467151751, "grad_norm": 311.8459777832031, "learning_rate": 5.848486788670893e-08, "loss": 22.3518, "step": 478070 }, { "epoch": 0.9657518473478589, "grad_norm": 315.1097106933594, "learning_rate": 5.843164638193899e-08, "loss": 11.6773, "step": 478080 }, { "epoch": 0.9657720479805427, "grad_norm": 173.48623657226562, "learning_rate": 5.837844896177225e-08, "loss": 15.2753, "step": 478090 }, { "epoch": 0.9657922486132265, "grad_norm": 233.17698669433594, "learning_rate": 5.8325275626470166e-08, "loss": 14.2047, "step": 478100 }, { "epoch": 0.9658124492459104, "grad_norm": 757.6137084960938, "learning_rate": 5.827212637629198e-08, "loss": 20.2655, "step": 478110 }, { "epoch": 0.9658326498785942, "grad_norm": 354.80108642578125, "learning_rate": 5.821900121149582e-08, "loss": 31.6603, "step": 478120 }, { "epoch": 0.965852850511278, "grad_norm": 187.01177978515625, "learning_rate": 5.8165900132340356e-08, "loss": 27.2038, "step": 478130 }, { "epoch": 0.9658730511439618, "grad_norm": 545.61181640625, "learning_rate": 5.8112823139085396e-08, "loss": 23.4992, "step": 478140 }, { "epoch": 0.9658932517766456, "grad_norm": 373.3515625, "learning_rate": 5.80597702319885e-08, "loss": 18.8845, "step": 478150 }, { "epoch": 0.9659134524093295, "grad_norm": 558.9241333007812, "learning_rate": 5.800674141130946e-08, "loss": 19.6263, "step": 478160 }, { "epoch": 0.9659336530420133, "grad_norm": 379.2434387207031, "learning_rate": 5.795373667730586e-08, "loss": 19.4863, "step": 478170 }, { "epoch": 0.9659538536746971, "grad_norm": 53.8942985534668, "learning_rate": 5.7900756030236924e-08, "loss": 19.1105, "step": 478180 }, { "epoch": 0.9659740543073809, "grad_norm": 489.83416748046875, "learning_rate": 5.7847799470360236e-08, "loss": 13.0014, "step": 478190 }, { "epoch": 0.9659942549400647, "grad_norm": 657.77099609375, "learning_rate": 5.7794866997933355e-08, "loss": 11.6912, "step": 478200 }, { "epoch": 0.9660144555727486, "grad_norm": 244.96229553222656, 
"learning_rate": 5.774195861321552e-08, "loss": 37.3842, "step": 478210 }, { "epoch": 0.9660346562054324, "grad_norm": 335.60382080078125, "learning_rate": 5.76890743164632e-08, "loss": 16.0015, "step": 478220 }, { "epoch": 0.9660548568381162, "grad_norm": 518.923583984375, "learning_rate": 5.763621410793563e-08, "loss": 27.9391, "step": 478230 }, { "epoch": 0.9660750574708, "grad_norm": 56.98911666870117, "learning_rate": 5.758337798788982e-08, "loss": 9.7949, "step": 478240 }, { "epoch": 0.9660952581034838, "grad_norm": 637.66015625, "learning_rate": 5.753056595658224e-08, "loss": 19.3816, "step": 478250 }, { "epoch": 0.9661154587361677, "grad_norm": 145.6067352294922, "learning_rate": 5.7477778014272124e-08, "loss": 21.7067, "step": 478260 }, { "epoch": 0.9661356593688515, "grad_norm": 318.15826416015625, "learning_rate": 5.7425014161215375e-08, "loss": 23.1052, "step": 478270 }, { "epoch": 0.9661558600015352, "grad_norm": 899.2626953125, "learning_rate": 5.737227439766957e-08, "loss": 19.9951, "step": 478280 }, { "epoch": 0.966176060634219, "grad_norm": 159.44384765625, "learning_rate": 5.7319558723892275e-08, "loss": 11.5791, "step": 478290 }, { "epoch": 0.9661962612669028, "grad_norm": 759.2963256835938, "learning_rate": 5.726686714013996e-08, "loss": 20.4163, "step": 478300 }, { "epoch": 0.9662164618995867, "grad_norm": 241.1317596435547, "learning_rate": 5.7214199646669076e-08, "loss": 30.4358, "step": 478310 }, { "epoch": 0.9662366625322705, "grad_norm": 260.4803161621094, "learning_rate": 5.716155624373665e-08, "loss": 16.714, "step": 478320 }, { "epoch": 0.9662568631649543, "grad_norm": 84.16582489013672, "learning_rate": 5.710893693159969e-08, "loss": 18.6407, "step": 478330 }, { "epoch": 0.9662770637976381, "grad_norm": 439.8134765625, "learning_rate": 5.705634171051411e-08, "loss": 19.6875, "step": 478340 }, { "epoch": 0.9662972644303219, "grad_norm": 137.8370819091797, "learning_rate": 5.700377058073636e-08, "loss": 26.0859, "step": 478350 }, { "epoch": 0.9663174650630058, "grad_norm": 316.39593505859375, "learning_rate": 5.6951223542522915e-08, "loss": 34.9143, "step": 478360 }, { "epoch": 0.9663376656956896, "grad_norm": 157.16848754882812, "learning_rate": 5.6898700596129674e-08, "loss": 13.9034, "step": 478370 }, { "epoch": 0.9663578663283734, "grad_norm": 149.63681030273438, "learning_rate": 5.684620174181255e-08, "loss": 13.5584, "step": 478380 }, { "epoch": 0.9663780669610572, "grad_norm": 177.0972442626953, "learning_rate": 5.679372697982688e-08, "loss": 19.0456, "step": 478390 }, { "epoch": 0.966398267593741, "grad_norm": 292.07403564453125, "learning_rate": 5.674127631043025e-08, "loss": 11.7672, "step": 478400 }, { "epoch": 0.9664184682264249, "grad_norm": 1.6651360988616943, "learning_rate": 5.668884973387634e-08, "loss": 14.0168, "step": 478410 }, { "epoch": 0.9664386688591087, "grad_norm": 468.69232177734375, "learning_rate": 5.663644725042161e-08, "loss": 31.5944, "step": 478420 }, { "epoch": 0.9664588694917925, "grad_norm": 530.5999755859375, "learning_rate": 5.658406886032142e-08, "loss": 22.6728, "step": 478430 }, { "epoch": 0.9664790701244763, "grad_norm": 627.4494018554688, "learning_rate": 5.653171456383055e-08, "loss": 21.3161, "step": 478440 }, { "epoch": 0.9664992707571601, "grad_norm": 374.77008056640625, "learning_rate": 5.647938436120437e-08, "loss": 11.9045, "step": 478450 }, { "epoch": 0.966519471389844, "grad_norm": 876.54443359375, "learning_rate": 5.642707825269822e-08, "loss": 20.375, "step": 478460 }, { "epoch": 0.9665396720225278, 
"grad_norm": 236.10513305664062, "learning_rate": 5.637479623856745e-08, "loss": 18.2886, "step": 478470 }, { "epoch": 0.9665598726552116, "grad_norm": 345.92333984375, "learning_rate": 5.632253831906631e-08, "loss": 19.2766, "step": 478480 }, { "epoch": 0.9665800732878954, "grad_norm": 260.10943603515625, "learning_rate": 5.6270304494449035e-08, "loss": 20.5953, "step": 478490 }, { "epoch": 0.9666002739205792, "grad_norm": 98.62396240234375, "learning_rate": 5.621809476497098e-08, "loss": 31.1268, "step": 478500 }, { "epoch": 0.966620474553263, "grad_norm": 279.5716857910156, "learning_rate": 5.616590913088638e-08, "loss": 19.0142, "step": 478510 }, { "epoch": 0.9666406751859469, "grad_norm": 140.54217529296875, "learning_rate": 5.611374759244892e-08, "loss": 11.6353, "step": 478520 }, { "epoch": 0.9666608758186307, "grad_norm": 748.846435546875, "learning_rate": 5.6061610149913957e-08, "loss": 36.219, "step": 478530 }, { "epoch": 0.9666810764513144, "grad_norm": 199.6036376953125, "learning_rate": 5.6009496803534624e-08, "loss": 25.0226, "step": 478540 }, { "epoch": 0.9667012770839982, "grad_norm": 400.14459228515625, "learning_rate": 5.595740755356627e-08, "loss": 17.4089, "step": 478550 }, { "epoch": 0.966721477716682, "grad_norm": 3818.854736328125, "learning_rate": 5.590534240026146e-08, "loss": 42.379, "step": 478560 }, { "epoch": 0.9667416783493659, "grad_norm": 6.718419075012207, "learning_rate": 5.58533013438739e-08, "loss": 19.4567, "step": 478570 }, { "epoch": 0.9667618789820497, "grad_norm": 202.12559509277344, "learning_rate": 5.580128438465837e-08, "loss": 11.5954, "step": 478580 }, { "epoch": 0.9667820796147335, "grad_norm": 521.642333984375, "learning_rate": 5.574929152286745e-08, "loss": 14.6131, "step": 478590 }, { "epoch": 0.9668022802474173, "grad_norm": 276.46636962890625, "learning_rate": 5.569732275875428e-08, "loss": 18.7578, "step": 478600 }, { "epoch": 0.9668224808801011, "grad_norm": 231.13758850097656, "learning_rate": 5.5645378092573085e-08, "loss": 34.6684, "step": 478610 }, { "epoch": 0.966842681512785, "grad_norm": 396.3559875488281, "learning_rate": 5.559345752457701e-08, "loss": 14.3551, "step": 478620 }, { "epoch": 0.9668628821454688, "grad_norm": 597.891845703125, "learning_rate": 5.554156105501862e-08, "loss": 46.5115, "step": 478630 }, { "epoch": 0.9668830827781526, "grad_norm": 2.6351895332336426, "learning_rate": 5.54896886841505e-08, "loss": 26.3693, "step": 478640 }, { "epoch": 0.9669032834108364, "grad_norm": 122.9947509765625, "learning_rate": 5.543784041222633e-08, "loss": 12.3833, "step": 478650 }, { "epoch": 0.9669234840435202, "grad_norm": 285.569580078125, "learning_rate": 5.538601623949869e-08, "loss": 10.7647, "step": 478660 }, { "epoch": 0.9669436846762041, "grad_norm": 461.61627197265625, "learning_rate": 5.533421616621903e-08, "loss": 18.2578, "step": 478670 }, { "epoch": 0.9669638853088879, "grad_norm": 484.755859375, "learning_rate": 5.528244019264106e-08, "loss": 17.0171, "step": 478680 }, { "epoch": 0.9669840859415717, "grad_norm": 177.4759063720703, "learning_rate": 5.5230688319017344e-08, "loss": 18.0975, "step": 478690 }, { "epoch": 0.9670042865742555, "grad_norm": 557.1797485351562, "learning_rate": 5.517896054559879e-08, "loss": 21.3366, "step": 478700 }, { "epoch": 0.9670244872069393, "grad_norm": 383.9194030761719, "learning_rate": 5.512725687263853e-08, "loss": 15.7219, "step": 478710 }, { "epoch": 0.9670446878396232, "grad_norm": 586.3380126953125, "learning_rate": 5.507557730038859e-08, "loss": 14.0986, "step": 478720 }, 
{ "epoch": 0.967064888472307, "grad_norm": 580.1846923828125, "learning_rate": 5.5023921829100434e-08, "loss": 24.0707, "step": 478730 }, { "epoch": 0.9670850891049908, "grad_norm": 572.0950927734375, "learning_rate": 5.497229045902552e-08, "loss": 33.8662, "step": 478740 }, { "epoch": 0.9671052897376746, "grad_norm": 19.00641632080078, "learning_rate": 5.492068319041588e-08, "loss": 36.7493, "step": 478750 }, { "epoch": 0.9671254903703584, "grad_norm": 484.2851257324219, "learning_rate": 5.4869100023523526e-08, "loss": 18.0603, "step": 478760 }, { "epoch": 0.9671456910030423, "grad_norm": 218.1751251220703, "learning_rate": 5.4817540958598814e-08, "loss": 9.6601, "step": 478770 }, { "epoch": 0.9671658916357261, "grad_norm": 545.3314819335938, "learning_rate": 5.476600599589377e-08, "loss": 25.2217, "step": 478780 }, { "epoch": 0.9671860922684098, "grad_norm": 230.48045349121094, "learning_rate": 5.471449513565985e-08, "loss": 22.9013, "step": 478790 }, { "epoch": 0.9672062929010936, "grad_norm": 163.40428161621094, "learning_rate": 5.466300837814797e-08, "loss": 18.2725, "step": 478800 }, { "epoch": 0.9672264935337774, "grad_norm": 319.80743408203125, "learning_rate": 5.461154572360794e-08, "loss": 21.5176, "step": 478810 }, { "epoch": 0.9672466941664613, "grad_norm": 519.5460205078125, "learning_rate": 5.456010717229177e-08, "loss": 23.0734, "step": 478820 }, { "epoch": 0.9672668947991451, "grad_norm": 264.7811584472656, "learning_rate": 5.4508692724449806e-08, "loss": 19.0381, "step": 478830 }, { "epoch": 0.9672870954318289, "grad_norm": 236.47109985351562, "learning_rate": 5.445730238033298e-08, "loss": 15.7891, "step": 478840 }, { "epoch": 0.9673072960645127, "grad_norm": 237.08204650878906, "learning_rate": 5.440593614019107e-08, "loss": 12.1388, "step": 478850 }, { "epoch": 0.9673274966971965, "grad_norm": 171.9046173095703, "learning_rate": 5.435459400427501e-08, "loss": 13.042, "step": 478860 }, { "epoch": 0.9673476973298804, "grad_norm": 631.8507690429688, "learning_rate": 5.4303275972834577e-08, "loss": 23.7042, "step": 478870 }, { "epoch": 0.9673678979625642, "grad_norm": 141.70753479003906, "learning_rate": 5.42519820461207e-08, "loss": 10.7698, "step": 478880 }, { "epoch": 0.967388098595248, "grad_norm": 266.23284912109375, "learning_rate": 5.4200712224382056e-08, "loss": 16.0063, "step": 478890 }, { "epoch": 0.9674082992279318, "grad_norm": 253.25729370117188, "learning_rate": 5.414946650786957e-08, "loss": 10.512, "step": 478900 }, { "epoch": 0.9674284998606156, "grad_norm": 220.38278198242188, "learning_rate": 5.409824489683247e-08, "loss": 29.1843, "step": 478910 }, { "epoch": 0.9674487004932995, "grad_norm": 304.1505432128906, "learning_rate": 5.4047047391521114e-08, "loss": 23.3969, "step": 478920 }, { "epoch": 0.9674689011259833, "grad_norm": 145.32652282714844, "learning_rate": 5.39958739921842e-08, "loss": 23.0794, "step": 478930 }, { "epoch": 0.9674891017586671, "grad_norm": 579.7205810546875, "learning_rate": 5.394472469907208e-08, "loss": 25.9221, "step": 478940 }, { "epoch": 0.9675093023913509, "grad_norm": 588.6752319335938, "learning_rate": 5.389359951243345e-08, "loss": 9.451, "step": 478950 }, { "epoch": 0.9675295030240347, "grad_norm": 196.82383728027344, "learning_rate": 5.3842498432516986e-08, "loss": 14.1151, "step": 478960 }, { "epoch": 0.9675497036567186, "grad_norm": 685.24951171875, "learning_rate": 5.3791421459571947e-08, "loss": 16.9339, "step": 478970 }, { "epoch": 0.9675699042894024, "grad_norm": 78.54755401611328, "learning_rate": 
5.374036859384868e-08, "loss": 10.4572, "step": 478980 }, { "epoch": 0.9675901049220862, "grad_norm": 475.5689697265625, "learning_rate": 5.3689339835594215e-08, "loss": 14.2306, "step": 478990 }, { "epoch": 0.96761030555477, "grad_norm": 257.8653259277344, "learning_rate": 5.363833518505834e-08, "loss": 7.7078, "step": 479000 }, { "epoch": 0.9676305061874538, "grad_norm": 461.0395202636719, "learning_rate": 5.358735464248921e-08, "loss": 17.0037, "step": 479010 }, { "epoch": 0.9676507068201377, "grad_norm": 271.3075256347656, "learning_rate": 5.3536398208135495e-08, "loss": 25.8619, "step": 479020 }, { "epoch": 0.9676709074528215, "grad_norm": 59.960697174072266, "learning_rate": 5.348546588224535e-08, "loss": 22.1884, "step": 479030 }, { "epoch": 0.9676911080855053, "grad_norm": 94.71739959716797, "learning_rate": 5.343455766506689e-08, "loss": 21.2829, "step": 479040 }, { "epoch": 0.967711308718189, "grad_norm": 124.12822723388672, "learning_rate": 5.338367355684881e-08, "loss": 21.4375, "step": 479050 }, { "epoch": 0.9677315093508728, "grad_norm": 197.34124755859375, "learning_rate": 5.33328135578387e-08, "loss": 24.6474, "step": 479060 }, { "epoch": 0.9677517099835566, "grad_norm": 378.1953125, "learning_rate": 5.3281977668284136e-08, "loss": 32.3378, "step": 479070 }, { "epoch": 0.9677719106162405, "grad_norm": 154.08151245117188, "learning_rate": 5.323116588843324e-08, "loss": 16.6362, "step": 479080 }, { "epoch": 0.9677921112489243, "grad_norm": 331.41455078125, "learning_rate": 5.318037821853417e-08, "loss": 24.9352, "step": 479090 }, { "epoch": 0.9678123118816081, "grad_norm": 417.18731689453125, "learning_rate": 5.312961465883393e-08, "loss": 19.8879, "step": 479100 }, { "epoch": 0.9678325125142919, "grad_norm": 524.3778076171875, "learning_rate": 5.307887520957955e-08, "loss": 11.2781, "step": 479110 }, { "epoch": 0.9678527131469757, "grad_norm": 276.419921875, "learning_rate": 5.302815987101917e-08, "loss": 11.3561, "step": 479120 }, { "epoch": 0.9678729137796596, "grad_norm": 623.837646484375, "learning_rate": 5.2977468643399254e-08, "loss": 23.5989, "step": 479130 }, { "epoch": 0.9678931144123434, "grad_norm": 422.738525390625, "learning_rate": 5.292680152696739e-08, "loss": 16.2322, "step": 479140 }, { "epoch": 0.9679133150450272, "grad_norm": 76.41869354248047, "learning_rate": 5.2876158521969476e-08, "loss": 19.3891, "step": 479150 }, { "epoch": 0.967933515677711, "grad_norm": 361.3853759765625, "learning_rate": 5.282553962865422e-08, "loss": 13.3551, "step": 479160 }, { "epoch": 0.9679537163103948, "grad_norm": 378.0127868652344, "learning_rate": 5.2774944847266976e-08, "loss": 15.339, "step": 479170 }, { "epoch": 0.9679739169430787, "grad_norm": 353.4735107421875, "learning_rate": 5.27243741780542e-08, "loss": 21.6208, "step": 479180 }, { "epoch": 0.9679941175757625, "grad_norm": 434.619384765625, "learning_rate": 5.267382762126294e-08, "loss": 19.5157, "step": 479190 }, { "epoch": 0.9680143182084463, "grad_norm": 356.3162536621094, "learning_rate": 5.262330517713965e-08, "loss": 9.1625, "step": 479200 }, { "epoch": 0.9680345188411301, "grad_norm": 324.1009521484375, "learning_rate": 5.2572806845930244e-08, "loss": 25.1147, "step": 479210 }, { "epoch": 0.9680547194738139, "grad_norm": 196.4537353515625, "learning_rate": 5.252233262788065e-08, "loss": 17.194, "step": 479220 }, { "epoch": 0.9680749201064978, "grad_norm": 162.21270751953125, "learning_rate": 5.247188252323787e-08, "loss": 12.6761, "step": 479230 }, { "epoch": 0.9680951207391816, "grad_norm": 
512.8888549804688, "learning_rate": 5.242145653224673e-08, "loss": 19.6846, "step": 479240 }, { "epoch": 0.9681153213718654, "grad_norm": 164.4032745361328, "learning_rate": 5.237105465515258e-08, "loss": 16.9834, "step": 479250 }, { "epoch": 0.9681355220045492, "grad_norm": 311.5745544433594, "learning_rate": 5.2320676892202996e-08, "loss": 16.5828, "step": 479260 }, { "epoch": 0.968155722637233, "grad_norm": 305.44390869140625, "learning_rate": 5.227032324364167e-08, "loss": 16.0539, "step": 479270 }, { "epoch": 0.9681759232699169, "grad_norm": 198.4656982421875, "learning_rate": 5.2219993709714535e-08, "loss": 14.3294, "step": 479280 }, { "epoch": 0.9681961239026007, "grad_norm": 338.1217041015625, "learning_rate": 5.2169688290667485e-08, "loss": 17.7753, "step": 479290 }, { "epoch": 0.9682163245352845, "grad_norm": 446.63409423828125, "learning_rate": 5.2119406986745336e-08, "loss": 16.3242, "step": 479300 }, { "epoch": 0.9682365251679682, "grad_norm": 288.9396667480469, "learning_rate": 5.206914979819289e-08, "loss": 18.615, "step": 479310 }, { "epoch": 0.968256725800652, "grad_norm": 410.0702209472656, "learning_rate": 5.2018916725254945e-08, "loss": 31.4091, "step": 479320 }, { "epoch": 0.9682769264333358, "grad_norm": 287.1968994140625, "learning_rate": 5.196870776817742e-08, "loss": 17.3582, "step": 479330 }, { "epoch": 0.9682971270660197, "grad_norm": 151.74069213867188, "learning_rate": 5.191852292720401e-08, "loss": 32.3567, "step": 479340 }, { "epoch": 0.9683173276987035, "grad_norm": 738.2186279296875, "learning_rate": 5.186836220257951e-08, "loss": 19.9806, "step": 479350 }, { "epoch": 0.9683375283313873, "grad_norm": 647.9827880859375, "learning_rate": 5.1818225594548185e-08, "loss": 12.0309, "step": 479360 }, { "epoch": 0.9683577289640711, "grad_norm": 210.44410705566406, "learning_rate": 5.176811310335539e-08, "loss": 19.8676, "step": 479370 }, { "epoch": 0.968377929596755, "grad_norm": 104.06281280517578, "learning_rate": 5.17180247292437e-08, "loss": 9.6487, "step": 479380 }, { "epoch": 0.9683981302294388, "grad_norm": 136.3513641357422, "learning_rate": 5.1667960472459034e-08, "loss": 13.6415, "step": 479390 }, { "epoch": 0.9684183308621226, "grad_norm": 104.18755340576172, "learning_rate": 5.161792033324398e-08, "loss": 12.012, "step": 479400 }, { "epoch": 0.9684385314948064, "grad_norm": 200.2335662841797, "learning_rate": 5.1567904311843886e-08, "loss": 15.0877, "step": 479410 }, { "epoch": 0.9684587321274902, "grad_norm": 316.23980712890625, "learning_rate": 5.151791240850079e-08, "loss": 14.0139, "step": 479420 }, { "epoch": 0.968478932760174, "grad_norm": 485.8829345703125, "learning_rate": 5.14679446234595e-08, "loss": 38.5259, "step": 479430 }, { "epoch": 0.9684991333928579, "grad_norm": 583.5144653320312, "learning_rate": 5.14180009569637e-08, "loss": 24.376, "step": 479440 }, { "epoch": 0.9685193340255417, "grad_norm": 827.423583984375, "learning_rate": 5.136808140925542e-08, "loss": 14.9787, "step": 479450 }, { "epoch": 0.9685395346582255, "grad_norm": 401.1725769042969, "learning_rate": 5.131818598057947e-08, "loss": 17.1449, "step": 479460 }, { "epoch": 0.9685597352909093, "grad_norm": 236.3235626220703, "learning_rate": 5.126831467117843e-08, "loss": 23.819, "step": 479470 }, { "epoch": 0.9685799359235931, "grad_norm": 396.2535400390625, "learning_rate": 5.121846748129544e-08, "loss": 20.7878, "step": 479480 }, { "epoch": 0.968600136556277, "grad_norm": 363.9278259277344, "learning_rate": 5.116864441117364e-08, "loss": 20.4867, "step": 479490 }, { 
"epoch": 0.9686203371889608, "grad_norm": 392.9410400390625, "learning_rate": 5.111884546105506e-08, "loss": 18.4554, "step": 479500 }, { "epoch": 0.9686405378216446, "grad_norm": 536.6632080078125, "learning_rate": 5.106907063118394e-08, "loss": 13.4975, "step": 479510 }, { "epoch": 0.9686607384543284, "grad_norm": 448.8122863769531, "learning_rate": 5.10193199218012e-08, "loss": 23.2407, "step": 479520 }, { "epoch": 0.9686809390870122, "grad_norm": 314.69012451171875, "learning_rate": 5.0969593333149994e-08, "loss": 17.3976, "step": 479530 }, { "epoch": 0.9687011397196961, "grad_norm": 206.2161407470703, "learning_rate": 5.091989086547289e-08, "loss": 20.0975, "step": 479540 }, { "epoch": 0.9687213403523799, "grad_norm": 59.227203369140625, "learning_rate": 5.0870212519012477e-08, "loss": 14.2258, "step": 479550 }, { "epoch": 0.9687415409850636, "grad_norm": 420.8604431152344, "learning_rate": 5.082055829400967e-08, "loss": 17.1765, "step": 479560 }, { "epoch": 0.9687617416177474, "grad_norm": 511.0311584472656, "learning_rate": 5.077092819070761e-08, "loss": 10.442, "step": 479570 }, { "epoch": 0.9687819422504312, "grad_norm": 241.3959503173828, "learning_rate": 5.072132220934722e-08, "loss": 23.6839, "step": 479580 }, { "epoch": 0.9688021428831151, "grad_norm": 407.5174865722656, "learning_rate": 5.067174035017164e-08, "loss": 11.5223, "step": 479590 }, { "epoch": 0.9688223435157989, "grad_norm": 148.66758728027344, "learning_rate": 5.062218261342122e-08, "loss": 15.6233, "step": 479600 }, { "epoch": 0.9688425441484827, "grad_norm": 478.1081237792969, "learning_rate": 5.0572648999338e-08, "loss": 10.8934, "step": 479610 }, { "epoch": 0.9688627447811665, "grad_norm": 410.79217529296875, "learning_rate": 5.052313950816401e-08, "loss": 14.1215, "step": 479620 }, { "epoch": 0.9688829454138503, "grad_norm": 486.24853515625, "learning_rate": 5.0473654140139604e-08, "loss": 23.106, "step": 479630 }, { "epoch": 0.9689031460465342, "grad_norm": 893.8436279296875, "learning_rate": 5.042419289550571e-08, "loss": 11.5406, "step": 479640 }, { "epoch": 0.968923346679218, "grad_norm": 1036.2552490234375, "learning_rate": 5.0374755774504346e-08, "loss": 23.9848, "step": 479650 }, { "epoch": 0.9689435473119018, "grad_norm": 652.051025390625, "learning_rate": 5.032534277737644e-08, "loss": 14.6965, "step": 479660 }, { "epoch": 0.9689637479445856, "grad_norm": 469.65863037109375, "learning_rate": 5.027595390436235e-08, "loss": 13.7001, "step": 479670 }, { "epoch": 0.9689839485772694, "grad_norm": 601.7528686523438, "learning_rate": 5.0226589155702445e-08, "loss": 16.1731, "step": 479680 }, { "epoch": 0.9690041492099533, "grad_norm": 601.6310424804688, "learning_rate": 5.017724853163819e-08, "loss": 29.1537, "step": 479690 }, { "epoch": 0.9690243498426371, "grad_norm": 367.8590393066406, "learning_rate": 5.012793203240995e-08, "loss": 13.7387, "step": 479700 }, { "epoch": 0.9690445504753209, "grad_norm": 397.095947265625, "learning_rate": 5.007863965825754e-08, "loss": 16.9001, "step": 479710 }, { "epoch": 0.9690647511080047, "grad_norm": 306.54229736328125, "learning_rate": 5.002937140942132e-08, "loss": 9.5529, "step": 479720 }, { "epoch": 0.9690849517406885, "grad_norm": 188.66578674316406, "learning_rate": 4.998012728614221e-08, "loss": 8.5547, "step": 479730 }, { "epoch": 0.9691051523733724, "grad_norm": 368.2060546875, "learning_rate": 4.99309072886589e-08, "loss": 19.5318, "step": 479740 }, { "epoch": 0.9691253530060562, "grad_norm": 2.3029627799987793, "learning_rate": 4.988171141721232e-08, 
"loss": 14.3379, "step": 479750 }, { "epoch": 0.96914555363874, "grad_norm": 339.0953674316406, "learning_rate": 4.983253967204171e-08, "loss": 30.6589, "step": 479760 }, { "epoch": 0.9691657542714238, "grad_norm": 335.7156982421875, "learning_rate": 4.9783392053386894e-08, "loss": 17.6085, "step": 479770 }, { "epoch": 0.9691859549041076, "grad_norm": 301.7350769042969, "learning_rate": 4.9734268561487665e-08, "loss": 14.41, "step": 479780 }, { "epoch": 0.9692061555367915, "grad_norm": 659.3392944335938, "learning_rate": 4.968516919658328e-08, "loss": 11.3555, "step": 479790 }, { "epoch": 0.9692263561694753, "grad_norm": 292.5453796386719, "learning_rate": 4.9636093958913e-08, "loss": 19.53, "step": 479800 }, { "epoch": 0.9692465568021591, "grad_norm": 249.79486083984375, "learning_rate": 4.958704284871552e-08, "loss": 17.1886, "step": 479810 }, { "epoch": 0.9692667574348428, "grad_norm": 254.720947265625, "learning_rate": 4.9538015866230636e-08, "loss": 19.8521, "step": 479820 }, { "epoch": 0.9692869580675266, "grad_norm": 330.8457946777344, "learning_rate": 4.948901301169706e-08, "loss": 10.1042, "step": 479830 }, { "epoch": 0.9693071587002104, "grad_norm": 244.03285217285156, "learning_rate": 4.944003428535349e-08, "loss": 18.2047, "step": 479840 }, { "epoch": 0.9693273593328943, "grad_norm": 638.8070068359375, "learning_rate": 4.939107968743917e-08, "loss": 16.9893, "step": 479850 }, { "epoch": 0.9693475599655781, "grad_norm": 152.32827758789062, "learning_rate": 4.9342149218191694e-08, "loss": 11.6009, "step": 479860 }, { "epoch": 0.9693677605982619, "grad_norm": 240.5481719970703, "learning_rate": 4.9293242877850866e-08, "loss": 14.4416, "step": 479870 }, { "epoch": 0.9693879612309457, "grad_norm": 169.4164276123047, "learning_rate": 4.9244360666653724e-08, "loss": 37.0584, "step": 479880 }, { "epoch": 0.9694081618636295, "grad_norm": 467.1522216796875, "learning_rate": 4.9195502584839516e-08, "loss": 30.0674, "step": 479890 }, { "epoch": 0.9694283624963134, "grad_norm": 606.3841552734375, "learning_rate": 4.914666863264528e-08, "loss": 13.857, "step": 479900 }, { "epoch": 0.9694485631289972, "grad_norm": 677.8084106445312, "learning_rate": 4.9097858810310815e-08, "loss": 20.1473, "step": 479910 }, { "epoch": 0.969468763761681, "grad_norm": 207.4750518798828, "learning_rate": 4.9049073118072057e-08, "loss": 24.7366, "step": 479920 }, { "epoch": 0.9694889643943648, "grad_norm": 444.7561950683594, "learning_rate": 4.900031155616769e-08, "loss": 21.5432, "step": 479930 }, { "epoch": 0.9695091650270486, "grad_norm": 69.67774963378906, "learning_rate": 4.8951574124835865e-08, "loss": 16.5166, "step": 479940 }, { "epoch": 0.9695293656597325, "grad_norm": 270.2462463378906, "learning_rate": 4.890286082431306e-08, "loss": 32.815, "step": 479950 }, { "epoch": 0.9695495662924163, "grad_norm": 338.75665283203125, "learning_rate": 4.885417165483741e-08, "loss": 17.1081, "step": 479960 }, { "epoch": 0.9695697669251001, "grad_norm": 434.7626037597656, "learning_rate": 4.880550661664541e-08, "loss": 21.953, "step": 479970 }, { "epoch": 0.9695899675577839, "grad_norm": 429.6672058105469, "learning_rate": 4.8756865709976284e-08, "loss": 15.7475, "step": 479980 }, { "epoch": 0.9696101681904677, "grad_norm": 552.9013061523438, "learning_rate": 4.8708248935064315e-08, "loss": 23.5726, "step": 479990 }, { "epoch": 0.9696303688231516, "grad_norm": 270.3870849609375, "learning_rate": 4.865965629214819e-08, "loss": 14.1504, "step": 480000 }, { "epoch": 0.9696505694558354, "grad_norm": 93.67717742919922, 
"learning_rate": 4.861108778146495e-08, "loss": 12.9809, "step": 480010 }, { "epoch": 0.9696707700885192, "grad_norm": 1092.2213134765625, "learning_rate": 4.856254340325051e-08, "loss": 17.7695, "step": 480020 }, { "epoch": 0.969690970721203, "grad_norm": 579.4368896484375, "learning_rate": 4.851402315774134e-08, "loss": 18.8982, "step": 480030 }, { "epoch": 0.9697111713538868, "grad_norm": 327.2518005371094, "learning_rate": 4.846552704517449e-08, "loss": 19.1692, "step": 480040 }, { "epoch": 0.9697313719865707, "grad_norm": 140.6658172607422, "learning_rate": 4.841705506578587e-08, "loss": 10.5863, "step": 480050 }, { "epoch": 0.9697515726192545, "grad_norm": 232.82550048828125, "learning_rate": 4.836860721981196e-08, "loss": 16.9834, "step": 480060 }, { "epoch": 0.9697717732519382, "grad_norm": 67.43278503417969, "learning_rate": 4.8320183507489236e-08, "loss": 12.7466, "step": 480070 }, { "epoch": 0.969791973884622, "grad_norm": 478.03912353515625, "learning_rate": 4.827178392905307e-08, "loss": 18.2566, "step": 480080 }, { "epoch": 0.9698121745173058, "grad_norm": 125.35701751708984, "learning_rate": 4.822340848473994e-08, "loss": 25.7218, "step": 480090 }, { "epoch": 0.9698323751499897, "grad_norm": 123.36390686035156, "learning_rate": 4.8175057174785766e-08, "loss": 14.2658, "step": 480100 }, { "epoch": 0.9698525757826735, "grad_norm": 257.4310607910156, "learning_rate": 4.81267299994248e-08, "loss": 7.4554, "step": 480110 }, { "epoch": 0.9698727764153573, "grad_norm": 543.759033203125, "learning_rate": 4.807842695889409e-08, "loss": 22.3655, "step": 480120 }, { "epoch": 0.9698929770480411, "grad_norm": 426.5550842285156, "learning_rate": 4.8030148053428424e-08, "loss": 17.6665, "step": 480130 }, { "epoch": 0.9699131776807249, "grad_norm": 199.07061767578125, "learning_rate": 4.798189328326319e-08, "loss": 17.6431, "step": 480140 }, { "epoch": 0.9699333783134088, "grad_norm": 161.0851287841797, "learning_rate": 4.793366264863375e-08, "loss": 16.4152, "step": 480150 }, { "epoch": 0.9699535789460926, "grad_norm": 404.6234436035156, "learning_rate": 4.788545614977491e-08, "loss": 16.7375, "step": 480160 }, { "epoch": 0.9699737795787764, "grad_norm": 1150.251708984375, "learning_rate": 4.783727378692205e-08, "loss": 21.3241, "step": 480170 }, { "epoch": 0.9699939802114602, "grad_norm": 497.9523620605469, "learning_rate": 4.778911556030885e-08, "loss": 15.1136, "step": 480180 }, { "epoch": 0.970014180844144, "grad_norm": 442.69622802734375, "learning_rate": 4.774098147017181e-08, "loss": 30.2108, "step": 480190 }, { "epoch": 0.9700343814768279, "grad_norm": 169.4849090576172, "learning_rate": 4.769287151674407e-08, "loss": 34.2275, "step": 480200 }, { "epoch": 0.9700545821095117, "grad_norm": 871.2241821289062, "learning_rate": 4.764478570026043e-08, "loss": 19.6836, "step": 480210 }, { "epoch": 0.9700747827421955, "grad_norm": 239.61862182617188, "learning_rate": 4.759672402095572e-08, "loss": 15.2729, "step": 480220 }, { "epoch": 0.9700949833748793, "grad_norm": 132.29234313964844, "learning_rate": 4.754868647906419e-08, "loss": 20.5158, "step": 480230 }, { "epoch": 0.9701151840075631, "grad_norm": 95.70121765136719, "learning_rate": 4.750067307481954e-08, "loss": 14.4823, "step": 480240 }, { "epoch": 0.970135384640247, "grad_norm": 423.11871337890625, "learning_rate": 4.7452683808456026e-08, "loss": 15.589, "step": 480250 }, { "epoch": 0.9701555852729308, "grad_norm": 675.8088989257812, "learning_rate": 4.740471868020735e-08, "loss": 16.2484, "step": 480260 }, { "epoch": 
0.9701757859056146, "grad_norm": 734.3201904296875, "learning_rate": 4.735677769030722e-08, "loss": 32.3499, "step": 480270 }, { "epoch": 0.9701959865382984, "grad_norm": 365.7787780761719, "learning_rate": 4.730886083898989e-08, "loss": 22.4048, "step": 480280 }, { "epoch": 0.9702161871709822, "grad_norm": 247.57118225097656, "learning_rate": 4.726096812648795e-08, "loss": 19.5636, "step": 480290 }, { "epoch": 0.9702363878036661, "grad_norm": 520.8023071289062, "learning_rate": 4.7213099553035655e-08, "loss": 13.0253, "step": 480300 }, { "epoch": 0.9702565884363499, "grad_norm": 343.294189453125, "learning_rate": 4.716525511886616e-08, "loss": 29.1592, "step": 480310 }, { "epoch": 0.9702767890690337, "grad_norm": 219.90328979492188, "learning_rate": 4.711743482421205e-08, "loss": 22.095, "step": 480320 }, { "epoch": 0.9702969897017174, "grad_norm": 176.9662628173828, "learning_rate": 4.7069638669307026e-08, "loss": 16.2117, "step": 480330 }, { "epoch": 0.9703171903344012, "grad_norm": 482.5318603515625, "learning_rate": 4.702186665438424e-08, "loss": 15.1355, "step": 480340 }, { "epoch": 0.970337390967085, "grad_norm": 220.77462768554688, "learning_rate": 4.697411877967573e-08, "loss": 22.2234, "step": 480350 }, { "epoch": 0.9703575915997689, "grad_norm": 50.13962173461914, "learning_rate": 4.692639504541518e-08, "loss": 7.2214, "step": 480360 }, { "epoch": 0.9703777922324527, "grad_norm": 331.6729736328125, "learning_rate": 4.68786954518341e-08, "loss": 11.8504, "step": 480370 }, { "epoch": 0.9703979928651365, "grad_norm": 470.2253112792969, "learning_rate": 4.683101999916562e-08, "loss": 6.9637, "step": 480380 }, { "epoch": 0.9704181934978203, "grad_norm": 112.66553497314453, "learning_rate": 4.6783368687642325e-08, "loss": 11.6855, "step": 480390 }, { "epoch": 0.9704383941305041, "grad_norm": 532.448486328125, "learning_rate": 4.6735741517495715e-08, "loss": 25.7508, "step": 480400 }, { "epoch": 0.970458594763188, "grad_norm": 284.78729248046875, "learning_rate": 4.668813848895837e-08, "loss": 11.7805, "step": 480410 }, { "epoch": 0.9704787953958718, "grad_norm": 195.6524200439453, "learning_rate": 4.6640559602262325e-08, "loss": 14.5171, "step": 480420 }, { "epoch": 0.9704989960285556, "grad_norm": 151.35105895996094, "learning_rate": 4.6593004857639627e-08, "loss": 7.1785, "step": 480430 }, { "epoch": 0.9705191966612394, "grad_norm": 350.37542724609375, "learning_rate": 4.654547425532119e-08, "loss": 13.3357, "step": 480440 }, { "epoch": 0.9705393972939232, "grad_norm": 365.9966125488281, "learning_rate": 4.649796779554016e-08, "loss": 15.9065, "step": 480450 }, { "epoch": 0.9705595979266071, "grad_norm": 213.68614196777344, "learning_rate": 4.645048547852693e-08, "loss": 17.5773, "step": 480460 }, { "epoch": 0.9705797985592909, "grad_norm": 346.04425048828125, "learning_rate": 4.6403027304513513e-08, "loss": 9.6109, "step": 480470 }, { "epoch": 0.9705999991919747, "grad_norm": 422.04339599609375, "learning_rate": 4.635559327373029e-08, "loss": 14.3172, "step": 480480 }, { "epoch": 0.9706201998246585, "grad_norm": 148.32606506347656, "learning_rate": 4.6308183386409855e-08, "loss": 23.7274, "step": 480490 }, { "epoch": 0.9706404004573423, "grad_norm": 87.15348815917969, "learning_rate": 4.626079764278202e-08, "loss": 15.3824, "step": 480500 }, { "epoch": 0.9706606010900262, "grad_norm": 780.4072875976562, "learning_rate": 4.621343604307826e-08, "loss": 32.2571, "step": 480510 }, { "epoch": 0.97068080172271, "grad_norm": 604.56298828125, "learning_rate": 4.616609858753007e-08, "loss": 
16.4787, "step": 480520 }, { "epoch": 0.9707010023553938, "grad_norm": 308.4506530761719, "learning_rate": 4.6118785276366706e-08, "loss": 19.7474, "step": 480530 }, { "epoch": 0.9707212029880776, "grad_norm": 34.33176040649414, "learning_rate": 4.6071496109819643e-08, "loss": 16.4853, "step": 480540 }, { "epoch": 0.9707414036207614, "grad_norm": 516.2884521484375, "learning_rate": 4.6024231088119266e-08, "loss": 20.3538, "step": 480550 }, { "epoch": 0.9707616042534453, "grad_norm": 333.0625305175781, "learning_rate": 4.597699021149649e-08, "loss": 19.302, "step": 480560 }, { "epoch": 0.9707818048861291, "grad_norm": 81.12806701660156, "learning_rate": 4.592977348018002e-08, "loss": 14.8552, "step": 480570 }, { "epoch": 0.9708020055188128, "grad_norm": 467.4275817871094, "learning_rate": 4.588258089440134e-08, "loss": 11.0226, "step": 480580 }, { "epoch": 0.9708222061514966, "grad_norm": 438.2637023925781, "learning_rate": 4.5835412454390823e-08, "loss": 16.0897, "step": 480590 }, { "epoch": 0.9708424067841804, "grad_norm": 253.96768188476562, "learning_rate": 4.578826816037718e-08, "loss": 27.9838, "step": 480600 }, { "epoch": 0.9708626074168643, "grad_norm": 300.85296630859375, "learning_rate": 4.574114801259022e-08, "loss": 13.6882, "step": 480610 }, { "epoch": 0.9708828080495481, "grad_norm": 462.7916259765625, "learning_rate": 4.569405201126087e-08, "loss": 32.8059, "step": 480620 }, { "epoch": 0.9709030086822319, "grad_norm": 599.6968383789062, "learning_rate": 4.5646980156617284e-08, "loss": 14.333, "step": 480630 }, { "epoch": 0.9709232093149157, "grad_norm": 571.0783081054688, "learning_rate": 4.5599932448889276e-08, "loss": 27.9993, "step": 480640 }, { "epoch": 0.9709434099475995, "grad_norm": 412.3912658691406, "learning_rate": 4.5552908888306654e-08, "loss": 13.605, "step": 480650 }, { "epoch": 0.9709636105802834, "grad_norm": 659.2654418945312, "learning_rate": 4.5505909475098144e-08, "loss": 21.4791, "step": 480660 }, { "epoch": 0.9709838112129672, "grad_norm": 343.37347412109375, "learning_rate": 4.545893420949299e-08, "loss": 17.2477, "step": 480670 }, { "epoch": 0.971004011845651, "grad_norm": 263.5906982421875, "learning_rate": 4.5411983091719905e-08, "loss": 10.5555, "step": 480680 }, { "epoch": 0.9710242124783348, "grad_norm": 128.11795043945312, "learning_rate": 4.5365056122007586e-08, "loss": 23.7357, "step": 480690 }, { "epoch": 0.9710444131110186, "grad_norm": 292.9198913574219, "learning_rate": 4.531815330058586e-08, "loss": 20.5026, "step": 480700 }, { "epoch": 0.9710646137437025, "grad_norm": 434.9056091308594, "learning_rate": 4.527127462768233e-08, "loss": 17.0049, "step": 480710 }, { "epoch": 0.9710848143763863, "grad_norm": 501.1333923339844, "learning_rate": 4.5224420103525125e-08, "loss": 18.9389, "step": 480720 }, { "epoch": 0.9711050150090701, "grad_norm": 255.53147888183594, "learning_rate": 4.517758972834352e-08, "loss": 18.6333, "step": 480730 }, { "epoch": 0.9711252156417539, "grad_norm": 63.87846755981445, "learning_rate": 4.5130783502365106e-08, "loss": 12.9465, "step": 480740 }, { "epoch": 0.9711454162744377, "grad_norm": 277.8436279296875, "learning_rate": 4.508400142581859e-08, "loss": 15.9341, "step": 480750 }, { "epoch": 0.9711656169071216, "grad_norm": 372.5913391113281, "learning_rate": 4.503724349893157e-08, "loss": 22.5148, "step": 480760 }, { "epoch": 0.9711858175398054, "grad_norm": 550.0427856445312, "learning_rate": 4.49905097219322e-08, "loss": 22.9235, "step": 480770 }, { "epoch": 0.9712060181724892, "grad_norm": 700.8040161132812, 
"learning_rate": 4.4943800095048615e-08, "loss": 19.4991, "step": 480780 }, { "epoch": 0.971226218805173, "grad_norm": 490.8512878417969, "learning_rate": 4.4897114618506765e-08, "loss": 19.7064, "step": 480790 }, { "epoch": 0.9712464194378568, "grad_norm": 342.76123046875, "learning_rate": 4.485045329253646e-08, "loss": 17.5235, "step": 480800 }, { "epoch": 0.9712666200705407, "grad_norm": 430.11834716796875, "learning_rate": 4.480381611736362e-08, "loss": 14.5157, "step": 480810 }, { "epoch": 0.9712868207032245, "grad_norm": 151.7615966796875, "learning_rate": 4.4757203093215854e-08, "loss": 17.9508, "step": 480820 }, { "epoch": 0.9713070213359083, "grad_norm": 129.25218200683594, "learning_rate": 4.4710614220320746e-08, "loss": 9.7335, "step": 480830 }, { "epoch": 0.971327221968592, "grad_norm": 260.68408203125, "learning_rate": 4.4664049498904796e-08, "loss": 12.1655, "step": 480840 }, { "epoch": 0.9713474226012758, "grad_norm": 498.87469482421875, "learning_rate": 4.4617508929195585e-08, "loss": 14.879, "step": 480850 }, { "epoch": 0.9713676232339596, "grad_norm": 176.5625762939453, "learning_rate": 4.457099251141961e-08, "loss": 7.3391, "step": 480860 }, { "epoch": 0.9713878238666435, "grad_norm": 308.27056884765625, "learning_rate": 4.4524500245803346e-08, "loss": 15.3614, "step": 480870 }, { "epoch": 0.9714080244993273, "grad_norm": 322.5669250488281, "learning_rate": 4.4478032132573845e-08, "loss": 18.5765, "step": 480880 }, { "epoch": 0.9714282251320111, "grad_norm": 428.40423583984375, "learning_rate": 4.443158817195703e-08, "loss": 37.4654, "step": 480890 }, { "epoch": 0.9714484257646949, "grad_norm": 14.182168006896973, "learning_rate": 4.438516836417994e-08, "loss": 19.9014, "step": 480900 }, { "epoch": 0.9714686263973787, "grad_norm": 493.1097717285156, "learning_rate": 4.4338772709468514e-08, "loss": 14.3817, "step": 480910 }, { "epoch": 0.9714888270300626, "grad_norm": 235.4907684326172, "learning_rate": 4.429240120804923e-08, "loss": 31.6696, "step": 480920 }, { "epoch": 0.9715090276627464, "grad_norm": 473.29937744140625, "learning_rate": 4.424605386014691e-08, "loss": 26.5514, "step": 480930 }, { "epoch": 0.9715292282954302, "grad_norm": 496.5303955078125, "learning_rate": 4.4199730665988594e-08, "loss": 15.9515, "step": 480940 }, { "epoch": 0.971549428928114, "grad_norm": 189.34280395507812, "learning_rate": 4.415343162580022e-08, "loss": 11.0705, "step": 480950 }, { "epoch": 0.9715696295607978, "grad_norm": 182.630126953125, "learning_rate": 4.4107156739806037e-08, "loss": 17.192, "step": 480960 }, { "epoch": 0.9715898301934817, "grad_norm": 19.358842849731445, "learning_rate": 4.40609060082331e-08, "loss": 11.0745, "step": 480970 }, { "epoch": 0.9716100308261655, "grad_norm": 249.55409240722656, "learning_rate": 4.401467943130622e-08, "loss": 13.5404, "step": 480980 }, { "epoch": 0.9716302314588493, "grad_norm": 409.1258544921875, "learning_rate": 4.3968477009250775e-08, "loss": 19.5579, "step": 480990 }, { "epoch": 0.9716504320915331, "grad_norm": 214.1861114501953, "learning_rate": 4.392229874229159e-08, "loss": 17.2364, "step": 481000 }, { "epoch": 0.9716706327242169, "grad_norm": 208.24266052246094, "learning_rate": 4.387614463065404e-08, "loss": 26.5228, "step": 481010 }, { "epoch": 0.9716908333569008, "grad_norm": 916.2860107421875, "learning_rate": 4.383001467456294e-08, "loss": 21.4474, "step": 481020 }, { "epoch": 0.9717110339895846, "grad_norm": 1262.3741455078125, "learning_rate": 4.378390887424366e-08, "loss": 20.0141, "step": 481030 }, { "epoch": 
0.9717312346222684, "grad_norm": 484.7181701660156, "learning_rate": 4.3737827229919926e-08, "loss": 24.9658, "step": 481040 }, { "epoch": 0.9717514352549522, "grad_norm": 199.1392822265625, "learning_rate": 4.36917697418171e-08, "loss": 13.892, "step": 481050 }, { "epoch": 0.971771635887636, "grad_norm": 56.355411529541016, "learning_rate": 4.364573641016001e-08, "loss": 16.0134, "step": 481060 }, { "epoch": 0.9717918365203199, "grad_norm": 294.04638671875, "learning_rate": 4.359972723517236e-08, "loss": 25.3006, "step": 481070 }, { "epoch": 0.9718120371530037, "grad_norm": 230.2291259765625, "learning_rate": 4.3553742217077866e-08, "loss": 15.2554, "step": 481080 }, { "epoch": 0.9718322377856875, "grad_norm": 100.92296600341797, "learning_rate": 4.350778135610134e-08, "loss": 11.8245, "step": 481090 }, { "epoch": 0.9718524384183712, "grad_norm": 3.7475879192352295, "learning_rate": 4.346184465246761e-08, "loss": 15.6748, "step": 481100 }, { "epoch": 0.971872639051055, "grad_norm": 409.6090087890625, "learning_rate": 4.3415932106398715e-08, "loss": 20.7157, "step": 481110 }, { "epoch": 0.9718928396837389, "grad_norm": 396.5038757324219, "learning_rate": 4.3370043718119484e-08, "loss": 14.2519, "step": 481120 }, { "epoch": 0.9719130403164227, "grad_norm": 455.0494689941406, "learning_rate": 4.332417948785417e-08, "loss": 20.1041, "step": 481130 }, { "epoch": 0.9719332409491065, "grad_norm": 298.5553283691406, "learning_rate": 4.327833941582538e-08, "loss": 19.1641, "step": 481140 }, { "epoch": 0.9719534415817903, "grad_norm": 259.3094177246094, "learning_rate": 4.3232523502256264e-08, "loss": 30.4921, "step": 481150 }, { "epoch": 0.9719736422144741, "grad_norm": 36.026588439941406, "learning_rate": 4.318673174737109e-08, "loss": 8.7467, "step": 481160 }, { "epoch": 0.971993842847158, "grad_norm": 358.4710693359375, "learning_rate": 4.3140964151393015e-08, "loss": 9.7279, "step": 481170 }, { "epoch": 0.9720140434798418, "grad_norm": 439.7259216308594, "learning_rate": 4.3095220714544084e-08, "loss": 13.9889, "step": 481180 }, { "epoch": 0.9720342441125256, "grad_norm": 202.249755859375, "learning_rate": 4.304950143704745e-08, "loss": 32.0029, "step": 481190 }, { "epoch": 0.9720544447452094, "grad_norm": 282.92864990234375, "learning_rate": 4.3003806319127376e-08, "loss": 13.5526, "step": 481200 }, { "epoch": 0.9720746453778932, "grad_norm": 222.46804809570312, "learning_rate": 4.2958135361004794e-08, "loss": 14.219, "step": 481210 }, { "epoch": 0.972094846010577, "grad_norm": 247.01966857910156, "learning_rate": 4.291248856290342e-08, "loss": 22.9882, "step": 481220 }, { "epoch": 0.9721150466432609, "grad_norm": 363.4715576171875, "learning_rate": 4.28668659250453e-08, "loss": 19.1361, "step": 481230 }, { "epoch": 0.9721352472759447, "grad_norm": 279.6907043457031, "learning_rate": 4.282126744765247e-08, "loss": 18.316, "step": 481240 }, { "epoch": 0.9721554479086285, "grad_norm": 236.50823974609375, "learning_rate": 4.2775693130948094e-08, "loss": 33.653, "step": 481250 }, { "epoch": 0.9721756485413123, "grad_norm": 0.12165041267871857, "learning_rate": 4.2730142975153654e-08, "loss": 23.2239, "step": 481260 }, { "epoch": 0.9721958491739962, "grad_norm": 269.12451171875, "learning_rate": 4.26846169804912e-08, "loss": 7.2478, "step": 481270 }, { "epoch": 0.97221604980668, "grad_norm": 169.82176208496094, "learning_rate": 4.263911514718222e-08, "loss": 23.8434, "step": 481280 }, { "epoch": 0.9722362504393638, "grad_norm": 1032.0400390625, "learning_rate": 4.259363747544931e-08, "loss": 
26.5229, "step": 481290 }, { "epoch": 0.9722564510720476, "grad_norm": 326.4361267089844, "learning_rate": 4.2548183965513415e-08, "loss": 19.0609, "step": 481300 }, { "epoch": 0.9722766517047314, "grad_norm": 200.75393676757812, "learning_rate": 4.250275461759712e-08, "loss": 22.7418, "step": 481310 }, { "epoch": 0.9722968523374153, "grad_norm": 357.01751708984375, "learning_rate": 4.245734943192081e-08, "loss": 15.4631, "step": 481320 }, { "epoch": 0.9723170529700991, "grad_norm": 334.8346862792969, "learning_rate": 4.241196840870598e-08, "loss": 16.0759, "step": 481330 }, { "epoch": 0.9723372536027829, "grad_norm": 252.5950469970703, "learning_rate": 4.236661154817412e-08, "loss": 5.5812, "step": 481340 }, { "epoch": 0.9723574542354666, "grad_norm": 310.70068359375, "learning_rate": 4.23212788505456e-08, "loss": 17.8986, "step": 481350 }, { "epoch": 0.9723776548681504, "grad_norm": 362.728515625, "learning_rate": 4.227597031604247e-08, "loss": 13.8651, "step": 481360 }, { "epoch": 0.9723978555008342, "grad_norm": 718.6262817382812, "learning_rate": 4.2230685944884554e-08, "loss": 31.8578, "step": 481370 }, { "epoch": 0.9724180561335181, "grad_norm": 399.081787109375, "learning_rate": 4.218542573729334e-08, "loss": 20.2966, "step": 481380 }, { "epoch": 0.9724382567662019, "grad_norm": 261.89031982421875, "learning_rate": 4.2140189693488654e-08, "loss": 30.2808, "step": 481390 }, { "epoch": 0.9724584573988857, "grad_norm": 309.1327209472656, "learning_rate": 4.209497781369143e-08, "loss": 7.3983, "step": 481400 }, { "epoch": 0.9724786580315695, "grad_norm": 325.05633544921875, "learning_rate": 4.20497900981226e-08, "loss": 17.7839, "step": 481410 }, { "epoch": 0.9724988586642533, "grad_norm": 60.344329833984375, "learning_rate": 4.2004626547000885e-08, "loss": 11.7354, "step": 481420 }, { "epoch": 0.9725190592969372, "grad_norm": 437.69622802734375, "learning_rate": 4.195948716054776e-08, "loss": 23.6131, "step": 481430 }, { "epoch": 0.972539259929621, "grad_norm": 432.4437561035156, "learning_rate": 4.191437193898251e-08, "loss": 26.2536, "step": 481440 }, { "epoch": 0.9725594605623048, "grad_norm": 393.9098205566406, "learning_rate": 4.1869280882525506e-08, "loss": 33.5293, "step": 481450 }, { "epoch": 0.9725796611949886, "grad_norm": 126.11160278320312, "learning_rate": 4.1824213991396024e-08, "loss": 13.8136, "step": 481460 }, { "epoch": 0.9725998618276724, "grad_norm": 60.79416275024414, "learning_rate": 4.1779171265814435e-08, "loss": 10.5208, "step": 481470 }, { "epoch": 0.9726200624603563, "grad_norm": 189.42037963867188, "learning_rate": 4.173415270599945e-08, "loss": 21.2066, "step": 481480 }, { "epoch": 0.9726402630930401, "grad_norm": 373.3421630859375, "learning_rate": 4.168915831217091e-08, "loss": 14.9442, "step": 481490 }, { "epoch": 0.9726604637257239, "grad_norm": 282.920166015625, "learning_rate": 4.164418808454806e-08, "loss": 16.3627, "step": 481500 }, { "epoch": 0.9726806643584077, "grad_norm": 251.2848358154297, "learning_rate": 4.159924202334964e-08, "loss": 21.8468, "step": 481510 }, { "epoch": 0.9727008649910915, "grad_norm": 81.56550598144531, "learning_rate": 4.1554320128795455e-08, "loss": 13.4286, "step": 481520 }, { "epoch": 0.9727210656237754, "grad_norm": 390.93798828125, "learning_rate": 4.150942240110478e-08, "loss": 13.1788, "step": 481530 }, { "epoch": 0.9727412662564592, "grad_norm": 456.1376953125, "learning_rate": 4.146454884049467e-08, "loss": 22.1921, "step": 481540 }, { "epoch": 0.972761466889143, "grad_norm": 565.8245849609375, "learning_rate": 
4.1419699447186045e-08, "loss": 55.9171, "step": 481550 }, { "epoch": 0.9727816675218268, "grad_norm": 669.5379028320312, "learning_rate": 4.137487422139541e-08, "loss": 24.39, "step": 481560 }, { "epoch": 0.9728018681545106, "grad_norm": 78.6161117553711, "learning_rate": 4.133007316334259e-08, "loss": 13.201, "step": 481570 }, { "epoch": 0.9728220687871945, "grad_norm": 78.4344711303711, "learning_rate": 4.128529627324573e-08, "loss": 20.3774, "step": 481580 }, { "epoch": 0.9728422694198783, "grad_norm": 380.95587158203125, "learning_rate": 4.124054355132301e-08, "loss": 12.9402, "step": 481590 }, { "epoch": 0.9728624700525621, "grad_norm": 485.6889343261719, "learning_rate": 4.1195814997792014e-08, "loss": 11.6911, "step": 481600 }, { "epoch": 0.9728826706852458, "grad_norm": 471.99139404296875, "learning_rate": 4.1151110612872023e-08, "loss": 18.3957, "step": 481610 }, { "epoch": 0.9729028713179296, "grad_norm": 628.6807861328125, "learning_rate": 4.1106430396778974e-08, "loss": 30.9745, "step": 481620 }, { "epoch": 0.9729230719506134, "grad_norm": 203.78421020507812, "learning_rate": 4.1061774349732686e-08, "loss": 12.6072, "step": 481630 }, { "epoch": 0.9729432725832973, "grad_norm": 325.5924987792969, "learning_rate": 4.10171424719491e-08, "loss": 25.4391, "step": 481640 }, { "epoch": 0.9729634732159811, "grad_norm": 293.34912109375, "learning_rate": 4.097253476364693e-08, "loss": 35.9362, "step": 481650 }, { "epoch": 0.9729836738486649, "grad_norm": 170.9857177734375, "learning_rate": 4.092795122504323e-08, "loss": 17.5913, "step": 481660 }, { "epoch": 0.9730038744813487, "grad_norm": 682.1256103515625, "learning_rate": 4.088339185635504e-08, "loss": 13.5609, "step": 481670 }, { "epoch": 0.9730240751140325, "grad_norm": 350.8077087402344, "learning_rate": 4.083885665779996e-08, "loss": 23.3654, "step": 481680 }, { "epoch": 0.9730442757467164, "grad_norm": 403.890625, "learning_rate": 4.07943456295945e-08, "loss": 25.1687, "step": 481690 }, { "epoch": 0.9730644763794002, "grad_norm": 0.18735237419605255, "learning_rate": 4.0749858771956253e-08, "loss": 10.6021, "step": 481700 }, { "epoch": 0.973084677012084, "grad_norm": 451.27435302734375, "learning_rate": 4.070539608510171e-08, "loss": 25.175, "step": 481710 }, { "epoch": 0.9731048776447678, "grad_norm": 744.0653686523438, "learning_rate": 4.066095756924682e-08, "loss": 34.801, "step": 481720 }, { "epoch": 0.9731250782774516, "grad_norm": 14.12344741821289, "learning_rate": 4.061654322460973e-08, "loss": 19.8248, "step": 481730 }, { "epoch": 0.9731452789101355, "grad_norm": 167.87937927246094, "learning_rate": 4.0572153051406383e-08, "loss": 15.7398, "step": 481740 }, { "epoch": 0.9731654795428193, "grad_norm": 193.6814727783203, "learning_rate": 4.052778704985216e-08, "loss": 25.9701, "step": 481750 }, { "epoch": 0.9731856801755031, "grad_norm": 437.42242431640625, "learning_rate": 4.048344522016356e-08, "loss": 21.1084, "step": 481760 }, { "epoch": 0.9732058808081869, "grad_norm": 628.2918090820312, "learning_rate": 4.043912756255819e-08, "loss": 19.3399, "step": 481770 }, { "epoch": 0.9732260814408707, "grad_norm": 197.848876953125, "learning_rate": 4.039483407725031e-08, "loss": 17.7387, "step": 481780 }, { "epoch": 0.9732462820735546, "grad_norm": 301.0892333984375, "learning_rate": 4.035056476445698e-08, "loss": 24.283, "step": 481790 }, { "epoch": 0.9732664827062384, "grad_norm": 535.1521606445312, "learning_rate": 4.030631962439302e-08, "loss": 13.416, "step": 481800 }, { "epoch": 0.9732866833389222, "grad_norm": 
199.53504943847656, "learning_rate": 4.026209865727493e-08, "loss": 22.0506, "step": 481810 }, { "epoch": 0.973306883971606, "grad_norm": 77.13323974609375, "learning_rate": 4.0217901863317534e-08, "loss": 12.3341, "step": 481820 }, { "epoch": 0.9733270846042898, "grad_norm": 553.6863403320312, "learning_rate": 4.017372924273621e-08, "loss": 25.0607, "step": 481830 }, { "epoch": 0.9733472852369737, "grad_norm": 112.62586975097656, "learning_rate": 4.012958079574747e-08, "loss": 17.1677, "step": 481840 }, { "epoch": 0.9733674858696575, "grad_norm": 160.67294311523438, "learning_rate": 4.008545652256502e-08, "loss": 16.0821, "step": 481850 }, { "epoch": 0.9733876865023412, "grad_norm": 88.95194244384766, "learning_rate": 4.004135642340423e-08, "loss": 14.0948, "step": 481860 }, { "epoch": 0.973407887135025, "grad_norm": 199.16061401367188, "learning_rate": 3.999728049848106e-08, "loss": 12.0725, "step": 481870 }, { "epoch": 0.9734280877677088, "grad_norm": 470.1341247558594, "learning_rate": 3.995322874800922e-08, "loss": 26.116, "step": 481880 }, { "epoch": 0.9734482884003927, "grad_norm": 118.9264907836914, "learning_rate": 3.9909201172203537e-08, "loss": 18.3537, "step": 481890 }, { "epoch": 0.9734684890330765, "grad_norm": 12.092803001403809, "learning_rate": 3.986519777127884e-08, "loss": 10.6664, "step": 481900 }, { "epoch": 0.9734886896657603, "grad_norm": 42.88130187988281, "learning_rate": 3.9821218545449956e-08, "loss": 9.6017, "step": 481910 }, { "epoch": 0.9735088902984441, "grad_norm": 111.88728332519531, "learning_rate": 3.977726349493061e-08, "loss": 16.497, "step": 481920 }, { "epoch": 0.9735290909311279, "grad_norm": 265.8011779785156, "learning_rate": 3.973333261993506e-08, "loss": 30.0345, "step": 481930 }, { "epoch": 0.9735492915638118, "grad_norm": 7.396425724029541, "learning_rate": 3.9689425920678146e-08, "loss": 31.8613, "step": 481940 }, { "epoch": 0.9735694921964956, "grad_norm": 209.86404418945312, "learning_rate": 3.964554339737303e-08, "loss": 39.5427, "step": 481950 }, { "epoch": 0.9735896928291794, "grad_norm": 909.5701293945312, "learning_rate": 3.960168505023343e-08, "loss": 30.6708, "step": 481960 }, { "epoch": 0.9736098934618632, "grad_norm": 885.4310302734375, "learning_rate": 3.955785087947473e-08, "loss": 17.3833, "step": 481970 }, { "epoch": 0.973630094094547, "grad_norm": 640.5752563476562, "learning_rate": 3.951404088530841e-08, "loss": 25.2004, "step": 481980 }, { "epoch": 0.9736502947272309, "grad_norm": 345.5538635253906, "learning_rate": 3.947025506794933e-08, "loss": 25.903, "step": 481990 }, { "epoch": 0.9736704953599147, "grad_norm": 286.027099609375, "learning_rate": 3.9426493427611177e-08, "loss": 9.783, "step": 482000 }, { "epoch": 0.9736906959925985, "grad_norm": 334.16693115234375, "learning_rate": 3.938275596450603e-08, "loss": 17.6792, "step": 482010 }, { "epoch": 0.9737108966252823, "grad_norm": 750.4974975585938, "learning_rate": 3.933904267884758e-08, "loss": 16.873, "step": 482020 }, { "epoch": 0.9737310972579661, "grad_norm": 354.44659423828125, "learning_rate": 3.929535357084957e-08, "loss": 33.9045, "step": 482030 }, { "epoch": 0.97375129789065, "grad_norm": 163.46725463867188, "learning_rate": 3.925168864072348e-08, "loss": 12.3901, "step": 482040 }, { "epoch": 0.9737714985233338, "grad_norm": 357.58935546875, "learning_rate": 3.9208047888683597e-08, "loss": 8.9686, "step": 482050 }, { "epoch": 0.9737916991560176, "grad_norm": 256.0655517578125, "learning_rate": 3.9164431314941965e-08, "loss": 18.6118, "step": 482060 }, { 
"epoch": 0.9738118997887014, "grad_norm": 475.1147155761719, "learning_rate": 3.912083891971119e-08, "loss": 14.304, "step": 482070 }, { "epoch": 0.9738321004213852, "grad_norm": 621.0291137695312, "learning_rate": 3.907727070320389e-08, "loss": 17.7575, "step": 482080 }, { "epoch": 0.9738523010540691, "grad_norm": 294.4523010253906, "learning_rate": 3.9033726665632096e-08, "loss": 11.2396, "step": 482090 }, { "epoch": 0.9738725016867529, "grad_norm": 306.70318603515625, "learning_rate": 3.899020680720844e-08, "loss": 18.0931, "step": 482100 }, { "epoch": 0.9738927023194367, "grad_norm": 857.6680297851562, "learning_rate": 3.894671112814441e-08, "loss": 12.2714, "step": 482110 }, { "epoch": 0.9739129029521204, "grad_norm": 496.441162109375, "learning_rate": 3.8903239628652615e-08, "loss": 12.2289, "step": 482120 }, { "epoch": 0.9739331035848042, "grad_norm": 143.51065063476562, "learning_rate": 3.88597923089451e-08, "loss": 20.1973, "step": 482130 }, { "epoch": 0.973953304217488, "grad_norm": 1033.66796875, "learning_rate": 3.881636916923281e-08, "loss": 20.2224, "step": 482140 }, { "epoch": 0.9739735048501719, "grad_norm": 485.4521179199219, "learning_rate": 3.877297020972781e-08, "loss": 24.38, "step": 482150 }, { "epoch": 0.9739937054828557, "grad_norm": 167.51791381835938, "learning_rate": 3.8729595430641586e-08, "loss": 16.1247, "step": 482160 }, { "epoch": 0.9740139061155395, "grad_norm": 431.7303771972656, "learning_rate": 3.868624483218619e-08, "loss": 22.341, "step": 482170 }, { "epoch": 0.9740341067482233, "grad_norm": 76.6075439453125, "learning_rate": 3.864291841457146e-08, "loss": 22.8067, "step": 482180 }, { "epoch": 0.9740543073809071, "grad_norm": 313.4910583496094, "learning_rate": 3.859961617801e-08, "loss": 23.8506, "step": 482190 }, { "epoch": 0.974074508013591, "grad_norm": 1129.052001953125, "learning_rate": 3.855633812271165e-08, "loss": 20.4741, "step": 482200 }, { "epoch": 0.9740947086462748, "grad_norm": 457.9103698730469, "learning_rate": 3.8513084248888445e-08, "loss": 23.1593, "step": 482210 }, { "epoch": 0.9741149092789586, "grad_norm": 453.9139404296875, "learning_rate": 3.8469854556750785e-08, "loss": 18.997, "step": 482220 }, { "epoch": 0.9741351099116424, "grad_norm": 312.4790954589844, "learning_rate": 3.842664904650906e-08, "loss": 20.262, "step": 482230 }, { "epoch": 0.9741553105443262, "grad_norm": 347.3577575683594, "learning_rate": 3.83834677183742e-08, "loss": 28.9999, "step": 482240 }, { "epoch": 0.9741755111770101, "grad_norm": 345.5404357910156, "learning_rate": 3.83403105725566e-08, "loss": 12.5937, "step": 482250 }, { "epoch": 0.9741957118096939, "grad_norm": 311.71612548828125, "learning_rate": 3.82971776092661e-08, "loss": 17.8435, "step": 482260 }, { "epoch": 0.9742159124423777, "grad_norm": 484.7486267089844, "learning_rate": 3.825406882871363e-08, "loss": 12.1719, "step": 482270 }, { "epoch": 0.9742361130750615, "grad_norm": 942.969970703125, "learning_rate": 3.8210984231109583e-08, "loss": 15.9121, "step": 482280 }, { "epoch": 0.9742563137077453, "grad_norm": 518.7152099609375, "learning_rate": 3.816792381666268e-08, "loss": 13.4377, "step": 482290 }, { "epoch": 0.9742765143404292, "grad_norm": 464.7488708496094, "learning_rate": 3.812488758558386e-08, "loss": 41.7322, "step": 482300 }, { "epoch": 0.974296714973113, "grad_norm": 411.00128173828125, "learning_rate": 3.8081875538082404e-08, "loss": 18.5426, "step": 482310 }, { "epoch": 0.9743169156057968, "grad_norm": 474.0198974609375, "learning_rate": 3.8038887674368697e-08, "loss": 
17.4803, "step": 482320 }, { "epoch": 0.9743371162384806, "grad_norm": 637.5534057617188, "learning_rate": 3.799592399465091e-08, "loss": 20.2909, "step": 482330 }, { "epoch": 0.9743573168711644, "grad_norm": 397.6789245605469, "learning_rate": 3.7952984499138864e-08, "loss": 19.7161, "step": 482340 }, { "epoch": 0.9743775175038483, "grad_norm": 28.488502502441406, "learning_rate": 3.791006918804296e-08, "loss": 16.1842, "step": 482350 }, { "epoch": 0.9743977181365321, "grad_norm": 123.11540222167969, "learning_rate": 3.786717806157136e-08, "loss": 16.763, "step": 482360 }, { "epoch": 0.9744179187692159, "grad_norm": 240.61846923828125, "learning_rate": 3.782431111993279e-08, "loss": 27.4726, "step": 482370 }, { "epoch": 0.9744381194018996, "grad_norm": 424.6113586425781, "learning_rate": 3.778146836333707e-08, "loss": 18.4896, "step": 482380 }, { "epoch": 0.9744583200345834, "grad_norm": 316.656494140625, "learning_rate": 3.7738649791992934e-08, "loss": 17.3348, "step": 482390 }, { "epoch": 0.9744785206672673, "grad_norm": 666.3976440429688, "learning_rate": 3.769585540610799e-08, "loss": 24.009, "step": 482400 }, { "epoch": 0.9744987212999511, "grad_norm": 764.1138916015625, "learning_rate": 3.765308520589206e-08, "loss": 63.306, "step": 482410 }, { "epoch": 0.9745189219326349, "grad_norm": 311.10931396484375, "learning_rate": 3.761033919155333e-08, "loss": 17.2908, "step": 482420 }, { "epoch": 0.9745391225653187, "grad_norm": 347.3961486816406, "learning_rate": 3.7567617363299945e-08, "loss": 18.8477, "step": 482430 }, { "epoch": 0.9745593231980025, "grad_norm": 341.66497802734375, "learning_rate": 3.7524919721339535e-08, "loss": 16.1301, "step": 482440 }, { "epoch": 0.9745795238306864, "grad_norm": 432.0102233886719, "learning_rate": 3.748224626588137e-08, "loss": 30.7898, "step": 482450 }, { "epoch": 0.9745997244633702, "grad_norm": 666.8880615234375, "learning_rate": 3.743959699713251e-08, "loss": 43.5052, "step": 482460 }, { "epoch": 0.974619925096054, "grad_norm": 560.2584838867188, "learning_rate": 3.739697191530112e-08, "loss": 19.5189, "step": 482470 }, { "epoch": 0.9746401257287378, "grad_norm": 146.7061309814453, "learning_rate": 3.735437102059536e-08, "loss": 13.0949, "step": 482480 }, { "epoch": 0.9746603263614216, "grad_norm": 568.6498413085938, "learning_rate": 3.731179431322285e-08, "loss": 23.3692, "step": 482490 }, { "epoch": 0.9746805269941055, "grad_norm": 178.3436279296875, "learning_rate": 3.726924179339009e-08, "loss": 16.9201, "step": 482500 }, { "epoch": 0.9747007276267893, "grad_norm": 83.3872299194336, "learning_rate": 3.7226713461305245e-08, "loss": 24.7302, "step": 482510 }, { "epoch": 0.9747209282594731, "grad_norm": 812.8739624023438, "learning_rate": 3.7184209317175366e-08, "loss": 37.4827, "step": 482520 }, { "epoch": 0.9747411288921569, "grad_norm": 431.8472595214844, "learning_rate": 3.714172936120808e-08, "loss": 20.2624, "step": 482530 }, { "epoch": 0.9747613295248407, "grad_norm": 129.54502868652344, "learning_rate": 3.7099273593609316e-08, "loss": 14.5634, "step": 482540 }, { "epoch": 0.9747815301575246, "grad_norm": 368.3341979980469, "learning_rate": 3.7056842014587815e-08, "loss": 23.7714, "step": 482550 }, { "epoch": 0.9748017307902084, "grad_norm": 510.84796142578125, "learning_rate": 3.701443462434895e-08, "loss": 10.4416, "step": 482560 }, { "epoch": 0.9748219314228922, "grad_norm": 61.4462890625, "learning_rate": 3.697205142309923e-08, "loss": 26.5968, "step": 482570 }, { "epoch": 0.974842132055576, "grad_norm": 402.5279235839844, 
"learning_rate": 3.692969241104683e-08, "loss": 15.7557, "step": 482580 }, { "epoch": 0.9748623326882598, "grad_norm": 422.69580078125, "learning_rate": 3.688735758839601e-08, "loss": 8.5926, "step": 482590 }, { "epoch": 0.9748825333209437, "grad_norm": 440.9681396484375, "learning_rate": 3.684504695535496e-08, "loss": 19.7904, "step": 482600 }, { "epoch": 0.9749027339536275, "grad_norm": 778.0719604492188, "learning_rate": 3.680276051212961e-08, "loss": 17.826, "step": 482610 }, { "epoch": 0.9749229345863113, "grad_norm": 272.3429260253906, "learning_rate": 3.67604982589248e-08, "loss": 16.8534, "step": 482620 }, { "epoch": 0.974943135218995, "grad_norm": 149.3791961669922, "learning_rate": 3.6718260195947594e-08, "loss": 13.4943, "step": 482630 }, { "epoch": 0.9749633358516788, "grad_norm": 983.81640625, "learning_rate": 3.6676046323403934e-08, "loss": 26.2069, "step": 482640 }, { "epoch": 0.9749835364843626, "grad_norm": 727.1124267578125, "learning_rate": 3.663385664149866e-08, "loss": 23.7736, "step": 482650 }, { "epoch": 0.9750037371170465, "grad_norm": 104.9148178100586, "learning_rate": 3.659169115043826e-08, "loss": 17.0994, "step": 482660 }, { "epoch": 0.9750239377497303, "grad_norm": 369.6506042480469, "learning_rate": 3.654954985042869e-08, "loss": 23.4421, "step": 482670 }, { "epoch": 0.9750441383824141, "grad_norm": 291.3814697265625, "learning_rate": 3.650743274167368e-08, "loss": 11.2448, "step": 482680 }, { "epoch": 0.9750643390150979, "grad_norm": 292.85687255859375, "learning_rate": 3.6465339824379165e-08, "loss": 18.7769, "step": 482690 }, { "epoch": 0.9750845396477817, "grad_norm": 549.0516357421875, "learning_rate": 3.642327109875166e-08, "loss": 24.1786, "step": 482700 }, { "epoch": 0.9751047402804656, "grad_norm": 442.29925537109375, "learning_rate": 3.638122656499432e-08, "loss": 19.9398, "step": 482710 }, { "epoch": 0.9751249409131494, "grad_norm": 524.1804809570312, "learning_rate": 3.633920622331311e-08, "loss": 21.6374, "step": 482720 }, { "epoch": 0.9751451415458332, "grad_norm": 135.19679260253906, "learning_rate": 3.629721007391229e-08, "loss": 26.2123, "step": 482730 }, { "epoch": 0.975165342178517, "grad_norm": 287.5704345703125, "learning_rate": 3.625523811699727e-08, "loss": 15.5823, "step": 482740 }, { "epoch": 0.9751855428112008, "grad_norm": 545.2062377929688, "learning_rate": 3.621329035277232e-08, "loss": 12.4626, "step": 482750 }, { "epoch": 0.9752057434438847, "grad_norm": 8.507187843322754, "learning_rate": 3.617136678144173e-08, "loss": 13.9089, "step": 482760 }, { "epoch": 0.9752259440765685, "grad_norm": 1035.490234375, "learning_rate": 3.612946740320977e-08, "loss": 25.8994, "step": 482770 }, { "epoch": 0.9752461447092523, "grad_norm": 542.39306640625, "learning_rate": 3.608759221828073e-08, "loss": 30.5626, "step": 482780 }, { "epoch": 0.9752663453419361, "grad_norm": 566.2247924804688, "learning_rate": 3.604574122685833e-08, "loss": 20.473, "step": 482790 }, { "epoch": 0.97528654597462, "grad_norm": 730.0471801757812, "learning_rate": 3.600391442914741e-08, "loss": 19.1692, "step": 482800 }, { "epoch": 0.9753067466073038, "grad_norm": 455.58203125, "learning_rate": 3.5962111825350585e-08, "loss": 22.6982, "step": 482810 }, { "epoch": 0.9753269472399876, "grad_norm": 464.4102783203125, "learning_rate": 3.592033341567325e-08, "loss": 13.5132, "step": 482820 }, { "epoch": 0.9753471478726714, "grad_norm": 301.8681945800781, "learning_rate": 3.5878579200318006e-08, "loss": 25.8108, "step": 482830 }, { "epoch": 0.9753673485053552, "grad_norm": 
403.30450439453125, "learning_rate": 3.583684917948804e-08, "loss": 20.1903, "step": 482840 }, { "epoch": 0.975387549138039, "grad_norm": 253.32778930664062, "learning_rate": 3.579514335338763e-08, "loss": 18.7812, "step": 482850 }, { "epoch": 0.9754077497707229, "grad_norm": 298.9661560058594, "learning_rate": 3.575346172221939e-08, "loss": 23.7279, "step": 482860 }, { "epoch": 0.9754279504034067, "grad_norm": 390.77490234375, "learning_rate": 3.5711804286187035e-08, "loss": 15.822, "step": 482870 }, { "epoch": 0.9754481510360905, "grad_norm": 214.9738311767578, "learning_rate": 3.5670171045492643e-08, "loss": 9.1737, "step": 482880 }, { "epoch": 0.9754683516687742, "grad_norm": 1.0779601335525513, "learning_rate": 3.5628562000339925e-08, "loss": 12.394, "step": 482890 }, { "epoch": 0.975488552301458, "grad_norm": 2.142943859100342, "learning_rate": 3.558697715093207e-08, "loss": 19.8335, "step": 482900 }, { "epoch": 0.9755087529341419, "grad_norm": 170.5755615234375, "learning_rate": 3.554541649747056e-08, "loss": 21.7454, "step": 482910 }, { "epoch": 0.9755289535668257, "grad_norm": 299.7828369140625, "learning_rate": 3.5503880040158586e-08, "loss": 18.7396, "step": 482920 }, { "epoch": 0.9755491541995095, "grad_norm": 310.21197509765625, "learning_rate": 3.546236777919876e-08, "loss": 9.8836, "step": 482930 }, { "epoch": 0.9755693548321933, "grad_norm": 213.90985107421875, "learning_rate": 3.542087971479313e-08, "loss": 11.9233, "step": 482940 }, { "epoch": 0.9755895554648771, "grad_norm": 459.5467224121094, "learning_rate": 3.5379415847143775e-08, "loss": 17.2514, "step": 482950 }, { "epoch": 0.975609756097561, "grad_norm": 460.8121032714844, "learning_rate": 3.5337976176453845e-08, "loss": 16.0871, "step": 482960 }, { "epoch": 0.9756299567302448, "grad_norm": 272.2505798339844, "learning_rate": 3.529656070292375e-08, "loss": 15.9314, "step": 482970 }, { "epoch": 0.9756501573629286, "grad_norm": 543.2966918945312, "learning_rate": 3.525516942675611e-08, "loss": 25.3451, "step": 482980 }, { "epoch": 0.9756703579956124, "grad_norm": 185.53729248046875, "learning_rate": 3.521380234815297e-08, "loss": 29.2085, "step": 482990 }, { "epoch": 0.9756905586282962, "grad_norm": 146.68104553222656, "learning_rate": 3.517245946731529e-08, "loss": 32.7787, "step": 483000 }, { "epoch": 0.97571075926098, "grad_norm": 288.0614013671875, "learning_rate": 3.513114078444513e-08, "loss": 6.5792, "step": 483010 }, { "epoch": 0.9757309598936639, "grad_norm": 698.1644897460938, "learning_rate": 3.508984629974288e-08, "loss": 22.6261, "step": 483020 }, { "epoch": 0.9757511605263477, "grad_norm": 789.5795288085938, "learning_rate": 3.504857601341172e-08, "loss": 14.246, "step": 483030 }, { "epoch": 0.9757713611590315, "grad_norm": 66.64833068847656, "learning_rate": 3.5007329925650925e-08, "loss": 47.8748, "step": 483040 }, { "epoch": 0.9757915617917153, "grad_norm": 508.5932312011719, "learning_rate": 3.4966108036662006e-08, "loss": 9.9282, "step": 483050 }, { "epoch": 0.9758117624243992, "grad_norm": 181.7556915283203, "learning_rate": 3.4924910346647024e-08, "loss": 17.4592, "step": 483060 }, { "epoch": 0.975831963057083, "grad_norm": 498.73529052734375, "learning_rate": 3.488373685580526e-08, "loss": 18.9131, "step": 483070 }, { "epoch": 0.9758521636897668, "grad_norm": 512.0242919921875, "learning_rate": 3.4842587564337674e-08, "loss": 10.8806, "step": 483080 }, { "epoch": 0.9758723643224506, "grad_norm": 297.0984802246094, "learning_rate": 3.48014624724452e-08, "loss": 8.2831, "step": 483090 }, { 
"epoch": 0.9758925649551344, "grad_norm": 2.16670298576355, "learning_rate": 3.47603615803288e-08, "loss": 17.6927, "step": 483100 }, { "epoch": 0.9759127655878183, "grad_norm": 350.0305480957031, "learning_rate": 3.471928488818776e-08, "loss": 9.4405, "step": 483110 }, { "epoch": 0.9759329662205021, "grad_norm": 14.403679847717285, "learning_rate": 3.467823239622248e-08, "loss": 17.0502, "step": 483120 }, { "epoch": 0.9759531668531859, "grad_norm": 346.641357421875, "learning_rate": 3.463720410463334e-08, "loss": 26.3269, "step": 483130 }, { "epoch": 0.9759733674858696, "grad_norm": 253.56033325195312, "learning_rate": 3.459620001362074e-08, "loss": 15.4785, "step": 483140 }, { "epoch": 0.9759935681185534, "grad_norm": 450.78497314453125, "learning_rate": 3.4555220123383416e-08, "loss": 12.1875, "step": 483150 }, { "epoch": 0.9760137687512372, "grad_norm": 178.17855834960938, "learning_rate": 3.451426443412231e-08, "loss": 8.2043, "step": 483160 }, { "epoch": 0.9760339693839211, "grad_norm": 273.9609680175781, "learning_rate": 3.4473332946036164e-08, "loss": 21.7305, "step": 483170 }, { "epoch": 0.9760541700166049, "grad_norm": 691.238525390625, "learning_rate": 3.443242565932481e-08, "loss": 20.2564, "step": 483180 }, { "epoch": 0.9760743706492887, "grad_norm": 357.8240661621094, "learning_rate": 3.439154257418753e-08, "loss": 12.5185, "step": 483190 }, { "epoch": 0.9760945712819725, "grad_norm": 495.1055603027344, "learning_rate": 3.435068369082306e-08, "loss": 37.3999, "step": 483200 }, { "epoch": 0.9761147719146563, "grad_norm": 254.81387329101562, "learning_rate": 3.4309849009431794e-08, "loss": 21.3162, "step": 483210 }, { "epoch": 0.9761349725473402, "grad_norm": 273.0685729980469, "learning_rate": 3.4269038530211906e-08, "loss": 19.9255, "step": 483220 }, { "epoch": 0.976155173180024, "grad_norm": 44.90702819824219, "learning_rate": 3.4228252253362683e-08, "loss": 13.02, "step": 483230 }, { "epoch": 0.9761753738127078, "grad_norm": 0.0, "learning_rate": 3.41874901790823e-08, "loss": 23.1527, "step": 483240 }, { "epoch": 0.9761955744453916, "grad_norm": 520.8883056640625, "learning_rate": 3.414675230757003e-08, "loss": 11.8354, "step": 483250 }, { "epoch": 0.9762157750780754, "grad_norm": 266.39990234375, "learning_rate": 3.410603863902406e-08, "loss": 17.9792, "step": 483260 }, { "epoch": 0.9762359757107593, "grad_norm": 64.41669464111328, "learning_rate": 3.406534917364257e-08, "loss": 13.4081, "step": 483270 }, { "epoch": 0.9762561763434431, "grad_norm": 334.1542663574219, "learning_rate": 3.402468391162539e-08, "loss": 18.3694, "step": 483280 }, { "epoch": 0.9762763769761269, "grad_norm": 578.1113891601562, "learning_rate": 3.398404285316847e-08, "loss": 10.7203, "step": 483290 }, { "epoch": 0.9762965776088107, "grad_norm": 284.7392272949219, "learning_rate": 3.394342599847111e-08, "loss": 13.1058, "step": 483300 }, { "epoch": 0.9763167782414945, "grad_norm": 366.591796875, "learning_rate": 3.390283334773203e-08, "loss": 19.2009, "step": 483310 }, { "epoch": 0.9763369788741784, "grad_norm": 321.5985107421875, "learning_rate": 3.3862264901147745e-08, "loss": 27.2178, "step": 483320 }, { "epoch": 0.9763571795068622, "grad_norm": 231.1267852783203, "learning_rate": 3.3821720658916426e-08, "loss": 8.5236, "step": 483330 }, { "epoch": 0.976377380139546, "grad_norm": 0.8328532576560974, "learning_rate": 3.378120062123569e-08, "loss": 10.9436, "step": 483340 }, { "epoch": 0.9763975807722298, "grad_norm": 158.7162322998047, "learning_rate": 3.374070478830316e-08, "loss": 12.2283, 
"step": 483350 }, { "epoch": 0.9764177814049136, "grad_norm": 656.27880859375, "learning_rate": 3.3700233160315897e-08, "loss": 14.6304, "step": 483360 }, { "epoch": 0.9764379820375975, "grad_norm": 404.3321228027344, "learning_rate": 3.365978573747153e-08, "loss": 31.0528, "step": 483370 }, { "epoch": 0.9764581826702813, "grad_norm": 1263.421630859375, "learning_rate": 3.361936251996711e-08, "loss": 28.5038, "step": 483380 }, { "epoch": 0.9764783833029651, "grad_norm": 152.63633728027344, "learning_rate": 3.357896350799916e-08, "loss": 14.4554, "step": 483390 }, { "epoch": 0.9764985839356488, "grad_norm": 400.4908142089844, "learning_rate": 3.3538588701765296e-08, "loss": 15.232, "step": 483400 }, { "epoch": 0.9765187845683326, "grad_norm": 317.1159362792969, "learning_rate": 3.349823810146202e-08, "loss": 8.8856, "step": 483410 }, { "epoch": 0.9765389852010165, "grad_norm": 62.96767044067383, "learning_rate": 3.34579117072864e-08, "loss": 15.2614, "step": 483420 }, { "epoch": 0.9765591858337003, "grad_norm": 244.07969665527344, "learning_rate": 3.341760951943385e-08, "loss": 19.5304, "step": 483430 }, { "epoch": 0.9765793864663841, "grad_norm": 291.3792724609375, "learning_rate": 3.337733153810141e-08, "loss": 21.925, "step": 483440 }, { "epoch": 0.9765995870990679, "grad_norm": 254.8742218017578, "learning_rate": 3.3337077763485605e-08, "loss": 26.2369, "step": 483450 }, { "epoch": 0.9766197877317517, "grad_norm": 504.7132568359375, "learning_rate": 3.329684819578294e-08, "loss": 24.7492, "step": 483460 }, { "epoch": 0.9766399883644356, "grad_norm": 401.4547119140625, "learning_rate": 3.3256642835188816e-08, "loss": 14.8526, "step": 483470 }, { "epoch": 0.9766601889971194, "grad_norm": 68.80532836914062, "learning_rate": 3.321646168189918e-08, "loss": 11.8834, "step": 483480 }, { "epoch": 0.9766803896298032, "grad_norm": 419.8252868652344, "learning_rate": 3.317630473611055e-08, "loss": 26.1874, "step": 483490 }, { "epoch": 0.976700590262487, "grad_norm": 19.07436180114746, "learning_rate": 3.313617199801777e-08, "loss": 15.3926, "step": 483500 }, { "epoch": 0.9767207908951708, "grad_norm": 770.347900390625, "learning_rate": 3.309606346781735e-08, "loss": 16.3904, "step": 483510 }, { "epoch": 0.9767409915278547, "grad_norm": 479.2562561035156, "learning_rate": 3.305597914570413e-08, "loss": 13.9847, "step": 483520 }, { "epoch": 0.9767611921605385, "grad_norm": 422.8157043457031, "learning_rate": 3.301591903187351e-08, "loss": 11.9881, "step": 483530 }, { "epoch": 0.9767813927932223, "grad_norm": 253.63360595703125, "learning_rate": 3.297588312652089e-08, "loss": 22.7496, "step": 483540 }, { "epoch": 0.9768015934259061, "grad_norm": 200.8687286376953, "learning_rate": 3.2935871429841116e-08, "loss": 16.5222, "step": 483550 }, { "epoch": 0.9768217940585899, "grad_norm": 2.260951519012451, "learning_rate": 3.289588394203014e-08, "loss": 5.5559, "step": 483560 }, { "epoch": 0.9768419946912738, "grad_norm": 306.1798400878906, "learning_rate": 3.285592066328169e-08, "loss": 13.5368, "step": 483570 }, { "epoch": 0.9768621953239576, "grad_norm": 683.299560546875, "learning_rate": 3.281598159379118e-08, "loss": 12.1139, "step": 483580 }, { "epoch": 0.9768823959566414, "grad_norm": 286.2330322265625, "learning_rate": 3.277606673375289e-08, "loss": 14.0862, "step": 483590 }, { "epoch": 0.9769025965893252, "grad_norm": 208.06178283691406, "learning_rate": 3.2736176083362216e-08, "loss": 19.0484, "step": 483600 }, { "epoch": 0.976922797222009, "grad_norm": 510.49981689453125, "learning_rate": 
3.2696309642812344e-08, "loss": 15.3424, "step": 483610 }, { "epoch": 0.9769429978546929, "grad_norm": 101.3388671875, "learning_rate": 3.2656467412298665e-08, "loss": 25.7158, "step": 483620 }, { "epoch": 0.9769631984873767, "grad_norm": 340.8699645996094, "learning_rate": 3.261664939201436e-08, "loss": 21.4636, "step": 483630 }, { "epoch": 0.9769833991200605, "grad_norm": 482.3835754394531, "learning_rate": 3.2576855582154844e-08, "loss": 25.4405, "step": 483640 }, { "epoch": 0.9770035997527442, "grad_norm": 162.60202026367188, "learning_rate": 3.253708598291272e-08, "loss": 7.1479, "step": 483650 }, { "epoch": 0.977023800385428, "grad_norm": 299.3507080078125, "learning_rate": 3.2497340594482284e-08, "loss": 11.9309, "step": 483660 }, { "epoch": 0.9770440010181118, "grad_norm": 221.28309631347656, "learning_rate": 3.245761941705727e-08, "loss": 12.9233, "step": 483670 }, { "epoch": 0.9770642016507957, "grad_norm": 221.97930908203125, "learning_rate": 3.241792245083142e-08, "loss": 7.916, "step": 483680 }, { "epoch": 0.9770844022834795, "grad_norm": 274.4580383300781, "learning_rate": 3.237824969599845e-08, "loss": 13.6841, "step": 483690 }, { "epoch": 0.9771046029161633, "grad_norm": 267.8779602050781, "learning_rate": 3.2338601152751e-08, "loss": 21.27, "step": 483700 }, { "epoch": 0.9771248035488471, "grad_norm": 591.0781860351562, "learning_rate": 3.2298976821282804e-08, "loss": 28.3484, "step": 483710 }, { "epoch": 0.9771450041815309, "grad_norm": 461.17681884765625, "learning_rate": 3.2259376701787025e-08, "loss": 15.7276, "step": 483720 }, { "epoch": 0.9771652048142148, "grad_norm": 117.9496078491211, "learning_rate": 3.2219800794456304e-08, "loss": 22.1262, "step": 483730 }, { "epoch": 0.9771854054468986, "grad_norm": 188.90655517578125, "learning_rate": 3.2180249099483806e-08, "loss": 12.5037, "step": 483740 }, { "epoch": 0.9772056060795824, "grad_norm": 659.62158203125, "learning_rate": 3.214072161706272e-08, "loss": 18.8168, "step": 483750 }, { "epoch": 0.9772258067122662, "grad_norm": 335.1777648925781, "learning_rate": 3.210121834738456e-08, "loss": 26.0619, "step": 483760 }, { "epoch": 0.97724600734495, "grad_norm": 50.493900299072266, "learning_rate": 3.206173929064304e-08, "loss": 20.8472, "step": 483770 }, { "epoch": 0.9772662079776339, "grad_norm": 134.26231384277344, "learning_rate": 3.20222844470297e-08, "loss": 9.9554, "step": 483780 }, { "epoch": 0.9772864086103177, "grad_norm": 707.6260986328125, "learning_rate": 3.198285381673716e-08, "loss": 31.974, "step": 483790 }, { "epoch": 0.9773066092430015, "grad_norm": 736.2801513671875, "learning_rate": 3.194344739995803e-08, "loss": 26.2879, "step": 483800 }, { "epoch": 0.9773268098756853, "grad_norm": 309.8446044921875, "learning_rate": 3.1904065196883825e-08, "loss": 15.8285, "step": 483810 }, { "epoch": 0.9773470105083691, "grad_norm": 123.22837829589844, "learning_rate": 3.1864707207706624e-08, "loss": 6.2707, "step": 483820 }, { "epoch": 0.977367211141053, "grad_norm": 793.5512084960938, "learning_rate": 3.182537343261849e-08, "loss": 20.4665, "step": 483830 }, { "epoch": 0.9773874117737368, "grad_norm": 303.29290771484375, "learning_rate": 3.178606387181038e-08, "loss": 26.4614, "step": 483840 }, { "epoch": 0.9774076124064206, "grad_norm": 375.2763366699219, "learning_rate": 3.1746778525474916e-08, "loss": 8.8345, "step": 483850 }, { "epoch": 0.9774278130391044, "grad_norm": 121.67900085449219, "learning_rate": 3.1707517393803064e-08, "loss": 8.8945, "step": 483860 }, { "epoch": 0.9774480136717882, "grad_norm": 
783.3543701171875, "learning_rate": 3.166828047698578e-08, "loss": 13.7929, "step": 483870 }, { "epoch": 0.9774682143044721, "grad_norm": 465.07061767578125, "learning_rate": 3.1629067775214575e-08, "loss": 23.3499, "step": 483880 }, { "epoch": 0.9774884149371559, "grad_norm": 257.0478515625, "learning_rate": 3.158987928868151e-08, "loss": 13.9445, "step": 483890 }, { "epoch": 0.9775086155698397, "grad_norm": 209.32102966308594, "learning_rate": 3.1550715017575895e-08, "loss": 16.5252, "step": 483900 }, { "epoch": 0.9775288162025234, "grad_norm": 448.6198425292969, "learning_rate": 3.151157496208979e-08, "loss": 12.6523, "step": 483910 }, { "epoch": 0.9775490168352072, "grad_norm": 532.5560913085938, "learning_rate": 3.1472459122414144e-08, "loss": 16.7201, "step": 483920 }, { "epoch": 0.977569217467891, "grad_norm": 70.59700012207031, "learning_rate": 3.143336749873882e-08, "loss": 15.6916, "step": 483930 }, { "epoch": 0.9775894181005749, "grad_norm": 213.04867553710938, "learning_rate": 3.139430009125477e-08, "loss": 18.7005, "step": 483940 }, { "epoch": 0.9776096187332587, "grad_norm": 996.3571166992188, "learning_rate": 3.135525690015184e-08, "loss": 22.9712, "step": 483950 }, { "epoch": 0.9776298193659425, "grad_norm": 182.0755615234375, "learning_rate": 3.131623792562155e-08, "loss": 15.4185, "step": 483960 }, { "epoch": 0.9776500199986263, "grad_norm": 0.8058404326438904, "learning_rate": 3.127724316785263e-08, "loss": 18.1625, "step": 483970 }, { "epoch": 0.9776702206313101, "grad_norm": 46.014915466308594, "learning_rate": 3.1238272627035494e-08, "loss": 20.9273, "step": 483980 }, { "epoch": 0.977690421263994, "grad_norm": 283.42718505859375, "learning_rate": 3.119932630336109e-08, "loss": 32.6546, "step": 483990 }, { "epoch": 0.9777106218966778, "grad_norm": 419.70343017578125, "learning_rate": 3.1160404197018155e-08, "loss": 17.8015, "step": 484000 }, { "epoch": 0.9777308225293616, "grad_norm": 168.8333740234375, "learning_rate": 3.11215063081971e-08, "loss": 27.3313, "step": 484010 }, { "epoch": 0.9777510231620454, "grad_norm": 183.36619567871094, "learning_rate": 3.108263263708666e-08, "loss": 7.288, "step": 484020 }, { "epoch": 0.9777712237947292, "grad_norm": 267.46441650390625, "learning_rate": 3.104378318387724e-08, "loss": 11.842, "step": 484030 }, { "epoch": 0.9777914244274131, "grad_norm": 233.20396423339844, "learning_rate": 3.1004957948757576e-08, "loss": 17.2669, "step": 484040 }, { "epoch": 0.9778116250600969, "grad_norm": 330.2497863769531, "learning_rate": 3.0966156931916955e-08, "loss": 32.6221, "step": 484050 }, { "epoch": 0.9778318256927807, "grad_norm": 300.6095275878906, "learning_rate": 3.092738013354468e-08, "loss": 12.898, "step": 484060 }, { "epoch": 0.9778520263254645, "grad_norm": 370.9511413574219, "learning_rate": 3.088862755383004e-08, "loss": 16.3676, "step": 484070 }, { "epoch": 0.9778722269581483, "grad_norm": 272.9999084472656, "learning_rate": 3.084989919296122e-08, "loss": 25.2239, "step": 484080 }, { "epoch": 0.9778924275908322, "grad_norm": 250.6344757080078, "learning_rate": 3.081119505112751e-08, "loss": 19.4249, "step": 484090 }, { "epoch": 0.977912628223516, "grad_norm": 251.88600158691406, "learning_rate": 3.077251512851709e-08, "loss": 26.6836, "step": 484100 }, { "epoch": 0.9779328288561998, "grad_norm": 395.8818054199219, "learning_rate": 3.07338594253187e-08, "loss": 40.7621, "step": 484110 }, { "epoch": 0.9779530294888836, "grad_norm": 612.4962158203125, "learning_rate": 3.069522794172109e-08, "loss": 18.4162, "step": 484120 }, { 
"epoch": 0.9779732301215674, "grad_norm": 263.68402099609375, "learning_rate": 3.0656620677911867e-08, "loss": 11.7562, "step": 484130 }, { "epoch": 0.9779934307542513, "grad_norm": 524.9879760742188, "learning_rate": 3.061803763408033e-08, "loss": 21.7564, "step": 484140 }, { "epoch": 0.9780136313869351, "grad_norm": 1036.5638427734375, "learning_rate": 3.057947881041301e-08, "loss": 22.7379, "step": 484150 }, { "epoch": 0.9780338320196189, "grad_norm": 235.7089080810547, "learning_rate": 3.054094420709863e-08, "loss": 15.1089, "step": 484160 }, { "epoch": 0.9780540326523026, "grad_norm": 278.8729553222656, "learning_rate": 3.050243382432483e-08, "loss": 17.9415, "step": 484170 }, { "epoch": 0.9780742332849864, "grad_norm": 382.26318359375, "learning_rate": 3.046394766228034e-08, "loss": 9.5056, "step": 484180 }, { "epoch": 0.9780944339176703, "grad_norm": 449.79052734375, "learning_rate": 3.0425485721151115e-08, "loss": 26.3229, "step": 484190 }, { "epoch": 0.9781146345503541, "grad_norm": 287.5538635253906, "learning_rate": 3.038704800112535e-08, "loss": 8.6234, "step": 484200 }, { "epoch": 0.9781348351830379, "grad_norm": 293.88818359375, "learning_rate": 3.034863450239067e-08, "loss": 32.1613, "step": 484210 }, { "epoch": 0.9781550358157217, "grad_norm": 635.933837890625, "learning_rate": 3.0310245225133595e-08, "loss": 20.5715, "step": 484220 }, { "epoch": 0.9781752364484055, "grad_norm": 546.6377563476562, "learning_rate": 3.027188016954175e-08, "loss": 20.026, "step": 484230 }, { "epoch": 0.9781954370810894, "grad_norm": 340.8769836425781, "learning_rate": 3.0233539335802195e-08, "loss": 22.315, "step": 484240 }, { "epoch": 0.9782156377137732, "grad_norm": 629.7188110351562, "learning_rate": 3.019522272410202e-08, "loss": 16.2137, "step": 484250 }, { "epoch": 0.978235838346457, "grad_norm": 351.24102783203125, "learning_rate": 3.0156930334626633e-08, "loss": 13.6654, "step": 484260 }, { "epoch": 0.9782560389791408, "grad_norm": 620.3857421875, "learning_rate": 3.0118662167564205e-08, "loss": 13.2582, "step": 484270 }, { "epoch": 0.9782762396118246, "grad_norm": 320.65093994140625, "learning_rate": 3.008041822310015e-08, "loss": 15.7659, "step": 484280 }, { "epoch": 0.9782964402445085, "grad_norm": 4.544155597686768, "learning_rate": 3.004219850142209e-08, "loss": 23.1176, "step": 484290 }, { "epoch": 0.9783166408771923, "grad_norm": 330.84075927734375, "learning_rate": 3.0004003002714886e-08, "loss": 38.308, "step": 484300 }, { "epoch": 0.9783368415098761, "grad_norm": 134.98233032226562, "learning_rate": 2.9965831727165603e-08, "loss": 16.8132, "step": 484310 }, { "epoch": 0.9783570421425599, "grad_norm": 240.02906799316406, "learning_rate": 2.992768467496021e-08, "loss": 16.8663, "step": 484320 }, { "epoch": 0.9783772427752437, "grad_norm": 69.66632843017578, "learning_rate": 2.988956184628411e-08, "loss": 17.5265, "step": 484330 }, { "epoch": 0.9783974434079276, "grad_norm": 37.1854133605957, "learning_rate": 2.985146324132438e-08, "loss": 41.3305, "step": 484340 }, { "epoch": 0.9784176440406114, "grad_norm": 91.68061828613281, "learning_rate": 2.981338886026475e-08, "loss": 12.9395, "step": 484350 }, { "epoch": 0.9784378446732952, "grad_norm": 484.4888916015625, "learning_rate": 2.97753387032923e-08, "loss": 21.5611, "step": 484360 }, { "epoch": 0.978458045305979, "grad_norm": 104.05294799804688, "learning_rate": 2.9737312770591887e-08, "loss": 21.4092, "step": 484370 }, { "epoch": 0.9784782459386628, "grad_norm": 284.43402099609375, "learning_rate": 2.9699311062349467e-08, 
"loss": 16.0598, "step": 484380 }, { "epoch": 0.9784984465713467, "grad_norm": 12.049307823181152, "learning_rate": 2.966133357874934e-08, "loss": 25.8269, "step": 484390 }, { "epoch": 0.9785186472040305, "grad_norm": 63.954917907714844, "learning_rate": 2.9623380319976912e-08, "loss": 13.1331, "step": 484400 }, { "epoch": 0.9785388478367143, "grad_norm": 486.4266662597656, "learning_rate": 2.9585451286217593e-08, "loss": 15.2235, "step": 484410 }, { "epoch": 0.978559048469398, "grad_norm": 151.86865234375, "learning_rate": 2.954754647765623e-08, "loss": 13.0449, "step": 484420 }, { "epoch": 0.9785792491020818, "grad_norm": 803.1467895507812, "learning_rate": 2.950966589447657e-08, "loss": 27.0853, "step": 484430 }, { "epoch": 0.9785994497347656, "grad_norm": 347.372802734375, "learning_rate": 2.947180953686457e-08, "loss": 15.3117, "step": 484440 }, { "epoch": 0.9786196503674495, "grad_norm": 400.6361083984375, "learning_rate": 2.9433977405003976e-08, "loss": 22.2511, "step": 484450 }, { "epoch": 0.9786398510001333, "grad_norm": 344.0408935546875, "learning_rate": 2.9396169499079087e-08, "loss": 14.1736, "step": 484460 }, { "epoch": 0.9786600516328171, "grad_norm": 880.786376953125, "learning_rate": 2.935838581927475e-08, "loss": 34.3599, "step": 484470 }, { "epoch": 0.9786802522655009, "grad_norm": 109.6543960571289, "learning_rate": 2.9320626365774153e-08, "loss": 8.8492, "step": 484480 }, { "epoch": 0.9787004528981847, "grad_norm": 12.039264678955078, "learning_rate": 2.9282891138762148e-08, "loss": 16.186, "step": 484490 }, { "epoch": 0.9787206535308686, "grad_norm": 470.5296325683594, "learning_rate": 2.9245180138423033e-08, "loss": 36.8357, "step": 484500 }, { "epoch": 0.9787408541635524, "grad_norm": 523.0750732421875, "learning_rate": 2.920749336494e-08, "loss": 13.6021, "step": 484510 }, { "epoch": 0.9787610547962362, "grad_norm": 327.95672607421875, "learning_rate": 2.9169830818496226e-08, "loss": 24.0132, "step": 484520 }, { "epoch": 0.97878125542892, "grad_norm": 125.46385955810547, "learning_rate": 2.9132192499276014e-08, "loss": 19.9534, "step": 484530 }, { "epoch": 0.9788014560616038, "grad_norm": 17.092742919921875, "learning_rate": 2.9094578407462547e-08, "loss": 14.0685, "step": 484540 }, { "epoch": 0.9788216566942877, "grad_norm": 230.4193878173828, "learning_rate": 2.9056988543239018e-08, "loss": 6.433, "step": 484550 }, { "epoch": 0.9788418573269715, "grad_norm": 45.332794189453125, "learning_rate": 2.9019422906789162e-08, "loss": 13.0758, "step": 484560 }, { "epoch": 0.9788620579596553, "grad_norm": 377.50286865234375, "learning_rate": 2.8981881498295616e-08, "loss": 27.4283, "step": 484570 }, { "epoch": 0.9788822585923391, "grad_norm": 170.75660705566406, "learning_rate": 2.8944364317941564e-08, "loss": 11.3379, "step": 484580 }, { "epoch": 0.978902459225023, "grad_norm": 463.7169494628906, "learning_rate": 2.8906871365909638e-08, "loss": 13.9359, "step": 484590 }, { "epoch": 0.9789226598577068, "grad_norm": 578.7476806640625, "learning_rate": 2.8869402642382473e-08, "loss": 12.9114, "step": 484600 }, { "epoch": 0.9789428604903906, "grad_norm": 1014.2540893554688, "learning_rate": 2.8831958147543805e-08, "loss": 17.8054, "step": 484610 }, { "epoch": 0.9789630611230744, "grad_norm": 220.0530242919922, "learning_rate": 2.8794537881574046e-08, "loss": 20.0184, "step": 484620 }, { "epoch": 0.9789832617557582, "grad_norm": 673.9566040039062, "learning_rate": 2.87571418446575e-08, "loss": 21.4076, "step": 484630 }, { "epoch": 0.979003462388442, "grad_norm": 
667.1533813476562, "learning_rate": 2.871977003697568e-08, "loss": 22.8192, "step": 484640 }, { "epoch": 0.9790236630211259, "grad_norm": 424.0278625488281, "learning_rate": 2.8682422458710667e-08, "loss": 15.4292, "step": 484650 }, { "epoch": 0.9790438636538097, "grad_norm": 268.6829528808594, "learning_rate": 2.864509911004454e-08, "loss": 32.7413, "step": 484660 }, { "epoch": 0.9790640642864935, "grad_norm": 6.1492838859558105, "learning_rate": 2.8607799991159368e-08, "loss": 10.4449, "step": 484670 }, { "epoch": 0.9790842649191772, "grad_norm": 649.07861328125, "learning_rate": 2.857052510223668e-08, "loss": 22.5222, "step": 484680 }, { "epoch": 0.979104465551861, "grad_norm": 450.8924865722656, "learning_rate": 2.853327444345799e-08, "loss": 15.7099, "step": 484690 }, { "epoch": 0.9791246661845449, "grad_norm": 761.1311645507812, "learning_rate": 2.8496048015005385e-08, "loss": 23.6273, "step": 484700 }, { "epoch": 0.9791448668172287, "grad_norm": 269.6807556152344, "learning_rate": 2.8458845817060376e-08, "loss": 13.483, "step": 484710 }, { "epoch": 0.9791650674499125, "grad_norm": 527.2760620117188, "learning_rate": 2.8421667849803937e-08, "loss": 13.3812, "step": 484720 }, { "epoch": 0.9791852680825963, "grad_norm": 7.196564197540283, "learning_rate": 2.8384514113417026e-08, "loss": 17.1273, "step": 484730 }, { "epoch": 0.9792054687152801, "grad_norm": 0.0, "learning_rate": 2.8347384608081173e-08, "loss": 26.7297, "step": 484740 }, { "epoch": 0.979225669347964, "grad_norm": 1.6372706890106201, "learning_rate": 2.8310279333976786e-08, "loss": 22.1027, "step": 484750 }, { "epoch": 0.9792458699806478, "grad_norm": 167.82321166992188, "learning_rate": 2.827319829128594e-08, "loss": 12.3742, "step": 484760 }, { "epoch": 0.9792660706133316, "grad_norm": 317.5897216796875, "learning_rate": 2.823614148018794e-08, "loss": 13.8572, "step": 484770 }, { "epoch": 0.9792862712460154, "grad_norm": 455.1285400390625, "learning_rate": 2.819910890086375e-08, "loss": 26.5275, "step": 484780 }, { "epoch": 0.9793064718786992, "grad_norm": 508.3708801269531, "learning_rate": 2.8162100553494887e-08, "loss": 16.7634, "step": 484790 }, { "epoch": 0.9793266725113831, "grad_norm": 454.05999755859375, "learning_rate": 2.8125116438260104e-08, "loss": 15.6647, "step": 484800 }, { "epoch": 0.9793468731440669, "grad_norm": 482.0390930175781, "learning_rate": 2.8088156555340916e-08, "loss": 26.6385, "step": 484810 }, { "epoch": 0.9793670737767507, "grad_norm": 377.8825988769531, "learning_rate": 2.805122090491719e-08, "loss": 13.5362, "step": 484820 }, { "epoch": 0.9793872744094345, "grad_norm": 116.45448303222656, "learning_rate": 2.801430948716821e-08, "loss": 20.9232, "step": 484830 }, { "epoch": 0.9794074750421183, "grad_norm": 210.9385986328125, "learning_rate": 2.797742230227496e-08, "loss": 16.5787, "step": 484840 }, { "epoch": 0.9794276756748022, "grad_norm": 302.8087463378906, "learning_rate": 2.794055935041673e-08, "loss": 14.3869, "step": 484850 }, { "epoch": 0.979447876307486, "grad_norm": 780.3137817382812, "learning_rate": 2.7903720631772824e-08, "loss": 24.1741, "step": 484860 }, { "epoch": 0.9794680769401698, "grad_norm": 316.7269287109375, "learning_rate": 2.7866906146523098e-08, "loss": 22.5804, "step": 484870 }, { "epoch": 0.9794882775728536, "grad_norm": 630.5425415039062, "learning_rate": 2.783011589484741e-08, "loss": 25.4059, "step": 484880 }, { "epoch": 0.9795084782055374, "grad_norm": 77.95024871826172, "learning_rate": 2.7793349876924503e-08, "loss": 23.828, "step": 484890 }, { "epoch": 
0.9795286788382213, "grad_norm": 318.6225280761719, "learning_rate": 2.7756608092933678e-08, "loss": 28.741, "step": 484900 }, { "epoch": 0.9795488794709051, "grad_norm": 187.707763671875, "learning_rate": 2.771989054305424e-08, "loss": 11.4559, "step": 484910 }, { "epoch": 0.9795690801035889, "grad_norm": 259.7579650878906, "learning_rate": 2.768319722746493e-08, "loss": 10.9794, "step": 484920 }, { "epoch": 0.9795892807362726, "grad_norm": 397.9490051269531, "learning_rate": 2.7646528146345053e-08, "loss": 13.0612, "step": 484930 }, { "epoch": 0.9796094813689564, "grad_norm": 629.7482299804688, "learning_rate": 2.760988329987224e-08, "loss": 14.3817, "step": 484940 }, { "epoch": 0.9796296820016402, "grad_norm": 643.6015625, "learning_rate": 2.7573262688226355e-08, "loss": 39.0208, "step": 484950 }, { "epoch": 0.9796498826343241, "grad_norm": 417.1099548339844, "learning_rate": 2.753666631158447e-08, "loss": 13.9643, "step": 484960 }, { "epoch": 0.9796700832670079, "grad_norm": 1445.2420654296875, "learning_rate": 2.7500094170126447e-08, "loss": 12.6738, "step": 484970 }, { "epoch": 0.9796902838996917, "grad_norm": 310.7126770019531, "learning_rate": 2.7463546264029915e-08, "loss": 10.1163, "step": 484980 }, { "epoch": 0.9797104845323755, "grad_norm": 400.7168884277344, "learning_rate": 2.7427022593473074e-08, "loss": 18.6557, "step": 484990 }, { "epoch": 0.9797306851650593, "grad_norm": 440.9306335449219, "learning_rate": 2.7390523158633552e-08, "loss": 16.9205, "step": 485000 }, { "epoch": 0.9797508857977432, "grad_norm": 313.15338134765625, "learning_rate": 2.7354047959689543e-08, "loss": 11.0791, "step": 485010 }, { "epoch": 0.979771086430427, "grad_norm": 488.06695556640625, "learning_rate": 2.7317596996818684e-08, "loss": 31.8022, "step": 485020 }, { "epoch": 0.9797912870631108, "grad_norm": 346.84796142578125, "learning_rate": 2.728117027019861e-08, "loss": 38.3031, "step": 485030 }, { "epoch": 0.9798114876957946, "grad_norm": 608.5032958984375, "learning_rate": 2.7244767780007507e-08, "loss": 31.9193, "step": 485040 }, { "epoch": 0.9798316883284784, "grad_norm": 215.79571533203125, "learning_rate": 2.7208389526421907e-08, "loss": 33.9912, "step": 485050 }, { "epoch": 0.9798518889611623, "grad_norm": 217.80091857910156, "learning_rate": 2.7172035509619442e-08, "loss": 21.6836, "step": 485060 }, { "epoch": 0.9798720895938461, "grad_norm": 467.5257873535156, "learning_rate": 2.713570572977775e-08, "loss": 31.3339, "step": 485070 }, { "epoch": 0.9798922902265299, "grad_norm": 1203.36083984375, "learning_rate": 2.7099400187073356e-08, "loss": 20.5482, "step": 485080 }, { "epoch": 0.9799124908592137, "grad_norm": 406.3395690917969, "learning_rate": 2.7063118881682782e-08, "loss": 23.3002, "step": 485090 }, { "epoch": 0.9799326914918975, "grad_norm": 527.0682983398438, "learning_rate": 2.7026861813783668e-08, "loss": 19.6665, "step": 485100 }, { "epoch": 0.9799528921245814, "grad_norm": 182.63735961914062, "learning_rate": 2.6990628983553093e-08, "loss": 15.1178, "step": 485110 }, { "epoch": 0.9799730927572652, "grad_norm": 384.9981384277344, "learning_rate": 2.6954420391166468e-08, "loss": 23.4295, "step": 485120 }, { "epoch": 0.979993293389949, "grad_norm": 618.421630859375, "learning_rate": 2.691823603680088e-08, "loss": 32.6432, "step": 485130 }, { "epoch": 0.9800134940226328, "grad_norm": 247.70474243164062, "learning_rate": 2.6882075920632854e-08, "loss": 16.8914, "step": 485140 }, { "epoch": 0.9800336946553166, "grad_norm": 258.8690490722656, "learning_rate": 
2.684594004283836e-08, "loss": 15.5106, "step": 485150 }, { "epoch": 0.9800538952880005, "grad_norm": 534.8699340820312, "learning_rate": 2.6809828403593363e-08, "loss": 12.8656, "step": 485160 }, { "epoch": 0.9800740959206843, "grad_norm": 45.22439956665039, "learning_rate": 2.6773741003074394e-08, "loss": 18.8632, "step": 485170 }, { "epoch": 0.9800942965533681, "grad_norm": 718.5444946289062, "learning_rate": 2.6737677841456867e-08, "loss": 22.0892, "step": 485180 }, { "epoch": 0.9801144971860518, "grad_norm": 481.2840576171875, "learning_rate": 2.670163891891675e-08, "loss": 17.4947, "step": 485190 }, { "epoch": 0.9801346978187356, "grad_norm": 320.5677490234375, "learning_rate": 2.6665624235629463e-08, "loss": 22.502, "step": 485200 }, { "epoch": 0.9801548984514195, "grad_norm": 519.1952514648438, "learning_rate": 2.662963379177097e-08, "loss": 13.4785, "step": 485210 }, { "epoch": 0.9801750990841033, "grad_norm": 0.9457218647003174, "learning_rate": 2.6593667587516693e-08, "loss": 18.4401, "step": 485220 }, { "epoch": 0.9801952997167871, "grad_norm": 231.81671142578125, "learning_rate": 2.6557725623041487e-08, "loss": 19.6856, "step": 485230 }, { "epoch": 0.9802155003494709, "grad_norm": 233.8401641845703, "learning_rate": 2.6521807898520214e-08, "loss": 10.5556, "step": 485240 }, { "epoch": 0.9802357009821547, "grad_norm": 323.9283142089844, "learning_rate": 2.64859144141294e-08, "loss": 10.9646, "step": 485250 }, { "epoch": 0.9802559016148386, "grad_norm": 152.94651794433594, "learning_rate": 2.6450045170042238e-08, "loss": 16.6187, "step": 485260 }, { "epoch": 0.9802761022475224, "grad_norm": 560.7903442382812, "learning_rate": 2.6414200166434144e-08, "loss": 31.5643, "step": 485270 }, { "epoch": 0.9802963028802062, "grad_norm": 762.9198608398438, "learning_rate": 2.6378379403480536e-08, "loss": 21.9723, "step": 485280 }, { "epoch": 0.98031650351289, "grad_norm": 159.2328643798828, "learning_rate": 2.6342582881355717e-08, "loss": 11.4329, "step": 485290 }, { "epoch": 0.9803367041455738, "grad_norm": 495.5337829589844, "learning_rate": 2.6306810600233435e-08, "loss": 19.3928, "step": 485300 }, { "epoch": 0.9803569047782577, "grad_norm": 289.5658264160156, "learning_rate": 2.6271062560288552e-08, "loss": 20.7861, "step": 485310 }, { "epoch": 0.9803771054109415, "grad_norm": 768.4794311523438, "learning_rate": 2.6235338761695372e-08, "loss": 21.2778, "step": 485320 }, { "epoch": 0.9803973060436253, "grad_norm": 307.91845703125, "learning_rate": 2.6199639204628202e-08, "loss": 15.3602, "step": 485330 }, { "epoch": 0.9804175066763091, "grad_norm": 549.4125366210938, "learning_rate": 2.6163963889260236e-08, "loss": 22.524, "step": 485340 }, { "epoch": 0.9804377073089929, "grad_norm": 22.162643432617188, "learning_rate": 2.6128312815766332e-08, "loss": 21.6474, "step": 485350 }, { "epoch": 0.9804579079416768, "grad_norm": 628.3938598632812, "learning_rate": 2.6092685984319134e-08, "loss": 15.8834, "step": 485360 }, { "epoch": 0.9804781085743606, "grad_norm": 195.3922119140625, "learning_rate": 2.6057083395093495e-08, "loss": 26.0357, "step": 485370 }, { "epoch": 0.9804983092070444, "grad_norm": 303.665283203125, "learning_rate": 2.6021505048262062e-08, "loss": 25.6902, "step": 485380 }, { "epoch": 0.9805185098397282, "grad_norm": 24.05523681640625, "learning_rate": 2.5985950943999137e-08, "loss": 12.9914, "step": 485390 }, { "epoch": 0.980538710472412, "grad_norm": 269.62017822265625, "learning_rate": 2.5950421082476805e-08, "loss": 13.2076, "step": 485400 }, { "epoch": 
0.9805589111050959, "grad_norm": 341.3332824707031, "learning_rate": 2.5914915463868816e-08, "loss": 21.3219, "step": 485410 }, { "epoch": 0.9805791117377797, "grad_norm": 0.0197820533066988, "learning_rate": 2.5879434088348364e-08, "loss": 18.7422, "step": 485420 }, { "epoch": 0.9805993123704635, "grad_norm": 641.1902465820312, "learning_rate": 2.584397695608809e-08, "loss": 35.1817, "step": 485430 }, { "epoch": 0.9806195130031473, "grad_norm": 282.08807373046875, "learning_rate": 2.580854406726174e-08, "loss": 9.3104, "step": 485440 }, { "epoch": 0.980639713635831, "grad_norm": 57.96085739135742, "learning_rate": 2.5773135422040296e-08, "loss": 11.5754, "step": 485450 }, { "epoch": 0.9806599142685148, "grad_norm": 624.4923095703125, "learning_rate": 2.5737751020598057e-08, "loss": 10.0956, "step": 485460 }, { "epoch": 0.9806801149011987, "grad_norm": 274.8274230957031, "learning_rate": 2.5702390863105996e-08, "loss": 8.2575, "step": 485470 }, { "epoch": 0.9807003155338825, "grad_norm": 454.69952392578125, "learning_rate": 2.5667054949737315e-08, "loss": 13.499, "step": 485480 }, { "epoch": 0.9807205161665663, "grad_norm": 227.89601135253906, "learning_rate": 2.5631743280664643e-08, "loss": 18.8259, "step": 485490 }, { "epoch": 0.9807407167992501, "grad_norm": 448.7703552246094, "learning_rate": 2.5596455856058966e-08, "loss": 21.568, "step": 485500 }, { "epoch": 0.9807609174319339, "grad_norm": 465.294921875, "learning_rate": 2.556119267609347e-08, "loss": 13.4545, "step": 485510 }, { "epoch": 0.9807811180646178, "grad_norm": 369.0829772949219, "learning_rate": 2.552595374093858e-08, "loss": 16.9684, "step": 485520 }, { "epoch": 0.9808013186973016, "grad_norm": 295.7125549316406, "learning_rate": 2.5490739050767488e-08, "loss": 10.4524, "step": 485530 }, { "epoch": 0.9808215193299854, "grad_norm": 500.7685241699219, "learning_rate": 2.5455548605751167e-08, "loss": 32.5881, "step": 485540 }, { "epoch": 0.9808417199626692, "grad_norm": 364.6302490234375, "learning_rate": 2.5420382406060595e-08, "loss": 26.903, "step": 485550 }, { "epoch": 0.980861920595353, "grad_norm": 335.4365234375, "learning_rate": 2.5385240451867853e-08, "loss": 17.7864, "step": 485560 }, { "epoch": 0.9808821212280369, "grad_norm": 441.4403381347656, "learning_rate": 2.5350122743344476e-08, "loss": 19.7877, "step": 485570 }, { "epoch": 0.9809023218607207, "grad_norm": 378.8863220214844, "learning_rate": 2.531502928066143e-08, "loss": 12.3159, "step": 485580 }, { "epoch": 0.9809225224934045, "grad_norm": 380.1937561035156, "learning_rate": 2.527996006398914e-08, "loss": 7.871, "step": 485590 }, { "epoch": 0.9809427231260883, "grad_norm": 404.0863037109375, "learning_rate": 2.5244915093499134e-08, "loss": 36.1998, "step": 485600 }, { "epoch": 0.9809629237587721, "grad_norm": 184.4862823486328, "learning_rate": 2.5209894369362386e-08, "loss": 13.5838, "step": 485610 }, { "epoch": 0.980983124391456, "grad_norm": 244.2938232421875, "learning_rate": 2.5174897891748762e-08, "loss": 23.7825, "step": 485620 }, { "epoch": 0.9810033250241398, "grad_norm": 220.58428955078125, "learning_rate": 2.5139925660829233e-08, "loss": 19.5785, "step": 485630 }, { "epoch": 0.9810235256568236, "grad_norm": 194.8220672607422, "learning_rate": 2.5104977676774777e-08, "loss": 18.7257, "step": 485640 }, { "epoch": 0.9810437262895074, "grad_norm": 316.3087158203125, "learning_rate": 2.5070053939754702e-08, "loss": 12.6183, "step": 485650 }, { "epoch": 0.9810639269221912, "grad_norm": 525.3484497070312, "learning_rate": 2.5035154449940535e-08, 
"loss": 27.6374, "step": 485660 }, { "epoch": 0.9810841275548751, "grad_norm": 691.3954467773438, "learning_rate": 2.500027920750103e-08, "loss": 16.5734, "step": 485670 }, { "epoch": 0.9811043281875589, "grad_norm": 410.7261047363281, "learning_rate": 2.496542821260717e-08, "loss": 12.7947, "step": 485680 }, { "epoch": 0.9811245288202427, "grad_norm": 172.7108154296875, "learning_rate": 2.493060146542825e-08, "loss": 22.6328, "step": 485690 }, { "epoch": 0.9811447294529264, "grad_norm": 403.85003662109375, "learning_rate": 2.489579896613359e-08, "loss": 29.9945, "step": 485700 }, { "epoch": 0.9811649300856102, "grad_norm": 175.58148193359375, "learning_rate": 2.4861020714894156e-08, "loss": 10.1164, "step": 485710 }, { "epoch": 0.981185130718294, "grad_norm": 210.02651977539062, "learning_rate": 2.482626671187871e-08, "loss": 9.6241, "step": 485720 }, { "epoch": 0.9812053313509779, "grad_norm": 445.175537109375, "learning_rate": 2.4791536957256e-08, "loss": 14.9926, "step": 485730 }, { "epoch": 0.9812255319836617, "grad_norm": 885.3142700195312, "learning_rate": 2.4756831451196452e-08, "loss": 26.4219, "step": 485740 }, { "epoch": 0.9812457326163455, "grad_norm": 28.76207733154297, "learning_rate": 2.472215019386881e-08, "loss": 15.5411, "step": 485750 }, { "epoch": 0.9812659332490293, "grad_norm": 260.98870849609375, "learning_rate": 2.4687493185441836e-08, "loss": 25.6906, "step": 485760 }, { "epoch": 0.9812861338817132, "grad_norm": 369.857421875, "learning_rate": 2.4652860426084278e-08, "loss": 23.6489, "step": 485770 }, { "epoch": 0.981306334514397, "grad_norm": 128.75570678710938, "learning_rate": 2.46182519159649e-08, "loss": 10.6912, "step": 485780 }, { "epoch": 0.9813265351470808, "grad_norm": 120.6553955078125, "learning_rate": 2.458366765525355e-08, "loss": 9.2437, "step": 485790 }, { "epoch": 0.9813467357797646, "grad_norm": 560.5294189453125, "learning_rate": 2.4549107644117888e-08, "loss": 17.972, "step": 485800 }, { "epoch": 0.9813669364124484, "grad_norm": 62.803314208984375, "learning_rate": 2.4514571882726102e-08, "loss": 16.2346, "step": 485810 }, { "epoch": 0.9813871370451323, "grad_norm": 596.3590087890625, "learning_rate": 2.448006037124695e-08, "loss": 23.8473, "step": 485820 }, { "epoch": 0.9814073376778161, "grad_norm": 491.07147216796875, "learning_rate": 2.444557310984863e-08, "loss": 16.2163, "step": 485830 }, { "epoch": 0.9814275383104999, "grad_norm": 512.9915771484375, "learning_rate": 2.441111009869879e-08, "loss": 25.4548, "step": 485840 }, { "epoch": 0.9814477389431837, "grad_norm": 128.34317016601562, "learning_rate": 2.4376671337966174e-08, "loss": 16.7849, "step": 485850 }, { "epoch": 0.9814679395758675, "grad_norm": 383.1306457519531, "learning_rate": 2.434225682781788e-08, "loss": 21.5805, "step": 485860 }, { "epoch": 0.9814881402085514, "grad_norm": 706.0603637695312, "learning_rate": 2.43078665684221e-08, "loss": 44.5088, "step": 485870 }, { "epoch": 0.9815083408412352, "grad_norm": 494.68304443359375, "learning_rate": 2.427350055994593e-08, "loss": 30.7003, "step": 485880 }, { "epoch": 0.981528541473919, "grad_norm": 279.5955505371094, "learning_rate": 2.423915880255756e-08, "loss": 15.2497, "step": 485890 }, { "epoch": 0.9815487421066028, "grad_norm": 201.189697265625, "learning_rate": 2.4204841296424086e-08, "loss": 13.9122, "step": 485900 }, { "epoch": 0.9815689427392866, "grad_norm": 508.57025146484375, "learning_rate": 2.4170548041712594e-08, "loss": 19.3764, "step": 485910 }, { "epoch": 0.9815891433719705, "grad_norm": 351.7825622558594, 
"learning_rate": 2.4136279038590727e-08, "loss": 17.9565, "step": 485920 }, { "epoch": 0.9816093440046543, "grad_norm": 316.19219970703125, "learning_rate": 2.4102034287224462e-08, "loss": 11.2892, "step": 485930 }, { "epoch": 0.9816295446373381, "grad_norm": 542.6669311523438, "learning_rate": 2.4067813787782e-08, "loss": 24.9015, "step": 485940 }, { "epoch": 0.9816497452700219, "grad_norm": 197.28306579589844, "learning_rate": 2.403361754042932e-08, "loss": 19.147, "step": 485950 }, { "epoch": 0.9816699459027056, "grad_norm": 50.71059799194336, "learning_rate": 2.3999445545332955e-08, "loss": 16.8103, "step": 485960 }, { "epoch": 0.9816901465353894, "grad_norm": 501.495361328125, "learning_rate": 2.3965297802659993e-08, "loss": 17.9184, "step": 485970 }, { "epoch": 0.9817103471680733, "grad_norm": 310.5088195800781, "learning_rate": 2.3931174312576966e-08, "loss": 20.8237, "step": 485980 }, { "epoch": 0.9817305478007571, "grad_norm": 262.6496276855469, "learning_rate": 2.3897075075249298e-08, "loss": 16.2967, "step": 485990 }, { "epoch": 0.9817507484334409, "grad_norm": 183.33282470703125, "learning_rate": 2.386300009084408e-08, "loss": 19.5586, "step": 486000 }, { "epoch": 0.9817709490661247, "grad_norm": 26.03966522216797, "learning_rate": 2.382894935952729e-08, "loss": 16.2692, "step": 486010 }, { "epoch": 0.9817911496988085, "grad_norm": 364.26153564453125, "learning_rate": 2.3794922881464344e-08, "loss": 20.1034, "step": 486020 }, { "epoch": 0.9818113503314924, "grad_norm": 109.6334457397461, "learning_rate": 2.3760920656821228e-08, "loss": 15.8652, "step": 486030 }, { "epoch": 0.9818315509641762, "grad_norm": 449.1590270996094, "learning_rate": 2.3726942685764474e-08, "loss": 23.835, "step": 486040 }, { "epoch": 0.98185175159686, "grad_norm": 94.95097351074219, "learning_rate": 2.3692988968458398e-08, "loss": 33.35, "step": 486050 }, { "epoch": 0.9818719522295438, "grad_norm": 266.3463439941406, "learning_rate": 2.3659059505069526e-08, "loss": 25.8023, "step": 486060 }, { "epoch": 0.9818921528622276, "grad_norm": 73.2273941040039, "learning_rate": 2.362515429576273e-08, "loss": 14.0591, "step": 486070 }, { "epoch": 0.9819123534949115, "grad_norm": 332.509521484375, "learning_rate": 2.3591273340703436e-08, "loss": 12.0447, "step": 486080 }, { "epoch": 0.9819325541275953, "grad_norm": 1333.14501953125, "learning_rate": 2.3557416640056507e-08, "loss": 16.5212, "step": 486090 }, { "epoch": 0.9819527547602791, "grad_norm": 227.93136596679688, "learning_rate": 2.3523584193986816e-08, "loss": 19.2178, "step": 486100 }, { "epoch": 0.9819729553929629, "grad_norm": 705.9186401367188, "learning_rate": 2.3489776002660337e-08, "loss": 19.5316, "step": 486110 }, { "epoch": 0.9819931560256467, "grad_norm": 301.1523742675781, "learning_rate": 2.3455992066240828e-08, "loss": 32.5375, "step": 486120 }, { "epoch": 0.9820133566583306, "grad_norm": 323.9628601074219, "learning_rate": 2.342223238489316e-08, "loss": 24.6499, "step": 486130 }, { "epoch": 0.9820335572910144, "grad_norm": 148.93313598632812, "learning_rate": 2.3388496958782203e-08, "loss": 19.5107, "step": 486140 }, { "epoch": 0.9820537579236982, "grad_norm": 247.03475952148438, "learning_rate": 2.3354785788072265e-08, "loss": 9.7356, "step": 486150 }, { "epoch": 0.982073958556382, "grad_norm": 1135.49951171875, "learning_rate": 2.3321098872927107e-08, "loss": 39.8069, "step": 486160 }, { "epoch": 0.9820941591890658, "grad_norm": 683.43603515625, "learning_rate": 2.3287436213511038e-08, "loss": 15.9695, "step": 486170 }, { "epoch": 
0.9821143598217497, "grad_norm": 344.6324462890625, "learning_rate": 2.3253797809988933e-08, "loss": 15.9136, "step": 486180 }, { "epoch": 0.9821345604544335, "grad_norm": 361.53643798828125, "learning_rate": 2.3220183662523986e-08, "loss": 22.6791, "step": 486190 }, { "epoch": 0.9821547610871173, "grad_norm": 196.98980712890625, "learning_rate": 2.3186593771280518e-08, "loss": 6.6145, "step": 486200 }, { "epoch": 0.982174961719801, "grad_norm": 414.0221252441406, "learning_rate": 2.3153028136421728e-08, "loss": 18.1399, "step": 486210 }, { "epoch": 0.9821951623524848, "grad_norm": 442.22125244140625, "learning_rate": 2.3119486758111375e-08, "loss": 11.9448, "step": 486220 }, { "epoch": 0.9822153629851686, "grad_norm": 446.5096435546875, "learning_rate": 2.3085969636513217e-08, "loss": 15.7097, "step": 486230 }, { "epoch": 0.9822355636178525, "grad_norm": 0.0, "learning_rate": 2.3052476771790454e-08, "loss": 9.7731, "step": 486240 }, { "epoch": 0.9822557642505363, "grad_norm": 379.91375732421875, "learning_rate": 2.301900816410574e-08, "loss": 13.1205, "step": 486250 }, { "epoch": 0.9822759648832201, "grad_norm": 390.3697814941406, "learning_rate": 2.2985563813623378e-08, "loss": 16.514, "step": 486260 }, { "epoch": 0.9822961655159039, "grad_norm": 65.93146514892578, "learning_rate": 2.295214372050547e-08, "loss": 12.4505, "step": 486270 }, { "epoch": 0.9823163661485877, "grad_norm": 382.8099365234375, "learning_rate": 2.2918747884915216e-08, "loss": 19.6996, "step": 486280 }, { "epoch": 0.9823365667812716, "grad_norm": 591.4705200195312, "learning_rate": 2.2885376307015817e-08, "loss": 23.1165, "step": 486290 }, { "epoch": 0.9823567674139554, "grad_norm": 466.58245849609375, "learning_rate": 2.285202898696881e-08, "loss": 18.6542, "step": 486300 }, { "epoch": 0.9823769680466392, "grad_norm": 687.158935546875, "learning_rate": 2.2818705924937402e-08, "loss": 18.8722, "step": 486310 }, { "epoch": 0.982397168679323, "grad_norm": 349.258056640625, "learning_rate": 2.2785407121084236e-08, "loss": 20.9059, "step": 486320 }, { "epoch": 0.9824173693120068, "grad_norm": 295.1773376464844, "learning_rate": 2.2752132575570852e-08, "loss": 24.1587, "step": 486330 }, { "epoch": 0.9824375699446907, "grad_norm": 432.3294372558594, "learning_rate": 2.271888228856045e-08, "loss": 17.8731, "step": 486340 }, { "epoch": 0.9824577705773745, "grad_norm": 789.3005981445312, "learning_rate": 2.268565626021457e-08, "loss": 47.5746, "step": 486350 }, { "epoch": 0.9824779712100583, "grad_norm": 260.5862121582031, "learning_rate": 2.2652454490694752e-08, "loss": 19.524, "step": 486360 }, { "epoch": 0.9824981718427421, "grad_norm": 43.713470458984375, "learning_rate": 2.261927698016364e-08, "loss": 12.7472, "step": 486370 }, { "epoch": 0.982518372475426, "grad_norm": 740.96826171875, "learning_rate": 2.2586123728781663e-08, "loss": 18.64, "step": 486380 }, { "epoch": 0.9825385731081098, "grad_norm": 378.1340637207031, "learning_rate": 2.255299473671202e-08, "loss": 19.8248, "step": 486390 }, { "epoch": 0.9825587737407936, "grad_norm": 174.594970703125, "learning_rate": 2.251989000411514e-08, "loss": 25.4638, "step": 486400 }, { "epoch": 0.9825789743734774, "grad_norm": 139.85891723632812, "learning_rate": 2.2486809531152563e-08, "loss": 9.0486, "step": 486410 }, { "epoch": 0.9825991750061612, "grad_norm": 152.03848266601562, "learning_rate": 2.2453753317985272e-08, "loss": 20.0999, "step": 486420 }, { "epoch": 0.982619375638845, "grad_norm": 287.6670227050781, "learning_rate": 2.2420721364775354e-08, "loss": 25.8652, 
"step": 486430 }, { "epoch": 0.9826395762715289, "grad_norm": 425.5420837402344, "learning_rate": 2.2387713671682687e-08, "loss": 25.9609, "step": 486440 }, { "epoch": 0.9826597769042127, "grad_norm": 232.87928771972656, "learning_rate": 2.2354730238868804e-08, "loss": 21.0022, "step": 486450 }, { "epoch": 0.9826799775368965, "grad_norm": 308.5003662109375, "learning_rate": 2.2321771066494137e-08, "loss": 13.2866, "step": 486460 }, { "epoch": 0.9827001781695802, "grad_norm": 1615.3829345703125, "learning_rate": 2.2288836154719663e-08, "loss": 24.9597, "step": 486470 }, { "epoch": 0.982720378802264, "grad_norm": 311.37188720703125, "learning_rate": 2.2255925503705255e-08, "loss": 13.8833, "step": 486480 }, { "epoch": 0.9827405794349479, "grad_norm": 163.70361328125, "learning_rate": 2.22230391136119e-08, "loss": 8.973, "step": 486490 }, { "epoch": 0.9827607800676317, "grad_norm": 533.0966796875, "learning_rate": 2.219017698460002e-08, "loss": 19.1744, "step": 486500 }, { "epoch": 0.9827809807003155, "grad_norm": 320.56341552734375, "learning_rate": 2.215733911682949e-08, "loss": 12.5872, "step": 486510 }, { "epoch": 0.9828011813329993, "grad_norm": 284.896728515625, "learning_rate": 2.2124525510459627e-08, "loss": 13.3462, "step": 486520 }, { "epoch": 0.9828213819656831, "grad_norm": 89.13407897949219, "learning_rate": 2.2091736165651966e-08, "loss": 5.4464, "step": 486530 }, { "epoch": 0.982841582598367, "grad_norm": 368.83697509765625, "learning_rate": 2.205897108256472e-08, "loss": 29.8207, "step": 486540 }, { "epoch": 0.9828617832310508, "grad_norm": 172.1982879638672, "learning_rate": 2.202623026135886e-08, "loss": 14.112, "step": 486550 }, { "epoch": 0.9828819838637346, "grad_norm": 460.03369140625, "learning_rate": 2.1993513702193157e-08, "loss": 17.7389, "step": 486560 }, { "epoch": 0.9829021844964184, "grad_norm": 219.40406799316406, "learning_rate": 2.1960821405226928e-08, "loss": 18.4654, "step": 486570 }, { "epoch": 0.9829223851291022, "grad_norm": 308.46697998046875, "learning_rate": 2.1928153370620598e-08, "loss": 20.2735, "step": 486580 }, { "epoch": 0.9829425857617861, "grad_norm": 279.93634033203125, "learning_rate": 2.1895509598532372e-08, "loss": 21.5676, "step": 486590 }, { "epoch": 0.9829627863944699, "grad_norm": 167.79373168945312, "learning_rate": 2.1862890089121567e-08, "loss": 24.1097, "step": 486600 }, { "epoch": 0.9829829870271537, "grad_norm": 129.95025634765625, "learning_rate": 2.1830294842547506e-08, "loss": 8.3408, "step": 486610 }, { "epoch": 0.9830031876598375, "grad_norm": 51.695579528808594, "learning_rate": 2.1797723858968388e-08, "loss": 11.7549, "step": 486620 }, { "epoch": 0.9830233882925213, "grad_norm": 771.5540161132812, "learning_rate": 2.1765177138543535e-08, "loss": 21.0965, "step": 486630 }, { "epoch": 0.9830435889252052, "grad_norm": 1035.76416015625, "learning_rate": 2.173265468143171e-08, "loss": 35.0378, "step": 486640 }, { "epoch": 0.983063789557889, "grad_norm": 270.08856201171875, "learning_rate": 2.1700156487790557e-08, "loss": 13.1302, "step": 486650 }, { "epoch": 0.9830839901905728, "grad_norm": 140.2735137939453, "learning_rate": 2.1667682557779958e-08, "loss": 21.5211, "step": 486660 }, { "epoch": 0.9831041908232566, "grad_norm": 441.3387451171875, "learning_rate": 2.1635232891556446e-08, "loss": 11.5147, "step": 486670 }, { "epoch": 0.9831243914559404, "grad_norm": 725.987548828125, "learning_rate": 2.1602807489279344e-08, "loss": 14.4808, "step": 486680 }, { "epoch": 0.9831445920886243, "grad_norm": 652.3868408203125, 
"learning_rate": 2.1570406351106298e-08, "loss": 20.0834, "step": 486690 }, { "epoch": 0.9831647927213081, "grad_norm": 388.7825012207031, "learning_rate": 2.1538029477195522e-08, "loss": 15.2553, "step": 486700 }, { "epoch": 0.9831849933539919, "grad_norm": 134.82530212402344, "learning_rate": 2.1505676867704105e-08, "loss": 16.7642, "step": 486710 }, { "epoch": 0.9832051939866756, "grad_norm": 281.6883850097656, "learning_rate": 2.1473348522790262e-08, "loss": 14.2663, "step": 486720 }, { "epoch": 0.9832253946193594, "grad_norm": 546.5551147460938, "learning_rate": 2.1441044442611634e-08, "loss": 19.5316, "step": 486730 }, { "epoch": 0.9832455952520432, "grad_norm": 41.95939254760742, "learning_rate": 2.1408764627325883e-08, "loss": 16.1222, "step": 486740 }, { "epoch": 0.9832657958847271, "grad_norm": 326.4468994140625, "learning_rate": 2.1376509077089546e-08, "loss": 17.1573, "step": 486750 }, { "epoch": 0.9832859965174109, "grad_norm": 345.43109130859375, "learning_rate": 2.1344277792060275e-08, "loss": 12.0682, "step": 486760 }, { "epoch": 0.9833061971500947, "grad_norm": 158.94752502441406, "learning_rate": 2.1312070772395165e-08, "loss": 15.5515, "step": 486770 }, { "epoch": 0.9833263977827785, "grad_norm": 134.98707580566406, "learning_rate": 2.1279888018251317e-08, "loss": 21.1968, "step": 486780 }, { "epoch": 0.9833465984154623, "grad_norm": 548.9357299804688, "learning_rate": 2.1247729529785822e-08, "loss": 19.2239, "step": 486790 }, { "epoch": 0.9833667990481462, "grad_norm": 36.52518844604492, "learning_rate": 2.1215595307154667e-08, "loss": 16.6772, "step": 486800 }, { "epoch": 0.98338699968083, "grad_norm": 527.3377685546875, "learning_rate": 2.1183485350514397e-08, "loss": 12.0839, "step": 486810 }, { "epoch": 0.9834072003135138, "grad_norm": 447.6247863769531, "learning_rate": 2.1151399660022664e-08, "loss": 16.5998, "step": 486820 }, { "epoch": 0.9834274009461976, "grad_norm": 492.91900634765625, "learning_rate": 2.1119338235834897e-08, "loss": 33.839, "step": 486830 }, { "epoch": 0.9834476015788814, "grad_norm": 192.20407104492188, "learning_rate": 2.1087301078107637e-08, "loss": 19.5072, "step": 486840 }, { "epoch": 0.9834678022115653, "grad_norm": 317.2949523925781, "learning_rate": 2.105528818699687e-08, "loss": 15.7755, "step": 486850 }, { "epoch": 0.9834880028442491, "grad_norm": 411.9183654785156, "learning_rate": 2.1023299562658584e-08, "loss": 13.3417, "step": 486860 }, { "epoch": 0.9835082034769329, "grad_norm": 727.0975341796875, "learning_rate": 2.0991335205249318e-08, "loss": 17.7422, "step": 486870 }, { "epoch": 0.9835284041096167, "grad_norm": 776.1140747070312, "learning_rate": 2.0959395114923954e-08, "loss": 19.5978, "step": 486880 }, { "epoch": 0.9835486047423005, "grad_norm": 1540.5836181640625, "learning_rate": 2.0927479291839024e-08, "loss": 17.3088, "step": 486890 }, { "epoch": 0.9835688053749844, "grad_norm": 350.5865478515625, "learning_rate": 2.0895587736149414e-08, "loss": 18.9651, "step": 486900 }, { "epoch": 0.9835890060076682, "grad_norm": 674.6132202148438, "learning_rate": 2.0863720448011106e-08, "loss": 13.8975, "step": 486910 }, { "epoch": 0.983609206640352, "grad_norm": 659.1400146484375, "learning_rate": 2.0831877427578974e-08, "loss": 19.0174, "step": 486920 }, { "epoch": 0.9836294072730358, "grad_norm": 532.0677490234375, "learning_rate": 2.0800058675007894e-08, "loss": 26.3079, "step": 486930 }, { "epoch": 0.9836496079057196, "grad_norm": 321.0643615722656, "learning_rate": 2.076826419045386e-08, "loss": 14.3447, "step": 486940 }, { 
"epoch": 0.9836698085384035, "grad_norm": 227.18014526367188, "learning_rate": 2.0736493974071736e-08, "loss": 18.2144, "step": 486950 }, { "epoch": 0.9836900091710873, "grad_norm": 471.3290100097656, "learning_rate": 2.0704748026015298e-08, "loss": 12.0176, "step": 486960 }, { "epoch": 0.9837102098037711, "grad_norm": 331.65252685546875, "learning_rate": 2.0673026346440526e-08, "loss": 21.1374, "step": 486970 }, { "epoch": 0.9837304104364548, "grad_norm": 591.6358642578125, "learning_rate": 2.0641328935501748e-08, "loss": 35.3378, "step": 486980 }, { "epoch": 0.9837506110691386, "grad_norm": 253.2311248779297, "learning_rate": 2.0609655793352724e-08, "loss": 15.4262, "step": 486990 }, { "epoch": 0.9837708117018225, "grad_norm": 738.0919799804688, "learning_rate": 2.057800692014833e-08, "loss": 20.5693, "step": 487000 }, { "epoch": 0.9837910123345063, "grad_norm": 395.4708251953125, "learning_rate": 2.054638231604289e-08, "loss": 22.3301, "step": 487010 }, { "epoch": 0.9838112129671901, "grad_norm": 770.7337646484375, "learning_rate": 2.051478198119017e-08, "loss": 25.5808, "step": 487020 }, { "epoch": 0.9838314135998739, "grad_norm": 190.54969787597656, "learning_rate": 2.0483205915745042e-08, "loss": 23.1655, "step": 487030 }, { "epoch": 0.9838516142325577, "grad_norm": 382.62115478515625, "learning_rate": 2.0451654119860164e-08, "loss": 16.6641, "step": 487040 }, { "epoch": 0.9838718148652416, "grad_norm": 330.2117919921875, "learning_rate": 2.0420126593690416e-08, "loss": 9.8796, "step": 487050 }, { "epoch": 0.9838920154979254, "grad_norm": 238.4560546875, "learning_rate": 2.0388623337389003e-08, "loss": 28.3645, "step": 487060 }, { "epoch": 0.9839122161306092, "grad_norm": 222.7383270263672, "learning_rate": 2.0357144351109693e-08, "loss": 11.7348, "step": 487070 }, { "epoch": 0.983932416763293, "grad_norm": 1798.53515625, "learning_rate": 2.0325689635005142e-08, "loss": 24.3655, "step": 487080 }, { "epoch": 0.9839526173959768, "grad_norm": 191.74282836914062, "learning_rate": 2.029425918922967e-08, "loss": 15.1017, "step": 487090 }, { "epoch": 0.9839728180286607, "grad_norm": 223.9886474609375, "learning_rate": 2.026285301393538e-08, "loss": 13.2002, "step": 487100 }, { "epoch": 0.9839930186613445, "grad_norm": 115.09986114501953, "learning_rate": 2.023147110927659e-08, "loss": 12.3963, "step": 487110 }, { "epoch": 0.9840132192940283, "grad_norm": 253.5218048095703, "learning_rate": 2.020011347540596e-08, "loss": 15.6474, "step": 487120 }, { "epoch": 0.9840334199267121, "grad_norm": 283.21771240234375, "learning_rate": 2.016878011247503e-08, "loss": 22.0459, "step": 487130 }, { "epoch": 0.9840536205593959, "grad_norm": 81.14229583740234, "learning_rate": 2.013747102063812e-08, "loss": 11.7839, "step": 487140 }, { "epoch": 0.9840738211920798, "grad_norm": 506.94976806640625, "learning_rate": 2.010618620004734e-08, "loss": 18.794, "step": 487150 }, { "epoch": 0.9840940218247636, "grad_norm": 214.54312133789062, "learning_rate": 2.0074925650854226e-08, "loss": 22.0953, "step": 487160 }, { "epoch": 0.9841142224574474, "grad_norm": 90.94629669189453, "learning_rate": 2.004368937321255e-08, "loss": 14.7933, "step": 487170 }, { "epoch": 0.9841344230901312, "grad_norm": 354.2481384277344, "learning_rate": 2.0012477367273854e-08, "loss": 12.9025, "step": 487180 }, { "epoch": 0.984154623722815, "grad_norm": 485.8455810546875, "learning_rate": 1.9981289633190237e-08, "loss": 18.0831, "step": 487190 }, { "epoch": 0.9841748243554989, "grad_norm": 326.9634094238281, "learning_rate": 
1.995012617111436e-08, "loss": 11.4334, "step": 487200 }, { "epoch": 0.9841950249881827, "grad_norm": 440.474853515625, "learning_rate": 1.9918986981196653e-08, "loss": 22.803, "step": 487210 }, { "epoch": 0.9842152256208665, "grad_norm": 384.60382080078125, "learning_rate": 1.988787206359033e-08, "loss": 17.9767, "step": 487220 }, { "epoch": 0.9842354262535503, "grad_norm": 531.9924926757812, "learning_rate": 1.985678141844638e-08, "loss": 17.9372, "step": 487230 }, { "epoch": 0.984255626886234, "grad_norm": 341.8916015625, "learning_rate": 1.9825715045916905e-08, "loss": 18.8524, "step": 487240 }, { "epoch": 0.9842758275189178, "grad_norm": 188.14895629882812, "learning_rate": 1.9794672946152337e-08, "loss": 19.2394, "step": 487250 }, { "epoch": 0.9842960281516017, "grad_norm": 470.08599853515625, "learning_rate": 1.9763655119304227e-08, "loss": 13.1318, "step": 487260 }, { "epoch": 0.9843162287842855, "grad_norm": 235.66885375976562, "learning_rate": 1.973266156552467e-08, "loss": 26.0032, "step": 487270 }, { "epoch": 0.9843364294169693, "grad_norm": 756.0232543945312, "learning_rate": 1.9701692284963547e-08, "loss": 20.216, "step": 487280 }, { "epoch": 0.9843566300496531, "grad_norm": 1019.5380859375, "learning_rate": 1.967074727777296e-08, "loss": 27.1592, "step": 487290 }, { "epoch": 0.984376830682337, "grad_norm": 10.018977165222168, "learning_rate": 1.963982654410279e-08, "loss": 11.0307, "step": 487300 }, { "epoch": 0.9843970313150208, "grad_norm": 255.76251220703125, "learning_rate": 1.9608930084104027e-08, "loss": 12.4662, "step": 487310 }, { "epoch": 0.9844172319477046, "grad_norm": 493.1605529785156, "learning_rate": 1.9578057897927104e-08, "loss": 20.6991, "step": 487320 }, { "epoch": 0.9844374325803884, "grad_norm": 460.5899963378906, "learning_rate": 1.9547209985723015e-08, "loss": 16.1439, "step": 487330 }, { "epoch": 0.9844576332130722, "grad_norm": 289.14593505859375, "learning_rate": 1.9516386347641636e-08, "loss": 28.3802, "step": 487340 }, { "epoch": 0.984477833845756, "grad_norm": 64.37545776367188, "learning_rate": 1.9485586983833404e-08, "loss": 21.3151, "step": 487350 }, { "epoch": 0.9844980344784399, "grad_norm": 271.99395751953125, "learning_rate": 1.94548118944482e-08, "loss": 7.3311, "step": 487360 }, { "epoch": 0.9845182351111237, "grad_norm": 325.72216796875, "learning_rate": 1.9424061079636458e-08, "loss": 18.6132, "step": 487370 }, { "epoch": 0.9845384357438075, "grad_norm": 546.6802368164062, "learning_rate": 1.9393334539547505e-08, "loss": 20.1622, "step": 487380 }, { "epoch": 0.9845586363764913, "grad_norm": 691.5068969726562, "learning_rate": 1.9362632274331215e-08, "loss": 26.9569, "step": 487390 }, { "epoch": 0.9845788370091751, "grad_norm": 243.03543090820312, "learning_rate": 1.9331954284137476e-08, "loss": 11.425, "step": 487400 }, { "epoch": 0.984599037641859, "grad_norm": 403.1186828613281, "learning_rate": 1.9301300569116165e-08, "loss": 10.5166, "step": 487410 }, { "epoch": 0.9846192382745428, "grad_norm": 403.91058349609375, "learning_rate": 1.9270671129415496e-08, "loss": 21.6073, "step": 487420 }, { "epoch": 0.9846394389072266, "grad_norm": 202.64797973632812, "learning_rate": 1.9240065965185907e-08, "loss": 13.2823, "step": 487430 }, { "epoch": 0.9846596395399104, "grad_norm": 330.4432373046875, "learning_rate": 1.9209485076576718e-08, "loss": 17.4824, "step": 487440 }, { "epoch": 0.9846798401725942, "grad_norm": 745.1810302734375, "learning_rate": 1.9178928463735593e-08, "loss": 19.5667, "step": 487450 }, { "epoch": 0.9847000408052781, 
"grad_norm": 0.0, "learning_rate": 1.9148396126812407e-08, "loss": 21.1232, "step": 487460 }, { "epoch": 0.9847202414379619, "grad_norm": 289.8485107421875, "learning_rate": 1.9117888065955938e-08, "loss": 8.0293, "step": 487470 }, { "epoch": 0.9847404420706457, "grad_norm": 286.2195129394531, "learning_rate": 1.908740428131495e-08, "loss": 14.2113, "step": 487480 }, { "epoch": 0.9847606427033294, "grad_norm": 32.78451156616211, "learning_rate": 1.9056944773037656e-08, "loss": 13.1252, "step": 487490 }, { "epoch": 0.9847808433360132, "grad_norm": 131.9490203857422, "learning_rate": 1.9026509541272276e-08, "loss": 11.4658, "step": 487500 }, { "epoch": 0.984801043968697, "grad_norm": 550.3273315429688, "learning_rate": 1.8996098586168132e-08, "loss": 10.4393, "step": 487510 }, { "epoch": 0.9848212446013809, "grad_norm": 242.36134338378906, "learning_rate": 1.8965711907872885e-08, "loss": 25.7361, "step": 487520 }, { "epoch": 0.9848414452340647, "grad_norm": 413.4701232910156, "learning_rate": 1.8935349506534195e-08, "loss": 24.0202, "step": 487530 }, { "epoch": 0.9848616458667485, "grad_norm": 672.5272827148438, "learning_rate": 1.890501138230083e-08, "loss": 31.6589, "step": 487540 }, { "epoch": 0.9848818464994323, "grad_norm": 291.6829833984375, "learning_rate": 1.8874697535319897e-08, "loss": 14.4903, "step": 487550 }, { "epoch": 0.9849020471321162, "grad_norm": 623.2794189453125, "learning_rate": 1.8844407965740165e-08, "loss": 18.8497, "step": 487560 }, { "epoch": 0.9849222477648, "grad_norm": 336.7108154296875, "learning_rate": 1.881414267370818e-08, "loss": 26.6136, "step": 487570 }, { "epoch": 0.9849424483974838, "grad_norm": 363.7168884277344, "learning_rate": 1.8783901659372162e-08, "loss": 12.3119, "step": 487580 }, { "epoch": 0.9849626490301676, "grad_norm": 188.1642303466797, "learning_rate": 1.875368492287921e-08, "loss": 12.5046, "step": 487590 }, { "epoch": 0.9849828496628514, "grad_norm": 541.2425537109375, "learning_rate": 1.8723492464376992e-08, "loss": 24.7602, "step": 487600 }, { "epoch": 0.9850030502955353, "grad_norm": 335.880859375, "learning_rate": 1.8693324284011495e-08, "loss": 29.85, "step": 487610 }, { "epoch": 0.9850232509282191, "grad_norm": 479.3641662597656, "learning_rate": 1.8663180381931488e-08, "loss": 18.8721, "step": 487620 }, { "epoch": 0.9850434515609029, "grad_norm": 496.9161071777344, "learning_rate": 1.8633060758282418e-08, "loss": 17.1028, "step": 487630 }, { "epoch": 0.9850636521935867, "grad_norm": 662.9967041015625, "learning_rate": 1.860296541321138e-08, "loss": 14.4051, "step": 487640 }, { "epoch": 0.9850838528262705, "grad_norm": 183.82313537597656, "learning_rate": 1.8572894346866043e-08, "loss": 6.9085, "step": 487650 }, { "epoch": 0.9851040534589544, "grad_norm": 275.2166442871094, "learning_rate": 1.854284755939184e-08, "loss": 33.5495, "step": 487660 }, { "epoch": 0.9851242540916382, "grad_norm": 2.3112215995788574, "learning_rate": 1.8512825050935323e-08, "loss": 14.4201, "step": 487670 }, { "epoch": 0.985144454724322, "grad_norm": 338.2336120605469, "learning_rate": 1.8482826821643596e-08, "loss": 35.2743, "step": 487680 }, { "epoch": 0.9851646553570058, "grad_norm": 64.32235717773438, "learning_rate": 1.8452852871662653e-08, "loss": 7.4456, "step": 487690 }, { "epoch": 0.9851848559896896, "grad_norm": 0.2854507565498352, "learning_rate": 1.842290320113793e-08, "loss": 22.3451, "step": 487700 }, { "epoch": 0.9852050566223735, "grad_norm": 331.331787109375, "learning_rate": 1.839297781021543e-08, "loss": 25.7413, "step": 487710 }, { 
"epoch": 0.9852252572550573, "grad_norm": 257.5212707519531, "learning_rate": 1.8363076699041695e-08, "loss": 16.15, "step": 487720 }, { "epoch": 0.9852454578877411, "grad_norm": 472.3876953125, "learning_rate": 1.8333199867762163e-08, "loss": 15.8237, "step": 487730 }, { "epoch": 0.9852656585204249, "grad_norm": 635.3434448242188, "learning_rate": 1.830334731652228e-08, "loss": 26.1093, "step": 487740 }, { "epoch": 0.9852858591531086, "grad_norm": 5.066963195800781, "learning_rate": 1.8273519045468035e-08, "loss": 11.3581, "step": 487750 }, { "epoch": 0.9853060597857924, "grad_norm": 303.07904052734375, "learning_rate": 1.8243715054744315e-08, "loss": 19.2844, "step": 487760 }, { "epoch": 0.9853262604184763, "grad_norm": 323.21795654296875, "learning_rate": 1.8213935344496002e-08, "loss": 12.7054, "step": 487770 }, { "epoch": 0.9853464610511601, "grad_norm": 187.547119140625, "learning_rate": 1.8184179914869093e-08, "loss": 10.2604, "step": 487780 }, { "epoch": 0.9853666616838439, "grad_norm": 393.1107177734375, "learning_rate": 1.815444876600847e-08, "loss": 11.2483, "step": 487790 }, { "epoch": 0.9853868623165277, "grad_norm": 156.0241241455078, "learning_rate": 1.8124741898058462e-08, "loss": 14.5017, "step": 487800 }, { "epoch": 0.9854070629492115, "grad_norm": 185.0515899658203, "learning_rate": 1.8095059311164508e-08, "loss": 16.2474, "step": 487810 }, { "epoch": 0.9854272635818954, "grad_norm": 177.43402099609375, "learning_rate": 1.8065401005470938e-08, "loss": 17.9147, "step": 487820 }, { "epoch": 0.9854474642145792, "grad_norm": 549.4453125, "learning_rate": 1.803576698112264e-08, "loss": 18.9288, "step": 487830 }, { "epoch": 0.985467664847263, "grad_norm": 317.1197509765625, "learning_rate": 1.8006157238263376e-08, "loss": 22.0972, "step": 487840 }, { "epoch": 0.9854878654799468, "grad_norm": 302.7123107910156, "learning_rate": 1.7976571777038044e-08, "loss": 16.1826, "step": 487850 }, { "epoch": 0.9855080661126306, "grad_norm": 477.897216796875, "learning_rate": 1.7947010597590408e-08, "loss": 7.3532, "step": 487860 }, { "epoch": 0.9855282667453145, "grad_norm": 425.7860412597656, "learning_rate": 1.791747370006536e-08, "loss": 21.8247, "step": 487870 }, { "epoch": 0.9855484673779983, "grad_norm": 265.6903991699219, "learning_rate": 1.7887961084605554e-08, "loss": 17.2382, "step": 487880 }, { "epoch": 0.9855686680106821, "grad_norm": 210.8507537841797, "learning_rate": 1.7858472751355883e-08, "loss": 18.1254, "step": 487890 }, { "epoch": 0.9855888686433659, "grad_norm": 302.1631774902344, "learning_rate": 1.7829008700460116e-08, "loss": 26.4176, "step": 487900 }, { "epoch": 0.9856090692760497, "grad_norm": 559.1700439453125, "learning_rate": 1.779956893206092e-08, "loss": 21.3995, "step": 487910 }, { "epoch": 0.9856292699087336, "grad_norm": 342.4388427734375, "learning_rate": 1.7770153446302618e-08, "loss": 15.9682, "step": 487920 }, { "epoch": 0.9856494705414174, "grad_norm": 166.08718872070312, "learning_rate": 1.7740762243328435e-08, "loss": 19.5306, "step": 487930 }, { "epoch": 0.9856696711741012, "grad_norm": 369.96246337890625, "learning_rate": 1.7711395323281588e-08, "loss": 17.6639, "step": 487940 }, { "epoch": 0.985689871806785, "grad_norm": 270.3794860839844, "learning_rate": 1.768205268630474e-08, "loss": 11.5303, "step": 487950 }, { "epoch": 0.9857100724394688, "grad_norm": 896.9556274414062, "learning_rate": 1.765273433254111e-08, "loss": 13.5518, "step": 487960 }, { "epoch": 0.9857302730721527, "grad_norm": 562.6268920898438, "learning_rate": 
1.7623440262134472e-08, "loss": 30.144, "step": 487970 }, { "epoch": 0.9857504737048365, "grad_norm": 626.1119384765625, "learning_rate": 1.759417047522638e-08, "loss": 23.2989, "step": 487980 }, { "epoch": 0.9857706743375203, "grad_norm": 207.8027801513672, "learning_rate": 1.756492497196005e-08, "loss": 27.4454, "step": 487990 }, { "epoch": 0.985790874970204, "grad_norm": 460.8661193847656, "learning_rate": 1.753570375247815e-08, "loss": 22.146, "step": 488000 }, { "epoch": 0.9858110756028878, "grad_norm": 852.23779296875, "learning_rate": 1.7506506816923342e-08, "loss": 33.3133, "step": 488010 }, { "epoch": 0.9858312762355717, "grad_norm": 555.6287231445312, "learning_rate": 1.747733416543662e-08, "loss": 9.9734, "step": 488020 }, { "epoch": 0.9858514768682555, "grad_norm": 302.9903259277344, "learning_rate": 1.7448185798161765e-08, "loss": 25.9241, "step": 488030 }, { "epoch": 0.9858716775009393, "grad_norm": 897.1173706054688, "learning_rate": 1.741906171523977e-08, "loss": 24.4153, "step": 488040 }, { "epoch": 0.9858918781336231, "grad_norm": 478.850341796875, "learning_rate": 1.73899619168133e-08, "loss": 13.5781, "step": 488050 }, { "epoch": 0.9859120787663069, "grad_norm": 30.23406982421875, "learning_rate": 1.7360886403023358e-08, "loss": 7.5778, "step": 488060 }, { "epoch": 0.9859322793989908, "grad_norm": 202.19210815429688, "learning_rate": 1.7331835174012602e-08, "loss": 12.541, "step": 488070 }, { "epoch": 0.9859524800316746, "grad_norm": 158.72364807128906, "learning_rate": 1.7302808229921476e-08, "loss": 11.0494, "step": 488080 }, { "epoch": 0.9859726806643584, "grad_norm": 133.09844970703125, "learning_rate": 1.7273805570892643e-08, "loss": 18.3383, "step": 488090 }, { "epoch": 0.9859928812970422, "grad_norm": 340.03326416015625, "learning_rate": 1.7244827197067103e-08, "loss": 12.8224, "step": 488100 }, { "epoch": 0.986013081929726, "grad_norm": 1257.68994140625, "learning_rate": 1.7215873108585858e-08, "loss": 42.4838, "step": 488110 }, { "epoch": 0.9860332825624099, "grad_norm": 718.265869140625, "learning_rate": 1.71869433055899e-08, "loss": 18.3349, "step": 488120 }, { "epoch": 0.9860534831950937, "grad_norm": 379.7763671875, "learning_rate": 1.7158037788220782e-08, "loss": 11.872, "step": 488130 }, { "epoch": 0.9860736838277775, "grad_norm": 353.88665771484375, "learning_rate": 1.7129156556618398e-08, "loss": 25.6119, "step": 488140 }, { "epoch": 0.9860938844604613, "grad_norm": 317.9150390625, "learning_rate": 1.7100299610924297e-08, "loss": 15.0898, "step": 488150 }, { "epoch": 0.9861140850931451, "grad_norm": 453.9829406738281, "learning_rate": 1.707146695127948e-08, "loss": 13.9989, "step": 488160 }, { "epoch": 0.986134285725829, "grad_norm": 166.3394012451172, "learning_rate": 1.7042658577823833e-08, "loss": 42.1762, "step": 488170 }, { "epoch": 0.9861544863585128, "grad_norm": 283.5535583496094, "learning_rate": 1.7013874490697802e-08, "loss": 11.6909, "step": 488180 }, { "epoch": 0.9861746869911966, "grad_norm": 276.1616516113281, "learning_rate": 1.6985114690041825e-08, "loss": 21.0326, "step": 488190 }, { "epoch": 0.9861948876238804, "grad_norm": 141.51397705078125, "learning_rate": 1.6956379175995796e-08, "loss": 7.7452, "step": 488200 }, { "epoch": 0.9862150882565642, "grad_norm": 171.76841735839844, "learning_rate": 1.6927667948700155e-08, "loss": 12.0104, "step": 488210 }, { "epoch": 0.986235288889248, "grad_norm": 595.8139038085938, "learning_rate": 1.689898100829479e-08, "loss": 14.0837, "step": 488220 }, { "epoch": 0.9862554895219319, "grad_norm": 
473.9314880371094, "learning_rate": 1.687031835491959e-08, "loss": 26.5983, "step": 488230 }, { "epoch": 0.9862756901546157, "grad_norm": 283.1180419921875, "learning_rate": 1.6841679988713332e-08, "loss": 12.0923, "step": 488240 }, { "epoch": 0.9862958907872995, "grad_norm": 289.6003112792969, "learning_rate": 1.681306590981702e-08, "loss": 20.0766, "step": 488250 }, { "epoch": 0.9863160914199832, "grad_norm": 386.9776306152344, "learning_rate": 1.678447611836942e-08, "loss": 20.7361, "step": 488260 }, { "epoch": 0.986336292052667, "grad_norm": 179.49264526367188, "learning_rate": 1.6755910614509872e-08, "loss": 13.2435, "step": 488270 }, { "epoch": 0.9863564926853509, "grad_norm": 273.3138427734375, "learning_rate": 1.6727369398377158e-08, "loss": 18.0023, "step": 488280 }, { "epoch": 0.9863766933180347, "grad_norm": 685.1600341796875, "learning_rate": 1.669885247011116e-08, "loss": 22.936, "step": 488290 }, { "epoch": 0.9863968939507185, "grad_norm": 612.9713745117188, "learning_rate": 1.6670359829850657e-08, "loss": 24.6449, "step": 488300 }, { "epoch": 0.9864170945834023, "grad_norm": 168.1580047607422, "learning_rate": 1.664189147773443e-08, "loss": 10.4781, "step": 488310 }, { "epoch": 0.9864372952160861, "grad_norm": 116.18118286132812, "learning_rate": 1.6613447413900696e-08, "loss": 17.8398, "step": 488320 }, { "epoch": 0.98645749584877, "grad_norm": 217.2073974609375, "learning_rate": 1.6585027638489347e-08, "loss": 16.984, "step": 488330 }, { "epoch": 0.9864776964814538, "grad_norm": 219.95492553710938, "learning_rate": 1.655663215163805e-08, "loss": 8.6126, "step": 488340 }, { "epoch": 0.9864978971141376, "grad_norm": 261.9851989746094, "learning_rate": 1.6528260953484476e-08, "loss": 16.6404, "step": 488350 }, { "epoch": 0.9865180977468214, "grad_norm": 305.74176025390625, "learning_rate": 1.6499914044168508e-08, "loss": 14.7852, "step": 488360 }, { "epoch": 0.9865382983795052, "grad_norm": 174.19142150878906, "learning_rate": 1.6471591423827817e-08, "loss": 17.0763, "step": 488370 }, { "epoch": 0.9865584990121891, "grad_norm": 526.5982666015625, "learning_rate": 1.644329309259951e-08, "loss": 23.4136, "step": 488380 }, { "epoch": 0.9865786996448729, "grad_norm": 175.1710968017578, "learning_rate": 1.6415019050622373e-08, "loss": 20.1765, "step": 488390 }, { "epoch": 0.9865989002775567, "grad_norm": 442.1764221191406, "learning_rate": 1.6386769298034067e-08, "loss": 15.7431, "step": 488400 }, { "epoch": 0.9866191009102405, "grad_norm": 491.32269287109375, "learning_rate": 1.635854383497226e-08, "loss": 10.6239, "step": 488410 }, { "epoch": 0.9866393015429243, "grad_norm": 153.86268615722656, "learning_rate": 1.6330342661574072e-08, "loss": 8.8285, "step": 488420 }, { "epoch": 0.9866595021756082, "grad_norm": 534.8939208984375, "learning_rate": 1.6302165777977718e-08, "loss": 15.201, "step": 488430 }, { "epoch": 0.986679702808292, "grad_norm": 687.9249877929688, "learning_rate": 1.6274013184319757e-08, "loss": 24.0827, "step": 488440 }, { "epoch": 0.9866999034409758, "grad_norm": 270.24505615234375, "learning_rate": 1.6245884880738415e-08, "loss": 8.0484, "step": 488450 }, { "epoch": 0.9867201040736596, "grad_norm": 31.995512008666992, "learning_rate": 1.621778086736969e-08, "loss": 15.4762, "step": 488460 }, { "epoch": 0.9867403047063434, "grad_norm": 421.179443359375, "learning_rate": 1.6189701144351254e-08, "loss": 18.3479, "step": 488470 }, { "epoch": 0.9867605053390273, "grad_norm": 912.8563842773438, "learning_rate": 1.6161645711819664e-08, "loss": 37.9865, "step": 
488480 }, { "epoch": 0.9867807059717111, "grad_norm": 244.22555541992188, "learning_rate": 1.6133614569912027e-08, "loss": 16.8347, "step": 488490 }, { "epoch": 0.9868009066043949, "grad_norm": 1216.3824462890625, "learning_rate": 1.610560771876435e-08, "loss": 20.0859, "step": 488500 }, { "epoch": 0.9868211072370787, "grad_norm": 310.50262451171875, "learning_rate": 1.607762515851319e-08, "loss": 17.3168, "step": 488510 }, { "epoch": 0.9868413078697624, "grad_norm": 362.2833251953125, "learning_rate": 1.6049666889295657e-08, "loss": 21.6378, "step": 488520 }, { "epoch": 0.9868615085024462, "grad_norm": 901.0900268554688, "learning_rate": 1.6021732911247756e-08, "loss": 22.5777, "step": 488530 }, { "epoch": 0.9868817091351301, "grad_norm": 526.10791015625, "learning_rate": 1.5993823224504935e-08, "loss": 24.0116, "step": 488540 }, { "epoch": 0.9869019097678139, "grad_norm": 450.54888916015625, "learning_rate": 1.5965937829204302e-08, "loss": 17.4813, "step": 488550 }, { "epoch": 0.9869221104004977, "grad_norm": 136.32603454589844, "learning_rate": 1.5938076725480756e-08, "loss": 15.49, "step": 488560 }, { "epoch": 0.9869423110331815, "grad_norm": 276.5377197265625, "learning_rate": 1.5910239913470292e-08, "loss": 8.7842, "step": 488570 }, { "epoch": 0.9869625116658653, "grad_norm": 132.17144775390625, "learning_rate": 1.5882427393309475e-08, "loss": 12.1712, "step": 488580 }, { "epoch": 0.9869827122985492, "grad_norm": 341.4737548828125, "learning_rate": 1.585463916513319e-08, "loss": 14.214, "step": 488590 }, { "epoch": 0.987002912931233, "grad_norm": 552.6314697265625, "learning_rate": 1.582687522907633e-08, "loss": 16.6554, "step": 488600 }, { "epoch": 0.9870231135639168, "grad_norm": 547.34814453125, "learning_rate": 1.5799135585274906e-08, "loss": 21.9563, "step": 488610 }, { "epoch": 0.9870433141966006, "grad_norm": 530.8009643554688, "learning_rate": 1.5771420233864355e-08, "loss": 17.8597, "step": 488620 }, { "epoch": 0.9870635148292844, "grad_norm": 121.2981185913086, "learning_rate": 1.5743729174979016e-08, "loss": 9.4337, "step": 488630 }, { "epoch": 0.9870837154619683, "grad_norm": 421.91375732421875, "learning_rate": 1.571606240875434e-08, "loss": 25.9695, "step": 488640 }, { "epoch": 0.9871039160946521, "grad_norm": 24.188617706298828, "learning_rate": 1.5688419935325216e-08, "loss": 11.5205, "step": 488650 }, { "epoch": 0.9871241167273359, "grad_norm": 170.5167999267578, "learning_rate": 1.5660801754825983e-08, "loss": 10.8688, "step": 488660 }, { "epoch": 0.9871443173600197, "grad_norm": 225.33900451660156, "learning_rate": 1.563320786739153e-08, "loss": 13.0992, "step": 488670 }, { "epoch": 0.9871645179927035, "grad_norm": 530.8095092773438, "learning_rate": 1.56056382731562e-08, "loss": 19.3749, "step": 488680 }, { "epoch": 0.9871847186253874, "grad_norm": 53.277740478515625, "learning_rate": 1.5578092972254875e-08, "loss": 17.041, "step": 488690 }, { "epoch": 0.9872049192580712, "grad_norm": 114.4014663696289, "learning_rate": 1.5550571964820793e-08, "loss": 23.5643, "step": 488700 }, { "epoch": 0.987225119890755, "grad_norm": 106.43096160888672, "learning_rate": 1.5523075250989395e-08, "loss": 4.523, "step": 488710 }, { "epoch": 0.9872453205234388, "grad_norm": 337.0357360839844, "learning_rate": 1.5495602830893354e-08, "loss": 22.5743, "step": 488720 }, { "epoch": 0.9872655211561226, "grad_norm": 556.9434204101562, "learning_rate": 1.546815470466756e-08, "loss": 41.1725, "step": 488730 }, { "epoch": 0.9872857217888065, "grad_norm": 95.35572052001953, "learning_rate": 
1.5440730872445242e-08, "loss": 26.158, "step": 488740 }, { "epoch": 0.9873059224214903, "grad_norm": 331.4527893066406, "learning_rate": 1.541333133436018e-08, "loss": 20.8953, "step": 488750 }, { "epoch": 0.9873261230541741, "grad_norm": 0.0, "learning_rate": 1.538595609054616e-08, "loss": 11.3459, "step": 488760 }, { "epoch": 0.9873463236868578, "grad_norm": 317.4156799316406, "learning_rate": 1.5358605141136407e-08, "loss": 15.1851, "step": 488770 }, { "epoch": 0.9873665243195416, "grad_norm": 145.23828125, "learning_rate": 1.5331278486264144e-08, "loss": 11.5322, "step": 488780 }, { "epoch": 0.9873867249522255, "grad_norm": 1322.247802734375, "learning_rate": 1.53039761260626e-08, "loss": 12.257, "step": 488790 }, { "epoch": 0.9874069255849093, "grad_norm": 549.3560791015625, "learning_rate": 1.5276698060665007e-08, "loss": 20.2712, "step": 488800 }, { "epoch": 0.9874271262175931, "grad_norm": 237.40028381347656, "learning_rate": 1.5249444290204584e-08, "loss": 21.5498, "step": 488810 }, { "epoch": 0.9874473268502769, "grad_norm": 361.716796875, "learning_rate": 1.5222214814812897e-08, "loss": 15.8732, "step": 488820 }, { "epoch": 0.9874675274829607, "grad_norm": 281.2061462402344, "learning_rate": 1.519500963462428e-08, "loss": 13.3269, "step": 488830 }, { "epoch": 0.9874877281156446, "grad_norm": 397.55853271484375, "learning_rate": 1.5167828749770853e-08, "loss": 19.2063, "step": 488840 }, { "epoch": 0.9875079287483284, "grad_norm": 1179.2423095703125, "learning_rate": 1.5140672160384174e-08, "loss": 27.6955, "step": 488850 }, { "epoch": 0.9875281293810122, "grad_norm": 160.7357940673828, "learning_rate": 1.511353986659747e-08, "loss": 12.1364, "step": 488860 }, { "epoch": 0.987548330013696, "grad_norm": 302.5446472167969, "learning_rate": 1.508643186854286e-08, "loss": 12.375, "step": 488870 }, { "epoch": 0.9875685306463798, "grad_norm": 17.310794830322266, "learning_rate": 1.505934816635246e-08, "loss": 22.2095, "step": 488880 }, { "epoch": 0.9875887312790637, "grad_norm": 464.3238220214844, "learning_rate": 1.503228876015783e-08, "loss": 14.6066, "step": 488890 }, { "epoch": 0.9876089319117475, "grad_norm": 376.7144775390625, "learning_rate": 1.500525365009109e-08, "loss": 24.8198, "step": 488900 }, { "epoch": 0.9876291325444313, "grad_norm": 296.1361083984375, "learning_rate": 1.4978242836284908e-08, "loss": 18.8745, "step": 488910 }, { "epoch": 0.9876493331771151, "grad_norm": 398.23638916015625, "learning_rate": 1.4951256318869733e-08, "loss": 16.8605, "step": 488920 }, { "epoch": 0.9876695338097989, "grad_norm": 227.01824951171875, "learning_rate": 1.4924294097977687e-08, "loss": 23.3466, "step": 488930 }, { "epoch": 0.9876897344424828, "grad_norm": 2.6604208946228027, "learning_rate": 1.4897356173739774e-08, "loss": 8.3296, "step": 488940 }, { "epoch": 0.9877099350751666, "grad_norm": 390.50103759765625, "learning_rate": 1.4870442546287555e-08, "loss": 10.9325, "step": 488950 }, { "epoch": 0.9877301357078504, "grad_norm": 259.9412536621094, "learning_rate": 1.4843553215752037e-08, "loss": 7.4341, "step": 488960 }, { "epoch": 0.9877503363405342, "grad_norm": 233.6974334716797, "learning_rate": 1.4816688182264782e-08, "loss": 28.2152, "step": 488970 }, { "epoch": 0.987770536973218, "grad_norm": 143.91610717773438, "learning_rate": 1.478984744595624e-08, "loss": 14.1742, "step": 488980 }, { "epoch": 0.9877907376059019, "grad_norm": 380.85211181640625, "learning_rate": 1.4763031006957417e-08, "loss": 19.1149, "step": 488990 }, { "epoch": 0.9878109382385857, "grad_norm": 
2.1768627166748047, "learning_rate": 1.4736238865398766e-08, "loss": 23.0437, "step": 489000 }, { "epoch": 0.9878311388712695, "grad_norm": 395.1035461425781, "learning_rate": 1.4709471021411293e-08, "loss": 22.9502, "step": 489010 }, { "epoch": 0.9878513395039533, "grad_norm": 282.9913024902344, "learning_rate": 1.4682727475124891e-08, "loss": 8.5003, "step": 489020 }, { "epoch": 0.987871540136637, "grad_norm": 554.9068603515625, "learning_rate": 1.4656008226670571e-08, "loss": 17.3521, "step": 489030 }, { "epoch": 0.9878917407693208, "grad_norm": 899.536865234375, "learning_rate": 1.462931327617767e-08, "loss": 16.8683, "step": 489040 }, { "epoch": 0.9879119414020047, "grad_norm": 233.77371215820312, "learning_rate": 1.4602642623777752e-08, "loss": 15.6234, "step": 489050 }, { "epoch": 0.9879321420346885, "grad_norm": 900.2330932617188, "learning_rate": 1.4575996269599046e-08, "loss": 23.9059, "step": 489060 }, { "epoch": 0.9879523426673723, "grad_norm": 463.00982666015625, "learning_rate": 1.454937421377256e-08, "loss": 24.6737, "step": 489070 }, { "epoch": 0.9879725433000561, "grad_norm": 685.919921875, "learning_rate": 1.4522776456427635e-08, "loss": 13.0379, "step": 489080 }, { "epoch": 0.98799274393274, "grad_norm": 338.71246337890625, "learning_rate": 1.4496202997694164e-08, "loss": 11.774, "step": 489090 }, { "epoch": 0.9880129445654238, "grad_norm": 429.50885009765625, "learning_rate": 1.4469653837701491e-08, "loss": 19.8358, "step": 489100 }, { "epoch": 0.9880331451981076, "grad_norm": 115.12040710449219, "learning_rate": 1.4443128976579513e-08, "loss": 10.3165, "step": 489110 }, { "epoch": 0.9880533458307914, "grad_norm": 182.71620178222656, "learning_rate": 1.4416628414456457e-08, "loss": 17.5581, "step": 489120 }, { "epoch": 0.9880735464634752, "grad_norm": 444.47491455078125, "learning_rate": 1.4390152151462222e-08, "loss": 13.104, "step": 489130 }, { "epoch": 0.988093747096159, "grad_norm": 201.85137939453125, "learning_rate": 1.4363700187725593e-08, "loss": 6.3272, "step": 489140 }, { "epoch": 0.9881139477288429, "grad_norm": 151.43687438964844, "learning_rate": 1.4337272523375911e-08, "loss": 12.1407, "step": 489150 }, { "epoch": 0.9881341483615267, "grad_norm": 283.36285400390625, "learning_rate": 1.4310869158541408e-08, "loss": 12.8991, "step": 489160 }, { "epoch": 0.9881543489942105, "grad_norm": 352.98529052734375, "learning_rate": 1.4284490093351421e-08, "loss": 19.3828, "step": 489170 }, { "epoch": 0.9881745496268943, "grad_norm": 564.781005859375, "learning_rate": 1.425813532793363e-08, "loss": 10.2488, "step": 489180 }, { "epoch": 0.9881947502595781, "grad_norm": 437.80712890625, "learning_rate": 1.4231804862417375e-08, "loss": 16.5281, "step": 489190 }, { "epoch": 0.988214950892262, "grad_norm": 422.3405456542969, "learning_rate": 1.4205498696930332e-08, "loss": 11.1556, "step": 489200 }, { "epoch": 0.9882351515249458, "grad_norm": 362.02130126953125, "learning_rate": 1.4179216831601284e-08, "loss": 17.0503, "step": 489210 }, { "epoch": 0.9882553521576296, "grad_norm": 266.0114440917969, "learning_rate": 1.4152959266557354e-08, "loss": 9.8067, "step": 489220 }, { "epoch": 0.9882755527903134, "grad_norm": 357.19866943359375, "learning_rate": 1.4126726001927882e-08, "loss": 13.612, "step": 489230 }, { "epoch": 0.9882957534229972, "grad_norm": 479.8896484375, "learning_rate": 1.4100517037839989e-08, "loss": 19.1753, "step": 489240 }, { "epoch": 0.9883159540556811, "grad_norm": 44.63584518432617, "learning_rate": 1.4074332374421351e-08, "loss": 14.044, "step": 489250 
}, { "epoch": 0.9883361546883649, "grad_norm": 516.9661865234375, "learning_rate": 1.4048172011799643e-08, "loss": 23.0173, "step": 489260 }, { "epoch": 0.9883563553210487, "grad_norm": 655.0848999023438, "learning_rate": 1.4022035950102541e-08, "loss": 15.7647, "step": 489270 }, { "epoch": 0.9883765559537324, "grad_norm": 5.948569297790527, "learning_rate": 1.3995924189457167e-08, "loss": 14.569, "step": 489280 }, { "epoch": 0.9883967565864162, "grad_norm": 198.19515991210938, "learning_rate": 1.3969836729990637e-08, "loss": 12.5366, "step": 489290 }, { "epoch": 0.9884169572191001, "grad_norm": 334.4207458496094, "learning_rate": 1.3943773571831188e-08, "loss": 5.7287, "step": 489300 }, { "epoch": 0.9884371578517839, "grad_norm": 279.3717956542969, "learning_rate": 1.3917734715104269e-08, "loss": 31.223, "step": 489310 }, { "epoch": 0.9884573584844677, "grad_norm": 610.1446533203125, "learning_rate": 1.3891720159938116e-08, "loss": 22.999, "step": 489320 }, { "epoch": 0.9884775591171515, "grad_norm": 296.9520263671875, "learning_rate": 1.3865729906458735e-08, "loss": 15.9857, "step": 489330 }, { "epoch": 0.9884977597498353, "grad_norm": 521.7493896484375, "learning_rate": 1.3839763954792695e-08, "loss": 25.9349, "step": 489340 }, { "epoch": 0.9885179603825192, "grad_norm": 238.2027587890625, "learning_rate": 1.3813822305067115e-08, "loss": 18.3275, "step": 489350 }, { "epoch": 0.988538161015203, "grad_norm": 464.864501953125, "learning_rate": 1.378790495740856e-08, "loss": 18.1847, "step": 489360 }, { "epoch": 0.9885583616478868, "grad_norm": 605.761474609375, "learning_rate": 1.376201191194304e-08, "loss": 16.1985, "step": 489370 }, { "epoch": 0.9885785622805706, "grad_norm": 100.96516418457031, "learning_rate": 1.3736143168796012e-08, "loss": 15.8867, "step": 489380 }, { "epoch": 0.9885987629132544, "grad_norm": 233.3070831298828, "learning_rate": 1.371029872809515e-08, "loss": 19.417, "step": 489390 }, { "epoch": 0.9886189635459383, "grad_norm": 244.88894653320312, "learning_rate": 1.3684478589964801e-08, "loss": 15.7179, "step": 489400 }, { "epoch": 0.9886391641786221, "grad_norm": 681.5443115234375, "learning_rate": 1.3658682754532082e-08, "loss": 28.7913, "step": 489410 }, { "epoch": 0.9886593648113059, "grad_norm": 0.24459843337535858, "learning_rate": 1.3632911221921896e-08, "loss": 14.4319, "step": 489420 }, { "epoch": 0.9886795654439897, "grad_norm": 497.12847900390625, "learning_rate": 1.3607163992259697e-08, "loss": 11.9154, "step": 489430 }, { "epoch": 0.9886997660766735, "grad_norm": 697.6969604492188, "learning_rate": 1.3581441065672052e-08, "loss": 19.8152, "step": 489440 }, { "epoch": 0.9887199667093574, "grad_norm": 733.26708984375, "learning_rate": 1.355574244228386e-08, "loss": 22.7065, "step": 489450 }, { "epoch": 0.9887401673420412, "grad_norm": 430.14019775390625, "learning_rate": 1.3530068122219464e-08, "loss": 34.7864, "step": 489460 }, { "epoch": 0.988760367974725, "grad_norm": 565.5632934570312, "learning_rate": 1.3504418105604877e-08, "loss": 17.5237, "step": 489470 }, { "epoch": 0.9887805686074088, "grad_norm": 515.758544921875, "learning_rate": 1.3478792392565553e-08, "loss": 20.4981, "step": 489480 }, { "epoch": 0.9888007692400926, "grad_norm": 424.0845947265625, "learning_rate": 1.3453190983225285e-08, "loss": 14.8029, "step": 489490 }, { "epoch": 0.9888209698727765, "grad_norm": 356.08123779296875, "learning_rate": 1.3427613877709523e-08, "loss": 22.7797, "step": 489500 }, { "epoch": 0.9888411705054603, "grad_norm": 481.2988586425781, "learning_rate": 
1.3402061076142613e-08, "loss": 18.4806, "step": 489510 }, { "epoch": 0.9888613711381441, "grad_norm": 163.1424560546875, "learning_rate": 1.3376532578649459e-08, "loss": 15.1123, "step": 489520 }, { "epoch": 0.9888815717708279, "grad_norm": 243.24937438964844, "learning_rate": 1.3351028385354402e-08, "loss": 13.5655, "step": 489530 }, { "epoch": 0.9889017724035116, "grad_norm": 319.78265380859375, "learning_rate": 1.3325548496381235e-08, "loss": 18.1097, "step": 489540 }, { "epoch": 0.9889219730361954, "grad_norm": 457.2274475097656, "learning_rate": 1.3300092911854856e-08, "loss": 11.7467, "step": 489550 }, { "epoch": 0.9889421736688793, "grad_norm": 92.75702667236328, "learning_rate": 1.3274661631899055e-08, "loss": 29.7643, "step": 489560 }, { "epoch": 0.9889623743015631, "grad_norm": 312.5694885253906, "learning_rate": 1.3249254656637622e-08, "loss": 10.471, "step": 489570 }, { "epoch": 0.9889825749342469, "grad_norm": 290.5263671875, "learning_rate": 1.3223871986194348e-08, "loss": 27.3382, "step": 489580 }, { "epoch": 0.9890027755669307, "grad_norm": 342.47674560546875, "learning_rate": 1.3198513620693022e-08, "loss": 37.4274, "step": 489590 }, { "epoch": 0.9890229761996145, "grad_norm": 383.2030029296875, "learning_rate": 1.3173179560257432e-08, "loss": 11.5588, "step": 489600 }, { "epoch": 0.9890431768322984, "grad_norm": 267.69140625, "learning_rate": 1.314786980501137e-08, "loss": 7.4811, "step": 489610 }, { "epoch": 0.9890633774649822, "grad_norm": 234.81167602539062, "learning_rate": 1.3122584355076962e-08, "loss": 12.6162, "step": 489620 }, { "epoch": 0.989083578097666, "grad_norm": 293.77703857421875, "learning_rate": 1.3097323210579104e-08, "loss": 19.0022, "step": 489630 }, { "epoch": 0.9891037787303498, "grad_norm": 183.48153686523438, "learning_rate": 1.307208637163937e-08, "loss": 19.9693, "step": 489640 }, { "epoch": 0.9891239793630336, "grad_norm": 7.569753170013428, "learning_rate": 1.3046873838381546e-08, "loss": 11.5113, "step": 489650 }, { "epoch": 0.9891441799957175, "grad_norm": 1021.123779296875, "learning_rate": 1.3021685610928869e-08, "loss": 27.3139, "step": 489660 }, { "epoch": 0.9891643806284013, "grad_norm": 362.0072021484375, "learning_rate": 1.2996521689403463e-08, "loss": 24.0982, "step": 489670 }, { "epoch": 0.9891845812610851, "grad_norm": 657.249267578125, "learning_rate": 1.2971382073928007e-08, "loss": 15.8356, "step": 489680 }, { "epoch": 0.9892047818937689, "grad_norm": 462.3509521484375, "learning_rate": 1.2946266764625182e-08, "loss": 14.8603, "step": 489690 }, { "epoch": 0.9892249825264527, "grad_norm": 413.03753662109375, "learning_rate": 1.292117576161711e-08, "loss": 33.4044, "step": 489700 }, { "epoch": 0.9892451831591366, "grad_norm": 317.8731384277344, "learning_rate": 1.2896109065027029e-08, "loss": 5.5308, "step": 489710 }, { "epoch": 0.9892653837918204, "grad_norm": 156.2231903076172, "learning_rate": 1.2871066674975951e-08, "loss": 13.7939, "step": 489720 }, { "epoch": 0.9892855844245042, "grad_norm": 251.47769165039062, "learning_rate": 1.2846048591586558e-08, "loss": 30.8586, "step": 489730 }, { "epoch": 0.989305785057188, "grad_norm": 72.52568817138672, "learning_rate": 1.2821054814980971e-08, "loss": 10.4397, "step": 489740 }, { "epoch": 0.9893259856898718, "grad_norm": 487.9237365722656, "learning_rate": 1.2796085345280207e-08, "loss": 8.9422, "step": 489750 }, { "epoch": 0.9893461863225557, "grad_norm": 412.9870910644531, "learning_rate": 1.277114018260639e-08, "loss": 34.0147, "step": 489760 }, { "epoch": 0.9893663869552395, 
"grad_norm": 448.53582763671875, "learning_rate": 1.2746219327081644e-08, "loss": 7.8732, "step": 489770 }, { "epoch": 0.9893865875879233, "grad_norm": 363.7297668457031, "learning_rate": 1.2721322778826983e-08, "loss": 6.6716, "step": 489780 }, { "epoch": 0.989406788220607, "grad_norm": 118.27398681640625, "learning_rate": 1.2696450537963422e-08, "loss": 19.0261, "step": 489790 }, { "epoch": 0.9894269888532908, "grad_norm": 408.3044128417969, "learning_rate": 1.2671602604612531e-08, "loss": 26.6716, "step": 489800 }, { "epoch": 0.9894471894859747, "grad_norm": 380.0968322753906, "learning_rate": 1.2646778978895325e-08, "loss": 21.3715, "step": 489810 }, { "epoch": 0.9894673901186585, "grad_norm": 324.5020446777344, "learning_rate": 1.2621979660932814e-08, "loss": 30.9385, "step": 489820 }, { "epoch": 0.9894875907513423, "grad_norm": 225.742431640625, "learning_rate": 1.2597204650845463e-08, "loss": 8.4652, "step": 489830 }, { "epoch": 0.9895077913840261, "grad_norm": 376.7015686035156, "learning_rate": 1.2572453948755393e-08, "loss": 18.7972, "step": 489840 }, { "epoch": 0.9895279920167099, "grad_norm": 23.484270095825195, "learning_rate": 1.2547727554781398e-08, "loss": 18.036, "step": 489850 }, { "epoch": 0.9895481926493938, "grad_norm": 273.4000549316406, "learning_rate": 1.2523025469045047e-08, "loss": 14.1293, "step": 489860 }, { "epoch": 0.9895683932820776, "grad_norm": 452.1612243652344, "learning_rate": 1.2498347691666801e-08, "loss": 11.7554, "step": 489870 }, { "epoch": 0.9895885939147614, "grad_norm": 178.48948669433594, "learning_rate": 1.2473694222766563e-08, "loss": 14.9273, "step": 489880 }, { "epoch": 0.9896087945474452, "grad_norm": 423.0238342285156, "learning_rate": 1.2449065062464794e-08, "loss": 14.2976, "step": 489890 }, { "epoch": 0.989628995180129, "grad_norm": 938.166015625, "learning_rate": 1.2424460210881394e-08, "loss": 26.3438, "step": 489900 }, { "epoch": 0.9896491958128129, "grad_norm": 0.0, "learning_rate": 1.2399879668136271e-08, "loss": 15.9938, "step": 489910 }, { "epoch": 0.9896693964454967, "grad_norm": 258.1879577636719, "learning_rate": 1.2375323434348773e-08, "loss": 22.4046, "step": 489920 }, { "epoch": 0.9896895970781805, "grad_norm": 267.1070556640625, "learning_rate": 1.235079150963936e-08, "loss": 15.2209, "step": 489930 }, { "epoch": 0.9897097977108643, "grad_norm": 313.09881591796875, "learning_rate": 1.2326283894127378e-08, "loss": 34.5608, "step": 489940 }, { "epoch": 0.9897299983435481, "grad_norm": 358.4794616699219, "learning_rate": 1.2301800587932179e-08, "loss": 32.3783, "step": 489950 }, { "epoch": 0.989750198976232, "grad_norm": 320.7735595703125, "learning_rate": 1.2277341591172553e-08, "loss": 19.7975, "step": 489960 }, { "epoch": 0.9897703996089158, "grad_norm": 479.1424255371094, "learning_rate": 1.225290690396841e-08, "loss": 14.985, "step": 489970 }, { "epoch": 0.9897906002415996, "grad_norm": 0.9020110368728638, "learning_rate": 1.2228496526439093e-08, "loss": 14.1797, "step": 489980 }, { "epoch": 0.9898108008742834, "grad_norm": 21.70526123046875, "learning_rate": 1.2204110458702844e-08, "loss": 10.3022, "step": 489990 }, { "epoch": 0.9898310015069672, "grad_norm": 359.36590576171875, "learning_rate": 1.2179748700879013e-08, "loss": 20.8642, "step": 490000 }, { "epoch": 0.989851202139651, "grad_norm": 230.55552673339844, "learning_rate": 1.2155411253085835e-08, "loss": 12.0643, "step": 490010 }, { "epoch": 0.9898714027723349, "grad_norm": 252.59933471679688, "learning_rate": 1.2131098115442108e-08, "loss": 21.6534, "step": 
490020 }, { "epoch": 0.9898916034050187, "grad_norm": 496.96124267578125, "learning_rate": 1.2106809288067178e-08, "loss": 19.3856, "step": 490030 }, { "epoch": 0.9899118040377025, "grad_norm": 88.76834869384766, "learning_rate": 1.208254477107762e-08, "loss": 12.8934, "step": 490040 }, { "epoch": 0.9899320046703862, "grad_norm": 15.983965873718262, "learning_rate": 1.2058304564593893e-08, "loss": 22.7476, "step": 490050 }, { "epoch": 0.98995220530307, "grad_norm": 306.596435546875, "learning_rate": 1.2034088668732568e-08, "loss": 16.6889, "step": 490060 }, { "epoch": 0.9899724059357539, "grad_norm": 292.4953918457031, "learning_rate": 1.2009897083611888e-08, "loss": 13.4741, "step": 490070 }, { "epoch": 0.9899926065684377, "grad_norm": 35.3122673034668, "learning_rate": 1.1985729809350088e-08, "loss": 8.3754, "step": 490080 }, { "epoch": 0.9900128072011215, "grad_norm": 322.98577880859375, "learning_rate": 1.1961586846064855e-08, "loss": 15.9051, "step": 490090 }, { "epoch": 0.9900330078338053, "grad_norm": 405.03387451171875, "learning_rate": 1.1937468193873869e-08, "loss": 14.0023, "step": 490100 }, { "epoch": 0.9900532084664891, "grad_norm": 319.7568359375, "learning_rate": 1.1913373852894816e-08, "loss": 19.7235, "step": 490110 }, { "epoch": 0.990073409099173, "grad_norm": 1.4424026012420654, "learning_rate": 1.1889303823244825e-08, "loss": 13.2511, "step": 490120 }, { "epoch": 0.9900936097318568, "grad_norm": 119.40985107421875, "learning_rate": 1.1865258105041577e-08, "loss": 14.8159, "step": 490130 }, { "epoch": 0.9901138103645406, "grad_norm": 676.8298950195312, "learning_rate": 1.1841236698402202e-08, "loss": 30.2779, "step": 490140 }, { "epoch": 0.9901340109972244, "grad_norm": 87.88396453857422, "learning_rate": 1.1817239603443276e-08, "loss": 21.4629, "step": 490150 }, { "epoch": 0.9901542116299082, "grad_norm": 259.3276672363281, "learning_rate": 1.1793266820282478e-08, "loss": 10.1666, "step": 490160 }, { "epoch": 0.9901744122625921, "grad_norm": 509.7081298828125, "learning_rate": 1.1769318349036385e-08, "loss": 20.826, "step": 490170 }, { "epoch": 0.9901946128952759, "grad_norm": 166.15875244140625, "learning_rate": 1.1745394189821013e-08, "loss": 9.8491, "step": 490180 }, { "epoch": 0.9902148135279597, "grad_norm": 406.0191650390625, "learning_rate": 1.1721494342754048e-08, "loss": 9.9096, "step": 490190 }, { "epoch": 0.9902350141606435, "grad_norm": 195.5973358154297, "learning_rate": 1.1697618807951504e-08, "loss": 10.4411, "step": 490200 }, { "epoch": 0.9902552147933273, "grad_norm": 97.89176177978516, "learning_rate": 1.1673767585529404e-08, "loss": 11.0284, "step": 490210 }, { "epoch": 0.9902754154260112, "grad_norm": 1873.49365234375, "learning_rate": 1.1649940675604876e-08, "loss": 25.0696, "step": 490220 }, { "epoch": 0.990295616058695, "grad_norm": 467.4261779785156, "learning_rate": 1.1626138078293381e-08, "loss": 14.5886, "step": 490230 }, { "epoch": 0.9903158166913788, "grad_norm": 507.177734375, "learning_rate": 1.1602359793710938e-08, "loss": 23.0854, "step": 490240 }, { "epoch": 0.9903360173240626, "grad_norm": 327.03594970703125, "learning_rate": 1.1578605821973566e-08, "loss": 17.8433, "step": 490250 }, { "epoch": 0.9903562179567464, "grad_norm": 259.65167236328125, "learning_rate": 1.1554876163197282e-08, "loss": 12.3252, "step": 490260 }, { "epoch": 0.9903764185894303, "grad_norm": 600.3280029296875, "learning_rate": 1.1531170817496995e-08, "loss": 13.0561, "step": 490270 }, { "epoch": 0.9903966192221141, "grad_norm": 763.4371337890625, "learning_rate": 
1.1507489784989278e-08, "loss": 14.3357, "step": 490280 }, { "epoch": 0.9904168198547979, "grad_norm": 28.504867553710938, "learning_rate": 1.1483833065789041e-08, "loss": 14.0311, "step": 490290 }, { "epoch": 0.9904370204874817, "grad_norm": 524.5272827148438, "learning_rate": 1.146020066001119e-08, "loss": 14.7662, "step": 490300 }, { "epoch": 0.9904572211201654, "grad_norm": 280.05999755859375, "learning_rate": 1.1436592567771188e-08, "loss": 20.0143, "step": 490310 }, { "epoch": 0.9904774217528493, "grad_norm": 394.6064147949219, "learning_rate": 1.1413008789184498e-08, "loss": 18.4672, "step": 490320 }, { "epoch": 0.9904976223855331, "grad_norm": 38.09982681274414, "learning_rate": 1.1389449324365476e-08, "loss": 15.9906, "step": 490330 }, { "epoch": 0.9905178230182169, "grad_norm": 122.05206298828125, "learning_rate": 1.1365914173429582e-08, "loss": 5.9164, "step": 490340 }, { "epoch": 0.9905380236509007, "grad_norm": 219.7208709716797, "learning_rate": 1.134240333649117e-08, "loss": 18.8033, "step": 490350 }, { "epoch": 0.9905582242835845, "grad_norm": 952.7105712890625, "learning_rate": 1.1318916813664594e-08, "loss": 21.4837, "step": 490360 }, { "epoch": 0.9905784249162684, "grad_norm": 0.0, "learning_rate": 1.129545460506476e-08, "loss": 24.7407, "step": 490370 }, { "epoch": 0.9905986255489522, "grad_norm": 585.8445434570312, "learning_rate": 1.1272016710806021e-08, "loss": 27.2635, "step": 490380 }, { "epoch": 0.990618826181636, "grad_norm": 704.29345703125, "learning_rate": 1.1248603131002178e-08, "loss": 48.7618, "step": 490390 }, { "epoch": 0.9906390268143198, "grad_norm": 1576.3946533203125, "learning_rate": 1.1225213865767026e-08, "loss": 40.8802, "step": 490400 }, { "epoch": 0.9906592274470036, "grad_norm": 373.20452880859375, "learning_rate": 1.1201848915216029e-08, "loss": 14.5402, "step": 490410 }, { "epoch": 0.9906794280796875, "grad_norm": 277.86138916015625, "learning_rate": 1.1178508279461875e-08, "loss": 26.6005, "step": 490420 }, { "epoch": 0.9906996287123713, "grad_norm": 628.8119506835938, "learning_rate": 1.115519195861836e-08, "loss": 22.6955, "step": 490430 }, { "epoch": 0.9907198293450551, "grad_norm": 659.1356201171875, "learning_rate": 1.1131899952799285e-08, "loss": 26.2416, "step": 490440 }, { "epoch": 0.9907400299777389, "grad_norm": 454.5382995605469, "learning_rate": 1.1108632262118446e-08, "loss": 27.2168, "step": 490450 }, { "epoch": 0.9907602306104227, "grad_norm": 0.0, "learning_rate": 1.1085388886689085e-08, "loss": 13.5798, "step": 490460 }, { "epoch": 0.9907804312431066, "grad_norm": 494.09881591796875, "learning_rate": 1.1062169826624447e-08, "loss": 23.5275, "step": 490470 }, { "epoch": 0.9908006318757904, "grad_norm": 211.5029296875, "learning_rate": 1.1038975082037772e-08, "loss": 8.1438, "step": 490480 }, { "epoch": 0.9908208325084742, "grad_norm": 463.34075927734375, "learning_rate": 1.101580465304175e-08, "loss": 17.0053, "step": 490490 }, { "epoch": 0.990841033141158, "grad_norm": 733.0955810546875, "learning_rate": 1.0992658539750179e-08, "loss": 28.9807, "step": 490500 }, { "epoch": 0.9908612337738418, "grad_norm": 309.8749084472656, "learning_rate": 1.0969536742274633e-08, "loss": 21.6901, "step": 490510 }, { "epoch": 0.9908814344065257, "grad_norm": 7.222733020782471, "learning_rate": 1.0946439260728914e-08, "loss": 8.0209, "step": 490520 }, { "epoch": 0.9909016350392095, "grad_norm": 118.79547882080078, "learning_rate": 1.0923366095225152e-08, "loss": 19.2935, "step": 490530 }, { "epoch": 0.9909218356718933, "grad_norm": 
16.88963508605957, "learning_rate": 1.090031724587548e-08, "loss": 7.4517, "step": 490540 }, { "epoch": 0.9909420363045771, "grad_norm": 375.42877197265625, "learning_rate": 1.0877292712792586e-08, "loss": 16.8823, "step": 490550 }, { "epoch": 0.9909622369372608, "grad_norm": 546.7255859375, "learning_rate": 1.0854292496089158e-08, "loss": 18.4394, "step": 490560 }, { "epoch": 0.9909824375699446, "grad_norm": 4.430992126464844, "learning_rate": 1.0831316595876218e-08, "loss": 13.5275, "step": 490570 }, { "epoch": 0.9910026382026285, "grad_norm": 289.63616943359375, "learning_rate": 1.0808365012266454e-08, "loss": 14.6157, "step": 490580 }, { "epoch": 0.9910228388353123, "grad_norm": 147.71971130371094, "learning_rate": 1.0785437745371996e-08, "loss": 12.5525, "step": 490590 }, { "epoch": 0.9910430394679961, "grad_norm": 162.67674255371094, "learning_rate": 1.076253479530387e-08, "loss": 12.0218, "step": 490600 }, { "epoch": 0.9910632401006799, "grad_norm": 108.62178039550781, "learning_rate": 1.0739656162174205e-08, "loss": 16.5417, "step": 490610 }, { "epoch": 0.9910834407333637, "grad_norm": 781.3118286132812, "learning_rate": 1.0716801846094026e-08, "loss": 30.3789, "step": 490620 }, { "epoch": 0.9911036413660476, "grad_norm": 172.30718994140625, "learning_rate": 1.0693971847175466e-08, "loss": 23.6568, "step": 490630 }, { "epoch": 0.9911238419987314, "grad_norm": 730.6552124023438, "learning_rate": 1.067116616552899e-08, "loss": 20.7877, "step": 490640 }, { "epoch": 0.9911440426314152, "grad_norm": 232.4882354736328, "learning_rate": 1.0648384801266176e-08, "loss": 8.2937, "step": 490650 }, { "epoch": 0.991164243264099, "grad_norm": 0.0, "learning_rate": 1.0625627754498048e-08, "loss": 10.4492, "step": 490660 }, { "epoch": 0.9911844438967828, "grad_norm": 841.0191040039062, "learning_rate": 1.0602895025335624e-08, "loss": 15.407, "step": 490670 }, { "epoch": 0.9912046445294667, "grad_norm": 399.9212341308594, "learning_rate": 1.0580186613888822e-08, "loss": 14.38, "step": 490680 }, { "epoch": 0.9912248451621505, "grad_norm": 776.1848754882812, "learning_rate": 1.055750252026977e-08, "loss": 33.8655, "step": 490690 }, { "epoch": 0.9912450457948343, "grad_norm": 385.7315979003906, "learning_rate": 1.0534842744588381e-08, "loss": 29.3024, "step": 490700 }, { "epoch": 0.9912652464275181, "grad_norm": 324.3099060058594, "learning_rate": 1.0512207286954568e-08, "loss": 23.4997, "step": 490710 }, { "epoch": 0.9912854470602019, "grad_norm": 486.59674072265625, "learning_rate": 1.0489596147479353e-08, "loss": 24.3618, "step": 490720 }, { "epoch": 0.9913056476928858, "grad_norm": 459.3517761230469, "learning_rate": 1.0467009326272648e-08, "loss": 15.0182, "step": 490730 }, { "epoch": 0.9913258483255696, "grad_norm": 488.06982421875, "learning_rate": 1.044444682344492e-08, "loss": 16.9264, "step": 490740 }, { "epoch": 0.9913460489582534, "grad_norm": 2.73164701461792, "learning_rate": 1.0421908639104971e-08, "loss": 31.6905, "step": 490750 }, { "epoch": 0.9913662495909372, "grad_norm": 74.6050796508789, "learning_rate": 1.039939477336438e-08, "loss": 9.8221, "step": 490760 }, { "epoch": 0.991386450223621, "grad_norm": 743.1727294921875, "learning_rate": 1.0376905226331391e-08, "loss": 27.881, "step": 490770 }, { "epoch": 0.9914066508563049, "grad_norm": 181.60882568359375, "learning_rate": 1.0354439998116473e-08, "loss": 13.2241, "step": 490780 }, { "epoch": 0.9914268514889887, "grad_norm": 359.5448913574219, "learning_rate": 1.0331999088828425e-08, "loss": 25.4082, "step": 490790 }, { "epoch": 
0.9914470521216725, "grad_norm": 87.32952880859375, "learning_rate": 1.030958249857772e-08, "loss": 16.0133, "step": 490800 }, { "epoch": 0.9914672527543563, "grad_norm": 140.4916229248047, "learning_rate": 1.02871902274726e-08, "loss": 19.8484, "step": 490810 }, { "epoch": 0.99148745338704, "grad_norm": 243.94195556640625, "learning_rate": 1.026482227562242e-08, "loss": 16.5282, "step": 490820 }, { "epoch": 0.9915076540197238, "grad_norm": 245.15191650390625, "learning_rate": 1.0242478643136545e-08, "loss": 23.5337, "step": 490830 }, { "epoch": 0.9915278546524077, "grad_norm": 496.2458801269531, "learning_rate": 1.0220159330123214e-08, "loss": 16.6147, "step": 490840 }, { "epoch": 0.9915480552850915, "grad_norm": 665.3242797851562, "learning_rate": 1.0197864336691788e-08, "loss": 22.6857, "step": 490850 }, { "epoch": 0.9915682559177753, "grad_norm": 360.8750915527344, "learning_rate": 1.0175593662951066e-08, "loss": 18.3113, "step": 490860 }, { "epoch": 0.9915884565504591, "grad_norm": 198.45236206054688, "learning_rate": 1.0153347309009299e-08, "loss": 12.9173, "step": 490870 }, { "epoch": 0.991608657183143, "grad_norm": 1000.5813598632812, "learning_rate": 1.013112527497473e-08, "loss": 22.6931, "step": 490880 }, { "epoch": 0.9916288578158268, "grad_norm": 198.7476806640625, "learning_rate": 1.0108927560955606e-08, "loss": 13.5263, "step": 490890 }, { "epoch": 0.9916490584485106, "grad_norm": 586.5575561523438, "learning_rate": 1.008675416706073e-08, "loss": 18.2384, "step": 490900 }, { "epoch": 0.9916692590811944, "grad_norm": 230.30209350585938, "learning_rate": 1.0064605093397794e-08, "loss": 20.9739, "step": 490910 }, { "epoch": 0.9916894597138782, "grad_norm": 1089.9083251953125, "learning_rate": 1.0042480340075045e-08, "loss": 22.8103, "step": 490920 }, { "epoch": 0.991709660346562, "grad_norm": 1334.685791015625, "learning_rate": 1.0020379907199618e-08, "loss": 28.0976, "step": 490930 }, { "epoch": 0.9917298609792459, "grad_norm": 217.3130340576172, "learning_rate": 9.99830379487976e-09, "loss": 20.0891, "step": 490940 }, { "epoch": 0.9917500616119297, "grad_norm": 727.6640014648438, "learning_rate": 9.976252003223164e-09, "loss": 32.0702, "step": 490950 }, { "epoch": 0.9917702622446135, "grad_norm": 117.57544708251953, "learning_rate": 9.954224532336965e-09, "loss": 10.9159, "step": 490960 }, { "epoch": 0.9917904628772973, "grad_norm": 627.7258911132812, "learning_rate": 9.932221382328299e-09, "loss": 12.5682, "step": 490970 }, { "epoch": 0.9918106635099811, "grad_norm": 19.738685607910156, "learning_rate": 9.91024255330486e-09, "loss": 12.3592, "step": 490980 }, { "epoch": 0.991830864142665, "grad_norm": 609.0294189453125, "learning_rate": 9.888288045374339e-09, "loss": 15.5357, "step": 490990 }, { "epoch": 0.9918510647753488, "grad_norm": 381.98956298828125, "learning_rate": 9.866357858642206e-09, "loss": 11.6295, "step": 491000 }, { "epoch": 0.9918712654080326, "grad_norm": 250.50408935546875, "learning_rate": 9.844451993216708e-09, "loss": 19.4155, "step": 491010 }, { "epoch": 0.9918914660407164, "grad_norm": 813.5974731445312, "learning_rate": 9.822570449203873e-09, "loss": 14.552, "step": 491020 }, { "epoch": 0.9919116666734002, "grad_norm": 275.0490417480469, "learning_rate": 9.800713226710834e-09, "loss": 12.0307, "step": 491030 }, { "epoch": 0.9919318673060841, "grad_norm": 116.39804077148438, "learning_rate": 9.77888032584362e-09, "loss": 17.4847, "step": 491040 }, { "epoch": 0.9919520679387679, "grad_norm": 178.0388946533203, "learning_rate": 9.757071746708812e-09, 
"loss": 12.5501, "step": 491050 }, { "epoch": 0.9919722685714517, "grad_norm": 22.949909210205078, "learning_rate": 9.735287489413547e-09, "loss": 10.2942, "step": 491060 }, { "epoch": 0.9919924692041354, "grad_norm": 140.55003356933594, "learning_rate": 9.71352755406274e-09, "loss": 8.9796, "step": 491070 }, { "epoch": 0.9920126698368192, "grad_norm": 187.78439331054688, "learning_rate": 9.691791940762418e-09, "loss": 26.1984, "step": 491080 }, { "epoch": 0.9920328704695031, "grad_norm": 636.652587890625, "learning_rate": 9.670080649619717e-09, "loss": 33.8739, "step": 491090 }, { "epoch": 0.9920530711021869, "grad_norm": 764.0996704101562, "learning_rate": 9.64839368074011e-09, "loss": 25.3695, "step": 491100 }, { "epoch": 0.9920732717348707, "grad_norm": 547.4270629882812, "learning_rate": 9.626731034227954e-09, "loss": 18.0425, "step": 491110 }, { "epoch": 0.9920934723675545, "grad_norm": 267.7295227050781, "learning_rate": 9.605092710190943e-09, "loss": 17.5693, "step": 491120 }, { "epoch": 0.9921136730002383, "grad_norm": 484.971435546875, "learning_rate": 9.583478708732886e-09, "loss": 15.021, "step": 491130 }, { "epoch": 0.9921338736329222, "grad_norm": 204.47447204589844, "learning_rate": 9.561889029959249e-09, "loss": 12.9957, "step": 491140 }, { "epoch": 0.992154074265606, "grad_norm": 331.8847351074219, "learning_rate": 9.540323673976615e-09, "loss": 19.1705, "step": 491150 }, { "epoch": 0.9921742748982898, "grad_norm": 244.10409545898438, "learning_rate": 9.518782640888235e-09, "loss": 19.9217, "step": 491160 }, { "epoch": 0.9921944755309736, "grad_norm": 487.3038024902344, "learning_rate": 9.497265930800691e-09, "loss": 23.7191, "step": 491170 }, { "epoch": 0.9922146761636574, "grad_norm": 764.28955078125, "learning_rate": 9.475773543818345e-09, "loss": 22.5038, "step": 491180 }, { "epoch": 0.9922348767963413, "grad_norm": 481.8316345214844, "learning_rate": 9.454305480045556e-09, "loss": 22.492, "step": 491190 }, { "epoch": 0.9922550774290251, "grad_norm": 852.2228393554688, "learning_rate": 9.432861739586685e-09, "loss": 21.2637, "step": 491200 }, { "epoch": 0.9922752780617089, "grad_norm": 528.0516967773438, "learning_rate": 9.411442322547204e-09, "loss": 29.3942, "step": 491210 }, { "epoch": 0.9922954786943927, "grad_norm": 437.1981201171875, "learning_rate": 9.390047229031474e-09, "loss": 12.1818, "step": 491220 }, { "epoch": 0.9923156793270765, "grad_norm": 313.25225830078125, "learning_rate": 9.368676459142744e-09, "loss": 23.0166, "step": 491230 }, { "epoch": 0.9923358799597604, "grad_norm": 614.193115234375, "learning_rate": 9.347330012985933e-09, "loss": 25.4203, "step": 491240 }, { "epoch": 0.9923560805924442, "grad_norm": 527.8973999023438, "learning_rate": 9.3260078906654e-09, "loss": 21.4504, "step": 491250 }, { "epoch": 0.992376281225128, "grad_norm": 103.88162994384766, "learning_rate": 9.304710092283842e-09, "loss": 19.1815, "step": 491260 }, { "epoch": 0.9923964818578118, "grad_norm": 441.32818603515625, "learning_rate": 9.283436617946173e-09, "loss": 9.0619, "step": 491270 }, { "epoch": 0.9924166824904956, "grad_norm": 191.62818908691406, "learning_rate": 9.262187467756201e-09, "loss": 16.3227, "step": 491280 }, { "epoch": 0.9924368831231795, "grad_norm": 355.4009094238281, "learning_rate": 9.24096264181662e-09, "loss": 22.5769, "step": 491290 }, { "epoch": 0.9924570837558633, "grad_norm": 576.244384765625, "learning_rate": 9.219762140231237e-09, "loss": 16.6766, "step": 491300 }, { "epoch": 0.9924772843885471, "grad_norm": 161.81101989746094, 
"learning_rate": 9.198585963103302e-09, "loss": 16.9969, "step": 491310 }, { "epoch": 0.9924974850212309, "grad_norm": 281.2635192871094, "learning_rate": 9.177434110536065e-09, "loss": 13.0341, "step": 491320 }, { "epoch": 0.9925176856539146, "grad_norm": 40.424774169921875, "learning_rate": 9.156306582633334e-09, "loss": 11.9053, "step": 491330 }, { "epoch": 0.9925378862865984, "grad_norm": 138.92991638183594, "learning_rate": 9.135203379496693e-09, "loss": 13.6707, "step": 491340 }, { "epoch": 0.9925580869192823, "grad_norm": 447.6842956542969, "learning_rate": 9.114124501230504e-09, "loss": 21.1689, "step": 491350 }, { "epoch": 0.9925782875519661, "grad_norm": 481.4045104980469, "learning_rate": 9.09306994793635e-09, "loss": 20.5411, "step": 491360 }, { "epoch": 0.9925984881846499, "grad_norm": 524.9292602539062, "learning_rate": 9.07203971971693e-09, "loss": 30.819, "step": 491370 }, { "epoch": 0.9926186888173337, "grad_norm": 438.7566833496094, "learning_rate": 9.051033816675492e-09, "loss": 13.5019, "step": 491380 }, { "epoch": 0.9926388894500175, "grad_norm": 511.960205078125, "learning_rate": 9.030052238913622e-09, "loss": 18.0979, "step": 491390 }, { "epoch": 0.9926590900827014, "grad_norm": 163.58566284179688, "learning_rate": 9.009094986534572e-09, "loss": 16.3686, "step": 491400 }, { "epoch": 0.9926792907153852, "grad_norm": 2.4858508110046387, "learning_rate": 8.988162059639371e-09, "loss": 18.2185, "step": 491410 }, { "epoch": 0.992699491348069, "grad_norm": 107.9386215209961, "learning_rate": 8.967253458330715e-09, "loss": 19.7233, "step": 491420 }, { "epoch": 0.9927196919807528, "grad_norm": 398.495361328125, "learning_rate": 8.946369182710191e-09, "loss": 16.6712, "step": 491430 }, { "epoch": 0.9927398926134366, "grad_norm": 326.6344909667969, "learning_rate": 8.925509232879937e-09, "loss": 17.6045, "step": 491440 }, { "epoch": 0.9927600932461205, "grad_norm": 309.113525390625, "learning_rate": 8.904673608940983e-09, "loss": 19.8827, "step": 491450 }, { "epoch": 0.9927802938788043, "grad_norm": 170.3911590576172, "learning_rate": 8.883862310995473e-09, "loss": 13.5585, "step": 491460 }, { "epoch": 0.9928004945114881, "grad_norm": 92.2303466796875, "learning_rate": 8.863075339144988e-09, "loss": 16.4295, "step": 491470 }, { "epoch": 0.9928206951441719, "grad_norm": 351.38494873046875, "learning_rate": 8.842312693490563e-09, "loss": 19.5431, "step": 491480 }, { "epoch": 0.9928408957768557, "grad_norm": 807.0746459960938, "learning_rate": 8.821574374132669e-09, "loss": 16.084, "step": 491490 }, { "epoch": 0.9928610964095396, "grad_norm": 412.27001953125, "learning_rate": 8.800860381173448e-09, "loss": 25.2724, "step": 491500 }, { "epoch": 0.9928812970422234, "grad_norm": 172.590087890625, "learning_rate": 8.780170714713931e-09, "loss": 13.0839, "step": 491510 }, { "epoch": 0.9929014976749072, "grad_norm": 154.42572021484375, "learning_rate": 8.759505374854038e-09, "loss": 19.079, "step": 491520 }, { "epoch": 0.992921698307591, "grad_norm": 206.93417358398438, "learning_rate": 8.738864361694799e-09, "loss": 32.277, "step": 491530 }, { "epoch": 0.9929418989402748, "grad_norm": 604.6255493164062, "learning_rate": 8.718247675337243e-09, "loss": 26.7745, "step": 491540 }, { "epoch": 0.9929620995729587, "grad_norm": 462.11669921875, "learning_rate": 8.697655315881293e-09, "loss": 19.3062, "step": 491550 }, { "epoch": 0.9929823002056425, "grad_norm": 433.7647399902344, "learning_rate": 8.677087283427976e-09, "loss": 20.0991, "step": 491560 }, { "epoch": 0.9930025008383263, 
"grad_norm": 607.44287109375, "learning_rate": 8.656543578077215e-09, "loss": 19.0045, "step": 491570 }, { "epoch": 0.9930227014710101, "grad_norm": 65.2020263671875, "learning_rate": 8.636024199928927e-09, "loss": 27.6576, "step": 491580 }, { "epoch": 0.9930429021036938, "grad_norm": 720.201171875, "learning_rate": 8.615529149083034e-09, "loss": 27.6301, "step": 491590 }, { "epoch": 0.9930631027363777, "grad_norm": 217.3624725341797, "learning_rate": 8.595058425640012e-09, "loss": 16.5394, "step": 491600 }, { "epoch": 0.9930833033690615, "grad_norm": 468.4784851074219, "learning_rate": 8.574612029699224e-09, "loss": 21.0743, "step": 491610 }, { "epoch": 0.9931035040017453, "grad_norm": 508.72381591796875, "learning_rate": 8.554189961360037e-09, "loss": 24.9038, "step": 491620 }, { "epoch": 0.9931237046344291, "grad_norm": 186.89498901367188, "learning_rate": 8.53379222072237e-09, "loss": 14.5935, "step": 491630 }, { "epoch": 0.9931439052671129, "grad_norm": 622.7369995117188, "learning_rate": 8.513418807886142e-09, "loss": 19.9239, "step": 491640 }, { "epoch": 0.9931641058997968, "grad_norm": 121.98161315917969, "learning_rate": 8.49306972294961e-09, "loss": 14.8514, "step": 491650 }, { "epoch": 0.9931843065324806, "grad_norm": 531.5045166015625, "learning_rate": 8.472744966012691e-09, "loss": 17.4055, "step": 491660 }, { "epoch": 0.9932045071651644, "grad_norm": 134.05284118652344, "learning_rate": 8.452444537174198e-09, "loss": 21.0864, "step": 491670 }, { "epoch": 0.9932247077978482, "grad_norm": 486.0033264160156, "learning_rate": 8.43216843653294e-09, "loss": 9.6031, "step": 491680 }, { "epoch": 0.993244908430532, "grad_norm": 10.919336318969727, "learning_rate": 8.41191666418828e-09, "loss": 17.866, "step": 491690 }, { "epoch": 0.9932651090632159, "grad_norm": 273.6304626464844, "learning_rate": 8.391689220238474e-09, "loss": 14.4463, "step": 491700 }, { "epoch": 0.9932853096958997, "grad_norm": 598.221435546875, "learning_rate": 8.37148610478178e-09, "loss": 18.2435, "step": 491710 }, { "epoch": 0.9933055103285835, "grad_norm": 382.8218078613281, "learning_rate": 8.351307317917002e-09, "loss": 19.4453, "step": 491720 }, { "epoch": 0.9933257109612673, "grad_norm": 47.96708679199219, "learning_rate": 8.331152859742952e-09, "loss": 31.3714, "step": 491730 }, { "epoch": 0.9933459115939511, "grad_norm": 505.9755554199219, "learning_rate": 8.311022730357331e-09, "loss": 20.7981, "step": 491740 }, { "epoch": 0.993366112226635, "grad_norm": 58.72330093383789, "learning_rate": 8.290916929858394e-09, "loss": 30.5194, "step": 491750 }, { "epoch": 0.9933863128593188, "grad_norm": 519.745361328125, "learning_rate": 8.27083545834384e-09, "loss": 20.2842, "step": 491760 }, { "epoch": 0.9934065134920026, "grad_norm": 458.9234924316406, "learning_rate": 8.250778315911922e-09, "loss": 19.8348, "step": 491770 }, { "epoch": 0.9934267141246864, "grad_norm": 184.57225036621094, "learning_rate": 8.230745502660343e-09, "loss": 19.1662, "step": 491780 }, { "epoch": 0.9934469147573702, "grad_norm": 488.2207946777344, "learning_rate": 8.210737018686798e-09, "loss": 18.1941, "step": 491790 }, { "epoch": 0.9934671153900541, "grad_norm": 328.6118469238281, "learning_rate": 8.190752864088436e-09, "loss": 27.4498, "step": 491800 }, { "epoch": 0.9934873160227379, "grad_norm": 366.3116455078125, "learning_rate": 8.17079303896351e-09, "loss": 29.1802, "step": 491810 }, { "epoch": 0.9935075166554217, "grad_norm": 229.66558837890625, "learning_rate": 8.150857543408054e-09, "loss": 20.8235, "step": 491820 }, { "epoch": 
0.9935277172881055, "grad_norm": 235.47610473632812, "learning_rate": 8.130946377519767e-09, "loss": 22.9774, "step": 491830 }, { "epoch": 0.9935479179207892, "grad_norm": 223.5673370361328, "learning_rate": 8.11105954139635e-09, "loss": 12.8811, "step": 491840 }, { "epoch": 0.993568118553473, "grad_norm": 230.26121520996094, "learning_rate": 8.091197035133836e-09, "loss": 18.2644, "step": 491850 }, { "epoch": 0.9935883191861569, "grad_norm": 292.3512268066406, "learning_rate": 8.07135885882937e-09, "loss": 14.2108, "step": 491860 }, { "epoch": 0.9936085198188407, "grad_norm": 438.98638916015625, "learning_rate": 8.051545012580097e-09, "loss": 12.9736, "step": 491870 }, { "epoch": 0.9936287204515245, "grad_norm": 299.19549560546875, "learning_rate": 8.031755496481496e-09, "loss": 14.6742, "step": 491880 }, { "epoch": 0.9936489210842083, "grad_norm": 364.4718017578125, "learning_rate": 8.011990310631269e-09, "loss": 15.7653, "step": 491890 }, { "epoch": 0.9936691217168921, "grad_norm": 258.502197265625, "learning_rate": 7.992249455124889e-09, "loss": 10.2188, "step": 491900 }, { "epoch": 0.993689322349576, "grad_norm": 228.58082580566406, "learning_rate": 7.972532930058396e-09, "loss": 14.5795, "step": 491910 }, { "epoch": 0.9937095229822598, "grad_norm": 0.3197769820690155, "learning_rate": 7.952840735528933e-09, "loss": 12.5138, "step": 491920 }, { "epoch": 0.9937297236149436, "grad_norm": 369.63385009765625, "learning_rate": 7.933172871631978e-09, "loss": 12.8713, "step": 491930 }, { "epoch": 0.9937499242476274, "grad_norm": 102.29288482666016, "learning_rate": 7.913529338463011e-09, "loss": 16.312, "step": 491940 }, { "epoch": 0.9937701248803112, "grad_norm": 18.225814819335938, "learning_rate": 7.89391013611751e-09, "loss": 18.8131, "step": 491950 }, { "epoch": 0.9937903255129951, "grad_norm": 441.5530700683594, "learning_rate": 7.874315264692622e-09, "loss": 15.199, "step": 491960 }, { "epoch": 0.9938105261456789, "grad_norm": 458.37664794921875, "learning_rate": 7.85474472428216e-09, "loss": 14.9665, "step": 491970 }, { "epoch": 0.9938307267783627, "grad_norm": 374.7511901855469, "learning_rate": 7.835198514982156e-09, "loss": 17.1539, "step": 491980 }, { "epoch": 0.9938509274110465, "grad_norm": 124.7674560546875, "learning_rate": 7.815676636888093e-09, "loss": 16.2321, "step": 491990 }, { "epoch": 0.9938711280437303, "grad_norm": 516.1492919921875, "learning_rate": 7.796179090094891e-09, "loss": 24.4474, "step": 492000 }, { "epoch": 0.9938913286764142, "grad_norm": 819.0858154296875, "learning_rate": 7.776705874698032e-09, "loss": 16.4256, "step": 492010 }, { "epoch": 0.993911529309098, "grad_norm": 260.1531982421875, "learning_rate": 7.757256990791328e-09, "loss": 11.4037, "step": 492020 }, { "epoch": 0.9939317299417818, "grad_norm": 52.56280517578125, "learning_rate": 7.737832438470816e-09, "loss": 16.7869, "step": 492030 }, { "epoch": 0.9939519305744656, "grad_norm": 293.88433837890625, "learning_rate": 7.718432217830307e-09, "loss": 20.3442, "step": 492040 }, { "epoch": 0.9939721312071494, "grad_norm": 143.10574340820312, "learning_rate": 7.699056328964726e-09, "loss": 18.5877, "step": 492050 }, { "epoch": 0.9939923318398333, "grad_norm": 289.1720886230469, "learning_rate": 7.679704771968998e-09, "loss": 12.983, "step": 492060 }, { "epoch": 0.9940125324725171, "grad_norm": 198.0683135986328, "learning_rate": 7.660377546936382e-09, "loss": 13.1834, "step": 492070 }, { "epoch": 0.9940327331052009, "grad_norm": 412.1947326660156, "learning_rate": 7.641074653961244e-09, "loss": 
16.0309, "step": 492080 }, { "epoch": 0.9940529337378847, "grad_norm": 140.10911560058594, "learning_rate": 7.621796093138512e-09, "loss": 9.8632, "step": 492090 }, { "epoch": 0.9940731343705684, "grad_norm": 324.3037109375, "learning_rate": 7.602541864561442e-09, "loss": 15.7889, "step": 492100 }, { "epoch": 0.9940933350032523, "grad_norm": 282.5607604980469, "learning_rate": 7.583311968324403e-09, "loss": 22.3234, "step": 492110 }, { "epoch": 0.9941135356359361, "grad_norm": 293.5461120605469, "learning_rate": 7.564106404520654e-09, "loss": 31.9959, "step": 492120 }, { "epoch": 0.9941337362686199, "grad_norm": 317.7358093261719, "learning_rate": 7.544925173243455e-09, "loss": 14.6415, "step": 492130 }, { "epoch": 0.9941539369013037, "grad_norm": 34.03724670410156, "learning_rate": 7.525768274587175e-09, "loss": 27.2084, "step": 492140 }, { "epoch": 0.9941741375339875, "grad_norm": 0.0, "learning_rate": 7.506635708645072e-09, "loss": 17.1881, "step": 492150 }, { "epoch": 0.9941943381666714, "grad_norm": 349.0623779296875, "learning_rate": 7.487527475509848e-09, "loss": 11.0595, "step": 492160 }, { "epoch": 0.9942145387993552, "grad_norm": 590.5459594726562, "learning_rate": 7.468443575274764e-09, "loss": 23.4729, "step": 492170 }, { "epoch": 0.994234739432039, "grad_norm": 46.546485900878906, "learning_rate": 7.449384008033078e-09, "loss": 20.6852, "step": 492180 }, { "epoch": 0.9942549400647228, "grad_norm": 594.9143676757812, "learning_rate": 7.430348773877494e-09, "loss": 22.5141, "step": 492190 }, { "epoch": 0.9942751406974066, "grad_norm": 223.10205078125, "learning_rate": 7.411337872900715e-09, "loss": 20.5256, "step": 492200 }, { "epoch": 0.9942953413300905, "grad_norm": 258.2781677246094, "learning_rate": 7.392351305195999e-09, "loss": 18.0094, "step": 492210 }, { "epoch": 0.9943155419627743, "grad_norm": 1.6258138418197632, "learning_rate": 7.373389070854941e-09, "loss": 17.1076, "step": 492220 }, { "epoch": 0.9943357425954581, "grad_norm": 786.12109375, "learning_rate": 7.3544511699708e-09, "loss": 21.3983, "step": 492230 }, { "epoch": 0.9943559432281419, "grad_norm": 660.6713256835938, "learning_rate": 7.335537602635723e-09, "loss": 16.2497, "step": 492240 }, { "epoch": 0.9943761438608257, "grad_norm": 131.5867156982422, "learning_rate": 7.3166483689413035e-09, "loss": 18.4497, "step": 492250 }, { "epoch": 0.9943963444935096, "grad_norm": 454.113037109375, "learning_rate": 7.297783468980246e-09, "loss": 17.6033, "step": 492260 }, { "epoch": 0.9944165451261934, "grad_norm": 191.77944946289062, "learning_rate": 7.278942902843589e-09, "loss": 13.1115, "step": 492270 }, { "epoch": 0.9944367457588772, "grad_norm": 182.51564025878906, "learning_rate": 7.26012667062459e-09, "loss": 11.9545, "step": 492280 }, { "epoch": 0.994456946391561, "grad_norm": 196.82113647460938, "learning_rate": 7.241334772414288e-09, "loss": 20.5471, "step": 492290 }, { "epoch": 0.9944771470242448, "grad_norm": 325.7108154296875, "learning_rate": 7.222567208303721e-09, "loss": 24.7077, "step": 492300 }, { "epoch": 0.9944973476569287, "grad_norm": 825.5935668945312, "learning_rate": 7.203823978384483e-09, "loss": 36.0086, "step": 492310 }, { "epoch": 0.9945175482896125, "grad_norm": 120.90480041503906, "learning_rate": 7.185105082748722e-09, "loss": 19.9979, "step": 492320 }, { "epoch": 0.9945377489222963, "grad_norm": 310.6450500488281, "learning_rate": 7.166410521487477e-09, "loss": 21.8731, "step": 492330 }, { "epoch": 0.9945579495549801, "grad_norm": 113.73008728027344, "learning_rate": 
7.14774029469123e-09, "loss": 21.9454, "step": 492340 }, { "epoch": 0.9945781501876638, "grad_norm": 330.3626708984375, "learning_rate": 7.129094402451575e-09, "loss": 20.177, "step": 492350 }, { "epoch": 0.9945983508203476, "grad_norm": 394.8288879394531, "learning_rate": 7.11047284485844e-09, "loss": 15.473, "step": 492360 }, { "epoch": 0.9946185514530315, "grad_norm": 624.0137939453125, "learning_rate": 7.0918756220039745e-09, "loss": 27.1499, "step": 492370 }, { "epoch": 0.9946387520857153, "grad_norm": 167.65707397460938, "learning_rate": 7.073302733978104e-09, "loss": 17.0275, "step": 492380 }, { "epoch": 0.9946589527183991, "grad_norm": 277.8253173828125, "learning_rate": 7.054754180871315e-09, "loss": 6.4159, "step": 492390 }, { "epoch": 0.9946791533510829, "grad_norm": 290.10595703125, "learning_rate": 7.036229962774088e-09, "loss": 21.9664, "step": 492400 }, { "epoch": 0.9946993539837667, "grad_norm": 244.20114135742188, "learning_rate": 7.0177300797763526e-09, "loss": 34.4278, "step": 492410 }, { "epoch": 0.9947195546164506, "grad_norm": 656.7611083984375, "learning_rate": 6.999254531969146e-09, "loss": 17.8188, "step": 492420 }, { "epoch": 0.9947397552491344, "grad_norm": 873.718017578125, "learning_rate": 6.980803319441842e-09, "loss": 33.7453, "step": 492430 }, { "epoch": 0.9947599558818182, "grad_norm": 121.09410858154297, "learning_rate": 6.962376442284368e-09, "loss": 23.1687, "step": 492440 }, { "epoch": 0.994780156514502, "grad_norm": 506.382080078125, "learning_rate": 6.943973900586654e-09, "loss": 34.1638, "step": 492450 }, { "epoch": 0.9948003571471858, "grad_norm": 670.7462768554688, "learning_rate": 6.925595694438625e-09, "loss": 17.3223, "step": 492460 }, { "epoch": 0.9948205577798697, "grad_norm": 382.9184875488281, "learning_rate": 6.9072418239296556e-09, "loss": 40.3718, "step": 492470 }, { "epoch": 0.9948407584125535, "grad_norm": 1033.291015625, "learning_rate": 6.888912289149119e-09, "loss": 20.0352, "step": 492480 }, { "epoch": 0.9948609590452373, "grad_norm": 244.9137725830078, "learning_rate": 6.8706070901863876e-09, "loss": 33.7851, "step": 492490 }, { "epoch": 0.9948811596779211, "grad_norm": 129.25210571289062, "learning_rate": 6.852326227130835e-09, "loss": 13.3353, "step": 492500 }, { "epoch": 0.9949013603106049, "grad_norm": 396.7117614746094, "learning_rate": 6.834069700071277e-09, "loss": 17.8248, "step": 492510 }, { "epoch": 0.9949215609432888, "grad_norm": 13.202730178833008, "learning_rate": 6.81583750909709e-09, "loss": 15.2326, "step": 492520 }, { "epoch": 0.9949417615759726, "grad_norm": 197.49044799804688, "learning_rate": 6.797629654296533e-09, "loss": 18.0699, "step": 492530 }, { "epoch": 0.9949619622086564, "grad_norm": 620.369140625, "learning_rate": 6.779446135758982e-09, "loss": 22.7365, "step": 492540 }, { "epoch": 0.9949821628413402, "grad_norm": 262.18646240234375, "learning_rate": 6.761286953572699e-09, "loss": 15.2508, "step": 492550 }, { "epoch": 0.995002363474024, "grad_norm": 847.1273803710938, "learning_rate": 6.7431521078265e-09, "loss": 29.8875, "step": 492560 }, { "epoch": 0.9950225641067079, "grad_norm": 519.87548828125, "learning_rate": 6.725041598608651e-09, "loss": 18.5069, "step": 492570 }, { "epoch": 0.9950427647393917, "grad_norm": 781.2832641601562, "learning_rate": 6.706955426006856e-09, "loss": 16.7449, "step": 492580 }, { "epoch": 0.9950629653720755, "grad_norm": 192.6318817138672, "learning_rate": 6.688893590109935e-09, "loss": 15.2852, "step": 492590 }, { "epoch": 0.9950831660047593, "grad_norm": 
322.802490234375, "learning_rate": 6.670856091006151e-09, "loss": 14.4477, "step": 492600 }, { "epoch": 0.995103366637443, "grad_norm": 30.472148895263672, "learning_rate": 6.652842928782655e-09, "loss": 22.5054, "step": 492610 }, { "epoch": 0.9951235672701269, "grad_norm": 488.73419189453125, "learning_rate": 6.63485410352771e-09, "loss": 23.5498, "step": 492620 }, { "epoch": 0.9951437679028107, "grad_norm": 211.9298095703125, "learning_rate": 6.61688961532847e-09, "loss": 6.6318, "step": 492630 }, { "epoch": 0.9951639685354945, "grad_norm": 296.3676452636719, "learning_rate": 6.598949464273196e-09, "loss": 18.351, "step": 492640 }, { "epoch": 0.9951841691681783, "grad_norm": 526.3226318359375, "learning_rate": 6.581033650449043e-09, "loss": 16.0651, "step": 492650 }, { "epoch": 0.9952043698008621, "grad_norm": 506.3572692871094, "learning_rate": 6.563142173943715e-09, "loss": 27.0753, "step": 492660 }, { "epoch": 0.995224570433546, "grad_norm": 337.43609619140625, "learning_rate": 6.545275034843257e-09, "loss": 14.6582, "step": 492670 }, { "epoch": 0.9952447710662298, "grad_norm": 195.84539794921875, "learning_rate": 6.527432233235931e-09, "loss": 24.3385, "step": 492680 }, { "epoch": 0.9952649716989136, "grad_norm": 389.5736999511719, "learning_rate": 6.509613769207778e-09, "loss": 27.1965, "step": 492690 }, { "epoch": 0.9952851723315974, "grad_norm": 23.38743019104004, "learning_rate": 6.491819642846509e-09, "loss": 16.0401, "step": 492700 }, { "epoch": 0.9953053729642812, "grad_norm": 15.392356872558594, "learning_rate": 6.4740498542387174e-09, "loss": 5.5294, "step": 492710 }, { "epoch": 0.995325573596965, "grad_norm": 100.64783477783203, "learning_rate": 6.456304403470448e-09, "loss": 13.0771, "step": 492720 }, { "epoch": 0.9953457742296489, "grad_norm": 657.622314453125, "learning_rate": 6.438583290628298e-09, "loss": 19.4515, "step": 492730 }, { "epoch": 0.9953659748623327, "grad_norm": 298.1041259765625, "learning_rate": 6.420886515799418e-09, "loss": 22.1768, "step": 492740 }, { "epoch": 0.9953861754950165, "grad_norm": 306.6492004394531, "learning_rate": 6.403214079069298e-09, "loss": 23.7699, "step": 492750 }, { "epoch": 0.9954063761277003, "grad_norm": 341.74298095703125, "learning_rate": 6.385565980523978e-09, "loss": 8.2629, "step": 492760 }, { "epoch": 0.9954265767603842, "grad_norm": 176.78253173828125, "learning_rate": 6.3679422202495015e-09, "loss": 13.1727, "step": 492770 }, { "epoch": 0.995446777393068, "grad_norm": 282.4524841308594, "learning_rate": 6.350342798332465e-09, "loss": 19.1546, "step": 492780 }, { "epoch": 0.9954669780257518, "grad_norm": 310.2063293457031, "learning_rate": 6.332767714858357e-09, "loss": 23.2967, "step": 492790 }, { "epoch": 0.9954871786584356, "grad_norm": 644.8700561523438, "learning_rate": 6.315216969912663e-09, "loss": 14.4751, "step": 492800 }, { "epoch": 0.9955073792911194, "grad_norm": 546.1470947265625, "learning_rate": 6.2976905635803165e-09, "loss": 21.2804, "step": 492810 }, { "epoch": 0.9955275799238033, "grad_norm": 534.7316284179688, "learning_rate": 6.280188495947914e-09, "loss": 16.5509, "step": 492820 }, { "epoch": 0.9955477805564871, "grad_norm": 662.7942504882812, "learning_rate": 6.262710767100388e-09, "loss": 22.4259, "step": 492830 }, { "epoch": 0.9955679811891709, "grad_norm": 319.81024169921875, "learning_rate": 6.245257377122116e-09, "loss": 22.5629, "step": 492840 }, { "epoch": 0.9955881818218547, "grad_norm": 84.95552825927734, "learning_rate": 6.227828326099139e-09, "loss": 16.1414, "step": 492850 }, { "epoch": 
0.9956083824545384, "grad_norm": 303.6583557128906, "learning_rate": 6.21042361411639e-09, "loss": 12.8599, "step": 492860 }, { "epoch": 0.9956285830872222, "grad_norm": 281.2415466308594, "learning_rate": 6.19304324125769e-09, "loss": 15.6234, "step": 492870 }, { "epoch": 0.9956487837199061, "grad_norm": 78.5985336303711, "learning_rate": 6.175687207609082e-09, "loss": 12.9739, "step": 492880 }, { "epoch": 0.9956689843525899, "grad_norm": 711.9841918945312, "learning_rate": 6.1583555132543886e-09, "loss": 25.8945, "step": 492890 }, { "epoch": 0.9956891849852737, "grad_norm": 302.2545471191406, "learning_rate": 6.141048158277429e-09, "loss": 12.0464, "step": 492900 }, { "epoch": 0.9957093856179575, "grad_norm": 197.092041015625, "learning_rate": 6.123765142764249e-09, "loss": 11.8187, "step": 492910 }, { "epoch": 0.9957295862506413, "grad_norm": 53.73894500732422, "learning_rate": 6.106506466797557e-09, "loss": 11.5132, "step": 492920 }, { "epoch": 0.9957497868833252, "grad_norm": 264.3664245605469, "learning_rate": 6.0892721304622874e-09, "loss": 24.6879, "step": 492930 }, { "epoch": 0.995769987516009, "grad_norm": 228.70321655273438, "learning_rate": 6.0720621338422606e-09, "loss": 15.5944, "step": 492940 }, { "epoch": 0.9957901881486928, "grad_norm": 371.2651062011719, "learning_rate": 6.054876477021299e-09, "loss": 9.8062, "step": 492950 }, { "epoch": 0.9958103887813766, "grad_norm": 109.58891296386719, "learning_rate": 6.037715160083224e-09, "loss": 10.4359, "step": 492960 }, { "epoch": 0.9958305894140604, "grad_norm": 575.9791259765625, "learning_rate": 6.020578183111303e-09, "loss": 14.5963, "step": 492970 }, { "epoch": 0.9958507900467443, "grad_norm": 1932.4429931640625, "learning_rate": 6.003465546189358e-09, "loss": 40.4509, "step": 492980 }, { "epoch": 0.9958709906794281, "grad_norm": 613.677978515625, "learning_rate": 5.98637724940121e-09, "loss": 20.3188, "step": 492990 }, { "epoch": 0.9958911913121119, "grad_norm": 369.6727294921875, "learning_rate": 5.969313292830126e-09, "loss": 8.9391, "step": 493000 }, { "epoch": 0.9959113919447957, "grad_norm": 160.49456787109375, "learning_rate": 5.952273676558262e-09, "loss": 9.336, "step": 493010 }, { "epoch": 0.9959315925774795, "grad_norm": 788.9593505859375, "learning_rate": 5.935258400669442e-09, "loss": 15.4208, "step": 493020 }, { "epoch": 0.9959517932101634, "grad_norm": 295.59832763671875, "learning_rate": 5.918267465246374e-09, "loss": 12.4223, "step": 493030 }, { "epoch": 0.9959719938428472, "grad_norm": 255.62750244140625, "learning_rate": 5.901300870372329e-09, "loss": 20.9779, "step": 493040 }, { "epoch": 0.995992194475531, "grad_norm": 254.54164123535156, "learning_rate": 5.8843586161289045e-09, "loss": 9.4201, "step": 493050 }, { "epoch": 0.9960123951082148, "grad_norm": 1058.5753173828125, "learning_rate": 5.867440702599925e-09, "loss": 14.6705, "step": 493060 }, { "epoch": 0.9960325957408986, "grad_norm": 930.560791015625, "learning_rate": 5.850547129867546e-09, "loss": 30.7085, "step": 493070 }, { "epoch": 0.9960527963735825, "grad_norm": 206.38072204589844, "learning_rate": 5.833677898013368e-09, "loss": 24.0558, "step": 493080 }, { "epoch": 0.9960729970062663, "grad_norm": 428.0592041015625, "learning_rate": 5.816833007120659e-09, "loss": 19.56, "step": 493090 }, { "epoch": 0.9960931976389501, "grad_norm": 260.1087341308594, "learning_rate": 5.800012457270466e-09, "loss": 22.4628, "step": 493100 }, { "epoch": 0.9961133982716339, "grad_norm": 192.6460723876953, "learning_rate": 5.783216248545498e-09, "loss": 
12.9076, "step": 493110 }, { "epoch": 0.9961335989043176, "grad_norm": 350.8668212890625, "learning_rate": 5.766444381027358e-09, "loss": 16.6594, "step": 493120 }, { "epoch": 0.9961537995370014, "grad_norm": 180.49252319335938, "learning_rate": 5.749696854798204e-09, "loss": 20.0456, "step": 493130 }, { "epoch": 0.9961740001696853, "grad_norm": 13.434219360351562, "learning_rate": 5.732973669939079e-09, "loss": 16.6701, "step": 493140 }, { "epoch": 0.9961942008023691, "grad_norm": 564.3707885742188, "learning_rate": 5.716274826531587e-09, "loss": 18.9161, "step": 493150 }, { "epoch": 0.9962144014350529, "grad_norm": 249.492919921875, "learning_rate": 5.699600324657328e-09, "loss": 21.4974, "step": 493160 }, { "epoch": 0.9962346020677367, "grad_norm": 149.17922973632812, "learning_rate": 5.682950164397349e-09, "loss": 11.9207, "step": 493170 }, { "epoch": 0.9962548027004205, "grad_norm": 593.6851806640625, "learning_rate": 5.6663243458332514e-09, "loss": 21.1945, "step": 493180 }, { "epoch": 0.9962750033331044, "grad_norm": 76.89675903320312, "learning_rate": 5.649722869044971e-09, "loss": 29.9984, "step": 493190 }, { "epoch": 0.9962952039657882, "grad_norm": 319.2921142578125, "learning_rate": 5.633145734114665e-09, "loss": 19.5254, "step": 493200 }, { "epoch": 0.996315404598472, "grad_norm": 343.31097412109375, "learning_rate": 5.616592941123378e-09, "loss": 25.4214, "step": 493210 }, { "epoch": 0.9963356052311558, "grad_norm": 247.19534301757812, "learning_rate": 5.600064490149937e-09, "loss": 16.5237, "step": 493220 }, { "epoch": 0.9963558058638396, "grad_norm": 125.71563720703125, "learning_rate": 5.583560381276498e-09, "loss": 23.1971, "step": 493230 }, { "epoch": 0.9963760064965235, "grad_norm": 387.4584655761719, "learning_rate": 5.5670806145835536e-09, "loss": 30.7633, "step": 493240 }, { "epoch": 0.9963962071292073, "grad_norm": 239.5253143310547, "learning_rate": 5.5506251901504825e-09, "loss": 18.8652, "step": 493250 }, { "epoch": 0.9964164077618911, "grad_norm": 265.25750732421875, "learning_rate": 5.534194108057778e-09, "loss": 7.082, "step": 493260 }, { "epoch": 0.9964366083945749, "grad_norm": 351.1151428222656, "learning_rate": 5.517787368385375e-09, "loss": 11.4043, "step": 493270 }, { "epoch": 0.9964568090272587, "grad_norm": 534.319580078125, "learning_rate": 5.501404971214319e-09, "loss": 16.0895, "step": 493280 }, { "epoch": 0.9964770096599426, "grad_norm": 549.73095703125, "learning_rate": 5.485046916622883e-09, "loss": 8.4059, "step": 493290 }, { "epoch": 0.9964972102926264, "grad_norm": 168.16229248046875, "learning_rate": 5.468713204692111e-09, "loss": 11.6919, "step": 493300 }, { "epoch": 0.9965174109253102, "grad_norm": 233.44361877441406, "learning_rate": 5.45240383550083e-09, "loss": 15.7062, "step": 493310 }, { "epoch": 0.996537611557994, "grad_norm": 671.6220092773438, "learning_rate": 5.436118809128421e-09, "loss": 20.8994, "step": 493320 }, { "epoch": 0.9965578121906778, "grad_norm": 164.53518676757812, "learning_rate": 5.419858125655375e-09, "loss": 12.1398, "step": 493330 }, { "epoch": 0.9965780128233617, "grad_norm": 32.66850662231445, "learning_rate": 5.403621785159407e-09, "loss": 15.2787, "step": 493340 }, { "epoch": 0.9965982134560455, "grad_norm": 73.91290283203125, "learning_rate": 5.38740978772101e-09, "loss": 11.4727, "step": 493350 }, { "epoch": 0.9966184140887293, "grad_norm": 656.6400756835938, "learning_rate": 5.371222133418452e-09, "loss": 15.5917, "step": 493360 }, { "epoch": 0.9966386147214131, "grad_norm": 161.31642150878906, 
"learning_rate": 5.355058822330561e-09, "loss": 17.0245, "step": 493370 }, { "epoch": 0.9966588153540968, "grad_norm": 505.07989501953125, "learning_rate": 5.338919854536162e-09, "loss": 14.3958, "step": 493380 }, { "epoch": 0.9966790159867807, "grad_norm": 274.6875915527344, "learning_rate": 5.322805230114636e-09, "loss": 17.4259, "step": 493390 }, { "epoch": 0.9966992166194645, "grad_norm": 466.4015197753906, "learning_rate": 5.306714949143699e-09, "loss": 10.4244, "step": 493400 }, { "epoch": 0.9967194172521483, "grad_norm": 592.9553833007812, "learning_rate": 5.290649011702176e-09, "loss": 26.8744, "step": 493410 }, { "epoch": 0.9967396178848321, "grad_norm": 434.5201416015625, "learning_rate": 5.2746074178683385e-09, "loss": 17.5278, "step": 493420 }, { "epoch": 0.9967598185175159, "grad_norm": 162.36619567871094, "learning_rate": 5.258590167719901e-09, "loss": 27.6637, "step": 493430 }, { "epoch": 0.9967800191501998, "grad_norm": 313.357666015625, "learning_rate": 5.242597261335691e-09, "loss": 20.6985, "step": 493440 }, { "epoch": 0.9968002197828836, "grad_norm": 172.16201782226562, "learning_rate": 5.226628698792868e-09, "loss": 18.3757, "step": 493450 }, { "epoch": 0.9968204204155674, "grad_norm": 502.9991760253906, "learning_rate": 5.210684480169703e-09, "loss": 20.8665, "step": 493460 }, { "epoch": 0.9968406210482512, "grad_norm": 40.60356903076172, "learning_rate": 5.1947646055444665e-09, "loss": 18.3703, "step": 493470 }, { "epoch": 0.996860821680935, "grad_norm": 549.3845825195312, "learning_rate": 5.178869074993209e-09, "loss": 21.0244, "step": 493480 }, { "epoch": 0.9968810223136189, "grad_norm": 584.9131469726562, "learning_rate": 5.162997888595312e-09, "loss": 15.8863, "step": 493490 }, { "epoch": 0.9969012229463027, "grad_norm": 537.6060180664062, "learning_rate": 5.147151046426824e-09, "loss": 15.1142, "step": 493500 }, { "epoch": 0.9969214235789865, "grad_norm": 36.83633041381836, "learning_rate": 5.1313285485649064e-09, "loss": 16.8483, "step": 493510 }, { "epoch": 0.9969416242116703, "grad_norm": 2115.87255859375, "learning_rate": 5.115530395087276e-09, "loss": 19.0449, "step": 493520 }, { "epoch": 0.9969618248443541, "grad_norm": 4.222888946533203, "learning_rate": 5.099756586071092e-09, "loss": 9.6148, "step": 493530 }, { "epoch": 0.996982025477038, "grad_norm": 215.87461853027344, "learning_rate": 5.084007121592405e-09, "loss": 25.4709, "step": 493540 }, { "epoch": 0.9970022261097218, "grad_norm": 441.0953369140625, "learning_rate": 5.06828200172893e-09, "loss": 13.4962, "step": 493550 }, { "epoch": 0.9970224267424056, "grad_norm": 324.8075256347656, "learning_rate": 5.052581226556719e-09, "loss": 19.6106, "step": 493560 }, { "epoch": 0.9970426273750894, "grad_norm": 791.2510986328125, "learning_rate": 5.036904796152375e-09, "loss": 28.8694, "step": 493570 }, { "epoch": 0.9970628280077732, "grad_norm": 791.0879516601562, "learning_rate": 5.02125271059195e-09, "loss": 27.5417, "step": 493580 }, { "epoch": 0.9970830286404571, "grad_norm": 248.20809936523438, "learning_rate": 5.0056249699526046e-09, "loss": 24.7889, "step": 493590 }, { "epoch": 0.9971032292731409, "grad_norm": 309.5415954589844, "learning_rate": 4.990021574309834e-09, "loss": 14.6246, "step": 493600 }, { "epoch": 0.9971234299058247, "grad_norm": 626.6513671875, "learning_rate": 4.9744425237396865e-09, "loss": 12.9801, "step": 493610 }, { "epoch": 0.9971436305385085, "grad_norm": 795.0584106445312, "learning_rate": 4.95888781831877e-09, "loss": 16.2975, "step": 493620 }, { "epoch": 0.9971638311711922, 
"grad_norm": 247.36184692382812, "learning_rate": 4.9433574581220225e-09, "loss": 12.3557, "step": 493630 }, { "epoch": 0.997184031803876, "grad_norm": 297.8055419921875, "learning_rate": 4.927851443225495e-09, "loss": 18.0591, "step": 493640 }, { "epoch": 0.9972042324365599, "grad_norm": 682.8153076171875, "learning_rate": 4.9123697737052386e-09, "loss": 26.2157, "step": 493650 }, { "epoch": 0.9972244330692437, "grad_norm": 850.35546875, "learning_rate": 4.896912449635638e-09, "loss": 21.9621, "step": 493660 }, { "epoch": 0.9972446337019275, "grad_norm": 273.8650817871094, "learning_rate": 4.881479471093298e-09, "loss": 29.0988, "step": 493670 }, { "epoch": 0.9972648343346113, "grad_norm": 239.4381561279297, "learning_rate": 4.866070838152049e-09, "loss": 12.6839, "step": 493680 }, { "epoch": 0.9972850349672951, "grad_norm": 415.7494812011719, "learning_rate": 4.850686550888495e-09, "loss": 21.6229, "step": 493690 }, { "epoch": 0.997305235599979, "grad_norm": 624.9031982421875, "learning_rate": 4.835326609376468e-09, "loss": 13.9667, "step": 493700 }, { "epoch": 0.9973254362326628, "grad_norm": 333.7686462402344, "learning_rate": 4.81999101369146e-09, "loss": 22.3211, "step": 493710 }, { "epoch": 0.9973456368653466, "grad_norm": 330.7865295410156, "learning_rate": 4.804679763907305e-09, "loss": 17.5816, "step": 493720 }, { "epoch": 0.9973658374980304, "grad_norm": 478.1622619628906, "learning_rate": 4.789392860100051e-09, "loss": 26.7659, "step": 493730 }, { "epoch": 0.9973860381307142, "grad_norm": 290.862060546875, "learning_rate": 4.774130302342972e-09, "loss": 9.9758, "step": 493740 }, { "epoch": 0.9974062387633981, "grad_norm": 360.92816162109375, "learning_rate": 4.758892090711009e-09, "loss": 18.2678, "step": 493750 }, { "epoch": 0.9974264393960819, "grad_norm": 1411.4190673828125, "learning_rate": 4.743678225278547e-09, "loss": 22.8019, "step": 493760 }, { "epoch": 0.9974466400287657, "grad_norm": 82.69062042236328, "learning_rate": 4.7284887061194165e-09, "loss": 12.9301, "step": 493770 }, { "epoch": 0.9974668406614495, "grad_norm": 296.7131042480469, "learning_rate": 4.713323533308001e-09, "loss": 21.2306, "step": 493780 }, { "epoch": 0.9974870412941333, "grad_norm": 254.96810913085938, "learning_rate": 4.6981827069181305e-09, "loss": 26.7119, "step": 493790 }, { "epoch": 0.9975072419268172, "grad_norm": 617.3046264648438, "learning_rate": 4.683066227023081e-09, "loss": 11.8411, "step": 493800 }, { "epoch": 0.997527442559501, "grad_norm": 158.57591247558594, "learning_rate": 4.667974093696681e-09, "loss": 10.8231, "step": 493810 }, { "epoch": 0.9975476431921848, "grad_norm": 363.39715576171875, "learning_rate": 4.6529063070133165e-09, "loss": 13.0921, "step": 493820 }, { "epoch": 0.9975678438248686, "grad_norm": 448.3036804199219, "learning_rate": 4.637862867045151e-09, "loss": 15.935, "step": 493830 }, { "epoch": 0.9975880444575524, "grad_norm": 701.3001098632812, "learning_rate": 4.6228437738665695e-09, "loss": 12.3682, "step": 493840 }, { "epoch": 0.9976082450902363, "grad_norm": 989.6455078125, "learning_rate": 4.607849027550293e-09, "loss": 14.8341, "step": 493850 }, { "epoch": 0.9976284457229201, "grad_norm": 503.3165283203125, "learning_rate": 4.592878628169595e-09, "loss": 27.2796, "step": 493860 }, { "epoch": 0.9976486463556039, "grad_norm": 349.30303955078125, "learning_rate": 4.577932575797195e-09, "loss": 20.6817, "step": 493870 }, { "epoch": 0.9976688469882877, "grad_norm": 1218.044189453125, "learning_rate": 4.5630108705063684e-09, "loss": 17.9004, "step": 493880 
}, { "epoch": 0.9976890476209714, "grad_norm": 834.479248046875, "learning_rate": 4.5481135123692786e-09, "loss": 17.5298, "step": 493890 }, { "epoch": 0.9977092482536553, "grad_norm": 583.0570068359375, "learning_rate": 4.533240501459202e-09, "loss": 17.883, "step": 493900 }, { "epoch": 0.9977294488863391, "grad_norm": 228.48049926757812, "learning_rate": 4.518391837847747e-09, "loss": 10.9652, "step": 493910 }, { "epoch": 0.9977496495190229, "grad_norm": 477.0223083496094, "learning_rate": 4.503567521608187e-09, "loss": 18.4236, "step": 493920 }, { "epoch": 0.9977698501517067, "grad_norm": 113.33273315429688, "learning_rate": 4.4887675528121345e-09, "loss": 28.0155, "step": 493930 }, { "epoch": 0.9977900507843905, "grad_norm": 313.23504638671875, "learning_rate": 4.473991931531752e-09, "loss": 25.8596, "step": 493940 }, { "epoch": 0.9978102514170744, "grad_norm": 135.3140869140625, "learning_rate": 4.459240657839203e-09, "loss": 28.1376, "step": 493950 }, { "epoch": 0.9978304520497582, "grad_norm": 275.48236083984375, "learning_rate": 4.4445137318072096e-09, "loss": 22.8885, "step": 493960 }, { "epoch": 0.997850652682442, "grad_norm": 0.3670414090156555, "learning_rate": 4.429811153505714e-09, "loss": 7.0648, "step": 493970 }, { "epoch": 0.9978708533151258, "grad_norm": 560.8939819335938, "learning_rate": 4.415132923007992e-09, "loss": 14.6677, "step": 493980 }, { "epoch": 0.9978910539478096, "grad_norm": 28.338668823242188, "learning_rate": 4.400479040385098e-09, "loss": 13.3111, "step": 493990 }, { "epoch": 0.9979112545804935, "grad_norm": 4389.14111328125, "learning_rate": 4.385849505708084e-09, "loss": 41.6417, "step": 494000 }, { "epoch": 0.9979314552131773, "grad_norm": 454.45654296875, "learning_rate": 4.3712443190491175e-09, "loss": 16.7873, "step": 494010 }, { "epoch": 0.9979516558458611, "grad_norm": 189.19061279296875, "learning_rate": 4.3566634804781405e-09, "loss": 16.8721, "step": 494020 }, { "epoch": 0.9979718564785449, "grad_norm": 686.783447265625, "learning_rate": 4.342106990067319e-09, "loss": 24.4592, "step": 494030 }, { "epoch": 0.9979920571112287, "grad_norm": 471.0889892578125, "learning_rate": 4.327574847886595e-09, "loss": 17.7554, "step": 494040 }, { "epoch": 0.9980122577439126, "grad_norm": 392.9156494140625, "learning_rate": 4.313067054008135e-09, "loss": 20.7474, "step": 494050 }, { "epoch": 0.9980324583765964, "grad_norm": 868.9571533203125, "learning_rate": 4.298583608501328e-09, "loss": 27.625, "step": 494060 }, { "epoch": 0.9980526590092802, "grad_norm": 2032.013916015625, "learning_rate": 4.284124511437782e-09, "loss": 34.6727, "step": 494070 }, { "epoch": 0.998072859641964, "grad_norm": 257.8091735839844, "learning_rate": 4.269689762886886e-09, "loss": 22.6591, "step": 494080 }, { "epoch": 0.9980930602746478, "grad_norm": 320.83758544921875, "learning_rate": 4.2552793629202506e-09, "loss": 20.1354, "step": 494090 }, { "epoch": 0.9981132609073317, "grad_norm": 123.14933013916016, "learning_rate": 4.2408933116072635e-09, "loss": 24.1403, "step": 494100 }, { "epoch": 0.9981334615400155, "grad_norm": 206.25991821289062, "learning_rate": 4.22653160901787e-09, "loss": 17.0927, "step": 494110 }, { "epoch": 0.9981536621726993, "grad_norm": 260.6522216796875, "learning_rate": 4.212194255222568e-09, "loss": 12.1673, "step": 494120 }, { "epoch": 0.9981738628053831, "grad_norm": 208.68922424316406, "learning_rate": 4.197881250291302e-09, "loss": 33.1985, "step": 494130 }, { "epoch": 0.9981940634380668, "grad_norm": 363.8871154785156, "learning_rate": 
4.183592594294017e-09, "loss": 13.1031, "step": 494140 }, { "epoch": 0.9982142640707506, "grad_norm": 508.0299377441406, "learning_rate": 4.169328287299545e-09, "loss": 23.8725, "step": 494150 }, { "epoch": 0.9982344647034345, "grad_norm": 411.2216796875, "learning_rate": 4.155088329377832e-09, "loss": 28.0018, "step": 494160 }, { "epoch": 0.9982546653361183, "grad_norm": 106.09420776367188, "learning_rate": 4.140872720598266e-09, "loss": 16.5168, "step": 494170 }, { "epoch": 0.9982748659688021, "grad_norm": 525.4610595703125, "learning_rate": 4.126681461030236e-09, "loss": 12.5805, "step": 494180 }, { "epoch": 0.9982950666014859, "grad_norm": 475.1921081542969, "learning_rate": 4.11251455074313e-09, "loss": 27.8379, "step": 494190 }, { "epoch": 0.9983152672341697, "grad_norm": 339.93804931640625, "learning_rate": 4.098371989805227e-09, "loss": 24.0035, "step": 494200 }, { "epoch": 0.9983354678668536, "grad_norm": 270.710205078125, "learning_rate": 4.0842537782859185e-09, "loss": 13.355, "step": 494210 }, { "epoch": 0.9983556684995374, "grad_norm": 577.5907592773438, "learning_rate": 4.07015991625459e-09, "loss": 17.0013, "step": 494220 }, { "epoch": 0.9983758691322212, "grad_norm": 235.8202362060547, "learning_rate": 4.056090403778967e-09, "loss": 27.3389, "step": 494230 }, { "epoch": 0.998396069764905, "grad_norm": 578.2260131835938, "learning_rate": 4.042045240927883e-09, "loss": 14.395, "step": 494240 }, { "epoch": 0.9984162703975888, "grad_norm": 50.1723747253418, "learning_rate": 4.028024427770172e-09, "loss": 8.5401, "step": 494250 }, { "epoch": 0.9984364710302727, "grad_norm": 268.9642333984375, "learning_rate": 4.014027964373557e-09, "loss": 10.8139, "step": 494260 }, { "epoch": 0.9984566716629565, "grad_norm": 42.60501480102539, "learning_rate": 4.000055850807427e-09, "loss": 12.8204, "step": 494270 }, { "epoch": 0.9984768722956403, "grad_norm": 512.7504272460938, "learning_rate": 3.986108087138396e-09, "loss": 15.5377, "step": 494280 }, { "epoch": 0.9984970729283241, "grad_norm": 170.0618896484375, "learning_rate": 3.972184673435297e-09, "loss": 15.9197, "step": 494290 }, { "epoch": 0.998517273561008, "grad_norm": 358.6625061035156, "learning_rate": 3.9582856097658554e-09, "loss": 19.4949, "step": 494300 }, { "epoch": 0.9985374741936918, "grad_norm": 353.5986633300781, "learning_rate": 3.944410896197792e-09, "loss": 27.7585, "step": 494310 }, { "epoch": 0.9985576748263756, "grad_norm": 700.8291015625, "learning_rate": 3.930560532798832e-09, "loss": 20.9409, "step": 494320 }, { "epoch": 0.9985778754590594, "grad_norm": 306.12103271484375, "learning_rate": 3.9167345196361454e-09, "loss": 18.3705, "step": 494330 }, { "epoch": 0.9985980760917432, "grad_norm": 338.6306457519531, "learning_rate": 3.902932856777453e-09, "loss": 9.2328, "step": 494340 }, { "epoch": 0.998618276724427, "grad_norm": 335.12652587890625, "learning_rate": 3.889155544289924e-09, "loss": 18.5403, "step": 494350 }, { "epoch": 0.9986384773571109, "grad_norm": 311.6947326660156, "learning_rate": 3.8754025822407285e-09, "loss": 21.9443, "step": 494360 }, { "epoch": 0.9986586779897947, "grad_norm": 355.5566101074219, "learning_rate": 3.861673970697033e-09, "loss": 14.4083, "step": 494370 }, { "epoch": 0.9986788786224785, "grad_norm": 265.0316162109375, "learning_rate": 3.847969709725452e-09, "loss": 17.4924, "step": 494380 }, { "epoch": 0.9986990792551623, "grad_norm": 131.7287139892578, "learning_rate": 3.834289799392598e-09, "loss": 24.5317, "step": 494390 }, { "epoch": 0.998719279887846, "grad_norm": 
229.21507263183594, "learning_rate": 3.820634239765642e-09, "loss": 18.5032, "step": 494400 }, { "epoch": 0.9987394805205299, "grad_norm": 466.8297424316406, "learning_rate": 3.8070030309111935e-09, "loss": 12.6156, "step": 494410 }, { "epoch": 0.9987596811532137, "grad_norm": 680.4591064453125, "learning_rate": 3.793396172895314e-09, "loss": 22.3812, "step": 494420 }, { "epoch": 0.9987798817858975, "grad_norm": 295.6737365722656, "learning_rate": 3.77981366578406e-09, "loss": 8.7531, "step": 494430 }, { "epoch": 0.9988000824185813, "grad_norm": 32.82207489013672, "learning_rate": 3.766255509644601e-09, "loss": 18.1024, "step": 494440 }, { "epoch": 0.9988202830512651, "grad_norm": 390.6903076171875, "learning_rate": 3.752721704541884e-09, "loss": 21.0745, "step": 494450 }, { "epoch": 0.998840483683949, "grad_norm": 1059.0428466796875, "learning_rate": 3.739212250543078e-09, "loss": 23.7416, "step": 494460 }, { "epoch": 0.9988606843166328, "grad_norm": 374.59564208984375, "learning_rate": 3.7257271477131314e-09, "loss": 25.2496, "step": 494470 }, { "epoch": 0.9988808849493166, "grad_norm": 724.3229370117188, "learning_rate": 3.7122663961175477e-09, "loss": 16.3551, "step": 494480 }, { "epoch": 0.9989010855820004, "grad_norm": 345.5867919921875, "learning_rate": 3.698829995822939e-09, "loss": 15.4175, "step": 494490 }, { "epoch": 0.9989212862146842, "grad_norm": 16.395706176757812, "learning_rate": 3.685417946894254e-09, "loss": 26.0611, "step": 494500 }, { "epoch": 0.998941486847368, "grad_norm": 83.29167175292969, "learning_rate": 3.672030249396441e-09, "loss": 9.1453, "step": 494510 }, { "epoch": 0.9989616874800519, "grad_norm": 183.70750427246094, "learning_rate": 3.6586669033955578e-09, "loss": 12.162, "step": 494520 }, { "epoch": 0.9989818881127357, "grad_norm": 322.9624328613281, "learning_rate": 3.645327908955998e-09, "loss": 18.2431, "step": 494530 }, { "epoch": 0.9990020887454195, "grad_norm": 54.37944793701172, "learning_rate": 3.632013266143264e-09, "loss": 18.6937, "step": 494540 }, { "epoch": 0.9990222893781033, "grad_norm": 192.83071899414062, "learning_rate": 3.618722975022304e-09, "loss": 11.3923, "step": 494550 }, { "epoch": 0.9990424900107872, "grad_norm": 430.22259521484375, "learning_rate": 3.605457035657511e-09, "loss": 18.7379, "step": 494560 }, { "epoch": 0.999062690643471, "grad_norm": 223.731201171875, "learning_rate": 3.592215448113834e-09, "loss": 20.3571, "step": 494570 }, { "epoch": 0.9990828912761548, "grad_norm": 1489.7357177734375, "learning_rate": 3.5789982124556646e-09, "loss": 23.1222, "step": 494580 }, { "epoch": 0.9991030919088386, "grad_norm": 574.494384765625, "learning_rate": 3.565805328747951e-09, "loss": 11.0038, "step": 494590 }, { "epoch": 0.9991232925415224, "grad_norm": 160.38998413085938, "learning_rate": 3.5526367970539765e-09, "loss": 19.1827, "step": 494600 }, { "epoch": 0.9991434931742063, "grad_norm": 358.2546081542969, "learning_rate": 3.5394926174381338e-09, "loss": 37.2199, "step": 494610 }, { "epoch": 0.9991636938068901, "grad_norm": 515.025634765625, "learning_rate": 3.526372789965371e-09, "loss": 24.1164, "step": 494620 }, { "epoch": 0.9991838944395739, "grad_norm": 357.302734375, "learning_rate": 3.5132773146989706e-09, "loss": 13.1605, "step": 494630 }, { "epoch": 0.9992040950722577, "grad_norm": 5.499563217163086, "learning_rate": 3.5002061917027708e-09, "loss": 18.9358, "step": 494640 }, { "epoch": 0.9992242957049415, "grad_norm": 177.49220275878906, "learning_rate": 3.487159421040609e-09, "loss": 9.681, "step": 494650 }, { 
"epoch": 0.9992444963376252, "grad_norm": 444.80841064453125, "learning_rate": 3.474137002775768e-09, "loss": 18.1927, "step": 494660 }, { "epoch": 0.9992646969703091, "grad_norm": 235.77890014648438, "learning_rate": 3.461138936972086e-09, "loss": 20.3918, "step": 494670 }, { "epoch": 0.9992848976029929, "grad_norm": 311.6514892578125, "learning_rate": 3.4481652236934006e-09, "loss": 15.494, "step": 494680 }, { "epoch": 0.9993050982356767, "grad_norm": 423.1574401855469, "learning_rate": 3.4352158630018837e-09, "loss": 20.682, "step": 494690 }, { "epoch": 0.9993252988683605, "grad_norm": 421.1527099609375, "learning_rate": 3.4222908549608193e-09, "loss": 15.1917, "step": 494700 }, { "epoch": 0.9993454995010443, "grad_norm": 420.02874755859375, "learning_rate": 3.409390199634044e-09, "loss": 23.7051, "step": 494710 }, { "epoch": 0.9993657001337282, "grad_norm": 426.9223327636719, "learning_rate": 3.3965138970831758e-09, "loss": 29.5462, "step": 494720 }, { "epoch": 0.999385900766412, "grad_norm": 436.4137268066406, "learning_rate": 3.3836619473720522e-09, "loss": 11.2342, "step": 494730 }, { "epoch": 0.9994061013990958, "grad_norm": 465.861328125, "learning_rate": 3.370834350563401e-09, "loss": 15.2891, "step": 494740 }, { "epoch": 0.9994263020317796, "grad_norm": 410.02935791015625, "learning_rate": 3.3580311067188396e-09, "loss": 14.8325, "step": 494750 }, { "epoch": 0.9994465026644634, "grad_norm": 138.91632080078125, "learning_rate": 3.3452522159010957e-09, "loss": 21.0164, "step": 494760 }, { "epoch": 0.9994667032971473, "grad_norm": 314.4662170410156, "learning_rate": 3.332497678172897e-09, "loss": 15.5543, "step": 494770 }, { "epoch": 0.9994869039298311, "grad_norm": 557.0912475585938, "learning_rate": 3.31976749359586e-09, "loss": 21.8675, "step": 494780 }, { "epoch": 0.9995071045625149, "grad_norm": 589.8872680664062, "learning_rate": 3.3070616622321584e-09, "loss": 10.1348, "step": 494790 }, { "epoch": 0.9995273051951987, "grad_norm": 199.4197998046875, "learning_rate": 3.294380184143964e-09, "loss": 19.031, "step": 494800 }, { "epoch": 0.9995475058278825, "grad_norm": 300.7760009765625, "learning_rate": 3.2817230593928938e-09, "loss": 16.9388, "step": 494810 }, { "epoch": 0.9995677064605664, "grad_norm": 753.2967529296875, "learning_rate": 3.269090288041121e-09, "loss": 22.7836, "step": 494820 }, { "epoch": 0.9995879070932502, "grad_norm": 262.03558349609375, "learning_rate": 3.256481870149153e-09, "loss": 30.8767, "step": 494830 }, { "epoch": 0.999608107725934, "grad_norm": 381.8133239746094, "learning_rate": 3.2438978057791615e-09, "loss": 22.6111, "step": 494840 }, { "epoch": 0.9996283083586178, "grad_norm": 598.3963012695312, "learning_rate": 3.2313380949927643e-09, "loss": 15.5221, "step": 494850 }, { "epoch": 0.9996485089913016, "grad_norm": 212.82321166992188, "learning_rate": 3.218802737850468e-09, "loss": 14.6045, "step": 494860 }, { "epoch": 0.9996687096239855, "grad_norm": 234.849609375, "learning_rate": 3.206291734413891e-09, "loss": 12.5164, "step": 494870 }, { "epoch": 0.9996889102566693, "grad_norm": 207.75067138671875, "learning_rate": 3.1938050847435398e-09, "loss": 9.634, "step": 494880 }, { "epoch": 0.9997091108893531, "grad_norm": 154.989501953125, "learning_rate": 3.1813427889004767e-09, "loss": 13.5645, "step": 494890 }, { "epoch": 0.9997293115220369, "grad_norm": 370.97998046875, "learning_rate": 3.1689048469457638e-09, "loss": 13.6074, "step": 494900 }, { "epoch": 0.9997495121547206, "grad_norm": 14.729854583740234, "learning_rate": 3.156491258939909e-09, 
"loss": 19.7983, "step": 494910 }, { "epoch": 0.9997697127874045, "grad_norm": 132.00875854492188, "learning_rate": 3.1441020249428635e-09, "loss": 16.1261, "step": 494920 }, { "epoch": 0.9997899134200883, "grad_norm": 9.60148811340332, "learning_rate": 3.1317371450156897e-09, "loss": 8.5566, "step": 494930 }, { "epoch": 0.9998101140527721, "grad_norm": 168.33929443359375, "learning_rate": 3.11939661921834e-09, "loss": 15.349, "step": 494940 }, { "epoch": 0.9998303146854559, "grad_norm": 572.34423828125, "learning_rate": 3.1070804476113213e-09, "loss": 8.7059, "step": 494950 }, { "epoch": 0.9998505153181397, "grad_norm": 407.3402099609375, "learning_rate": 3.094788630254031e-09, "loss": 29.6567, "step": 494960 }, { "epoch": 0.9998707159508236, "grad_norm": 196.3535919189453, "learning_rate": 3.0825211672064203e-09, "loss": 16.6998, "step": 494970 }, { "epoch": 0.9998909165835074, "grad_norm": 592.755615234375, "learning_rate": 3.070278058528997e-09, "loss": 24.6521, "step": 494980 }, { "epoch": 0.9999111172161912, "grad_norm": 352.9114990234375, "learning_rate": 3.058059304280603e-09, "loss": 28.3788, "step": 494990 }, { "epoch": 0.999931317848875, "grad_norm": 373.85968017578125, "learning_rate": 3.0458649045211897e-09, "loss": 17.7365, "step": 495000 }, { "epoch": 0.9999515184815588, "grad_norm": 190.60629272460938, "learning_rate": 3.03369485931071e-09, "loss": 21.5938, "step": 495010 }, { "epoch": 0.9999717191142427, "grad_norm": 508.1072082519531, "learning_rate": 3.0215491687074492e-09, "loss": 23.1618, "step": 495020 }, { "epoch": 0.9999919197469265, "grad_norm": 324.51171875, "learning_rate": 3.009427832771361e-09, "loss": 29.7145, "step": 495030 }, { "epoch": 1.0, "eval_loss": 18.863046646118164, "eval_runtime": 407.6455, "eval_samples_per_second": 24.534, "eval_steps_per_second": 12.268, "step": 495034 }, { "epoch": 1.0000121203796102, "grad_norm": 535.4615478515625, "learning_rate": 2.9973308515607313e-09, "loss": 24.5712, "step": 495040 }, { "epoch": 1.0000323210122941, "grad_norm": 304.3030700683594, "learning_rate": 2.9852582251355124e-09, "loss": 19.0508, "step": 495050 }, { "epoch": 1.0000525216449778, "grad_norm": 296.67218017578125, "learning_rate": 2.9732099535539905e-09, "loss": 13.083, "step": 495060 }, { "epoch": 1.0000727222776618, "grad_norm": 538.2628784179688, "learning_rate": 2.961186036875008e-09, "loss": 28.6931, "step": 495070 }, { "epoch": 1.0000929229103455, "grad_norm": 728.6567993164062, "learning_rate": 2.949186475157406e-09, "loss": 16.9243, "step": 495080 }, { "epoch": 1.0001131235430294, "grad_norm": 619.2899780273438, "learning_rate": 2.937211268458917e-09, "loss": 24.1662, "step": 495090 }, { "epoch": 1.000133324175713, "grad_norm": 181.85565185546875, "learning_rate": 2.9252604168383826e-09, "loss": 11.0639, "step": 495100 }, { "epoch": 1.000153524808397, "grad_norm": 187.00706481933594, "learning_rate": 2.913333920354644e-09, "loss": 21.9745, "step": 495110 }, { "epoch": 1.0001737254410807, "grad_norm": 322.39569091796875, "learning_rate": 2.901431779064323e-09, "loss": 17.245, "step": 495120 }, { "epoch": 1.0001939260737647, "grad_norm": 399.79571533203125, "learning_rate": 2.889553993027372e-09, "loss": 17.0467, "step": 495130 }, { "epoch": 1.0002141267064484, "grad_norm": 650.3701171875, "learning_rate": 2.8777005622998567e-09, "loss": 13.9672, "step": 495140 }, { "epoch": 1.0002343273391323, "grad_norm": 128.86141967773438, "learning_rate": 2.865871486940619e-09, "loss": 11.1238, "step": 495150 }, { "epoch": 1.000254527971816, "grad_norm": 
311.48162841796875, "learning_rate": 2.8540667670073905e-09, "loss": 13.0761, "step": 495160 }, { "epoch": 1.0002747286045, "grad_norm": 371.01263427734375, "learning_rate": 2.842286402556793e-09, "loss": 21.9169, "step": 495170 }, { "epoch": 1.0002949292371837, "grad_norm": 485.1094055175781, "learning_rate": 2.830530393647113e-09, "loss": 21.921, "step": 495180 }, { "epoch": 1.0003151298698676, "grad_norm": 277.4830322265625, "learning_rate": 2.8187987403355268e-09, "loss": 21.0184, "step": 495190 }, { "epoch": 1.0003353305025513, "grad_norm": 118.95313262939453, "learning_rate": 2.8070914426786555e-09, "loss": 36.1744, "step": 495200 }, { "epoch": 1.0003555311352352, "grad_norm": 309.14556884765625, "learning_rate": 2.7954085007342315e-09, "loss": 29.7963, "step": 495210 }, { "epoch": 1.000375731767919, "grad_norm": 127.88665008544922, "learning_rate": 2.78374991455832e-09, "loss": 21.9706, "step": 495220 }, { "epoch": 1.0003959324006029, "grad_norm": 19.653532028198242, "learning_rate": 2.772115684209209e-09, "loss": 9.5147, "step": 495230 }, { "epoch": 1.0004161330332866, "grad_norm": 220.71902465820312, "learning_rate": 2.7605058097418536e-09, "loss": 15.3792, "step": 495240 }, { "epoch": 1.0004363336659705, "grad_norm": 282.1274719238281, "learning_rate": 2.748920291214541e-09, "loss": 9.1682, "step": 495250 }, { "epoch": 1.0004565342986542, "grad_norm": 457.2867431640625, "learning_rate": 2.7373591286822266e-09, "loss": 18.318, "step": 495260 }, { "epoch": 1.0004767349313382, "grad_norm": 1289.856689453125, "learning_rate": 2.7258223222020876e-09, "loss": 33.7968, "step": 495270 }, { "epoch": 1.0004969355640219, "grad_norm": 611.5613403320312, "learning_rate": 2.7143098718301896e-09, "loss": 21.0783, "step": 495280 }, { "epoch": 1.0005171361967056, "grad_norm": 244.9925537109375, "learning_rate": 2.7028217776225994e-09, "loss": 8.253, "step": 495290 }, { "epoch": 1.0005373368293895, "grad_norm": 187.85861206054688, "learning_rate": 2.6913580396359384e-09, "loss": 16.2827, "step": 495300 }, { "epoch": 1.0005575374620732, "grad_norm": 679.815185546875, "learning_rate": 2.6799186579246074e-09, "loss": 15.515, "step": 495310 }, { "epoch": 1.0005777380947571, "grad_norm": 716.7931518554688, "learning_rate": 2.6685036325457826e-09, "loss": 29.7323, "step": 495320 }, { "epoch": 1.0005979387274408, "grad_norm": 225.07211303710938, "learning_rate": 2.65711296355442e-09, "loss": 22.4483, "step": 495330 }, { "epoch": 1.0006181393601248, "grad_norm": 456.4980163574219, "learning_rate": 2.6457466510065866e-09, "loss": 13.9316, "step": 495340 }, { "epoch": 1.0006383399928085, "grad_norm": 128.2577362060547, "learning_rate": 2.6344046949566825e-09, "loss": 19.9119, "step": 495350 }, { "epoch": 1.0006585406254924, "grad_norm": 564.334228515625, "learning_rate": 2.6230870954607746e-09, "loss": 20.1779, "step": 495360 }, { "epoch": 1.0006787412581761, "grad_norm": 263.80462646484375, "learning_rate": 2.6117938525738185e-09, "loss": 13.3283, "step": 495370 }, { "epoch": 1.00069894189086, "grad_norm": 158.897705078125, "learning_rate": 2.6005249663513254e-09, "loss": 20.4164, "step": 495380 }, { "epoch": 1.0007191425235438, "grad_norm": 281.79766845703125, "learning_rate": 2.5892804368471414e-09, "loss": 19.5175, "step": 495390 }, { "epoch": 1.0007393431562277, "grad_norm": 556.7461547851562, "learning_rate": 2.5780602641167774e-09, "loss": 19.6771, "step": 495400 }, { "epoch": 1.0007595437889114, "grad_norm": 218.8365478515625, "learning_rate": 2.5668644482151892e-09, "loss": 24.8257, "step": 495410 
}, { "epoch": 1.0007797444215953, "grad_norm": 628.21533203125, "learning_rate": 2.5556929891962234e-09, "loss": 11.3612, "step": 495420 }, { "epoch": 1.000799945054279, "grad_norm": 698.1282348632812, "learning_rate": 2.5445458871148353e-09, "loss": 18.1672, "step": 495430 }, { "epoch": 1.000820145686963, "grad_norm": 259.7185974121094, "learning_rate": 2.5334231420254262e-09, "loss": 14.2079, "step": 495440 }, { "epoch": 1.0008403463196467, "grad_norm": 70.15369415283203, "learning_rate": 2.522324753981842e-09, "loss": 8.7467, "step": 495450 }, { "epoch": 1.0008605469523306, "grad_norm": 312.29986572265625, "learning_rate": 2.511250723037928e-09, "loss": 29.1994, "step": 495460 }, { "epoch": 1.0008807475850143, "grad_norm": 279.4052734375, "learning_rate": 2.5002010492486405e-09, "loss": 16.2657, "step": 495470 }, { "epoch": 1.0009009482176983, "grad_norm": 144.45155334472656, "learning_rate": 2.4891757326667154e-09, "loss": 24.549, "step": 495480 }, { "epoch": 1.000921148850382, "grad_norm": 1126.0965576171875, "learning_rate": 2.4781747733471085e-09, "loss": 45.3086, "step": 495490 }, { "epoch": 1.000941349483066, "grad_norm": 599.3152465820312, "learning_rate": 2.4671981713420003e-09, "loss": 25.8712, "step": 495500 }, { "epoch": 1.0009615501157496, "grad_norm": 250.93577575683594, "learning_rate": 2.4562459267063466e-09, "loss": 13.252, "step": 495510 }, { "epoch": 1.0009817507484335, "grad_norm": 290.2425231933594, "learning_rate": 2.445318039492328e-09, "loss": 8.9645, "step": 495520 }, { "epoch": 1.0010019513811172, "grad_norm": 106.93073272705078, "learning_rate": 2.4344145097537906e-09, "loss": 13.6351, "step": 495530 }, { "epoch": 1.0010221520138012, "grad_norm": 529.4349975585938, "learning_rate": 2.423535337544025e-09, "loss": 16.4544, "step": 495540 }, { "epoch": 1.001042352646485, "grad_norm": 345.8415222167969, "learning_rate": 2.412680522915767e-09, "loss": 22.4422, "step": 495550 }, { "epoch": 1.0010625532791686, "grad_norm": 712.0042114257812, "learning_rate": 2.4018500659217515e-09, "loss": 25.6466, "step": 495560 }, { "epoch": 1.0010827539118525, "grad_norm": 179.86766052246094, "learning_rate": 2.3910439666147147e-09, "loss": 16.1174, "step": 495570 }, { "epoch": 1.0011029545445362, "grad_norm": 207.787841796875, "learning_rate": 2.380262225047947e-09, "loss": 10.0085, "step": 495580 }, { "epoch": 1.0011231551772202, "grad_norm": 695.45751953125, "learning_rate": 2.369504841273629e-09, "loss": 16.8075, "step": 495590 }, { "epoch": 1.0011433558099039, "grad_norm": 158.43753051757812, "learning_rate": 2.358771815344496e-09, "loss": 10.7488, "step": 495600 }, { "epoch": 1.0011635564425878, "grad_norm": 536.5902709960938, "learning_rate": 2.348063147312174e-09, "loss": 17.2911, "step": 495610 }, { "epoch": 1.0011837570752715, "grad_norm": 449.4764709472656, "learning_rate": 2.337378837229398e-09, "loss": 19.0239, "step": 495620 }, { "epoch": 1.0012039577079554, "grad_norm": 527.6913452148438, "learning_rate": 2.326718885147794e-09, "loss": 8.5869, "step": 495630 }, { "epoch": 1.0012241583406392, "grad_norm": 2.247581958770752, "learning_rate": 2.316083291120097e-09, "loss": 11.1301, "step": 495640 }, { "epoch": 1.001244358973323, "grad_norm": 163.10150146484375, "learning_rate": 2.3054720551973773e-09, "loss": 17.8689, "step": 495650 }, { "epoch": 1.0012645596060068, "grad_norm": 183.3430938720703, "learning_rate": 2.294885177431816e-09, "loss": 15.3342, "step": 495660 }, { "epoch": 1.0012847602386907, "grad_norm": 152.39137268066406, "learning_rate": 
2.2843226578744826e-09, "loss": 13.1861, "step": 495670 }, { "epoch": 1.0013049608713744, "grad_norm": 1099.2608642578125, "learning_rate": 2.2737844965775578e-09, "loss": 23.4051, "step": 495680 }, { "epoch": 1.0013251615040584, "grad_norm": 121.2804183959961, "learning_rate": 2.263270693592112e-09, "loss": 12.0707, "step": 495690 }, { "epoch": 1.001345362136742, "grad_norm": 831.40966796875, "learning_rate": 2.2527812489692156e-09, "loss": 28.1774, "step": 495700 }, { "epoch": 1.001365562769426, "grad_norm": 540.88671875, "learning_rate": 2.2423161627599386e-09, "loss": 17.6186, "step": 495710 }, { "epoch": 1.0013857634021097, "grad_norm": 46.36670684814453, "learning_rate": 2.2318754350159067e-09, "loss": 20.9344, "step": 495720 }, { "epoch": 1.0014059640347936, "grad_norm": 212.47543334960938, "learning_rate": 2.2214590657870795e-09, "loss": 16.6399, "step": 495730 }, { "epoch": 1.0014261646674774, "grad_norm": 47.5765266418457, "learning_rate": 2.211067055124527e-09, "loss": 11.9324, "step": 495740 }, { "epoch": 1.0014463653001613, "grad_norm": 1.8978875875473022, "learning_rate": 2.2006994030798758e-09, "loss": 7.8692, "step": 495750 }, { "epoch": 1.001466565932845, "grad_norm": 353.38153076171875, "learning_rate": 2.1903561097019744e-09, "loss": 15.6718, "step": 495760 }, { "epoch": 1.001486766565529, "grad_norm": 534.5007934570312, "learning_rate": 2.1800371750430037e-09, "loss": 34.5303, "step": 495770 }, { "epoch": 1.0015069671982126, "grad_norm": 166.82882690429688, "learning_rate": 2.169742599151814e-09, "loss": 22.251, "step": 495780 }, { "epoch": 1.0015271678308966, "grad_norm": 594.2769165039062, "learning_rate": 2.15947238207892e-09, "loss": 16.6582, "step": 495790 }, { "epoch": 1.0015473684635803, "grad_norm": 511.8836364746094, "learning_rate": 2.149226523874837e-09, "loss": 12.1377, "step": 495800 }, { "epoch": 1.001567569096264, "grad_norm": 437.1927185058594, "learning_rate": 2.1390050245895246e-09, "loss": 20.587, "step": 495810 }, { "epoch": 1.001587769728948, "grad_norm": 478.9796447753906, "learning_rate": 2.128807884272388e-09, "loss": 23.5372, "step": 495820 }, { "epoch": 1.0016079703616316, "grad_norm": 479.27288818359375, "learning_rate": 2.1186351029733877e-09, "loss": 15.6252, "step": 495830 }, { "epoch": 1.0016281709943156, "grad_norm": 560.8809204101562, "learning_rate": 2.1084866807413727e-09, "loss": 31.4543, "step": 495840 }, { "epoch": 1.0016483716269993, "grad_norm": 632.0659790039062, "learning_rate": 2.098362617626859e-09, "loss": 12.4084, "step": 495850 }, { "epoch": 1.0016685722596832, "grad_norm": 166.77639770507812, "learning_rate": 2.088262913679251e-09, "loss": 5.7719, "step": 495860 }, { "epoch": 1.001688772892367, "grad_norm": 135.89581298828125, "learning_rate": 2.078187568946288e-09, "loss": 12.6637, "step": 495870 }, { "epoch": 1.0017089735250508, "grad_norm": 227.5363311767578, "learning_rate": 2.0681365834790413e-09, "loss": 15.5393, "step": 495880 }, { "epoch": 1.0017291741577345, "grad_norm": 291.6004333496094, "learning_rate": 2.0581099573246943e-09, "loss": 36.1768, "step": 495890 }, { "epoch": 1.0017493747904185, "grad_norm": 282.8682556152344, "learning_rate": 2.0481076905332074e-09, "loss": 8.65, "step": 495900 }, { "epoch": 1.0017695754231022, "grad_norm": 189.76792907714844, "learning_rate": 2.038129783153431e-09, "loss": 11.4054, "step": 495910 }, { "epoch": 1.0017897760557861, "grad_norm": 233.2994384765625, "learning_rate": 2.0281762352331034e-09, "loss": 18.2021, "step": 495920 }, { "epoch": 1.0018099766884698, "grad_norm": 
176.9680633544922, "learning_rate": 2.018247046821631e-09, "loss": 11.5819, "step": 495930 }, { "epoch": 1.0018301773211538, "grad_norm": 153.39129638671875, "learning_rate": 2.008342217966752e-09, "loss": 24.4084, "step": 495940 }, { "epoch": 1.0018503779538375, "grad_norm": 409.62103271484375, "learning_rate": 1.9984617487173174e-09, "loss": 17.9044, "step": 495950 }, { "epoch": 1.0018705785865214, "grad_norm": 258.4937744140625, "learning_rate": 1.9886056391210663e-09, "loss": 23.0219, "step": 495960 }, { "epoch": 1.001890779219205, "grad_norm": 90.13101196289062, "learning_rate": 1.9787738892262932e-09, "loss": 5.2013, "step": 495970 }, { "epoch": 1.001910979851889, "grad_norm": 273.6362609863281, "learning_rate": 1.968966499080738e-09, "loss": 14.6099, "step": 495980 }, { "epoch": 1.0019311804845727, "grad_norm": 562.908935546875, "learning_rate": 1.95918346873214e-09, "loss": 13.6189, "step": 495990 }, { "epoch": 1.0019513811172567, "grad_norm": 322.1513366699219, "learning_rate": 1.9494247982282386e-09, "loss": 31.2791, "step": 496000 }, { "epoch": 1.0019715817499404, "grad_norm": 57.95103073120117, "learning_rate": 1.9396904876167742e-09, "loss": 16.7426, "step": 496010 }, { "epoch": 1.0019917823826243, "grad_norm": 270.1626281738281, "learning_rate": 1.9299805369449307e-09, "loss": 15.8352, "step": 496020 }, { "epoch": 1.002011983015308, "grad_norm": 181.19337463378906, "learning_rate": 1.920294946260448e-09, "loss": 12.8496, "step": 496030 }, { "epoch": 1.002032183647992, "grad_norm": 279.1658630371094, "learning_rate": 1.9106337156099553e-09, "loss": 12.1115, "step": 496040 }, { "epoch": 1.0020523842806757, "grad_norm": 173.3330841064453, "learning_rate": 1.9009968450406368e-09, "loss": 24.0644, "step": 496050 }, { "epoch": 1.0020725849133594, "grad_norm": 280.337158203125, "learning_rate": 1.8913843346002324e-09, "loss": 14.3357, "step": 496060 }, { "epoch": 1.0020927855460433, "grad_norm": 222.7508087158203, "learning_rate": 1.8817961843348166e-09, "loss": 12.7869, "step": 496070 }, { "epoch": 1.002112986178727, "grad_norm": 387.0539245605469, "learning_rate": 1.872232394291018e-09, "loss": 13.7058, "step": 496080 }, { "epoch": 1.002133186811411, "grad_norm": 144.1525115966797, "learning_rate": 1.8626929645160218e-09, "loss": 24.1266, "step": 496090 }, { "epoch": 1.0021533874440947, "grad_norm": 233.26426696777344, "learning_rate": 1.8531778950564572e-09, "loss": 18.7332, "step": 496100 }, { "epoch": 1.0021735880767786, "grad_norm": 364.735595703125, "learning_rate": 1.8436871859578431e-09, "loss": 18.3567, "step": 496110 }, { "epoch": 1.0021937887094623, "grad_norm": 305.4839782714844, "learning_rate": 1.834220837266809e-09, "loss": 16.8414, "step": 496120 }, { "epoch": 1.0022139893421462, "grad_norm": 80.03901672363281, "learning_rate": 1.8247788490299846e-09, "loss": 22.3505, "step": 496130 }, { "epoch": 1.00223418997483, "grad_norm": 91.74203491210938, "learning_rate": 1.8153612212923333e-09, "loss": 13.1593, "step": 496140 }, { "epoch": 1.0022543906075139, "grad_norm": 83.57035064697266, "learning_rate": 1.80596795410104e-09, "loss": 28.7778, "step": 496150 }, { "epoch": 1.0022745912401976, "grad_norm": 89.92529296875, "learning_rate": 1.7965990475010686e-09, "loss": 20.5896, "step": 496160 }, { "epoch": 1.0022947918728815, "grad_norm": 365.5494689941406, "learning_rate": 1.7872545015379382e-09, "loss": 17.4617, "step": 496170 }, { "epoch": 1.0023149925055652, "grad_norm": 228.63990783691406, "learning_rate": 1.7779343162577233e-09, "loss": 9.7855, "step": 496180 }, { 
"epoch": 1.0023351931382491, "grad_norm": 471.0659484863281, "learning_rate": 1.7686384917059429e-09, "loss": 29.9951, "step": 496190 }, { "epoch": 1.0023553937709329, "grad_norm": 6.007596969604492, "learning_rate": 1.759367027927561e-09, "loss": 8.9301, "step": 496200 }, { "epoch": 1.0023755944036168, "grad_norm": 310.35614013671875, "learning_rate": 1.7501199249675416e-09, "loss": 11.2919, "step": 496210 }, { "epoch": 1.0023957950363005, "grad_norm": 67.65311431884766, "learning_rate": 1.740897182871404e-09, "loss": 12.9013, "step": 496220 }, { "epoch": 1.0024159956689844, "grad_norm": 271.9510498046875, "learning_rate": 1.7316988016835567e-09, "loss": 10.5365, "step": 496230 }, { "epoch": 1.0024361963016681, "grad_norm": 577.5405883789062, "learning_rate": 1.7225247814495194e-09, "loss": 11.4776, "step": 496240 }, { "epoch": 1.002456396934352, "grad_norm": 158.23123168945312, "learning_rate": 1.7133751222137007e-09, "loss": 16.7018, "step": 496250 }, { "epoch": 1.0024765975670358, "grad_norm": 151.22882080078125, "learning_rate": 1.7042498240205097e-09, "loss": 17.0154, "step": 496260 }, { "epoch": 1.0024967981997197, "grad_norm": 351.3409118652344, "learning_rate": 1.6951488869149103e-09, "loss": 5.6745, "step": 496270 }, { "epoch": 1.0025169988324034, "grad_norm": 291.2845153808594, "learning_rate": 1.686072310940201e-09, "loss": 9.1775, "step": 496280 }, { "epoch": 1.0025371994650873, "grad_norm": 526.7337036132812, "learning_rate": 1.6770200961419015e-09, "loss": 18.1165, "step": 496290 }, { "epoch": 1.002557400097771, "grad_norm": 308.7669372558594, "learning_rate": 1.6679922425638651e-09, "loss": 25.7948, "step": 496300 }, { "epoch": 1.002577600730455, "grad_norm": 240.68191528320312, "learning_rate": 1.6589887502493907e-09, "loss": 16.251, "step": 496310 }, { "epoch": 1.0025978013631387, "grad_norm": 253.4486846923828, "learning_rate": 1.650009619242887e-09, "loss": 12.1938, "step": 496320 }, { "epoch": 1.0026180019958224, "grad_norm": 144.76792907714844, "learning_rate": 1.6410548495876533e-09, "loss": 12.4415, "step": 496330 }, { "epoch": 1.0026382026285063, "grad_norm": 126.26376342773438, "learning_rate": 1.632124441328098e-09, "loss": 16.3367, "step": 496340 }, { "epoch": 1.00265840326119, "grad_norm": 622.173095703125, "learning_rate": 1.6232183945075197e-09, "loss": 20.4622, "step": 496350 }, { "epoch": 1.002678603893874, "grad_norm": 404.68792724609375, "learning_rate": 1.6143367091686624e-09, "loss": 17.3768, "step": 496360 }, { "epoch": 1.0026988045265577, "grad_norm": 523.90869140625, "learning_rate": 1.6054793853553797e-09, "loss": 21.2731, "step": 496370 }, { "epoch": 1.0027190051592416, "grad_norm": 419.3081970214844, "learning_rate": 1.59664642311097e-09, "loss": 19.0114, "step": 496380 }, { "epoch": 1.0027392057919253, "grad_norm": 345.4902648925781, "learning_rate": 1.5878378224781777e-09, "loss": 8.9369, "step": 496390 }, { "epoch": 1.0027594064246093, "grad_norm": 372.1741943359375, "learning_rate": 1.5790535835003006e-09, "loss": 10.9754, "step": 496400 }, { "epoch": 1.002779607057293, "grad_norm": 194.96832275390625, "learning_rate": 1.570293706219528e-09, "loss": 5.5389, "step": 496410 }, { "epoch": 1.002799807689977, "grad_norm": 182.9541473388672, "learning_rate": 1.5615581906791576e-09, "loss": 7.1093, "step": 496420 }, { "epoch": 1.0028200083226606, "grad_norm": 239.05899047851562, "learning_rate": 1.5528470369208238e-09, "loss": 20.723, "step": 496430 }, { "epoch": 1.0028402089553445, "grad_norm": 1060.20361328125, "learning_rate": 
1.5441602449883797e-09, "loss": 15.2947, "step": 496440 }, { "epoch": 1.0028604095880282, "grad_norm": 0.0, "learning_rate": 1.535497814923459e-09, "loss": 8.3481, "step": 496450 }, { "epoch": 1.0028806102207122, "grad_norm": 10.102018356323242, "learning_rate": 1.52685974676825e-09, "loss": 21.2022, "step": 496460 }, { "epoch": 1.0029008108533959, "grad_norm": 339.6944885253906, "learning_rate": 1.518246040564386e-09, "loss": 10.4844, "step": 496470 }, { "epoch": 1.0029210114860798, "grad_norm": 1862.4864501953125, "learning_rate": 1.509656696354611e-09, "loss": 29.4656, "step": 496480 }, { "epoch": 1.0029412121187635, "grad_norm": 468.07958984375, "learning_rate": 1.5010917141811132e-09, "loss": 25.402, "step": 496490 }, { "epoch": 1.0029614127514475, "grad_norm": 325.1629943847656, "learning_rate": 1.4925510940844157e-09, "loss": 13.7435, "step": 496500 }, { "epoch": 1.0029816133841312, "grad_norm": 661.4666137695312, "learning_rate": 1.4840348361067069e-09, "loss": 30.6057, "step": 496510 }, { "epoch": 1.003001814016815, "grad_norm": 336.5476379394531, "learning_rate": 1.4755429402901755e-09, "loss": 10.5586, "step": 496520 }, { "epoch": 1.0030220146494988, "grad_norm": 724.3787841796875, "learning_rate": 1.4670754066747895e-09, "loss": 11.5721, "step": 496530 }, { "epoch": 1.0030422152821827, "grad_norm": 546.8881225585938, "learning_rate": 1.4586322353032923e-09, "loss": 14.4762, "step": 496540 }, { "epoch": 1.0030624159148664, "grad_norm": 330.6160583496094, "learning_rate": 1.4502134262156519e-09, "loss": 11.2739, "step": 496550 }, { "epoch": 1.0030826165475504, "grad_norm": 6.230769157409668, "learning_rate": 1.4418189794540572e-09, "loss": 11.9299, "step": 496560 }, { "epoch": 1.003102817180234, "grad_norm": 172.46426391601562, "learning_rate": 1.4334488950579206e-09, "loss": 14.5759, "step": 496570 }, { "epoch": 1.0031230178129178, "grad_norm": 20.506919860839844, "learning_rate": 1.425103173069986e-09, "loss": 11.4367, "step": 496580 }, { "epoch": 1.0031432184456017, "grad_norm": 417.68963623046875, "learning_rate": 1.4167818135291112e-09, "loss": 13.0775, "step": 496590 }, { "epoch": 1.0031634190782854, "grad_norm": 24.981063842773438, "learning_rate": 1.4084848164763742e-09, "loss": 12.4334, "step": 496600 }, { "epoch": 1.0031836197109694, "grad_norm": 6.88071346282959, "learning_rate": 1.4002121819528535e-09, "loss": 15.9379, "step": 496610 }, { "epoch": 1.003203820343653, "grad_norm": 431.84326171875, "learning_rate": 1.3919639099985171e-09, "loss": 23.2785, "step": 496620 }, { "epoch": 1.003224020976337, "grad_norm": 85.91773986816406, "learning_rate": 1.3837400006533331e-09, "loss": 17.4635, "step": 496630 }, { "epoch": 1.0032442216090207, "grad_norm": 304.8151550292969, "learning_rate": 1.3755404539572692e-09, "loss": 14.6721, "step": 496640 }, { "epoch": 1.0032644222417046, "grad_norm": 359.48175048828125, "learning_rate": 1.3673652699508487e-09, "loss": 10.4681, "step": 496650 }, { "epoch": 1.0032846228743884, "grad_norm": 236.9304656982422, "learning_rate": 1.3592144486740399e-09, "loss": 19.3031, "step": 496660 }, { "epoch": 1.0033048235070723, "grad_norm": 16.39584732055664, "learning_rate": 1.3510879901657003e-09, "loss": 5.941, "step": 496670 }, { "epoch": 1.003325024139756, "grad_norm": 309.649169921875, "learning_rate": 1.342985894465798e-09, "loss": 13.6341, "step": 496680 }, { "epoch": 1.00334522477244, "grad_norm": 94.05716705322266, "learning_rate": 1.3349081616143012e-09, "loss": 21.2653, "step": 496690 }, { "epoch": 1.0033654254051236, "grad_norm": 
533.8690795898438, "learning_rate": 1.3268547916495124e-09, "loss": 9.9349, "step": 496700 }, { "epoch": 1.0033856260378076, "grad_norm": 14.093035697937012, "learning_rate": 1.3188257846119545e-09, "loss": 12.0922, "step": 496710 }, { "epoch": 1.0034058266704913, "grad_norm": 20.148250579833984, "learning_rate": 1.3108211405399307e-09, "loss": 9.9139, "step": 496720 }, { "epoch": 1.0034260273031752, "grad_norm": 665.7136840820312, "learning_rate": 1.3028408594728536e-09, "loss": 15.5665, "step": 496730 }, { "epoch": 1.003446227935859, "grad_norm": 220.5654296875, "learning_rate": 1.2948849414495811e-09, "loss": 14.6036, "step": 496740 }, { "epoch": 1.0034664285685428, "grad_norm": 269.35601806640625, "learning_rate": 1.286953386508416e-09, "loss": 25.4018, "step": 496750 }, { "epoch": 1.0034866292012266, "grad_norm": 287.6634521484375, "learning_rate": 1.2790461946887712e-09, "loss": 23.9352, "step": 496760 }, { "epoch": 1.0035068298339105, "grad_norm": 195.8697509765625, "learning_rate": 1.271163366028394e-09, "loss": 24.3558, "step": 496770 }, { "epoch": 1.0035270304665942, "grad_norm": 360.56964111328125, "learning_rate": 1.2633049005661423e-09, "loss": 6.6644, "step": 496780 }, { "epoch": 1.0035472310992781, "grad_norm": 355.21771240234375, "learning_rate": 1.2554707983403192e-09, "loss": 18.4967, "step": 496790 }, { "epoch": 1.0035674317319618, "grad_norm": 90.62782287597656, "learning_rate": 1.247661059389227e-09, "loss": 17.228, "step": 496800 }, { "epoch": 1.0035876323646458, "grad_norm": 209.0258331298828, "learning_rate": 1.2398756837506131e-09, "loss": 15.8311, "step": 496810 }, { "epoch": 1.0036078329973295, "grad_norm": 381.7867126464844, "learning_rate": 1.2321146714627807e-09, "loss": 8.8757, "step": 496820 }, { "epoch": 1.0036280336300132, "grad_norm": 236.79905700683594, "learning_rate": 1.224378022562922e-09, "loss": 16.2411, "step": 496830 }, { "epoch": 1.0036482342626971, "grad_norm": 210.00741577148438, "learning_rate": 1.2166657370898948e-09, "loss": 14.6875, "step": 496840 }, { "epoch": 1.0036684348953808, "grad_norm": 392.40838623046875, "learning_rate": 1.2089778150797816e-09, "loss": 20.7608, "step": 496850 }, { "epoch": 1.0036886355280648, "grad_norm": 65.0674819946289, "learning_rate": 1.2013142565708845e-09, "loss": 15.7522, "step": 496860 }, { "epoch": 1.0037088361607485, "grad_norm": 382.4653625488281, "learning_rate": 1.193675061600952e-09, "loss": 15.6798, "step": 496870 }, { "epoch": 1.0037290367934324, "grad_norm": 442.8677978515625, "learning_rate": 1.1860602302066203e-09, "loss": 26.6604, "step": 496880 }, { "epoch": 1.003749237426116, "grad_norm": 1905.3214111328125, "learning_rate": 1.178469762425083e-09, "loss": 11.8643, "step": 496890 }, { "epoch": 1.0037694380588, "grad_norm": 541.8901977539062, "learning_rate": 1.170903658293532e-09, "loss": 10.6899, "step": 496900 }, { "epoch": 1.0037896386914837, "grad_norm": 480.2037353515625, "learning_rate": 1.1633619178486044e-09, "loss": 13.7109, "step": 496910 }, { "epoch": 1.0038098393241677, "grad_norm": 348.5443115234375, "learning_rate": 1.155844541126938e-09, "loss": 23.0337, "step": 496920 }, { "epoch": 1.0038300399568514, "grad_norm": 276.6700744628906, "learning_rate": 1.1483515281657254e-09, "loss": 14.6272, "step": 496930 }, { "epoch": 1.0038502405895353, "grad_norm": 170.61239624023438, "learning_rate": 1.1408828790010484e-09, "loss": 10.445, "step": 496940 }, { "epoch": 1.003870441222219, "grad_norm": 44.20932388305664, "learning_rate": 1.1334385936695447e-09, "loss": 20.9744, "step": 496950 
}, { "epoch": 1.003890641854903, "grad_norm": 435.5612487792969, "learning_rate": 1.1260186722067411e-09, "loss": 20.1963, "step": 496960 }, { "epoch": 1.0039108424875867, "grad_norm": 257.3517150878906, "learning_rate": 1.1186231146503856e-09, "loss": 24.552, "step": 496970 }, { "epoch": 1.0039310431202706, "grad_norm": 536.3111572265625, "learning_rate": 1.111251921034895e-09, "loss": 29.9063, "step": 496980 }, { "epoch": 1.0039512437529543, "grad_norm": 302.0859375, "learning_rate": 1.1039050913969062e-09, "loss": 12.5154, "step": 496990 }, { "epoch": 1.0039714443856382, "grad_norm": 840.5151977539062, "learning_rate": 1.096582625772502e-09, "loss": 29.2025, "step": 497000 }, { "epoch": 1.003991645018322, "grad_norm": 393.2088623046875, "learning_rate": 1.0892845241972094e-09, "loss": 14.4391, "step": 497010 }, { "epoch": 1.0040118456510059, "grad_norm": 262.2140197753906, "learning_rate": 1.0820107867060004e-09, "loss": 19.033, "step": 497020 }, { "epoch": 1.0040320462836896, "grad_norm": 135.4094696044922, "learning_rate": 1.074761413334957e-09, "loss": 32.1915, "step": 497030 }, { "epoch": 1.0040522469163735, "grad_norm": 378.78863525390625, "learning_rate": 1.0675364041190516e-09, "loss": 13.1999, "step": 497040 }, { "epoch": 1.0040724475490572, "grad_norm": 364.27679443359375, "learning_rate": 1.0603357590938112e-09, "loss": 7.9666, "step": 497050 }, { "epoch": 1.0040926481817412, "grad_norm": 273.89202880859375, "learning_rate": 1.0531594782942079e-09, "loss": 14.7918, "step": 497060 }, { "epoch": 1.0041128488144249, "grad_norm": 620.6331787109375, "learning_rate": 1.0460075617552134e-09, "loss": 18.9597, "step": 497070 }, { "epoch": 1.0041330494471086, "grad_norm": 967.349365234375, "learning_rate": 1.0388800095118002e-09, "loss": 16.9013, "step": 497080 }, { "epoch": 1.0041532500797925, "grad_norm": 378.5223693847656, "learning_rate": 1.0317768215983847e-09, "loss": 11.8875, "step": 497090 }, { "epoch": 1.0041734507124762, "grad_norm": 255.29290771484375, "learning_rate": 1.0246979980499395e-09, "loss": 21.5511, "step": 497100 }, { "epoch": 1.0041936513451601, "grad_norm": 152.40760803222656, "learning_rate": 1.017643538900881e-09, "loss": 8.6306, "step": 497110 }, { "epoch": 1.0042138519778439, "grad_norm": 301.55950927734375, "learning_rate": 1.0106134441850712e-09, "loss": 10.1663, "step": 497120 }, { "epoch": 1.0042340526105278, "grad_norm": 366.7298583984375, "learning_rate": 1.0036077139380373e-09, "loss": 9.4549, "step": 497130 }, { "epoch": 1.0042542532432115, "grad_norm": 277.9081726074219, "learning_rate": 9.96626348192531e-10, "loss": 44.1801, "step": 497140 }, { "epoch": 1.0042744538758954, "grad_norm": 167.16905212402344, "learning_rate": 9.896693469829689e-10, "loss": 25.4072, "step": 497150 }, { "epoch": 1.0042946545085791, "grad_norm": 246.45228576660156, "learning_rate": 9.827367103437679e-10, "loss": 15.8333, "step": 497160 }, { "epoch": 1.004314855141263, "grad_norm": 320.1998291015625, "learning_rate": 9.758284383082351e-10, "loss": 15.7299, "step": 497170 }, { "epoch": 1.0043350557739468, "grad_norm": 255.04876708984375, "learning_rate": 9.68944530910787e-10, "loss": 21.4589, "step": 497180 }, { "epoch": 1.0043552564066307, "grad_norm": 858.8927612304688, "learning_rate": 9.620849881836203e-10, "loss": 11.0519, "step": 497190 }, { "epoch": 1.0043754570393144, "grad_norm": 290.14703369140625, "learning_rate": 9.55249810161152e-10, "loss": 16.7142, "step": 497200 }, { "epoch": 1.0043956576719983, "grad_norm": 79.18550872802734, "learning_rate": 
9.484389968766882e-10, "loss": 14.6362, "step": 497210 }, { "epoch": 1.004415858304682, "grad_norm": 2.3829500675201416, "learning_rate": 9.416525483635364e-10, "loss": 11.5494, "step": 497220 }, { "epoch": 1.004436058937366, "grad_norm": 472.21539306640625, "learning_rate": 9.348904646538925e-10, "loss": 20.2588, "step": 497230 }, { "epoch": 1.0044562595700497, "grad_norm": 422.0057678222656, "learning_rate": 9.281527457816186e-10, "loss": 15.2321, "step": 497240 }, { "epoch": 1.0044764602027336, "grad_norm": 235.6026153564453, "learning_rate": 9.214393917789111e-10, "loss": 14.2804, "step": 497250 }, { "epoch": 1.0044966608354173, "grad_norm": 138.6964569091797, "learning_rate": 9.147504026790766e-10, "loss": 7.2866, "step": 497260 }, { "epoch": 1.0045168614681013, "grad_norm": 85.42167663574219, "learning_rate": 9.080857785137564e-10, "loss": 16.7213, "step": 497270 }, { "epoch": 1.004537062100785, "grad_norm": 331.20611572265625, "learning_rate": 9.014455193168125e-10, "loss": 7.9069, "step": 497280 }, { "epoch": 1.004557262733469, "grad_norm": 794.2719116210938, "learning_rate": 8.948296251198863e-10, "loss": 21.0431, "step": 497290 }, { "epoch": 1.0045774633661526, "grad_norm": 445.93719482421875, "learning_rate": 8.88238095955174e-10, "loss": 17.8171, "step": 497300 }, { "epoch": 1.0045976639988365, "grad_norm": 862.58984375, "learning_rate": 8.816709318543171e-10, "loss": 25.3989, "step": 497310 }, { "epoch": 1.0046178646315203, "grad_norm": 626.967041015625, "learning_rate": 8.751281328506223e-10, "loss": 20.0826, "step": 497320 }, { "epoch": 1.0046380652642042, "grad_norm": 348.0452575683594, "learning_rate": 8.686096989751758e-10, "loss": 17.8535, "step": 497330 }, { "epoch": 1.004658265896888, "grad_norm": 500.5772705078125, "learning_rate": 8.621156302590639e-10, "loss": 10.7476, "step": 497340 }, { "epoch": 1.0046784665295716, "grad_norm": 139.4596710205078, "learning_rate": 8.556459267355932e-10, "loss": 10.1938, "step": 497350 }, { "epoch": 1.0046986671622555, "grad_norm": 663.2300415039062, "learning_rate": 8.492005884347398e-10, "loss": 27.1731, "step": 497360 }, { "epoch": 1.0047188677949392, "grad_norm": 286.3394775390625, "learning_rate": 8.427796153887002e-10, "loss": 19.3634, "step": 497370 }, { "epoch": 1.0047390684276232, "grad_norm": 329.2220153808594, "learning_rate": 8.363830076285606e-10, "loss": 8.8834, "step": 497380 }, { "epoch": 1.0047592690603069, "grad_norm": 248.72828674316406, "learning_rate": 8.300107651859623e-10, "loss": 21.1644, "step": 497390 }, { "epoch": 1.0047794696929908, "grad_norm": 326.47314453125, "learning_rate": 8.236628880914365e-10, "loss": 17.0379, "step": 497400 }, { "epoch": 1.0047996703256745, "grad_norm": 615.4883422851562, "learning_rate": 8.173393763760695e-10, "loss": 24.0844, "step": 497410 }, { "epoch": 1.0048198709583585, "grad_norm": 468.8843688964844, "learning_rate": 8.110402300703924e-10, "loss": 22.9501, "step": 497420 }, { "epoch": 1.0048400715910422, "grad_norm": 204.00584411621094, "learning_rate": 8.047654492054913e-10, "loss": 6.4912, "step": 497430 }, { "epoch": 1.004860272223726, "grad_norm": 430.0341796875, "learning_rate": 7.985150338118974e-10, "loss": 19.2455, "step": 497440 }, { "epoch": 1.0048804728564098, "grad_norm": 214.84359741210938, "learning_rate": 7.92288983920142e-10, "loss": 20.7306, "step": 497450 }, { "epoch": 1.0049006734890937, "grad_norm": 365.97454833984375, "learning_rate": 7.860872995602009e-10, "loss": 11.9094, "step": 497460 }, { "epoch": 1.0049208741217774, "grad_norm": 
416.4472351074219, "learning_rate": 7.799099807626054e-10, "loss": 12.0364, "step": 497470 }, { "epoch": 1.0049410747544614, "grad_norm": 1460.086669921875, "learning_rate": 7.737570275573314e-10, "loss": 19.3271, "step": 497480 }, { "epoch": 1.004961275387145, "grad_norm": 724.4830322265625, "learning_rate": 7.67628439974355e-10, "loss": 14.0878, "step": 497490 }, { "epoch": 1.004981476019829, "grad_norm": 168.28736877441406, "learning_rate": 7.615242180436521e-10, "loss": 10.0536, "step": 497500 }, { "epoch": 1.0050016766525127, "grad_norm": 419.37451171875, "learning_rate": 7.55444361795199e-10, "loss": 11.5683, "step": 497510 }, { "epoch": 1.0050218772851967, "grad_norm": 365.8724060058594, "learning_rate": 7.493888712584163e-10, "loss": 10.9492, "step": 497520 }, { "epoch": 1.0050420779178804, "grad_norm": 379.0532531738281, "learning_rate": 7.433577464621699e-10, "loss": 17.9451, "step": 497530 }, { "epoch": 1.0050622785505643, "grad_norm": 64.69229888916016, "learning_rate": 7.373509874369911e-10, "loss": 17.502, "step": 497540 }, { "epoch": 1.005082479183248, "grad_norm": 791.7032470703125, "learning_rate": 7.313685942117454e-10, "loss": 18.4521, "step": 497550 }, { "epoch": 1.005102679815932, "grad_norm": 498.0197448730469, "learning_rate": 7.254105668152988e-10, "loss": 26.5022, "step": 497560 }, { "epoch": 1.0051228804486156, "grad_norm": 162.7668914794922, "learning_rate": 7.194769052765171e-10, "loss": 10.4517, "step": 497570 }, { "epoch": 1.0051430810812996, "grad_norm": 347.5716857910156, "learning_rate": 7.135676096253763e-10, "loss": 21.9197, "step": 497580 }, { "epoch": 1.0051632817139833, "grad_norm": 225.25503540039062, "learning_rate": 7.076826798890768e-10, "loss": 14.4115, "step": 497590 }, { "epoch": 1.005183482346667, "grad_norm": 222.23606872558594, "learning_rate": 7.018221160981498e-10, "loss": 10.2202, "step": 497600 }, { "epoch": 1.005203682979351, "grad_norm": 390.8809814453125, "learning_rate": 6.959859182792406e-10, "loss": 11.5496, "step": 497610 }, { "epoch": 1.0052238836120346, "grad_norm": 0.0, "learning_rate": 6.901740864623252e-10, "loss": 14.3404, "step": 497620 }, { "epoch": 1.0052440842447186, "grad_norm": 217.84158325195312, "learning_rate": 6.843866206751593e-10, "loss": 6.5539, "step": 497630 }, { "epoch": 1.0052642848774023, "grad_norm": 186.2528076171875, "learning_rate": 6.786235209460534e-10, "loss": 20.873, "step": 497640 }, { "epoch": 1.0052844855100862, "grad_norm": 452.13299560546875, "learning_rate": 6.728847873027633e-10, "loss": 18.4681, "step": 497650 }, { "epoch": 1.00530468614277, "grad_norm": 740.79296875, "learning_rate": 6.671704197735995e-10, "loss": 17.4599, "step": 497660 }, { "epoch": 1.0053248867754538, "grad_norm": 409.2422180175781, "learning_rate": 6.614804183857626e-10, "loss": 19.8806, "step": 497670 }, { "epoch": 1.0053450874081375, "grad_norm": 237.03514099121094, "learning_rate": 6.558147831681183e-10, "loss": 11.4689, "step": 497680 }, { "epoch": 1.0053652880408215, "grad_norm": 358.61102294921875, "learning_rate": 6.501735141478672e-10, "loss": 23.0816, "step": 497690 }, { "epoch": 1.0053854886735052, "grad_norm": 44.100135803222656, "learning_rate": 6.445566113516544e-10, "loss": 15.8719, "step": 497700 }, { "epoch": 1.0054056893061891, "grad_norm": 798.8607788085938, "learning_rate": 6.389640748077907e-10, "loss": 17.1606, "step": 497710 }, { "epoch": 1.0054258899388728, "grad_norm": 247.9388885498047, "learning_rate": 6.333959045434768e-10, "loss": 8.721, "step": 497720 }, { "epoch": 1.0054460905715568, 
"grad_norm": 454.4766540527344, "learning_rate": 6.278521005853578e-10, "loss": 11.72, "step": 497730 }, { "epoch": 1.0054662912042405, "grad_norm": 138.5755157470703, "learning_rate": 6.223326629611893e-10, "loss": 12.6029, "step": 497740 }, { "epoch": 1.0054864918369244, "grad_norm": 273.68939208984375, "learning_rate": 6.168375916970615e-10, "loss": 11.2898, "step": 497750 }, { "epoch": 1.005506692469608, "grad_norm": 74.56575775146484, "learning_rate": 6.11366886820175e-10, "loss": 16.079, "step": 497760 }, { "epoch": 1.005526893102292, "grad_norm": 563.3392333984375, "learning_rate": 6.05920548357175e-10, "loss": 20.3108, "step": 497770 }, { "epoch": 1.0055470937349757, "grad_norm": 344.8863220214844, "learning_rate": 6.00498576334152e-10, "loss": 21.3803, "step": 497780 }, { "epoch": 1.0055672943676597, "grad_norm": 591.0133666992188, "learning_rate": 5.951009707783062e-10, "loss": 15.0725, "step": 497790 }, { "epoch": 1.0055874950003434, "grad_norm": 220.0905303955078, "learning_rate": 5.897277317157279e-10, "loss": 19.6494, "step": 497800 }, { "epoch": 1.0056076956330273, "grad_norm": 382.07415771484375, "learning_rate": 5.843788591725074e-10, "loss": 17.0656, "step": 497810 }, { "epoch": 1.005627896265711, "grad_norm": 285.0874938964844, "learning_rate": 5.790543531741799e-10, "loss": 15.2152, "step": 497820 }, { "epoch": 1.005648096898395, "grad_norm": 224.52911376953125, "learning_rate": 5.737542137479457e-10, "loss": 32.5519, "step": 497830 }, { "epoch": 1.0056682975310787, "grad_norm": 124.53401947021484, "learning_rate": 5.684784409182298e-10, "loss": 11.5611, "step": 497840 }, { "epoch": 1.0056884981637624, "grad_norm": 38.93964767456055, "learning_rate": 5.632270347116775e-10, "loss": 9.7367, "step": 497850 }, { "epoch": 1.0057086987964463, "grad_norm": 280.2174987792969, "learning_rate": 5.579999951532688e-10, "loss": 22.2611, "step": 497860 }, { "epoch": 1.00572889942913, "grad_norm": 653.9365844726562, "learning_rate": 5.527973222690941e-10, "loss": 23.448, "step": 497870 }, { "epoch": 1.005749100061814, "grad_norm": 411.93145751953125, "learning_rate": 5.476190160841333e-10, "loss": 18.0152, "step": 497880 }, { "epoch": 1.0057693006944977, "grad_norm": 292.06671142578125, "learning_rate": 5.424650766239215e-10, "loss": 24.901, "step": 497890 }, { "epoch": 1.0057895013271816, "grad_norm": 1511.5247802734375, "learning_rate": 5.373355039128836e-10, "loss": 25.5441, "step": 497900 }, { "epoch": 1.0058097019598653, "grad_norm": 163.15565490722656, "learning_rate": 5.322302979771099e-10, "loss": 13.7009, "step": 497910 }, { "epoch": 1.0058299025925492, "grad_norm": 459.66351318359375, "learning_rate": 5.271494588404702e-10, "loss": 16.1014, "step": 497920 }, { "epoch": 1.005850103225233, "grad_norm": 14.534080505371094, "learning_rate": 5.220929865284996e-10, "loss": 12.6089, "step": 497930 }, { "epoch": 1.0058703038579169, "grad_norm": 387.5732727050781, "learning_rate": 5.170608810650679e-10, "loss": 22.0805, "step": 497940 }, { "epoch": 1.0058905044906006, "grad_norm": 884.1080322265625, "learning_rate": 5.120531424751551e-10, "loss": 13.3409, "step": 497950 }, { "epoch": 1.0059107051232845, "grad_norm": 547.0142822265625, "learning_rate": 5.070697707837413e-10, "loss": 39.905, "step": 497960 }, { "epoch": 1.0059309057559682, "grad_norm": 529.3761596679688, "learning_rate": 5.02110766013586e-10, "loss": 14.4093, "step": 497970 }, { "epoch": 1.0059511063886521, "grad_norm": 484.35809326171875, "learning_rate": 4.971761281907795e-10, "loss": 15.3145, "step": 497980 }, { 
"epoch": 1.0059713070213359, "grad_norm": 1.8353664875030518, "learning_rate": 4.922658573375261e-10, "loss": 13.9594, "step": 497990 }, { "epoch": 1.0059915076540198, "grad_norm": 90.44378662109375, "learning_rate": 4.87379953478806e-10, "loss": 14.71, "step": 498000 }, { "epoch": 1.0060117082867035, "grad_norm": 336.6805114746094, "learning_rate": 4.825184166384888e-10, "loss": 27.1868, "step": 498010 }, { "epoch": 1.0060319089193874, "grad_norm": 479.4397888183594, "learning_rate": 4.776812468398895e-10, "loss": 9.6992, "step": 498020 }, { "epoch": 1.0060521095520711, "grad_norm": 310.9299011230469, "learning_rate": 4.728684441068776e-10, "loss": 18.8371, "step": 498030 }, { "epoch": 1.006072310184755, "grad_norm": 168.52430725097656, "learning_rate": 4.680800084622128e-10, "loss": 17.1496, "step": 498040 }, { "epoch": 1.0060925108174388, "grad_norm": 8.164981842041016, "learning_rate": 4.6331593993032e-10, "loss": 11.4686, "step": 498050 }, { "epoch": 1.0061127114501227, "grad_norm": 98.24127197265625, "learning_rate": 4.585762385334036e-10, "loss": 4.2927, "step": 498060 }, { "epoch": 1.0061329120828064, "grad_norm": 165.088623046875, "learning_rate": 4.538609042953335e-10, "loss": 15.4089, "step": 498070 }, { "epoch": 1.0061531127154903, "grad_norm": 589.4951782226562, "learning_rate": 4.49169937238314e-10, "loss": 12.651, "step": 498080 }, { "epoch": 1.006173313348174, "grad_norm": 113.78429412841797, "learning_rate": 4.445033373862151e-10, "loss": 14.0095, "step": 498090 }, { "epoch": 1.006193513980858, "grad_norm": 479.8966064453125, "learning_rate": 4.398611047612411e-10, "loss": 17.6846, "step": 498100 }, { "epoch": 1.0062137146135417, "grad_norm": 600.0286254882812, "learning_rate": 4.3524323938559655e-10, "loss": 16.7164, "step": 498110 }, { "epoch": 1.0062339152462254, "grad_norm": 245.16664123535156, "learning_rate": 4.3064974128259605e-10, "loss": 11.4906, "step": 498120 }, { "epoch": 1.0062541158789093, "grad_norm": 233.82284545898438, "learning_rate": 4.2608061047388905e-10, "loss": 18.9105, "step": 498130 }, { "epoch": 1.006274316511593, "grad_norm": 74.9719009399414, "learning_rate": 4.21535846982235e-10, "loss": 11.5325, "step": 498140 }, { "epoch": 1.006294517144277, "grad_norm": 379.4974365234375, "learning_rate": 4.1701545082928343e-10, "loss": 26.145, "step": 498150 }, { "epoch": 1.0063147177769607, "grad_norm": 402.6711120605469, "learning_rate": 4.125194220377937e-10, "loss": 18.0747, "step": 498160 }, { "epoch": 1.0063349184096446, "grad_norm": 253.51841735839844, "learning_rate": 4.0804776062941533e-10, "loss": 5.7595, "step": 498170 }, { "epoch": 1.0063551190423283, "grad_norm": 314.94598388671875, "learning_rate": 4.0360046662579753e-10, "loss": 13.1086, "step": 498180 }, { "epoch": 1.0063753196750123, "grad_norm": 128.70509338378906, "learning_rate": 3.991775400485898e-10, "loss": 19.3338, "step": 498190 }, { "epoch": 1.006395520307696, "grad_norm": 373.6906433105469, "learning_rate": 3.9477898091944135e-10, "loss": 24.4669, "step": 498200 }, { "epoch": 1.00641572094038, "grad_norm": 681.62451171875, "learning_rate": 3.9040478925944645e-10, "loss": 32.6684, "step": 498210 }, { "epoch": 1.0064359215730636, "grad_norm": 95.27474975585938, "learning_rate": 3.8605496509080966e-10, "loss": 11.4944, "step": 498220 }, { "epoch": 1.0064561222057475, "grad_norm": 506.1835021972656, "learning_rate": 3.8172950843351485e-10, "loss": 26.6546, "step": 498230 }, { "epoch": 1.0064763228384312, "grad_norm": 118.24510192871094, "learning_rate": 3.774284193097666e-10, "loss": 
8.1574, "step": 498240 }, { "epoch": 1.0064965234711152, "grad_norm": 354.12835693359375, "learning_rate": 3.7315169774010397e-10, "loss": 9.9255, "step": 498250 }, { "epoch": 1.0065167241037989, "grad_norm": 302.7308654785156, "learning_rate": 3.6889934374506606e-10, "loss": 29.9269, "step": 498260 }, { "epoch": 1.0065369247364828, "grad_norm": 262.63055419921875, "learning_rate": 3.646713573457472e-10, "loss": 17.3862, "step": 498270 }, { "epoch": 1.0065571253691665, "grad_norm": 368.1761474609375, "learning_rate": 3.604677385626865e-10, "loss": 13.9558, "step": 498280 }, { "epoch": 1.0065773260018505, "grad_norm": 139.5403289794922, "learning_rate": 3.562884874158679e-10, "loss": 14.0402, "step": 498290 }, { "epoch": 1.0065975266345342, "grad_norm": 166.00411987304688, "learning_rate": 3.521336039263856e-10, "loss": 4.6587, "step": 498300 }, { "epoch": 1.006617727267218, "grad_norm": 315.66888427734375, "learning_rate": 3.480030881147789e-10, "loss": 11.2647, "step": 498310 }, { "epoch": 1.0066379278999018, "grad_norm": 592.1396484375, "learning_rate": 3.4389693999992146e-10, "loss": 17.5871, "step": 498320 }, { "epoch": 1.0066581285325857, "grad_norm": 167.7822265625, "learning_rate": 3.3981515960290757e-10, "loss": 14.7794, "step": 498330 }, { "epoch": 1.0066783291652694, "grad_norm": 191.7164306640625, "learning_rate": 3.357577469431661e-10, "loss": 16.5493, "step": 498340 }, { "epoch": 1.0066985297979534, "grad_norm": 135.2027587890625, "learning_rate": 3.3172470204012597e-10, "loss": 14.2077, "step": 498350 }, { "epoch": 1.006718730430637, "grad_norm": 565.5507202148438, "learning_rate": 3.277160249143263e-10, "loss": 30.6504, "step": 498360 }, { "epoch": 1.0067389310633208, "grad_norm": 243.60830688476562, "learning_rate": 3.237317155846409e-10, "loss": 13.5072, "step": 498370 }, { "epoch": 1.0067591316960047, "grad_norm": 880.7012329101562, "learning_rate": 3.1977177407105376e-10, "loss": 13.7841, "step": 498380 }, { "epoch": 1.0067793323286884, "grad_norm": 417.2605895996094, "learning_rate": 3.158362003918836e-10, "loss": 10.2344, "step": 498390 }, { "epoch": 1.0067995329613724, "grad_norm": 372.4492492675781, "learning_rate": 3.1192499456766947e-10, "loss": 17.3512, "step": 498400 }, { "epoch": 1.006819733594056, "grad_norm": 58.83364486694336, "learning_rate": 3.0803815661617495e-10, "loss": 11.6652, "step": 498410 }, { "epoch": 1.00683993422674, "grad_norm": 2088.984375, "learning_rate": 3.0417568655738416e-10, "loss": 14.8833, "step": 498420 }, { "epoch": 1.0068601348594237, "grad_norm": 418.00531005859375, "learning_rate": 3.003375844090606e-10, "loss": 19.6728, "step": 498430 }, { "epoch": 1.0068803354921076, "grad_norm": 490.3150634765625, "learning_rate": 2.9652385019118823e-10, "loss": 18.3427, "step": 498440 }, { "epoch": 1.0069005361247914, "grad_norm": 13.315130233764648, "learning_rate": 2.9273448392097557e-10, "loss": 17.9149, "step": 498450 }, { "epoch": 1.0069207367574753, "grad_norm": 384.3954772949219, "learning_rate": 2.8896948561785156e-10, "loss": 22.0337, "step": 498460 }, { "epoch": 1.006940937390159, "grad_norm": 199.98281860351562, "learning_rate": 2.8522885530013475e-10, "loss": 15.961, "step": 498470 }, { "epoch": 1.006961138022843, "grad_norm": 133.38661193847656, "learning_rate": 2.8151259298558884e-10, "loss": 12.1903, "step": 498480 }, { "epoch": 1.0069813386555266, "grad_norm": 370.10784912109375, "learning_rate": 2.7782069869253247e-10, "loss": 10.3582, "step": 498490 }, { "epoch": 1.0070015392882106, "grad_norm": 646.2947387695312, 
"learning_rate": 2.741531724392843e-10, "loss": 13.1196, "step": 498500 }, { "epoch": 1.0070217399208943, "grad_norm": 270.6547546386719, "learning_rate": 2.705100142430528e-10, "loss": 18.6352, "step": 498510 }, { "epoch": 1.0070419405535782, "grad_norm": 250.91160583496094, "learning_rate": 2.668912241221566e-10, "loss": 13.065, "step": 498520 }, { "epoch": 1.007062141186262, "grad_norm": 208.54881286621094, "learning_rate": 2.6329680209435935e-10, "loss": 17.9283, "step": 498530 }, { "epoch": 1.0070823418189458, "grad_norm": 285.6217041015625, "learning_rate": 2.597267481763144e-10, "loss": 16.4179, "step": 498540 }, { "epoch": 1.0071025424516296, "grad_norm": 315.50543212890625, "learning_rate": 2.5618106238634033e-10, "loss": 16.7256, "step": 498550 }, { "epoch": 1.0071227430843135, "grad_norm": 163.26913452148438, "learning_rate": 2.5265974474109054e-10, "loss": 10.2823, "step": 498560 }, { "epoch": 1.0071429437169972, "grad_norm": 439.5351867675781, "learning_rate": 2.4916279525777356e-10, "loss": 11.0456, "step": 498570 }, { "epoch": 1.0071631443496811, "grad_norm": 148.00035095214844, "learning_rate": 2.4569021395415283e-10, "loss": 18.7685, "step": 498580 }, { "epoch": 1.0071833449823648, "grad_norm": 350.3221740722656, "learning_rate": 2.4224200084632664e-10, "loss": 6.9299, "step": 498590 }, { "epoch": 1.0072035456150488, "grad_norm": 314.0454406738281, "learning_rate": 2.388181559515035e-10, "loss": 10.4755, "step": 498600 }, { "epoch": 1.0072237462477325, "grad_norm": 290.7841796875, "learning_rate": 2.3541867928633665e-10, "loss": 26.1827, "step": 498610 }, { "epoch": 1.0072439468804162, "grad_norm": 380.9477844238281, "learning_rate": 2.3204357086747952e-10, "loss": 11.4454, "step": 498620 }, { "epoch": 1.0072641475131001, "grad_norm": 363.6810607910156, "learning_rate": 2.2869283071103032e-10, "loss": 13.2469, "step": 498630 }, { "epoch": 1.0072843481457838, "grad_norm": 462.4739685058594, "learning_rate": 2.2536645883308728e-10, "loss": 15.2276, "step": 498640 }, { "epoch": 1.0073045487784678, "grad_norm": 88.65006256103516, "learning_rate": 2.2206445525085886e-10, "loss": 11.941, "step": 498650 }, { "epoch": 1.0073247494111515, "grad_norm": 690.76416015625, "learning_rate": 2.1878681997988816e-10, "loss": 19.5642, "step": 498660 }, { "epoch": 1.0073449500438354, "grad_norm": 256.72381591796875, "learning_rate": 2.1553355303627343e-10, "loss": 11.1942, "step": 498670 }, { "epoch": 1.007365150676519, "grad_norm": 402.1224670410156, "learning_rate": 2.123046544355578e-10, "loss": 25.1618, "step": 498680 }, { "epoch": 1.007385351309203, "grad_norm": 367.8741149902344, "learning_rate": 2.091001241932844e-10, "loss": 13.3203, "step": 498690 }, { "epoch": 1.0074055519418867, "grad_norm": 30.525959014892578, "learning_rate": 2.0591996232610656e-10, "loss": 17.055, "step": 498700 }, { "epoch": 1.0074257525745707, "grad_norm": 425.9206237792969, "learning_rate": 2.0276416884845718e-10, "loss": 12.6519, "step": 498710 }, { "epoch": 1.0074459532072544, "grad_norm": 529.9754638671875, "learning_rate": 1.9963274377643448e-10, "loss": 14.4685, "step": 498720 }, { "epoch": 1.0074661538399383, "grad_norm": 270.33941650390625, "learning_rate": 1.965256871244714e-10, "loss": 18.3609, "step": 498730 }, { "epoch": 1.007486354472622, "grad_norm": 296.189208984375, "learning_rate": 1.9344299890866614e-10, "loss": 19.2853, "step": 498740 }, { "epoch": 1.007506555105306, "grad_norm": 559.3936157226562, "learning_rate": 1.903846791434516e-10, "loss": 17.3757, "step": 498750 }, { "epoch": 
1.0075267557379897, "grad_norm": 5.866914749145508, "learning_rate": 1.873507278438158e-10, "loss": 13.317, "step": 498760 }, { "epoch": 1.0075469563706736, "grad_norm": 280.8948974609375, "learning_rate": 1.8434114502530187e-10, "loss": 16.1338, "step": 498770 }, { "epoch": 1.0075671570033573, "grad_norm": 569.0048217773438, "learning_rate": 1.8135593070123246e-10, "loss": 27.7739, "step": 498780 }, { "epoch": 1.0075873576360412, "grad_norm": 373.78021240234375, "learning_rate": 1.7839508488715075e-10, "loss": 15.2462, "step": 498790 }, { "epoch": 1.007607558268725, "grad_norm": 307.22509765625, "learning_rate": 1.7545860759693446e-10, "loss": 7.8948, "step": 498800 }, { "epoch": 1.0076277589014089, "grad_norm": 312.2191467285156, "learning_rate": 1.725464988450165e-10, "loss": 7.6725, "step": 498810 }, { "epoch": 1.0076479595340926, "grad_norm": 427.0631103515625, "learning_rate": 1.6965875864582983e-10, "loss": 13.7366, "step": 498820 }, { "epoch": 1.0076681601667765, "grad_norm": 285.7724914550781, "learning_rate": 1.6679538701325215e-10, "loss": 23.4431, "step": 498830 }, { "epoch": 1.0076883607994602, "grad_norm": 444.3369445800781, "learning_rate": 1.6395638396171643e-10, "loss": 11.7128, "step": 498840 }, { "epoch": 1.0077085614321442, "grad_norm": 323.95343017578125, "learning_rate": 1.611417495045453e-10, "loss": 19.7216, "step": 498850 }, { "epoch": 1.0077287620648279, "grad_norm": 865.90771484375, "learning_rate": 1.5835148365506148e-10, "loss": 26.0676, "step": 498860 }, { "epoch": 1.0077489626975118, "grad_norm": 140.72048950195312, "learning_rate": 1.5558558642769782e-10, "loss": 13.2926, "step": 498870 }, { "epoch": 1.0077691633301955, "grad_norm": 154.49485778808594, "learning_rate": 1.5284405783577706e-10, "loss": 14.5025, "step": 498880 }, { "epoch": 1.0077893639628792, "grad_norm": 326.7127685546875, "learning_rate": 1.501268978920667e-10, "loss": 15.5334, "step": 498890 }, { "epoch": 1.0078095645955631, "grad_norm": 357.9048767089844, "learning_rate": 1.4743410661044454e-10, "loss": 18.7476, "step": 498900 }, { "epoch": 1.0078297652282469, "grad_norm": 217.31222534179688, "learning_rate": 1.4476568400367819e-10, "loss": 6.5501, "step": 498910 }, { "epoch": 1.0078499658609308, "grad_norm": 693.2562255859375, "learning_rate": 1.4212163008509028e-10, "loss": 19.8704, "step": 498920 }, { "epoch": 1.0078701664936145, "grad_norm": 393.9979248046875, "learning_rate": 1.3950194486744838e-10, "loss": 16.0629, "step": 498930 }, { "epoch": 1.0078903671262984, "grad_norm": 578.2315063476562, "learning_rate": 1.369066283635201e-10, "loss": 15.7025, "step": 498940 }, { "epoch": 1.0079105677589821, "grad_norm": 516.0930786132812, "learning_rate": 1.3433568058607293e-10, "loss": 21.4167, "step": 498950 }, { "epoch": 1.007930768391666, "grad_norm": 320.8323669433594, "learning_rate": 1.3178910154676427e-10, "loss": 16.2749, "step": 498960 }, { "epoch": 1.0079509690243498, "grad_norm": 651.0137329101562, "learning_rate": 1.292668912594719e-10, "loss": 14.6414, "step": 498970 }, { "epoch": 1.0079711696570337, "grad_norm": 120.41197204589844, "learning_rate": 1.2676904973529802e-10, "loss": 32.7481, "step": 498980 }, { "epoch": 1.0079913702897174, "grad_norm": 148.75979614257812, "learning_rate": 1.2429557698645512e-10, "loss": 16.3187, "step": 498990 }, { "epoch": 1.0080115709224013, "grad_norm": 297.499755859375, "learning_rate": 1.2184647302626585e-10, "loss": 11.6246, "step": 499000 }, { "epoch": 1.008031771555085, "grad_norm": 32.80855178833008, "learning_rate": 
1.1942173786527732e-10, "loss": 14.9501, "step": 499010 }, { "epoch": 1.008051972187769, "grad_norm": 151.65139770507812, "learning_rate": 1.1702137151570203e-10, "loss": 13.6701, "step": 499020 }, { "epoch": 1.0080721728204527, "grad_norm": 720.9283447265625, "learning_rate": 1.146453739897524e-10, "loss": 14.7302, "step": 499030 }, { "epoch": 1.0080923734531366, "grad_norm": 1566.2060546875, "learning_rate": 1.1229374529797555e-10, "loss": 20.6653, "step": 499040 }, { "epoch": 1.0081125740858203, "grad_norm": 130.35076904296875, "learning_rate": 1.0996648545313904e-10, "loss": 8.3957, "step": 499050 }, { "epoch": 1.0081327747185043, "grad_norm": 219.70742797851562, "learning_rate": 1.0766359446579e-10, "loss": 11.406, "step": 499060 }, { "epoch": 1.008152975351188, "grad_norm": 95.866455078125, "learning_rate": 1.0538507234703066e-10, "loss": 14.2112, "step": 499070 }, { "epoch": 1.008173175983872, "grad_norm": 11.98952579498291, "learning_rate": 1.0313091910796324e-10, "loss": 13.1056, "step": 499080 }, { "epoch": 1.0081933766165556, "grad_norm": 3.0641424655914307, "learning_rate": 1.009011347602451e-10, "loss": 11.0817, "step": 499090 }, { "epoch": 1.0082135772492395, "grad_norm": 308.2926025390625, "learning_rate": 9.869571931442334e-11, "loss": 15.2073, "step": 499100 }, { "epoch": 1.0082337778819233, "grad_norm": 312.8187255859375, "learning_rate": 9.65146727810451e-11, "loss": 14.3809, "step": 499110 }, { "epoch": 1.0082539785146072, "grad_norm": 122.9480972290039, "learning_rate": 9.435799517065746e-11, "loss": 12.6968, "step": 499120 }, { "epoch": 1.008274179147291, "grad_norm": 447.3537902832031, "learning_rate": 9.222568649380759e-11, "loss": 29.2527, "step": 499130 }, { "epoch": 1.0082943797799746, "grad_norm": 555.0584106445312, "learning_rate": 9.011774676159767e-11, "loss": 10.0256, "step": 499140 }, { "epoch": 1.0083145804126585, "grad_norm": 856.760498046875, "learning_rate": 8.803417598346465e-11, "loss": 22.6539, "step": 499150 }, { "epoch": 1.0083347810453422, "grad_norm": 121.70764923095703, "learning_rate": 8.597497416940048e-11, "loss": 17.7426, "step": 499160 }, { "epoch": 1.0083549816780262, "grad_norm": 831.8187255859375, "learning_rate": 8.394014133050743e-11, "loss": 19.8091, "step": 499170 }, { "epoch": 1.0083751823107099, "grad_norm": 0.0, "learning_rate": 8.192967747566727e-11, "loss": 16.5445, "step": 499180 }, { "epoch": 1.0083953829433938, "grad_norm": 333.3159484863281, "learning_rate": 7.994358261542712e-11, "loss": 29.0098, "step": 499190 }, { "epoch": 1.0084155835760775, "grad_norm": 169.3303680419922, "learning_rate": 7.798185675866876e-11, "loss": 10.5793, "step": 499200 }, { "epoch": 1.0084357842087615, "grad_norm": 539.6227416992188, "learning_rate": 7.604449991593932e-11, "loss": 16.7954, "step": 499210 }, { "epoch": 1.0084559848414452, "grad_norm": 514.5540161132812, "learning_rate": 7.413151209612057e-11, "loss": 17.1064, "step": 499220 }, { "epoch": 1.008476185474129, "grad_norm": 317.1817321777344, "learning_rate": 7.224289330809431e-11, "loss": 13.7756, "step": 499230 }, { "epoch": 1.0084963861068128, "grad_norm": 243.17190551757812, "learning_rate": 7.037864356185254e-11, "loss": 9.7673, "step": 499240 }, { "epoch": 1.0085165867394967, "grad_norm": 239.8211212158203, "learning_rate": 6.853876286627703e-11, "loss": 15.4792, "step": 499250 }, { "epoch": 1.0085367873721804, "grad_norm": 420.01849365234375, "learning_rate": 6.672325122969447e-11, "loss": 27.2993, "step": 499260 }, { "epoch": 1.0085569880048644, "grad_norm": 167.06922912597656, 
"learning_rate": 6.493210866209687e-11, "loss": 9.2243, "step": 499270 }, { "epoch": 1.008577188637548, "grad_norm": 240.788818359375, "learning_rate": 6.316533517125578e-11, "loss": 16.8287, "step": 499280 }, { "epoch": 1.008597389270232, "grad_norm": 162.93699645996094, "learning_rate": 6.142293076605299e-11, "loss": 13.0423, "step": 499290 }, { "epoch": 1.0086175899029157, "grad_norm": 256.6506042480469, "learning_rate": 5.970489545537028e-11, "loss": 14.9599, "step": 499300 }, { "epoch": 1.0086377905355997, "grad_norm": 299.09710693359375, "learning_rate": 5.801122924697922e-11, "loss": 9.736, "step": 499310 }, { "epoch": 1.0086579911682834, "grad_norm": 406.6426696777344, "learning_rate": 5.634193214976158e-11, "loss": 23.1644, "step": 499320 }, { "epoch": 1.0086781918009673, "grad_norm": 885.6886596679688, "learning_rate": 5.469700417093382e-11, "loss": 26.4493, "step": 499330 }, { "epoch": 1.008698392433651, "grad_norm": 235.24378967285156, "learning_rate": 5.3076445319932835e-11, "loss": 13.3897, "step": 499340 }, { "epoch": 1.008718593066335, "grad_norm": 318.3589782714844, "learning_rate": 5.148025560341996e-11, "loss": 10.6445, "step": 499350 }, { "epoch": 1.0087387936990186, "grad_norm": 773.137939453125, "learning_rate": 4.990843502916676e-11, "loss": 22.0634, "step": 499360 }, { "epoch": 1.0087589943317026, "grad_norm": 497.8670654296875, "learning_rate": 4.83609836054999e-11, "loss": 17.3824, "step": 499370 }, { "epoch": 1.0087791949643863, "grad_norm": 262.0536193847656, "learning_rate": 4.683790134019095e-11, "loss": 15.0474, "step": 499380 }, { "epoch": 1.00879939559707, "grad_norm": 205.39247131347656, "learning_rate": 4.533918823934613e-11, "loss": 16.6214, "step": 499390 }, { "epoch": 1.008819596229754, "grad_norm": 245.76242065429688, "learning_rate": 4.3864844311847235e-11, "loss": 13.7095, "step": 499400 }, { "epoch": 1.0088397968624376, "grad_norm": 106.1578598022461, "learning_rate": 4.2414869563800475e-11, "loss": 12.3088, "step": 499410 }, { "epoch": 1.0088599974951216, "grad_norm": 225.17047119140625, "learning_rate": 4.0989264002422315e-11, "loss": 21.1545, "step": 499420 }, { "epoch": 1.0088801981278053, "grad_norm": 1929.8975830078125, "learning_rate": 3.9588027634929195e-11, "loss": 29.5104, "step": 499430 }, { "epoch": 1.0089003987604892, "grad_norm": 284.91241455078125, "learning_rate": 3.8211160467982453e-11, "loss": 21.3082, "step": 499440 }, { "epoch": 1.008920599393173, "grad_norm": 128.1702880859375, "learning_rate": 3.685866250879855e-11, "loss": 8.1251, "step": 499450 }, { "epoch": 1.0089408000258568, "grad_norm": 333.2735595703125, "learning_rate": 3.55305337634837e-11, "loss": 10.4189, "step": 499460 }, { "epoch": 1.0089610006585406, "grad_norm": 577.0447387695312, "learning_rate": 3.4226774238144135e-11, "loss": 20.4437, "step": 499470 }, { "epoch": 1.0089812012912245, "grad_norm": 200.242431640625, "learning_rate": 3.29473839399963e-11, "loss": 8.238, "step": 499480 }, { "epoch": 1.0090014019239082, "grad_norm": 446.62591552734375, "learning_rate": 3.169236287459132e-11, "loss": 21.3551, "step": 499490 }, { "epoch": 1.0090216025565921, "grad_norm": 245.18592834472656, "learning_rate": 3.0461711048035415e-11, "loss": 14.5327, "step": 499500 }, { "epoch": 1.0090418031892758, "grad_norm": 302.41912841796875, "learning_rate": 2.925542846698992e-11, "loss": 16.4099, "step": 499510 }, { "epoch": 1.0090620038219598, "grad_norm": 134.86671447753906, "learning_rate": 2.8073515137005957e-11, "loss": 22.4217, "step": 499520 }, { "epoch": 
1.0090822044546435, "grad_norm": 9.022638320922852, "learning_rate": 2.6915971063079527e-11, "loss": 11.245, "step": 499530 }, { "epoch": 1.0091024050873274, "grad_norm": 97.82958221435547, "learning_rate": 2.5782796252427078e-11, "loss": 21.2855, "step": 499540 }, { "epoch": 1.0091226057200111, "grad_norm": 312.6911315917969, "learning_rate": 2.467399070893439e-11, "loss": 11.174, "step": 499550 }, { "epoch": 1.009142806352695, "grad_norm": 215.1186981201172, "learning_rate": 2.3589554439262807e-11, "loss": 9.1901, "step": 499560 }, { "epoch": 1.0091630069853788, "grad_norm": 136.40174865722656, "learning_rate": 2.252948744840833e-11, "loss": 16.0177, "step": 499570 }, { "epoch": 1.0091832076180627, "grad_norm": 324.7275085449219, "learning_rate": 2.1493789740811843e-11, "loss": 24.6827, "step": 499580 }, { "epoch": 1.0092034082507464, "grad_norm": 0.0, "learning_rate": 2.048246132202447e-11, "loss": 16.4556, "step": 499590 }, { "epoch": 1.0092236088834303, "grad_norm": 422.9095153808594, "learning_rate": 1.9495502197042214e-11, "loss": 18.1066, "step": 499600 }, { "epoch": 1.009243809516114, "grad_norm": 362.4075622558594, "learning_rate": 1.8532912370861077e-11, "loss": 17.0982, "step": 499610 }, { "epoch": 1.009264010148798, "grad_norm": 233.91114807128906, "learning_rate": 1.759469184792195e-11, "loss": 16.0633, "step": 499620 }, { "epoch": 1.0092842107814817, "grad_norm": 72.83248138427734, "learning_rate": 1.668084063266573e-11, "loss": 15.5275, "step": 499630 }, { "epoch": 1.0093044114141656, "grad_norm": 169.59181213378906, "learning_rate": 1.57913587295333e-11, "loss": 15.0927, "step": 499640 }, { "epoch": 1.0093246120468493, "grad_norm": 277.2950134277344, "learning_rate": 1.4926246142965562e-11, "loss": 19.717, "step": 499650 }, { "epoch": 1.009344812679533, "grad_norm": 350.11395263671875, "learning_rate": 1.40855028774034e-11, "loss": 12.4338, "step": 499660 }, { "epoch": 1.009365013312217, "grad_norm": 398.5290832519531, "learning_rate": 1.32691289367326e-11, "loss": 16.4635, "step": 499670 }, { "epoch": 1.0093852139449007, "grad_norm": 327.2204895019531, "learning_rate": 1.2477124325394052e-11, "loss": 13.1397, "step": 499680 }, { "epoch": 1.0094054145775846, "grad_norm": 458.7875671386719, "learning_rate": 1.1709489046163313e-11, "loss": 12.1547, "step": 499690 }, { "epoch": 1.0094256152102683, "grad_norm": 587.9765014648438, "learning_rate": 1.0966223103481278e-11, "loss": 20.2309, "step": 499700 }, { "epoch": 1.0094458158429522, "grad_norm": 238.51296997070312, "learning_rate": 1.0247326501233723e-11, "loss": 16.5125, "step": 499710 }, { "epoch": 1.009466016475636, "grad_norm": 115.95252227783203, "learning_rate": 9.55279924275132e-12, "loss": 20.8501, "step": 499720 }, { "epoch": 1.0094862171083199, "grad_norm": 163.24656677246094, "learning_rate": 8.882641330809627e-12, "loss": 22.1904, "step": 499730 }, { "epoch": 1.0095064177410036, "grad_norm": 232.28338623046875, "learning_rate": 8.236852769294424e-12, "loss": 9.229, "step": 499740 }, { "epoch": 1.0095266183736875, "grad_norm": 515.9867553710938, "learning_rate": 7.615433561536379e-12, "loss": 20.9562, "step": 499750 }, { "epoch": 1.0095468190063712, "grad_norm": 247.70721435546875, "learning_rate": 7.018383709755938e-12, "loss": 20.4152, "step": 499760 }, { "epoch": 1.0095670196390552, "grad_norm": 192.70872497558594, "learning_rate": 6.445703217838883e-12, "loss": 19.3428, "step": 499770 }, { "epoch": 1.0095872202717389, "grad_norm": 335.126220703125, "learning_rate": 5.89739208800566e-12, "loss": 8.8592, "step": 
499780 }, { "epoch": 1.0096074209044228, "grad_norm": 164.87554931640625, "learning_rate": 5.373450322476714e-12, "loss": 11.72, "step": 499790 }, { "epoch": 1.0096276215371065, "grad_norm": 360.1464538574219, "learning_rate": 4.873877924582715e-12, "loss": 16.2168, "step": 499800 }, { "epoch": 1.0096478221697904, "grad_norm": 190.5187225341797, "learning_rate": 4.398674896544109e-12, "loss": 9.1129, "step": 499810 }, { "epoch": 1.0096680228024741, "grad_norm": 275.7220458984375, "learning_rate": 3.947841241136452e-12, "loss": 10.8498, "step": 499820 }, { "epoch": 1.009688223435158, "grad_norm": 530.927490234375, "learning_rate": 3.5213769594699687e-12, "loss": 12.1524, "step": 499830 }, { "epoch": 1.0097084240678418, "grad_norm": 36.31602478027344, "learning_rate": 3.119282054320216e-12, "loss": 24.7156, "step": 499840 }, { "epoch": 1.0097286247005257, "grad_norm": 366.23614501953125, "learning_rate": 2.741556527352529e-12, "loss": 18.4046, "step": 499850 }, { "epoch": 1.0097488253332094, "grad_norm": 598.1944580078125, "learning_rate": 2.388200380787353e-12, "loss": 15.4348, "step": 499860 }, { "epoch": 1.0097690259658934, "grad_norm": 451.7855529785156, "learning_rate": 2.0592136162900234e-12, "loss": 26.9653, "step": 499870 }, { "epoch": 1.009789226598577, "grad_norm": 644.4403076171875, "learning_rate": 1.754596235525874e-12, "loss": 16.8859, "step": 499880 }, { "epoch": 1.009809427231261, "grad_norm": 555.8035888671875, "learning_rate": 1.4743482390500164e-12, "loss": 10.2884, "step": 499890 }, { "epoch": 1.0098296278639447, "grad_norm": 84.61672973632812, "learning_rate": 1.2184696296380083e-12, "loss": 21.937, "step": 499900 }, { "epoch": 1.0098498284966284, "grad_norm": 516.1853637695312, "learning_rate": 9.869604078449612e-13, "loss": 20.4575, "step": 499910 }, { "epoch": 1.0098700291293123, "grad_norm": 22.995227813720703, "learning_rate": 7.798205742259868e-13, "loss": 25.0565, "step": 499920 }, { "epoch": 1.009890229761996, "grad_norm": 109.76983642578125, "learning_rate": 5.970501310015308e-13, "loss": 18.0619, "step": 499930 }, { "epoch": 1.00991043039468, "grad_norm": 89.95011901855469, "learning_rate": 4.386490781715935e-13, "loss": 22.4786, "step": 499940 }, { "epoch": 1.0099306310273637, "grad_norm": 546.2017211914062, "learning_rate": 3.046174168463978e-13, "loss": 16.8511, "step": 499950 }, { "epoch": 1.0099508316600476, "grad_norm": 125.24714660644531, "learning_rate": 1.9495514758105516e-13, "loss": 25.8975, "step": 499960 }, { "epoch": 1.0099710322927313, "grad_norm": 319.3497619628906, "learning_rate": 1.0966227093067716e-13, "loss": 18.7779, "step": 499970 }, { "epoch": 1.0099912329254153, "grad_norm": 271.3671569824219, "learning_rate": 4.873878689526379e-14, "loss": 13.7524, "step": 499980 }, { "epoch": 1.010011433558099, "grad_norm": 145.14639282226562, "learning_rate": 1.218469658503807e-14, "loss": 12.4994, "step": 499990 }, { "epoch": 1.010031634190783, "grad_norm": 309.948486328125, "learning_rate": 0.0, "loss": 8.6393, "step": 500000 } ], "logging_steps": 10, "max_steps": 500000, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 4000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }
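
A minimal sketch of how a log in this layout could be inspected, assuming only the structure visible above (a "log_history" list of records carrying "epoch", "grad_norm", "learning_rate", "loss", and "step"); the file path "trainer_state.json", the output filename, and the smoothing window are illustrative choices, not part of the log itself.

# Sketch: load a trainer_state.json-style log and plot the (noisy) training loss.
# Assumes the layout shown above; path, output name, and window size are illustrative.
import json

import matplotlib.pyplot as plt

with open("trainer_state.json") as f:
    state = json.load(f)

# Keep only records that actually carry a training loss.
records = [r for r in state["log_history"] if "loss" in r]
steps = [r["step"] for r in records]
losses = [r["loss"] for r in records]

# Simple trailing moving average to smooth the per-10-step loss values.
window = 100
smoothed = []
for i in range(len(losses)):
    start = max(0, i - window + 1)
    smoothed.append(sum(losses[start:i + 1]) / (i - start + 1))

plt.plot(steps, losses, alpha=0.3, label="loss (per logging step)")
plt.plot(steps, smoothed, label=f"moving average (window={window})")
plt.xlabel("step")
plt.ylabel("loss")
plt.legend()
plt.tight_layout()
plt.savefig("loss_curve.png")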